aboutsummaryrefslogtreecommitdiff
path: root/src/common/module.h
blob: 28e8e9c50961332d485edb6e5a971e2401bc69a4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
// -*- mode: c++ -*-

// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// module.h: Define google_breakpad::Module. A Module holds debugging
// information, and can write that information out as a Breakpad
// symbol file.

#ifndef COMMON_LINUX_MODULE_H__
#define COMMON_LINUX_MODULE_H__

#include <functional>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "common/string_view.h"
#include "common/symbol_data.h"
#include "common/unordered.h"
#include "common/using_std_string.h"
#include "google_breakpad/common/breakpad_types.h"

namespace google_breakpad {

using std::set;
using std::vector;
using std::map;

// A Module represents the contents of a module, and supports methods
// for adding information produced by parsing STABS or DWARF data
// --- possibly both from the same file --- and then writing out the
// unified contents as a Breakpad-format symbol file.
class Module {
 public:
  // The type of addresses and sizes in a symbol table.
  typedef uint64_t Address;
  static constexpr uint64_t kMaxAddress = std::numeric_limits<Address>::max();
  struct File;
  struct Function;
  struct InlineOrigin;
  struct Inline;
  struct Line;
  struct Extern;

  // Addresses appearing in File, Function, and Line structures are
  // absolute, not relative to the the module's load address.  That
  // is, if the module were loaded at its nominal load address, the
  // addresses would be correct.

  // A source file.
  struct File {
    explicit File(const string& name_input) : name(name_input), source_id(0) {}

    // The name of the source file.
    const string name;

    // The file's source id.  The Write member function clears this
    // field and assigns source ids a fresh, so any value placed here
    // before calling Write will be lost.
    int source_id;
  };

  // An address range.
  struct Range {
    Range(const Address address_input, const Address size_input) :
        address(address_input), size(size_input) { }

    Address address;
    Address size;
  };

  // A function.
  struct Function {
    Function(StringView name_input, const Address& address_input) :
        name(name_input), address(address_input), parameter_size(0) {}

    // For sorting by address.  (Not style-guide compliant, but it's
    // stupid not to put this in the struct.)
    static bool CompareByAddress(const Function* x, const Function* y) {
      return x->address < y->address;
    }

    // The function's name.
    StringView name;

    // The start address and the address ranges covered by the function.
    const Address address;
    vector<Range> ranges;

    // The function's parameter size.
    Address parameter_size;

    // Source lines belonging to this function, sorted by increasing
    // address.
    vector<Line> lines;

    // Inlined call sites belonging to this functions.
    vector<std::unique_ptr<Inline>> inlines;

    // If this symbol has been folded with other symbols in the linked binary.
    bool is_multiple = false;

    // If the function's name should be filled out from a matching Extern,
    // should they not match.
    bool prefer_extern_name = false;
  };

  struct InlineOrigin {
    explicit InlineOrigin(StringView name) : id(-1), name(name) {}

    // A unique id for each InlineOrigin object. INLINE records use the id to
    // refer to its INLINE_ORIGIN record.
    int id;

    // The inlined function's name.
    StringView name;
  };

  // A inlined call site.
  struct Inline {
    Inline(InlineOrigin* origin,
           const vector<Range>& ranges,
           int call_site_line,
           int call_site_file_id,
           int inline_nest_level,
           vector<std::unique_ptr<Inline>> child_inlines)
        : origin(origin),
          ranges(ranges),
          call_site_line(call_site_line),
          call_site_file_id(call_site_file_id),
          call_site_file(nullptr),
          inline_nest_level(inline_nest_level),
          child_inlines(std::move(child_inlines)) {}

    InlineOrigin* origin;

    // The list of addresses and sizes.
    vector<Range> ranges;

    int call_site_line;

    // The id is only meanful inside a CU. It's only used for looking up real
    // File* after scanning a CU.
    int call_site_file_id;

    File* call_site_file;

    int inline_nest_level;

    // A list of inlines which are children of this inline.
    vector<std::unique_ptr<Inline>> child_inlines;

    int getCallSiteFileID() const {
      return call_site_file ? call_site_file->source_id : -1;
    }

    static void InlineDFS(
        vector<std::unique_ptr<Module::Inline>>& inlines,
        std::function<void(std::unique_ptr<Module::Inline>&)> const& forEach) {
      for (std::unique_ptr<Module::Inline>& in : inlines) {
        forEach(in);
        InlineDFS(in->child_inlines, forEach);
      }
    }
  };

  typedef map<uint64_t, InlineOrigin*> InlineOriginByOffset;

  class InlineOriginMap {
   public:
    // Add INLINE ORIGIN to the module. Return a pointer to origin .
    InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name);

    // offset is the offset of a DW_TAG_subprogram. specification_offset is the
    // value of its DW_AT_specification or equals to offset if
    // DW_AT_specification doesn't exist in that DIE.
    void SetReference(uint64_t offset, uint64_t specification_offset);

    ~InlineOriginMap() {
      for (const auto& iter : inline_origins_) {
        delete iter.second;
      }
    }

   private:
    // A map from a DW_TAG_subprogram's offset to the DW_TAG_subprogram.
    InlineOriginByOffset inline_origins_;

    // A map from a DW_TAG_subprogram's offset to the offset of its
    // specification or abstract origin subprogram. The set of values in this
    // map should always be the same set of keys in inline_origins_.
    map<uint64_t, uint64_t> references_;
  };

  map<std::string, InlineOriginMap> inline_origin_maps;

  // A source line.
  struct Line {
    // For sorting by address.  (Not style-guide compliant, but it's
    // stupid not to put this in the struct.)
    static bool CompareByAddress(const Module::Line& x, const Module::Line& y) {
      return x.address < y.address;
    }

    Address address, size;    // The address and size of the line's code.
    File* file;                // The source file.
    int number;                // The source line number.
  };

  // An exported symbol.
  struct Extern {
    explicit Extern(const Address& address_input) : address(address_input) {}
    const Address address;
    string name;
    // If this symbol has been folded with other symbols in the linked binary.
    bool is_multiple = false;
  };

  // A map from register names to postfix expressions that recover
  // their their values. This can represent a complete set of rules to
  // follow at some address, or a set of changes to be applied to an
  // extant set of rules.
  typedef map<string, string> RuleMap;

  // A map from addresses to RuleMaps, representing changes that take
  // effect at given addresses.
  typedef map<Address, RuleMap> RuleChangeMap;

  // A range of 'STACK CFI' stack walking information. An instance of
  // this structure corresponds to a 'STACK CFI INIT' record and the
  // subsequent 'STACK CFI' records that fall within its range.
  struct StackFrameEntry {
    // The starting address and number of bytes of machine code this
    // entry covers.
    Address address, size;

    // The initial register recovery rules, in force at the starting
    // address.
    RuleMap initial_rules;

    // A map from addresses to rule changes. To find the rules in
    // force at a given address, start with initial_rules, and then
    // apply the changes given in this map for all addresses up to and
    // including the address you're interested in.
    RuleChangeMap rule_changes;
  };

  struct FunctionCompare {
    bool operator() (const Function* lhs, const Function* rhs) const {
      if (lhs->address == rhs->address)
        return lhs->name < rhs->name;
      return lhs->address < rhs->address;
    }
  };

  struct InlineOriginCompare {
    bool operator()(const InlineOrigin* lhs, const InlineOrigin* rhs) const {
      return lhs->name < rhs->name;
    }
  };

  struct ExternCompare {
    // Defining is_transparent allows
    // std::set<std::unique_ptr<Extern>, ExternCompare>::find() to be called
    // with an Extern* and have set use the overloads below.
    using is_transparent = void;
    bool operator() (const std::unique_ptr<Extern>& lhs,
                     const std::unique_ptr<Extern>& rhs) const {
      return lhs->address < rhs->address;
    }
    bool operator() (const Extern* lhs, const std::unique_ptr<Extern>& rhs) const {
      return lhs->address < rhs->address;
    }
    bool operator() (const std::unique_ptr<Extern>& lhs, const Extern* rhs) const {
      return lhs->address < rhs->address;
    }
  };

  // Create a new module with the given name, operating system,
  // architecture, and ID string.
  // NB: `enable_multiple_field` is temporary while transitioning to enabling
  // writing the multiple field permanently.
  Module(const string& name,
         const string& os,
         const string& architecture,
         const string& id,
         const string& code_id = "",
         bool enable_multiple_field = false,
         bool prefer_extern_name = false);
  ~Module();

  // Set the module's load address to LOAD_ADDRESS; addresses given
  // for functions and lines will be written to the Breakpad symbol
  // file as offsets from this address.  Construction initializes this
  // module's load address to zero: addresses written to the symbol
  // file will be the same as they appear in the Function, Line, and
  // StackFrameEntry structures.
  //
  // Note that this member function has no effect on addresses stored
  // in the data added to this module; the Write member function
  // simply subtracts off the load address from addresses before it
  // prints them. Only the last load address given before calling
  // Write is used.
  void SetLoadAddress(Address load_address);

  // Sets address filtering on elements added to the module.  This allows
  // libraries with extraneous debug symbols to generate symbol files containing
  // only relevant symbols.  For example, an LLD-generated partition library may
  // contain debug information pertaining to all partitions derived from a
  // single "combined" library.  Filtering applies only to elements added after
  // this method is called.
  void SetAddressRanges(const vector<Range>& ranges);

  // Add FUNCTION to the module. FUNCTION's name must not be empty.
  // This module owns all Function objects added with this function:
  // destroying the module destroys them as well.
  // Return false if the function is duplicate and needs to be freed.
  bool AddFunction(Function* function);

  // Add STACK_FRAME_ENTRY to the module.
  // This module owns all StackFrameEntry objects added with this
  // function: destroying the module destroys them as well.
  void AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry);

  // Add PUBLIC to the module.
  // This module owns all Extern objects added with this function:
  // destroying the module destroys them as well.
  void AddExtern(std::unique_ptr<Extern> ext);

  // If this module has a file named NAME, return a pointer to it. If
  // it has none, then create one and return a pointer to the new
  // file. This module owns all File objects created using these
  // functions; destroying the module destroys them as well.
  File* FindFile(const string& name);
  File* FindFile(const char* name);

  // If this module has a file named NAME, return a pointer to it.
  // Otherwise, return NULL.
  File* FindExistingFile(const string& name);

  // Insert pointers to the functions added to this module at I in
  // VEC. The pointed-to Functions are still owned by this module.
  // (Since this is effectively a copy of the function list, this is
  // mostly useful for testing; other uses should probably get a more
  // appropriate interface.)
  void GetFunctions(vector<Function*>* vec, vector<Function*>::iterator i);

  // Insert pointers to the externs added to this module at I in
  // VEC. The pointed-to Externs are still owned by this module.
  // (Since this is effectively a copy of the extern list, this is
  // mostly useful for testing; other uses should probably get a more
  // appropriate interface.)
  void GetExterns(vector<Extern*>* vec, vector<Extern*>::iterator i);

  // Clear VEC and fill it with pointers to the Files added to this
  // module, sorted by name. The pointed-to Files are still owned by
  // this module. (Since this is effectively a copy of the file list,
  // this is mostly useful for testing; other uses should probably get
  // a more appropriate interface.)
  void GetFiles(vector<File*>* vec);

  // Clear VEC and fill it with pointers to the StackFrameEntry
  // objects that have been added to this module. (Since this is
  // effectively a copy of the stack frame entry list, this is mostly
  // useful for testing; other uses should probably get
  // a more appropriate interface.)
  void GetStackFrameEntries(vector<StackFrameEntry*>* vec) const;

  // Find those files in this module that are actually referred to by
  // functions' line number data, and assign them source id numbers.
  // Set the source id numbers for all other files --- unused by the
  // source line data --- to -1.  We do this before writing out the
  // symbol file, at which point we omit any unused files.
  void AssignSourceIds();

  // This function should be called before AssignSourceIds() to get the set of
  // valid InlineOrigins*.
  void CreateInlineOrigins(
      set<InlineOrigin*, InlineOriginCompare>& inline_origins);

  // Call AssignSourceIds, and write this module to STREAM in the
  // breakpad symbol format. Return true if all goes well, or false if
  // an error occurs. This method writes out:
  // - a header based on the values given to the constructor,
  // If symbol_data is not CFI then:
  // - the source files added via FindFile,
  // - the functions added via AddFunctions, each with its lines,
  // - all public records,
  // If symbol_data is CFI then:
  // - all CFI records.
  // Addresses in the output are all relative to the load address
  // established by SetLoadAddress.
  bool Write(std::ostream& stream, SymbolData symbol_data);

  // Place the name in the global set of strings. Return a StringView points to
  // a string inside the pool.
  StringView AddStringToPool(const string& str) {
    auto result = common_strings_.insert(str);
    return *(result.first);
  }

  string name() const { return name_; }
  string os() const { return os_; }
  string architecture() const { return architecture_; }
  string identifier() const { return id_; }
  string code_identifier() const { return code_id_; }

 private:
  // Report an error that has occurred writing the symbol file, using
  // errno to find the appropriate cause.  Return false.
  static bool ReportError();

  // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI'
  // records, without a final newline. Return true if all goes well;
  // if an error occurs, return false, and leave errno set.
  static bool WriteRuleMap(const RuleMap& rule_map, std::ostream& stream);

  // Returns true of the specified address resides with an specified address
  // range, or if no ranges have been specified.
  bool AddressIsInModule(Address address) const;

  // Module header entries.
  string name_, os_, architecture_, id_, code_id_;

  // The module's nominal load address.  Addresses for functions and
  // lines are absolute, assuming the module is loaded at this
  // address.
  Address load_address_;

  // The set of valid address ranges of the module.  If specified, attempts to
  // add elements residing outside these ranges will be silently filtered.
  vector<Range> address_ranges_;

  // Relation for maps whose keys are strings shared with some other
  // structure.
  struct CompareStringPtrs {
    bool operator()(const string* x, const string* y) const { return *x < *y; }
  };

  // A map from filenames to File structures.  The map's keys are
  // pointers to the Files' names.
  typedef map<const string*, File*, CompareStringPtrs> FileByNameMap;

  // A set containing Function structures, sorted by address.
  typedef set<Function*, FunctionCompare> FunctionSet;

  // A set containing Extern structures, sorted by address.
  typedef set<std::unique_ptr<Extern>, ExternCompare> ExternSet;

  // The module owns all the files and functions that have been added
  // to it; destroying the module frees the Files and Functions these
  // point to.
  FileByNameMap files_;    // This module's source files.
  FunctionSet functions_;  // This module's functions.
  // Used to quickly look up whether a function exists at a particular address.
  unordered_set<Address> function_addresses_;

  // The module owns all the call frame info entries that have been
  // added to it.
  vector<std::unique_ptr<StackFrameEntry>> stack_frame_entries_;

  // The module owns all the externs that have been added to it;
  // destroying the module frees the Externs these point to.
  ExternSet externs_;

  unordered_set<string> common_strings_;

  // Whether symbols sharing an address should be collapsed into a single entry
  // and marked with an `m` in the output. See
  // https://bugs.chromium.org/p/google-breakpad/issues/detail?id=751 and docs
  // at
  // https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3
  bool enable_multiple_field_;

  // If a Function and an Extern share the same address but have a different
  // name, prefer the name of the Extern.
  //
  // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
  // as the Function's fully-qualified name will only be present in the STABS
  // (which are placed in the Extern), not in the DWARF symbols (which are
  // placed in the Function).
  bool prefer_extern_name_;
};

}  // namespace google_breakpad

#endif  // COMMON_LINUX_MODULE_H__