Coverage Report

Created: 2024-01-21 06:52

/src/bloaty/src/bloaty.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2016 Google Inc. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
// This file contains APIs for use within Bloaty.  None of these APIs have any
16
// guarantees whatsoever about their stability!  The public API for bloaty is
17
// its command-line interface.
18
19
#ifndef BLOATY_H_
20
#define BLOATY_H_
21
22
#include <stdlib.h>
23
#define __STDC_LIMIT_MACROS
24
#define __STDC_FORMAT_MACROS
25
#include <stdint.h>
26
#include <inttypes.h>
27
28
#include <memory>
29
#include <set>
30
#include <string>
31
#include <unordered_map>
32
#include <vector>
33
34
#include "absl/strings/string_view.h"
35
#include "absl/strings/strip.h"
36
#include "capstone/capstone.h"
37
38
#include "dwarf/debug_info.h"
39
#include "bloaty.pb.h"
40
#include "range_map.h"
41
#include "re.h"
42
43
namespace bloaty {
44
45
// Verbosity level shared across Bloaty.  Only declared here; the definition
// lives outside this header.
extern int verbose_level;

// Forward declarations for types that are referenced by pointer or reference
// before (or without) their full definition appearing in this header.
class NameMunger;
class Options;
struct DualMap;
struct DisassemblyInfo;
51
52
// Tag identifying which data source a RangeSink is collecting labels for
// (see RangeSink::data_source()).
enum class DataSource {
  kArchiveMembers,
  kCompileUnits,
  kInlines,
  kInputFiles,
  kRawRanges,
  kSections,
  kSegments,

  // Generic "symbols" source.  It is always replaced by one of the concrete
  // symbol kinds listed below before being handed to a sink.
  kSymbols,

  // Concrete symbol kinds.
  kRawSymbols,
  kFullSymbols,
  kShortSymbols
};
69
70
class InputFile {
71
 public:
72
17.0M
  InputFile(absl::string_view filename) : filename_(filename) {}
73
  InputFile(const InputFile&) = delete;
74
  InputFile& operator=(const InputFile&) = delete;
75
  virtual bool TryOpen(absl::string_view filename,
76
                       std::unique_ptr<InputFile>& file) = 0;
77
17.0M
  virtual ~InputFile() {}
78
79
2.49M
  const std::string& filename() const { return filename_; }
80
171M
  absl::string_view data() const { return data_; }
81
82
 private:
83
  const std::string filename_;
84
85
 protected:
86
  absl::string_view data_;
87
};
88
89
class InputFileFactory {
90
 public:
91
2.76M
  virtual ~InputFileFactory() {}
92
93
  // Throws if the file could not be opened.
94
  virtual std::unique_ptr<InputFile> OpenFile(
95
      const std::string& filename) const = 0;
96
};
97
98
class MmapInputFileFactory : public InputFileFactory {
99
 public:
100
  std::unique_ptr<InputFile> OpenFile(
101
      const std::string& filename) const override;
102
};
103
104
// NOTE: all sizes are uint64, even on 32-bit platforms:
105
//   - 32-bit platforms can have files >4GB in some cases.
106
//   - for object files (not executables/shared libs) we pack both a section
107
//     index and an address into the "vmaddr" value, and we need enough bits to
108
//     safely do this.
109
110
// A RangeSink allows data sources to assign labels to ranges of VM address
111
// space and/or file offsets.
112
class RangeSink {
113
public:
114
  RangeSink(const InputFile *file, const Options &options,
115
            DataSource data_source, const DualMap *translator,
116
            google::protobuf::Arena *arena);
117
  RangeSink(const RangeSink &) = delete;
118
  RangeSink &operator=(const RangeSink &) = delete;
119
  ~RangeSink();
120
121
39.3k
  const Options &options() const { return options_; }
122
123
  void AddOutput(DualMap *map, const NameMunger *munger);
124
125
4.55M
  DataSource data_source() const { return data_source_; }
126
3.78M
  const InputFile &input_file() const { return *file_; }
127
881k
  bool IsBaseMap() const { return translator_ == nullptr; }
128
129
  // If vmsize or filesize is zero, this mapping is presumed not to exist in
130
  // that domain.  For example, .bss mappings don't exist in the file, and
131
  // .debug_* mappings don't exist in memory.
132
  void AddRange(const char *analyzer, absl::string_view name, uint64_t vmaddr,
133
                uint64_t vmsize, uint64_t fileoff, uint64_t filesize);
134
135
  void AddRange(const char *analyzer, absl::string_view name, uint64_t vmaddr,
136
55.7M
                uint64_t vmsize, absl::string_view file_range) {
137
55.7M
    AddRange(analyzer, name, vmaddr, vmsize,
138
55.7M
             file_range.data() - file_->data().data(), file_range.size());
139
55.7M
  }
140
141
  void AddFileRange(const char* analyzer, absl::string_view name,
142
                    uint64_t fileoff, uint64_t filesize);
143
144
  // Like AddFileRange(), but the label is whatever label was previously
145
  // assigned to VM address |label_from_vmaddr|.  If no existing label is
146
  // assigned to |label_from_vmaddr|, this function does nothing.
147
  void AddFileRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr,
148
                             absl::string_view file_range);
149
  void AddVMRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr,
150
                           uint64_t addr, uint64_t size);
151
152
  // Applies this label from |from_file_range| to |file_range|, but only if the
153
  // entire |from_file_range| has a single label.  If not, this does nothing.
154
  void AddFileRangeForFileRange(const char* analyzer,
155
                                absl::string_view from_file_range,
156
                                absl::string_view file_range);
157
158
  void AddFileRange(const char* analyzer, absl::string_view name,
159
4.40M
                    absl::string_view file_range) {
160
    // When separate debug files are being used, the DWARF analyzer will try to
161
    // add sections of the debug file.  We want to prevent this because we only
162
    // want to profile the main file (not the debug file), so we filter these
163
    // out.  This approach is simple to implement, but does result in some
164
    // useless work being done.  We may want to avoid doing this useless work in
165
    // the first place.
166
4.40M
    if (FileContainsPointer(file_range.data())) {
167
4.37M
      AddFileRange(analyzer, name, file_range.data() - file_->data().data(),
168
4.37M
                   file_range.size());
169
4.37M
    }
170
4.40M
  }
171
172
  // The VM-only functions below may not be used to populate the base map!
173
174
  // Adds a region to the memory map.  It should not overlap any previous
175
  // region added with Add(), but it should overlap the base memory map.
176
  void AddVMRange(const char* analyzer, uint64_t vmaddr, uint64_t vmsize,
177
                  const std::string& name);
178
179
  // Like Add(), but allows that this addr/size might have previously been added
180
  // already under a different name.  If so, this name becomes an alias of the
181
  // previous name.
182
  //
183
  // This is for things like symbol tables that sometimes map multiple names to
184
  // the same physical function.
185
  void AddVMRangeAllowAlias(const char* analyzer, uint64_t vmaddr,
186
                            uint64_t size, const std::string& name);
187
188
  // Like Add(), but allows that this addr/size might have previously been added
189
  // already under a different name.  If so, this add is simply ignored.
190
  //
191
  // This is for cases like sourcefiles.  Sometimes a single function appears to
192
  // come from multiple source files.  But if it does, we don't want to alias
193
  // the entire source file to another, because it's probably only part of the
194
  // source file that overlaps.
195
  void AddVMRangeIgnoreDuplicate(const char* analyzer, uint64_t vmaddr,
196
                                 uint64_t size, const std::string& name);
197
198
39.3k
  const DualMap& MapAtIndex(size_t index) const {
199
39.3k
    return *outputs_[index].first;
200
39.3k
  }
201
202
  // Translates the given pointer (which must be within the range of
203
  // input_file().data()) to a VM address.
204
  uint64_t TranslateFileToVM(const char* ptr);
205
  absl::string_view TranslateVMToFile(uint64_t address);
206
279k
  const DualMap* Translator() { return translator_; }
207
208
209
  // Decompresses zlib-formatted data and returns the decompressed data.
210
  // Since the decompressed data is not actually part of the file, any
211
  // Add*Range() calls to this region will be no-ops.
212
  absl::string_view ZlibDecompress(absl::string_view contents,
213
                                   uint64_t uncompressed_size);
214
215
  static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize;
216
217
 private:
218
4.76M
  bool FileContainsPointer(const void* ptr) const {
219
4.76M
    absl::string_view file_data = file_->data();
220
4.76M
    return ptr >= file_data.data() && ptr < file_data.data() + file_data.size();
221
4.76M
  }
222
223
  bool ContainsVerboseVMAddr(uint64_t vmaddr, uint64_t vmsize);
224
  bool ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize);
225
  bool IsVerboseForVMRange(uint64_t vmaddr, uint64_t vmsize);
226
  bool IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize);
227
228
  const InputFile* file_;
229
  const Options options_;
230
  DataSource data_source_;
231
  const DualMap* translator_;
232
  std::vector<std::pair<DualMap*, const NameMunger*>> outputs_;
233
  google::protobuf::Arena *arena_;
234
};
235
236
// NameMunger //////////////////////////////////////////////////////////////////
237
238
// Use to transform input names according to the user's configuration.
239
// For example, the user can use regexes.
240
class NameMunger {
241
 public:
242
166M
  NameMunger() {}
243
  NameMunger(const NameMunger&) = delete;
244
  NameMunger& operator=(const NameMunger&) = delete;
245
246
  // Adds a regex that will be applied to all names.  All regexes will be
247
  // applied in sequence.
248
  void AddRegex(const std::string& regex, const std::string& replacement);
249
  std::string Munge(absl::string_view name) const;
250
251
0
  bool IsEmpty() const { return regexes_.empty(); }
252
253
 private:
254
  std::vector<std::pair<std::unique_ptr<ReImpl>, std::string>> regexes_;
255
};
256
257
// Ordered map from a name to a pair of 64-bit values.
// NOTE(review): the pair presumably holds (address, size) -- confirm against
// the code that fills the table.
using SymbolTable =
    std::map<absl::string_view, std::pair<uint64_t, uint64_t>>;
258
259
// Represents an object/executable file in a format like ELF, Mach-O, PE, etc.
260
// To support a new file type, implement this interface.
261
class ObjectFile {
262
 public:
263
  ObjectFile(std::unique_ptr<InputFile> file_data)
264
1.00M
      : file_data_(std::move(file_data)), debug_file_(this) {}
265
1.00M
  virtual ~ObjectFile() {}
266
267
  virtual std::string GetBuildId() const = 0;
268
269
  // Process this file, pushing data to |sinks| as appropriate for each data
270
  // source.  If any debug files match the build id for this file, it will be
271
  // given here, otherwise it is |this|.
272
  virtual void ProcessFile(const std::vector<RangeSink*>& sinks) const = 0;
273
274
  virtual bool GetDisassemblyInfo(absl::string_view symbol,
275
                                  DataSource symbol_source,
276
                                  DisassemblyInfo* info) const = 0;
277
278
2.51M
  const InputFile& file_data() const { return *file_data_; }
279
280
  // Sets the debug file for |this|.  |file| must outlive this instance.
281
0
  void set_debug_file(const ObjectFile* file) {
282
0
    assert(debug_file_->GetBuildId() == GetBuildId());
283
0
    debug_file_ = file;
284
0
  }
285
286
204k
  const ObjectFile& debug_file() const { return *debug_file_; }
287
288
 private:
289
  std::unique_ptr<InputFile> file_data_;
290
  const ObjectFile* debug_file_;
291
};
292
293
// Format-specific openers, one per supported object file format.
// NOTE(review): presumably each returns null when |file| is not in its
// format, and may take ownership of |file| on success (it is passed by
// non-const reference) -- confirm against the implementations.
std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(
    std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenPEFile(std::unique_ptr<InputFile>& file);
297
298
// Provided by dwarf.cc.  To use these, a module should fill in a dwarf::File
299
// and then call these functions.
300
void ReadDWARFCompileUnits(const dwarf::File& file, const DualMap& map,
301
                           const dwarf::CU* skeleton, RangeSink* sink);
302
inline void ReadDWARFCompileUnits(const dwarf::File& file, const DualMap& map,
303
30.2k
                                  RangeSink* sink) {
304
30.2k
  return ReadDWARFCompileUnits(file, map, nullptr, sink);
305
30.2k
}
306
void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
307
                      bool include_line);
308
void ReadEhFrame(absl::string_view contents, RangeSink* sink);
309
void ReadEhFrameHdr(absl::string_view contents, RangeSink* sink);
310
311
// Demangle C++ symbols according to the Itanium ABI.  The |source| argument
312
// controls what demangling mode we are using.
313
std::string ItaniumDemangle(absl::string_view symbol, DataSource source);
314
315
316
// DualMap /////////////////////////////////////////////////////////////////////
317
318
// Contains a RangeMap for VM space and file space for a given file.
319
320
struct DualMap {
321
  RangeMap vm_map;
322
  RangeMap file_map;
323
};
324
325
struct DisassemblyInfo {
326
  absl::string_view text;
327
  DualMap symbol_map;
328
  cs_arch arch;
329
  cs_mode mode;
330
  uint64_t start_address;
331
};
332
333
std::string DisassembleFunction(const DisassemblyInfo& info);
334
void DisassembleFindReferences(const DisassemblyInfo& info, RangeSink* sink);
335
336
// Top-level API ///////////////////////////////////////////////////////////////
337
338
// This should only be used by main.cc and unit tests.
339
340
class Rollup;
341
342
// A pair of signed sizes, one per domain (VM space and file space).
struct DomainSizes {
  int64_t vm;
  int64_t file;
};

// One row of rollup output: a label, its sizes, and its child rows.
struct RollupRow {
  RollupRow(const std::string& name_) : name(name_) {}

  std::string name;
  DomainSizes size = {0, 0};
  DomainSizes filtered_size = {0, 0};

  int64_t other_count = 0;

  // Ordering key used by Compare().  This and the two percentages are
  // zero-initialized so that a freshly constructed row never exposes
  // indeterminate values (Compare() reads sortkey).
  int64_t sortkey = 0;
  double vmpercent = 0.0;
  double filepercent = 0.0;

  // The size of the base in a diff mode. Otherwise stay 0.
  DomainSizes old_size = {0, 0};

  std::vector<RollupRow> sorted_children;

  // Strict weak ordering for sorting rows: larger sortkey first, ties broken
  // by ascending name.
  static bool Compare(const RollupRow& a, const RollupRow& b) {
    // Sort value high-to-low.
    if (a.sortkey != b.sortkey) {
      return a.sortkey > b.sortkey;
    }
    // Sort name low to high.
    return a.name < b.name;
  }
};
373
374
// Output formats selectable through OutputOptions::output_format.
enum class OutputFormat {
  kPrettyPrint,  // Default in OutputOptions.
  kCSV,
  kTSV,
};
379
380
// Which size domain(s) to show; selected through OutputOptions::show.
enum class ShowDomain {
  kShowFile,
  kShowVM,
  kShowBoth,  // Default in OutputOptions.
};
385
386
struct OutputOptions {
387
  OutputFormat output_format = OutputFormat::kPrettyPrint;
388
  size_t max_label_len = 80;
389
  ShowDomain show = ShowDomain::kShowBoth;
390
  bool showAllSizesCSV = false;
391
};
392
393
struct RollupOutput {
394
 public:
395
16.5M
  RollupOutput() : toplevel_row_("TOTAL") {}
396
  RollupOutput(const RollupOutput&) = delete;
397
  RollupOutput& operator=(const RollupOutput&) = delete;
398
399
411k
  void AddDataSourceName(absl::string_view name) {
400
411k
    source_names_.emplace_back(std::string(name));
401
411k
  }
402
403
0
  const std::vector<std::string>& source_names() const { return source_names_; }
404
  void Print(const OutputOptions& options, std::ostream* out);
405
0
  void SetDisassembly(absl::string_view disassembly) {
406
0
    disassembly_ = std::string(disassembly);
407
0
  }
408
409
0
  absl::string_view GetDisassembly() { return disassembly_; }
410
411
  // For debugging.
412
0
  const RollupRow& toplevel_row() const { return toplevel_row_; }
413
0
  bool diff_mode() const { return diff_mode_; }
414
415
 private:
416
  friend class Rollup;
417
418
  std::vector<std::string> source_names_;
419
  RollupRow toplevel_row_;
420
  std::string disassembly_;
421
422
  // When we are in diff mode, rollup sizes are relative to the baseline.
423
  bool diff_mode_ = false;
424
425
  static bool IsSame(const std::string& a, const std::string& b);
426
  void PrettyPrint(const OutputOptions& options, std::ostream* out) const;
427
  void PrintToCSV(std::ostream* out, bool tabs, bool csvDiff) const;
428
  void PrettyPrintRow(const RollupRow& row, size_t indent,
429
                      const OutputOptions& options, std::ostream* out) const;
430
  void PrettyPrintTree(const RollupRow& row, size_t indent,
431
                       const OutputOptions& options, std::ostream* out) const;
432
  void PrintRowToCSV(const RollupRow& row,
433
                     std::vector<std::string> parent_labels,
434
                     std::ostream* out, bool tabs, bool csvDiff) const;
435
  void PrintTreeToCSV(const RollupRow& row,
436
                      std::vector<std::string> parent_labels,
437
                      std::ostream* out, bool tabs, bool csvDiff) const;
438
};
439
440
bool ParseOptions(bool skip_unknown, int* argc, char** argv[], Options* options,
441
                  OutputOptions* output_options, std::string* error);
442
bool BloatyMain(const Options& options, const InputFileFactory& file_factory,
443
                RollupOutput* output, std::string* error);
444
445
}  // namespace bloaty
446
447
#endif