Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2016 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // This file contains APIs for use within Bloaty. None of these APIs have any |
16 | | // guarantees whatsoever about their stability! The public API for bloaty is |
17 | | // its command-line interface. |
18 | | |
19 | | #ifndef BLOATY_H_ |
20 | | #define BLOATY_H_ |
21 | | |
22 | | #include <stdlib.h> |
23 | | #define __STDC_LIMIT_MACROS |
24 | | #define __STDC_FORMAT_MACROS |
25 | | #include <stdint.h> |
26 | | #include <inttypes.h> |
27 | | |
28 | | #include <memory> |
29 | | #include <set> |
30 | | #include <string> |
31 | | #include <unordered_map> |
32 | | #include <vector> |
33 | | |
34 | | #include "absl/strings/string_view.h" |
35 | | #include "absl/strings/strip.h" |
36 | | #include "capstone/capstone.h" |
37 | | |
38 | | #include "dwarf/debug_info.h" |
39 | | #include "bloaty.pb.h" |
40 | | #include "range_map.h" |
41 | | #include "re.h" |
42 | | |
43 | | namespace bloaty { |
44 | | |
// Global verbosity setting.  Only declared here; the definition lives in
// another translation unit.
extern int verbose_level;

// Forward declarations of types used below.  NameMunger, DualMap and
// DisassemblyInfo are defined later in this header; the definition of Options
// is not in this header's own text.
class NameMunger;
class Options;
struct DualMap;
struct DisassemblyInfo;
51 | | |
// The data sources known to this header.  The enumerators carry no explicit
// values, so their numeric values follow declaration order.
enum class DataSource {
  kArchiveMembers,
  kCompileUnits,
  kInlines,
  kInputFiles,
  kRawRanges,
  kSections,
  kSegments,

  // Generic "symbols" value.  It is always replaced by one of the concrete
  // symbol types below before being set on a sink.
  kSymbols,

  // Concrete symbol types.
  kRawSymbols,
  kFullSymbols,
  kShortSymbols,
};
69 | | |
70 | | class InputFile { |
71 | | public: |
72 | 17.0M | InputFile(absl::string_view filename) : filename_(filename) {} |
73 | | InputFile(const InputFile&) = delete; |
74 | | InputFile& operator=(const InputFile&) = delete; |
75 | | virtual bool TryOpen(absl::string_view filename, |
76 | | std::unique_ptr<InputFile>& file) = 0; |
77 | 17.0M | virtual ~InputFile() {} |
78 | | |
79 | 2.49M | const std::string& filename() const { return filename_; } |
80 | 171M | absl::string_view data() const { return data_; } |
81 | | |
82 | | private: |
83 | | const std::string filename_; |
84 | | |
85 | | protected: |
86 | | absl::string_view data_; |
87 | | }; |
88 | | |
89 | | class InputFileFactory { |
90 | | public: |
91 | 2.76M | virtual ~InputFileFactory() {} |
92 | | |
93 | | // Throws if the file could not be opened. |
94 | | virtual std::unique_ptr<InputFile> OpenFile( |
95 | | const std::string& filename) const = 0; |
96 | | }; |
97 | | |
// Concrete InputFileFactory; OpenFile() is defined out of line.
// NOTE(review): the name suggests the returned files are memory-mapped --
// confirm against the implementation.
class MmapInputFileFactory : public InputFileFactory {
 public:
  // See InputFileFactory::OpenFile().
  std::unique_ptr<InputFile> OpenFile(
      const std::string& filename) const override;
};
103 | | |
104 | | // NOTE: all sizes are uint64, even on 32-bit platforms: |
105 | | // - 32-bit platforms can have files >4GB in some cases. |
106 | | // - for object files (not executables/shared libs) we pack both a section |
107 | | // index and an address into the "vmaddr" value, and we need enough bits to |
108 | | // safely do this. |
109 | | |
110 | | // A RangeSink allows data sources to assign labels to ranges of VM address |
111 | | // space and/or file offsets. |
112 | | class RangeSink { |
113 | | public: |
114 | | RangeSink(const InputFile *file, const Options &options, |
115 | | DataSource data_source, const DualMap *translator, |
116 | | google::protobuf::Arena *arena); |
117 | | RangeSink(const RangeSink &) = delete; |
118 | | RangeSink &operator=(const RangeSink &) = delete; |
119 | | ~RangeSink(); |
120 | | |
121 | 39.3k | const Options &options() const { return options_; } |
122 | | |
123 | | void AddOutput(DualMap *map, const NameMunger *munger); |
124 | | |
125 | 4.55M | DataSource data_source() const { return data_source_; } |
126 | 3.78M | const InputFile &input_file() const { return *file_; } |
127 | 881k | bool IsBaseMap() const { return translator_ == nullptr; } |
128 | | |
129 | | // If vmsize or filesize is zero, this mapping is presumed not to exist in |
130 | | // that domain. For example, .bss mappings don't exist in the file, and |
131 | | // .debug_* mappings don't exist in memory. |
132 | | void AddRange(const char *analyzer, absl::string_view name, uint64_t vmaddr, |
133 | | uint64_t vmsize, uint64_t fileoff, uint64_t filesize); |
134 | | |
135 | | void AddRange(const char *analyzer, absl::string_view name, uint64_t vmaddr, |
136 | 55.7M | uint64_t vmsize, absl::string_view file_range) { |
137 | 55.7M | AddRange(analyzer, name, vmaddr, vmsize, |
138 | 55.7M | file_range.data() - file_->data().data(), file_range.size()); |
139 | 55.7M | } |
140 | | |
141 | | void AddFileRange(const char* analyzer, absl::string_view name, |
142 | | uint64_t fileoff, uint64_t filesize); |
143 | | |
144 | | // Like AddFileRange(), but the label is whatever label was previously |
145 | | // assigned to VM address |label_from_vmaddr|. If no existing label is |
146 | | // assigned to |label_from_vmaddr|, this function does nothing. |
147 | | void AddFileRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr, |
148 | | absl::string_view file_range); |
149 | | void AddVMRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr, |
150 | | uint64_t addr, uint64_t size); |
151 | | |
152 | | // Applies this label from |from_file_range| to |file_range|, but only if the |
153 | | // entire |from_file_range| has a single label. If not, this does nothing. |
154 | | void AddFileRangeForFileRange(const char* analyzer, |
155 | | absl::string_view from_file_range, |
156 | | absl::string_view file_range); |
157 | | |
158 | | void AddFileRange(const char* analyzer, absl::string_view name, |
159 | 4.40M | absl::string_view file_range) { |
160 | | // When separate debug files are being used, the DWARF analyzer will try to |
161 | | // add sections of the debug file. We want to prevent this because we only |
162 | | // want to profile the main file (not the debug file), so we filter these |
163 | | // out. This approach is simple to implement, but does result in some |
164 | | // useless work being done. We may want to avoid doing this useless work in |
165 | | // the first place. |
166 | 4.40M | if (FileContainsPointer(file_range.data())) { |
167 | 4.37M | AddFileRange(analyzer, name, file_range.data() - file_->data().data(), |
168 | 4.37M | file_range.size()); |
169 | 4.37M | } |
170 | 4.40M | } |
171 | | |
172 | | // The VM-only functions below may not be used to populate the base map! |
173 | | |
174 | | // Adds a region to the memory map. It should not overlap any previous |
175 | | // region added with Add(), but it should overlap the base memory map. |
176 | | void AddVMRange(const char* analyzer, uint64_t vmaddr, uint64_t vmsize, |
177 | | const std::string& name); |
178 | | |
179 | | // Like Add(), but allows that this addr/size might have previously been added |
180 | | // already under a different name. If so, this name becomes an alias of the |
181 | | // previous name. |
182 | | // |
183 | | // This is for things like symbol tables that sometimes map multiple names to |
184 | | // the same physical function. |
185 | | void AddVMRangeAllowAlias(const char* analyzer, uint64_t vmaddr, |
186 | | uint64_t size, const std::string& name); |
187 | | |
188 | | // Like Add(), but allows that this addr/size might have previously been added |
189 | | // already under a different name. If so, this add is simply ignored. |
190 | | // |
191 | | // This is for cases like sourcefiles. Sometimes a single function appears to |
192 | | // come from multiple source files. But if it does, we don't want to alias |
193 | | // the entire source file to another, because it's probably only part of the |
194 | | // source file that overlaps. |
195 | | void AddVMRangeIgnoreDuplicate(const char* analyzer, uint64_t vmaddr, |
196 | | uint64_t size, const std::string& name); |
197 | | |
198 | 39.3k | const DualMap& MapAtIndex(size_t index) const { |
199 | 39.3k | return *outputs_[index].first; |
200 | 39.3k | } |
201 | | |
202 | | // Translates the given pointer (which must be within the range of |
203 | | // input_file().data()) to a VM address. |
204 | | uint64_t TranslateFileToVM(const char* ptr); |
205 | | absl::string_view TranslateVMToFile(uint64_t address); |
206 | 279k | const DualMap* Translator() { return translator_; } |
207 | | |
208 | | |
209 | | // Decompresses zlib-formatted data and returns the decompressed data. |
210 | | // Since the decompressed data is not actually part of the file, any |
211 | | // Add*Range() calls to this region will be no-ops. |
212 | | absl::string_view ZlibDecompress(absl::string_view contents, |
213 | | uint64_t uncompressed_size); |
214 | | |
215 | | static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize; |
216 | | |
217 | | private: |
218 | 4.76M | bool FileContainsPointer(const void* ptr) const { |
219 | 4.76M | absl::string_view file_data = file_->data(); |
220 | 4.76M | return ptr >= file_data.data() && ptr < file_data.data() + file_data.size(); |
221 | 4.76M | } |
222 | | |
223 | | bool ContainsVerboseVMAddr(uint64_t vmaddr, uint64_t vmsize); |
224 | | bool ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize); |
225 | | bool IsVerboseForVMRange(uint64_t vmaddr, uint64_t vmsize); |
226 | | bool IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize); |
227 | | |
228 | | const InputFile* file_; |
229 | | const Options options_; |
230 | | DataSource data_source_; |
231 | | const DualMap* translator_; |
232 | | std::vector<std::pair<DualMap*, const NameMunger*>> outputs_; |
233 | | google::protobuf::Arena *arena_; |
234 | | }; |
235 | | |
236 | | // NameMunger ////////////////////////////////////////////////////////////////// |
237 | | |
238 | | // Use to transform input names according to the user's configuration. |
239 | | // For example, the user can use regexes. |
240 | | class NameMunger { |
241 | | public: |
242 | 166M | NameMunger() {} |
243 | | NameMunger(const NameMunger&) = delete; |
244 | | NameMunger& operator=(const NameMunger&) = delete; |
245 | | |
246 | | // Adds a regex that will be applied to all names. All regexes will be |
247 | | // applied in sequence. |
248 | | void AddRegex(const std::string& regex, const std::string& replacement); |
249 | | std::string Munge(absl::string_view name) const; |
250 | | |
251 | 0 | bool IsEmpty() const { return regexes_.empty(); } |
252 | | |
253 | | private: |
254 | | std::vector<std::pair<std::unique_ptr<ReImpl>, std::string>> regexes_; |
255 | | }; |
256 | | |
257 | | typedef std::map<absl::string_view, std::pair<uint64_t, uint64_t>> SymbolTable; |
258 | | |
259 | | // Represents an object/executable file in a format like ELF, Mach-O, PE, etc. |
260 | | // To support a new file type, implement this interface. |
261 | | class ObjectFile { |
262 | | public: |
263 | | ObjectFile(std::unique_ptr<InputFile> file_data) |
264 | 1.00M | : file_data_(std::move(file_data)), debug_file_(this) {} |
265 | 1.00M | virtual ~ObjectFile() {} |
266 | | |
267 | | virtual std::string GetBuildId() const = 0; |
268 | | |
269 | | // Process this file, pushing data to |sinks| as appropriate for each data |
270 | | // source. If any debug files match the build id for this file, it will be |
271 | | // given here, otherwise it is |this|. |
272 | | virtual void ProcessFile(const std::vector<RangeSink*>& sinks) const = 0; |
273 | | |
274 | | virtual bool GetDisassemblyInfo(absl::string_view symbol, |
275 | | DataSource symbol_source, |
276 | | DisassemblyInfo* info) const = 0; |
277 | | |
278 | 2.51M | const InputFile& file_data() const { return *file_data_; } |
279 | | |
280 | | // Sets the debug file for |this|. |file| must outlive this instance. |
281 | 0 | void set_debug_file(const ObjectFile* file) { |
282 | 0 | assert(debug_file_->GetBuildId() == GetBuildId()); |
283 | 0 | debug_file_ = file; |
284 | 0 | } |
285 | | |
286 | 204k | const ObjectFile& debug_file() const { return *debug_file_; } |
287 | | |
288 | | private: |
289 | | std::unique_ptr<InputFile> file_data_; |
290 | | const ObjectFile* debug_file_; |
291 | | }; |
292 | | |
// Format-specific open functions, one per object-file format named in the
// function.  Each takes the input file as a reference to a unique_ptr and
// returns a unique_ptr<ObjectFile>.
// NOTE(review): presumably each returns null when the input is not in its
// format, and takes ownership of |file| on success -- confirm against the
// implementations.
std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenPEFile(std::unique_ptr<InputFile>& file);
297 | | |
298 | | // Provided by dwarf.cc. To use these, a module should fill in a dwarf::File |
299 | | // and then call these functions. |
300 | | void ReadDWARFCompileUnits(const dwarf::File& file, const DualMap& map, |
301 | | const dwarf::CU* skeleton, RangeSink* sink); |
302 | | inline void ReadDWARFCompileUnits(const dwarf::File& file, const DualMap& map, |
303 | 30.2k | RangeSink* sink) { |
304 | 30.2k | return ReadDWARFCompileUnits(file, map, nullptr, sink); |
305 | 30.2k | } |
306 | | void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink, |
307 | | bool include_line); |
308 | | void ReadEhFrame(absl::string_view contents, RangeSink* sink); |
309 | | void ReadEhFrameHdr(absl::string_view contents, RangeSink* sink); |
310 | | |
// Demangle C++ symbols according to the Itanium ABI.  The |source| argument
// controls what demangling mode we are using.  The result is returned as a
// new std::string.
// NOTE(review): presumably |source| is expected to be one of the concrete
// symbol values of DataSource (kRawSymbols, kShortSymbols, kFullSymbols) --
// confirm against the implementation.
std::string ItaniumDemangle(absl::string_view symbol, DataSource source);
314 | | |
315 | | |
316 | | // DualMap ///////////////////////////////////////////////////////////////////// |
317 | | |
// Contains a RangeMap for VM space and file space for a given file.

struct DualMap {
  RangeMap vm_map;    // Ranges in VM address space.
  RangeMap file_map;  // Ranges in file space.
};
324 | | |
// Bundle of data taken by DisassembleFunction() and
// DisassembleFindReferences(), and filled in through
// ObjectFile::GetDisassemblyInfo().  |arch|, |mode| and |start_address| are
// not given default values here.
struct DisassemblyInfo {
  // NOTE(review): presumably the machine code to disassemble -- confirm.
  absl::string_view text;
  DualMap symbol_map;
  cs_arch arch;  // Capstone architecture.
  cs_mode mode;  // Capstone mode.
  // NOTE(review): presumably the VM address at which |text| starts -- confirm.
  uint64_t start_address;
};
332 | | |
// Disassembly entry points; both take a filled-in DisassemblyInfo.
// DisassembleFunction() returns its result as a string.
// NOTE(review): DisassembleFindReferences() presumably reports what it finds
// to |sink| -- confirm against the implementation.
std::string DisassembleFunction(const DisassemblyInfo& info);
void DisassembleFindReferences(const DisassemblyInfo& info, RangeSink* sink);
335 | | |
336 | | // Top-level API /////////////////////////////////////////////////////////////// |
337 | | |
338 | | // This should only be used by main.cc and unit tests. |
339 | | |
340 | | class Rollup; |
341 | | |
// A pair of signed sizes, one for the VM domain and one for the file domain.
// Plain aggregate: the members have no default values of their own.
struct DomainSizes {
  int64_t vm;
  int64_t file;
};

// One named row of rollup data, together with its already-sorted child rows.
struct RollupRow {
  RollupRow(const std::string& name_) : name(name_) {}

  std::string name;
  DomainSizes size = {0, 0};
  DomainSizes filtered_size = {0, 0};

  int64_t other_count = 0;

  // Zero-initialized like the other members, so that a freshly constructed
  // row can be read (for example by Compare()) before these are computed.
  int64_t sortkey = 0;
  double vmpercent = 0;
  double filepercent = 0;

  // The size of the base in a diff mode.  Otherwise stay 0.
  DomainSizes old_size = {0, 0};

  std::vector<RollupRow> sorted_children;

  // Strict weak ordering for sorting rows: larger |sortkey| first; rows with
  // equal keys are ordered by ascending |name|.
  static bool Compare(const RollupRow& a, const RollupRow& b) {
    // Sort value high-to-low.
    if (a.sortkey != b.sortkey) {
      return a.sortkey > b.sortkey;
    }
    // Sort name low to high.
    return a.name < b.name;
  }
};
373 | | |
// Output formats that can be selected through OutputOptions::output_format.
enum class OutputFormat {
  kPrettyPrint,
  kCSV,
  kTSV,
};

// Selects which size domain(s) to show: file, VM, or both.
enum class ShowDomain {
  kShowFile,
  kShowVM,
  kShowBoth,
};

// Settings that control how output is printed.  A default-constructed object
// selects pretty-printed output, a maximum label length of 80, both domains
// shown, and showAllSizesCSV turned off.
struct OutputOptions {
  OutputFormat output_format{OutputFormat::kPrettyPrint};
  size_t max_label_len{80};
  ShowDomain show{ShowDomain::kShowBoth};
  bool showAllSizesCSV{false};
};
392 | | |
393 | | struct RollupOutput { |
394 | | public: |
395 | 16.5M | RollupOutput() : toplevel_row_("TOTAL") {} |
396 | | RollupOutput(const RollupOutput&) = delete; |
397 | | RollupOutput& operator=(const RollupOutput&) = delete; |
398 | | |
399 | 411k | void AddDataSourceName(absl::string_view name) { |
400 | 411k | source_names_.emplace_back(std::string(name)); |
401 | 411k | } |
402 | | |
403 | 0 | const std::vector<std::string>& source_names() const { return source_names_; } |
404 | | void Print(const OutputOptions& options, std::ostream* out); |
405 | 0 | void SetDisassembly(absl::string_view disassembly) { |
406 | 0 | disassembly_ = std::string(disassembly); |
407 | 0 | } |
408 | | |
409 | 0 | absl::string_view GetDisassembly() { return disassembly_; } |
410 | | |
411 | | // For debugging. |
412 | 0 | const RollupRow& toplevel_row() const { return toplevel_row_; } |
413 | 0 | bool diff_mode() const { return diff_mode_; } |
414 | | |
415 | | private: |
416 | | friend class Rollup; |
417 | | |
418 | | std::vector<std::string> source_names_; |
419 | | RollupRow toplevel_row_; |
420 | | std::string disassembly_; |
421 | | |
422 | | // When we are in diff mode, rollup sizes are relative to the baseline. |
423 | | bool diff_mode_ = false; |
424 | | |
425 | | static bool IsSame(const std::string& a, const std::string& b); |
426 | | void PrettyPrint(const OutputOptions& options, std::ostream* out) const; |
427 | | void PrintToCSV(std::ostream* out, bool tabs, bool csvDiff) const; |
428 | | void PrettyPrintRow(const RollupRow& row, size_t indent, |
429 | | const OutputOptions& options, std::ostream* out) const; |
430 | | void PrettyPrintTree(const RollupRow& row, size_t indent, |
431 | | const OutputOptions& options, std::ostream* out) const; |
432 | | void PrintRowToCSV(const RollupRow& row, |
433 | | std::vector<std::string> parent_labels, |
434 | | std::ostream* out, bool tabs, bool csvDiff) const; |
435 | | void PrintTreeToCSV(const RollupRow& row, |
436 | | std::vector<std::string> parent_labels, |
437 | | std::ostream* out, bool tabs, bool csvDiff) const; |
438 | | }; |
439 | | |
// Top-level entry points.  Both return a bool and take a |std::string* error|
// out-parameter; ParseOptions() fills in |options| and |output_options|, and
// BloatyMain() fills in |output|.
// NOTE(review): presumably a false return means failure with a message stored
// in |*error|, and ParseOptions() may modify |*argc| / |*argv| -- confirm
// against the implementation.
bool ParseOptions(bool skip_unknown, int* argc, char** argv[], Options* options,
                  OutputOptions* output_options, std::string* error);
bool BloatyMain(const Options& options, const InputFileFactory& file_factory,
                RollupOutput* output, std::string* error);
444 | | |
445 | | } // namespace bloaty |
446 | | |
447 | | #endif |