Coverage Report

Created: 2024-05-15 07:00

/src/bloaty/src/bloaty.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2016 Google Inc. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include <stddef.h>
16
17
// For some reason this isn't getting defined by zconf.h in 32-bit builds.
18
// It's very hard to figure out why. For the moment this seems to fix it,
19
// but ideally we'd have a better solution here.
20
typedef size_t z_size_t;
21
#include <assert.h>
22
#include <fcntl.h>
23
#include <limits.h>
24
#include <math.h>
25
#include <signal.h>
26
#include <stdlib.h>
27
#include <zlib.h>
28
29
#include <atomic>
30
#include <cmath>
31
#include <fstream>
32
#include <iostream>
33
#include <limits>
34
#include <map>
35
#include <memory>
36
#include <mutex>
37
#include <sstream>
38
#include <string>
39
#include <thread>
40
#include <unordered_map>
41
#include <vector>
42
#if !defined(_WIN32)
43
#include <sys/mman.h>
44
#include <sys/wait.h>
45
#include <unistd.h>
46
#else
47
#include <windows.h>
48
#endif
49
#include <sys/stat.h>
50
#include <sys/types.h>
51
52
#include "absl/debugging/internal/demangle.h"
53
#include "absl/memory/memory.h"
54
#include "absl/strings/numbers.h"
55
#include "absl/strings/str_join.h"
56
#include "absl/strings/string_view.h"
57
#include "absl/strings/substitute.h"
58
#include "bloaty.h"
59
#include "bloaty.pb.h"
60
#include "google/protobuf/io/zero_copy_stream_impl.h"
61
#include "google/protobuf/text_format.h"
62
#include "re.h"
63
#include "util.h"
64
65
using absl::string_view;
66
67
namespace bloaty {
68
69
// Use a global since we would have to plumb it through so many call-stacks
70
// otherwise.  We would make this thread_local but that's not supported on OS X
71
// right now.
72
int verbose_level = 0;
73
ShowDomain show = ShowDomain::kShowBoth;
74
75
struct DataSourceDefinition {
76
  DataSource number;
77
  const char* name;
78
  const char* description;
79
};
80
81
constexpr DataSourceDefinition data_sources[] = {
82
    {DataSource::kArchiveMembers, "armembers", "the .o files in a .a file"},
83
    {DataSource::kCompileUnits, "compileunits",
84
     "source file for the .o file (translation unit). requires debug info."},
85
    {DataSource::kInputFiles, "inputfiles",
86
     "the filename specified on the Bloaty command-line"},
87
    {DataSource::kInlines, "inlines",
88
     "source line/file where inlined code came from.  requires debug info."},
89
    {DataSource::kSections, "sections", "object file section"},
90
    {DataSource::kSegments, "segments", "load commands in the binary"},
91
    // We require that all symbols sources are >= kSymbols.
92
    {DataSource::kSymbols, "symbols",
93
     "symbols from symbol table (configure demangling with --demangle)"},
94
    {DataSource::kRawSymbols, "rawsymbols", "unmangled symbols"},
95
    {DataSource::kFullSymbols, "fullsymbols", "full demangled symbols"},
96
    {DataSource::kShortSymbols, "shortsymbols", "short demangled symbols"},
97
};
98
99
0
#define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0]))
100
101
0
const char* GetDataSourceLabel(DataSource source) {
102
0
  for (size_t i = 0; i < ARRAY_SIZE(data_sources); i++) {
103
0
    if (data_sources[i].number == source) {
104
0
      return data_sources[i].name;
105
0
    }
106
0
  }
107
0
  fprintf(stderr, "Unknown data source label: %d\n", static_cast<int>(source));
108
0
  exit(1);
109
0
  return nullptr;
110
0
}
111
112
0
// Returns -1, 0 or 1 according to whether `val` is negative, zero or positive.
int SignOf(long val) {
  // Each comparison yields 0 or 1, so the difference is exactly the sign.
  return static_cast<int>(val > 0) - static_cast<int>(val < 0);
}
121
122
60.4M
void CheckedAdd(int64_t* accum, int64_t val) {
123
60.4M
#if ABSL_HAVE_BUILTIN(__builtin_add_overflow)
124
60.4M
  if (__builtin_add_overflow(*accum, val, accum)) {
125
2.34k
    THROW("integer overflow");
126
2.34k
  }
127
#else
128
  bool safe = *accum < 0
129
                  ? (val >= std::numeric_limits<int64_t>::max() - *accum)
130
                  : (val <= std::numeric_limits<int64_t>::max() - *accum);
131
  if (!safe) {
132
    THROW("integer overflow");
133
  }
134
  *accum += val;
135
#endif
136
60.4M
}
137
138
0
// Returns `str` formatted as a single CSV field.
//
// A field containing a double quote, a comma, or a line break (CR or LF) is
// wrapped in double quotes, with every embedded double quote doubled.  Any
// other field is returned unchanged.
static std::string CSVEscape(string_view str) {
  // Characters that force quoting; an unquoted line break would split the row.
  if (str.find_first_of("\",\r\n") == string_view::npos) {
    return std::string(str);
  }

  std::string ret;
  ret.reserve(str.size() + 2);  // At least the two surrounding quotes.
  ret += '"';
  for (char ch : str) {
    if (ch == '"') {
      ret += '"';  // Double the quote; the character itself is added below.
    }
    ret += ch;
  }
  ret += '"';
  return ret;
}
163
164
extern "C" char* __cxa_demangle(const char* mangled_name, char* buf, size_t* n,
165
                                int* status);
166
167
7.03M
std::string ItaniumDemangle(string_view symbol, DataSource source) {
168
7.03M
  if (source != DataSource::kShortSymbols &&
169
7.03M
      source != DataSource::kFullSymbols) {
170
    // No demangling.
171
3.34M
    return std::string(symbol);
172
3.34M
  }
173
174
3.68M
  string_view demangle_from = symbol;
175
3.68M
  if (absl::StartsWith(demangle_from, "__Z")) {
176
18.8k
    demangle_from.remove_prefix(1);
177
18.8k
  }
178
179
3.68M
  if (source == DataSource::kShortSymbols) {
180
3.68M
    char demangled[1024];
181
3.68M
    if (absl::debugging_internal::Demangle(demangle_from.data(), demangled,
182
3.68M
                                           sizeof(demangled))) {
183
24.8k
      return std::string(demangled);
184
3.65M
    } else {
185
3.65M
      return std::string(symbol);
186
3.65M
    }
187
3.68M
  } else if (source == DataSource::kFullSymbols) {
188
0
    char* demangled = __cxa_demangle(demangle_from.data(), NULL, NULL, NULL);
189
0
    if (demangled) {
190
0
      std::string ret(demangled);
191
0
      free(demangled);
192
0
      return ret;
193
0
    } else {
194
0
      return std::string(symbol);
195
0
    }
196
0
  } else {
197
0
    printf("Unexpected source: %d\n", (int)source);
198
0
    BLOATY_UNREACHABLE();
199
0
  }
200
3.68M
}
201
202
// NameMunger //////////////////////////////////////////////////////////////////
203
204
void NameMunger::AddRegex(const std::string& regex,
205
0
                          const std::string& replacement) {
206
0
  auto reg = absl::make_unique<ReImpl>(regex);
207
0
  regexes_.push_back(std::make_pair(std::move(reg), replacement));
208
0
}
209
210
103M
// Applies the registered rules to `name` in registration order.
//
// Returns the rewrite produced by the first rule for which ReImpl::Extract
// succeeds.  If no rule succeeds (or none are registered), returns `name`
// unchanged.
std::string NameMunger::Munge(string_view name) const {
  std::string name_str(name);

  // Fast path: with no rules there is nothing to extract into, so skip the
  // second copy of the name.
  if (regexes_.empty()) {
    return name_str;
  }

  std::string ret(name);
  for (const auto& pair : regexes_) {
    if (ReImpl::Extract(name_str, *pair.first, pair.second, &ret)) {
      return ret;
    }
  }

  return name_str;
}
222
223
// Rollup //////////////////////////////////////////////////////////////////////
224
225
// A Rollup is a hierarchical tally of sizes.  Its graphical representation is
226
// something like this:
227
//
228
//  93.3%  93.3%   3.02M Unmapped
229
//      38.2%  38.2%   1.16M .debug_info
230
//      23.9%  62.1%    740k .debug_str
231
//      12.1%  74.2%    374k .debug_pubnames
232
//      11.7%  86.0%    363k .debug_loc
233
//       8.9%  94.9%    275k [Other]
234
//       5.1% 100.0%    158k .debug_ranges
235
//   6.7% 100.0%    222k LOAD [R E]
236
//      61.0%  61.0%    135k .text
237
//      21.4%  82.3%   47.5k .rodata
238
//       6.2%  88.5%   13.8k .gcc_except_table
239
//       5.9%  94.4%   13.2k .eh_frame
240
//       5.6% 100.0%   12.4k [Other]
241
//   0.0% 100.0%   1.40k [Other]
242
// 100.0%   3.24M TOTAL
243
//
244
// Rollup is the generic data structure, before we apply output massaging like
245
// collapsing excess elements into "[Other]" or sorting.
246
247
std::string others_label = "[Other]";
248
249
class Rollup {
250
 public:
251
7.50M
  Rollup() {}
252
  Rollup(const Rollup&) = delete;
253
  Rollup& operator=(const Rollup&) = delete;
254
255
  Rollup(Rollup&& other) = default;
256
834k
  Rollup& operator=(Rollup&& other) = default;
257
258
  void AddSizes(const std::vector<std::string>& names, uint64_t size,
259
27.4M
                bool is_vmsize) {
260
    // We start at 1 to exclude the base map (see base_map_).
261
27.4M
    AddInternal(names, 1, size, is_vmsize);
262
27.4M
  }
263
264
  // Prints a graphical representation of the rollup.
265
320k
  void CreateRollupOutput(const Options& options, RollupOutput* output) const {
266
320k
    CreateDiffModeRollupOutput(nullptr, options, output);
267
320k
    output->diff_mode_ = false;
268
320k
  }
269
270
  void CreateDiffModeRollupOutput(Rollup* base, const Options& options,
271
320k
                                  RollupOutput* output) const {
272
320k
    RollupRow* row = &output->toplevel_row_;
273
320k
    row->size.vm = vm_total_;
274
320k
    row->size.file = file_total_;
275
320k
    row->filtered_size.vm = filtered_vm_total_;
276
320k
    row->filtered_size.file = filtered_file_total_;
277
320k
    row->vmpercent = 100;
278
320k
    row->filepercent = 100;
279
320k
    output->diff_mode_ = true;
280
320k
    CreateRows(row, base, options, true);
281
320k
  }
282
283
834k
  void SetFilterRegex(const ReImpl* regex) { filter_regex_ = regex; }
284
285
  // Add the values in "other" from this.
286
0
  void Add(const Rollup& other) {
287
0
    vm_total_ += other.vm_total_;
288
0
    file_total_ += other.file_total_;
289
290
0
    for (const auto& other_child : other.children_) {
291
0
      auto& child = children_[other_child.first];
292
0
      if (child.get() == NULL) {
293
0
        child.reset(new Rollup());
294
0
      }
295
0
      child->Add(*other_child.second);
296
0
    }
297
0
  }
298
299
  // Create entries for all children which exist in "other" but not in this.
300
0
  void AddEntriesFrom(const Rollup& other) {
301
0
    for (const auto& other_child : other.children_) {
302
0
      auto& child = children_[other_child.first];
303
0
      if (child.get() == NULL) {
304
0
        child.reset(new Rollup());
305
0
      }
306
0
      child->AddEntriesFrom(*other_child.second);
307
0
    }
308
0
  }
309
310
1.15M
  int64_t file_total() const { return file_total_; }
311
1.15M
  int64_t filtered_file_total() const { return filtered_file_total_; }
312
313
 private:
314
  int64_t vm_total_ = 0;
315
  int64_t file_total_ = 0;
316
  int64_t filtered_vm_total_ = 0;
317
  int64_t filtered_file_total_ = 0;
318
319
  const ReImpl* filter_regex_ = nullptr;
320
321
  // Putting Rollup by value seems to work on some compilers/libs but not
322
  // others.
323
  typedef std::unordered_map<std::string, std::unique_ptr<Rollup>> ChildMap;
324
  ChildMap children_;
325
  static Rollup* empty_;
326
327
0
  static Rollup* GetEmpty() {
328
0
    if (!empty_) {
329
0
      empty_ = new Rollup();
330
0
    }
331
0
    return empty_;
332
0
  }
333
334
  // Adds "size" bytes to the rollup under the label names[i].
335
  // If there are more entries names[i+1, i+2, etc] add them to sub-rollups.
336
  void AddInternal(const std::vector<std::string>& names, size_t i,
337
54.9M
                   uint64_t size, bool is_vmsize) {
338
54.9M
    if (filter_regex_ != nullptr) {
339
      // filter_regex_ is only set in the root rollup, which checks the full
340
      // label hierarchy for a match to determine whether a region should be
341
      // considered.
342
0
      bool any_matched = false;
343
344
0
      for (const auto& name : names) {
345
0
        if (ReImpl::PartialMatch(name, *filter_regex_)) {
346
0
          any_matched = true;
347
0
          break;
348
0
        }
349
0
      }
350
351
0
      if (!any_matched) {
352
        // Ignore this region in the rollup and don't visit sub-rollups.
353
0
        if (is_vmsize) {
354
0
          CheckedAdd(&filtered_vm_total_, size);
355
0
        } else {
356
0
          CheckedAdd(&filtered_file_total_, size);
357
0
        }
358
0
        return;
359
0
      }
360
0
    }
361
362
54.9M
    if (is_vmsize) {
363
32.4M
      CheckedAdd(&vm_total_, size);
364
32.4M
    } else {
365
22.5M
      CheckedAdd(&file_total_, size);
366
22.5M
    }
367
368
54.9M
    if (i < names.size()) {
369
27.4M
      auto& child = children_[names[i]];
370
27.4M
      if (child.get() == nullptr) {
371
5.19M
        child.reset(new Rollup());
372
5.19M
      }
373
27.4M
      child->AddInternal(names, i + 1, size, is_vmsize);
374
27.4M
    }
375
54.9M
  }
376
377
5.16M
  static double Percent(int64_t part, int64_t whole) {
378
5.16M
    if (whole == 0) {
379
181k
      if (part == 0) {
380
181k
        return NAN;
381
181k
      } else if (part > 0) {
382
0
        return INFINITY;
383
0
      } else {
384
0
        return -INFINITY;
385
0
      }
386
4.98M
    } else {
387
4.98M
      return static_cast<double>(part) / static_cast<double>(whole) * 100;
388
4.98M
    }
389
5.16M
  }
390
391
  void CreateRows(RollupRow* row, const Rollup* base, const Options& options,
392
                  bool is_toplevel) const;
393
  void SortAndAggregateRows(RollupRow* row, const Rollup* base,
394
                            const Options& options, bool is_toplevel) const;
395
};
396
397
// Populates row->sorted_children with one row per child of this rollup, then
// hands the rows to SortAndAggregateRows().
//
// When `base` is non-null (diff mode), each child's sizes are reported as the
// difference from the same-named child of `base`, and the base sizes are kept
// in the row's old_size.  Children whose VM and file sizes are both zero are
// omitted.
void Rollup::CreateRows(RollupRow* row, const Rollup* base,
                        const Options& options, bool is_toplevel) const {
  if (base != nullptr) {
    // For a diff, the percentage is a comparison against the previous size of
    // the same label at the same level.
    row->vmpercent = Percent(vm_total_, base->vm_total_);
    row->filepercent = Percent(file_total_, base->file_total_);
  }

  for (const auto& entry : children_) {
    const std::string& label = entry.first;
    int64_t vm_delta = entry.second->vm_total_;
    int64_t file_delta = entry.second->file_total_;
    const Rollup* base_child = nullptr;

    if (base != nullptr) {
      // In a diff, subtract the base sizes so the row holds the change.
      const auto found = base->children_.find(label);
      if (found != base->children_.end()) {
        base_child = found->second.get();
        vm_delta -= base_child->vm_total_;
        file_delta -= base_child->file_total_;
      }
    }

    if (vm_delta == 0 && file_delta == 0) {
      continue;  // Nothing to report for this label.
    }

    row->sorted_children.emplace_back(label);
    RollupRow& child_row = row->sorted_children.back();
    child_row.size.vm = vm_delta;
    child_row.size.file = file_delta;

    // Preserve the old size for this label in the RollupRow output.  It comes
    // from the diff base when there is one; otherwise it is left untouched.
    if (base_child != nullptr) {
      child_row.old_size.vm = base_child->vm_total_;
      child_row.old_size.file = base_child->file_total_;
    }
  }

  SortAndAggregateRows(row, base, options, is_toplevel);
}
439
440
Rollup* Rollup::empty_;
441
442
void Rollup::SortAndAggregateRows(RollupRow* row, const Rollup* base,
443
                                  const Options& options,
444
2.90M
                                  bool is_toplevel) const {
445
2.90M
  std::vector<RollupRow>& child_rows = row->sorted_children;
446
447
  // We don't want to output a solitary "[None]" or "[Unmapped]" row except at
448
  // the top level.
449
2.90M
  if (!is_toplevel && child_rows.size() == 1 &&
450
2.90M
      (child_rows[0].name == "[None]" || child_rows[0].name == "[Unmapped]")) {
451
0
    child_rows.clear();
452
0
  }
453
454
  // We don't want to output a single row that has exactly the same size and
455
  // label as the parent.
456
2.90M
  if (child_rows.size() == 1 && child_rows[0].name == row->name) {
457
0
    child_rows.clear();
458
0
  }
459
460
2.90M
  if (child_rows.empty()) {
461
2.58M
    return;
462
2.58M
  }
463
464
  // First sort by magnitude.
465
5.15M
  for (auto& child : child_rows) {
466
5.15M
    switch (options.sort_by()) {
467
0
      case Options::SORTBY_VMSIZE:
468
0
        child.sortkey = std::abs(child.size.vm);
469
0
        break;
470
0
      case Options::SORTBY_FILESIZE:
471
0
        child.sortkey = std::abs(child.size.file);
472
0
        break;
473
5.15M
      case Options::SORTBY_BOTH:
474
5.15M
        child.sortkey =
475
5.15M
            std::max(std::abs(child.size.vm), std::abs(child.size.file));
476
5.15M
        break;
477
0
      default:
478
0
        BLOATY_UNREACHABLE();
479
5.15M
    }
480
5.15M
  }
481
482
320k
  std::sort(child_rows.begin(), child_rows.end(), &RollupRow::Compare);
483
484
320k
  RollupRow others_row(others_label);
485
320k
  others_row.other_count = child_rows.size() - options.max_rows_per_level();
486
320k
  others_row.name = absl::Substitute("[$0 Others]", others_row.other_count);
487
320k
  Rollup others_rollup;
488
320k
  Rollup others_base;
489
490
  // Filter out everything but the top 'row_limit'.  Add rows that were filtered
491
  // out to "others_row".
492
320k
  size_t i = child_rows.size() - 1;
493
2.97M
  while (i >= options.max_rows_per_level()) {
494
2.65M
    CheckedAdd(&others_row.size.vm, child_rows[i].size.vm);
495
2.65M
    CheckedAdd(&others_row.size.file, child_rows[i].size.file);
496
2.65M
    if (base) {
497
0
      auto it = base->children_.find(child_rows[i].name);
498
0
      if (it != base->children_.end()) {
499
0
        CheckedAdd(&others_base.vm_total_, it->second->vm_total_);
500
0
        CheckedAdd(&others_base.file_total_, it->second->file_total_);
501
0
      }
502
0
    }
503
504
2.65M
    child_rows.erase(child_rows.end() - 1);
505
2.65M
    i--;
506
2.65M
  }
507
508
320k
  if (std::abs(others_row.size.vm) > 0 || std::abs(others_row.size.file) > 0) {
509
77.8k
    child_rows.push_back(others_row);
510
77.8k
    CheckedAdd(&others_rollup.vm_total_, others_row.size.vm);
511
77.8k
    CheckedAdd(&others_rollup.file_total_, others_row.size.file);
512
77.8k
  }
513
514
  // Now sort by actual value (positive or negative).
515
2.58M
  for (auto& child : child_rows) {
516
2.58M
    switch (options.sort_by()) {
517
0
      case Options::SORTBY_VMSIZE:
518
0
        child.sortkey = child.size.vm;
519
0
        break;
520
0
      case Options::SORTBY_FILESIZE:
521
0
        child.sortkey = child.size.file;
522
0
        break;
523
2.58M
      case Options::SORTBY_BOTH:
524
2.58M
        if (std::abs(child.size.vm) > std::abs(child.size.file)) {
525
224k
          child.sortkey = child.size.vm;
526
2.35M
        } else {
527
2.35M
          child.sortkey = child.size.file;
528
2.35M
        }
529
2.58M
        break;
530
0
      default:
531
0
        BLOATY_UNREACHABLE();
532
2.58M
    }
533
2.58M
  }
534
535
320k
  std::sort(child_rows.begin(), child_rows.end(), &RollupRow::Compare);
536
537
  // For a non-diff, the percentage is compared to the total size of the parent.
538
320k
  if (!base) {
539
2.58M
    for (auto& child_row : child_rows) {
540
2.58M
      child_row.vmpercent = Percent(child_row.size.vm, row->size.vm);
541
2.58M
      child_row.filepercent = Percent(child_row.size.file, row->size.file);
542
2.58M
    }
543
320k
  }
544
545
  // Recurse into sub-rows, (except "Other", which isn't a real row).
546
2.58M
  for (auto& child_row : child_rows) {
547
2.58M
    const Rollup* child_rollup;
548
2.58M
    const Rollup* child_base = nullptr;
549
550
2.58M
    if (child_row.other_count > 0) {
551
77.8k
      child_rollup = &others_rollup;
552
77.8k
      if (base) {
553
0
        child_base = &others_base;
554
0
      }
555
2.50M
    } else {
556
2.50M
      auto it = children_.find(child_row.name);
557
2.50M
      if (it == children_.end()) {
558
0
        THROWF("internal error, couldn't find name $0", child_row.name);
559
0
      }
560
2.50M
      child_rollup = it->second.get();
561
2.50M
      assert(child_rollup);
562
563
2.50M
      if (base) {
564
0
        auto it = base->children_.find(child_row.name);
565
0
        if (it == base->children_.end()) {
566
0
          child_base = GetEmpty();
567
0
        } else {
568
0
          child_base = it->second.get();
569
0
        }
570
0
      }
571
2.50M
    }
572
573
2.58M
    child_rollup->CreateRows(&child_row, child_base, options, false);
574
2.58M
  }
575
320k
}
576
577
// RollupOutput ////////////////////////////////////////////////////////////////
578
579
// RollupOutput represents rollup data after we have applied output massaging
580
// like collapsing excess rows into "[Other]" and sorted the output.  Once the
581
// data is in this format, we can print it to the screen (or verify the output
582
// in unit tests).
583
584
namespace {
585
586
0
// Returns `input` forced to exactly `size` characters: padded on the right
// with spaces if it is shorter, truncated if it is longer.
std::string FixedWidthString(const std::string& input, size_t size) {
  std::string ret = input;
  ret.resize(size, ' ');  // Pads with ' ' or truncates, as needed.
  return ret;
}
597
598
0
bool ShowFile(const OutputOptions& options) {
599
0
  return options.show != ShowDomain::kShowVM;
600
0
}
601
602
0
bool ShowVM(const OutputOptions& options) {
603
0
  return options.show != ShowDomain::kShowFile;
604
0
}
605
606
0
// Returns `input` padded on the left with spaces to at least `size`
// characters.  Inputs already `size` or longer are returned unchanged.
std::string LeftPad(const std::string& input, size_t size) {
  if (input.size() >= size) {
    return input;
  }

  // Build the padding in one step rather than prepending a space at a time,
  // which re-copied the whole string on every iteration.
  std::string ret(size - input.size(), ' ');
  ret += input;
  return ret;
}
614
615
0
// Formats the single double `d` with the printf-style format `fmt` and returns
// the result.  Output is limited by the 1024-byte scratch buffer.
std::string DoubleStringPrintf(const char* fmt, double d) {
  char scratch[1024];
  snprintf(scratch, sizeof(scratch), fmt, d);
  std::string formatted(scratch);
  return formatted;
}
620
621
0
std::string SiPrint(int64_t size, bool force_sign) {
622
0
  const char* prefixes[] = {"", "Ki", "Mi", "Gi", "Ti"};
623
0
  size_t num_prefixes = 5;
624
0
  size_t n = 0;
625
0
  double size_d = size;
626
0
  while (fabs(size_d) > 1024 && n < num_prefixes - 2) {
627
0
    size_d /= 1024;
628
0
    n++;
629
0
  }
630
631
0
  std::string ret;
632
633
0
  if (fabs(size_d) > 100 || n == 0) {
634
0
    ret = std::to_string(static_cast<int64_t>(size_d)) + prefixes[n];
635
0
    if (force_sign && size > 0) {
636
0
      ret = "+" + ret;
637
0
    }
638
0
  } else if (fabs(size_d) > 10) {
639
0
    if (force_sign) {
640
0
      ret = DoubleStringPrintf("%+0.1f", size_d) + prefixes[n];
641
0
    } else {
642
0
      ret = DoubleStringPrintf("%0.1f", size_d) + prefixes[n];
643
0
    }
644
0
  } else {
645
0
    if (force_sign) {
646
0
      ret = DoubleStringPrintf("%+0.2f", size_d) + prefixes[n];
647
0
    } else {
648
0
      ret = DoubleStringPrintf("%0.2f", size_d) + prefixes[n];
649
0
    }
650
0
  }
651
652
0
  return LeftPad(ret, 7);
653
0
}
654
655
0
std::string PercentString(double percent, bool diff_mode) {
656
0
  if (diff_mode) {
657
0
    if (percent == 0 || std::isnan(percent)) {
658
0
      return " [ = ]";
659
0
    } else if (percent == -100) {
660
0
      return " [DEL]";
661
0
    } else if (std::isinf(percent)) {
662
0
      return " [NEW]";
663
0
    } else {
664
      // We want to keep this fixed-width even if the percent is very large.
665
0
      std::string str;
666
0
      if (percent > 1000) {
667
0
        int digits = log10(percent) - 1;
668
0
        str = DoubleStringPrintf("%+2.0f", percent / pow(10, digits)) + "e" +
669
0
              std::to_string(digits) + "%";
670
0
      } else if (percent > 10) {
671
0
        str = DoubleStringPrintf("%+4.0f%%", percent);
672
0
      } else {
673
0
        str = DoubleStringPrintf("%+5.1F%%", percent);
674
0
      }
675
676
0
      return LeftPad(str, 6);
677
0
    }
678
0
  } else {
679
0
    return DoubleStringPrintf("%5.1F%%", percent);
680
0
  }
681
0
}
682
683
}  // namespace
684
685
0
void RollupOutput::Print(const OutputOptions& options, std::ostream* out) {
686
0
  if (!source_names_.empty()) {
687
0
    switch (options.output_format) {
688
0
      case bloaty::OutputFormat::kPrettyPrint:
689
0
        PrettyPrint(options, out);
690
0
        break;
691
0
      case bloaty::OutputFormat::kCSV:
692
0
        PrintToCSV(out, /*tabs=*/false, options.showAllSizesCSV);
693
0
        break;
694
0
      case bloaty::OutputFormat::kTSV:
695
0
        PrintToCSV(out, /*tabs=*/true, options.showAllSizesCSV);
696
0
        break;
697
0
      default:
698
0
        BLOATY_UNREACHABLE();
699
0
    }
700
0
  }
701
702
0
  if (!disassembly_.empty()) {
703
0
    *out << disassembly_;
704
0
  }
705
0
}
706
707
void RollupOutput::PrettyPrintRow(const RollupRow& row, size_t indent,
708
                                  const OutputOptions& options,
709
0
                                  std::ostream* out) const {
710
0
  if (&row != &toplevel_row_) {
711
    // Avoid printing this row if it is only zero.
712
    // This can happen when using --domain if the row is zero for this domain.
713
0
    if ((!ShowFile(options) && row.size.vm == 0) ||
714
0
        (!ShowVM(options) && row.size.file == 0)) {
715
0
      return;
716
0
    }
717
0
  }
718
719
0
  *out << FixedWidthString("", indent) << " ";
720
721
0
  if (ShowFile(options)) {
722
0
    *out << PercentString(row.filepercent, diff_mode_) << " "
723
0
         << SiPrint(row.size.file, diff_mode_) << " ";
724
0
  }
725
726
0
  if (ShowVM(options)) {
727
0
    *out << PercentString(row.vmpercent, diff_mode_) << " "
728
0
         << SiPrint(row.size.vm, diff_mode_) << " ";
729
0
  }
730
731
0
  *out << "   " << row.name << "\n";
732
0
}
733
734
0
bool RollupOutput::IsSame(const std::string& a, const std::string& b) {
735
0
  if (a == b) {
736
0
    return true;
737
0
  }
738
739
0
  if (absl::EndsWith(b, a + "]") || absl::EndsWith(a, b + "]")) {
740
0
    return true;
741
0
  }
742
743
0
  return false;
744
0
}
745
746
void RollupOutput::PrettyPrintTree(const RollupRow& row, size_t indent,
747
                                   const OutputOptions& options,
748
0
                                   std::ostream* out) const {
749
  // Rows are printed before their sub-rows.
750
0
  PrettyPrintRow(row, indent, options, out);
751
752
0
  if (!row.size.vm && !row.size.file) {
753
0
    return;
754
0
  }
755
756
0
  if (row.sorted_children.size() == 1 &&
757
0
      row.sorted_children[0].sorted_children.size() == 0 &&
758
0
      IsSame(row.name, row.sorted_children[0].name)) {
759
0
    return;
760
0
  }
761
762
0
  for (const auto& child : row.sorted_children) {
763
0
    PrettyPrintTree(child, indent + 2, options, out);
764
0
  }
765
0
}
766
767
void RollupOutput::PrettyPrint(const OutputOptions& options,
768
0
                               std::ostream* out) const {
769
0
  if (ShowFile(options)) {
770
0
    *out << "    FILE SIZE   ";
771
0
  }
772
773
0
  if (ShowVM(options)) {
774
0
    *out << "     VM SIZE    ";
775
0
  }
776
777
0
  *out << "\n";
778
779
0
  if (ShowFile(options)) {
780
0
    *out << " -------------- ";
781
0
  }
782
783
0
  if (ShowVM(options)) {
784
0
    *out << " -------------- ";
785
0
  }
786
787
0
  *out << "\n";
788
789
0
  for (const auto& child : toplevel_row_.sorted_children) {
790
0
    PrettyPrintTree(child, 0, options, out);
791
0
  }
792
793
  // The "TOTAL" row comes after all other rows.
794
0
  PrettyPrintRow(toplevel_row_, 0, options, out);
795
796
0
  uint64_t file_filtered = 0;
797
0
  uint64_t vm_filtered = 0;
798
0
  uint64_t filtered = 0;
799
0
  if (ShowFile(options)) {
800
0
    filtered += toplevel_row_.filtered_size.file;
801
0
  }
802
0
  if (ShowVM(options)) {
803
0
    filtered += toplevel_row_.filtered_size.vm;
804
0
  }
805
806
0
  if (vm_filtered == 0 && file_filtered == 0) {
807
0
    return;
808
0
  }
809
810
0
  *out << "Filtering enabled (source_filter); omitted";
811
812
0
  if (file_filtered > 0 && vm_filtered > 0) {
813
0
    *out << " file =" << SiPrint(file_filtered, /*force_sign=*/false)
814
0
         << ", vm =" << SiPrint(vm_filtered, /*force_sign=*/false);
815
0
  } else if (file_filtered > 0) {
816
0
    *out << SiPrint(file_filtered, /*force_sign=*/false);
817
0
  } else {
818
0
    *out << SiPrint(vm_filtered, /*force_sign=*/false);
819
0
  }
820
821
0
  *out << " of entries\n";
822
0
}
823
824
void RollupOutput::PrintRowToCSV(const RollupRow& row,
825
                                 std::vector<std::string> parent_labels,
826
0
                                 std::ostream* out, bool tabs, bool csvDiff) const {
827
0
  while (parent_labels.size() < source_names_.size()) {
828
    // If this label had no data at this level, append an empty string.
829
0
    parent_labels.push_back("");
830
0
  }
831
832
0
  parent_labels.push_back(std::to_string(row.size.vm));
833
0
  parent_labels.push_back(std::to_string(row.size.file));
834
835
  // If in diff where both old size are 0, get new size by adding diff size to
836
  // old size.
837
0
  if (csvDiff) {
838
0
    parent_labels.push_back(std::to_string(row.old_size.vm));
839
0
    parent_labels.push_back(std::to_string(row.old_size.file));
840
0
    parent_labels.push_back(std::to_string(row.old_size.vm + (row.size.vm)));
841
0
    parent_labels.push_back(
842
0
        std::to_string(row.old_size.file + (row.size.file)));}
843
844
0
  std::string sep = tabs ? "\t" : ",";
845
0
  *out << absl::StrJoin(parent_labels, sep) << "\n";
846
0
}
847
848
void RollupOutput::PrintTreeToCSV(const RollupRow& row,
849
                                  std::vector<std::string> parent_labels,
850
0
                                  std::ostream* out, bool tabs, bool csvDiff) const {
851
0
  if (tabs) {
852
0
    parent_labels.push_back(row.name);
853
0
  } else {
854
0
    parent_labels.push_back(CSVEscape(row.name));
855
0
  }
856
857
0
  if (row.sorted_children.size() > 0) {
858
0
    for (const auto& child_row : row.sorted_children) {
859
0
      PrintTreeToCSV(child_row, parent_labels, out, tabs, csvDiff);
860
0
    }
861
0
  } else {
862
0
    PrintRowToCSV(row, parent_labels, out, tabs, csvDiff);
863
0
  }
864
0
}
865
866
void RollupOutput::PrintToCSV(std::ostream* out, bool tabs,
867
0
                              bool csvDiff) const {
868
0
  std::vector<std::string> names(source_names_);
869
0
  names.push_back("vmsize");
870
0
  names.push_back("filesize");
871
0
  if (csvDiff) {
872
0
    names.push_back("original_vmsize");
873
0
    names.push_back("original_filesize");
874
0
    names.push_back("current_vmsize");
875
0
    names.push_back("current_filesize");
876
0
  }
877
0
  std::string sep = tabs ? "\t" : ",";
878
0
  *out << absl::StrJoin(names, sep) << "\n";
879
0
  for (const auto& child_row : toplevel_row_.sorted_children) {
880
0
    PrintTreeToCSV(child_row, std::vector<std::string>(), out, tabs, csvDiff);
881
0
  }
882
0
}
883
884
// RangeMap ////////////////////////////////////////////////////////////////////
885
886
constexpr uint64_t RangeSink::kUnknownSize;
887
888
// MmapInputFile ///////////////////////////////////////////////////////////////
889
890
#if !defined(_WIN32)
891
class MmapInputFile : public InputFile {
892
 public:
893
  MmapInputFile(string_view filename, string_view data);
894
  MmapInputFile(const MmapInputFile&) = delete;
895
  MmapInputFile& operator=(const MmapInputFile&) = delete;
896
  ~MmapInputFile() override;
897
898
  bool TryOpen(absl::string_view filename,
899
0
               std::unique_ptr<InputFile>& file) override {
900
0
    return DoTryOpen(filename, file);
901
0
  }
902
  static bool DoTryOpen(absl::string_view filename,
903
                        std::unique_ptr<InputFile>& file);
904
};
905
906
// Minimal scope guard for a POSIX file descriptor: close() is called on
// destruction when the descriptor is non-negative.
//
// The type is non-copyable because a copy would make two objects close the
// same descriptor.  The constructor is explicit so a plain int is never
// silently turned into an owning wrapper.
class FileDescriptor {
 public:
  // Takes ownership of `fd`.  A negative value (e.g. a failed open()) is
  // accepted and simply never closed.
  explicit FileDescriptor(int fd) : fd_(fd) {}

  FileDescriptor(const FileDescriptor&) = delete;
  FileDescriptor& operator=(const FileDescriptor&) = delete;

  ~FileDescriptor() {
    // A failing close() cannot be reported to the caller from a destructor, so
    // it is only logged.
    if (fd_ >= 0 && close(fd_) < 0) {
      fprintf(stderr, "bloaty: error calling close(): %s\n", strerror(errno));
    }
  }

  // Returns the wrapped descriptor (possibly negative); ownership is retained.
  int fd() const { return fd_; }

 private:
  int fd_;
};
921
922
bool MmapInputFile::DoTryOpen(absl::string_view filename,
923
0
                              std::unique_ptr<InputFile>& file) {
924
0
  std::string str(filename);
925
0
  FileDescriptor fd(open(str.c_str(), O_RDONLY));
926
0
  struct stat buf;
927
0
  const char* map;
928
929
0
  if (fd.fd() < 0) {
930
0
    std::cerr << absl::Substitute("couldn't open file '$0': $1\n", filename,
931
0
                                  strerror(errno));
932
0
    return false;
933
0
  }
934
935
0
  if (fstat(fd.fd(), &buf) < 0) {
936
0
    std::cerr << absl::Substitute("couldn't stat file '$0': $1\n", filename,
937
0
                                  strerror(errno));
938
0
    return false;
939
0
  }
940
941
0
  map = static_cast<char*>(
942
0
      mmap(nullptr, buf.st_size, PROT_READ, MAP_SHARED, fd.fd(), 0));
943
944
0
  if (map == MAP_FAILED) {
945
0
    std::cerr << absl::Substitute("couldn't mmap file '$0': $1", filename,
946
0
                                  strerror(errno));
947
0
    return false;
948
0
  }
949
950
0
  file.reset(new MmapInputFile(filename, string_view(map, buf.st_size)));
951
0
  return true;
952
0
}
953
954
// Passes `filename` to the InputFile base and records `data` as the contents.
// `data_` is assigned in the body rather than in the initializer list.
MmapInputFile::MmapInputFile(string_view filename, string_view data)
    : InputFile(filename) {
  this->data_ = data;
}
958
959
0
// Unmaps the region recorded in data_.  Nothing is done when data_ has a null
// data pointer; an munmap() failure is logged to stderr.
MmapInputFile::~MmapInputFile() {
  if (data_.data() == nullptr) {
    return;
  }

  // munmap() takes a mutable pointer; the mapping itself is never written.
  char* mapping = const_cast<char*>(data_.data());
  if (munmap(mapping, data_.size()) != 0) {
    fprintf(stderr, "bloaty: error calling munmap(): %s\n", strerror(errno));
  }
}
965
966
std::unique_ptr<InputFile> MmapInputFileFactory::OpenFile(
967
0
    const std::string& filename) const {
968
0
  std::unique_ptr<InputFile> ret;
969
0
  if (!MmapInputFile::DoTryOpen(filename, ret)) {
970
0
    THROW("Failed to open file.");
971
0
  }
972
0
  return ret;
973
0
}
974
975
#else  // !_WIN32
976
977
// MmapInputFile ///////////////////////////////////////////////////////////////
978
979
class Win32MMapInputFile : public InputFile {
980
 public:
981
  Win32MMapInputFile(string_view filename, string_view data);
982
  Win32MMapInputFile(const Win32MMapInputFile&) = delete;
983
  Win32MMapInputFile& operator=(const Win32MMapInputFile&) = delete;
984
  ~Win32MMapInputFile() override;
985
986
  bool TryOpen(absl::string_view filename,
987
               std::unique_ptr<InputFile>& file) override {
988
    return DoTryOpen(filename, file);
989
  }
990
  static bool DoTryOpen(absl::string_view filename,
991
                        std::unique_ptr<InputFile>& file);
992
};
993
994
class Win32Handle {
995
 public:
996
  Win32Handle(HANDLE h) : h_(h) {}
997
998
  ~Win32Handle() {
999
    if (h_ && h_ != INVALID_HANDLE_VALUE && !CloseHandle(h_)) {
1000
      fprintf(stderr, "bloaty: error calling CloseHandle(): %d\n",
1001
              GetLastError());
1002
    }
1003
  }
1004
1005
  HANDLE h() { return h_; }
1006
1007
 private:
1008
  HANDLE h_;
1009
};
1010
1011
// Passes `filename` to the InputFile base and records `data` as the contents.
// `data_` is assigned in the body rather than in the initializer list.
Win32MMapInputFile::Win32MMapInputFile(string_view filename, string_view data)
    : InputFile(filename) {
  this->data_ = data;
}
1015
1016
bool Win32MMapInputFile::DoTryOpen(absl::string_view filename,
1017
                                   std::unique_ptr<InputFile>& file) {
1018
  std::string str(filename);
1019
  Win32Handle fd(::CreateFileA(str.c_str(), FILE_GENERIC_READ, FILE_SHARE_READ,
1020
                               NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
1021
                               NULL));
1022
  LARGE_INTEGER li = {};
1023
  const char* map;
1024
1025
  if (fd.h() == INVALID_HANDLE_VALUE) {
1026
    std::cerr << absl::Substitute("couldn't open file '$0': $1", filename,
1027
                                  ::GetLastError());
1028
    return false;
1029
  }
1030
1031
  if (!::GetFileSizeEx(fd.h(), &li)) {
1032
    std::cerr << absl::Substitute("couldn't stat file '$0': $1", filename,
1033
                                  ::GetLastError());
1034
    return false;
1035
  }
1036
1037
  Win32Handle mapfd(
1038
      ::CreateFileMappingA(fd.h(), NULL, PAGE_READONLY, 0, 0, nullptr));
1039
  if (!mapfd.h()) {
1040
    std::cerr << absl::Substitute("couldn't create file mapping '$0': $1",
1041
                                  filename, ::GetLastError());
1042
    return false;
1043
  }
1044
1045
  map = static_cast<char*>(::MapViewOfFile(mapfd.h(), FILE_MAP_READ, 0, 0, 0));
1046
  if (!map) {
1047
    std::cerr << absl::Substitute("couldn't MapViewOfFile file '$0': $1",
1048
                                  filename, ::GetLastError());
1049
    return false;
1050
  }
1051
1052
  file.reset(new Win32MMapInputFile(filename, string_view(map, li.QuadPart)));
1053
  return true;
1054
}
1055
1056
// Releases the mapped view recorded in data_.  Nothing is done when data_ has
// a null data pointer; an UnmapViewOfFile() failure is logged to stderr.
Win32MMapInputFile::~Win32MMapInputFile() {
  if (data_.data() != nullptr && !::UnmapViewOfFile(data_.data())) {
    // GetLastError() returns a DWORD (unsigned long), hence %lu.
    fprintf(stderr, "bloaty: error calling UnmapViewOfFile(): %lu\n",
            ::GetLastError());
  }
}
1062
1063
std::unique_ptr<InputFile> MmapInputFileFactory::OpenFile(
1064
    const std::string& filename) const {
1065
  std::unique_ptr<InputFile> ret;
1066
  if (!Win32MMapInputFile::DoTryOpen(filename, ret)) {
1067
    THROW("Failed to open file.");
1068
  }
1069
  return ret;
1070
}
1071
1072
#endif
1073
1074
// RangeSink ///////////////////////////////////////////////////////////////////
1075
1076
// Stores the constructor arguments; no other work is done here.
//
//   file         input file whose data pointers are used for offset math
//   options      consulted for verbose/debug settings
//   data_source  used to label verbose trace output
//   translator   base map for vm <-> file translation; several methods treat
//                a null value as "no translation available"
//   arena        allocator used by ZlibDecompress(); may be null
RangeSink::RangeSink(const InputFile* file, const Options& options,
                     DataSource data_source, const DualMap* translator,
                     google::protobuf::Arena* arena)
    : file_(file),
      options_(options),
      data_source_(data_source),
      translator_(translator),
      arena_(arena) {
}
1084
1085
1.74M
// Nothing to release explicitly; members clean up after themselves.
RangeSink::~RangeSink() = default;
1086
1087
// Both globals start at the all-ones value.  The RangeSink checks visible
// nearby read the debug address/offset from options_ rather than from these;
// presumably they are kept for other users -- verify before removing.
uint64_t debug_vmaddr = UINT64_MAX;
uint64_t debug_fileoff = UINT64_MAX;
1089
1090
97.9M
bool RangeSink::ContainsVerboseVMAddr(uint64_t vmaddr, uint64_t vmsize) {
1091
97.9M
  return options_.verbose_level() > 1 ||
1092
97.9M
         (options_.has_debug_vmaddr() && options_.debug_vmaddr() >= vmaddr &&
1093
97.9M
          options_.debug_vmaddr() < (vmaddr + vmsize));
1094
97.9M
}
1095
1096
134M
bool RangeSink::ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize) {
1097
134M
  return options_.verbose_level() > 1 ||
1098
134M
         (options_.has_debug_fileoff() && options_.debug_fileoff() >= fileoff &&
1099
134M
          options_.debug_fileoff() < (fileoff + filesize));
1100
134M
}
1101
1102
97.9M
bool RangeSink::IsVerboseForVMRange(uint64_t vmaddr, uint64_t vmsize) {
1103
97.9M
  if (vmsize == RangeMap::kUnknownSize) {
1104
4.03M
    vmsize = UINT64_MAX - vmaddr;
1105
4.03M
  }
1106
1107
97.9M
  if (vmaddr + vmsize < vmaddr) {
1108
5.61k
    THROWF("Overflow in vm range, vmaddr=$0, vmsize=$1", vmaddr, vmsize);
1109
5.61k
  }
1110
1111
97.9M
  if (ContainsVerboseVMAddr(vmaddr, vmsize)) {
1112
0
    return true;
1113
0
  }
1114
1115
97.9M
  if (translator_ && options_.has_debug_fileoff()) {
1116
0
    RangeMap vm_map;
1117
0
    RangeMap file_map;
1118
0
    bool contains = false;
1119
0
    vm_map.AddRangeWithTranslation(vmaddr, vmsize, "", translator_->vm_map,
1120
0
                                   false, &file_map);
1121
0
    file_map.ForEachRange(
1122
0
        [this, &contains](uint64_t fileoff, uint64_t filesize) {
1123
0
          if (ContainsVerboseFileOffset(fileoff, filesize)) {
1124
0
            contains = true;
1125
0
          }
1126
0
        });
1127
0
    return contains;
1128
0
  }
1129
1130
97.9M
  return false;
1131
97.9M
}
1132
1133
134M
bool RangeSink::IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize) {
1134
134M
  if (filesize == RangeMap::kUnknownSize) {
1135
0
    filesize = UINT64_MAX - fileoff;
1136
0
  }
1137
1138
134M
  if (fileoff + filesize < fileoff) {
1139
0
    THROWF("Overflow in file range, fileoff=$0, filesize=$1", fileoff,
1140
0
           filesize);
1141
0
  }
1142
1143
134M
  if (ContainsVerboseFileOffset(fileoff, filesize)) {
1144
0
    return true;
1145
0
  }
1146
1147
134M
  if (translator_ && options_.has_debug_vmaddr()) {
1148
0
    RangeMap vm_map;
1149
0
    RangeMap file_map;
1150
0
    bool contains = false;
1151
0
    file_map.AddRangeWithTranslation(fileoff, filesize, "",
1152
0
                                     translator_->file_map, false, &vm_map);
1153
0
    vm_map.ForEachRange([this, &contains](uint64_t vmaddr, uint64_t vmsize) {
1154
0
      if (ContainsVerboseVMAddr(vmaddr, vmsize)) {
1155
0
        contains = true;
1156
0
      }
1157
0
    });
1158
0
    return contains;
1159
0
  }
1160
1161
134M
  return false;
1162
134M
}
1163
1164
1.74M
// Registers an output: every range added to this sink afterwards is written
// to `map`, with names passed through `munger` where a name is supplied.
// Both pointers are stored as-is.
void RangeSink::AddOutput(DualMap* map, const NameMunger* munger) {
  auto entry = std::make_pair(map, munger);
  outputs_.push_back(entry);
}
1167
1168
void RangeSink::AddFileRange(const char* analyzer, string_view name,
1169
8.88M
                             uint64_t fileoff, uint64_t filesize) {
1170
8.88M
  bool verbose = IsVerboseForFileRange(fileoff, filesize);
1171
8.88M
  if (verbose) {
1172
0
    printf("[%s, %s] AddFileRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
1173
0
           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
1174
0
           name.data(), fileoff, filesize);
1175
0
  }
1176
8.88M
  for (auto& pair : outputs_) {
1177
8.88M
    const std::string label = pair.second->Munge(name);
1178
8.88M
    if (translator_) {
1179
3.37M
      bool ok = pair.first->file_map.AddRangeWithTranslation(
1180
3.37M
          fileoff, filesize, label, translator_->file_map, verbose,
1181
3.37M
          &pair.first->vm_map);
1182
3.37M
      if (!ok) {
1183
15.3k
        WARN("File range ($0, $1) for label $2 extends beyond base map",
1184
15.3k
             fileoff, filesize, name);
1185
15.3k
      }
1186
5.51M
    } else {
1187
5.51M
      pair.first->file_map.AddRange(fileoff, filesize, label);
1188
5.51M
    }
1189
8.88M
  }
1190
8.88M
}
1191
1192
void RangeSink::AddFileRangeForVMAddr(const char* analyzer,
1193
                                      uint64_t label_from_vmaddr,
1194
38.5M
                                      string_view file_range) {
1195
38.5M
  uint64_t file_offset = file_range.data() - file_->data().data();
1196
38.5M
  bool verbose = IsVerboseForFileRange(file_offset, file_range.size());
1197
38.5M
  if (verbose) {
1198
0
    printf("[%s, %s] AddFileRangeForVMAddr(%" PRIx64 ", [%" PRIx64 ", %zx])\n",
1199
0
           GetDataSourceLabel(data_source_), analyzer, label_from_vmaddr,
1200
0
           file_offset, file_range.size());
1201
0
  }
1202
38.5M
  assert(translator_);
1203
38.5M
  for (auto& pair : outputs_) {
1204
38.5M
    std::string label;
1205
38.5M
    if (pair.first->vm_map.TryGetLabel(label_from_vmaddr, &label)) {
1206
24.2M
      bool ok = pair.first->file_map.AddRangeWithTranslation(
1207
24.2M
          file_offset, file_range.size(), label, translator_->file_map, verbose,
1208
24.2M
          &pair.first->vm_map);
1209
24.2M
      if (!ok) {
1210
337
        WARN("File range ($0, $1) for label $2 extends beyond base map",
1211
337
             file_offset, file_range.size(), label);
1212
337
      }
1213
24.2M
    } else if (verbose_level > 1) {
1214
0
      printf("No label found for vmaddr %" PRIx64 "\n", label_from_vmaddr);
1215
0
    }
1216
38.5M
  }
1217
38.5M
}
1218
1219
void RangeSink::AddFileRangeForFileRange(const char* analyzer,
1220
                                         absl::string_view from_file_range,
1221
0
                                         absl::string_view file_range) {
1222
0
  uint64_t file_offset = file_range.data() - file_->data().data();
1223
0
  uint64_t from_file_offset = from_file_range.data() - file_->data().data();
1224
0
  bool verbose = IsVerboseForFileRange(file_offset, file_range.size());
1225
0
  if (verbose) {
1226
0
    printf("[%s, %s] AddFileRangeForFileRange([%" PRIx64 ", %zx], [%" PRIx64
1227
0
           ", %zx])\n",
1228
0
           GetDataSourceLabel(data_source_), analyzer, from_file_offset,
1229
0
           from_file_range.size(), file_offset, file_range.size());
1230
0
  }
1231
0
  assert(translator_);
1232
0
  for (auto& pair : outputs_) {
1233
0
    std::string label;
1234
0
    if (pair.first->file_map.TryGetLabelForRange(
1235
0
            from_file_offset, from_file_range.size(), &label)) {
1236
0
      bool ok = pair.first->file_map.AddRangeWithTranslation(
1237
0
          file_offset, file_range.size(), label, translator_->file_map, verbose,
1238
0
          &pair.first->vm_map);
1239
0
      if (!ok) {
1240
0
        WARN("File range ($0, $1) for label $2 extends beyond base map",
1241
0
             file_offset, file_range.size(), label);
1242
0
      }
1243
0
    } else if (verbose_level > 1) {
1244
0
      printf("No label found for file range [%" PRIx64 ", %zx]\n",
1245
0
             from_file_offset, from_file_range.size());
1246
0
    }
1247
0
  }
1248
0
}
1249
1250
void RangeSink::AddVMRangeForVMAddr(const char* analyzer,
1251
                                    uint64_t label_from_vmaddr, uint64_t addr,
1252
3.56M
                                    uint64_t size) {
1253
3.56M
  bool verbose = IsVerboseForVMRange(addr, size);
1254
3.56M
  if (verbose) {
1255
0
    printf("[%s, %s] AddVMRangeForVMAddr(%" PRIx64 ", [%" PRIx64 ", %" PRIx64
1256
0
           "])\n",
1257
0
           GetDataSourceLabel(data_source_), analyzer, label_from_vmaddr, addr,
1258
0
           size);
1259
0
  }
1260
3.56M
  assert(translator_);
1261
3.56M
  for (auto& pair : outputs_) {
1262
3.56M
    std::string label;
1263
3.56M
    if (pair.first->vm_map.TryGetLabel(label_from_vmaddr, &label)) {
1264
3.52M
      bool ok = pair.first->vm_map.AddRangeWithTranslation(
1265
3.52M
          addr, size, label, translator_->vm_map, verbose,
1266
3.52M
          &pair.first->file_map);
1267
3.52M
      if (!ok && verbose_level > 1) {
1268
0
        WARN("VM range ($0, $1) for label $2 extends beyond base map", addr,
1269
0
             size, label);
1270
0
      }
1271
3.52M
    } else if (verbose_level > 1) {
1272
0
      printf("No label found for vmaddr %" PRIx64 "\n", label_from_vmaddr);
1273
0
    }
1274
3.56M
  }
1275
3.56M
}
1276
1277
void RangeSink::AddVMRange(const char* analyzer, uint64_t vmaddr,
1278
7.31M
                           uint64_t vmsize, const std::string& name) {
1279
7.31M
  bool verbose = IsVerboseForVMRange(vmaddr, vmsize);
1280
7.31M
  if (verbose) {
1281
0
    printf("[%s, %s] AddVMRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
1282
0
           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
1283
0
           name.data(), vmaddr, vmsize);
1284
0
  }
1285
7.31M
  assert(translator_);
1286
7.30M
  for (auto& pair : outputs_) {
1287
7.30M
    const std::string label = pair.second->Munge(name);
1288
7.30M
    bool ok = pair.first->vm_map.AddRangeWithTranslation(
1289
7.30M
        vmaddr, vmsize, label, translator_->vm_map, verbose,
1290
7.30M
        &pair.first->file_map);
1291
7.30M
    if (!ok) {
1292
915k
      WARN("VM range ($0, $1) for label $2 extends beyond base map", vmaddr,
1293
915k
           vmsize, name);
1294
915k
    }
1295
7.30M
  }
1296
7.31M
}
1297
1298
void RangeSink::AddVMRangeAllowAlias(const char* analyzer, uint64_t vmaddr,
1299
6.56M
                                     uint64_t size, const std::string& name) {
1300
  // TODO: maybe track alias (but what would we use it for?)
1301
  // TODO: verify that it is in fact an alias.
1302
6.56M
  AddVMRange(analyzer, vmaddr, size, name);
1303
6.56M
}
1304
1305
void RangeSink::AddVMRangeIgnoreDuplicate(const char* analyzer, uint64_t vmaddr,
1306
                                          uint64_t vmsize,
1307
281k
                                          const std::string& name) {
1308
  // TODO suppress warning that AddVMRange alone might trigger.
1309
281k
  AddVMRange(analyzer, vmaddr, vmsize, name);
1310
281k
}
1311
1312
void RangeSink::AddRange(const char* analyzer, string_view name,
1313
                         uint64_t vmaddr, uint64_t vmsize, uint64_t fileoff,
1314
87.0M
                         uint64_t filesize) {
1315
87.0M
  if (vmsize == RangeMap::kUnknownSize || filesize == RangeMap::kUnknownSize) {
1316
    // AddRange() is used for segments and sections; the mappings that establish
1317
    // the file <-> vm mapping.  The size should always be known.  Moreover it
1318
    // would be unclear how the logic should work if the size was *not* known.
1319
272
    THROW("AddRange() does not allow unknown size.");
1320
272
  }
1321
1322
87.0M
  if (IsVerboseForVMRange(vmaddr, vmsize) ||
1323
87.0M
      IsVerboseForFileRange(fileoff, filesize)) {
1324
0
    printf("[%s, %s] AddRange(%.*s, %" PRIx64 ", %" PRIx64 ", %" PRIx64
1325
0
           ", %" PRIx64 ")\n",
1326
0
           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
1327
0
           name.data(), vmaddr, vmsize, fileoff, filesize);
1328
0
  }
1329
1330
87.0M
  if (translator_) {
1331
39.7M
    if (!translator_->vm_map.CoversRange(vmaddr, vmsize) ||
1332
39.7M
        !translator_->file_map.CoversRange(fileoff, filesize)) {
1333
31.0k
      THROW("Tried to add range that is not covered by base map.");
1334
31.0k
    }
1335
39.7M
  }
1336
1337
87.0M
  for (auto& pair : outputs_) {
1338
87.0M
    const std::string label = pair.second->Munge(name);
1339
87.0M
    uint64_t common = std::min(vmsize, filesize);
1340
1341
87.0M
    pair.first->vm_map.AddDualRange(vmaddr, common, fileoff, label);
1342
87.0M
    pair.first->file_map.AddDualRange(fileoff, common, vmaddr, label);
1343
1344
87.0M
    pair.first->vm_map.AddRange(vmaddr + common, vmsize - common, label);
1345
87.0M
    pair.first->file_map.AddRange(fileoff + common, filesize - common, label);
1346
87.0M
  }
1347
87.0M
}
1348
1349
1.05M
uint64_t RangeSink::TranslateFileToVM(const char* ptr) {
1350
1.05M
  assert(translator_);
1351
0
  uint64_t offset = ptr - file_->data().data();
1352
1.05M
  uint64_t translated;
1353
1.05M
  if (!FileContainsPointer(ptr) ||
1354
1.05M
      !translator_->file_map.Translate(offset, &translated)) {
1355
2.17k
    THROWF("Can't translate file offset ($0) to VM, contains: $1, map:\n$2",
1356
2.17k
           offset, FileContainsPointer(ptr),
1357
2.17k
           translator_->file_map.DebugString().c_str());
1358
2.17k
  }
1359
1.05M
  return translated;
1360
1.05M
}
1361
1362
648k
// Converts a VM address into a view of the input file starting at the
// corresponding file offset and running to the end of the file.  THROWF is
// invoked when the address has no translation or the offset lies past the end
// of the file (an offset equal to the file size yields an empty view).  A
// translator is asserted.
absl::string_view RangeSink::TranslateVMToFile(uint64_t address) {
  assert(translator_);
  uint64_t file_offset;
  const bool ok = translator_->vm_map.Translate(address, &file_offset) &&
                  file_offset <= file_->data().size();
  if (!ok) {
    THROWF("Can't translate VM pointer ($0) to file", address);
  }
  return file_->data().substr(file_offset);
}
1372
1373
// Inflates zlib-compressed `data` into a buffer allocated from the sink's
// arena and returns a view of the decompressed bytes.
//
// `uncompressed_size` is the size the caller expects.  If it is implausibly
// large relative to the input, a warning is printed and an empty view is
// returned.  THROW is invoked when the sink has no arena or when zlib reports
// an error.
absl::string_view RangeSink::ZlibDecompress(absl::string_view data,
                                            uint64_t uncompressed_size) {
  if (!arena_) {
    THROW("This range sink isn't prepared to zlib decompress.");
  }

  // Limit for uncompressed size is 30x the compressed size + 128MB.
  const uint64_t mb = 1 << 20;
  const uint64_t plausible_limit =
      static_cast<uint64_t>(data.size()) * 30 + (128 * mb);
  if (uncompressed_size > plausible_limit) {
    fprintf(stderr,
            "warning: ignoring compressed debug data, implausible uncompressed "
            "size (compressed: %zu, uncompressed: %" PRIu64 ")\n",
            data.size(), uncompressed_size);
    return absl::string_view();
  }

  unsigned char* out_buf =
      google::protobuf::Arena::CreateArray<unsigned char>(arena_,
                                                          uncompressed_size);
  // zlib updates this to the number of bytes actually produced.
  uLongf out_len = uncompressed_size;
  const int status =
      uncompress(out_buf, &out_len,
                 reinterpret_cast<const unsigned char*>(data.data()),
                 data.size());
  if (status != Z_OK) {
    THROW("Error decompressing debug info");
  }

  return string_view(reinterpret_cast<char*>(out_buf), out_len);
}
1399
1400
// ThreadSafeIterIndex /////////////////////////////////////////////////////////
1401
1402
// Hands out the indexes 0 .. max-1, each at most once, to callers that may be
// running on different threads, and carries a single error message.
//
// The counter is an atomic; the error string is guarded by a mutex.
class ThreadSafeIterIndex {
 public:
  ThreadSafeIterIndex(int max) : index_(0), max_(max) {}

  // Claims the next index.  Stores it in `*index` and returns true while
  // indexes remain; returns false (leaving `*index` untouched) afterwards.
  bool TryGetNext(int* index) {
    const int claimed = index_.fetch_add(1, std::memory_order_relaxed);
    if (claimed < max_) {
      *index = claimed;
      return true;
    }
    return false;
  }

  // Records `error` and moves the counter to `max` so that subsequent
  // TryGetNext() calls fail.
  void Abort(string_view error) {
    std::lock_guard<std::mutex> guard(mutex_);
    index_ = max_;
    error_ = std::string(error);
  }

  // Copies the recorded error into `*error` and returns true, or returns false
  // when the stored message is empty (including after Abort("")).
  bool TryGetError(std::string* error) {
    std::lock_guard<std::mutex> guard(mutex_);
    if (error_.empty()) {
      return false;
    }
    *error = error_;
    return true;
  }

 private:
  std::atomic<int> index_;
  std::string error_;
  std::mutex mutex_;
  const int max_;
};
1438
1439
// Bloaty //////////////////////////////////////////////////////////////////////
1440
1441
// Represents a program execution and associated state.
1442
1443
struct ConfiguredDataSource {
1444
  ConfiguredDataSource(const DataSourceDefinition& definition_)
1445
      : definition(definition_),
1446
        effective_source(definition_.number),
1447
10.0M
        munger(new NameMunger()) {}
1448
1449
  const DataSourceDefinition& definition;
1450
  // This will differ from definition.number for kSymbols, where we use the
1451
  // --demangle flag to set the true/effective source.
1452
  DataSource effective_source;
1453
  std::unique_ptr<NameMunger> munger;
1454
};
1455
1456
class Bloaty {
1457
 public:
1458
  Bloaty(const InputFileFactory& factory, const Options& options);
1459
  Bloaty(const Bloaty&) = delete;
1460
  Bloaty& operator=(const Bloaty&) = delete;
1461
1462
  void AddFilename(const std::string& filename, bool base_file);
1463
  void AddDebugFilename(const std::string& filename);
1464
1465
0
  size_t GetSourceCount() const { return sources_.size(); }
1466
1467
  void DefineCustomDataSource(const CustomDataSource& source);
1468
1469
  void AddDataSource(const std::string& name);
1470
  void ScanAndRollup(const Options& options, RollupOutput* output);
1471
  void DisassembleFunction(string_view function, const Options& options,
1472
                           RollupOutput* output);
1473
1474
 private:
1475
  template <size_t T>
1476
  void AddBuiltInSources(const DataSourceDefinition (&sources)[T],
1477
1.00M
                         const Options& options) {
1478
11.0M
    for (size_t i = 0; i < T; i++) {
1479
10.0M
      const DataSourceDefinition& source = sources[i];
1480
10.0M
      auto configured_source = absl::make_unique<ConfiguredDataSource>(source);
1481
1482
10.0M
      if (configured_source->effective_source == DataSource::kSymbols) {
1483
1.00M
        configured_source->effective_source = EffectiveSymbolSource(options);
1484
1.00M
      }
1485
1486
10.0M
      all_known_sources_[source.name] = std::move(configured_source);
1487
10.0M
    }
1488
1.00M
  }
1489
1490
1.00M
  static DataSource EffectiveSymbolSource(const Options& options) {
1491
1.00M
    switch (options.demangle()) {
1492
0
      case Options::DEMANGLE_NONE:
1493
0
        return DataSource::kRawSymbols;
1494
1.00M
      case Options::DEMANGLE_SHORT:
1495
1.00M
        return DataSource::kShortSymbols;
1496
0
      case Options::DEMANGLE_FULL:
1497
0
        return DataSource::kFullSymbols;
1498
0
      default:
1499
0
        BLOATY_UNREACHABLE();
1500
1.00M
    }
1501
1.00M
  }
1502
1503
  void ScanAndRollupFiles(const std::vector<std::string>& filenames,
1504
                          std::vector<std::string>* build_ids,
1505
                          Rollup* rollup) const;
1506
  void ScanAndRollupFile(const std::string& filename, Rollup* rollup,
1507
                         std::vector<std::string>* out_build_ids) const;
1508
1509
  std::unique_ptr<ObjectFile> GetObjectFile(const std::string& filename) const;
1510
1511
  const InputFileFactory& file_factory_;
1512
  const Options options_;
1513
1514
  // All data sources, indexed by name.
1515
  // Contains both built-in sources and custom sources.
1516
  std::map<std::string, std::unique_ptr<ConfiguredDataSource>>
1517
      all_known_sources_;
1518
1519
  // Sources the user has actually selected, in the order selected.
1520
  // Points to entries in all_known_sources_.
1521
  std::vector<ConfiguredDataSource*> sources_;
1522
  std::vector<std::string> source_names_;
1523
1524
  struct InputFileInfo {
1525
    std::string filename_;
1526
    std::string build_id_;
1527
  };
1528
  std::vector<InputFileInfo> input_files_;
1529
  std::vector<InputFileInfo> base_files_;
1530
  std::map<std::string, std::string> debug_files_;
1531
1532
  // For allocating memory, like to decompress compressed sections.
1533
  std::unique_ptr<google::protobuf::Arena> arena_;
1534
};
1535
1536
// Keeps a reference to `factory`, copies `options`, creates the arena, and
// registers the built-in data sources.
Bloaty::Bloaty(const InputFileFactory& factory, const Options& options)
    : file_factory_(factory),
      options_(options),
      arena_(std::make_unique<google::protobuf::Arena>()) {
  // The constructor argument is passed on here, not the options_ copy.
  this->AddBuiltInSources(data_sources, options);
}
1542
1543
std::unique_ptr<ObjectFile> Bloaty::GetObjectFile(
1544
1.84M
    const std::string& filename) const {
1545
1.84M
  std::unique_ptr<InputFile> file(file_factory_.OpenFile(filename));
1546
1.84M
  auto object_file = TryOpenELFFile(file);
1547
1548
1.84M
  if (!object_file.get()) {
1549
397k
    object_file = TryOpenMachOFile(file);
1550
397k
  }
1551
1552
1.84M
  if (!object_file.get()) {
1553
153k
    object_file = TryOpenWebAssemblyFile(file);
1554
153k
  }
1555
1556
1.84M
  if (!object_file.get()) {
1557
109k
    object_file = TryOpenPEFile(file);
1558
109k
  }
1559
1560
1.84M
  if (!object_file.get()) {
1561
109k
    THROWF("unknown file type for file '$0'", filename.c_str());
1562
109k
  }
1563
1564
1.73M
  return object_file;
1565
1.84M
}
1566
1567
1.00M
void Bloaty::AddFilename(const std::string& filename, bool is_base) {
1568
1.00M
  auto object_file = GetObjectFile(filename);
1569
1.00M
  std::string build_id = object_file->GetBuildId();
1570
1571
1.00M
  if (is_base) {
1572
0
    base_files_.push_back({filename, build_id});
1573
1.00M
  } else {
1574
1.00M
    input_files_.push_back({filename, build_id});
1575
1.00M
  }
1576
1.00M
}
1577
1578
0
void Bloaty::AddDebugFilename(const std::string& filename) {
1579
0
  auto object_file = GetObjectFile(filename);
1580
0
  std::string build_id = object_file->GetBuildId();
1581
0
  if (build_id.size() == 0) {
1582
0
    THROWF("File '$0' has no build ID, cannot be used as a debug file",
1583
0
           filename);
1584
0
  }
1585
0
  debug_files_[build_id] = filename;
1586
0
}
1587
1588
0
void Bloaty::DefineCustomDataSource(const CustomDataSource& source) {
1589
0
  if (source.base_data_source() == "symbols") {
1590
0
    THROW(
1591
0
        "For custom data sources, use one of {rawsymbols, shortsymbols, "
1592
0
        "fullsymbols} for base_data_source instead of 'symbols', so you aren't "
1593
0
        "sensitive to the --demangle parameter.");
1594
0
  }
1595
1596
0
  auto iter = all_known_sources_.find(source.base_data_source());
1597
1598
0
  if (iter == all_known_sources_.end()) {
1599
0
    THROWF(
1600
0
        "custom data source '$0': no such base source '$1'.\nTry "
1601
0
        "--list-sources to see valid sources.",
1602
0
        source.name(), source.base_data_source());
1603
0
  } else if (!iter->second->munger->IsEmpty()) {
1604
0
    THROWF("custom data source '$0' tries to depend on custom data source '$1'",
1605
0
           source.name(), source.base_data_source());
1606
0
  }
1607
1608
0
  all_known_sources_[source.name()] =
1609
0
      absl::make_unique<ConfiguredDataSource>(iter->second->definition);
1610
0
  NameMunger* munger = all_known_sources_[source.name()]->munger.get();
1611
0
  for (const auto& regex : source.rewrite()) {
1612
0
    munger->AddRegex(regex.pattern(), regex.replacement());
1613
0
  }
1614
0
}
1615
1616
834k
void Bloaty::AddDataSource(const std::string& name) {
1617
834k
  source_names_.emplace_back(name);
1618
834k
  auto it = all_known_sources_.find(name);
1619
834k
  if (it == all_known_sources_.end()) {
1620
0
    THROWF("no such data source: $0.\nTry --list-sources to see valid sources.",
1621
0
           name);
1622
0
  }
1623
1624
834k
  sources_.emplace_back(it->second.get());
1625
834k
}
1626
1627
// All of the DualMaps for a given file.
1628
struct DualMaps {
1629
 public:
1630
834k
  DualMaps() {
1631
    // Base map.
1632
834k
    AppendMap();
1633
834k
  }
1634
1635
1.66M
  DualMap* AppendMap() {
1636
1.66M
    maps_.emplace_back(new DualMap);
1637
1.66M
    return maps_.back().get();
1638
1.66M
  }
1639
1640
322k
  void ComputeRollup(Rollup* rollup) {
1641
644k
    for (auto& map : maps_) {
1642
644k
      map->vm_map.Compress();
1643
644k
      map->file_map.Compress();
1644
644k
    }
1645
322k
    RangeMap::ComputeRollup(VmMaps(), [=](const std::vector<std::string>& keys,
1646
16.2M
                                          uint64_t addr, uint64_t end) {
1647
16.2M
      return rollup->AddSizes(keys, end - addr, true);
1648
16.2M
    });
1649
322k
    RangeMap::ComputeRollup(
1650
322k
        FileMaps(),
1651
11.2M
        [=](const std::vector<std::string>& keys, uint64_t addr, uint64_t end) {
1652
11.2M
          return rollup->AddSizes(keys, end - addr, false);
1653
11.2M
        });
1654
322k
  }
1655
1656
0
  void PrintMaps(const std::vector<const RangeMap*> maps) {
1657
0
    uint64_t last = 0;
1658
0
    uint64_t max = maps[0]->GetMaxAddress();
1659
0
    int hex_digits = max > 0 ? std::ceil(std::log2(max) / 4) : 0;
1660
0
    RangeMap::ComputeRollup(maps, [&](const std::vector<std::string>& keys,
1661
0
                                      uint64_t addr, uint64_t end) {
1662
0
      if (addr > last) {
1663
0
        PrintMapRow("[-- Nothing mapped --]", last, addr, hex_digits);
1664
0
      }
1665
0
      PrintMapRow(KeysToString(keys), addr, end, hex_digits);
1666
0
      last = end;
1667
0
    });
1668
0
    printf("\n");
1669
0
  }
1670
1671
0
  void PrintFileMaps() { PrintMaps(FileMaps()); }
1672
0
  void PrintVMMaps() { PrintMaps(VmMaps()); }
1673
1674
0
  std::string KeysToString(const std::vector<std::string>& keys) {
1675
0
    std::string ret;
1676
1677
    // Start at offset 1 to skip the base map.
1678
0
    for (size_t i = 1; i < keys.size(); i++) {
1679
0
      if (i > 1) {
1680
0
        ret += "\t";
1681
0
      }
1682
0
      ret += keys[i];
1683
0
    }
1684
1685
0
    return ret;
1686
0
  }
1687
1688
  void PrintMapRow(string_view str, uint64_t start, uint64_t end,
1689
0
                   int hex_digits) {
1690
0
    printf("%.*" PRIx64 "-%.*" PRIx64 "\t %s\t\t%.*s\n", hex_digits, start,
1691
0
           hex_digits, end, LeftPad(std::to_string(end - start), 10).c_str(),
1692
0
           (int)str.size(), str.data());
1693
0
  }
1694
1695
1.66M
  DualMap* base_map() { return maps_[0].get(); }
1696
1697
 private:
1698
322k
  std::vector<const RangeMap*> VmMaps() const {
1699
322k
    std::vector<const RangeMap*> ret;
1700
644k
    for (const auto& map : maps_) {
1701
644k
      ret.push_back(&map->vm_map);
1702
644k
    }
1703
322k
    return ret;
1704
322k
  }
1705
1706
320k
  std::vector<const RangeMap*> FileMaps() const {
1707
320k
    std::vector<const RangeMap*> ret;
1708
640k
    for (const auto& map : maps_) {
1709
640k
      ret.push_back(&map->file_map);
1710
640k
    }
1711
320k
    return ret;
1712
320k
  }
1713
1714
  std::vector<std::unique_ptr<DualMap>> maps_;
1715
};
1716
1717
// Scans a single input file and folds its ranges into `rollup`.
//
// One RangeSink is created for the base map (DataSource::kSegments) and one
// for each entry of sources_; the object file fills them in ProcessFile(), and
// the resulting maps are then rolled up together.
//
// If the file has a build id that matches an entry in debug_files_, that debug
// file is opened and attached to the object file, and the build id is appended
// to `out_build_ids`.
void Bloaty::ScanAndRollupFile(const std::string& filename, Rollup* rollup,
                               std::vector<std::string>* out_build_ids) const {
  auto file = GetObjectFile(filename);

  DualMaps maps;
  // `sinks` owns every sink. The two pointer vectors below only borrow from
  // it and decide who fills each sink.
  std::vector<std::unique_ptr<RangeSink>> sinks;
  std::vector<RangeSink*> sink_ptrs;
  std::vector<RangeSink*> filename_sink_ptrs;

  // Base map always goes first.
  sinks.push_back(absl::make_unique<RangeSink>(
      &file->file_data(), options_, DataSource::kSegments, nullptr, nullptr));
  NameMunger empty_munger;
  sinks.back()->AddOutput(maps.base_map(), &empty_munger);
  sink_ptrs.push_back(sinks.back().get());

  for (auto source : sources_) {
    sinks.push_back(absl::make_unique<RangeSink>(
        &file->file_data(), options_, source->effective_source, maps.base_map(),
        arena_.get()));
    sinks.back()->AddOutput(maps.AppendMap(), source->munger.get());
    // We handle the kInputFiles data source internally, without handing it off
    // to the file format implementation.  This seems slightly simpler, since
    // the file format has to deal with armembers too.
    if (source->effective_source == DataSource::kInputFiles) {
      filename_sink_ptrs.push_back(sinks.back().get());
    } else {
      sink_ptrs.push_back(sinks.back().get());
    }
  }

  // Declared outside the `if` so the debug file stays alive for the rest of
  // this function, while `file` holds a raw pointer to it.
  std::unique_ptr<ObjectFile> debug_file;
  std::string build_id = file->GetBuildId();
  if (!build_id.empty()) {
    auto iter = debug_files_.find(build_id);
    if (iter != debug_files_.end()) {
      debug_file = GetObjectFile(iter->second);
      file->set_debug_file(debug_file.get());
      out_build_ids->push_back(build_id);
    }
  }

  // `rollup` may already contain other files, so remember the running total
  // and compare against the delta after this file has been added.
  int64_t filesize_before =
      rollup->file_total() + rollup->filtered_file_total();
  file->ProcessFile(sink_ptrs);

  // kInputFile source: Copy the base map to the filename sink(s).
  for (auto sink : filename_sink_ptrs) {
    maps.base_map()->vm_map.ForEachRange(
        [sink](uint64_t start, uint64_t length) {
          sink->AddVMRange("inputfile_vmcopier", start, length,
                           sink->input_file().filename());
        });
    maps.base_map()->file_map.ForEachRange(
        [sink](uint64_t start, uint64_t length) {
          sink->AddFileRange("inputfile_filecopier",
                             sink->input_file().filename(), start, length);
        });
  }

  maps.ComputeRollup(rollup);

  // The ObjectFile implementation must guarantee this.
  int64_t filesize =
      rollup->file_total() + rollup->filtered_file_total() - filesize_before;
  // Keeps `filesize` "used" when assert() compiles away under NDEBUG.
  (void)filesize;
  assert(filesize == file->file_data().data().size());

  // Optional dump of the raw maps to stdout. Which of the two maps is printed
  // follows the `show` setting.
  if (verbose_level > 0 || options_.dump_raw_map()) {
    printf("Maps for %s:\n\n", filename.c_str());
    if (show != ShowDomain::kShowVM) {
      printf("FILE MAP:\n");
      maps.PrintFileMaps();
    }
    if (show != ShowDomain::kShowFile) {
      printf("VM MAP:\n");
      maps.PrintVMMaps();
    }
  }
}
1797
1798
void Bloaty::ScanAndRollupFiles(const std::vector<std::string>& filenames,
1799
                                std::vector<std::string>* build_ids,
1800
834k
                                Rollup* rollup) const {
1801
834k
  int num_cpus = std::thread::hardware_concurrency();
1802
834k
  int num_threads = std::min(num_cpus, static_cast<int>(filenames.size()));
1803
1804
834k
  struct PerThreadData {
1805
834k
    Rollup rollup;
1806
834k
    std::vector<std::string> build_ids;
1807
834k
  };
1808
1809
834k
  std::vector<PerThreadData> thread_data(num_threads);
1810
834k
  std::vector<std::thread> threads(num_threads);
1811
834k
  ThreadSafeIterIndex index(filenames.size());
1812
1813
834k
  std::unique_ptr<ReImpl> regex = nullptr;
1814
834k
  if (options_.has_source_filter()) {
1815
0
    regex = absl::make_unique<ReImpl>(options_.source_filter());
1816
0
  }
1817
1818
1.66M
  for (int i = 0; i < num_threads; i++) {
1819
834k
    thread_data[i].rollup.SetFilterRegex(regex.get());
1820
1821
834k
    threads[i] = std::thread(
1822
834k
        [this, &index, &filenames](PerThreadData* data) {
1823
834k
          try {
1824
834k
            int j;
1825
1.66M
            while (index.TryGetNext(&j)) {
1826
834k
              ScanAndRollupFile(filenames[j], &data->rollup, &data->build_ids);
1827
834k
            }
1828
834k
          } catch (const bloaty::Error& e) {
1829
514k
            index.Abort(e.what());
1830
514k
          }
1831
834k
        },
1832
834k
        &thread_data[i]);
1833
834k
  }
1834
1835
1.66M
  for (int i = 0; i < num_threads; i++) {
1836
834k
    threads[i].join();
1837
834k
    PerThreadData* data = &thread_data[i];
1838
834k
    if (i == 0) {
1839
834k
      *rollup = std::move(data->rollup);
1840
834k
    } else {
1841
292
      rollup->Add(data->rollup);
1842
292
    }
1843
1844
834k
    build_ids->insert(build_ids->end(), data->build_ids.begin(),
1845
834k
                      data->build_ids.end());
1846
834k
  }
1847
1848
834k
  std::string error;
1849
834k
  if (index.TryGetError(&error)) {
1850
514k
    THROW(error.c_str());
1851
514k
  }
1852
834k
}
1853
1854
834k
void Bloaty::ScanAndRollup(const Options& options, RollupOutput* output) {
1855
834k
  if (input_files_.empty()) {
1856
0
    THROW("no filename specified");
1857
0
  }
1858
1859
834k
  for (const auto& name : source_names_) {
1860
834k
    output->AddDataSourceName(name);
1861
834k
  }
1862
1863
834k
  Rollup rollup;
1864
834k
  std::vector<std::string> build_ids;
1865
834k
  std::vector<std::string> input_filenames;
1866
834k
  for (const auto& file_info : input_files_) {
1867
834k
    input_filenames.push_back(file_info.filename_);
1868
834k
  }
1869
834k
  ScanAndRollupFiles(input_filenames, &build_ids, &rollup);
1870
1871
834k
  if (!base_files_.empty()) {
1872
0
    Rollup base;
1873
0
    std::vector<std::string> base_filenames;
1874
0
    for (const auto& file_info : base_files_) {
1875
0
      base_filenames.push_back(file_info.filename_);
1876
0
    }
1877
0
    ScanAndRollupFiles(base_filenames, &build_ids, &base);
1878
0
    rollup.AddEntriesFrom(base);
1879
0
    rollup.CreateDiffModeRollupOutput(&base, options, output);
1880
834k
  } else {
1881
834k
    rollup.CreateRollupOutput(options, output);
1882
834k
  }
1883
1884
834k
  for (const auto& build_id : build_ids) {
1885
0
    debug_files_.erase(build_id);
1886
0
  }
1887
1888
  // Error out if some --debug-files were not used.
1889
834k
  if (!debug_files_.empty()) {
1890
0
    std::string input_files;
1891
0
    std::string unused_debug;
1892
0
    for (const auto& pair : debug_files_) {
1893
0
      unused_debug += absl::Substitute(
1894
0
          "$0   $1\n", absl::BytesToHexString(pair.first).c_str(),
1895
0
          pair.second.c_str());
1896
0
    }
1897
1898
0
    for (const auto& file_info : input_files_) {
1899
0
      input_files += absl::Substitute(
1900
0
          "$0   $1\n", absl::BytesToHexString(file_info.build_id_).c_str(),
1901
0
          file_info.filename_.c_str());
1902
0
    }
1903
0
    for (const auto& file_info : base_files_) {
1904
0
      input_files += absl::Substitute(
1905
0
          "$0   $1\n", absl::BytesToHexString(file_info.build_id_).c_str(),
1906
0
          file_info.filename_.c_str());
1907
0
    }
1908
0
    THROWF("Debug file(s) did not match any input file:\n$0\nInput Files:\n$1",
1909
0
           unused_debug.c_str(), input_files.c_str());
1910
0
  }
1911
834k
}
1912
1913
void Bloaty::DisassembleFunction(string_view function, const Options& options,
1914
0
                                 RollupOutput* output) {
1915
0
  DisassemblyInfo info;
1916
0
  for (const auto& file_info : input_files_) {
1917
0
    auto file = GetObjectFile(file_info.filename_);
1918
0
    if (file->GetDisassemblyInfo(function, EffectiveSymbolSource(options),
1919
0
                                 &info)) {
1920
0
      output->SetDisassembly(::bloaty::DisassembleFunction(info));
1921
0
      return;
1922
0
    }
1923
0
  }
1924
1925
0
  THROWF("Couldn't find function $0 to disassemble", function);
1926
0
}
1927
1928
const char usage[] = R"(Bloaty McBloatface: a size profiler for binaries.
1929
1930
USAGE: bloaty [OPTION]... FILE... [-- BASE_FILE...]
1931
1932
Options:
1933
1934
  --csv              Output in CSV format instead of human-readable.
1935
  --tsv              Output in TSV format instead of human-readable.
1936
  -c FILE            Load configuration from <file>.
1937
  -d SOURCE,SOURCE   Comma-separated list of sources to scan.
1938
  --debug-file=FILE  Use this file for debug symbols and/or symbol table.
1939
  -C MODE            How to demangle symbols.  Possible values are:
1940
  --demangle=MODE      --demangle=none   no demangling, print raw symbols
1941
                       --demangle=short  demangle, but omit arg/return types
1942
                       --demangle=full   print full demangled type
1943
                     The default is --demangle=short.
1944
  --disassemble=FUNCTION
1945
                     Disassemble this function (EXPERIMENTAL)
1946
  --domain=DOMAIN    Which domains to show.  Possible values are:
1947
                       --domain=vm
1948
                       --domain=file
1949
                       --domain=both (the default)
1950
  -n NUM             How many rows to show per level before collapsing
1951
                     other keys into '[Other]'.  Set to '0' for unlimited.
1952
                     Defaults to 20.
1953
  -s SORTBY          Whether to sort by VM or File size.  Possible values
1954
                     are:
1955
                       -s vm
1956
                       -s file
1957
                       -s both (the default: sorts by max(vm, file)).
1958
  -w                 Wide output; don't truncate long labels.
1959
  --help             Display this message and exit.
1960
  --list-sources     Show a list of available sources and exit.
1961
  --source-filter=PATTERN
1962
                     Only show keys with names matching this pattern.
1963
1964
Options for debugging Bloaty:
1965
1966
  --debug-vmaddr=ADDR
1967
  --debug-fileoff=OFF
1968
                     Print extended debugging information for the given
1969
                     VM address and/or file offset.
1970
  -v                 Verbose output.  Dumps warnings encountered during
1971
                     processing and full VM/file maps at the end.
1972
                     Add more v's (-vv, -vvv) for even more.
1973
)";
1974
1975
class ArgParser {
1976
 public:
1977
  ArgParser(int* argc, char** argv[])
1978
      : argc_(*argc),
1979
        argv_(*argv, *argv + *argc),
1980
        out_argc_(argc),
1981
0
        out_argv_(argv) {
1982
0
    *out_argc_ = 0;
1983
0
    ConsumeAndSaveArg();  // Executable name.
1984
0
  }
1985
1986
0
  bool IsDone() { return index_ == argc_; }
1987
1988
0
  string_view Arg() {
1989
0
    assert(!IsDone());
1990
0
    return string_view(argv_[index_]);
1991
0
  }
1992
1993
0
  string_view ConsumeArg() {
1994
0
    string_view ret = Arg();
1995
0
    index_++;
1996
0
    return ret;
1997
0
  }
1998
1999
0
  void ConsumeAndSaveArg() { (*out_argv_)[(*out_argc_)++] = argv_[index_++]; }
2000
2001
  // Singular flag like --csv or -v.
2002
0
  bool TryParseFlag(string_view flag) {
2003
0
    if (Arg() == flag) {
2004
0
      ConsumeArg();
2005
0
      return true;
2006
0
    } else {
2007
0
      return false;
2008
0
    }
2009
0
  }
2010
2011
  // Option taking an argument, for example:
2012
  //   -n 20
2013
  //   --config=file.bloaty
2014
  //
2015
  // For --long-options we accept both:
2016
  //   --long_option value
2017
  //   --long_option=value
2018
0
  bool TryParseOption(string_view flag, string_view* val) {
2019
0
    assert(flag.size() > 1);
2020
0
    bool is_long = flag[1] == '-';
2021
0
    string_view arg = Arg();
2022
0
    if (TryParseFlag(flag)) {
2023
0
      if (IsDone()) {
2024
0
        THROWF("option '$0' requires an argument", flag);
2025
0
      }
2026
0
      *val = ConsumeArg();
2027
0
      return true;
2028
0
    } else if (is_long && absl::ConsumePrefix(&arg, std::string(flag) + "=")) {
2029
0
      *val = arg;
2030
0
      index_++;
2031
0
      return true;
2032
0
    } else {
2033
0
      return false;
2034
0
    }
2035
0
  }
2036
2037
0
  bool TryParseIntegerOption(string_view flag, int* val) {
2038
0
    string_view val_str;
2039
0
    if (!TryParseOption(flag, &val_str)) {
2040
0
      return false;
2041
0
    }
2042
2043
0
    if (!absl::SimpleAtoi(val_str, val)) {
2044
0
      THROWF("option '$0' had non-integral argument: $1", flag, val_str);
2045
0
    }
2046
2047
0
    return true;
2048
0
  }
2049
2050
0
  bool TryParseUint64Option(string_view flag, uint64_t* val) {
2051
0
    string_view val_str;
2052
0
    if (!TryParseOption(flag, &val_str)) {
2053
0
      return false;
2054
0
    }
2055
2056
0
    try {
2057
0
      *val = std::stoull(std::string(val_str), nullptr, 0);
2058
0
    } catch (...) {
2059
0
      THROWF("option '$0' had non-integral argument: $1", flag, val_str);
2060
0
    }
2061
2062
0
    return true;
2063
0
  }
2064
2065
 public:
2066
  int argc_;
2067
  std::vector<char*> argv_;
2068
  int* out_argc_;
2069
  char*** out_argv_;
2070
  int index_ = 0;
2071
};
2072
2073
// Parses the command line into `options` and `output_options`.
//
// Returns true when parsing completed. Returns false after handling
// --list-sources or --help. --version prints the version and exits the
// process. Invalid input is reported by throwing through THROW/THROWF.
//
// Arguments that do not start with '-' are input filenames, or base filenames
// once "--" has been seen. When `skip_unknown` is true, unrecognized arguments
// starting with '-' are saved back into argv instead of raising an error.
bool DoParseOptions(bool skip_unknown, int* argc, char** argv[],
                    Options* options, OutputOptions* output_options) {
  bool saw_separator = false;
  ArgParser args(argc, argv);
  string_view option;
  int int_option;
  uint64_t uint64_option;
  bool has_domain = false;

  // Each branch probes for one option. A successful probe has already
  // consumed the matching argument(s), so the order of the probes matters.
  while (!args.IsDone()) {
    if (args.TryParseFlag("--")) {
      if (saw_separator) {
        THROW("'--' option should only be specified once");
      }
      saw_separator = true;
    } else if (args.TryParseFlag("--csv")) {
      output_options->output_format = OutputFormat::kCSV;
    } else if (args.TryParseFlag("--tsv")) {
      output_options->output_format = OutputFormat::kTSV;
    } else if (args.TryParseFlag("--raw-map")) {
      options->set_dump_raw_map(true);
    } else if (args.TryParseOption("-c", &option)) {
      // The config file is a text-format Options message that is merged into
      // `options`.
      std::ifstream input_file(std::string(option), std::ios::in);
      if (!input_file.is_open()) {
        THROWF("couldn't open file $0", option);
      }
      google::protobuf::io::IstreamInputStream stream(&input_file);
      if (!google::protobuf::TextFormat::Merge(&stream, options)) {
        THROWF("error parsing configuration out of file $0", option);
      }
    } else if (args.TryParseOption("-d", &option)) {
      std::vector<std::string> names = absl::StrSplit(option, ',');
      for (const auto& name : names) {
        options->add_data_source(name);
      }
    } else if (args.TryParseOption("-C", &option) ||
               args.TryParseOption("--demangle", &option)) {
      if (option == "none") {
        options->set_demangle(Options::DEMANGLE_NONE);
      } else if (option == "short") {
        options->set_demangle(Options::DEMANGLE_SHORT);
      } else if (option == "full") {
        options->set_demangle(Options::DEMANGLE_FULL);
      } else {
        THROWF("unknown value for --demangle: $0", option);
      }
    } else if (args.TryParseOption("--debug-file", &option)) {
      options->add_debug_filename(std::string(option));
    } else if (args.TryParseUint64Option("--debug-fileoff", &uint64_option)) {
      if (options->has_debug_fileoff()) {
        THROW("currently we only support a single debug fileoff");
      }
      options->set_debug_fileoff(uint64_option);
    } else if (args.TryParseUint64Option("--debug-vmaddr", &uint64_option)) {
      if (options->has_debug_vmaddr()) {
        THROW("currently we only support a single debug vmaddr");
      }
      options->set_debug_vmaddr(uint64_option);
    } else if (args.TryParseOption("--disassemble", &option)) {
      options->mutable_disassemble_function()->assign(std::string(option));
    } else if (args.TryParseIntegerOption("-n", &int_option)) {
      // "-n 0" means unlimited, which is stored as the largest int64 value.
      if (int_option == 0) {
        options->set_max_rows_per_level(INT64_MAX);
      } else {
        options->set_max_rows_per_level(int_option);
      }
    } else if (args.TryParseOption("--domain", &option)) {
      has_domain = true;
      // Besides output_options->show, each branch also assigns `show`, a
      // variable declared outside this function.
      if (option == "vm") {
        show = output_options->show = ShowDomain::kShowVM;
      } else if (option == "file") {
        show = output_options->show = ShowDomain::kShowFile;
      } else if (option == "both") {
        show = output_options->show = ShowDomain::kShowBoth;
      } else {
        THROWF("unknown value for --domain: $0", option);
      }
    } else if (args.TryParseOption("-s", &option)) {
      if (option == "vm") {
        options->set_sort_by(Options::SORTBY_VMSIZE);
      } else if (option == "file") {
        options->set_sort_by(Options::SORTBY_FILESIZE);
      } else if (option == "both") {
        options->set_sort_by(Options::SORTBY_BOTH);
      } else {
        THROWF("unknown value for -s: $0", option);
      }
    } else if (args.TryParseOption("--source-filter", &option)) {
      options->set_source_filter(std::string(option));
    } else if (args.TryParseFlag("-v")) {
      options->set_verbose_level(1);
    } else if (args.TryParseFlag("-vv")) {
      options->set_verbose_level(2);
    } else if (args.TryParseFlag("-vvv")) {
      options->set_verbose_level(3);
    } else if (args.TryParseFlag("-w")) {
      output_options->max_label_len = SIZE_MAX;
    } else if (args.TryParseFlag("--list-sources")) {
      // The list is written to stderr, and false is returned to the caller.
      for (const auto& source : data_sources) {
        fprintf(stderr, "%s %s\n", FixedWidthString(source.name, 15).c_str(),
                source.description);
      }
      return false;
    } else if (args.TryParseFlag("--help")) {
      puts(usage);
      return false;
    } else if (args.TryParseFlag("--version")) {
      // Unlike --help, this terminates the process instead of returning.
      printf("Bloaty McBloatface 1.1\n");
      exit(0);
    } else if (absl::StartsWith(args.Arg(), "-")) {
      if (skip_unknown) {
        // Keep the unrecognized option in the caller's argv.
        args.ConsumeAndSaveArg();
      } else {
        THROWF("Unknown option: $0", args.Arg());
      }
    } else {
      // Positional argument: a base file after "--", otherwise an input file.
      if (saw_separator) {
        output_options->showAllSizesCSV = true;
        options->add_base_filename(std::string(args.ConsumeArg()));
      } else {
        options->add_filename(std::string(args.ConsumeArg()));
      }
    }
  }

  if (options->data_source_size() == 0 &&
      !options->has_disassemble_function()) {
    // Default when no sources are specified.
    options->add_data_source("sections");
  }

  if (has_domain && !options->has_sort_by()) {
    // Default to sorting by what we are showing.
    switch (output_options->show) {
      case ShowDomain::kShowFile:
        options->set_sort_by(Options::SORTBY_FILESIZE);
        break;
      case ShowDomain::kShowVM:
        options->set_sort_by(Options::SORTBY_VMSIZE);
        break;
      case ShowDomain::kShowBoth:
        options->set_sort_by(Options::SORTBY_BOTH);
        break;
    }
  }

  return true;
}
2221
2222
bool ParseOptions(bool skip_unknown, int* argc, char** argv[], Options* options,
2223
0
                  OutputOptions* output_options, std::string* error) {
2224
0
  try {
2225
0
    return DoParseOptions(skip_unknown, argc, argv, options, output_options);
2226
0
  } catch (const bloaty::Error& e) {
2227
0
    error->assign(e.what());
2228
0
    return false;
2229
0
  }
2230
0
}
2231
2232
void BloatyDoMain(const Options& options, const InputFileFactory& file_factory,
2233
1.00M
                  RollupOutput* output) {
2234
1.00M
  bloaty::Bloaty bloaty(file_factory, options);
2235
2236
1.00M
  if (options.filename_size() == 0) {
2237
0
    THROW("must specify at least one file");
2238
0
  }
2239
2240
1.00M
  if (options.max_rows_per_level() < 1) {
2241
0
    THROW("max_rows_per_level must be at least 1");
2242
0
  }
2243
2244
1.00M
  for (auto& filename : options.filename()) {
2245
1.00M
    bloaty.AddFilename(filename, false);
2246
1.00M
  }
2247
2248
1.00M
  for (auto& base_filename : options.base_filename()) {
2249
0
    bloaty.AddFilename(base_filename, true);
2250
0
  }
2251
2252
1.00M
  for (auto& debug_filename : options.debug_filename()) {
2253
0
    bloaty.AddDebugFilename(debug_filename);
2254
0
  }
2255
2256
1.00M
  for (const auto& custom_data_source : options.custom_data_source()) {
2257
0
    bloaty.DefineCustomDataSource(custom_data_source);
2258
0
  }
2259
2260
1.00M
  for (const auto& data_source : options.data_source()) {
2261
834k
    bloaty.AddDataSource(data_source);
2262
834k
  }
2263
2264
1.00M
  if (options.has_source_filter()) {
2265
0
    ReImpl re(options.source_filter());
2266
0
    if (!re.ok()) {
2267
0
      THROW("invalid regex for source_filter");
2268
0
    }
2269
0
  }
2270
2271
1.00M
  verbose_level = options.verbose_level();
2272
2273
1.00M
  if (options.data_source_size() > 0) {
2274
834k
    bloaty.ScanAndRollup(options, output);
2275
834k
  } else if (options.has_disassemble_function()) {
2276
0
    bloaty.DisassembleFunction(options.disassemble_function(), options, output);
2277
0
  }
2278
1.00M
}
2279
2280
bool BloatyMain(const Options& options, const InputFileFactory& file_factory,
2281
1.00M
                RollupOutput* output, std::string* error) {
2282
1.00M
  try {
2283
1.00M
    BloatyDoMain(options, file_factory, output);
2284
1.00M
    return true;
2285
1.00M
  } catch (const bloaty::Error& e) {
2286
687k
    error->assign(e.what());
2287
687k
    return false;
2288
687k
  }
2289
1.00M
}
2290
2291
}  // namespace bloaty