Coverage Report

Created: 2025-10-28 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/brpc/src/butil/third_party/symbolize/symbolize.cc
Line
Count
Source
1
// Copyright (c) 2006, Google Inc.
2
// All rights reserved.
3
//
4
// Redistribution and use in source and binary forms, with or without
5
// modification, are permitted provided that the following conditions are
6
// met:
7
//
8
//     * Redistributions of source code must retain the above copyright
9
// notice, this list of conditions and the following disclaimer.
10
//     * Redistributions in binary form must reproduce the above
11
// copyright notice, this list of conditions and the following disclaimer
12
// in the documentation and/or other materials provided with the
13
// distribution.
14
//     * Neither the name of Google Inc. nor the names of its
15
// contributors may be used to endorse or promote products derived from
16
// this software without specific prior written permission.
17
//
18
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
//
30
// Author: Satoru Takabayashi
31
// Stack-footprint reduction work done by Raksit Ashok
32
//
33
// Implementation note:
34
//
35
// We don't use heaps but only use stacks.  We want to reduce the
36
// stack consumption so that the symbolizer can run on small stacks.
37
//
38
// Here are some numbers collected with GCC 4.1.0 on x86:
39
// - sizeof(Elf32_Sym)  = 16
40
// - sizeof(Elf32_Shdr) = 40
41
// - sizeof(Elf64_Sym)  = 24
42
// - sizeof(Elf64_Shdr) = 64
43
//
44
// This implementation is intended to be async-signal-safe but uses
45
// some functions which are not guaranteed to be so, such as memchr()
46
// and memmove().  We assume they are async-signal-safe.
47
//
48
// Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE
49
// macro to add platform specific defines (e.g. OS_OPENBSD).
50
51
#ifdef GLOG_BUILD_CONFIG_INCLUDE
52
#include GLOG_BUILD_CONFIG_INCLUDE
53
#endif  // GLOG_BUILD_CONFIG_INCLUDE
54
55
#include "utilities.h"
56
57
#if defined(HAVE_SYMBOLIZE)
58
59
#include <limits>
60
61
#include "symbolize.h"
62
#include "demangle.h"
63
#include "butil/compiler_specific.h"
64
65
_START_GOOGLE_NAMESPACE_
66
67
// We don't use assert() since it's not guaranteed to be
68
// async-signal-safe.  Instead we define a minimal assertion
69
// macro. So far, we don't need pretty printing for __FILE__, etc.
70
71
// A wrapper for abort() to make it callable in ? :.
72
0
static int AssertFail() {
73
0
  abort();
74
0
  return 0;  // Should not reach.
75
0
}
76
77
2.14M
#define SAFE_ASSERT(expr) ((expr) ? 0 : AssertFail())
78
79
// NOTE(gejun): Mark as weak symbol to avoid conflict with same functions in 
80
// glog, same reason applies to other functions marked weak in this file.
81
static SymbolizeCallback g_symbolize_callback = NULL;
82
0
void BAIDU_WEAK InstallSymbolizeCallback(SymbolizeCallback callback) {
83
0
  g_symbolize_callback = callback;
84
0
}
85
86
static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback =
87
    NULL;
88
void BAIDU_WEAK InstallSymbolizeOpenObjectFileCallback(
89
0
    SymbolizeOpenObjectFileCallback callback) {
90
0
  g_symbolize_open_object_file_callback = callback;
91
0
}
92
93
// This function wraps the Demangle function to provide an interface
94
// where the input symbol is demangled in-place.
95
// To keep stack consumption low, we would like this function to not
96
// get inlined.
97
2.89k
static ATTRIBUTE_NOINLINE void DemangleInplace(char *out, int out_size) {
98
2.89k
  char demangled[256];  // Big enough for sane demangled symbols.
99
2.89k
  if (Demangle(out, demangled, sizeof(demangled))) {
100
    // Demangling succeeded. Copy to out if the space allows.
101
2.22k
    size_t len = strlen(demangled);
102
2.22k
    if (len + 1 <= (size_t)out_size) {  // +1 for '\0'.
103
2.22k
      SAFE_ASSERT(len < sizeof(demangled));
104
2.22k
      memmove(out, demangled, len + 1);
105
2.22k
    }
106
2.22k
  }
107
2.89k
}
108
109
_END_GOOGLE_NAMESPACE_
110
111
#if defined(__ELF__)
112
113
#include <dlfcn.h>
114
#if defined(OS_OPENBSD)
115
#include <sys/exec_elf.h>
116
#else
117
#include <elf.h>
118
#endif
119
#include <errno.h>
120
#include <fcntl.h>
121
#include <limits.h>
122
#include <stdint.h>
123
#include <stdio.h>
124
#include <stdlib.h>
125
#include <stddef.h>
126
#include <string.h>
127
#include <sys/stat.h>
128
#include <sys/types.h>
129
#include <unistd.h>
130
131
#include "symbolize.h"
132
#include "config.h"
133
#include "glog/raw_logging.h"
134
135
// Re-runs fn until it doesn't cause EINTR.
136
440k
#define NO_INTR(fn)   do {} while ((fn) < 0 && errno == EINTR)
137
138
_START_GOOGLE_NAMESPACE_
139
140
// Read up to "count" bytes from file descriptor "fd" into the buffer
141
// starting at "buf" while handling short reads and EINTR.  On
142
// success, return the number of bytes read.  Otherwise, return -1.
143
428k
static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) {
144
428k
  SAFE_ASSERT(fd >= 0);
145
428k
  SAFE_ASSERT(count <= (size_t)std::numeric_limits<ssize_t>::max());
146
428k
  char *buf0 = reinterpret_cast<char *>(buf);
147
428k
  ssize_t num_bytes = 0;
148
856k
  while ((size_t)num_bytes < count) {
149
428k
    ssize_t len;
150
428k
    NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes));
151
428k
    if (len < 0) {  // There was an error other than EINTR.
152
0
      return -1;
153
0
    }
154
428k
    if (len == 0) {  // Reached EOF.
155
0
      break;
156
0
    }
157
428k
    num_bytes += len;
158
428k
  }
159
428k
  SAFE_ASSERT((size_t)num_bytes <= count);
160
428k
  return num_bytes;
161
428k
}
162
163
// Read up to "count" bytes from "offset" in the file pointed by file
164
// descriptor "fd" into the buffer starting at "buf".  On success,
165
// return the number of bytes read.  Otherwise, return -1.
166
static ssize_t ReadFromOffset(const int fd, void *buf,
167
425k
                              const size_t count, const off_t offset) {
168
425k
  off_t off = lseek(fd, offset, SEEK_SET);
169
425k
  if (off == (off_t)-1) {
170
0
    return -1;
171
0
  }
172
425k
  return ReadPersistent(fd, buf, count);
173
425k
}
174
175
// Try reading exactly "count" bytes from "offset" bytes in a file
176
// pointed by "fd" into the buffer starting at "buf" while handling
177
// short reads and EINTR.  On success, return true. Otherwise, return
178
// false.
179
static bool ReadFromOffsetExact(const int fd, void *buf,
180
8.96k
                                const size_t count, const off_t offset) {
181
8.96k
  ssize_t len = ReadFromOffset(fd, buf, count, offset);
182
8.96k
  return len == (ssize_t)count;
183
8.96k
}
184
185
// Returns elf_header.e_type if the file pointed by fd is an ELF binary.
186
2.98k
static int FileGetElfType(const int fd) {
187
2.98k
  ElfW(Ehdr) elf_header;
188
2.98k
  if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
189
0
    return -1;
190
0
  }
191
2.98k
  if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
192
0
    return -1;
193
0
  }
194
2.98k
  return elf_header.e_type;
195
2.98k
}
196
197
// Read the section headers in the given ELF binary, and if a section
198
// of the specified type is found, set the output to this section header
199
// and return true.  Otherwise, return false.
200
// To keep stack consumption low, we would like this function to not get
201
// inlined.
202
static ATTRIBUTE_NOINLINE bool
203
GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const off_t sh_offset,
204
3.08k
                       ElfW(Word) type, ElfW(Shdr) *out) {
205
  // Read at most 16 section headers at a time to save read calls.
206
3.08k
  ElfW(Shdr) buf[16];
207
9.33k
  for (int i = 0; i < sh_num;) {
208
9.24k
    const ssize_t num_bytes_left = (sh_num - i) * sizeof(buf[0]);
209
9.24k
    const ssize_t num_bytes_to_read =
210
9.24k
        ((ssize_t)sizeof(buf) > num_bytes_left) ? num_bytes_left : sizeof(buf);
211
9.24k
    const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read,
212
9.24k
                                       sh_offset + i * sizeof(buf[0]));
213
9.24k
    SAFE_ASSERT(len % sizeof(buf[0]) == 0);
214
9.24k
    const ssize_t num_headers_in_buf = len / sizeof(buf[0]);
215
9.24k
    SAFE_ASSERT((size_t)num_headers_in_buf <= sizeof(buf) / sizeof(buf[0]));
216
152k
    for (int j = 0; j < num_headers_in_buf; ++j) {
217
145k
      if (buf[j].sh_type == type) {
218
2.98k
        *out = buf[j];
219
2.98k
        return true;
220
2.98k
      }
221
145k
    }
222
6.25k
    i += num_headers_in_buf;
223
6.25k
  }
224
93
  return false;
225
3.08k
}
226
227
// There is no particular reason to limit section name to 63 characters,
228
// but there has (as yet) been no need for anything longer either.
229
const int kMaxSectionNameLen = 64;
230
231
// name_len should include terminating '\0'.
232
bool BAIDU_WEAK GetSectionHeaderByName(int fd, const char *name, size_t name_len,
233
0
                            ElfW(Shdr) *out) {
234
0
  ElfW(Ehdr) elf_header;
235
0
  if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
236
0
    return false;
237
0
  }
238
239
0
  ElfW(Shdr) shstrtab;
240
0
  off_t shstrtab_offset = (elf_header.e_shoff +
241
0
                           elf_header.e_shentsize * elf_header.e_shstrndx);
242
0
  if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
243
0
    return false;
244
0
  }
245
246
0
  for (int i = 0; i < elf_header.e_shnum; ++i) {
247
0
    off_t section_header_offset = (elf_header.e_shoff +
248
0
                                   elf_header.e_shentsize * i);
249
0
    if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) {
250
0
      return false;
251
0
    }
252
0
    char header_name[kMaxSectionNameLen];
253
0
    if (sizeof(header_name) < name_len) {
254
0
      RAW_LOG(WARNING, "Section name '%s' is too long (%" PRIuS "); "
255
0
              "section will not be found (even if present).", name, name_len);
256
      // No point in even trying.
257
0
      return false;
258
0
    }
259
0
    off_t name_offset = shstrtab.sh_offset + out->sh_name;
260
0
    ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset);
261
0
    if (n_read == -1) {
262
0
      return false;
263
0
    } else if ((size_t)n_read != name_len) {
264
      // Short read -- name could be at end of file.
265
0
      continue;
266
0
    }
267
0
    if (memcmp(header_name, name, name_len) == 0) {
268
0
      return true;
269
0
    }
270
0
  }
271
0
  return false;
272
0
}
273
274
// Read a symbol table and look for the symbol containing the
275
// pc. Iterate over symbols in a symbol table and look for the symbol
276
// containing "pc".  On success, return true and write the symbol name
277
// to out.  Otherwise, return false.
278
// To keep stack consumption low, we would like this function to not get
279
// inlined.
280
static ATTRIBUTE_NOINLINE bool
281
FindSymbol(uint64_t pc, const int fd, char *out, int out_size,
282
           uint64_t *out_saddr, uint64_t symbol_offset,
283
2.98k
           const ElfW(Shdr) *strtab, const ElfW(Shdr) *symtab) {
284
2.98k
  if (symtab == NULL) {
285
0
    return false;
286
0
  }
287
2.98k
  const int num_symbols = symtab->sh_size / symtab->sh_entsize;
288
404k
  for (int i = 0; i < num_symbols;) {
289
404k
    off_t offset = symtab->sh_offset + i * symtab->sh_entsize;
290
291
    // If we are reading Elf64_Sym's, we want to limit this array to
292
    // 32 elements (to keep stack consumption low), otherwise we can
293
    // have a 64 element Elf32_Sym array.
294
404k
#if __WORDSIZE == 64
295
404k
#define NUM_SYMBOLS 32
296
#else
297
#define NUM_SYMBOLS 64
298
#endif
299
300
    // Read at most NUM_SYMBOLS symbols at once to save read() calls.
301
404k
    ElfW(Sym) buf[NUM_SYMBOLS];
302
404k
    const ssize_t len = ReadFromOffset(fd, &buf, sizeof(buf), offset);
303
404k
    SAFE_ASSERT(len % sizeof(buf[0]) == 0);
304
404k
    const ssize_t num_symbols_in_buf = len / sizeof(buf[0]);
305
404k
    SAFE_ASSERT((size_t)num_symbols_in_buf <= sizeof(buf)/sizeof(buf[0]));
306
13.2M
    for (int j = 0; j < num_symbols_in_buf; ++j) {
307
12.8M
      const ElfW(Sym)& symbol = buf[j];
308
12.8M
      uint64_t start_address = symbol.st_value;
309
12.8M
      start_address += symbol_offset;
310
12.8M
      uint64_t end_address = start_address + symbol.st_size;
311
12.8M
      if (symbol.st_value != 0 &&  // Skip null value symbols.
312
12.5M
          symbol.st_shndx != 0 &&  // Skip undefined symbols.
313
12.5M
          start_address <= pc && pc < end_address) {
314
2.89k
        if (NULL != out) {
315
2.89k
          ssize_t len1 = ReadFromOffset(
316
2.89k
              fd, out, out_size, strtab->sh_offset + symbol.st_name);
317
2.89k
          if (len1 <= 0 || memchr(out, '\0', out_size) == NULL) {
318
0
            return false;
319
0
          }
320
2.89k
        }
321
2.89k
        if (NULL != out_saddr) {
322
0
          *out_saddr = start_address;
323
0
        }
324
2.89k
        return true;  // Obtained the symbol name.
325
2.89k
      }
326
12.8M
    }
327
401k
    i += num_symbols_in_buf;
328
401k
  }
329
95
  return false;
330
2.98k
}
331
332
// Get the symbol name of "pc" from the file pointed by "fd".  Process
333
// both regular and dynamic symbol tables if necessary.  On success,
334
// write the symbol name to "out" and return true.  Otherwise, return
335
// false.
336
static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
337
                                    char *out, int out_size,
338
                                    uint64_t *out_saddr,
339
2.98k
                                    uint64_t map_start_address) {
340
  // Read the ELF header.
341
2.98k
  ElfW(Ehdr) elf_header;
342
2.98k
  if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
343
0
    return false;
344
0
  }
345
346
2.98k
  uint64_t symbol_offset = 0;
347
2.98k
  if (elf_header.e_type == ET_DYN) {  // DSO needs offset adjustment.
348
2.98k
    symbol_offset = map_start_address;
349
2.98k
  }
350
351
2.98k
  ElfW(Shdr) symtab, strtab;
352
353
  // Consult a regular symbol table first.
354
2.98k
  if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
355
2.98k
                             SHT_SYMTAB, &symtab)) {
356
2.89k
    if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
357
2.89k
                             symtab.sh_link * sizeof(symtab))) {
358
0
      return false;
359
0
    }
360
2.89k
    if (FindSymbol(pc, fd, out, out_size, out_saddr,
361
2.89k
                   symbol_offset, &strtab, &symtab)) {
362
2.89k
      return true;  // Found the symbol in a regular symbol table.
363
2.89k
    }
364
2.89k
  }
365
366
  // If the symbol is not found, then consult a dynamic symbol table.
367
94
  if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
368
94
                             SHT_DYNSYM, &symtab)) {
369
94
    if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
370
94
                             symtab.sh_link * sizeof(symtab))) {
371
0
      return false;
372
0
    }
373
94
    if (FindSymbol(pc, fd, out, out_size, out_saddr,
374
94
                   symbol_offset, &strtab, &symtab)) {
375
0
      return true;  // Found the symbol in a dynamic symbol table.
376
0
    }
377
94
  }
378
379
94
  return false;
380
94
}
381
382
namespace {
383
// Thin wrapper around a file descriptor so that the file descriptor
384
// gets closed for sure.
385
struct FileDescriptor {
386
  const int fd_;
387
5.97k
  explicit FileDescriptor(int fd) : fd_(fd) {}
388
5.97k
  ~FileDescriptor() {
389
5.97k
    if (fd_ >= 0) {
390
5.97k
      NO_INTR(close(fd_));
391
5.97k
    }
392
5.97k
  }
393
11.9k
  int get() { return fd_; }
394
395
 private:
396
  explicit FileDescriptor(const FileDescriptor&);
397
  void operator=(const FileDescriptor&);
398
};
399
400
// Helper class for reading lines from file.
401
//
402
// Note: we don't use ProcMapsIterator since the object is big (it has
403
// a 5k array member) and uses async-unsafe functions such as sscanf()
404
// and snprintf().
405
class LineReader {
406
 public:
407
2.98k
  explicit LineReader(int fd, char *buf, int buf_len) : fd_(fd),
408
2.98k
    buf_(buf), buf_len_(buf_len), bol_(buf), eol_(buf), eod_(buf) {
409
2.98k
  }
410
411
  // Read '\n'-terminated line from file.  On success, modify "bol"
412
  // and "eol", then return true.  Otherwise, return false.
413
  //
414
  // Note: if the last line doesn't end with '\n', the line will be
415
  // dropped.  It's an intentional behavior to make the code simple.
416
7.27k
  bool ReadLine(const char **bol, const char **eol) {
417
7.27k
    if (BufferIsEmpty()) {  // First time.
418
2.98k
      const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_);
419
2.98k
      if (num_bytes <= 0) {  // EOF or error.
420
0
        return false;
421
0
      }
422
2.98k
      eod_ = buf_ + num_bytes;
423
2.98k
      bol_ = buf_;
424
4.29k
    } else {
425
4.29k
      bol_ = eol_ + 1;  // Advance to the next line in the buffer.
426
4.29k
      SAFE_ASSERT(bol_ <= eod_);  // "bol_" can point to "eod_".
427
4.29k
      if (!HasCompleteLine()) {
428
93
        const int incomplete_line_length = eod_ - bol_;
429
        // Move the trailing incomplete line to the beginning.
430
93
        memmove(buf_, bol_, incomplete_line_length);
431
        // Read text from file and append it.
432
93
        char * const append_pos = buf_ + incomplete_line_length;
433
93
        const int capacity_left = buf_len_ - incomplete_line_length;
434
93
        const ssize_t num_bytes = ReadPersistent(fd_, append_pos,
435
93
                                                 capacity_left);
436
93
        if (num_bytes <= 0) {  // EOF or error.
437
0
          return false;
438
0
        }
439
93
        eod_ = append_pos + num_bytes;
440
93
        bol_ = buf_;
441
93
      }
442
4.29k
    }
443
7.27k
    eol_ = FindLineFeed();
444
7.27k
    if (eol_ == NULL) {  // '\n' not found.  Malformed line.
445
0
      return false;
446
0
    }
447
7.27k
    *eol_ = '\0';  // Replace '\n' with '\0'.
448
449
7.27k
    *bol = bol_;
450
7.27k
    *eol = eol_;
451
7.27k
    return true;
452
7.27k
  }
453
454
  // Beginning of line.
455
0
  const char *bol() {
456
0
    return bol_;
457
0
  }
458
459
  // End of line.
460
0
  const char *eol() {
461
0
    return eol_;
462
0
  }
463
464
 private:
465
  explicit LineReader(const LineReader&);
466
  void operator=(const LineReader&);
467
468
11.5k
  char *FindLineFeed() {
469
11.5k
    return reinterpret_cast<char *>(memchr(bol_, '\n', eod_ - bol_));
470
11.5k
  }
471
472
11.5k
  bool BufferIsEmpty() {
473
11.5k
    return buf_ == eod_;
474
11.5k
  }
475
476
4.29k
  bool HasCompleteLine() {
477
4.29k
    return !BufferIsEmpty() && FindLineFeed() != NULL;
478
4.29k
  }
479
480
  const int fd_;
481
  char * const buf_;
482
  const int buf_len_;
483
  char *bol_;
484
  char *eol_;
485
  const char *eod_;  // End of data in "buf_".
486
};
487
}  // namespace
488
489
// Place the hex number read from "start" into "*hex".  The pointer to
490
// the first non-hex character or "end" is returned.
491
17.5k
static char *GetHex(const char *start, const char *end, uint64_t *hex) {
492
17.5k
  *hex = 0;
493
17.5k
  const char *p;
494
216k
  for (p = start; p < end; ++p) {
495
216k
    int ch = *p;
496
216k
    if ((ch >= '0' && ch <= '9') ||
497
198k
        (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) {
498
198k
      *hex = (*hex << 4) | (ch < 'A' ? ch - '0' : (ch & 0xF) + 9);
499
198k
    } else {  // Encountered the first non-hex character.
500
17.5k
      break;
501
17.5k
    }
502
216k
  }
503
17.5k
  SAFE_ASSERT(p <= end);
504
17.5k
  return const_cast<char *>(p);
505
17.5k
}
506
507
// Searches for the object file (from /proc/self/maps) that contains
508
// the specified pc.  If found, sets |start_address| to the start address
509
// of where this object file is mapped in memory, sets the module base
510
// address into |base_address|, copies the object file name into
511
// |out_file_name|, and attempts to open the object file.  If the object
512
// file is opened successfully, returns the file descriptor.  Otherwise,
513
// returns -1.  |out_file_name_size| is the size of the file name buffer
514
// (including the null-terminator).
515
static ATTRIBUTE_NOINLINE int
516
OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
517
                                             uint64_t &start_address,
518
                                             uint64_t &base_address,
519
                                             char *out_file_name,
520
2.98k
                                             int out_file_name_size) {
521
2.98k
  int object_fd;
522
523
  // Open /proc/self/maps.
524
2.98k
  int maps_fd;
525
2.98k
  NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
526
2.98k
  FileDescriptor wrapped_maps_fd(maps_fd);
527
2.98k
  if (wrapped_maps_fd.get() < 0) {
528
0
    return -1;
529
0
  }
530
531
  // Iterate over maps and look for the map containing the pc.  Then
532
  // look into the symbol tables inside.
533
2.98k
  char buf[1024];  // Big enough for line of sane /proc/self/maps
534
2.98k
  int num_maps = 0;
535
2.98k
  LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf));
536
7.27k
  while (true) {
537
7.27k
    num_maps++;
538
7.27k
    const char *cursor;
539
7.27k
    const char *eol;
540
7.27k
    if (!reader.ReadLine(&cursor, &eol)) {  // EOF or malformed line.
541
0
      return -1;
542
0
    }
543
544
    // Start parsing line in /proc/self/maps.  Here is an example:
545
    //
546
    // 08048000-0804c000 r-xp 00000000 08:01 2142121    /bin/cat
547
    //
548
    // We want start address (08048000), end address (0804c000), flags
549
    // (r-xp) and file name (/bin/cat).
550
551
    // Read start address.
552
7.27k
    cursor = GetHex(cursor, eol, &start_address);
553
7.27k
    if (cursor == eol || *cursor != '-') {
554
0
      return -1;  // Malformed line.
555
0
    }
556
7.27k
    ++cursor;  // Skip '-'.
557
558
    // Read end address.
559
7.27k
    uint64_t end_address;
560
7.27k
    cursor = GetHex(cursor, eol, &end_address);
561
7.27k
    if (cursor == eol || *cursor != ' ') {
562
0
      return -1;  // Malformed line.
563
0
    }
564
7.27k
    ++cursor;  // Skip ' '.
565
566
    // Check start and end addresses.
567
7.27k
    if (!(start_address <= pc && pc < end_address)) {
568
4.29k
      continue;  // We skip this map.  PC isn't in this map.
569
4.29k
    }
570
571
    // Read flags.  Skip flags until we encounter a space or eol.
572
2.98k
    const char * const flags_start = cursor;
573
14.9k
    while (cursor < eol && *cursor != ' ') {
574
11.9k
      ++cursor;
575
11.9k
    }
576
    // We expect at least four letters for flags (ex. "r-xp").
577
2.98k
    if (cursor == eol || cursor < flags_start + 4) {
578
0
      return -1;  // Malformed line.
579
0
    }
580
581
    // Check flags.  We are only interested in "r-x" maps.
582
2.98k
    if (memcmp(flags_start, "r-x", 3) != 0) {  // Not a "r-x" map.
583
0
      continue;  // We skip this map.
584
0
    }
585
2.98k
    ++cursor;  // Skip ' '.
586
587
    // Read file offset.
588
2.98k
    uint64_t file_offset;
589
2.98k
    cursor = GetHex(cursor, eol, &file_offset);
590
2.98k
    if (cursor == eol || *cursor != ' ') {
591
0
      return -1;  // Malformed line.
592
0
    }
593
2.98k
    ++cursor;  // Skip ' '.
594
595
    // Don't subtract 'start_address' from the first entry:
596
    // * If a binary is compiled w/o -pie, then the first entry in
597
    //   process maps is likely the binary itself (all dynamic libs
598
    //   are mapped higher in address space). For such a binary,
599
    //   instruction offset in binary coincides with the actual
600
    //   instruction address in virtual memory (as code section
601
    //   is mapped to a fixed memory range).
602
    // * If a binary is compiled with -pie, all the modules are
603
    //   mapped high at address space (in particular, higher than
604
    //   shadow memory of the tool), so the module can't be the
605
    //   first entry.
606
2.98k
    base_address = ((num_maps == 1) ? 0U : start_address) - file_offset;
607
608
    // Skip to file name.  "cursor" now points to dev.  We need to
609
    // skip at least two spaces for dev and inode.
610
2.98k
    int num_spaces = 0;
611
101k
    while (cursor < eol) {
612
101k
      if (*cursor == ' ') {
613
62.7k
        ++num_spaces;
614
62.7k
      } else if (num_spaces >= 2) {
615
        // The first non-space character after skipping two spaces
616
        // is the beginning of the file name.
617
2.98k
        break;
618
2.98k
      }
619
98.6k
      ++cursor;
620
98.6k
    }
621
2.98k
    if (cursor == eol) {
622
0
      return -1;  // Malformed line.
623
0
    }
624
625
    // Finally, "cursor" now points to file name of our interest.
626
2.98k
    NO_INTR(object_fd = open(cursor, O_RDONLY));
627
2.98k
    if (object_fd < 0) {
628
      // Failed to open object file.  Copy the object file name to
629
      // |out_file_name|.
630
0
      strncpy(out_file_name, cursor, out_file_name_size);
631
      // Making sure |out_file_name| is always null-terminated.
632
0
      out_file_name[out_file_name_size - 1] = '\0';
633
0
      return -1;
634
0
    }
635
2.98k
    return object_fd;
636
2.98k
  }
637
2.98k
}
638
639
// POSIX doesn't define any async-signal safe function for converting
640
// an integer to ASCII. We'll have to define our own version.
641
// itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the
642
// conversion was successful or NULL otherwise. It never writes more than "sz"
643
// bytes. Output will be truncated as needed, and a NUL character is always
644
// appended.
645
// NOTE: code from sandbox/linux/seccomp-bpf/demo.cc.
646
0
char *itoa_r(intptr_t i, char *buf, size_t sz, int base, size_t padding) {
647
  // Make sure we can write at least one NUL byte.
648
0
  size_t n = 1;
649
0
  if (n > sz)
650
0
    return NULL;
651
652
0
  if (base < 2 || base > 16) {
653
0
    buf[0] = '\000';
654
0
    return NULL;
655
0
  }
656
657
0
  char *start = buf;
658
659
0
  uintptr_t j = i;
660
661
  // Handle negative numbers (only for base 10).
662
0
  if (i < 0 && base == 10) {
663
0
    j = -i;
664
665
    // Make sure we can write the '-' character.
666
0
    if (++n > sz) {
667
0
      buf[0] = '\000';
668
0
      return NULL;
669
0
    }
670
0
    *start++ = '-';
671
0
  }
672
673
  // Loop until we have converted the entire number. Output at least one
674
  // character (i.e. '0').
675
0
  char *ptr = start;
676
0
  do {
677
    // Make sure there is still enough space left in our output buffer.
678
0
    if (++n > sz) {
679
0
      buf[0] = '\000';
680
0
      return NULL;
681
0
    }
682
683
    // Output the next digit.
684
0
    *ptr++ = "0123456789abcdef"[j % base];
685
0
    j /= base;
686
687
0
    if (padding > 0)
688
0
      padding--;
689
0
  } while (j > 0 || padding > 0);
690
691
  // Terminate the output with a NUL character.
692
0
  *ptr = '\000';
693
694
  // Conversion to ASCII actually resulted in the digits being in reverse
695
  // order. We can't easily generate them in forward order, as we can't tell
696
  // the number of characters needed until we are done converting.
697
  // So, now, we reverse the string (except for the possible "-" sign).
698
0
  while (--ptr > start) {
699
0
    char ch = *ptr;
700
0
    *ptr = *start;
701
0
    *start++ = ch;
702
0
  }
703
0
  return buf;
704
0
}
705
706
// Safely appends string |source| to string |dest|.  Never writes past the
707
// buffer size |dest_size| and guarantees that |dest| is null-terminated.
708
2.98k
void SafeAppendString(const char* source, char* dest, int dest_size) {
709
2.98k
  int dest_string_length = strlen(dest);
710
2.98k
  SAFE_ASSERT(dest_string_length < dest_size);
711
2.98k
  dest += dest_string_length;
712
2.98k
  dest_size -= dest_string_length;
713
2.98k
  strncpy(dest, source, dest_size);
714
  // Making sure |dest| is always null-terminated.
715
2.98k
  dest[dest_size - 1] = '\0';
716
2.98k
}
717
718
// Converts a 64-bit value into a hex string, and safely appends it to |dest|.
719
// Never writes past the buffer size |dest_size| and guarantees that |dest| is
720
// null-terminated.
721
0
void SafeAppendHexNumber(uint64_t value, char* dest, int dest_size) {
722
  // 64-bit numbers in hex can have up to 16 digits.
723
0
  char buf[17] = {'\0'};
724
0
  SafeAppendString(itoa_r(value, buf, sizeof(buf), 16, 0), dest, dest_size);
725
0
}
726
727
// The implementation of our symbolization routine.  If it
728
// successfully finds the symbol containing "pc" and obtains the
729
// symbol name, returns true and write the symbol name to "out".
730
// Otherwise, returns false. If Callback function is installed via
731
// InstallSymbolizeCallback(), the function is also called in this function,
732
// and "out" is used as its output.
733
// To keep stack consumption low, we would like this function to not
734
// get inlined.
735
static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
736
                                                    int out_size,
737
2.98k
                                                    uint64_t *out_saddr) {
738
2.98k
  uint64_t pc0 = reinterpret_cast<uintptr_t>(pc);
739
2.98k
  uint64_t start_address = 0;
740
2.98k
  uint64_t base_address = 0;
741
2.98k
  int object_fd = -1;
742
743
2.98k
  if ((NULL == out || out_size < 1) &&
744
2.98k
      NULL == out_saddr) {
745
0
    return false;
746
0
  }
747
2.98k
  if (NULL != out) {
748
2.98k
    out[0] = '\0';
749
2.98k
    SafeAppendString("(", out, out_size);
750
2.98k
  }
751
752
2.98k
  if (g_symbolize_open_object_file_callback) {
753
0
    object_fd = g_symbolize_open_object_file_callback(pc0, start_address,
754
0
                                                      base_address, out + 1,
755
0
                                                      out_size - 1);
756
2.98k
  } else {
757
2.98k
    object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address,
758
2.98k
                                                             base_address,
759
2.98k
                                                             out + 1,
760
2.98k
                                                             out_size - 1);
761
2.98k
  }
762
763
  // Check whether a file name was returned.
764
2.98k
  if (object_fd < 0) {
765
0
    if (NULL != out && out[1] && NULL == out_saddr) {
766
      // The object file containing PC was determined successfully however the
767
      // object file was not opened successfully.  This is still considered
768
      // success because the object file name and offset are known and tools
769
      // like asan_symbolize.py can be used for the symbolization.
770
0
      out[out_size - 1] = '\0';  // Making sure |out| is always null-terminated.
771
0
      SafeAppendString("+0x", out, out_size);
772
0
      SafeAppendHexNumber(pc0 - base_address, out, out_size);
773
0
      SafeAppendString(")", out, out_size);
774
0
      return true;
775
0
    }
776
    // Failed to determine the object file containing PC.  Bail out.
777
0
    return false;
778
0
  }
779
2.98k
  FileDescriptor wrapped_object_fd(object_fd);
780
2.98k
  int elf_type = FileGetElfType(wrapped_object_fd.get());
781
2.98k
  if (elf_type == -1) {
782
0
    return false;
783
0
  }
784
2.98k
  if (g_symbolize_callback) {
785
    // Run the call back if it's installed.
786
    // Note: relocation (and much of the rest of this code) will be
787
    // wrong for prelinked shared libraries and PIE executables.
788
0
    uint64_t relocation = (elf_type == ET_DYN) ? start_address : 0;
789
0
    int num_bytes_written = g_symbolize_callback(wrapped_object_fd.get(),
790
0
                                                 pc, out, out_size,
791
0
                                                 relocation);
792
0
    if (num_bytes_written > 0) {
793
0
      out += num_bytes_written;
794
0
      out_size -= num_bytes_written;
795
0
    }
796
0
  }
797
2.98k
  if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0,
798
2.98k
                               out, out_size, out_saddr,
799
2.98k
                               start_address)) {
800
94
    return false;
801
94
  }
802
803
2.89k
  if (NULL != out) {
804
    // Symbolization succeeded.  Now we try to demangle the symbol.
805
2.89k
    DemangleInplace(out, out_size);
806
2.89k
  }
807
2.89k
  return true;
808
2.98k
}
809
810
_END_GOOGLE_NAMESPACE_
811
812
#elif defined(OS_MACOSX) && defined(HAVE_DLADDR)
813
814
#include <dlfcn.h>
815
#include <string.h>
816
817
_START_GOOGLE_NAMESPACE_
818
819
static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
820
                                                    int out_size,
821
                                                    uint64_t *out_saddr) {
822
  Dl_info info{};
823
  if (0 == dladdr(pc, &info)) {
824
    return false;
825
  }
826
  if (NULL != out) {
827
    if ((int)strlen(info.dli_sname) >= out_size) {
828
      return false;
829
    }
830
    strcpy(out, info.dli_sname);
831
    // Symbolization succeeded.  Now we try to demangle the symbol.
832
    DemangleInplace(out, out_size);
833
  }
834
  if (NULL != out_saddr) {
835
    *out_saddr = (uint64_t)info.dli_saddr;
836
  }
837
  return true;
838
}
839
840
_END_GOOGLE_NAMESPACE_
841
842
#else
843
# error BUG: HAVE_SYMBOLIZE was wrongly set
844
#endif
845
846
_START_GOOGLE_NAMESPACE_
847
848
2.98k
bool BAIDU_WEAK Symbolize(void *pc, char *out, int out_size) {
849
2.98k
  SAFE_ASSERT(out_size >= 0);
850
2.98k
  return SymbolizeAndDemangle(pc, out, out_size, NULL);
851
2.98k
}
852
853
0
bool BAIDU_WEAK SymbolizeAddress(void *pc, uint64_t *out) {
854
0
  SAFE_ASSERT(NULL != out);
855
  return SymbolizeAndDemangle(pc, NULL, 0, out);
856
0
}
857
858
_END_GOOGLE_NAMESPACE_
859
860
#else  /* HAVE_SYMBOLIZE */
861
862
#include <assert.h>
863
864
#include "config.h"
865
866
_START_GOOGLE_NAMESPACE_
867
868
// TODO: Support other environments.
869
bool BAIDU_WEAK Symbolize(void *pc, char *out, int out_size) {
870
  assert(0);
871
  return false;
872
}
873
874
bool BAIDU_WEAK SymbolizeAddress(void *pc, uint64_t *out) {
875
  assert(0);
876
  return false;
877
}
878
879
_END_GOOGLE_NAMESPACE_
880
881
#endif