/src/brpc/src/butil/third_party/symbolize/symbolize.cc
Line | Count | Source |
1 | | // Copyright (c) 2006, Google Inc. |
2 | | // All rights reserved. |
3 | | // |
4 | | // Redistribution and use in source and binary forms, with or without |
5 | | // modification, are permitted provided that the following conditions are |
6 | | // met: |
7 | | // |
8 | | // * Redistributions of source code must retain the above copyright |
9 | | // notice, this list of conditions and the following disclaimer. |
10 | | // * Redistributions in binary form must reproduce the above |
11 | | // copyright notice, this list of conditions and the following disclaimer |
12 | | // in the documentation and/or other materials provided with the |
13 | | // distribution. |
14 | | // * Neither the name of Google Inc. nor the names of its |
15 | | // contributors may be used to endorse or promote products derived from |
16 | | // this software without specific prior written permission. |
17 | | // |
18 | | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | | // |
30 | | // Author: Satoru Takabayashi |
31 | | // Stack-footprint reduction work done by Raksit Ashok |
32 | | // |
33 | | // Implementation note: |
34 | | // |
35 | | // We don't use heaps but only use stacks. We want to reduce the |
36 | | // stack consumption so that the symbolizer can run on small stacks. |
37 | | // |
38 | | // Here are some numbers collected with GCC 4.1.0 on x86: |
39 | | // - sizeof(Elf32_Sym) = 16 |
40 | | // - sizeof(Elf32_Shdr) = 40 |
41 | | // - sizeof(Elf64_Sym) = 24 |
42 | | // - sizeof(Elf64_Shdr) = 64 |
43 | | // |
44 | | // This implementation is intended to be async-signal-safe but uses |
45 | | // some functions which are not guaranteed to be so, such as memchr() |
46 | | // and memmove(). We assume they are async-signal-safe. |
47 | | // |
48 | | // Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE |
49 | | // macro to add platform specific defines (e.g. OS_OPENBSD). |
50 | | |
51 | | #ifdef GLOG_BUILD_CONFIG_INCLUDE |
52 | | #include GLOG_BUILD_CONFIG_INCLUDE |
53 | | #endif // GLOG_BUILD_CONFIG_INCLUDE |
54 | | |
55 | | #include "utilities.h" |
56 | | |
57 | | #if defined(HAVE_SYMBOLIZE) |
58 | | |
59 | | #include <limits> |
60 | | |
61 | | #include "symbolize.h" |
62 | | #include "demangle.h" |
63 | | #include "butil/compiler_specific.h" |
64 | | |
65 | | _START_GOOGLE_NAMESPACE_ |
66 | | |
67 | | // We don't use assert() since it's not guaranteed to be |
68 | | // async-signal-safe. Instead we define a minimal assertion |
69 | | // macro. So far, we don't need pretty printing for __FILE__, etc. |
70 | | |
// abort() returns void and therefore cannot be an operand of ?:.  This
// int-returning shim lets SAFE_ASSERT remain a single expression.
static int AssertFail() {
  abort();
  return 0;  // Never reached; present only to satisfy the return type.
}

// Evaluates to 0 when |expr| holds; otherwise calls abort() via AssertFail().
#define SAFE_ASSERT(expr) (!(expr) ? AssertFail() : 0)
78 | | |
79 | | // NOTE(gejun): Mark as weak symbol to avoid conflict with same functions in |
80 | | // glog, same reason applies to other functions marked weak in this file. |
81 | | static SymbolizeCallback g_symbolize_callback = NULL; |
82 | 0 | void BAIDU_WEAK InstallSymbolizeCallback(SymbolizeCallback callback) { |
83 | 0 | g_symbolize_callback = callback; |
84 | 0 | } |
85 | | |
86 | | static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback = |
87 | | NULL; |
88 | | void BAIDU_WEAK InstallSymbolizeOpenObjectFileCallback( |
89 | 0 | SymbolizeOpenObjectFileCallback callback) { |
90 | 0 | g_symbolize_open_object_file_callback = callback; |
91 | 0 | } |
92 | | |
93 | | // This function wraps the Demangle function to provide an interface |
94 | | // where the input symbol is demangled in-place. |
95 | | // To keep stack consumption low, we would like this function to not |
96 | | // get inlined. |
97 | 2.89k | static ATTRIBUTE_NOINLINE void DemangleInplace(char *out, int out_size) { |
98 | 2.89k | char demangled[256]; // Big enough for sane demangled symbols. |
99 | 2.89k | if (Demangle(out, demangled, sizeof(demangled))) { |
100 | | // Demangling succeeded. Copy to out if the space allows. |
101 | 2.22k | size_t len = strlen(demangled); |
102 | 2.22k | if (len + 1 <= (size_t)out_size) { // +1 for '\0'. |
103 | 2.22k | SAFE_ASSERT(len < sizeof(demangled)); |
104 | 2.22k | memmove(out, demangled, len + 1); |
105 | 2.22k | } |
106 | 2.22k | } |
107 | 2.89k | } |
108 | | |
109 | | _END_GOOGLE_NAMESPACE_ |
110 | | |
111 | | #if defined(__ELF__) |
112 | | |
113 | | #include <dlfcn.h> |
114 | | #if defined(OS_OPENBSD) |
115 | | #include <sys/exec_elf.h> |
116 | | #else |
117 | | #include <elf.h> |
118 | | #endif |
119 | | #include <errno.h> |
120 | | #include <fcntl.h> |
121 | | #include <limits.h> |
122 | | #include <stdint.h> |
123 | | #include <stdio.h> |
124 | | #include <stdlib.h> |
125 | | #include <stddef.h> |
126 | | #include <string.h> |
127 | | #include <sys/stat.h> |
128 | | #include <sys/types.h> |
129 | | #include <unistd.h> |
130 | | |
131 | | #include "symbolize.h" |
132 | | #include "config.h" |
133 | | #include "glog/raw_logging.h" |
134 | | |
// Evaluates |fn| again and again for as long as it yields a negative value
// while errno equals EINTR.  |fn| is normally an assignment wrapping a
// system call, e.g. NO_INTR(fd = open(...)).
#define NO_INTR(fn) \
  do {              \
  } while ((fn) < 0 && errno == EINTR)
137 | | |
138 | | _START_GOOGLE_NAMESPACE_ |
139 | | |
140 | | // Read up to "count" bytes from file descriptor "fd" into the buffer |
141 | | // starting at "buf" while handling short reads and EINTR. On |
142 | | // success, return the number of bytes read. Otherwise, return -1. |
143 | 428k | static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) { |
144 | 428k | SAFE_ASSERT(fd >= 0); |
145 | 428k | SAFE_ASSERT(count <= (size_t)std::numeric_limits<ssize_t>::max()); |
146 | 428k | char *buf0 = reinterpret_cast<char *>(buf); |
147 | 428k | ssize_t num_bytes = 0; |
148 | 856k | while ((size_t)num_bytes < count) { |
149 | 428k | ssize_t len; |
150 | 428k | NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes)); |
151 | 428k | if (len < 0) { // There was an error other than EINTR. |
152 | 0 | return -1; |
153 | 0 | } |
154 | 428k | if (len == 0) { // Reached EOF. |
155 | 0 | break; |
156 | 0 | } |
157 | 428k | num_bytes += len; |
158 | 428k | } |
159 | 428k | SAFE_ASSERT((size_t)num_bytes <= count); |
160 | 428k | return num_bytes; |
161 | 428k | } |
162 | | |
163 | | // Read up to "count" bytes from "offset" in the file pointed by file |
164 | | // descriptor "fd" into the buffer starting at "buf". On success, |
165 | | // return the number of bytes read. Otherwise, return -1. |
166 | | static ssize_t ReadFromOffset(const int fd, void *buf, |
167 | 425k | const size_t count, const off_t offset) { |
168 | 425k | off_t off = lseek(fd, offset, SEEK_SET); |
169 | 425k | if (off == (off_t)-1) { |
170 | 0 | return -1; |
171 | 0 | } |
172 | 425k | return ReadPersistent(fd, buf, count); |
173 | 425k | } |
174 | | |
175 | | // Try reading exactly "count" bytes from "offset" bytes in a file |
176 | | // pointed by "fd" into the buffer starting at "buf" while handling |
177 | | // short reads and EINTR. On success, return true. Otherwise, return |
178 | | // false. |
179 | | static bool ReadFromOffsetExact(const int fd, void *buf, |
180 | 8.96k | const size_t count, const off_t offset) { |
181 | 8.96k | ssize_t len = ReadFromOffset(fd, buf, count, offset); |
182 | 8.96k | return len == (ssize_t)count; |
183 | 8.96k | } |
184 | | |
185 | | // Returns elf_header.e_type if the file pointed by fd is an ELF binary. |
186 | 2.98k | static int FileGetElfType(const int fd) { |
187 | 2.98k | ElfW(Ehdr) elf_header; |
188 | 2.98k | if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { |
189 | 0 | return -1; |
190 | 0 | } |
191 | 2.98k | if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) { |
192 | 0 | return -1; |
193 | 0 | } |
194 | 2.98k | return elf_header.e_type; |
195 | 2.98k | } |
196 | | |
197 | | // Read the section headers in the given ELF binary, and if a section |
198 | | // of the specified type is found, set the output to this section header |
199 | | // and return true. Otherwise, return false. |
200 | | // To keep stack consumption low, we would like this function to not get |
201 | | // inlined. |
202 | | static ATTRIBUTE_NOINLINE bool |
203 | | GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const off_t sh_offset, |
204 | 3.08k | ElfW(Word) type, ElfW(Shdr) *out) { |
205 | | // Read at most 16 section headers at a time to save read calls. |
206 | 3.08k | ElfW(Shdr) buf[16]; |
207 | 9.33k | for (int i = 0; i < sh_num;) { |
208 | 9.24k | const ssize_t num_bytes_left = (sh_num - i) * sizeof(buf[0]); |
209 | 9.24k | const ssize_t num_bytes_to_read = |
210 | 9.24k | ((ssize_t)sizeof(buf) > num_bytes_left) ? num_bytes_left : sizeof(buf); |
211 | 9.24k | const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, |
212 | 9.24k | sh_offset + i * sizeof(buf[0])); |
213 | 9.24k | SAFE_ASSERT(len % sizeof(buf[0]) == 0); |
214 | 9.24k | const ssize_t num_headers_in_buf = len / sizeof(buf[0]); |
215 | 9.24k | SAFE_ASSERT((size_t)num_headers_in_buf <= sizeof(buf) / sizeof(buf[0])); |
216 | 152k | for (int j = 0; j < num_headers_in_buf; ++j) { |
217 | 145k | if (buf[j].sh_type == type) { |
218 | 2.98k | *out = buf[j]; |
219 | 2.98k | return true; |
220 | 2.98k | } |
221 | 145k | } |
222 | 6.25k | i += num_headers_in_buf; |
223 | 6.25k | } |
224 | 93 | return false; |
225 | 3.08k | } |
226 | | |
227 | | // There is no particular reason to limit section name to 63 characters, |
228 | | // but there has (as yet) been no need for anything longer either. |
229 | | const int kMaxSectionNameLen = 64; |
230 | | |
231 | | // name_len should include terminating '\0'. |
232 | | bool BAIDU_WEAK GetSectionHeaderByName(int fd, const char *name, size_t name_len, |
233 | 0 | ElfW(Shdr) *out) { |
234 | 0 | ElfW(Ehdr) elf_header; |
235 | 0 | if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { |
236 | 0 | return false; |
237 | 0 | } |
238 | | |
239 | 0 | ElfW(Shdr) shstrtab; |
240 | 0 | off_t shstrtab_offset = (elf_header.e_shoff + |
241 | 0 | elf_header.e_shentsize * elf_header.e_shstrndx); |
242 | 0 | if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { |
243 | 0 | return false; |
244 | 0 | } |
245 | | |
246 | 0 | for (int i = 0; i < elf_header.e_shnum; ++i) { |
247 | 0 | off_t section_header_offset = (elf_header.e_shoff + |
248 | 0 | elf_header.e_shentsize * i); |
249 | 0 | if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) { |
250 | 0 | return false; |
251 | 0 | } |
252 | 0 | char header_name[kMaxSectionNameLen]; |
253 | 0 | if (sizeof(header_name) < name_len) { |
254 | 0 | RAW_LOG(WARNING, "Section name '%s' is too long (%" PRIuS "); " |
255 | 0 | "section will not be found (even if present).", name, name_len); |
256 | | // No point in even trying. |
257 | 0 | return false; |
258 | 0 | } |
259 | 0 | off_t name_offset = shstrtab.sh_offset + out->sh_name; |
260 | 0 | ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset); |
261 | 0 | if (n_read == -1) { |
262 | 0 | return false; |
263 | 0 | } else if ((size_t)n_read != name_len) { |
264 | | // Short read -- name could be at end of file. |
265 | 0 | continue; |
266 | 0 | } |
267 | 0 | if (memcmp(header_name, name, name_len) == 0) { |
268 | 0 | return true; |
269 | 0 | } |
270 | 0 | } |
271 | 0 | return false; |
272 | 0 | } |
273 | | |
274 | | // Read a symbol table and look for the symbol containing the |
275 | | // pc. Iterate over symbols in a symbol table and look for the symbol |
276 | | // containing "pc". On success, return true and write the symbol name |
277 | | // to out. Otherwise, return false. |
278 | | // To keep stack consumption low, we would like this function to not get |
279 | | // inlined. |
280 | | static ATTRIBUTE_NOINLINE bool |
281 | | FindSymbol(uint64_t pc, const int fd, char *out, int out_size, |
282 | | uint64_t *out_saddr, uint64_t symbol_offset, |
283 | 2.98k | const ElfW(Shdr) *strtab, const ElfW(Shdr) *symtab) { |
284 | 2.98k | if (symtab == NULL) { |
285 | 0 | return false; |
286 | 0 | } |
287 | 2.98k | const int num_symbols = symtab->sh_size / symtab->sh_entsize; |
288 | 404k | for (int i = 0; i < num_symbols;) { |
289 | 404k | off_t offset = symtab->sh_offset + i * symtab->sh_entsize; |
290 | | |
291 | | // If we are reading Elf64_Sym's, we want to limit this array to |
292 | | // 32 elements (to keep stack consumption low), otherwise we can |
293 | | // have a 64 element Elf32_Sym array. |
294 | 404k | #if __WORDSIZE == 64 |
295 | 404k | #define NUM_SYMBOLS 32 |
296 | | #else |
297 | | #define NUM_SYMBOLS 64 |
298 | | #endif |
299 | | |
300 | | // Read at most NUM_SYMBOLS symbols at once to save read() calls. |
301 | 404k | ElfW(Sym) buf[NUM_SYMBOLS]; |
302 | 404k | const ssize_t len = ReadFromOffset(fd, &buf, sizeof(buf), offset); |
303 | 404k | SAFE_ASSERT(len % sizeof(buf[0]) == 0); |
304 | 404k | const ssize_t num_symbols_in_buf = len / sizeof(buf[0]); |
305 | 404k | SAFE_ASSERT((size_t)num_symbols_in_buf <= sizeof(buf)/sizeof(buf[0])); |
306 | 13.2M | for (int j = 0; j < num_symbols_in_buf; ++j) { |
307 | 12.8M | const ElfW(Sym)& symbol = buf[j]; |
308 | 12.8M | uint64_t start_address = symbol.st_value; |
309 | 12.8M | start_address += symbol_offset; |
310 | 12.8M | uint64_t end_address = start_address + symbol.st_size; |
311 | 12.8M | if (symbol.st_value != 0 && // Skip null value symbols. |
312 | 12.5M | symbol.st_shndx != 0 && // Skip undefined symbols. |
313 | 12.5M | start_address <= pc && pc < end_address) { |
314 | 2.89k | if (NULL != out) { |
315 | 2.89k | ssize_t len1 = ReadFromOffset( |
316 | 2.89k | fd, out, out_size, strtab->sh_offset + symbol.st_name); |
317 | 2.89k | if (len1 <= 0 || memchr(out, '\0', out_size) == NULL) { |
318 | 0 | return false; |
319 | 0 | } |
320 | 2.89k | } |
321 | 2.89k | if (NULL != out_saddr) { |
322 | 0 | *out_saddr = start_address; |
323 | 0 | } |
324 | 2.89k | return true; // Obtained the symbol name. |
325 | 2.89k | } |
326 | 12.8M | } |
327 | 401k | i += num_symbols_in_buf; |
328 | 401k | } |
329 | 95 | return false; |
330 | 2.98k | } |
331 | | |
332 | | // Get the symbol name of "pc" from the file pointed by "fd". Process |
333 | | // both regular and dynamic symbol tables if necessary. On success, |
334 | | // write the symbol name to "out" and return true. Otherwise, return |
335 | | // false. |
336 | | static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, |
337 | | char *out, int out_size, |
338 | | uint64_t *out_saddr, |
339 | 2.98k | uint64_t map_start_address) { |
340 | | // Read the ELF header. |
341 | 2.98k | ElfW(Ehdr) elf_header; |
342 | 2.98k | if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { |
343 | 0 | return false; |
344 | 0 | } |
345 | | |
346 | 2.98k | uint64_t symbol_offset = 0; |
347 | 2.98k | if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment. |
348 | 2.98k | symbol_offset = map_start_address; |
349 | 2.98k | } |
350 | | |
351 | 2.98k | ElfW(Shdr) symtab, strtab; |
352 | | |
353 | | // Consult a regular symbol table first. |
354 | 2.98k | if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, |
355 | 2.98k | SHT_SYMTAB, &symtab)) { |
356 | 2.89k | if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + |
357 | 2.89k | symtab.sh_link * sizeof(symtab))) { |
358 | 0 | return false; |
359 | 0 | } |
360 | 2.89k | if (FindSymbol(pc, fd, out, out_size, out_saddr, |
361 | 2.89k | symbol_offset, &strtab, &symtab)) { |
362 | 2.89k | return true; // Found the symbol in a regular symbol table. |
363 | 2.89k | } |
364 | 2.89k | } |
365 | | |
366 | | // If the symbol is not found, then consult a dynamic symbol table. |
367 | 94 | if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, |
368 | 94 | SHT_DYNSYM, &symtab)) { |
369 | 94 | if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + |
370 | 94 | symtab.sh_link * sizeof(symtab))) { |
371 | 0 | return false; |
372 | 0 | } |
373 | 94 | if (FindSymbol(pc, fd, out, out_size, out_saddr, |
374 | 94 | symbol_offset, &strtab, &symtab)) { |
375 | 0 | return true; // Found the symbol in a dynamic symbol table. |
376 | 0 | } |
377 | 94 | } |
378 | | |
379 | 94 | return false; |
380 | 94 | } |
381 | | |
382 | | namespace { |
// Thin wrapper around a file descriptor so that the file descriptor
// gets closed for sure.  A negative descriptor is accepted and simply never
// closed.  Objects are not copyable.
struct FileDescriptor {
  const int fd_;
  explicit FileDescriptor(int fd) : fd_(fd) {}
  ~FileDescriptor() {
    if (fd_ >= 0) {
      // Deliberately not retried on EINTR: on Linux the descriptor is
      // released even when close() is interrupted, so a second close() could
      // hit a descriptor that another thread has just been handed.
      close(fd_);
    }
  }
  // Returns the wrapped descriptor; ownership stays with this object.
  int get() { return fd_; }

 private:
  // Copying is disabled (declared, never defined).
  explicit FileDescriptor(const FileDescriptor&);
  void operator=(const FileDescriptor&);
};
399 | | |
400 | | // Helper class for reading lines from file. |
401 | | // |
402 | | // Note: we don't use ProcMapsIterator since the object is big (it has |
403 | | // a 5k array member) and uses async-unsafe functions such as sscanf() |
404 | | // and snprintf(). |
405 | | class LineReader { |
406 | | public: |
407 | 2.98k | explicit LineReader(int fd, char *buf, int buf_len) : fd_(fd), |
408 | 2.98k | buf_(buf), buf_len_(buf_len), bol_(buf), eol_(buf), eod_(buf) { |
409 | 2.98k | } |
410 | | |
411 | | // Read '\n'-terminated line from file. On success, modify "bol" |
412 | | // and "eol", then return true. Otherwise, return false. |
413 | | // |
414 | | // Note: if the last line doesn't end with '\n', the line will be |
415 | | // dropped. It's an intentional behavior to make the code simple. |
416 | 7.27k | bool ReadLine(const char **bol, const char **eol) { |
417 | 7.27k | if (BufferIsEmpty()) { // First time. |
418 | 2.98k | const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_); |
419 | 2.98k | if (num_bytes <= 0) { // EOF or error. |
420 | 0 | return false; |
421 | 0 | } |
422 | 2.98k | eod_ = buf_ + num_bytes; |
423 | 2.98k | bol_ = buf_; |
424 | 4.29k | } else { |
425 | 4.29k | bol_ = eol_ + 1; // Advance to the next line in the buffer. |
426 | 4.29k | SAFE_ASSERT(bol_ <= eod_); // "bol_" can point to "eod_". |
427 | 4.29k | if (!HasCompleteLine()) { |
428 | 93 | const int incomplete_line_length = eod_ - bol_; |
429 | | // Move the trailing incomplete line to the beginning. |
430 | 93 | memmove(buf_, bol_, incomplete_line_length); |
431 | | // Read text from file and append it. |
432 | 93 | char * const append_pos = buf_ + incomplete_line_length; |
433 | 93 | const int capacity_left = buf_len_ - incomplete_line_length; |
434 | 93 | const ssize_t num_bytes = ReadPersistent(fd_, append_pos, |
435 | 93 | capacity_left); |
436 | 93 | if (num_bytes <= 0) { // EOF or error. |
437 | 0 | return false; |
438 | 0 | } |
439 | 93 | eod_ = append_pos + num_bytes; |
440 | 93 | bol_ = buf_; |
441 | 93 | } |
442 | 4.29k | } |
443 | 7.27k | eol_ = FindLineFeed(); |
444 | 7.27k | if (eol_ == NULL) { // '\n' not found. Malformed line. |
445 | 0 | return false; |
446 | 0 | } |
447 | 7.27k | *eol_ = '\0'; // Replace '\n' with '\0'. |
448 | | |
449 | 7.27k | *bol = bol_; |
450 | 7.27k | *eol = eol_; |
451 | 7.27k | return true; |
452 | 7.27k | } |
453 | | |
454 | | // Beginning of line. |
455 | 0 | const char *bol() { |
456 | 0 | return bol_; |
457 | 0 | } |
458 | | |
459 | | // End of line. |
460 | 0 | const char *eol() { |
461 | 0 | return eol_; |
462 | 0 | } |
463 | | |
464 | | private: |
465 | | explicit LineReader(const LineReader&); |
466 | | void operator=(const LineReader&); |
467 | | |
468 | 11.5k | char *FindLineFeed() { |
469 | 11.5k | return reinterpret_cast<char *>(memchr(bol_, '\n', eod_ - bol_)); |
470 | 11.5k | } |
471 | | |
472 | 11.5k | bool BufferIsEmpty() { |
473 | 11.5k | return buf_ == eod_; |
474 | 11.5k | } |
475 | | |
476 | 4.29k | bool HasCompleteLine() { |
477 | 4.29k | return !BufferIsEmpty() && FindLineFeed() != NULL; |
478 | 4.29k | } |
479 | | |
480 | | const int fd_; |
481 | | char * const buf_; |
482 | | const int buf_len_; |
483 | | char *bol_; |
484 | | char *eol_; |
485 | | const char *eod_; // End of data in "buf_". |
486 | | }; |
487 | | } // namespace |
488 | | |
489 | | // Place the hex number read from "start" into "*hex". The pointer to |
490 | | // the first non-hex character or "end" is returned. |
491 | 17.5k | static char *GetHex(const char *start, const char *end, uint64_t *hex) { |
492 | 17.5k | *hex = 0; |
493 | 17.5k | const char *p; |
494 | 216k | for (p = start; p < end; ++p) { |
495 | 216k | int ch = *p; |
496 | 216k | if ((ch >= '0' && ch <= '9') || |
497 | 198k | (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) { |
498 | 198k | *hex = (*hex << 4) | (ch < 'A' ? ch - '0' : (ch & 0xF) + 9); |
499 | 198k | } else { // Encountered the first non-hex character. |
500 | 17.5k | break; |
501 | 17.5k | } |
502 | 216k | } |
503 | 17.5k | SAFE_ASSERT(p <= end); |
504 | 17.5k | return const_cast<char *>(p); |
505 | 17.5k | } |
506 | | |
507 | | // Searches for the object file (from /proc/self/maps) that contains |
508 | | // the specified pc. If found, sets |start_address| to the start address |
509 | | // of where this object file is mapped in memory, sets the module base |
510 | | // address into |base_address|, copies the object file name into |
511 | | // |out_file_name|, and attempts to open the object file. If the object |
512 | | // file is opened successfully, returns the file descriptor. Otherwise, |
513 | | // returns -1. |out_file_name_size| is the size of the file name buffer |
514 | | // (including the null-terminator). |
515 | | static ATTRIBUTE_NOINLINE int |
516 | | OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, |
517 | | uint64_t &start_address, |
518 | | uint64_t &base_address, |
519 | | char *out_file_name, |
520 | 2.98k | int out_file_name_size) { |
521 | 2.98k | int object_fd; |
522 | | |
523 | | // Open /proc/self/maps. |
524 | 2.98k | int maps_fd; |
525 | 2.98k | NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY)); |
526 | 2.98k | FileDescriptor wrapped_maps_fd(maps_fd); |
527 | 2.98k | if (wrapped_maps_fd.get() < 0) { |
528 | 0 | return -1; |
529 | 0 | } |
530 | | |
531 | | // Iterate over maps and look for the map containing the pc. Then |
532 | | // look into the symbol tables inside. |
533 | 2.98k | char buf[1024]; // Big enough for line of sane /proc/self/maps |
534 | 2.98k | int num_maps = 0; |
535 | 2.98k | LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf)); |
536 | 7.27k | while (true) { |
537 | 7.27k | num_maps++; |
538 | 7.27k | const char *cursor; |
539 | 7.27k | const char *eol; |
540 | 7.27k | if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line. |
541 | 0 | return -1; |
542 | 0 | } |
543 | | |
544 | | // Start parsing line in /proc/self/maps. Here is an example: |
545 | | // |
546 | | // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat |
547 | | // |
548 | | // We want start address (08048000), end address (0804c000), flags |
549 | | // (r-xp) and file name (/bin/cat). |
550 | | |
551 | | // Read start address. |
552 | 7.27k | cursor = GetHex(cursor, eol, &start_address); |
553 | 7.27k | if (cursor == eol || *cursor != '-') { |
554 | 0 | return -1; // Malformed line. |
555 | 0 | } |
556 | 7.27k | ++cursor; // Skip '-'. |
557 | | |
558 | | // Read end address. |
559 | 7.27k | uint64_t end_address; |
560 | 7.27k | cursor = GetHex(cursor, eol, &end_address); |
561 | 7.27k | if (cursor == eol || *cursor != ' ') { |
562 | 0 | return -1; // Malformed line. |
563 | 0 | } |
564 | 7.27k | ++cursor; // Skip ' '. |
565 | | |
566 | | // Check start and end addresses. |
567 | 7.27k | if (!(start_address <= pc && pc < end_address)) { |
568 | 4.29k | continue; // We skip this map. PC isn't in this map. |
569 | 4.29k | } |
570 | | |
571 | | // Read flags. Skip flags until we encounter a space or eol. |
572 | 2.98k | const char * const flags_start = cursor; |
573 | 14.9k | while (cursor < eol && *cursor != ' ') { |
574 | 11.9k | ++cursor; |
575 | 11.9k | } |
576 | | // We expect at least four letters for flags (ex. "r-xp"). |
577 | 2.98k | if (cursor == eol || cursor < flags_start + 4) { |
578 | 0 | return -1; // Malformed line. |
579 | 0 | } |
580 | | |
581 | | // Check flags. We are only interested in "r-x" maps. |
582 | 2.98k | if (memcmp(flags_start, "r-x", 3) != 0) { // Not a "r-x" map. |
583 | 0 | continue; // We skip this map. |
584 | 0 | } |
585 | 2.98k | ++cursor; // Skip ' '. |
586 | | |
587 | | // Read file offset. |
588 | 2.98k | uint64_t file_offset; |
589 | 2.98k | cursor = GetHex(cursor, eol, &file_offset); |
590 | 2.98k | if (cursor == eol || *cursor != ' ') { |
591 | 0 | return -1; // Malformed line. |
592 | 0 | } |
593 | 2.98k | ++cursor; // Skip ' '. |
594 | | |
595 | | // Don't subtract 'start_address' from the first entry: |
596 | | // * If a binary is compiled w/o -pie, then the first entry in |
597 | | // process maps is likely the binary itself (all dynamic libs |
598 | | // are mapped higher in address space). For such a binary, |
599 | | // instruction offset in binary coincides with the actual |
600 | | // instruction address in virtual memory (as code section |
601 | | // is mapped to a fixed memory range). |
602 | | // * If a binary is compiled with -pie, all the modules are |
603 | | // mapped high at address space (in particular, higher than |
604 | | // shadow memory of the tool), so the module can't be the |
605 | | // first entry. |
606 | 2.98k | base_address = ((num_maps == 1) ? 0U : start_address) - file_offset; |
607 | | |
608 | | // Skip to file name. "cursor" now points to dev. We need to |
609 | | // skip at least two spaces for dev and inode. |
610 | 2.98k | int num_spaces = 0; |
611 | 101k | while (cursor < eol) { |
612 | 101k | if (*cursor == ' ') { |
613 | 62.7k | ++num_spaces; |
614 | 62.7k | } else if (num_spaces >= 2) { |
615 | | // The first non-space character after skipping two spaces |
616 | | // is the beginning of the file name. |
617 | 2.98k | break; |
618 | 2.98k | } |
619 | 98.6k | ++cursor; |
620 | 98.6k | } |
621 | 2.98k | if (cursor == eol) { |
622 | 0 | return -1; // Malformed line. |
623 | 0 | } |
624 | | |
625 | | // Finally, "cursor" now points to file name of our interest. |
626 | 2.98k | NO_INTR(object_fd = open(cursor, O_RDONLY)); |
627 | 2.98k | if (object_fd < 0) { |
628 | | // Failed to open object file. Copy the object file name to |
629 | | // |out_file_name|. |
630 | 0 | strncpy(out_file_name, cursor, out_file_name_size); |
631 | | // Making sure |out_file_name| is always null-terminated. |
632 | 0 | out_file_name[out_file_name_size - 1] = '\0'; |
633 | 0 | return -1; |
634 | 0 | } |
635 | 2.98k | return object_fd; |
636 | 2.98k | } |
637 | 2.98k | } |
638 | | |
// POSIX doesn't define any async-signal safe function for converting
// an integer to ASCII. We'll have to define our own version.
//
// itoa_r() converts the (signed) integer |i| to ASCII in |base| and stores
// the NUL-terminated result in |buf|, which holds |sz| bytes.
//   base    - radix, 2..16; digits above 9 are written as lower-case a-f.
//             A '-' sign is produced only for base 10; in any other base the
//             value is formatted as its unsigned bit pattern.
//   padding - minimum number of digits; shorter results are left-padded
//             with '0' (a '-' sign is not counted).
// Returns |buf| on success.  Returns NULL if |sz| is 0, if |base| is out of
// range, or if the result does not fit; in the latter two cases |buf| is set
// to the empty string.  Never writes more than |sz| bytes.
// NOTE: code from sandbox/linux/seccomp-bpf/demo.cc.
char *itoa_r(intptr_t i, char *buf, size_t sz, int base, size_t padding) {
  // Make sure we can write at least one NUL byte.
  size_t n = 1;
  if (n > sz)
    return NULL;

  if (base < 2 || base > 16) {
    buf[0] = '\000';
    return NULL;
  }

  char *start = buf;

  uintptr_t j = i;

  // Handle negative numbers (only for base 10).
  if (i < 0 && base == 10) {
    // Compute the magnitude without evaluating -i, which overflows (and is
    // undefined) for the most negative value: -(i + 1) is always
    // representable, and the final +1 happens in unsigned arithmetic.
    j = (uintptr_t)(-(i + 1)) + 1;

    // Make sure we can write the '-' character.
    if (++n > sz) {
      buf[0] = '\000';
      return NULL;
    }
    *start++ = '-';
  }

  // Loop until we have converted the entire number. Output at least one
  // character (i.e. '0').
  char *ptr = start;
  do {
    // Make sure there is still enough space left in our output buffer.
    if (++n > sz) {
      buf[0] = '\000';
      return NULL;
    }

    // Output the next digit.
    *ptr++ = "0123456789abcdef"[j % base];
    j /= base;

    if (padding > 0)
      padding--;
  } while (j > 0 || padding > 0);

  // Terminate the output with a NUL character.
  *ptr = '\000';

  // Conversion to ASCII actually resulted in the digits being in reverse
  // order. We can't easily generate them in forward order, as we can't tell
  // the number of characters needed until we are done converting.
  // So, now, we reverse the string (except for the possible "-" sign).
  while (--ptr > start) {
    char ch = *ptr;
    *ptr = *start;
    *start++ = ch;
  }
  return buf;
}
705 | | |
706 | | // Safely appends string |source| to string |dest|. Never writes past the |
707 | | // buffer size |dest_size| and guarantees that |dest| is null-terminated. |
708 | 2.98k | void SafeAppendString(const char* source, char* dest, int dest_size) { |
709 | 2.98k | int dest_string_length = strlen(dest); |
710 | 2.98k | SAFE_ASSERT(dest_string_length < dest_size); |
711 | 2.98k | dest += dest_string_length; |
712 | 2.98k | dest_size -= dest_string_length; |
713 | 2.98k | strncpy(dest, source, dest_size); |
714 | | // Making sure |dest| is always null-terminated. |
715 | 2.98k | dest[dest_size - 1] = '\0'; |
716 | 2.98k | } |
717 | | |
718 | | // Converts a 64-bit value into a hex string, and safely appends it to |dest|. |
719 | | // Never writes past the buffer size |dest_size| and guarantees that |dest| is |
720 | | // null-terminated. |
721 | 0 | void SafeAppendHexNumber(uint64_t value, char* dest, int dest_size) { |
722 | | // 64-bit numbers in hex can have up to 16 digits. |
723 | 0 | char buf[17] = {'\0'}; |
724 | 0 | SafeAppendString(itoa_r(value, buf, sizeof(buf), 16, 0), dest, dest_size); |
725 | 0 | } |
726 | | |
727 | | // The implementation of our symbolization routine. If it |
728 | | // successfully finds the symbol containing "pc" and obtains the |
729 | | // symbol name, returns true and write the symbol name to "out". |
730 | | // Otherwise, returns false. If Callback function is installed via |
731 | | // InstallSymbolizeCallback(), the function is also called in this function, |
732 | | // and "out" is used as its output. |
733 | | // To keep stack consumption low, we would like this function to not |
734 | | // get inlined. |
735 | | static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, |
736 | | int out_size, |
737 | 2.98k | uint64_t *out_saddr) { |
738 | 2.98k | uint64_t pc0 = reinterpret_cast<uintptr_t>(pc); |
739 | 2.98k | uint64_t start_address = 0; |
740 | 2.98k | uint64_t base_address = 0; |
741 | 2.98k | int object_fd = -1; |
742 | | |
743 | 2.98k | if ((NULL == out || out_size < 1) && |
744 | 2.98k | NULL == out_saddr) { |
745 | 0 | return false; |
746 | 0 | } |
747 | 2.98k | if (NULL != out) { |
748 | 2.98k | out[0] = '\0'; |
749 | 2.98k | SafeAppendString("(", out, out_size); |
750 | 2.98k | } |
751 | | |
752 | 2.98k | if (g_symbolize_open_object_file_callback) { |
753 | 0 | object_fd = g_symbolize_open_object_file_callback(pc0, start_address, |
754 | 0 | base_address, out + 1, |
755 | 0 | out_size - 1); |
756 | 2.98k | } else { |
757 | 2.98k | object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address, |
758 | 2.98k | base_address, |
759 | 2.98k | out + 1, |
760 | 2.98k | out_size - 1); |
761 | 2.98k | } |
762 | | |
763 | | // Check whether a file name was returned. |
764 | 2.98k | if (object_fd < 0) { |
765 | 0 | if (NULL != out && out[1] && NULL == out_saddr) { |
766 | | // The object file containing PC was determined successfully however the |
767 | | // object file was not opened successfully. This is still considered |
768 | | // success because the object file name and offset are known and tools |
769 | | // like asan_symbolize.py can be used for the symbolization. |
770 | 0 | out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated. |
771 | 0 | SafeAppendString("+0x", out, out_size); |
772 | 0 | SafeAppendHexNumber(pc0 - base_address, out, out_size); |
773 | 0 | SafeAppendString(")", out, out_size); |
774 | 0 | return true; |
775 | 0 | } |
776 | | // Failed to determine the object file containing PC. Bail out. |
777 | 0 | return false; |
778 | 0 | } |
779 | 2.98k | FileDescriptor wrapped_object_fd(object_fd); |
780 | 2.98k | int elf_type = FileGetElfType(wrapped_object_fd.get()); |
781 | 2.98k | if (elf_type == -1) { |
782 | 0 | return false; |
783 | 0 | } |
784 | 2.98k | if (g_symbolize_callback) { |
785 | | // Run the call back if it's installed. |
786 | | // Note: relocation (and much of the rest of this code) will be |
787 | | // wrong for prelinked shared libraries and PIE executables. |
788 | 0 | uint64_t relocation = (elf_type == ET_DYN) ? start_address : 0; |
789 | 0 | int num_bytes_written = g_symbolize_callback(wrapped_object_fd.get(), |
790 | 0 | pc, out, out_size, |
791 | 0 | relocation); |
792 | 0 | if (num_bytes_written > 0) { |
793 | 0 | out += num_bytes_written; |
794 | 0 | out_size -= num_bytes_written; |
795 | 0 | } |
796 | 0 | } |
797 | 2.98k | if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0, |
798 | 2.98k | out, out_size, out_saddr, |
799 | 2.98k | start_address)) { |
800 | 94 | return false; |
801 | 94 | } |
802 | | |
803 | 2.89k | if (NULL != out) { |
804 | | // Symbolization succeeded. Now we try to demangle the symbol. |
805 | 2.89k | DemangleInplace(out, out_size); |
806 | 2.89k | } |
807 | 2.89k | return true; |
808 | 2.98k | } |
809 | | |
810 | | _END_GOOGLE_NAMESPACE_ |
811 | | |
812 | | #elif defined(OS_MACOSX) && defined(HAVE_DLADDR) |
813 | | |
814 | | #include <dlfcn.h> |
815 | | #include <string.h> |
816 | | |
817 | | _START_GOOGLE_NAMESPACE_ |
818 | | |
819 | | static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, |
820 | | int out_size, |
821 | | uint64_t *out_saddr) { |
822 | | Dl_info info{}; |
823 | | if (0 == dladdr(pc, &info)) { |
824 | | return false; |
825 | | } |
826 | | if (NULL != out) { |
827 | | if ((int)strlen(info.dli_sname) >= out_size) { |
828 | | return false; |
829 | | } |
830 | | strcpy(out, info.dli_sname); |
831 | | // Symbolization succeeded. Now we try to demangle the symbol. |
832 | | DemangleInplace(out, out_size); |
833 | | } |
834 | | if (NULL != out_saddr) { |
835 | | *out_saddr = (uint64_t)info.dli_saddr; |
836 | | } |
837 | | return true; |
838 | | } |
839 | | |
840 | | _END_GOOGLE_NAMESPACE_ |
841 | | |
842 | | #else |
843 | | # error BUG: HAVE_SYMBOLIZE was wrongly set |
844 | | #endif |
845 | | |
846 | | _START_GOOGLE_NAMESPACE_ |
847 | | |
848 | 2.98k | bool BAIDU_WEAK Symbolize(void *pc, char *out, int out_size) { |
849 | 2.98k | SAFE_ASSERT(out_size >= 0); |
850 | 2.98k | return SymbolizeAndDemangle(pc, out, out_size, NULL); |
851 | 2.98k | } |
852 | | |
853 | 0 | bool BAIDU_WEAK SymbolizeAddress(void *pc, uint64_t *out) { |
854 | 0 | SAFE_ASSERT(NULL != out); |
855 | | return SymbolizeAndDemangle(pc, NULL, 0, out); |
856 | 0 | } |
857 | | |
858 | | _END_GOOGLE_NAMESPACE_ |
859 | | |
860 | | #else /* HAVE_SYMBOLIZE */ |
861 | | |
862 | | #include <assert.h> |
863 | | |
864 | | #include "config.h" |
865 | | |
866 | | _START_GOOGLE_NAMESPACE_ |
867 | | |
868 | | // TODO: Support other environments. |
869 | | bool BAIDU_WEAK Symbolize(void *pc, char *out, int out_size) { |
870 | | assert(0); |
871 | | return false; |
872 | | } |
873 | | |
874 | | bool BAIDU_WEAK SymbolizeAddress(void *pc, uint64_t *out) { |
875 | | assert(0); |
876 | | return false; |
877 | | } |
878 | | |
879 | | _END_GOOGLE_NAMESPACE_ |
880 | | |
881 | | #endif |