/src/abseil-cpp/absl/debugging/internal/elf_mem_image.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2017 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // Allow dynamic symbol lookup in an in-memory Elf image. |
16 | | // |
17 | | |
18 | | #include "absl/debugging/internal/elf_mem_image.h" |
19 | | |
20 | | #ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h |
21 | | |
22 | | #include <string.h> |
23 | | |
24 | | #include <cassert> |
25 | | #include <cstddef> |
26 | | #include <cstdint> |
27 | | |
28 | | #include "absl/base/config.h" |
29 | | #include "absl/base/internal/raw_logging.h" |
30 | | |
31 | | // From binutils/include/elf/common.h (this doesn't appear to be documented |
32 | | // anywhere else). |
33 | | // |
34 | | // /* This flag appears in a Versym structure. It means that the symbol |
35 | | // is hidden, and is only visible with an explicit version number. |
36 | | // This is a GNU extension. */ |
37 | | // #define VERSYM_HIDDEN 0x8000 |
38 | | // |
39 | | // /* This is the mask for the rest of the Versym information. */ |
40 | | // #define VERSYM_VERSION 0x7fff |
41 | | |
42 | 0 | #define VERSYM_VERSION 0x7fff |
43 | | |
44 | | namespace absl { |
45 | | ABSL_NAMESPACE_BEGIN |
46 | | namespace debugging_internal { |
47 | | |
48 | | namespace { |
49 | | |
50 | | #if __SIZEOF_POINTER__ == 4 |
51 | | const int kElfClass = ELFCLASS32; |
52 | | int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); } |
53 | | int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); } |
54 | | #elif __SIZEOF_POINTER__ == 8 |
55 | | const int kElfClass = ELFCLASS64; |
56 | 0 | int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); } |
57 | 0 | int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); } |
58 | | #else |
59 | | const int kElfClass = -1; |
60 | | int ElfBind(const ElfW(Sym) *) { |
61 | | ABSL_RAW_LOG(FATAL, "Unexpected word size"); |
62 | | return 0; |
63 | | } |
64 | | int ElfType(const ElfW(Sym) *) { |
65 | | ABSL_RAW_LOG(FATAL, "Unexpected word size"); |
66 | | return 0; |
67 | | } |
68 | | #endif |
69 | | |
70 | | // Extract an element from one of the ELF tables, cast it to desired type. |
71 | | // This is just a simple arithmetic and a glorified cast. |
72 | | // Callers are responsible for bounds checking. |
73 | | template <typename T> |
74 | | const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset, |
75 | 0 | ElfW(Word) element_size, size_t index) { |
76 | 0 | return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) |
77 | 0 | + table_offset |
78 | 0 | + index * element_size); |
79 | 0 | } Unexecuted instantiation: elf_mem_image.cc:Elf64_Phdr const* absl::debugging_internal::(anonymous namespace)::GetTableElement<Elf64_Phdr>(Elf64_Ehdr const*, unsigned long, unsigned int, unsigned long) Unexecuted instantiation: elf_mem_image.cc:char const* absl::debugging_internal::(anonymous namespace)::GetTableElement<char>(Elf64_Ehdr const*, unsigned long, unsigned int, unsigned long) |
80 | | |
81 | | } // namespace |
82 | | |
83 | | // The value of this variable doesn't matter; it's used only for its |
84 | | // unique address. |
85 | | const int ElfMemImage::kInvalidBaseSentinel = 0; |
86 | | |
87 | 0 | ElfMemImage::ElfMemImage(const void *base) { |
88 | 0 | ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer"); |
89 | 0 | Init(base); |
90 | 0 | } |
91 | | |
92 | 0 | uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; } |
93 | | |
94 | 0 | const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const { |
95 | 0 | ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); |
96 | 0 | return dynsym_ + index; |
97 | 0 | } |
98 | | |
99 | 0 | const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const { |
100 | 0 | ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); |
101 | 0 | return versym_ + index; |
102 | 0 | } |
103 | | |
104 | 0 | const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { |
105 | 0 | ABSL_RAW_CHECK(index >= 0 && index < ehdr_->e_phnum, "index out of range"); |
106 | 0 | return GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize, |
107 | 0 | static_cast<size_t>(index)); |
108 | 0 | } |
109 | | |
110 | 0 | const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { |
111 | 0 | ABSL_RAW_CHECK(offset < strsize_, "offset out of range"); |
112 | 0 | return dynstr_ + offset; |
113 | 0 | } |
114 | | |
115 | 0 | const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { |
116 | 0 | if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { |
117 | | // Symbol corresponds to "special" (e.g. SHN_ABS) section. |
118 | 0 | return reinterpret_cast<const void *>(sym->st_value); |
119 | 0 | } |
120 | 0 | ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range"); |
121 | 0 | return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_); |
122 | 0 | } |
123 | | |
124 | 0 | const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { |
125 | 0 | ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_, |
126 | 0 | "index out of range"); |
127 | 0 | const ElfW(Verdef) *version_definition = verdef_; |
128 | 0 | while (version_definition->vd_ndx < index && version_definition->vd_next) { |
129 | 0 | const char *const version_definition_as_char = |
130 | 0 | reinterpret_cast<const char *>(version_definition); |
131 | 0 | version_definition = |
132 | 0 | reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + |
133 | 0 | version_definition->vd_next); |
134 | 0 | } |
135 | 0 | return version_definition->vd_ndx == index ? version_definition : nullptr; |
136 | 0 | } |
137 | | |
138 | | const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( |
139 | 0 | const ElfW(Verdef) *verdef) const { |
140 | 0 | return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); |
141 | 0 | } |
142 | | |
143 | 0 | const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { |
144 | 0 | ABSL_RAW_CHECK(offset < strsize_, "offset out of range"); |
145 | 0 | return dynstr_ + offset; |
146 | 0 | } |
147 | | |
148 | 0 | void ElfMemImage::Init(const void *base) { |
149 | 0 | ehdr_ = nullptr; |
150 | 0 | dynsym_ = nullptr; |
151 | 0 | dynstr_ = nullptr; |
152 | 0 | versym_ = nullptr; |
153 | 0 | verdef_ = nullptr; |
154 | 0 | num_syms_ = 0; |
155 | 0 | strsize_ = 0; |
156 | 0 | verdefnum_ = 0; |
157 | | // Sentinel: PT_LOAD .p_vaddr can't possibly be this. |
158 | 0 | link_base_ = ~ElfW(Addr){0}; // NOLINT(readability/braces) |
159 | 0 | if (!base) { |
160 | 0 | return; |
161 | 0 | } |
162 | 0 | const char *const base_as_char = reinterpret_cast<const char *>(base); |
163 | 0 | if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || |
164 | 0 | base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { |
165 | 0 | assert(false); |
166 | 0 | return; |
167 | 0 | } |
168 | 0 | int elf_class = base_as_char[EI_CLASS]; |
169 | 0 | if (elf_class != kElfClass) { |
170 | 0 | assert(false); |
171 | 0 | return; |
172 | 0 | } |
173 | 0 | switch (base_as_char[EI_DATA]) { |
174 | 0 | case ELFDATA2LSB: { |
175 | | #ifndef ABSL_IS_LITTLE_ENDIAN |
176 | | assert(false); |
177 | | return; |
178 | | #endif |
179 | 0 | break; |
180 | 0 | } |
181 | 0 | case ELFDATA2MSB: { |
182 | 0 | #ifndef ABSL_IS_BIG_ENDIAN |
183 | 0 | assert(false); |
184 | 0 | return; |
185 | 0 | #endif |
186 | 0 | break; |
187 | 0 | } |
188 | 0 | default: { |
189 | 0 | assert(false); |
190 | 0 | return; |
191 | 0 | } |
192 | 0 | } |
193 | | |
194 | 0 | ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); |
195 | 0 | const ElfW(Phdr) *dynamic_program_header = nullptr; |
196 | 0 | for (int i = 0; i < ehdr_->e_phnum; ++i) { |
197 | 0 | const ElfW(Phdr) *const program_header = GetPhdr(i); |
198 | 0 | switch (program_header->p_type) { |
199 | 0 | case PT_LOAD: |
200 | 0 | if (!~link_base_) { |
201 | 0 | link_base_ = program_header->p_vaddr; |
202 | 0 | } |
203 | 0 | break; |
204 | 0 | case PT_DYNAMIC: |
205 | 0 | dynamic_program_header = program_header; |
206 | 0 | break; |
207 | 0 | } |
208 | 0 | } |
209 | 0 | if (!~link_base_ || !dynamic_program_header) { |
210 | 0 | assert(false); |
211 | | // Mark this image as not present. Can not recur infinitely. |
212 | 0 | Init(nullptr); |
213 | 0 | return; |
214 | 0 | } |
215 | 0 | ptrdiff_t relocation = |
216 | 0 | base_as_char - reinterpret_cast<const char *>(link_base_); |
217 | 0 | ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>( |
218 | 0 | static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation); |
219 | 0 | uint32_t *sysv_hash = nullptr; |
220 | 0 | uint32_t *gnu_hash = nullptr; |
221 | 0 | for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { |
222 | 0 | const auto value = |
223 | 0 | static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation; |
224 | 0 | switch (dynamic_entry->d_tag) { |
225 | 0 | case DT_HASH: |
226 | 0 | sysv_hash = reinterpret_cast<uint32_t *>(value); |
227 | 0 | break; |
228 | 0 | case DT_GNU_HASH: |
229 | 0 | gnu_hash = reinterpret_cast<uint32_t *>(value); |
230 | 0 | break; |
231 | 0 | case DT_SYMTAB: |
232 | 0 | dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); |
233 | 0 | break; |
234 | 0 | case DT_STRTAB: |
235 | 0 | dynstr_ = reinterpret_cast<const char *>(value); |
236 | 0 | break; |
237 | 0 | case DT_VERSYM: |
238 | 0 | versym_ = reinterpret_cast<ElfW(Versym) *>(value); |
239 | 0 | break; |
240 | 0 | case DT_VERDEF: |
241 | 0 | verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); |
242 | 0 | break; |
243 | 0 | case DT_VERDEFNUM: |
244 | 0 | verdefnum_ = static_cast<size_t>(dynamic_entry->d_un.d_val); |
245 | 0 | break; |
246 | 0 | case DT_STRSZ: |
247 | 0 | strsize_ = static_cast<size_t>(dynamic_entry->d_un.d_val); |
248 | 0 | break; |
249 | 0 | default: |
250 | | // Unrecognized entries explicitly ignored. |
251 | 0 | break; |
252 | 0 | } |
253 | 0 | } |
254 | 0 | if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ || |
255 | 0 | !verdef_ || !verdefnum_ || !strsize_) { |
256 | 0 | assert(false); // invalid VDSO |
257 | | // Mark this image as not present. Can not recur infinitely. |
258 | 0 | Init(nullptr); |
259 | 0 | return; |
260 | 0 | } |
261 | 0 | if (sysv_hash) { |
262 | 0 | num_syms_ = sysv_hash[1]; |
263 | 0 | } else { |
264 | 0 | assert(gnu_hash); |
265 | | // Compute the number of symbols for DT_GNU_HASH, which is specified by |
266 | | // https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt |
267 | 0 | uint32_t nbuckets = gnu_hash[0]; |
268 | | // The buckets array is located after the header (4 uint32) and the bloom |
269 | | // filter (size_t array of gnu_hash[2] elements). |
270 | 0 | uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2]; |
271 | | // Find the chain of the last non-empty bucket. |
272 | 0 | uint32_t idx = 0; |
273 | 0 | for (uint32_t i = nbuckets; i > 0;) { |
274 | 0 | idx = buckets[--i]; |
275 | 0 | if (idx != 0) break; |
276 | 0 | } |
277 | 0 | if (idx != 0) { |
278 | | // Find the last element of the chain, which has an odd value. |
279 | | // Add one to get the number of symbols. |
280 | 0 | uint32_t *chain = buckets + nbuckets - gnu_hash[1]; |
281 | 0 | while (chain[idx++] % 2 == 0) { |
282 | 0 | } |
283 | 0 | } |
284 | 0 | num_syms_ = idx; |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | | bool ElfMemImage::LookupSymbol(const char *name, |
289 | | const char *version, |
290 | | int type, |
291 | 0 | SymbolInfo *info_out) const { |
292 | 0 | for (const SymbolInfo& info : *this) { |
293 | 0 | if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 && |
294 | 0 | ElfType(info.symbol) == type) { |
295 | 0 | if (info_out) { |
296 | 0 | *info_out = info; |
297 | 0 | } |
298 | 0 | return true; |
299 | 0 | } |
300 | 0 | } |
301 | 0 | return false; |
302 | 0 | } |
303 | | |
304 | | bool ElfMemImage::LookupSymbolByAddress(const void *address, |
305 | 0 | SymbolInfo *info_out) const { |
306 | 0 | for (const SymbolInfo& info : *this) { |
307 | 0 | const char *const symbol_start = |
308 | 0 | reinterpret_cast<const char *>(info.address); |
309 | 0 | const char *const symbol_end = symbol_start + info.symbol->st_size; |
310 | 0 | if (symbol_start <= address && address < symbol_end) { |
311 | 0 | if (info_out) { |
312 | | // Client wants to know details for that symbol (the usual case). |
313 | 0 | if (ElfBind(info.symbol) == STB_GLOBAL) { |
314 | | // Strong symbol; just return it. |
315 | 0 | *info_out = info; |
316 | 0 | return true; |
317 | 0 | } else { |
318 | | // Weak or local. Record it, but keep looking for a strong one. |
319 | 0 | *info_out = info; |
320 | 0 | } |
321 | 0 | } else { |
322 | | // Client only cares if there is an overlapping symbol. |
323 | 0 | return true; |
324 | 0 | } |
325 | 0 | } |
326 | 0 | } |
327 | 0 | return false; |
328 | 0 | } |
329 | | |
330 | | ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, |
331 | | uint32_t index) |
332 | 0 | : index_(index), image_(image) {} |
333 | | |
334 | 0 | const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { |
335 | 0 | return &info_; |
336 | 0 | } |
337 | | |
338 | 0 | const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { |
339 | 0 | return info_; |
340 | 0 | } |
341 | | |
342 | 0 | bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { |
343 | 0 | return this->image_ == rhs.image_ && this->index_ == rhs.index_; |
344 | 0 | } |
345 | | |
346 | 0 | bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { |
347 | 0 | return !(*this == rhs); |
348 | 0 | } |
349 | | |
350 | 0 | ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { |
351 | 0 | this->Update(1); |
352 | 0 | return *this; |
353 | 0 | } |
354 | | |
355 | 0 | ElfMemImage::SymbolIterator ElfMemImage::begin() const { |
356 | 0 | SymbolIterator it(this, 0); |
357 | 0 | it.Update(0); |
358 | 0 | return it; |
359 | 0 | } |
360 | | |
361 | 0 | ElfMemImage::SymbolIterator ElfMemImage::end() const { |
362 | 0 | return SymbolIterator(this, GetNumSymbols()); |
363 | 0 | } |
364 | | |
365 | 0 | void ElfMemImage::SymbolIterator::Update(uint32_t increment) { |
366 | 0 | const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); |
367 | 0 | ABSL_RAW_CHECK(image->IsPresent() || increment == 0, ""); |
368 | 0 | if (!image->IsPresent()) { |
369 | 0 | return; |
370 | 0 | } |
371 | 0 | index_ += increment; |
372 | 0 | if (index_ >= image->GetNumSymbols()) { |
373 | 0 | index_ = image->GetNumSymbols(); |
374 | 0 | return; |
375 | 0 | } |
376 | 0 | const ElfW(Sym) *symbol = image->GetDynsym(index_); |
377 | 0 | const ElfW(Versym) *version_symbol = image->GetVersym(index_); |
378 | 0 | ABSL_RAW_CHECK(symbol && version_symbol, ""); |
379 | 0 | const char *const symbol_name = image->GetDynstr(symbol->st_name); |
380 | | #if defined(__NetBSD__) |
381 | | const int version_index = version_symbol->vs_vers & VERSYM_VERSION; |
382 | | #else |
383 | 0 | const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; |
384 | 0 | #endif |
385 | 0 | const ElfW(Verdef) *version_definition = nullptr; |
386 | 0 | const char *version_name = ""; |
387 | 0 | if (symbol->st_shndx == SHN_UNDEF) { |
388 | | // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and |
389 | | // version_index could well be greater than verdefnum_, so calling |
390 | | // GetVerdef(version_index) may trigger assertion. |
391 | 0 | } else { |
392 | 0 | version_definition = image->GetVerdef(version_index); |
393 | 0 | } |
394 | 0 | if (version_definition) { |
395 | | // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, |
396 | | // optional 2nd if the version has a parent. |
397 | 0 | ABSL_RAW_CHECK( |
398 | 0 | version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2, |
399 | 0 | "wrong number of entries"); |
400 | 0 | const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); |
401 | 0 | version_name = image->GetVerstr(version_aux->vda_name); |
402 | 0 | } |
403 | 0 | info_.name = symbol_name; |
404 | 0 | info_.version = version_name; |
405 | 0 | info_.address = image->GetSymAddr(symbol); |
406 | 0 | info_.symbol = symbol; |
407 | 0 | } |
408 | | |
409 | | } // namespace debugging_internal |
410 | | ABSL_NAMESPACE_END |
411 | | } // namespace absl |
412 | | |
413 | | #endif // ABSL_HAVE_ELF_MEM_IMAGE |