/src/abseil-cpp/absl/debugging/internal/demangle_rust.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2024 The Abseil Authors |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/debugging/internal/demangle_rust.h" |
16 | | |
17 | | #include <cstddef> |
18 | | #include <cstdint> |
19 | | #include <cstring> |
20 | | #include <limits> |
21 | | |
22 | | #include "absl/base/attributes.h" |
23 | | #include "absl/base/config.h" |
24 | | #include "absl/debugging/internal/decode_rust_punycode.h" |
25 | | |
26 | | namespace absl { |
27 | | ABSL_NAMESPACE_BEGIN |
28 | | namespace debugging_internal { |
29 | | |
30 | | namespace { |
31 | | |
32 | | // Same step limit as the C++ demangler in demangle.cc uses. |
33 | | constexpr int kMaxReturns = 1 << 17; |
34 | | |
35 | 0 | bool IsDigit(char c) { return '0' <= c && c <= '9'; } |
36 | 0 | bool IsLower(char c) { return 'a' <= c && c <= 'z'; } |
37 | 0 | bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; } |
38 | 0 | bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); } |
39 | 0 | bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; } |
40 | 0 | bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); } |
41 | | |
42 | 0 | const char* BasicTypeName(char c) { |
43 | 0 | switch (c) { |
44 | 0 | case 'a': return "i8"; |
45 | 0 | case 'b': return "bool"; |
46 | 0 | case 'c': return "char"; |
47 | 0 | case 'd': return "f64"; |
48 | 0 | case 'e': return "str"; |
49 | 0 | case 'f': return "f32"; |
50 | 0 | case 'h': return "u8"; |
51 | 0 | case 'i': return "isize"; |
52 | 0 | case 'j': return "usize"; |
53 | 0 | case 'l': return "i32"; |
54 | 0 | case 'm': return "u32"; |
55 | 0 | case 'n': return "i128"; |
56 | 0 | case 'o': return "u128"; |
57 | 0 | case 'p': return "_"; |
58 | 0 | case 's': return "i16"; |
59 | 0 | case 't': return "u16"; |
60 | 0 | case 'u': return "()"; |
61 | 0 | case 'v': return "..."; |
62 | 0 | case 'x': return "i64"; |
63 | 0 | case 'y': return "u64"; |
64 | 0 | case 'z': return "!"; |
65 | 0 | } |
66 | 0 | return nullptr; |
67 | 0 | } |
68 | | |
69 | | // Parser for Rust symbol mangling v0, whose grammar is defined here: |
70 | | // |
71 | | // https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary |
72 | | class RustSymbolParser { |
73 | | public: |
74 | | // Prepares to demangle the given encoding, a Rust symbol name starting with |
75 | | // _R, into the output buffer [out, out_end). The caller is expected to |
76 | | // continue by calling the new object's Parse function. |
77 | | RustSymbolParser(const char* encoding, char* out, char* const out_end) |
78 | 0 | : encoding_(encoding), out_(out), out_end_(out_end) { |
79 | 0 | if (out_ != out_end_) *out_ = '\0'; |
80 | 0 | } |
81 | | |
82 | | // Parses the constructor's encoding argument, writing output into the range |
83 | | // [out, out_end). Returns true on success and false for input whose |
84 | | // structure was not recognized or exceeded implementation limits, such as by |
85 | | // nesting structures too deep. In either case *this should not be used |
86 | | // again. |
87 | 0 | ABSL_MUST_USE_RESULT bool Parse() && { |
88 | | // Recursively parses the grammar production named by callee, then resumes |
89 | | // execution at the next statement. |
90 | | // |
91 | | // Recursive-descent parsing is a beautifully readable translation of a |
92 | | // grammar, but it risks stack overflow if implemented by naive recursion on |
93 | | // the C++ call stack. So we simulate recursion by goto and switch instead, |
94 | | // keeping a bounded stack of "return addresses" in the recursion_stack_ |
95 | | // member. |
96 | | // |
97 | | // The callee argument is a statement label. We goto that label after |
98 | | // saving the "return address" on recursion_stack_. The next continue |
99 | | // statement in the for loop below "returns" from this "call". |
100 | | // |
101 | | // The caller argument names the return point. Each value of caller must |
102 | | // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the |
103 | | // definition of enum ReturnAddress. The switch implements the control |
104 | | // transfer from the end of a "called" subroutine back to the statement |
105 | | // after the "call". |
106 | | // |
107 | | // Note that not all the grammar productions have to be packed into the |
108 | | // switch, but only those which appear in a cycle in the grammar. Anything |
109 | | // acyclic can be written as ordinary functions and function calls, e.g., |
110 | | // ParseIdentifier. |
111 | 0 | #define ABSL_DEMANGLER_RECURSE(callee, caller) \ |
112 | 0 | do { \ |
113 | 0 | if (recursion_depth_ == kStackSize) return false; \ |
114 | | /* The next continue will switch on this saved value ... */ \ |
115 | 0 | recursion_stack_[recursion_depth_++] = caller; \ |
116 | 0 | goto callee; \ |
117 | | /* ... and will land here, resuming the suspended code. */ \ |
118 | 0 | case caller: {} \ |
119 | 0 | } while (0) |
120 | | |
121 | | // Parse the encoding, counting completed recursive calls to guard against |
122 | | // excessively complex input and infinite-loop bugs. |
123 | 0 | int iter = 0; |
124 | 0 | goto whole_encoding; |
125 | 0 | for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) { |
126 | | // This switch resumes the code path most recently suspended by |
127 | | // ABSL_DEMANGLER_RECURSE. |
128 | 0 | switch (recursion_stack_[--recursion_depth_]) { |
129 | | // |
130 | | // symbol-name -> |
131 | | // _R decimal-number? path instantiating-crate? vendor-specific-suffix? |
132 | 0 | whole_encoding: |
133 | 0 | if (!Eat('_') || !Eat('R')) return false; |
134 | | // decimal-number? is always empty today, so proceed to path, which |
135 | | // can't start with a decimal digit. |
136 | 0 | ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate); |
137 | 0 | if (IsAlpha(Peek())) { |
138 | 0 | ++silence_depth_; // Print nothing more from here on. |
139 | 0 | ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix); |
140 | 0 | } |
141 | 0 | switch (Take()) { |
142 | 0 | case '.': case '$': case '\0': return true; |
143 | 0 | } |
144 | 0 | return false; // unexpected trailing content |
145 | | |
146 | | // path -> crate-root | inherent-impl | trait-impl | trait-definition | |
147 | | // nested-path | generic-args | backref |
148 | | // |
149 | | // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch |
150 | | // (which would hide the generated case label). Thus we jump out of the |
151 | | // inner switch with gotos before performing any fake recursion. |
152 | 0 | path: |
153 | 0 | switch (Take()) { |
154 | 0 | case 'C': goto crate_root; |
155 | 0 | case 'M': goto inherent_impl; |
156 | 0 | case 'X': goto trait_impl; |
157 | 0 | case 'Y': goto trait_definition; |
158 | 0 | case 'N': goto nested_path; |
159 | 0 | case 'I': goto generic_args; |
160 | 0 | case 'B': goto path_backref; |
161 | 0 | default: return false; |
162 | 0 | } |
163 | | |
164 | | // crate-root -> C identifier (C consumed above) |
165 | 0 | crate_root: |
166 | 0 | if (!ParseIdentifier()) return false; |
167 | 0 | continue; |
168 | | |
169 | | // inherent-impl -> M impl-path type (M already consumed) |
170 | 0 | inherent_impl: |
171 | 0 | if (!Emit("<")) return false; |
172 | 0 | ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType); |
173 | 0 | ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding); |
174 | 0 | if (!Emit(">")) return false; |
175 | 0 | continue; |
176 | | |
177 | | // trait-impl -> X impl-path type path (X already consumed) |
178 | 0 | trait_impl: |
179 | 0 | if (!Emit("<")) return false; |
180 | 0 | ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType); |
181 | 0 | ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix); |
182 | 0 | if (!Emit(" as ")) return false; |
183 | 0 | ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding); |
184 | 0 | if (!Emit(">")) return false; |
185 | 0 | continue; |
186 | | |
187 | | // impl-path -> disambiguator? path (but never print it!) |
188 | 0 | impl_path: |
189 | 0 | ++silence_depth_; |
190 | 0 | { |
191 | 0 | int ignored_disambiguator; |
192 | 0 | if (!ParseDisambiguator(ignored_disambiguator)) return false; |
193 | 0 | } |
194 | 0 | ABSL_DEMANGLER_RECURSE(path, kImplPathEnding); |
195 | 0 | --silence_depth_; |
196 | 0 | continue; |
197 | | |
198 | | // trait-definition -> Y type path (Y already consumed) |
199 | 0 | trait_definition: |
200 | 0 | if (!Emit("<")) return false; |
201 | 0 | ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix); |
202 | 0 | if (!Emit(" as ")) return false; |
203 | 0 | ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding); |
204 | 0 | if (!Emit(">")) return false; |
205 | 0 | continue; |
206 | | |
207 | | // nested-path -> N namespace path identifier (N already consumed) |
208 | | // namespace -> lower | upper |
209 | 0 | nested_path: |
210 | | // Uppercase namespaces must be saved on a stack so we can print |
211 | | // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed. |
212 | 0 | if (IsUpper(Peek())) { |
213 | 0 | if (!PushNamespace(Take())) return false; |
214 | 0 | ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace); |
215 | 0 | if (!Emit("::")) return false; |
216 | 0 | if (!ParseIdentifier(PopNamespace())) return false; |
217 | 0 | continue; |
218 | 0 | } |
219 | | |
220 | | // Lowercase namespaces, however, are never represented in the output; |
221 | | // they all emit just ::name. |
222 | 0 | if (IsLower(Take())) { |
223 | 0 | ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace); |
224 | 0 | if (!Emit("::")) return false; |
225 | 0 | if (!ParseIdentifier()) return false; |
226 | 0 | continue; |
227 | 0 | } |
228 | | |
229 | | // Neither upper or lower |
230 | 0 | return false; |
231 | | |
232 | | // type -> basic-type | array-type | slice-type | tuple-type | |
233 | | // ref-type | mut-ref-type | const-ptr-type | mut-ptr-type | |
234 | | // fn-type | dyn-trait-type | path | backref |
235 | | // |
236 | | // We use ifs instead of switch (Take()) because the default case jumps |
237 | | // to path, which will need to see the first character not yet Taken |
238 | | // from the input. Because we do not use a nested switch here, |
239 | | // ABSL_DEMANGLER_RECURSE works fine in the 'S' case. |
240 | 0 | type: |
241 | 0 | if (IsLower(Peek())) { |
242 | 0 | const char* type_name = BasicTypeName(Take()); |
243 | 0 | if (type_name == nullptr || !Emit(type_name)) return false; |
244 | 0 | continue; |
245 | 0 | } |
246 | 0 | if (Eat('A')) { |
247 | | // array-type = A type const |
248 | 0 | if (!Emit("[")) return false; |
249 | 0 | ABSL_DEMANGLER_RECURSE(type, kArraySize); |
250 | 0 | if (!Emit("; ")) return false; |
251 | 0 | ABSL_DEMANGLER_RECURSE(constant, kFinishArray); |
252 | 0 | if (!Emit("]")) return false; |
253 | 0 | continue; |
254 | 0 | } |
255 | 0 | if (Eat('S')) { |
256 | 0 | if (!Emit("[")) return false; |
257 | 0 | ABSL_DEMANGLER_RECURSE(type, kSliceEnding); |
258 | 0 | if (!Emit("]")) return false; |
259 | 0 | continue; |
260 | 0 | } |
261 | 0 | if (Eat('T')) goto tuple_type; |
262 | 0 | if (Eat('R')) { |
263 | 0 | if (!Emit("&")) return false; |
264 | 0 | if (!ParseOptionalLifetime()) return false; |
265 | 0 | goto type; |
266 | 0 | } |
267 | 0 | if (Eat('Q')) { |
268 | 0 | if (!Emit("&mut ")) return false; |
269 | 0 | if (!ParseOptionalLifetime()) return false; |
270 | 0 | goto type; |
271 | 0 | } |
272 | 0 | if (Eat('P')) { |
273 | 0 | if (!Emit("*const ")) return false; |
274 | 0 | goto type; |
275 | 0 | } |
276 | 0 | if (Eat('O')) { |
277 | 0 | if (!Emit("*mut ")) return false; |
278 | 0 | goto type; |
279 | 0 | } |
280 | 0 | if (Eat('F')) goto fn_type; |
281 | 0 | if (Eat('D')) goto dyn_trait_type; |
282 | 0 | if (Eat('B')) goto type_backref; |
283 | 0 | goto path; |
284 | | |
285 | | // tuple-type -> T type* E (T already consumed) |
286 | 0 | tuple_type: |
287 | 0 | if (!Emit("(")) return false; |
288 | | |
289 | | // The toolchain should call the unit type u instead of TE, but the |
290 | | // grammar and other demanglers also recognize TE, so we do too. |
291 | 0 | if (Eat('E')) { |
292 | 0 | if (!Emit(")")) return false; |
293 | 0 | continue; |
294 | 0 | } |
295 | | |
296 | | // A tuple with one element is rendered (type,) instead of (type). |
297 | 0 | ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement); |
298 | 0 | if (Eat('E')) { |
299 | 0 | if (!Emit(",)")) return false; |
300 | 0 | continue; |
301 | 0 | } |
302 | | |
303 | | // A tuple with two elements is of course (x, y). |
304 | 0 | if (!Emit(", ")) return false; |
305 | 0 | ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement); |
306 | 0 | if (Eat('E')) { |
307 | 0 | if (!Emit(")")) return false; |
308 | 0 | continue; |
309 | 0 | } |
310 | | |
311 | | // And (x, y, z) for three elements. |
312 | 0 | if (!Emit(", ")) return false; |
313 | 0 | ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement); |
314 | 0 | if (Eat('E')) { |
315 | 0 | if (!Emit(")")) return false; |
316 | 0 | continue; |
317 | 0 | } |
318 | | |
319 | | // For longer tuples we write (x, y, z, ...), printing none of the |
320 | | // content of the fourth and later types. Thus we avoid exhausting |
321 | | // output buffers and human readers' patience when some library has a |
322 | | // long tuple as an implementation detail, without having to |
323 | | // completely obfuscate all tuples. |
324 | 0 | if (!Emit(", ...)")) return false; |
325 | 0 | ++silence_depth_; |
326 | 0 | while (!Eat('E')) { |
327 | 0 | ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement); |
328 | 0 | } |
329 | 0 | --silence_depth_; |
330 | 0 | continue; |
331 | | |
332 | | // fn-type -> F fn-sig (F already consumed) |
333 | | // fn-sig -> binder? U? (K abi)? type* E type |
334 | | // abi -> C | undisambiguated-identifier |
335 | | // |
336 | | // We follow the C++ demangler in suppressing details of function |
337 | | // signatures. Every function type is rendered "fn...". |
338 | 0 | fn_type: |
339 | 0 | if (!Emit("fn...")) return false; |
340 | 0 | ++silence_depth_; |
341 | 0 | if (!ParseOptionalBinder()) return false; |
342 | 0 | (void)Eat('U'); |
343 | 0 | if (Eat('K')) { |
344 | 0 | if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false; |
345 | 0 | } |
346 | 0 | while (!Eat('E')) { |
347 | 0 | ABSL_DEMANGLER_RECURSE(type, kContinueParameterList); |
348 | 0 | } |
349 | 0 | ABSL_DEMANGLER_RECURSE(type, kFinishFn); |
350 | 0 | --silence_depth_; |
351 | 0 | continue; |
352 | | |
353 | | // dyn-trait-type -> D dyn-bounds lifetime (D already consumed) |
354 | | // dyn-bounds -> binder? dyn-trait* E |
355 | | // |
356 | | // The grammar strangely allows an empty trait list, even though the |
357 | | // compiler should never output one. We follow existing demanglers in |
358 | | // rendering DEL_ as "dyn ". |
359 | | // |
360 | | // Because auto traits lengthen a type name considerably without |
361 | | // providing much value to a search for related source code, it would be |
362 | | // desirable to abbreviate |
363 | | // dyn main::Trait + std::marker::Copy + std::marker::Send |
364 | | // to |
365 | | // dyn main::Trait + ..., |
366 | | // eliding the auto traits. But it is difficult to do so correctly, in |
367 | | // part because there is no guarantee that the mangling will list the |
368 | | // main trait first. So we just print all the traits in their order of |
369 | | // appearance in the mangled name. |
370 | 0 | dyn_trait_type: |
371 | 0 | if (!Emit("dyn ")) return false; |
372 | 0 | if (!ParseOptionalBinder()) return false; |
373 | 0 | if (!Eat('E')) { |
374 | 0 | ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits); |
375 | 0 | while (!Eat('E')) { |
376 | 0 | if (!Emit(" + ")) return false; |
377 | 0 | ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits); |
378 | 0 | } |
379 | 0 | } |
380 | 0 | if (!ParseRequiredLifetime()) return false; |
381 | 0 | continue; |
382 | | |
383 | | // dyn-trait -> path dyn-trait-assoc-binding* |
384 | | // dyn-trait-assoc-binding -> p undisambiguated-identifier type |
385 | | // |
386 | | // We render nonempty binding lists as <>, omitting their contents as |
387 | | // for generic-args. |
388 | 0 | dyn_trait: |
389 | 0 | ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait); |
390 | 0 | if (Peek() == 'p') { |
391 | 0 | if (!Emit("<>")) return false; |
392 | 0 | ++silence_depth_; |
393 | 0 | while (Eat('p')) { |
394 | 0 | if (!ParseUndisambiguatedIdentifier()) return false; |
395 | 0 | ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding); |
396 | 0 | } |
397 | 0 | --silence_depth_; |
398 | 0 | } |
399 | 0 | continue; |
400 | | |
401 | | // const -> type const-data | p | backref |
402 | | // |
403 | | // const is a C++ keyword, so we use the label `constant` instead. |
404 | 0 | constant: |
405 | 0 | if (Eat('B')) goto const_backref; |
406 | 0 | if (Eat('p')) { |
407 | 0 | if (!Emit("_")) return false; |
408 | 0 | continue; |
409 | 0 | } |
410 | | |
411 | | // Scan the type without printing it. |
412 | | // |
413 | | // The Rust language restricts the type of a const generic argument |
414 | | // much more than the mangling grammar does. We do not enforce this. |
415 | | // |
416 | | // We also do not bother printing false, true, 'A', and '\u{abcd}' for |
417 | | // the types bool and char. Because we do not print generic-args |
418 | | // contents, we expect to print constants only in array sizes, and |
419 | | // those should not be bool or char. |
420 | 0 | ++silence_depth_; |
421 | 0 | ABSL_DEMANGLER_RECURSE(type, kConstData); |
422 | 0 | --silence_depth_; |
423 | | |
424 | | // const-data -> n? hex-digit* _ |
425 | | // |
426 | | // Although the grammar doesn't say this, existing demanglers expect |
427 | | // that zero is 0, not an empty digit sequence, and no nonzero value |
428 | | // may have leading zero digits. Also n0_ is accepted and printed as |
429 | | // -0, though a toolchain will probably never write that encoding. |
430 | 0 | if (Eat('n') && !EmitChar('-')) return false; |
431 | 0 | if (!Emit("0x")) return false; |
432 | 0 | if (Eat('0')) { |
433 | 0 | if (!EmitChar('0')) return false; |
434 | 0 | if (!Eat('_')) return false; |
435 | 0 | continue; |
436 | 0 | } |
437 | 0 | while (IsLowerHexDigit(Peek())) { |
438 | 0 | if (!EmitChar(Take())) return false; |
439 | 0 | } |
440 | 0 | if (!Eat('_')) return false; |
441 | 0 | continue; |
442 | | |
443 | | // generic-args -> I path generic-arg* E (I already consumed) |
444 | | // |
445 | | // We follow the C++ demangler in omitting all the arguments from the |
446 | | // output, printing only the list opening and closing tokens. |
447 | 0 | generic_args: |
448 | 0 | ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList); |
449 | 0 | if (!Emit("::<>")) return false; |
450 | 0 | ++silence_depth_; |
451 | 0 | while (!Eat('E')) { |
452 | 0 | ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList); |
453 | 0 | } |
454 | 0 | --silence_depth_; |
455 | 0 | continue; |
456 | | |
457 | | // generic-arg -> lifetime | type | K const |
458 | 0 | generic_arg: |
459 | 0 | if (Peek() == 'L') { |
460 | 0 | if (!ParseOptionalLifetime()) return false; |
461 | 0 | continue; |
462 | 0 | } |
463 | 0 | if (Eat('K')) goto constant; |
464 | 0 | goto type; |
465 | | |
466 | | // backref -> B base-62-number (B already consumed) |
467 | | // |
468 | | // The BeginBackref call parses and range-checks the base-62-number. We |
469 | | // always do that much. |
470 | | // |
471 | | // The recursive call parses and prints what the backref points at. We |
472 | | // save CPU and stack by skipping this work if the output would be |
473 | | // suppressed anyway. |
474 | 0 | path_backref: |
475 | 0 | if (!BeginBackref()) return false; |
476 | 0 | if (silence_depth_ == 0) { |
477 | 0 | ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding); |
478 | 0 | } |
479 | 0 | EndBackref(); |
480 | 0 | continue; |
481 | | |
482 | | // This represents the same backref production as in path_backref but |
483 | | // parses the target as a type instead of a path. |
484 | 0 | type_backref: |
485 | 0 | if (!BeginBackref()) return false; |
486 | 0 | if (silence_depth_ == 0) { |
487 | 0 | ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding); |
488 | 0 | } |
489 | 0 | EndBackref(); |
490 | 0 | continue; |
491 | | |
492 | 0 | const_backref: |
493 | 0 | if (!BeginBackref()) return false; |
494 | 0 | if (silence_depth_ == 0) { |
495 | 0 | ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding); |
496 | 0 | } |
497 | 0 | EndBackref(); |
498 | 0 | continue; |
499 | 0 | } |
500 | 0 | } |
501 | | |
502 | 0 | return false; // hit iteration limit or a bug in our stack handling |
503 | 0 | } |
504 | | |
505 | | private: |
506 | | // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls. |
507 | | enum ReturnAddress : uint8_t { |
508 | | kInstantiatingCrate, |
509 | | kVendorSpecificSuffix, |
510 | | kIdentifierInUppercaseNamespace, |
511 | | kIdentifierInLowercaseNamespace, |
512 | | kInherentImplType, |
513 | | kInherentImplEnding, |
514 | | kTraitImplType, |
515 | | kTraitImplInfix, |
516 | | kTraitImplEnding, |
517 | | kImplPathEnding, |
518 | | kTraitDefinitionInfix, |
519 | | kTraitDefinitionEnding, |
520 | | kArraySize, |
521 | | kFinishArray, |
522 | | kSliceEnding, |
523 | | kAfterFirstTupleElement, |
524 | | kAfterSecondTupleElement, |
525 | | kAfterThirdTupleElement, |
526 | | kAfterSubsequentTupleElement, |
527 | | kContinueParameterList, |
528 | | kFinishFn, |
529 | | kBeginAutoTraits, |
530 | | kContinueAutoTraits, |
531 | | kContinueDynTrait, |
532 | | kContinueAssocBinding, |
533 | | kConstData, |
534 | | kBeginGenericArgList, |
535 | | kContinueGenericArgList, |
536 | | kPathBackrefEnding, |
537 | | kTypeBackrefEnding, |
538 | | kConstantBackrefEnding, |
539 | | }; |
540 | | |
541 | | // Element counts for the stack arrays. Larger stack sizes accommodate more |
542 | | // deeply nested names at the cost of a larger footprint on the C++ call |
543 | | // stack. |
544 | | enum { |
545 | | // Maximum recursive calls outstanding at one time. |
546 | | kStackSize = 256, |
547 | | |
548 | | // Maximum N<uppercase> nested-paths open at once. We do not expect |
549 | | // closures inside closures inside closures as much as functions inside |
550 | | // modules inside other modules, so we can use a smaller array here. |
551 | | kNamespaceStackSize = 64, |
552 | | |
553 | | // Maximum number of nested backrefs. We can keep this stack pretty small |
554 | | // because we do not follow backrefs inside generic-args or other contexts |
555 | | // that suppress printing, so deep stacking is unlikely in practice. |
556 | | kPositionStackSize = 16, |
557 | | }; |
558 | | |
559 | | // Returns the next input character without consuming it. |
560 | 0 | char Peek() const { return encoding_[pos_]; } |
561 | | |
562 | | // Consumes and returns the next input character. |
563 | 0 | char Take() { return encoding_[pos_++]; } |
564 | | |
565 | | // If the next input character is the given character, consumes it and returns |
566 | | // true; otherwise returns false without consuming a character. |
567 | 0 | ABSL_MUST_USE_RESULT bool Eat(char want) { |
568 | 0 | if (encoding_[pos_] != want) return false; |
569 | 0 | ++pos_; |
570 | 0 | return true; |
571 | 0 | } |
572 | | |
573 | | // Provided there is enough remaining output space, appends c to the output, |
574 | | // writing a fresh NUL terminator afterward, and returns true. Returns false |
575 | | // if the output buffer had less than two bytes free. |
576 | 0 | ABSL_MUST_USE_RESULT bool EmitChar(char c) { |
577 | 0 | if (silence_depth_ > 0) return true; |
578 | 0 | if (out_end_ - out_ < 2) return false; |
579 | 0 | *out_++ = c; |
580 | 0 | *out_ = '\0'; |
581 | 0 | return true; |
582 | 0 | } |
583 | | |
584 | | // Provided there is enough remaining output space, appends the C string token |
585 | | // to the output, followed by a NUL character, and returns true. Returns |
586 | | // false if not everything fit into the output buffer. |
587 | 0 | ABSL_MUST_USE_RESULT bool Emit(const char* token) { |
588 | 0 | if (silence_depth_ > 0) return true; |
589 | 0 | const size_t token_length = std::strlen(token); |
590 | 0 | const size_t bytes_to_copy = token_length + 1; // token and final NUL |
591 | 0 | if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false; |
592 | 0 | std::memcpy(out_, token, bytes_to_copy); |
593 | 0 | out_ += token_length; |
594 | 0 | return true; |
595 | 0 | } |
596 | | |
597 | | // Provided there is enough remaining output space, appends the decimal form |
598 | | // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the |
599 | | // output, followed by a NUL character, and returns true. Returns false if |
600 | | // not everything fit into the output buffer. |
601 | 0 | ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) { |
602 | 0 | if (disambiguator < 0) return EmitChar('?'); // parsed but too large |
603 | 0 | if (disambiguator == 0) return EmitChar('0'); |
604 | | // Convert disambiguator to decimal text. Three digits per byte is enough |
605 | | // because 999 > 256. The bound will remain correct even if future |
606 | | // maintenance changes the type of the disambiguator variable. |
607 | 0 | char digits[3 * sizeof(disambiguator)] = {}; |
608 | 0 | size_t leading_digit_index = sizeof(digits) - 1; |
609 | 0 | for (; disambiguator > 0; disambiguator /= 10) { |
610 | 0 | digits[--leading_digit_index] = |
611 | 0 | static_cast<char>('0' + disambiguator % 10); |
612 | 0 | } |
613 | 0 | return Emit(digits + leading_digit_index); |
614 | 0 | } |
615 | | |
616 | | // Consumes an optional disambiguator (s123_) from the input. |
617 | | // |
618 | | // On success returns true and fills value with the encoded value if it was |
619 | | // not too big, otherwise with -1. If the optional disambiguator was omitted, |
620 | | // value is 0. On parse failure returns false and sets value to -1. |
621 | 0 | ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) { |
622 | 0 | value = -1; |
623 | | |
624 | | // disambiguator = s base-62-number |
625 | | // |
626 | | // Disambiguators are optional. An omitted disambiguator is zero. |
627 | 0 | if (!Eat('s')) { |
628 | 0 | value = 0; |
629 | 0 | return true; |
630 | 0 | } |
631 | 0 | int base_62_value = 0; |
632 | 0 | if (!ParseBase62Number(base_62_value)) return false; |
633 | 0 | value = base_62_value < 0 ? -1 : base_62_value + 1; |
634 | 0 | return true; |
635 | 0 | } |
636 | | |
637 | | // Consumes a base-62 number like _ or 123_ from the input. |
638 | | // |
639 | | // On success returns true and fills value with the encoded value if it was |
640 | | // not too big, otherwise with -1. On parse failure returns false and sets |
641 | | // value to -1. |
642 | 0 | ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) { |
643 | 0 | value = -1; |
644 | | |
645 | | // base-62-number = (digit | lower | upper)* _ |
646 | | // |
647 | | // An empty base-62 digit sequence means 0. |
648 | 0 | if (Eat('_')) { |
649 | 0 | value = 0; |
650 | 0 | return true; |
651 | 0 | } |
652 | | |
653 | | // A nonempty digit sequence denotes its base-62 value plus 1. |
654 | 0 | int encoded_number = 0; |
655 | 0 | bool overflowed = false; |
656 | 0 | while (IsAlpha(Peek()) || IsDigit(Peek())) { |
657 | 0 | const char c = Take(); |
658 | 0 | if (encoded_number >= std::numeric_limits<int>::max()/62) { |
659 | | // If we are close to overflowing an int, keep parsing but stop updating |
660 | | // encoded_number and remember to return -1 at the end. The point is to |
661 | | // avoid undefined behavior while parsing crate-root disambiguators, |
662 | | // which are large in practice but not shown in demangling, while |
663 | | // successfully computing closure and shim disambiguators, which are |
664 | | // typically small and are printed out. |
665 | 0 | overflowed = true; |
666 | 0 | } else { |
667 | 0 | int digit; |
668 | 0 | if (IsDigit(c)) { |
669 | 0 | digit = c - '0'; |
670 | 0 | } else if (IsLower(c)) { |
671 | 0 | digit = c - 'a' + 10; |
672 | 0 | } else { |
673 | 0 | digit = c - 'A' + 36; |
674 | 0 | } |
675 | 0 | encoded_number = 62 * encoded_number + digit; |
676 | 0 | } |
677 | 0 | } |
678 | |
|
679 | 0 | if (!Eat('_')) return false; |
680 | 0 | if (!overflowed) value = encoded_number + 1; |
681 | 0 | return true; |
682 | 0 | } |
683 | | |
684 | | // Consumes an identifier from the input, returning true on success. |
685 | | // |
686 | | // A nonzero uppercase_namespace specifies the character after the N in a |
687 | | // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to |
688 | | // write out the name with the conventional decoration for that namespace. |
689 | 0 | ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') { |
690 | | // identifier -> disambiguator? undisambiguated-identifier |
691 | 0 | int disambiguator = 0; |
692 | 0 | if (!ParseDisambiguator(disambiguator)) return false; |
693 | | |
694 | 0 | return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator); |
695 | 0 | } |
696 | | |
697 | | // Consumes from the input an identifier with no preceding disambiguator, |
698 | | // returning true on success. |
699 | | // |
700 | | // When ParseIdentifier calls this, it passes the N<namespace> character and |
701 | | // disambiguator value so that "{closure#42}" and similar forms can be |
702 | | // rendered correctly. |
703 | | // |
704 | | // At other appearances of undisambiguated-identifier in the grammar, this |
705 | | // treatment is not applicable, and the call site omits both arguments. |
706 | | ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier( |
707 | 0 | char uppercase_namespace = '\0', int disambiguator = 0) { |
708 | | // undisambiguated-identifier -> u? decimal-number _? bytes |
709 | 0 | const bool is_punycoded = Eat('u'); |
710 | 0 | if (!IsDigit(Peek())) return false; |
711 | 0 | int num_bytes = 0; |
712 | 0 | if (!ParseDecimalNumber(num_bytes)) return false; |
713 | 0 | (void)Eat('_'); // optional separator, needed if a digit follows |
714 | 0 | if (is_punycoded) { |
715 | 0 | DecodeRustPunycodeOptions options; |
716 | 0 | options.punycode_begin = &encoding_[pos_]; |
717 | 0 | options.punycode_end = &encoding_[pos_] + num_bytes; |
718 | 0 | options.out_begin = out_; |
719 | 0 | options.out_end = out_end_; |
720 | 0 | out_ = DecodeRustPunycode(options); |
721 | 0 | if (out_ == nullptr) return false; |
722 | 0 | pos_ += static_cast<size_t>(num_bytes); |
723 | 0 | } |
724 | | |
725 | | // Emit the beginnings of braced forms like {shim:vtable#0}. |
726 | 0 | if (uppercase_namespace != '\0') { |
727 | 0 | switch (uppercase_namespace) { |
728 | 0 | case 'C': |
729 | 0 | if (!Emit("{closure")) return false; |
730 | 0 | break; |
731 | 0 | case 'S': |
732 | 0 | if (!Emit("{shim")) return false; |
733 | 0 | break; |
734 | 0 | default: |
735 | 0 | if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false; |
736 | 0 | break; |
737 | 0 | } |
738 | 0 | if (num_bytes > 0 && !Emit(":")) return false; |
739 | 0 | } |
740 | | |
741 | | // Emit the name itself. |
742 | 0 | if (!is_punycoded) { |
743 | 0 | for (int i = 0; i < num_bytes; ++i) { |
744 | 0 | const char c = Take(); |
745 | 0 | if (!IsIdentifierChar(c) && |
746 | | // The spec gives toolchains the choice of Punycode or raw UTF-8 for |
747 | | // identifiers containing code points above 0x7f, so accept bytes |
748 | | // with the high bit set. |
749 | 0 | (c & 0x80) == 0) { |
750 | 0 | return false; |
751 | 0 | } |
752 | 0 | if (!EmitChar(c)) return false; |
753 | 0 | } |
754 | 0 | } |
755 | | |
756 | | // Emit the endings of braced forms, e.g., "#42}". |
757 | 0 | if (uppercase_namespace != '\0') { |
758 | 0 | if (!EmitChar('#')) return false; |
759 | 0 | if (!EmitDisambiguator(disambiguator)) return false; |
760 | 0 | if (!EmitChar('}')) return false; |
761 | 0 | } |
762 | | |
763 | 0 | return true; |
764 | 0 | } |
765 | | |
766 | | // Consumes a decimal number like 0 or 123 from the input. On success returns |
767 | | // true and fills value with the encoded value. If the encoded value is too |
768 | | // large or otherwise unparsable, returns false and sets value to -1. |
769 | 0 | ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) { |
770 | 0 | value = -1; |
771 | 0 | if (!IsDigit(Peek())) return false; |
772 | 0 | int encoded_number = Take() - '0'; |
773 | 0 | if (encoded_number == 0) { |
774 | | // Decimal numbers are never encoded with extra leading zeroes. |
775 | 0 | value = 0; |
776 | 0 | return true; |
777 | 0 | } |
778 | 0 | while (IsDigit(Peek()) && |
779 | | // avoid overflow |
780 | 0 | encoded_number < std::numeric_limits<int>::max()/10) { |
781 | 0 | encoded_number = 10 * encoded_number + (Take() - '0'); |
782 | 0 | } |
783 | 0 | if (IsDigit(Peek())) return false; // too big |
784 | 0 | value = encoded_number; |
785 | 0 | return true; |
786 | 0 | } |
787 | | |
788 | | // Consumes a binder of higher-ranked lifetimes if one is present. On success |
789 | | // returns true and discards the encoded lifetime count. On parse failure |
790 | | // returns false. |
791 | 0 | ABSL_MUST_USE_RESULT bool ParseOptionalBinder() { |
792 | | // binder -> G base-62-number |
793 | 0 | if (!Eat('G')) return true; |
794 | 0 | int ignored_binding_count; |
795 | 0 | return ParseBase62Number(ignored_binding_count); |
796 | 0 | } |
797 | | |
798 | | // Consumes a lifetime if one is present. |
799 | | // |
800 | | // On success returns true and discards the lifetime index. We do not print |
801 | | // or even range-check lifetimes because they are a finer detail than other |
802 | | // things we omit from output, such as the entire contents of generic-args. |
803 | | // |
804 | | // On parse failure returns false. |
805 | 0 | ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() { |
806 | | // lifetime -> L base-62-number |
807 | 0 | if (!Eat('L')) return true; |
808 | 0 | int ignored_de_bruijn_index; |
809 | 0 | return ParseBase62Number(ignored_de_bruijn_index); |
810 | 0 | } |
811 | | |
812 | | // Consumes a lifetime just like ParseOptionalLifetime, but returns false if |
813 | | // there is no lifetime here. |
814 | 0 | ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() { |
815 | 0 | if (Peek() != 'L') return false; |
816 | 0 | return ParseOptionalLifetime(); |
817 | 0 | } |
818 | | |
819 | | // Pushes ns onto the namespace stack and returns true if the stack is not |
820 | | // full, else returns false. |
821 | 0 | ABSL_MUST_USE_RESULT bool PushNamespace(char ns) { |
822 | 0 | if (namespace_depth_ == kNamespaceStackSize) return false; |
823 | 0 | namespace_stack_[namespace_depth_++] = ns; |
824 | 0 | return true; |
825 | 0 | } |
826 | | |
827 | | // Pops the last pushed namespace. Requires that the namespace stack is not |
828 | | // empty (namespace_depth_ > 0). |
829 | 0 | char PopNamespace() { return namespace_stack_[--namespace_depth_]; } |
830 | | |
831 | | // Pushes position onto the position stack and returns true if the stack is |
832 | | // not full, else returns false. |
833 | 0 | ABSL_MUST_USE_RESULT bool PushPosition(int position) { |
834 | 0 | if (position_depth_ == kPositionStackSize) return false; |
835 | 0 | position_stack_[position_depth_++] = position; |
836 | 0 | return true; |
837 | 0 | } |
838 | | |
839 | | // Pops the last pushed input position. Requires that the position stack is |
840 | | // not empty (position_depth_ > 0). |
841 | 0 | int PopPosition() { return position_stack_[--position_depth_]; } |
842 | | |
843 | | // Consumes a base-62-number denoting a backref target, pushes the current |
844 | | // input position on the data stack, and sets the input position to the |
845 | | // beginning of the backref target. Returns true on success. Returns false |
846 | | // if parsing failed, the stack is exhausted, or the backref target position |
847 | | // is out of range. |
848 | 0 | ABSL_MUST_USE_RESULT bool BeginBackref() { |
849 | | // backref = B base-62-number (B already consumed) |
850 | | // |
851 | | // Reject backrefs that don't parse, overflow int, or don't point backward. |
852 | | // If the offset looks fine, adjust it to account for the _R prefix. |
853 | 0 | int offset = 0; |
854 | 0 | const int offset_of_this_backref = |
855 | 0 | pos_ - 2 /* _R */ - 1 /* B already consumed */; |
856 | 0 | if (!ParseBase62Number(offset) || offset < 0 || |
857 | 0 | offset >= offset_of_this_backref) { |
858 | 0 | return false; |
859 | 0 | } |
860 | 0 | offset += 2; |
861 | | |
862 | | // Save the old position to restore later. |
863 | 0 | if (!PushPosition(pos_)) return false; |
864 | | |
865 | | // Move the input position to the backref target. |
866 | | // |
867 | | // Note that we do not check whether the new position points to the |
868 | | // beginning of a construct matching the context in which the backref |
869 | | // appeared. We just jump to it and see whether nested parsing succeeds. |
870 | | // We therefore accept various wrong manglings, e.g., a type backref |
871 | | // pointing to an 'l' character inside an identifier, which happens to mean |
872 | | // i32 when parsed as a type mangling. This saves the complexity and RAM |
873 | | // footprint of remembering which offsets began which kinds of |
874 | | // substructures. Existing demanglers take similar shortcuts. |
875 | 0 | pos_ = offset; |
876 | 0 | return true; |
877 | 0 | } |
878 | | |
879 | | // Cleans up after a backref production by restoring the previous input |
880 | | // position from the data stack. |
881 | 0 | void EndBackref() { pos_ = PopPosition(); } |
882 | | |
883 | | // The leftmost recursion_depth_ elements of recursion_stack_ contain the |
884 | | // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed. |
885 | | ReturnAddress recursion_stack_[kStackSize] = {}; |
886 | | int recursion_depth_ = 0; |
887 | | |
888 | | // The leftmost namespace_depth_ elements of namespace_stack_ contain the |
889 | | // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a |
890 | | // closure. |
891 | | char namespace_stack_[kNamespaceStackSize] = {}; |
892 | | int namespace_depth_ = 0; |
893 | | |
894 | | // The leftmost position_depth_ elements of position_stack_ contain the input |
895 | | // positions to return to after fully printing the targets of backrefs. |
896 | | int position_stack_[kPositionStackSize] = {}; |
897 | | int position_depth_ = 0; |
898 | | |
899 | | // Anything parsed while silence_depth_ > 0 contributes nothing to the |
900 | | // demangled output. For constructs omitted from the demangling, such as |
901 | | // impl-path and the contents of generic-args, we will increment |
902 | | // silence_depth_ on the way in and decrement silence_depth_ on the way out. |
903 | | int silence_depth_ = 0; |
904 | | |
905 | | // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is |
906 | | // the next input character to be scanned. |
907 | | int pos_ = 0; |
908 | | const char* encoding_ = nullptr; |
909 | | |
910 | | // Output: *out_ is where the next output character should be written, and |
911 | | // out_end_ points past the last byte of available space. |
912 | | char* out_ = nullptr; |
913 | | char* out_end_ = nullptr; |
914 | | }; |
915 | | |
916 | | } // namespace |
917 | | |
918 | | bool DemangleRustSymbolEncoding(const char* mangled, char* out, |
919 | 0 | size_t out_size) { |
920 | 0 | return RustSymbolParser(mangled, out, out + out_size).Parse(); |
921 | 0 | } |
922 | | |
923 | | } // namespace debugging_internal |
924 | | ABSL_NAMESPACE_END |
925 | | } // namespace absl |