Coverage Report

Created: 2024-09-23 06:29

/src/abseil-cpp/absl/debugging/internal/demangle_rust.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2024 The Abseil Authors
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "absl/debugging/internal/demangle_rust.h"
16
17
#include <cstddef>
18
#include <cstdint>
19
#include <cstring>
20
#include <limits>
21
22
#include "absl/base/attributes.h"
23
#include "absl/base/config.h"
24
#include "absl/debugging/internal/decode_rust_punycode.h"
25
26
namespace absl {
27
ABSL_NAMESPACE_BEGIN
28
namespace debugging_internal {
29
30
namespace {
31
32
// Same step limit as the C++ demangler in demangle.cc uses.
33
constexpr int kMaxReturns = 1 << 17;
34
35
0
bool IsDigit(char c) { return '0' <= c && c <= '9'; }
36
0
bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
37
0
bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; }
38
0
bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); }
39
0
bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; }
40
0
bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); }
41
42
0
const char* BasicTypeName(char c) {
43
0
  switch (c) {
44
0
    case 'a': return "i8";
45
0
    case 'b': return "bool";
46
0
    case 'c': return "char";
47
0
    case 'd': return "f64";
48
0
    case 'e': return "str";
49
0
    case 'f': return "f32";
50
0
    case 'h': return "u8";
51
0
    case 'i': return "isize";
52
0
    case 'j': return "usize";
53
0
    case 'l': return "i32";
54
0
    case 'm': return "u32";
55
0
    case 'n': return "i128";
56
0
    case 'o': return "u128";
57
0
    case 'p': return "_";
58
0
    case 's': return "i16";
59
0
    case 't': return "u16";
60
0
    case 'u': return "()";
61
0
    case 'v': return "...";
62
0
    case 'x': return "i64";
63
0
    case 'y': return "u64";
64
0
    case 'z': return "!";
65
0
  }
66
0
  return nullptr;
67
0
}
68
69
// Parser for Rust symbol mangling v0, whose grammar is defined here:
70
//
71
// https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary
72
class RustSymbolParser {
73
 public:
74
  // Prepares to demangle the given encoding, a Rust symbol name starting with
75
  // _R, into the output buffer [out, out_end).  The caller is expected to
76
  // continue by calling the new object's Parse function.
77
  RustSymbolParser(const char* encoding, char* out, char* const out_end)
78
0
      : encoding_(encoding), out_(out), out_end_(out_end) {
79
0
    if (out_ != out_end_) *out_ = '\0';
80
0
  }
81
82
  // Parses the constructor's encoding argument, writing output into the range
83
  // [out, out_end).  Returns true on success and false for input whose
84
  // structure was not recognized or exceeded implementation limits, such as by
85
  // nesting structures too deep.  In either case *this should not be used
86
  // again.
87
0
  ABSL_MUST_USE_RESULT bool Parse() && {
88
    // Recursively parses the grammar production named by callee, then resumes
89
    // execution at the next statement.
90
    //
91
    // Recursive-descent parsing is a beautifully readable translation of a
92
    // grammar, but it risks stack overflow if implemented by naive recursion on
93
    // the C++ call stack.  So we simulate recursion by goto and switch instead,
94
    // keeping a bounded stack of "return addresses" in the recursion_stack_
95
    // member.
96
    //
97
    // The callee argument is a statement label.  We goto that label after
98
    // saving the "return address" on recursion_stack_.  The next continue
99
    // statement in the for loop below "returns" from this "call".
100
    //
101
    // The caller argument names the return point.  Each value of caller must
102
    // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the
103
    // definition of enum ReturnAddress.  The switch implements the control
104
    // transfer from the end of a "called" subroutine back to the statement
105
    // after the "call".
106
    //
107
    // Note that not all the grammar productions have to be packed into the
108
    // switch, but only those which appear in a cycle in the grammar.  Anything
109
    // acyclic can be written as ordinary functions and function calls, e.g.,
110
    // ParseIdentifier.
111
0
#define ABSL_DEMANGLER_RECURSE(callee, caller) \
112
0
    do { \
113
0
      if (recursion_depth_ == kStackSize) return false; \
114
      /* The next continue will switch on this saved value ... */ \
115
0
      recursion_stack_[recursion_depth_++] = caller; \
116
0
      goto callee; \
117
      /* ... and will land here, resuming the suspended code. */ \
118
0
      case caller: {} \
119
0
    } while (0)
120
121
    // Parse the encoding, counting completed recursive calls to guard against
122
    // excessively complex input and infinite-loop bugs.
123
0
    int iter = 0;
124
0
    goto whole_encoding;
125
0
    for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) {
126
      // This switch resumes the code path most recently suspended by
127
      // ABSL_DEMANGLER_RECURSE.
128
0
      switch (recursion_stack_[--recursion_depth_]) {
129
        //
130
        // symbol-name ->
131
        // _R decimal-number? path instantiating-crate? vendor-specific-suffix?
132
0
        whole_encoding:
133
0
          if (!Eat('_') || !Eat('R')) return false;
134
          // decimal-number? is always empty today, so proceed to path, which
135
          // can't start with a decimal digit.
136
0
          ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate);
137
0
          if (IsAlpha(Peek())) {
138
0
            ++silence_depth_;  // Print nothing more from here on.
139
0
            ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix);
140
0
          }
141
0
          switch (Take()) {
142
0
            case '.': case '$': case '\0': return true;
143
0
          }
144
0
          return false;  // unexpected trailing content
145
146
        // path -> crate-root | inherent-impl | trait-impl | trait-definition |
147
        //         nested-path | generic-args | backref
148
        //
149
        // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch
150
        // (which would hide the generated case label).  Thus we jump out of the
151
        // inner switch with gotos before performing any fake recursion.
152
0
        path:
153
0
          switch (Take()) {
154
0
            case 'C': goto crate_root;
155
0
            case 'M': goto inherent_impl;
156
0
            case 'X': goto trait_impl;
157
0
            case 'Y': goto trait_definition;
158
0
            case 'N': goto nested_path;
159
0
            case 'I': goto generic_args;
160
0
            case 'B': goto path_backref;
161
0
            default: return false;
162
0
          }
163
164
        // crate-root -> C identifier (C consumed above)
165
0
        crate_root:
166
0
          if (!ParseIdentifier()) return false;
167
0
          continue;
168
169
        // inherent-impl -> M impl-path type (M already consumed)
170
0
        inherent_impl:
171
0
          if (!Emit("<")) return false;
172
0
          ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType);
173
0
          ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding);
174
0
          if (!Emit(">")) return false;
175
0
          continue;
176
177
        // trait-impl -> X impl-path type path (X already consumed)
178
0
        trait_impl:
179
0
          if (!Emit("<")) return false;
180
0
          ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType);
181
0
          ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix);
182
0
          if (!Emit(" as ")) return false;
183
0
          ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding);
184
0
          if (!Emit(">")) return false;
185
0
          continue;
186
187
        // impl-path -> disambiguator? path (but never print it!)
188
0
        impl_path:
189
0
          ++silence_depth_;
190
0
          {
191
0
            int ignored_disambiguator;
192
0
            if (!ParseDisambiguator(ignored_disambiguator)) return false;
193
0
          }
194
0
          ABSL_DEMANGLER_RECURSE(path, kImplPathEnding);
195
0
          --silence_depth_;
196
0
          continue;
197
198
        // trait-definition -> Y type path (Y already consumed)
199
0
        trait_definition:
200
0
          if (!Emit("<")) return false;
201
0
          ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix);
202
0
          if (!Emit(" as ")) return false;
203
0
          ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding);
204
0
          if (!Emit(">")) return false;
205
0
          continue;
206
207
        // nested-path -> N namespace path identifier (N already consumed)
208
        // namespace -> lower | upper
209
0
        nested_path:
210
          // Uppercase namespaces must be saved on a stack so we can print
211
          // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed.
212
0
          if (IsUpper(Peek())) {
213
0
            if (!PushNamespace(Take())) return false;
214
0
            ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace);
215
0
            if (!Emit("::")) return false;
216
0
            if (!ParseIdentifier(PopNamespace())) return false;
217
0
            continue;
218
0
          }
219
220
          // Lowercase namespaces, however, are never represented in the output;
221
          // they all emit just ::name.
222
0
          if (IsLower(Take())) {
223
0
            ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace);
224
0
            if (!Emit("::")) return false;
225
0
            if (!ParseIdentifier()) return false;
226
0
            continue;
227
0
          }
228
229
          // Neither upper or lower
230
0
          return false;
231
232
        // type -> basic-type | array-type | slice-type | tuple-type |
233
        //         ref-type | mut-ref-type | const-ptr-type | mut-ptr-type |
234
        //         fn-type | dyn-trait-type | path | backref
235
        //
236
        // We use ifs instead of switch (Take()) because the default case jumps
237
        // to path, which will need to see the first character not yet Taken
238
        // from the input.  Because we do not use a nested switch here,
239
        // ABSL_DEMANGLER_RECURSE works fine in the 'S' case.
240
0
        type:
241
0
          if (IsLower(Peek())) {
242
0
            const char* type_name = BasicTypeName(Take());
243
0
            if (type_name == nullptr || !Emit(type_name)) return false;
244
0
            continue;
245
0
          }
246
0
          if (Eat('A')) {
247
            // array-type = A type const
248
0
            if (!Emit("[")) return false;
249
0
            ABSL_DEMANGLER_RECURSE(type, kArraySize);
250
0
            if (!Emit("; ")) return false;
251
0
            ABSL_DEMANGLER_RECURSE(constant, kFinishArray);
252
0
            if (!Emit("]")) return false;
253
0
            continue;
254
0
          }
255
0
          if (Eat('S')) {
256
0
            if (!Emit("[")) return false;
257
0
            ABSL_DEMANGLER_RECURSE(type, kSliceEnding);
258
0
            if (!Emit("]")) return false;
259
0
            continue;
260
0
          }
261
0
          if (Eat('T')) goto tuple_type;
262
0
          if (Eat('R')) {
263
0
            if (!Emit("&")) return false;
264
0
            if (!ParseOptionalLifetime()) return false;
265
0
            goto type;
266
0
          }
267
0
          if (Eat('Q')) {
268
0
            if (!Emit("&mut ")) return false;
269
0
            if (!ParseOptionalLifetime()) return false;
270
0
            goto type;
271
0
          }
272
0
          if (Eat('P')) {
273
0
            if (!Emit("*const ")) return false;
274
0
            goto type;
275
0
          }
276
0
          if (Eat('O')) {
277
0
            if (!Emit("*mut ")) return false;
278
0
            goto type;
279
0
          }
280
0
          if (Eat('F')) goto fn_type;
281
0
          if (Eat('D')) goto dyn_trait_type;
282
0
          if (Eat('B')) goto type_backref;
283
0
          goto path;
284
285
        // tuple-type -> T type* E (T already consumed)
286
0
        tuple_type:
287
0
          if (!Emit("(")) return false;
288
289
          // The toolchain should call the unit type u instead of TE, but the
290
          // grammar and other demanglers also recognize TE, so we do too.
291
0
          if (Eat('E')) {
292
0
            if (!Emit(")")) return false;
293
0
            continue;
294
0
          }
295
296
          // A tuple with one element is rendered (type,) instead of (type).
297
0
          ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement);
298
0
          if (Eat('E')) {
299
0
            if (!Emit(",)")) return false;
300
0
            continue;
301
0
          }
302
303
          // A tuple with two elements is of course (x, y).
304
0
          if (!Emit(", ")) return false;
305
0
          ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement);
306
0
          if (Eat('E')) {
307
0
            if (!Emit(")")) return false;
308
0
            continue;
309
0
          }
310
311
          // And (x, y, z) for three elements.
312
0
          if (!Emit(", ")) return false;
313
0
          ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement);
314
0
          if (Eat('E')) {
315
0
            if (!Emit(")")) return false;
316
0
            continue;
317
0
          }
318
319
          // For longer tuples we write (x, y, z, ...), printing none of the
320
          // content of the fourth and later types.  Thus we avoid exhausting
321
          // output buffers and human readers' patience when some library has a
322
          // long tuple as an implementation detail, without having to
323
          // completely obfuscate all tuples.
324
0
          if (!Emit(", ...)")) return false;
325
0
          ++silence_depth_;
326
0
          while (!Eat('E')) {
327
0
            ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement);
328
0
          }
329
0
          --silence_depth_;
330
0
          continue;
331
332
        // fn-type -> F fn-sig (F already consumed)
333
        // fn-sig -> binder? U? (K abi)? type* E type
334
        // abi -> C | undisambiguated-identifier
335
        //
336
        // We follow the C++ demangler in suppressing details of function
337
        // signatures.  Every function type is rendered "fn...".
338
0
        fn_type:
339
0
          if (!Emit("fn...")) return false;
340
0
          ++silence_depth_;
341
0
          if (!ParseOptionalBinder()) return false;
342
0
          (void)Eat('U');
343
0
          if (Eat('K')) {
344
0
            if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false;
345
0
          }
346
0
          while (!Eat('E')) {
347
0
            ABSL_DEMANGLER_RECURSE(type, kContinueParameterList);
348
0
          }
349
0
          ABSL_DEMANGLER_RECURSE(type, kFinishFn);
350
0
          --silence_depth_;
351
0
          continue;
352
353
        // dyn-trait-type -> D dyn-bounds lifetime (D already consumed)
354
        // dyn-bounds -> binder? dyn-trait* E
355
        //
356
        // The grammar strangely allows an empty trait list, even though the
357
        // compiler should never output one.  We follow existing demanglers in
358
        // rendering DEL_ as "dyn ".
359
        //
360
        // Because auto traits lengthen a type name considerably without
361
        // providing much value to a search for related source code, it would be
362
        // desirable to abbreviate
363
        //     dyn main::Trait + std::marker::Copy + std::marker::Send
364
        // to
365
        //     dyn main::Trait + ...,
366
        // eliding the auto traits.  But it is difficult to do so correctly, in
367
        // part because there is no guarantee that the mangling will list the
368
        // main trait first.  So we just print all the traits in their order of
369
        // appearance in the mangled name.
370
0
        dyn_trait_type:
371
0
          if (!Emit("dyn ")) return false;
372
0
          if (!ParseOptionalBinder()) return false;
373
0
          if (!Eat('E')) {
374
0
            ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits);
375
0
            while (!Eat('E')) {
376
0
              if (!Emit(" + ")) return false;
377
0
              ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits);
378
0
            }
379
0
          }
380
0
          if (!ParseRequiredLifetime()) return false;
381
0
          continue;
382
383
        // dyn-trait -> path dyn-trait-assoc-binding*
384
        // dyn-trait-assoc-binding -> p undisambiguated-identifier type
385
        //
386
        // We render nonempty binding lists as <>, omitting their contents as
387
        // for generic-args.
388
0
        dyn_trait:
389
0
          ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait);
390
0
          if (Peek() == 'p') {
391
0
            if (!Emit("<>")) return false;
392
0
            ++silence_depth_;
393
0
            while (Eat('p')) {
394
0
              if (!ParseUndisambiguatedIdentifier()) return false;
395
0
              ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding);
396
0
            }
397
0
            --silence_depth_;
398
0
          }
399
0
          continue;
400
401
        // const -> type const-data | p | backref
402
        //
403
        // const is a C++ keyword, so we use the label `constant` instead.
404
0
        constant:
405
0
          if (Eat('B')) goto const_backref;
406
0
          if (Eat('p')) {
407
0
            if (!Emit("_")) return false;
408
0
            continue;
409
0
          }
410
411
          // Scan the type without printing it.
412
          //
413
          // The Rust language restricts the type of a const generic argument
414
          // much more than the mangling grammar does.  We do not enforce this.
415
          //
416
          // We also do not bother printing false, true, 'A', and '\u{abcd}' for
417
          // the types bool and char.  Because we do not print generic-args
418
          // contents, we expect to print constants only in array sizes, and
419
          // those should not be bool or char.
420
0
          ++silence_depth_;
421
0
          ABSL_DEMANGLER_RECURSE(type, kConstData);
422
0
          --silence_depth_;
423
424
          // const-data -> n? hex-digit* _
425
          //
426
          // Although the grammar doesn't say this, existing demanglers expect
427
          // that zero is 0, not an empty digit sequence, and no nonzero value
428
          // may have leading zero digits.  Also n0_ is accepted and printed as
429
          // -0, though a toolchain will probably never write that encoding.
430
0
          if (Eat('n') && !EmitChar('-')) return false;
431
0
          if (!Emit("0x")) return false;
432
0
          if (Eat('0')) {
433
0
            if (!EmitChar('0')) return false;
434
0
            if (!Eat('_')) return false;
435
0
            continue;
436
0
          }
437
0
          while (IsLowerHexDigit(Peek())) {
438
0
            if (!EmitChar(Take())) return false;
439
0
          }
440
0
          if (!Eat('_')) return false;
441
0
          continue;
442
443
        // generic-args -> I path generic-arg* E (I already consumed)
444
        //
445
        // We follow the C++ demangler in omitting all the arguments from the
446
        // output, printing only the list opening and closing tokens.
447
0
        generic_args:
448
0
          ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList);
449
0
          if (!Emit("::<>")) return false;
450
0
          ++silence_depth_;
451
0
          while (!Eat('E')) {
452
0
            ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList);
453
0
          }
454
0
          --silence_depth_;
455
0
          continue;
456
457
        // generic-arg -> lifetime | type | K const
458
0
        generic_arg:
459
0
          if (Peek() == 'L') {
460
0
            if (!ParseOptionalLifetime()) return false;
461
0
            continue;
462
0
          }
463
0
          if (Eat('K')) goto constant;
464
0
          goto type;
465
466
        // backref -> B base-62-number (B already consumed)
467
        //
468
        // The BeginBackref call parses and range-checks the base-62-number.  We
469
        // always do that much.
470
        //
471
        // The recursive call parses and prints what the backref points at.  We
472
        // save CPU and stack by skipping this work if the output would be
473
        // suppressed anyway.
474
0
        path_backref:
475
0
          if (!BeginBackref()) return false;
476
0
          if (silence_depth_ == 0) {
477
0
            ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding);
478
0
          }
479
0
          EndBackref();
480
0
          continue;
481
482
        // This represents the same backref production as in path_backref but
483
        // parses the target as a type instead of a path.
484
0
        type_backref:
485
0
          if (!BeginBackref()) return false;
486
0
          if (silence_depth_ == 0) {
487
0
            ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding);
488
0
          }
489
0
          EndBackref();
490
0
          continue;
491
492
0
        const_backref:
493
0
          if (!BeginBackref()) return false;
494
0
          if (silence_depth_ == 0) {
495
0
            ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding);
496
0
          }
497
0
          EndBackref();
498
0
          continue;
499
0
      }
500
0
    }
501
502
0
    return false;  // hit iteration limit or a bug in our stack handling
503
0
  }
504
505
 private:
506
  // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls.
507
  enum ReturnAddress : uint8_t {
508
    kInstantiatingCrate,
509
    kVendorSpecificSuffix,
510
    kIdentifierInUppercaseNamespace,
511
    kIdentifierInLowercaseNamespace,
512
    kInherentImplType,
513
    kInherentImplEnding,
514
    kTraitImplType,
515
    kTraitImplInfix,
516
    kTraitImplEnding,
517
    kImplPathEnding,
518
    kTraitDefinitionInfix,
519
    kTraitDefinitionEnding,
520
    kArraySize,
521
    kFinishArray,
522
    kSliceEnding,
523
    kAfterFirstTupleElement,
524
    kAfterSecondTupleElement,
525
    kAfterThirdTupleElement,
526
    kAfterSubsequentTupleElement,
527
    kContinueParameterList,
528
    kFinishFn,
529
    kBeginAutoTraits,
530
    kContinueAutoTraits,
531
    kContinueDynTrait,
532
    kContinueAssocBinding,
533
    kConstData,
534
    kBeginGenericArgList,
535
    kContinueGenericArgList,
536
    kPathBackrefEnding,
537
    kTypeBackrefEnding,
538
    kConstantBackrefEnding,
539
  };
540
541
  // Element counts for the stack arrays.  Larger stack sizes accommodate more
542
  // deeply nested names at the cost of a larger footprint on the C++ call
543
  // stack.
544
  enum {
545
    // Maximum recursive calls outstanding at one time.
546
    kStackSize = 256,
547
548
    // Maximum N<uppercase> nested-paths open at once.  We do not expect
549
    // closures inside closures inside closures as much as functions inside
550
    // modules inside other modules, so we can use a smaller array here.
551
    kNamespaceStackSize = 64,
552
553
    // Maximum number of nested backrefs.  We can keep this stack pretty small
554
    // because we do not follow backrefs inside generic-args or other contexts
555
    // that suppress printing, so deep stacking is unlikely in practice.
556
    kPositionStackSize = 16,
557
  };
558
559
  // Returns the next input character without consuming it.
560
0
  char Peek() const { return encoding_[pos_]; }
561
562
  // Consumes and returns the next input character.
563
0
  char Take() { return encoding_[pos_++]; }
564
565
  // If the next input character is the given character, consumes it and returns
566
  // true; otherwise returns false without consuming a character.
567
0
  ABSL_MUST_USE_RESULT bool Eat(char want) {
568
0
    if (encoding_[pos_] != want) return false;
569
0
    ++pos_;
570
0
    return true;
571
0
  }
572
573
  // Provided there is enough remaining output space, appends c to the output,
574
  // writing a fresh NUL terminator afterward, and returns true.  Returns false
575
  // if the output buffer had less than two bytes free.
576
0
  ABSL_MUST_USE_RESULT bool EmitChar(char c) {
577
0
    if (silence_depth_ > 0) return true;
578
0
    if (out_end_ - out_ < 2) return false;
579
0
    *out_++ = c;
580
0
    *out_ = '\0';
581
0
    return true;
582
0
  }
583
584
  // Provided there is enough remaining output space, appends the C string token
585
  // to the output, followed by a NUL character, and returns true.  Returns
586
  // false if not everything fit into the output buffer.
587
0
  ABSL_MUST_USE_RESULT bool Emit(const char* token) {
588
0
    if (silence_depth_ > 0) return true;
589
0
    const size_t token_length = std::strlen(token);
590
0
    const size_t bytes_to_copy = token_length + 1;  // token and final NUL
591
0
    if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false;
592
0
    std::memcpy(out_, token, bytes_to_copy);
593
0
    out_ += token_length;
594
0
    return true;
595
0
  }
596
597
  // Provided there is enough remaining output space, appends the decimal form
598
  // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the
599
  // output, followed by a NUL character, and returns true.  Returns false if
600
  // not everything fit into the output buffer.
601
0
  ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) {
602
0
    if (disambiguator < 0) return EmitChar('?');  // parsed but too large
603
0
    if (disambiguator == 0) return EmitChar('0');
604
    // Convert disambiguator to decimal text.  Three digits per byte is enough
605
    // because 999 > 256.  The bound will remain correct even if future
606
    // maintenance changes the type of the disambiguator variable.
607
0
    char digits[3 * sizeof(disambiguator)] = {};
608
0
    size_t leading_digit_index = sizeof(digits) - 1;
609
0
    for (; disambiguator > 0; disambiguator /= 10) {
610
0
      digits[--leading_digit_index] =
611
0
          static_cast<char>('0' + disambiguator % 10);
612
0
    }
613
0
    return Emit(digits + leading_digit_index);
614
0
  }
615
616
  // Consumes an optional disambiguator (s123_) from the input.
617
  //
618
  // On success returns true and fills value with the encoded value if it was
619
  // not too big, otherwise with -1.  If the optional disambiguator was omitted,
620
  // value is 0.  On parse failure returns false and sets value to -1.
621
0
  ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) {
622
0
    value = -1;
623
624
    // disambiguator = s base-62-number
625
    //
626
    // Disambiguators are optional.  An omitted disambiguator is zero.
627
0
    if (!Eat('s')) {
628
0
      value = 0;
629
0
      return true;
630
0
    }
631
0
    int base_62_value = 0;
632
0
    if (!ParseBase62Number(base_62_value)) return false;
633
0
    value = base_62_value < 0 ? -1 : base_62_value + 1;
634
0
    return true;
635
0
  }
636
637
  // Consumes a base-62 number like _ or 123_ from the input.
638
  //
639
  // On success returns true and fills value with the encoded value if it was
640
  // not too big, otherwise with -1.  On parse failure returns false and sets
641
  // value to -1.
642
0
  ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) {
643
0
    value = -1;
644
645
    // base-62-number = (digit | lower | upper)* _
646
    //
647
    // An empty base-62 digit sequence means 0.
648
0
    if (Eat('_')) {
649
0
      value = 0;
650
0
      return true;
651
0
    }
652
653
    // A nonempty digit sequence denotes its base-62 value plus 1.
654
0
    int encoded_number = 0;
655
0
    bool overflowed = false;
656
0
    while (IsAlpha(Peek()) || IsDigit(Peek())) {
657
0
      const char c = Take();
658
0
      if (encoded_number >= std::numeric_limits<int>::max()/62) {
659
        // If we are close to overflowing an int, keep parsing but stop updating
660
        // encoded_number and remember to return -1 at the end.  The point is to
661
        // avoid undefined behavior while parsing crate-root disambiguators,
662
        // which are large in practice but not shown in demangling, while
663
        // successfully computing closure and shim disambiguators, which are
664
        // typically small and are printed out.
665
0
        overflowed = true;
666
0
      } else {
667
0
        int digit;
668
0
        if (IsDigit(c)) {
669
0
          digit = c - '0';
670
0
        } else if (IsLower(c)) {
671
0
          digit = c - 'a' + 10;
672
0
        } else {
673
0
          digit = c - 'A' + 36;
674
0
        }
675
0
        encoded_number = 62 * encoded_number + digit;
676
0
      }
677
0
    }
678
679
0
    if (!Eat('_')) return false;
680
0
    if (!overflowed) value = encoded_number + 1;
681
0
    return true;
682
0
  }
683
684
  // Consumes an identifier from the input, returning true on success.
685
  //
686
  // A nonzero uppercase_namespace specifies the character after the N in a
687
  // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to
688
  // write out the name with the conventional decoration for that namespace.
689
0
  ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') {
690
    // identifier -> disambiguator? undisambiguated-identifier
691
0
    int disambiguator = 0;
692
0
    if (!ParseDisambiguator(disambiguator)) return false;
693
694
0
    return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator);
695
0
  }
696
697
  // Consumes from the input an identifier with no preceding disambiguator,
698
  // returning true on success.
699
  //
700
  // When ParseIdentifier calls this, it passes the N<namespace> character and
701
  // disambiguator value so that "{closure#42}" and similar forms can be
702
  // rendered correctly.
703
  //
704
  // At other appearances of undisambiguated-identifier in the grammar, this
705
  // treatment is not applicable, and the call site omits both arguments.
706
  ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier(
707
0
      char uppercase_namespace = '\0', int disambiguator = 0) {
708
    // undisambiguated-identifier -> u? decimal-number _? bytes
709
0
    const bool is_punycoded = Eat('u');
710
0
    if (!IsDigit(Peek())) return false;
711
0
    int num_bytes = 0;
712
0
    if (!ParseDecimalNumber(num_bytes)) return false;
713
0
    (void)Eat('_');  // optional separator, needed if a digit follows
714
0
    if (is_punycoded) {
715
0
      DecodeRustPunycodeOptions options;
716
0
      options.punycode_begin = &encoding_[pos_];
717
0
      options.punycode_end = &encoding_[pos_] + num_bytes;
718
0
      options.out_begin = out_;
719
0
      options.out_end = out_end_;
720
0
      out_ = DecodeRustPunycode(options);
721
0
      if (out_ == nullptr) return false;
722
0
      pos_ += static_cast<size_t>(num_bytes);
723
0
    }
724
725
    // Emit the beginnings of braced forms like {shim:vtable#0}.
726
0
    if (uppercase_namespace != '\0') {
727
0
      switch (uppercase_namespace) {
728
0
        case 'C':
729
0
          if (!Emit("{closure")) return false;
730
0
          break;
731
0
        case 'S':
732
0
          if (!Emit("{shim")) return false;
733
0
          break;
734
0
        default:
735
0
          if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false;
736
0
          break;
737
0
      }
738
0
      if (num_bytes > 0 && !Emit(":")) return false;
739
0
    }
740
741
    // Emit the name itself.
742
0
    if (!is_punycoded) {
743
0
      for (int i = 0; i < num_bytes; ++i) {
744
0
        const char c = Take();
745
0
        if (!IsIdentifierChar(c) &&
746
            // The spec gives toolchains the choice of Punycode or raw UTF-8 for
747
            // identifiers containing code points above 0x7f, so accept bytes
748
            // with the high bit set.
749
0
            (c & 0x80) == 0) {
750
0
          return false;
751
0
        }
752
0
        if (!EmitChar(c)) return false;
753
0
      }
754
0
    }
755
756
    // Emit the endings of braced forms, e.g., "#42}".
757
0
    if (uppercase_namespace != '\0') {
758
0
      if (!EmitChar('#')) return false;
759
0
      if (!EmitDisambiguator(disambiguator)) return false;
760
0
      if (!EmitChar('}')) return false;
761
0
    }
762
763
0
    return true;
764
0
  }
765
766
  // Consumes a decimal number like 0 or 123 from the input.  On success returns
767
  // true and fills value with the encoded value.  If the encoded value is too
768
  // large or otherwise unparsable, returns false and sets value to -1.
769
0
  ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) {
770
0
    value = -1;
771
0
    if (!IsDigit(Peek())) return false;
772
0
    int encoded_number = Take() - '0';
773
0
    if (encoded_number == 0) {
774
      // Decimal numbers are never encoded with extra leading zeroes.
775
0
      value = 0;
776
0
      return true;
777
0
    }
778
0
    while (IsDigit(Peek()) &&
779
           // avoid overflow
780
0
           encoded_number < std::numeric_limits<int>::max()/10) {
781
0
      encoded_number = 10 * encoded_number + (Take() - '0');
782
0
    }
783
0
    if (IsDigit(Peek())) return false;  // too big
784
0
    value = encoded_number;
785
0
    return true;
786
0
  }
787
788
  // Consumes a binder of higher-ranked lifetimes if one is present.  On success
789
  // returns true and discards the encoded lifetime count.  On parse failure
790
  // returns false.
791
0
  ABSL_MUST_USE_RESULT bool ParseOptionalBinder() {
792
    // binder -> G base-62-number
793
0
    if (!Eat('G')) return true;
794
0
    int ignored_binding_count;
795
0
    return ParseBase62Number(ignored_binding_count);
796
0
  }
797
798
  // Consumes a lifetime if one is present.
799
  //
800
  // On success returns true and discards the lifetime index.  We do not print
801
  // or even range-check lifetimes because they are a finer detail than other
802
  // things we omit from output, such as the entire contents of generic-args.
803
  //
804
  // On parse failure returns false.
805
0
  ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() {
806
    // lifetime -> L base-62-number
807
0
    if (!Eat('L')) return true;
808
0
    int ignored_de_bruijn_index;
809
0
    return ParseBase62Number(ignored_de_bruijn_index);
810
0
  }
811
812
  // Consumes a lifetime just like ParseOptionalLifetime, but returns false if
813
  // there is no lifetime here.
814
0
  ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() {
815
0
    if (Peek() != 'L') return false;
816
0
    return ParseOptionalLifetime();
817
0
  }
818
819
  // Pushes ns onto the namespace stack and returns true if the stack is not
820
  // full, else returns false.
821
0
  ABSL_MUST_USE_RESULT bool PushNamespace(char ns) {
822
0
    if (namespace_depth_ == kNamespaceStackSize) return false;
823
0
    namespace_stack_[namespace_depth_++] = ns;
824
0
    return true;
825
0
  }
826
827
  // Pops the last pushed namespace.  Requires that the namespace stack is not
828
  // empty (namespace_depth_ > 0).
829
0
  char PopNamespace() { return namespace_stack_[--namespace_depth_]; }
830
831
  // Pushes position onto the position stack and returns true if the stack is
832
  // not full, else returns false.
833
0
  ABSL_MUST_USE_RESULT bool PushPosition(int position) {
834
0
    if (position_depth_ == kPositionStackSize) return false;
835
0
    position_stack_[position_depth_++] = position;
836
0
    return true;
837
0
  }
838
839
  // Pops the last pushed input position.  Requires that the position stack is
840
  // not empty (position_depth_ > 0).
841
0
  int PopPosition() { return position_stack_[--position_depth_]; }
842
843
  // Consumes a base-62-number denoting a backref target, pushes the current
844
  // input position on the data stack, and sets the input position to the
845
  // beginning of the backref target.  Returns true on success.  Returns false
846
  // if parsing failed, the stack is exhausted, or the backref target position
847
  // is out of range.
848
0
  ABSL_MUST_USE_RESULT bool BeginBackref() {
849
    // backref = B base-62-number (B already consumed)
850
    //
851
    // Reject backrefs that don't parse, overflow int, or don't point backward.
852
    // If the offset looks fine, adjust it to account for the _R prefix.
853
0
    int offset = 0;
854
0
    const int offset_of_this_backref =
855
0
        pos_ - 2 /* _R */ - 1 /* B already consumed */;
856
0
    if (!ParseBase62Number(offset) || offset < 0 ||
857
0
        offset >= offset_of_this_backref) {
858
0
      return false;
859
0
    }
860
0
    offset += 2;
861
862
    // Save the old position to restore later.
863
0
    if (!PushPosition(pos_)) return false;
864
865
    // Move the input position to the backref target.
866
    //
867
    // Note that we do not check whether the new position points to the
868
    // beginning of a construct matching the context in which the backref
869
    // appeared.  We just jump to it and see whether nested parsing succeeds.
870
    // We therefore accept various wrong manglings, e.g., a type backref
871
    // pointing to an 'l' character inside an identifier, which happens to mean
872
    // i32 when parsed as a type mangling.  This saves the complexity and RAM
873
    // footprint of remembering which offsets began which kinds of
874
    // substructures.  Existing demanglers take similar shortcuts.
875
0
    pos_ = offset;
876
0
    return true;
877
0
  }
878
879
  // Cleans up after a backref production by restoring the previous input
880
  // position from the data stack.
881
0
  void EndBackref() { pos_ = PopPosition(); }
882
883
  // The leftmost recursion_depth_ elements of recursion_stack_ contain the
884
  // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed.
885
  ReturnAddress recursion_stack_[kStackSize] = {};
886
  int recursion_depth_ = 0;
887
888
  // The leftmost namespace_depth_ elements of namespace_stack_ contain the
889
  // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a
890
  // closure.
891
  char namespace_stack_[kNamespaceStackSize] = {};
892
  int namespace_depth_ = 0;
893
894
  // The leftmost position_depth_ elements of position_stack_ contain the input
895
  // positions to return to after fully printing the targets of backrefs.
896
  int position_stack_[kPositionStackSize] = {};
897
  int position_depth_ = 0;
898
899
  // Anything parsed while silence_depth_ > 0 contributes nothing to the
900
  // demangled output.  For constructs omitted from the demangling, such as
901
  // impl-path and the contents of generic-args, we will increment
902
  // silence_depth_ on the way in and decrement silence_depth_ on the way out.
903
  int silence_depth_ = 0;
904
905
  // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is
906
  // the next input character to be scanned.
907
  int pos_ = 0;
908
  const char* encoding_ = nullptr;
909
910
  // Output: *out_ is where the next output character should be written, and
911
  // out_end_ points past the last byte of available space.
912
  char* out_ = nullptr;
913
  char* out_end_ = nullptr;
914
};
915
916
}  // namespace
917
918
bool DemangleRustSymbolEncoding(const char* mangled, char* out,
919
0
                                size_t out_size) {
920
0
  return RustSymbolParser(mangled, out, out + out_size).Parse();
921
0
}
922
923
}  // namespace debugging_internal
924
ABSL_NAMESPACE_END
925
}  // namespace absl