Coverage Report

Created: 2023-09-25 06:27

/src/abseil-cpp/absl/debugging/internal/demangle.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2018 The Abseil Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
// For reference check out:
16
// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
17
//
18
// Note that we only have partial C++11 support yet.
19
20
#include "absl/debugging/internal/demangle.h"
21
22
#include <cstdint>
23
#include <cstdio>
24
#include <limits>
25
26
namespace absl {
27
ABSL_NAMESPACE_BEGIN
28
namespace debugging_internal {
29
30
typedef struct {
31
  const char *abbrev;
32
  const char *real_name;
33
  // Number of arguments in <expression> context, or 0 if disallowed.
34
  int arity;
35
} AbbrevPair;
36
37
// List of operators from Itanium C++ ABI.
38
static const AbbrevPair kOperatorList[] = {
39
    // New has special syntax (not currently supported).
40
    {"nw", "new", 0},
41
    {"na", "new[]", 0},
42
43
    // Works except that the 'gs' prefix is not supported.
44
    {"dl", "delete", 1},
45
    {"da", "delete[]", 1},
46
47
    {"ps", "+", 1},  // "positive"
48
    {"ng", "-", 1},  // "negative"
49
    {"ad", "&", 1},  // "address-of"
50
    {"de", "*", 1},  // "dereference"
51
    {"co", "~", 1},
52
53
    {"pl", "+", 2},
54
    {"mi", "-", 2},
55
    {"ml", "*", 2},
56
    {"dv", "/", 2},
57
    {"rm", "%", 2},
58
    {"an", "&", 2},
59
    {"or", "|", 2},
60
    {"eo", "^", 2},
61
    {"aS", "=", 2},
62
    {"pL", "+=", 2},
63
    {"mI", "-=", 2},
64
    {"mL", "*=", 2},
65
    {"dV", "/=", 2},
66
    {"rM", "%=", 2},
67
    {"aN", "&=", 2},
68
    {"oR", "|=", 2},
69
    {"eO", "^=", 2},
70
    {"ls", "<<", 2},
71
    {"rs", ">>", 2},
72
    {"lS", "<<=", 2},
73
    {"rS", ">>=", 2},
74
    {"eq", "==", 2},
75
    {"ne", "!=", 2},
76
    {"lt", "<", 2},
77
    {"gt", ">", 2},
78
    {"le", "<=", 2},
79
    {"ge", ">=", 2},
80
    {"nt", "!", 1},
81
    {"aa", "&&", 2},
82
    {"oo", "||", 2},
83
    {"pp", "++", 1},
84
    {"mm", "--", 1},
85
    {"cm", ",", 2},
86
    {"pm", "->*", 2},
87
    {"pt", "->", 0},  // Special syntax
88
    {"cl", "()", 0},  // Special syntax
89
    {"ix", "[]", 2},
90
    {"qu", "?", 3},
91
    {"st", "sizeof", 0},  // Special syntax
92
    {"sz", "sizeof", 1},  // Not a real operator name, but used in expressions.
93
    {nullptr, nullptr, 0},
94
};
95
96
// List of builtin types from Itanium C++ ABI.
97
//
98
// Invariant: only one- or two-character type abbreviations here.
99
static const AbbrevPair kBuiltinTypeList[] = {
100
    {"v", "void", 0},
101
    {"w", "wchar_t", 0},
102
    {"b", "bool", 0},
103
    {"c", "char", 0},
104
    {"a", "signed char", 0},
105
    {"h", "unsigned char", 0},
106
    {"s", "short", 0},
107
    {"t", "unsigned short", 0},
108
    {"i", "int", 0},
109
    {"j", "unsigned int", 0},
110
    {"l", "long", 0},
111
    {"m", "unsigned long", 0},
112
    {"x", "long long", 0},
113
    {"y", "unsigned long long", 0},
114
    {"n", "__int128", 0},
115
    {"o", "unsigned __int128", 0},
116
    {"f", "float", 0},
117
    {"d", "double", 0},
118
    {"e", "long double", 0},
119
    {"g", "__float128", 0},
120
    {"z", "ellipsis", 0},
121
122
    {"De", "decimal128", 0},      // IEEE 754r decimal floating point (128 bits)
123
    {"Dd", "decimal64", 0},       // IEEE 754r decimal floating point (64 bits)
124
    {"Dc", "decltype(auto)", 0},
125
    {"Da", "auto", 0},
126
    {"Dn", "std::nullptr_t", 0},  // i.e., decltype(nullptr)
127
    {"Df", "decimal32", 0},       // IEEE 754r decimal floating point (32 bits)
128
    {"Di", "char32_t", 0},
129
    {"Du", "char8_t", 0},
130
    {"Ds", "char16_t", 0},
131
    {"Dh", "float16", 0},         // IEEE 754r half-precision float (16 bits)
132
    {nullptr, nullptr, 0},
133
};
134
135
// List of substitutions Itanium C++ ABI.
136
static const AbbrevPair kSubstitutionList[] = {
137
    {"St", "", 0},
138
    {"Sa", "allocator", 0},
139
    {"Sb", "basic_string", 0},
140
    // std::basic_string<char, std::char_traits<char>,std::allocator<char> >
141
    {"Ss", "string", 0},
142
    // std::basic_istream<char, std::char_traits<char> >
143
    {"Si", "istream", 0},
144
    // std::basic_ostream<char, std::char_traits<char> >
145
    {"So", "ostream", 0},
146
    // std::basic_iostream<char, std::char_traits<char> >
147
    {"Sd", "iostream", 0},
148
    {nullptr, nullptr, 0},
149
};
150
151
// State needed for demangling.  This struct is copied in almost every stack
152
// frame, so every byte counts.
153
typedef struct {
154
  int mangled_idx;                     // Cursor of mangled name.
155
  int out_cur_idx;                     // Cursor of output string.
156
  int prev_name_idx;                   // For constructors/destructors.
157
  unsigned int prev_name_length : 16;  // For constructors/destructors.
158
  signed int nest_level : 15;          // For nested names.
159
  unsigned int append : 1;             // Append flag.
160
  // Note: for some reason MSVC can't pack "bool append : 1" into the same int
161
  // with the above two fields, so we use an int instead.  Amusingly it can pack
162
  // "signed bool" as expected, but relying on that to continue to be a legal
163
  // type seems ill-advised (as it's illegal in at least clang).
164
} ParseState;
165
166
static_assert(sizeof(ParseState) == 4 * sizeof(int),
167
              "unexpected size of ParseState");
168
169
// One-off state for demangling that's not subject to backtracking -- either
170
// constant data, data that's intentionally immune to backtracking (steps), or
171
// data that would never be changed by backtracking anyway (recursion_depth).
172
//
173
// Only one copy of this exists for each call to Demangle, so the size of this
174
// struct is nearly inconsequential.
175
typedef struct {
176
  const char *mangled_begin;  // Beginning of input string.
177
  char *out;                  // Beginning of output string.
178
  int out_end_idx;            // One past last allowed output character.
179
  int recursion_depth;        // For stack exhaustion prevention.
180
  int steps;               // Cap how much work we'll do, regardless of depth.
181
  ParseState parse_state;  // Backtrackable state copied for most frames.
182
} State;
183
184
namespace {
185
// Prevent deep recursion / stack exhaustion.
186
// Also prevent unbounded handling of complex inputs.
187
class ComplexityGuard {
188
 public:
189
0
  explicit ComplexityGuard(State *state) : state_(state) {
190
0
    ++state->recursion_depth;
191
0
    ++state->steps;
192
0
  }
193
0
  ~ComplexityGuard() { --state_->recursion_depth; }
194
195
  // 256 levels of recursion seems like a reasonable upper limit on depth.
196
  // 128 is not enough to demagle synthetic tests from demangle_unittest.txt:
197
  // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
198
  static constexpr int kRecursionDepthLimit = 256;
199
200
  // We're trying to pick a charitable upper-limit on how many parse steps are
201
  // necessary to handle something that a human could actually make use of.
202
  // This is mostly in place as a bound on how much work we'll do if we are
203
  // asked to demangle an mangled name from an untrusted source, so it should be
204
  // much larger than the largest expected symbol, but much smaller than the
205
  // amount of work we can do in, e.g., a second.
206
  //
207
  // Some real-world symbols from an arbitrary binary started failing between
208
  // 2^12 and 2^13, so we multiply the latter by an extra factor of 16 to set
209
  // the limit.
210
  //
211
  // Spending one second on 2^17 parse steps would require each step to take
212
  // 7.6us, or ~30000 clock cycles, so it's safe to say this can be done in
213
  // under a second.
214
  static constexpr int kParseStepsLimit = 1 << 17;
215
216
0
  bool IsTooComplex() const {
217
0
    return state_->recursion_depth > kRecursionDepthLimit ||
218
0
           state_->steps > kParseStepsLimit;
219
0
  }
220
221
 private:
222
  State *state_;
223
};
224
}  // namespace
225
226
// We don't use strlen() in libc since it's not guaranteed to be async
227
// signal safe.
228
0
static size_t StrLen(const char *str) {
229
0
  size_t len = 0;
230
0
  while (*str != '\0') {
231
0
    ++str;
232
0
    ++len;
233
0
  }
234
0
  return len;
235
0
}
236
237
// Returns true if "str" has at least "n" characters remaining.
238
0
static bool AtLeastNumCharsRemaining(const char *str, size_t n) {
239
0
  for (size_t i = 0; i < n; ++i) {
240
0
    if (str[i] == '\0') {
241
0
      return false;
242
0
    }
243
0
  }
244
0
  return true;
245
0
}
246
247
// Returns true if "str" has "prefix" as a prefix.
248
0
static bool StrPrefix(const char *str, const char *prefix) {
249
0
  size_t i = 0;
250
0
  while (str[i] != '\0' && prefix[i] != '\0' && str[i] == prefix[i]) {
251
0
    ++i;
252
0
  }
253
0
  return prefix[i] == '\0';  // Consumed everything in "prefix".
254
0
}
255
256
static void InitState(State* state,
257
                      const char* mangled,
258
                      char* out,
259
0
                      size_t out_size) {
260
0
  state->mangled_begin = mangled;
261
0
  state->out = out;
262
0
  state->out_end_idx = static_cast<int>(out_size);
263
0
  state->recursion_depth = 0;
264
0
  state->steps = 0;
265
266
0
  state->parse_state.mangled_idx = 0;
267
0
  state->parse_state.out_cur_idx = 0;
268
0
  state->parse_state.prev_name_idx = 0;
269
0
  state->parse_state.prev_name_length = 0;
270
0
  state->parse_state.nest_level = -1;
271
0
  state->parse_state.append = true;
272
0
}
273
274
0
static inline const char *RemainingInput(State *state) {
275
0
  return &state->mangled_begin[state->parse_state.mangled_idx];
276
0
}
277
278
// Returns true and advances "mangled_idx" if we find "one_char_token"
279
// at "mangled_idx" position.  It is assumed that "one_char_token" does
280
// not contain '\0'.
281
0
static bool ParseOneCharToken(State *state, const char one_char_token) {
282
0
  ComplexityGuard guard(state);
283
0
  if (guard.IsTooComplex()) return false;
284
0
  if (RemainingInput(state)[0] == one_char_token) {
285
0
    ++state->parse_state.mangled_idx;
286
0
    return true;
287
0
  }
288
0
  return false;
289
0
}
290
291
// Returns true and advances "mangled_cur" if we find "two_char_token"
292
// at "mangled_cur" position.  It is assumed that "two_char_token" does
293
// not contain '\0'.
294
0
static bool ParseTwoCharToken(State *state, const char *two_char_token) {
295
0
  ComplexityGuard guard(state);
296
0
  if (guard.IsTooComplex()) return false;
297
0
  if (RemainingInput(state)[0] == two_char_token[0] &&
298
0
      RemainingInput(state)[1] == two_char_token[1]) {
299
0
    state->parse_state.mangled_idx += 2;
300
0
    return true;
301
0
  }
302
0
  return false;
303
0
}
304
305
// Returns true and advances "mangled_cur" if we find any character in
306
// "char_class" at "mangled_cur" position.
307
0
static bool ParseCharClass(State *state, const char *char_class) {
308
0
  ComplexityGuard guard(state);
309
0
  if (guard.IsTooComplex()) return false;
310
0
  if (RemainingInput(state)[0] == '\0') {
311
0
    return false;
312
0
  }
313
0
  const char *p = char_class;
314
0
  for (; *p != '\0'; ++p) {
315
0
    if (RemainingInput(state)[0] == *p) {
316
0
      ++state->parse_state.mangled_idx;
317
0
      return true;
318
0
    }
319
0
  }
320
0
  return false;
321
0
}
322
323
0
static bool ParseDigit(State *state, int *digit) {
324
0
  char c = RemainingInput(state)[0];
325
0
  if (ParseCharClass(state, "0123456789")) {
326
0
    if (digit != nullptr) {
327
0
      *digit = c - '0';
328
0
    }
329
0
    return true;
330
0
  }
331
0
  return false;
332
0
}
333
334
// This function is used for handling an optional non-terminal.
335
0
static bool Optional(bool /*status*/) { return true; }
336
337
// This function is used for handling <non-terminal>+ syntax.
338
typedef bool (*ParseFunc)(State *);
339
0
static bool OneOrMore(ParseFunc parse_func, State *state) {
340
0
  if (parse_func(state)) {
341
0
    while (parse_func(state)) {
342
0
    }
343
0
    return true;
344
0
  }
345
0
  return false;
346
0
}
347
348
// This function is used for handling <non-terminal>* syntax. The function
349
// always returns true and must be followed by a termination token or a
350
// terminating sequence not handled by parse_func (e.g.
351
// ParseOneCharToken(state, 'E')).
352
0
static bool ZeroOrMore(ParseFunc parse_func, State *state) {
353
0
  while (parse_func(state)) {
354
0
  }
355
0
  return true;
356
0
}
357
358
// Append "str" at "out_cur_idx".  If there is an overflow, out_cur_idx is
359
// set to out_end_idx+1.  The output string is ensured to
360
// always terminate with '\0' as long as there is no overflow.
361
0
static void Append(State *state, const char *const str, const size_t length) {
362
0
  for (size_t i = 0; i < length; ++i) {
363
0
    if (state->parse_state.out_cur_idx + 1 <
364
0
        state->out_end_idx) {  // +1 for '\0'
365
0
      state->out[state->parse_state.out_cur_idx++] = str[i];
366
0
    } else {
367
      // signal overflow
368
0
      state->parse_state.out_cur_idx = state->out_end_idx + 1;
369
0
      break;
370
0
    }
371
0
  }
372
0
  if (state->parse_state.out_cur_idx < state->out_end_idx) {
373
0
    state->out[state->parse_state.out_cur_idx] =
374
0
        '\0';  // Terminate it with '\0'
375
0
  }
376
0
}
377
378
// We don't use equivalents in libc to avoid locale issues.
379
0
static bool IsLower(char c) { return c >= 'a' && c <= 'z'; }
380
381
0
static bool IsAlpha(char c) {
382
0
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
383
0
}
384
385
0
static bool IsDigit(char c) { return c >= '0' && c <= '9'; }
386
387
// Returns true if "str" is a function clone suffix.  These suffixes are used
388
// by GCC 4.5.x and later versions (and our locally-modified version of GCC
389
// 4.4.x) to indicate functions which have been cloned during optimization.
390
// We treat any sequence (.<alpha>+.<digit>+)+ as a function clone suffix.
391
// Additionally, '_' is allowed along with the alphanumeric sequence.
392
0
static bool IsFunctionCloneSuffix(const char *str) {
393
0
  size_t i = 0;
394
0
  while (str[i] != '\0') {
395
0
    bool parsed = false;
396
    // Consume a single [.<alpha> | _]*[.<digit>]* sequence.
397
0
    if (str[i] == '.' && (IsAlpha(str[i + 1]) || str[i + 1] == '_')) {
398
0
      parsed = true;
399
0
      i += 2;
400
0
      while (IsAlpha(str[i]) || str[i] == '_') {
401
0
        ++i;
402
0
      }
403
0
    }
404
0
    if (str[i] == '.' && IsDigit(str[i + 1])) {
405
0
      parsed = true;
406
0
      i += 2;
407
0
      while (IsDigit(str[i])) {
408
0
        ++i;
409
0
      }
410
0
    }
411
0
    if (!parsed)
412
0
      return false;
413
0
  }
414
0
  return true;  // Consumed everything in "str".
415
0
}
416
417
0
static bool EndsWith(State *state, const char chr) {
418
0
  return state->parse_state.out_cur_idx > 0 &&
419
0
         state->parse_state.out_cur_idx < state->out_end_idx &&
420
0
         chr == state->out[state->parse_state.out_cur_idx - 1];
421
0
}
422
423
// Append "str" with some tweaks, iff "append" state is true.
424
static void MaybeAppendWithLength(State *state, const char *const str,
425
0
                                  const size_t length) {
426
0
  if (state->parse_state.append && length > 0) {
427
    // Append a space if the output buffer ends with '<' and "str"
428
    // starts with '<' to avoid <<<.
429
0
    if (str[0] == '<' && EndsWith(state, '<')) {
430
0
      Append(state, " ", 1);
431
0
    }
432
    // Remember the last identifier name for ctors/dtors,
433
    // but only if we haven't yet overflown the buffer.
434
0
    if (state->parse_state.out_cur_idx < state->out_end_idx &&
435
0
        (IsAlpha(str[0]) || str[0] == '_')) {
436
0
      state->parse_state.prev_name_idx = state->parse_state.out_cur_idx;
437
0
      state->parse_state.prev_name_length = static_cast<unsigned int>(length);
438
0
    }
439
0
    Append(state, str, length);
440
0
  }
441
0
}
442
443
// Appends a positive decimal number to the output if appending is enabled.
444
0
static bool MaybeAppendDecimal(State *state, int val) {
445
  // Max {32-64}-bit unsigned int is 20 digits.
446
0
  constexpr size_t kMaxLength = 20;
447
0
  char buf[kMaxLength];
448
449
  // We can't use itoa or sprintf as neither is specified to be
450
  // async-signal-safe.
451
0
  if (state->parse_state.append) {
452
    // We can't have a one-before-the-beginning pointer, so instead start with
453
    // one-past-the-end and manipulate one character before the pointer.
454
0
    char *p = &buf[kMaxLength];
455
0
    do {  // val=0 is the only input that should write a leading zero digit.
456
0
      *--p = static_cast<char>((val % 10) + '0');
457
0
      val /= 10;
458
0
    } while (p > buf && val != 0);
459
460
    // 'p' landed on the last character we set.  How convenient.
461
0
    Append(state, p, kMaxLength - static_cast<size_t>(p - buf));
462
0
  }
463
464
0
  return true;
465
0
}
466
467
// A convenient wrapper around MaybeAppendWithLength().
468
// Returns true so that it can be placed in "if" conditions.
469
0
static bool MaybeAppend(State *state, const char *const str) {
470
0
  if (state->parse_state.append) {
471
0
    size_t length = StrLen(str);
472
0
    MaybeAppendWithLength(state, str, length);
473
0
  }
474
0
  return true;
475
0
}
476
477
// This function is used for handling nested names.
478
0
static bool EnterNestedName(State *state) {
479
0
  state->parse_state.nest_level = 0;
480
0
  return true;
481
0
}
482
483
// This function is used for handling nested names.
484
0
static bool LeaveNestedName(State *state, int16_t prev_value) {
485
0
  state->parse_state.nest_level = prev_value;
486
0
  return true;
487
0
}
488
489
// Disable the append mode not to print function parameters, etc.
490
0
static bool DisableAppend(State *state) {
491
0
  state->parse_state.append = false;
492
0
  return true;
493
0
}
494
495
// Restore the append mode to the previous state.
496
0
static bool RestoreAppend(State *state, bool prev_value) {
497
0
  state->parse_state.append = prev_value;
498
0
  return true;
499
0
}
500
501
// Increase the nest level for nested names.
502
0
static void MaybeIncreaseNestLevel(State *state) {
503
0
  if (state->parse_state.nest_level > -1) {
504
0
    ++state->parse_state.nest_level;
505
0
  }
506
0
}
507
508
// Appends :: for nested names if necessary.
509
0
static void MaybeAppendSeparator(State *state) {
510
0
  if (state->parse_state.nest_level >= 1) {
511
0
    MaybeAppend(state, "::");
512
0
  }
513
0
}
514
515
// Cancel the last separator if necessary.
516
0
static void MaybeCancelLastSeparator(State *state) {
517
0
  if (state->parse_state.nest_level >= 1 && state->parse_state.append &&
518
0
      state->parse_state.out_cur_idx >= 2) {
519
0
    state->parse_state.out_cur_idx -= 2;
520
0
    state->out[state->parse_state.out_cur_idx] = '\0';
521
0
  }
522
0
}
523
524
// Returns true if the identifier of the given length pointed to by
525
// "mangled_cur" is anonymous namespace.
526
0
static bool IdentifierIsAnonymousNamespace(State *state, size_t length) {
527
  // Returns true if "anon_prefix" is a proper prefix of "mangled_cur".
528
0
  static const char anon_prefix[] = "_GLOBAL__N_";
529
0
  return (length > (sizeof(anon_prefix) - 1) &&
530
0
          StrPrefix(RemainingInput(state), anon_prefix));
531
0
}
532
533
// Forward declarations of our parsing functions.
534
static bool ParseMangledName(State *state);
535
static bool ParseEncoding(State *state);
536
static bool ParseName(State *state);
537
static bool ParseUnscopedName(State *state);
538
static bool ParseNestedName(State *state);
539
static bool ParsePrefix(State *state);
540
static bool ParseUnqualifiedName(State *state);
541
static bool ParseSourceName(State *state);
542
static bool ParseLocalSourceName(State *state);
543
static bool ParseUnnamedTypeName(State *state);
544
static bool ParseNumber(State *state, int *number_out);
545
static bool ParseFloatNumber(State *state);
546
static bool ParseSeqId(State *state);
547
static bool ParseIdentifier(State *state, size_t length);
548
static bool ParseOperatorName(State *state, int *arity);
549
static bool ParseSpecialName(State *state);
550
static bool ParseCallOffset(State *state);
551
static bool ParseNVOffset(State *state);
552
static bool ParseVOffset(State *state);
553
static bool ParseAbiTags(State *state);
554
static bool ParseCtorDtorName(State *state);
555
static bool ParseDecltype(State *state);
556
static bool ParseType(State *state);
557
static bool ParseCVQualifiers(State *state);
558
static bool ParseBuiltinType(State *state);
559
static bool ParseFunctionType(State *state);
560
static bool ParseBareFunctionType(State *state);
561
static bool ParseClassEnumType(State *state);
562
static bool ParseArrayType(State *state);
563
static bool ParsePointerToMemberType(State *state);
564
static bool ParseTemplateParam(State *state);
565
static bool ParseTemplateTemplateParam(State *state);
566
static bool ParseTemplateArgs(State *state);
567
static bool ParseTemplateArg(State *state);
568
static bool ParseBaseUnresolvedName(State *state);
569
static bool ParseUnresolvedName(State *state);
570
static bool ParseExpression(State *state);
571
static bool ParseExprPrimary(State *state);
572
static bool ParseExprCastValue(State *state);
573
static bool ParseLocalName(State *state);
574
static bool ParseLocalNameSuffix(State *state);
575
static bool ParseDiscriminator(State *state);
576
static bool ParseSubstitution(State *state, bool accept_std);
577
578
// Implementation note: the following code is a straightforward
579
// translation of the Itanium C++ ABI defined in BNF with a couple of
580
// exceptions.
581
//
582
// - Support GNU extensions not defined in the Itanium C++ ABI
583
// - <prefix> and <template-prefix> are combined to avoid infinite loop
584
// - Reorder patterns to shorten the code
585
// - Reorder patterns to give greedier functions precedence
586
//   We'll mark "Less greedy than" for these cases in the code
587
//
588
// Each parsing function changes the parse state and returns true on
589
// success, or returns false and doesn't change the parse state (note:
590
// the parse-steps counter increases regardless of success or failure).
591
// To ensure that the parse state isn't changed in the latter case, we
592
// save the original state before we call multiple parsing functions
593
// consecutively with &&, and restore it if unsuccessful.  See
594
// ParseEncoding() as an example of this convention.  We follow the
595
// convention throughout the code.
596
//
597
// Originally we tried to do demangling without following the full ABI
598
// syntax but it turned out we needed to follow the full syntax to
599
// parse complicated cases like nested template arguments.  Note that
600
// implementing a full-fledged demangler isn't trivial (libiberty's
601
// cp-demangle.c has +4300 lines).
602
//
603
// Note that (foo) in <(foo) ...> is a modifier to be ignored.
604
//
605
// Reference:
606
// - Itanium C++ ABI
607
//   <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
608
609
// <mangled-name> ::= _Z <encoding>
610
0
static bool ParseMangledName(State *state) {
611
0
  ComplexityGuard guard(state);
612
0
  if (guard.IsTooComplex()) return false;
613
0
  return ParseTwoCharToken(state, "_Z") && ParseEncoding(state);
614
0
}
615
616
// <encoding> ::= <(function) name> <bare-function-type>
617
//            ::= <(data) name>
618
//            ::= <special-name>
619
0
static bool ParseEncoding(State *state) {
620
0
  ComplexityGuard guard(state);
621
0
  if (guard.IsTooComplex()) return false;
622
  // Implementing the first two productions together as <name>
623
  // [<bare-function-type>] avoids exponential blowup of backtracking.
624
  //
625
  // Since Optional(...) can't fail, there's no need to copy the state for
626
  // backtracking.
627
0
  if (ParseName(state) && Optional(ParseBareFunctionType(state))) {
628
0
    return true;
629
0
  }
630
631
0
  if (ParseSpecialName(state)) {
632
0
    return true;
633
0
  }
634
0
  return false;
635
0
}
636
637
// <name> ::= <nested-name>
638
//        ::= <unscoped-template-name> <template-args>
639
//        ::= <unscoped-name>
640
//        ::= <local-name>
641
0
static bool ParseName(State *state) {
642
0
  ComplexityGuard guard(state);
643
0
  if (guard.IsTooComplex()) return false;
644
0
  if (ParseNestedName(state) || ParseLocalName(state)) {
645
0
    return true;
646
0
  }
647
648
  // We reorganize the productions to avoid re-parsing unscoped names.
649
  // - Inline <unscoped-template-name> productions:
650
  //   <name> ::= <substitution> <template-args>
651
  //          ::= <unscoped-name> <template-args>
652
  //          ::= <unscoped-name>
653
  // - Merge the two productions that start with unscoped-name:
654
  //   <name> ::= <unscoped-name> [<template-args>]
655
656
0
  ParseState copy = state->parse_state;
657
  // "std<...>" isn't a valid name.
658
0
  if (ParseSubstitution(state, /*accept_std=*/false) &&
659
0
      ParseTemplateArgs(state)) {
660
0
    return true;
661
0
  }
662
0
  state->parse_state = copy;
663
664
  // Note there's no need to restore state after this since only the first
665
  // subparser can fail.
666
0
  return ParseUnscopedName(state) && Optional(ParseTemplateArgs(state));
667
0
}
668
669
// <unscoped-name> ::= <unqualified-name>
670
//                 ::= St <unqualified-name>
671
0
static bool ParseUnscopedName(State *state) {
672
0
  ComplexityGuard guard(state);
673
0
  if (guard.IsTooComplex()) return false;
674
0
  if (ParseUnqualifiedName(state)) {
675
0
    return true;
676
0
  }
677
678
0
  ParseState copy = state->parse_state;
679
0
  if (ParseTwoCharToken(state, "St") && MaybeAppend(state, "std::") &&
680
0
      ParseUnqualifiedName(state)) {
681
0
    return true;
682
0
  }
683
0
  state->parse_state = copy;
684
0
  return false;
685
0
}
686
687
// <ref-qualifer> ::= R // lvalue method reference qualifier
688
//                ::= O // rvalue method reference qualifier
689
0
static inline bool ParseRefQualifier(State *state) {
690
0
  return ParseCharClass(state, "OR");
691
0
}
692
693
// <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix>
694
//                   <unqualified-name> E
695
//               ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix>
696
//                   <template-args> E
697
0
static bool ParseNestedName(State *state) {
698
0
  ComplexityGuard guard(state);
699
0
  if (guard.IsTooComplex()) return false;
700
0
  ParseState copy = state->parse_state;
701
0
  if (ParseOneCharToken(state, 'N') && EnterNestedName(state) &&
702
0
      Optional(ParseCVQualifiers(state)) &&
703
0
      Optional(ParseRefQualifier(state)) && ParsePrefix(state) &&
704
0
      LeaveNestedName(state, copy.nest_level) &&
705
0
      ParseOneCharToken(state, 'E')) {
706
0
    return true;
707
0
  }
708
0
  state->parse_state = copy;
709
0
  return false;
710
0
}
711
712
// This part is tricky.  If we literally translate them to code, we'll
713
// end up infinite loop.  Hence we merge them to avoid the case.
714
//
715
// <prefix> ::= <prefix> <unqualified-name>
716
//          ::= <template-prefix> <template-args>
717
//          ::= <template-param>
718
//          ::= <substitution>
719
//          ::= # empty
720
// <template-prefix> ::= <prefix> <(template) unqualified-name>
721
//                   ::= <template-param>
722
//                   ::= <substitution>
723
0
static bool ParsePrefix(State *state) {
724
0
  ComplexityGuard guard(state);
725
0
  if (guard.IsTooComplex()) return false;
726
0
  bool has_something = false;
727
0
  while (true) {
728
0
    MaybeAppendSeparator(state);
729
0
    if (ParseTemplateParam(state) ||
730
0
        ParseSubstitution(state, /*accept_std=*/true) ||
731
0
        ParseUnscopedName(state) ||
732
0
        (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
733
0
      has_something = true;
734
0
      MaybeIncreaseNestLevel(state);
735
0
      continue;
736
0
    }
737
0
    MaybeCancelLastSeparator(state);
738
0
    if (has_something && ParseTemplateArgs(state)) {
739
0
      return ParsePrefix(state);
740
0
    } else {
741
0
      break;
742
0
    }
743
0
  }
744
0
  return true;
745
0
}
746
747
// <unqualified-name> ::= <operator-name> [<abi-tags>]
748
//                    ::= <ctor-dtor-name> [<abi-tags>]
749
//                    ::= <source-name> [<abi-tags>]
750
//                    ::= <local-source-name> [<abi-tags>]
751
//                    ::= <unnamed-type-name> [<abi-tags>]
752
//
753
// <local-source-name> is a GCC extension; see below.
754
0
static bool ParseUnqualifiedName(State *state) {
755
0
  ComplexityGuard guard(state);
756
0
  if (guard.IsTooComplex()) return false;
757
0
  if (ParseOperatorName(state, nullptr) || ParseCtorDtorName(state) ||
758
0
      ParseSourceName(state) || ParseLocalSourceName(state) ||
759
0
      ParseUnnamedTypeName(state)) {
760
0
    return ParseAbiTags(state);
761
0
  }
762
0
  return false;
763
0
}
764
765
// <abi-tags> ::= <abi-tag> [<abi-tags>]
766
// <abi-tag>  ::= B <source-name>
767
0
static bool ParseAbiTags(State *state) {
768
0
  ComplexityGuard guard(state);
769
0
  if (guard.IsTooComplex()) return false;
770
771
0
  while (ParseOneCharToken(state, 'B')) {
772
0
    ParseState copy = state->parse_state;
773
0
    MaybeAppend(state, "[abi:");
774
775
0
    if (!ParseSourceName(state)) {
776
0
      state->parse_state = copy;
777
0
      return false;
778
0
    }
779
0
    MaybeAppend(state, "]");
780
0
  }
781
782
0
  return true;
783
0
}
784
785
// <source-name> ::= <positive length number> <identifier>
786
0
static bool ParseSourceName(State *state) {
787
0
  ComplexityGuard guard(state);
788
0
  if (guard.IsTooComplex()) return false;
789
0
  ParseState copy = state->parse_state;
790
0
  int length = -1;
791
0
  if (ParseNumber(state, &length) &&
792
0
      ParseIdentifier(state, static_cast<size_t>(length))) {
793
0
    return true;
794
0
  }
795
0
  state->parse_state = copy;
796
0
  return false;
797
0
}
798
799
// <local-source-name> ::= L <source-name> [<discriminator>]
800
//
801
// References:
802
//   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
803
//   https://gcc.gnu.org/viewcvs?view=rev&revision=124467
804
0
static bool ParseLocalSourceName(State *state) {
805
0
  ComplexityGuard guard(state);
806
0
  if (guard.IsTooComplex()) return false;
807
0
  ParseState copy = state->parse_state;
808
0
  if (ParseOneCharToken(state, 'L') && ParseSourceName(state) &&
809
0
      Optional(ParseDiscriminator(state))) {
810
0
    return true;
811
0
  }
812
0
  state->parse_state = copy;
813
0
  return false;
814
0
}
815
816
// <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
817
//                     ::= <closure-type-name>
818
// <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
819
// <lambda-sig>        ::= <(parameter) type>+
820
0
static bool ParseUnnamedTypeName(State *state) {
821
0
  ComplexityGuard guard(state);
822
0
  if (guard.IsTooComplex()) return false;
823
0
  ParseState copy = state->parse_state;
824
  // Type's 1-based index n is encoded as { "", n == 1; itoa(n-2), otherwise }.
825
  // Optionally parse the encoded value into 'which' and add 2 to get the index.
826
0
  int which = -1;
827
828
  // Unnamed type local to function or class.
829
0
  if (ParseTwoCharToken(state, "Ut") && Optional(ParseNumber(state, &which)) &&
830
0
      which <= std::numeric_limits<int>::max() - 2 &&  // Don't overflow.
831
0
      ParseOneCharToken(state, '_')) {
832
0
    MaybeAppend(state, "{unnamed type#");
833
0
    MaybeAppendDecimal(state, 2 + which);
834
0
    MaybeAppend(state, "}");
835
0
    return true;
836
0
  }
837
0
  state->parse_state = copy;
838
839
  // Closure type.
840
0
  which = -1;
841
0
  if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
842
0
      OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
843
0
      ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
844
0
      which <= std::numeric_limits<int>::max() - 2 &&  // Don't overflow.
845
0
      ParseOneCharToken(state, '_')) {
846
0
    MaybeAppend(state, "{lambda()#");
847
0
    MaybeAppendDecimal(state, 2 + which);
848
0
    MaybeAppend(state, "}");
849
0
    return true;
850
0
  }
851
0
  state->parse_state = copy;
852
853
0
  return false;
854
0
}
855
856
// <number> ::= [n] <non-negative decimal integer>
857
// If "number_out" is non-null, then *number_out is set to the value of the
858
// parsed number on success.
859
0
static bool ParseNumber(State *state, int *number_out) {
860
0
  ComplexityGuard guard(state);
861
0
  if (guard.IsTooComplex()) return false;
862
0
  bool negative = false;
863
0
  if (ParseOneCharToken(state, 'n')) {
864
0
    negative = true;
865
0
  }
866
0
  const char *p = RemainingInput(state);
867
0
  uint64_t number = 0;
868
0
  for (; *p != '\0'; ++p) {
869
0
    if (IsDigit(*p)) {
870
0
      number = number * 10 + static_cast<uint64_t>(*p - '0');
871
0
    } else {
872
0
      break;
873
0
    }
874
0
  }
875
  // Apply the sign with uint64_t arithmetic so overflows aren't UB.  Gives
876
  // "incorrect" results for out-of-range inputs, but negative values only
877
  // appear for literals, which aren't printed.
878
0
  if (negative) {
879
0
    number = ~number + 1;
880
0
  }
881
0
  if (p != RemainingInput(state)) {  // Conversion succeeded.
882
0
    state->parse_state.mangled_idx += p - RemainingInput(state);
883
0
    if (number_out != nullptr) {
884
      // Note: possibly truncate "number".
885
0
      *number_out = static_cast<int>(number);
886
0
    }
887
0
    return true;
888
0
  }
889
0
  return false;
890
0
}
891
892
// Floating-point literals are encoded using a fixed-length lowercase
893
// hexadecimal string.
894
0
static bool ParseFloatNumber(State *state) {
895
0
  ComplexityGuard guard(state);
896
0
  if (guard.IsTooComplex()) return false;
897
0
  const char *p = RemainingInput(state);
898
0
  for (; *p != '\0'; ++p) {
899
0
    if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) {
900
0
      break;
901
0
    }
902
0
  }
903
0
  if (p != RemainingInput(state)) {  // Conversion succeeded.
904
0
    state->parse_state.mangled_idx += p - RemainingInput(state);
905
0
    return true;
906
0
  }
907
0
  return false;
908
0
}
909
910
// The <seq-id> is a sequence number in base 36,
911
// using digits and upper case letters
912
0
static bool ParseSeqId(State *state) {
913
0
  ComplexityGuard guard(state);
914
0
  if (guard.IsTooComplex()) return false;
915
0
  const char *p = RemainingInput(state);
916
0
  for (; *p != '\0'; ++p) {
917
0
    if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) {
918
0
      break;
919
0
    }
920
0
  }
921
0
  if (p != RemainingInput(state)) {  // Conversion succeeded.
922
0
    state->parse_state.mangled_idx += p - RemainingInput(state);
923
0
    return true;
924
0
  }
925
0
  return false;
926
0
}
927
928
// <identifier> ::= <unqualified source code identifier> (of given length)
929
0
static bool ParseIdentifier(State *state, size_t length) {
930
0
  ComplexityGuard guard(state);
931
0
  if (guard.IsTooComplex()) return false;
932
0
  if (!AtLeastNumCharsRemaining(RemainingInput(state), length)) {
933
0
    return false;
934
0
  }
935
0
  if (IdentifierIsAnonymousNamespace(state, length)) {
936
0
    MaybeAppend(state, "(anonymous namespace)");
937
0
  } else {
938
0
    MaybeAppendWithLength(state, RemainingInput(state), length);
939
0
  }
940
0
  state->parse_state.mangled_idx += length;
941
0
  return true;
942
0
}
943
944
// <operator-name> ::= nw, and other two letters cases
945
//                 ::= cv <type>  # (cast)
946
//                 ::= v  <digit> <source-name> # vendor extended operator
947
0
static bool ParseOperatorName(State *state, int *arity) {
948
0
  ComplexityGuard guard(state);
949
0
  if (guard.IsTooComplex()) return false;
950
0
  if (!AtLeastNumCharsRemaining(RemainingInput(state), 2)) {
951
0
    return false;
952
0
  }
953
  // First check with "cv" (cast) case.
954
0
  ParseState copy = state->parse_state;
955
0
  if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") &&
956
0
      EnterNestedName(state) && ParseType(state) &&
957
0
      LeaveNestedName(state, copy.nest_level)) {
958
0
    if (arity != nullptr) {
959
0
      *arity = 1;
960
0
    }
961
0
    return true;
962
0
  }
963
0
  state->parse_state = copy;
964
965
  // Then vendor extended operators.
966
0
  if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
967
0
      ParseSourceName(state)) {
968
0
    return true;
969
0
  }
970
0
  state->parse_state = copy;
971
972
  // Other operator names should start with a lower alphabet followed
973
  // by a lower/upper alphabet.
974
0
  if (!(IsLower(RemainingInput(state)[0]) &&
975
0
        IsAlpha(RemainingInput(state)[1]))) {
976
0
    return false;
977
0
  }
978
  // We may want to perform a binary search if we really need speed.
979
0
  const AbbrevPair *p;
980
0
  for (p = kOperatorList; p->abbrev != nullptr; ++p) {
981
0
    if (RemainingInput(state)[0] == p->abbrev[0] &&
982
0
        RemainingInput(state)[1] == p->abbrev[1]) {
983
0
      if (arity != nullptr) {
984
0
        *arity = p->arity;
985
0
      }
986
0
      MaybeAppend(state, "operator");
987
0
      if (IsLower(*p->real_name)) {  // new, delete, etc.
988
0
        MaybeAppend(state, " ");
989
0
      }
990
0
      MaybeAppend(state, p->real_name);
991
0
      state->parse_state.mangled_idx += 2;
992
0
      return true;
993
0
    }
994
0
  }
995
0
  return false;
996
0
}
997
998
// <special-name> ::= TV <type>
999
//                ::= TT <type>
1000
//                ::= TI <type>
1001
//                ::= TS <type>
1002
//                ::= TH <type>  # thread-local
1003
//                ::= Tc <call-offset> <call-offset> <(base) encoding>
1004
//                ::= GV <(object) name>
1005
//                ::= T <call-offset> <(base) encoding>
1006
// G++ extensions:
1007
//                ::= TC <type> <(offset) number> _ <(base) type>
1008
//                ::= TF <type>
1009
//                ::= TJ <type>
1010
//                ::= GR <name>
1011
//                ::= GA <encoding>
1012
//                ::= Th <call-offset> <(base) encoding>
1013
//                ::= Tv <call-offset> <(base) encoding>
1014
//
1015
// Note: we don't care much about them since they don't appear in
1016
// stack traces.  The are special data.
1017
0
static bool ParseSpecialName(State *state) {
1018
0
  ComplexityGuard guard(state);
1019
0
  if (guard.IsTooComplex()) return false;
1020
0
  ParseState copy = state->parse_state;
1021
0
  if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") &&
1022
0
      ParseType(state)) {
1023
0
    return true;
1024
0
  }
1025
0
  state->parse_state = copy;
1026
1027
0
  if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
1028
0
      ParseCallOffset(state) && ParseEncoding(state)) {
1029
0
    return true;
1030
0
  }
1031
0
  state->parse_state = copy;
1032
1033
0
  if (ParseTwoCharToken(state, "GV") && ParseName(state)) {
1034
0
    return true;
1035
0
  }
1036
0
  state->parse_state = copy;
1037
1038
0
  if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
1039
0
      ParseEncoding(state)) {
1040
0
    return true;
1041
0
  }
1042
0
  state->parse_state = copy;
1043
1044
  // G++ extensions
1045
0
  if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
1046
0
      ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
1047
0
      DisableAppend(state) && ParseType(state)) {
1048
0
    RestoreAppend(state, copy.append);
1049
0
    return true;
1050
0
  }
1051
0
  state->parse_state = copy;
1052
1053
0
  if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
1054
0
      ParseType(state)) {
1055
0
    return true;
1056
0
  }
1057
0
  state->parse_state = copy;
1058
1059
0
  if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
1060
0
    return true;
1061
0
  }
1062
0
  state->parse_state = copy;
1063
1064
0
  if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
1065
0
    return true;
1066
0
  }
1067
0
  state->parse_state = copy;
1068
1069
0
  if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
1070
0
      ParseCallOffset(state) && ParseEncoding(state)) {
1071
0
    return true;
1072
0
  }
1073
0
  state->parse_state = copy;
1074
0
  return false;
1075
0
}
1076
1077
// <call-offset> ::= h <nv-offset> _
1078
//               ::= v <v-offset> _
1079
0
static bool ParseCallOffset(State *state) {
1080
0
  ComplexityGuard guard(state);
1081
0
  if (guard.IsTooComplex()) return false;
1082
0
  ParseState copy = state->parse_state;
1083
0
  if (ParseOneCharToken(state, 'h') && ParseNVOffset(state) &&
1084
0
      ParseOneCharToken(state, '_')) {
1085
0
    return true;
1086
0
  }
1087
0
  state->parse_state = copy;
1088
1089
0
  if (ParseOneCharToken(state, 'v') && ParseVOffset(state) &&
1090
0
      ParseOneCharToken(state, '_')) {
1091
0
    return true;
1092
0
  }
1093
0
  state->parse_state = copy;
1094
1095
0
  return false;
1096
0
}
1097
1098
// <nv-offset> ::= <(offset) number>
1099
0
static bool ParseNVOffset(State *state) {
1100
0
  ComplexityGuard guard(state);
1101
0
  if (guard.IsTooComplex()) return false;
1102
0
  return ParseNumber(state, nullptr);
1103
0
}
1104
1105
// <v-offset>  ::= <(offset) number> _ <(virtual offset) number>
1106
0
static bool ParseVOffset(State *state) {
1107
0
  ComplexityGuard guard(state);
1108
0
  if (guard.IsTooComplex()) return false;
1109
0
  ParseState copy = state->parse_state;
1110
0
  if (ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
1111
0
      ParseNumber(state, nullptr)) {
1112
0
    return true;
1113
0
  }
1114
0
  state->parse_state = copy;
1115
0
  return false;
1116
0
}
1117
1118
// <ctor-dtor-name> ::= C1 | C2 | C3 | CI1 <base-class-type> | CI2
1119
// <base-class-type>
1120
//                  ::= D0 | D1 | D2
1121
// # GCC extensions: "unified" constructor/destructor.  See
1122
// #
1123
// https://github.com/gcc-mirror/gcc/blob/7ad17b583c3643bd4557f29b8391ca7ef08391f5/gcc/cp/mangle.c#L1847
1124
//                  ::= C4 | D4
1125
0
static bool ParseCtorDtorName(State *state) {
1126
0
  ComplexityGuard guard(state);
1127
0
  if (guard.IsTooComplex()) return false;
1128
0
  ParseState copy = state->parse_state;
1129
0
  if (ParseOneCharToken(state, 'C')) {
1130
0
    if (ParseCharClass(state, "1234")) {
1131
0
      const char *const prev_name =
1132
0
          state->out + state->parse_state.prev_name_idx;
1133
0
      MaybeAppendWithLength(state, prev_name,
1134
0
                            state->parse_state.prev_name_length);
1135
0
      return true;
1136
0
    } else if (ParseOneCharToken(state, 'I') && ParseCharClass(state, "12") &&
1137
0
               ParseClassEnumType(state)) {
1138
0
      return true;
1139
0
    }
1140
0
  }
1141
0
  state->parse_state = copy;
1142
1143
0
  if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "0124")) {
1144
0
    const char *const prev_name = state->out + state->parse_state.prev_name_idx;
1145
0
    MaybeAppend(state, "~");
1146
0
    MaybeAppendWithLength(state, prev_name,
1147
0
                          state->parse_state.prev_name_length);
1148
0
    return true;
1149
0
  }
1150
0
  state->parse_state = copy;
1151
0
  return false;
1152
0
}
1153
1154
// <decltype> ::= Dt <expression> E  # decltype of an id-expression or class
1155
//                                   # member access (C++0x)
1156
//            ::= DT <expression> E  # decltype of an expression (C++0x)
1157
0
static bool ParseDecltype(State *state) {
1158
0
  ComplexityGuard guard(state);
1159
0
  if (guard.IsTooComplex()) return false;
1160
1161
0
  ParseState copy = state->parse_state;
1162
0
  if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
1163
0
      ParseExpression(state) && ParseOneCharToken(state, 'E')) {
1164
0
    return true;
1165
0
  }
1166
0
  state->parse_state = copy;
1167
1168
0
  return false;
1169
0
}
1170
1171
// <type> ::= <CV-qualifiers> <type>
1172
//        ::= P <type>   # pointer-to
1173
//        ::= R <type>   # reference-to
1174
//        ::= O <type>   # rvalue reference-to (C++0x)
1175
//        ::= C <type>   # complex pair (C 2000)
1176
//        ::= G <type>   # imaginary (C 2000)
1177
//        ::= U <source-name> <type>  # vendor extended type qualifier
1178
//        ::= <builtin-type>
1179
//        ::= <function-type>
1180
//        ::= <class-enum-type>  # note: just an alias for <name>
1181
//        ::= <array-type>
1182
//        ::= <pointer-to-member-type>
1183
//        ::= <template-template-param> <template-args>
1184
//        ::= <template-param>
1185
//        ::= <decltype>
1186
//        ::= <substitution>
1187
//        ::= Dp <type>          # pack expansion of (C++0x)
1188
//        ::= Dv <num-elems> _   # GNU vector extension
1189
//
1190
0
static bool ParseType(State *state) {
1191
0
  ComplexityGuard guard(state);
1192
0
  if (guard.IsTooComplex()) return false;
1193
0
  ParseState copy = state->parse_state;
1194
1195
  // We should check CV-qualifers, and PRGC things first.
1196
  //
1197
  // CV-qualifiers overlap with some operator names, but an operator name is not
1198
  // valid as a type.  To avoid an ambiguity that can lead to exponential time
1199
  // complexity, refuse to backtrack the CV-qualifiers.
1200
  //
1201
  // _Z4aoeuIrMvvE
1202
  //  => _Z 4aoeuI        rM  v     v   E
1203
  //         aoeu<operator%=, void, void>
1204
  //  => _Z 4aoeuI r Mv v              E
1205
  //         aoeu<void void::* restrict>
1206
  //
1207
  // By consuming the CV-qualifiers first, the former parse is disabled.
1208
0
  if (ParseCVQualifiers(state)) {
1209
0
    const bool result = ParseType(state);
1210
0
    if (!result) state->parse_state = copy;
1211
0
    return result;
1212
0
  }
1213
0
  state->parse_state = copy;
1214
1215
  // Similarly, these tag characters can overlap with other <name>s resulting in
1216
  // two different parse prefixes that land on <template-args> in the same
1217
  // place, such as "C3r1xI...".  So, disable the "ctor-name = C3" parse by
1218
  // refusing to backtrack the tag characters.
1219
0
  if (ParseCharClass(state, "OPRCG")) {
1220
0
    const bool result = ParseType(state);
1221
0
    if (!result) state->parse_state = copy;
1222
0
    return result;
1223
0
  }
1224
0
  state->parse_state = copy;
1225
1226
0
  if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
1227
0
    return true;
1228
0
  }
1229
0
  state->parse_state = copy;
1230
1231
0
  if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
1232
0
      ParseType(state)) {
1233
0
    return true;
1234
0
  }
1235
0
  state->parse_state = copy;
1236
1237
0
  if (ParseBuiltinType(state) || ParseFunctionType(state) ||
1238
0
      ParseClassEnumType(state) || ParseArrayType(state) ||
1239
0
      ParsePointerToMemberType(state) || ParseDecltype(state) ||
1240
      // "std" on its own isn't a type.
1241
0
      ParseSubstitution(state, /*accept_std=*/false)) {
1242
0
    return true;
1243
0
  }
1244
1245
0
  if (ParseTemplateTemplateParam(state) && ParseTemplateArgs(state)) {
1246
0
    return true;
1247
0
  }
1248
0
  state->parse_state = copy;
1249
1250
  // Less greedy than <template-template-param> <template-args>.
1251
0
  if (ParseTemplateParam(state)) {
1252
0
    return true;
1253
0
  }
1254
1255
0
  if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) &&
1256
0
      ParseOneCharToken(state, '_')) {
1257
0
    return true;
1258
0
  }
1259
0
  state->parse_state = copy;
1260
1261
0
  return false;
1262
0
}
1263
1264
// <CV-qualifiers> ::= [r] [V] [K]
1265
// We don't allow empty <CV-qualifiers> to avoid infinite loop in
1266
// ParseType().
1267
0
static bool ParseCVQualifiers(State *state) {
1268
0
  ComplexityGuard guard(state);
1269
0
  if (guard.IsTooComplex()) return false;
1270
0
  int num_cv_qualifiers = 0;
1271
0
  num_cv_qualifiers += ParseOneCharToken(state, 'r');
1272
0
  num_cv_qualifiers += ParseOneCharToken(state, 'V');
1273
0
  num_cv_qualifiers += ParseOneCharToken(state, 'K');
1274
0
  return num_cv_qualifiers > 0;
1275
0
}
1276
1277
// <builtin-type> ::= v, etc.  # single-character builtin types
1278
//                ::= u <source-name>
1279
//                ::= Dd, etc.  # two-character builtin types
1280
//
1281
// Not supported:
1282
//                ::= DF <number> _ # _FloatN (N bits)
1283
//
1284
0
static bool ParseBuiltinType(State *state) {
1285
0
  ComplexityGuard guard(state);
1286
0
  if (guard.IsTooComplex()) return false;
1287
0
  const AbbrevPair *p;
1288
0
  for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
1289
    // Guaranteed only 1- or 2-character strings in kBuiltinTypeList.
1290
0
    if (p->abbrev[1] == '\0') {
1291
0
      if (ParseOneCharToken(state, p->abbrev[0])) {
1292
0
        MaybeAppend(state, p->real_name);
1293
0
        return true;
1294
0
      }
1295
0
    } else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) {
1296
0
      MaybeAppend(state, p->real_name);
1297
0
      return true;
1298
0
    }
1299
0
  }
1300
1301
0
  ParseState copy = state->parse_state;
1302
0
  if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
1303
0
    return true;
1304
0
  }
1305
0
  state->parse_state = copy;
1306
0
  return false;
1307
0
}
1308
1309
//  <exception-spec> ::= Do                # non-throwing
1310
//                                           exception-specification (e.g.,
1311
//                                           noexcept, throw())
1312
//                   ::= DO <expression> E # computed (instantiation-dependent)
1313
//                                           noexcept
1314
//                   ::= Dw <type>+ E      # dynamic exception specification
1315
//                                           with instantiation-dependent types
1316
0
static bool ParseExceptionSpec(State *state) {
1317
0
  ComplexityGuard guard(state);
1318
0
  if (guard.IsTooComplex()) return false;
1319
1320
0
  if (ParseTwoCharToken(state, "Do")) return true;
1321
1322
0
  ParseState copy = state->parse_state;
1323
0
  if (ParseTwoCharToken(state, "DO") && ParseExpression(state) &&
1324
0
      ParseOneCharToken(state, 'E')) {
1325
0
    return true;
1326
0
  }
1327
0
  state->parse_state = copy;
1328
0
  if (ParseTwoCharToken(state, "Dw") && OneOrMore(ParseType, state) &&
1329
0
      ParseOneCharToken(state, 'E')) {
1330
0
    return true;
1331
0
  }
1332
0
  state->parse_state = copy;
1333
1334
0
  return false;
1335
0
}
1336
1337
// <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E
1338
0
static bool ParseFunctionType(State *state) {
1339
0
  ComplexityGuard guard(state);
1340
0
  if (guard.IsTooComplex()) return false;
1341
0
  ParseState copy = state->parse_state;
1342
0
  if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') &&
1343
0
      Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) &&
1344
0
      Optional(ParseOneCharToken(state, 'O')) &&
1345
0
      ParseOneCharToken(state, 'E')) {
1346
0
    return true;
1347
0
  }
1348
0
  state->parse_state = copy;
1349
0
  return false;
1350
0
}
1351
1352
// <bare-function-type> ::= <(signature) type>+
1353
0
static bool ParseBareFunctionType(State *state) {
1354
0
  ComplexityGuard guard(state);
1355
0
  if (guard.IsTooComplex()) return false;
1356
0
  ParseState copy = state->parse_state;
1357
0
  DisableAppend(state);
1358
0
  if (OneOrMore(ParseType, state)) {
1359
0
    RestoreAppend(state, copy.append);
1360
0
    MaybeAppend(state, "()");
1361
0
    return true;
1362
0
  }
1363
0
  state->parse_state = copy;
1364
0
  return false;
1365
0
}
1366
1367
// <class-enum-type> ::= <name>
1368
0
static bool ParseClassEnumType(State *state) {
1369
0
  ComplexityGuard guard(state);
1370
0
  if (guard.IsTooComplex()) return false;
1371
0
  return ParseName(state);
1372
0
}
1373
1374
// <array-type> ::= A <(positive dimension) number> _ <(element) type>
1375
//              ::= A [<(dimension) expression>] _ <(element) type>
1376
0
static bool ParseArrayType(State *state) {
1377
0
  ComplexityGuard guard(state);
1378
0
  if (guard.IsTooComplex()) return false;
1379
0
  ParseState copy = state->parse_state;
1380
0
  if (ParseOneCharToken(state, 'A') && ParseNumber(state, nullptr) &&
1381
0
      ParseOneCharToken(state, '_') && ParseType(state)) {
1382
0
    return true;
1383
0
  }
1384
0
  state->parse_state = copy;
1385
1386
0
  if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) &&
1387
0
      ParseOneCharToken(state, '_') && ParseType(state)) {
1388
0
    return true;
1389
0
  }
1390
0
  state->parse_state = copy;
1391
0
  return false;
1392
0
}
1393
1394
// <pointer-to-member-type> ::= M <(class) type> <(member) type>
1395
0
static bool ParsePointerToMemberType(State *state) {
1396
0
  ComplexityGuard guard(state);
1397
0
  if (guard.IsTooComplex()) return false;
1398
0
  ParseState copy = state->parse_state;
1399
0
  if (ParseOneCharToken(state, 'M') && ParseType(state) && ParseType(state)) {
1400
0
    return true;
1401
0
  }
1402
0
  state->parse_state = copy;
1403
0
  return false;
1404
0
}
1405
1406
// <template-param> ::= T_
1407
//                  ::= T <parameter-2 non-negative number> _
1408
0
static bool ParseTemplateParam(State *state) {
1409
0
  ComplexityGuard guard(state);
1410
0
  if (guard.IsTooComplex()) return false;
1411
0
  if (ParseTwoCharToken(state, "T_")) {
1412
0
    MaybeAppend(state, "?");  // We don't support template substitutions.
1413
0
    return true;
1414
0
  }
1415
1416
0
  ParseState copy = state->parse_state;
1417
0
  if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
1418
0
      ParseOneCharToken(state, '_')) {
1419
0
    MaybeAppend(state, "?");  // We don't support template substitutions.
1420
0
    return true;
1421
0
  }
1422
0
  state->parse_state = copy;
1423
0
  return false;
1424
0
}
1425
1426
// <template-template-param> ::= <template-param>
1427
//                           ::= <substitution>
1428
0
static bool ParseTemplateTemplateParam(State *state) {
1429
0
  ComplexityGuard guard(state);
1430
0
  if (guard.IsTooComplex()) return false;
1431
0
  return (ParseTemplateParam(state) ||
1432
          // "std" on its own isn't a template.
1433
0
          ParseSubstitution(state, /*accept_std=*/false));
1434
0
}
1435
1436
// <template-args> ::= I <template-arg>+ E
1437
0
static bool ParseTemplateArgs(State *state) {
1438
0
  ComplexityGuard guard(state);
1439
0
  if (guard.IsTooComplex()) return false;
1440
0
  ParseState copy = state->parse_state;
1441
0
  DisableAppend(state);
1442
0
  if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
1443
0
      ParseOneCharToken(state, 'E')) {
1444
0
    RestoreAppend(state, copy.append);
1445
0
    MaybeAppend(state, "<>");
1446
0
    return true;
1447
0
  }
1448
0
  state->parse_state = copy;
1449
0
  return false;
1450
0
}
1451
1452
// <template-arg>  ::= <type>
1453
//                 ::= <expr-primary>
1454
//                 ::= J <template-arg>* E        # argument pack
1455
//                 ::= X <expression> E
1456
0
static bool ParseTemplateArg(State *state) {
1457
0
  ComplexityGuard guard(state);
1458
0
  if (guard.IsTooComplex()) return false;
1459
0
  ParseState copy = state->parse_state;
1460
0
  if (ParseOneCharToken(state, 'J') && ZeroOrMore(ParseTemplateArg, state) &&
1461
0
      ParseOneCharToken(state, 'E')) {
1462
0
    return true;
1463
0
  }
1464
0
  state->parse_state = copy;
1465
1466
  // There can be significant overlap between the following leading to
1467
  // exponential backtracking:
1468
  //
1469
  //   <expr-primary> ::= L <type> <expr-cast-value> E
1470
  //                 e.g. L 2xxIvE 1                 E
1471
  //   <type>         ==> <local-source-name> <template-args>
1472
  //                 e.g. L 2xx               IvE
1473
  //
1474
  // This means parsing an entire <type> twice, and <type> can contain
1475
  // <template-arg>, so this can generate exponential backtracking.  There is
1476
  // only overlap when the remaining input starts with "L <source-name>", so
1477
  // parse all cases that can start this way jointly to share the common prefix.
1478
  //
1479
  // We have:
1480
  //
1481
  //   <template-arg> ::= <type>
1482
  //                  ::= <expr-primary>
1483
  //
1484
  // First, drop all the productions of <type> that must start with something
1485
  // other than 'L'.  All that's left is <class-enum-type>; inline it.
1486
  //
1487
  //   <type> ::= <nested-name> # starts with 'N'
1488
  //          ::= <unscoped-name>
1489
  //          ::= <unscoped-template-name> <template-args>
1490
  //          ::= <local-name> # starts with 'Z'
1491
  //
1492
  // Drop and inline again:
1493
  //
1494
  //   <type> ::= <unscoped-name>
1495
  //          ::= <unscoped-name> <template-args>
1496
  //          ::= <substitution> <template-args> # starts with 'S'
1497
  //
1498
  // Merge the first two, inline <unscoped-name>, drop last:
1499
  //
1500
  //   <type> ::= <unqualified-name> [<template-args>]
1501
  //          ::= St <unqualified-name> [<template-args>] # starts with 'S'
1502
  //
1503
  // Drop and inline:
1504
  //
1505
  //   <type> ::= <operator-name> [<template-args>] # starts with lowercase
1506
  //          ::= <ctor-dtor-name> [<template-args>] # starts with 'C' or 'D'
1507
  //          ::= <source-name> [<template-args>] # starts with digit
1508
  //          ::= <local-source-name> [<template-args>]
1509
  //          ::= <unnamed-type-name> [<template-args>] # starts with 'U'
1510
  //
1511
  // One more time:
1512
  //
1513
  //   <type> ::= L <source-name> [<template-args>]
1514
  //
1515
  // Likewise with <expr-primary>:
1516
  //
1517
  //   <expr-primary> ::= L <type> <expr-cast-value> E
1518
  //                  ::= LZ <encoding> E # cannot overlap; drop
1519
  //                  ::= L <mangled_name> E # cannot overlap; drop
1520
  //
1521
  // By similar reasoning as shown above, the only <type>s starting with
1522
  // <source-name> are "<source-name> [<template-args>]".  Inline this.
1523
  //
1524
  //   <expr-primary> ::= L <source-name> [<template-args>] <expr-cast-value> E
1525
  //
1526
  // Now inline both of these into <template-arg>:
1527
  //
1528
  //   <template-arg> ::= L <source-name> [<template-args>]
1529
  //                  ::= L <source-name> [<template-args>] <expr-cast-value> E
1530
  //
1531
  // Merge them and we're done:
1532
  //   <template-arg>
1533
  //     ::= L <source-name> [<template-args>] [<expr-cast-value> E]
1534
0
  if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
1535
0
    copy = state->parse_state;
1536
0
    if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) {
1537
0
      return true;
1538
0
    }
1539
0
    state->parse_state = copy;
1540
0
    return true;
1541
0
  }
1542
1543
  // Now that the overlapping cases can't reach this code, we can safely call
1544
  // both of these.
1545
0
  if (ParseType(state) || ParseExprPrimary(state)) {
1546
0
    return true;
1547
0
  }
1548
0
  state->parse_state = copy;
1549
1550
0
  if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
1551
0
      ParseOneCharToken(state, 'E')) {
1552
0
    return true;
1553
0
  }
1554
0
  state->parse_state = copy;
1555
0
  return false;
1556
0
}
1557
1558
// <unresolved-type> ::= <template-param> [<template-args>]
1559
//                   ::= <decltype>
1560
//                   ::= <substitution>
1561
0
static inline bool ParseUnresolvedType(State *state) {
1562
  // No ComplexityGuard because we don't copy the state in this stack frame.
1563
0
  return (ParseTemplateParam(state) && Optional(ParseTemplateArgs(state))) ||
1564
0
         ParseDecltype(state) || ParseSubstitution(state, /*accept_std=*/false);
1565
0
}
1566
1567
// <simple-id> ::= <source-name> [<template-args>]
1568
0
static inline bool ParseSimpleId(State *state) {
1569
  // No ComplexityGuard because we don't copy the state in this stack frame.
1570
1571
  // Note: <simple-id> cannot be followed by a parameter pack; see comment in
1572
  // ParseUnresolvedType.
1573
0
  return ParseSourceName(state) && Optional(ParseTemplateArgs(state));
1574
0
}
1575
1576
// <base-unresolved-name> ::= <source-name> [<template-args>]
1577
//                        ::= on <operator-name> [<template-args>]
1578
//                        ::= dn <destructor-name>
1579
0
static bool ParseBaseUnresolvedName(State *state) {
1580
0
  ComplexityGuard guard(state);
1581
0
  if (guard.IsTooComplex()) return false;
1582
1583
0
  if (ParseSimpleId(state)) {
1584
0
    return true;
1585
0
  }
1586
1587
0
  ParseState copy = state->parse_state;
1588
0
  if (ParseTwoCharToken(state, "on") && ParseOperatorName(state, nullptr) &&
1589
0
      Optional(ParseTemplateArgs(state))) {
1590
0
    return true;
1591
0
  }
1592
0
  state->parse_state = copy;
1593
1594
0
  if (ParseTwoCharToken(state, "dn") &&
1595
0
      (ParseUnresolvedType(state) || ParseSimpleId(state))) {
1596
0
    return true;
1597
0
  }
1598
0
  state->parse_state = copy;
1599
1600
0
  return false;
1601
0
}
1602
1603
// <unresolved-name> ::= [gs] <base-unresolved-name>
1604
//                   ::= sr <unresolved-type> <base-unresolved-name>
1605
//                   ::= srN <unresolved-type> <unresolved-qualifier-level>+ E
1606
//                         <base-unresolved-name>
1607
//                   ::= [gs] sr <unresolved-qualifier-level>+ E
1608
//                         <base-unresolved-name>
1609
0
static bool ParseUnresolvedName(State *state) {
1610
0
  ComplexityGuard guard(state);
1611
0
  if (guard.IsTooComplex()) return false;
1612
1613
0
  ParseState copy = state->parse_state;
1614
0
  if (Optional(ParseTwoCharToken(state, "gs")) &&
1615
0
      ParseBaseUnresolvedName(state)) {
1616
0
    return true;
1617
0
  }
1618
0
  state->parse_state = copy;
1619
1620
0
  if (ParseTwoCharToken(state, "sr") && ParseUnresolvedType(state) &&
1621
0
      ParseBaseUnresolvedName(state)) {
1622
0
    return true;
1623
0
  }
1624
0
  state->parse_state = copy;
1625
1626
0
  if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') &&
1627
0
      ParseUnresolvedType(state) &&
1628
0
      OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
1629
0
      ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
1630
0
    return true;
1631
0
  }
1632
0
  state->parse_state = copy;
1633
1634
0
  if (Optional(ParseTwoCharToken(state, "gs")) &&
1635
0
      ParseTwoCharToken(state, "sr") &&
1636
0
      OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
1637
0
      ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
1638
0
    return true;
1639
0
  }
1640
0
  state->parse_state = copy;
1641
1642
0
  return false;
1643
0
}
1644
1645
// <expression> ::= <1-ary operator-name> <expression>
1646
//              ::= <2-ary operator-name> <expression> <expression>
1647
//              ::= <3-ary operator-name> <expression> <expression> <expression>
1648
//              ::= cl <expression>+ E
1649
//              ::= cp <simple-id> <expression>* E # Clang-specific.
1650
//              ::= cv <type> <expression>      # type (expression)
1651
//              ::= cv <type> _ <expression>* E # type (expr-list)
1652
//              ::= st <type>
1653
//              ::= <template-param>
1654
//              ::= <function-param>
1655
//              ::= <expr-primary>
1656
//              ::= dt <expression> <unresolved-name> # expr.name
1657
//              ::= pt <expression> <unresolved-name> # expr->name
1658
//              ::= sp <expression>         # argument pack expansion
1659
//              ::= sr <type> <unqualified-name> <template-args>
1660
//              ::= sr <type> <unqualified-name>
1661
// <function-param> ::= fp <(top-level) CV-qualifiers> _
1662
//                  ::= fp <(top-level) CV-qualifiers> <number> _
1663
//                  ::= fL <number> p <(top-level) CV-qualifiers> _
1664
//                  ::= fL <number> p <(top-level) CV-qualifiers> <number> _
1665
0
static bool ParseExpression(State *state) {
1666
0
  ComplexityGuard guard(state);
1667
0
  if (guard.IsTooComplex()) return false;
1668
0
  if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
1669
0
    return true;
1670
0
  }
1671
1672
0
  ParseState copy = state->parse_state;
1673
1674
  // Object/function call expression.
1675
0
  if (ParseTwoCharToken(state, "cl") && OneOrMore(ParseExpression, state) &&
1676
0
      ParseOneCharToken(state, 'E')) {
1677
0
    return true;
1678
0
  }
1679
0
  state->parse_state = copy;
1680
1681
  // Clang-specific "cp <simple-id> <expression>* E"
1682
  //   https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338
1683
0
  if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) &&
1684
0
      ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, 'E')) {
1685
0
    return true;
1686
0
  }
1687
0
  state->parse_state = copy;
1688
1689
  // Function-param expression (level 0).
1690
0
  if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
1691
0
      Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
1692
0
    return true;
1693
0
  }
1694
0
  state->parse_state = copy;
1695
1696
  // Function-param expression (level 1+).
1697
0
  if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
1698
0
      ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
1699
0
      Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
1700
0
    return true;
1701
0
  }
1702
0
  state->parse_state = copy;
1703
1704
  // Parse the conversion expressions jointly to avoid re-parsing the <type> in
1705
  // their common prefix.  Parsed as:
1706
  // <expression> ::= cv <type> <conversion-args>
1707
  // <conversion-args> ::= _ <expression>* E
1708
  //                   ::= <expression>
1709
  //
1710
  // Also don't try ParseOperatorName after seeing "cv", since ParseOperatorName
1711
  // also needs to accept "cv <type>" in other contexts.
1712
0
  if (ParseTwoCharToken(state, "cv")) {
1713
0
    if (ParseType(state)) {
1714
0
      ParseState copy2 = state->parse_state;
1715
0
      if (ParseOneCharToken(state, '_') && ZeroOrMore(ParseExpression, state) &&
1716
0
          ParseOneCharToken(state, 'E')) {
1717
0
        return true;
1718
0
      }
1719
0
      state->parse_state = copy2;
1720
0
      if (ParseExpression(state)) {
1721
0
        return true;
1722
0
      }
1723
0
    }
1724
0
  } else {
1725
    // Parse unary, binary, and ternary operator expressions jointly, taking
1726
    // care not to re-parse subexpressions repeatedly. Parse like:
1727
    //   <expression> ::= <operator-name> <expression>
1728
    //                    [<one-to-two-expressions>]
1729
    //   <one-to-two-expressions> ::= <expression> [<expression>]
1730
0
    int arity = -1;
1731
0
    if (ParseOperatorName(state, &arity) &&
1732
0
        arity > 0 &&  // 0 arity => disabled.
1733
0
        (arity < 3 || ParseExpression(state)) &&
1734
0
        (arity < 2 || ParseExpression(state)) &&
1735
0
        (arity < 1 || ParseExpression(state))) {
1736
0
      return true;
1737
0
    }
1738
0
  }
1739
0
  state->parse_state = copy;
1740
1741
  // sizeof type
1742
0
  if (ParseTwoCharToken(state, "st") && ParseType(state)) {
1743
0
    return true;
1744
0
  }
1745
0
  state->parse_state = copy;
1746
1747
  // Object and pointer member access expressions.
1748
0
  if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
1749
0
      ParseExpression(state) && ParseType(state)) {
1750
0
    return true;
1751
0
  }
1752
0
  state->parse_state = copy;
1753
1754
  // Pointer-to-member access expressions.  This parses the same as a binary
1755
  // operator, but it's implemented separately because "ds" shouldn't be
1756
  // accepted in other contexts that parse an operator name.
1757
0
  if (ParseTwoCharToken(state, "ds") && ParseExpression(state) &&
1758
0
      ParseExpression(state)) {
1759
0
    return true;
1760
0
  }
1761
0
  state->parse_state = copy;
1762
1763
  // Parameter pack expansion
1764
0
  if (ParseTwoCharToken(state, "sp") && ParseExpression(state)) {
1765
0
    return true;
1766
0
  }
1767
0
  state->parse_state = copy;
1768
1769
0
  return ParseUnresolvedName(state);
1770
0
}
1771
1772
// <expr-primary> ::= L <type> <(value) number> E
1773
//                ::= L <type> <(value) float> E
1774
//                ::= L <mangled-name> E
1775
//                // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
1776
//                ::= LZ <encoding> E
1777
//
1778
// Warning, subtle: the "bug" LZ production above is ambiguous with the first
1779
// production where <type> starts with <local-name>, which can lead to
1780
// exponential backtracking in two scenarios:
1781
//
1782
// - When whatever follows the E in the <local-name> in the first production is
1783
//   not a name, we backtrack the whole <encoding> and re-parse the whole thing.
1784
//
1785
// - When whatever follows the <local-name> in the first production is not a
1786
//   number and this <expr-primary> may be followed by a name, we backtrack the
1787
//   <name> and re-parse it.
1788
//
1789
// Moreover this ambiguity isn't always resolved -- for example, the following
1790
// has two different parses:
1791
//
1792
//   _ZaaILZ4aoeuE1x1EvE
1793
//   => operator&&<aoeu, x, E, void>
1794
//   => operator&&<(aoeu::x)(1), void>
1795
//
1796
// To resolve this, we just do what GCC's demangler does, and refuse to parse
1797
// casts to <local-name> types.
1798
0
static bool ParseExprPrimary(State *state) {
1799
0
  ComplexityGuard guard(state);
1800
0
  if (guard.IsTooComplex()) return false;
1801
0
  ParseState copy = state->parse_state;
1802
1803
  // The "LZ" special case: if we see LZ, we commit to accept "LZ <encoding> E"
1804
  // or fail, no backtracking.
1805
0
  if (ParseTwoCharToken(state, "LZ")) {
1806
0
    if (ParseEncoding(state) && ParseOneCharToken(state, 'E')) {
1807
0
      return true;
1808
0
    }
1809
1810
0
    state->parse_state = copy;
1811
0
    return false;
1812
0
  }
1813
1814
  // The merged cast production.
1815
0
  if (ParseOneCharToken(state, 'L') && ParseType(state) &&
1816
0
      ParseExprCastValue(state)) {
1817
0
    return true;
1818
0
  }
1819
0
  state->parse_state = copy;
1820
1821
0
  if (ParseOneCharToken(state, 'L') && ParseMangledName(state) &&
1822
0
      ParseOneCharToken(state, 'E')) {
1823
0
    return true;
1824
0
  }
1825
0
  state->parse_state = copy;
1826
1827
0
  return false;
1828
0
}
1829
1830
// <number> or <float>, followed by 'E', as described above ParseExprPrimary.
1831
0
static bool ParseExprCastValue(State *state) {
1832
0
  ComplexityGuard guard(state);
1833
0
  if (guard.IsTooComplex()) return false;
1834
  // We have to be able to backtrack after accepting a number because we could
1835
  // have e.g. "7fffE", which will accept "7" as a number but then fail to find
1836
  // the 'E'.
1837
0
  ParseState copy = state->parse_state;
1838
0
  if (ParseNumber(state, nullptr) && ParseOneCharToken(state, 'E')) {
1839
0
    return true;
1840
0
  }
1841
0
  state->parse_state = copy;
1842
1843
0
  if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) {
1844
0
    return true;
1845
0
  }
1846
0
  state->parse_state = copy;
1847
1848
0
  return false;
1849
0
}
1850
1851
// <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
1852
//              ::= Z <(function) encoding> E s [<discriminator>]
1853
//
1854
// Parsing a common prefix of these two productions together avoids an
1855
// exponential blowup of backtracking.  Parse like:
1856
//   <local-name> := Z <encoding> E <local-name-suffix>
1857
//   <local-name-suffix> ::= s [<discriminator>]
1858
//                       ::= <name> [<discriminator>]
1859
1860
0
static bool ParseLocalNameSuffix(State *state) {
1861
0
  ComplexityGuard guard(state);
1862
0
  if (guard.IsTooComplex()) return false;
1863
1864
0
  if (MaybeAppend(state, "::") && ParseName(state) &&
1865
0
      Optional(ParseDiscriminator(state))) {
1866
0
    return true;
1867
0
  }
1868
1869
  // Since we're not going to overwrite the above "::" by re-parsing the
1870
  // <encoding> (whose trailing '\0' byte was in the byte now holding the
1871
  // first ':'), we have to rollback the "::" if the <name> parse failed.
1872
0
  if (state->parse_state.append) {
1873
0
    state->out[state->parse_state.out_cur_idx - 2] = '\0';
1874
0
  }
1875
1876
0
  return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
1877
0
}
1878
1879
0
static bool ParseLocalName(State *state) {
1880
0
  ComplexityGuard guard(state);
1881
0
  if (guard.IsTooComplex()) return false;
1882
0
  ParseState copy = state->parse_state;
1883
0
  if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
1884
0
      ParseOneCharToken(state, 'E') && ParseLocalNameSuffix(state)) {
1885
0
    return true;
1886
0
  }
1887
0
  state->parse_state = copy;
1888
0
  return false;
1889
0
}
1890
1891
// <discriminator> := _ <(non-negative) number>
1892
0
static bool ParseDiscriminator(State *state) {
1893
0
  ComplexityGuard guard(state);
1894
0
  if (guard.IsTooComplex()) return false;
1895
0
  ParseState copy = state->parse_state;
1896
0
  if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) {
1897
0
    return true;
1898
0
  }
1899
0
  state->parse_state = copy;
1900
0
  return false;
1901
0
}
1902
1903
// <substitution> ::= S_
1904
//                ::= S <seq-id> _
1905
//                ::= St, etc.
1906
//
1907
// "St" is special in that it's not valid as a standalone name, and it *is*
1908
// allowed to precede a name without being wrapped in "N...E".  This means that
1909
// if we accept it on its own, we can accept "St1a" and try to parse
1910
// template-args, then fail and backtrack, accept "St" on its own, then "1a" as
1911
// an unqualified name and re-parse the same template-args.  To block this
1912
// exponential backtracking, we disable it with 'accept_std=false' in
1913
// problematic contexts.
1914
0
static bool ParseSubstitution(State *state, bool accept_std) {
1915
0
  ComplexityGuard guard(state);
1916
0
  if (guard.IsTooComplex()) return false;
1917
0
  if (ParseTwoCharToken(state, "S_")) {
1918
0
    MaybeAppend(state, "?");  // We don't support substitutions.
1919
0
    return true;
1920
0
  }
1921
1922
0
  ParseState copy = state->parse_state;
1923
0
  if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
1924
0
      ParseOneCharToken(state, '_')) {
1925
0
    MaybeAppend(state, "?");  // We don't support substitutions.
1926
0
    return true;
1927
0
  }
1928
0
  state->parse_state = copy;
1929
1930
  // Expand abbreviations like "St" => "std".
1931
0
  if (ParseOneCharToken(state, 'S')) {
1932
0
    const AbbrevPair *p;
1933
0
    for (p = kSubstitutionList; p->abbrev != nullptr; ++p) {
1934
0
      if (RemainingInput(state)[0] == p->abbrev[1] &&
1935
0
          (accept_std || p->abbrev[1] != 't')) {
1936
0
        MaybeAppend(state, "std");
1937
0
        if (p->real_name[0] != '\0') {
1938
0
          MaybeAppend(state, "::");
1939
0
          MaybeAppend(state, p->real_name);
1940
0
        }
1941
0
        ++state->parse_state.mangled_idx;
1942
0
        return true;
1943
0
      }
1944
0
    }
1945
0
  }
1946
0
  state->parse_state = copy;
1947
0
  return false;
1948
0
}
1949
1950
// Parse <mangled-name>, optionally followed by either a function-clone suffix
1951
// or version suffix.  Returns true only if all of "mangled_cur" was consumed.
1952
0
static bool ParseTopLevelMangledName(State *state) {
1953
0
  ComplexityGuard guard(state);
1954
0
  if (guard.IsTooComplex()) return false;
1955
0
  if (ParseMangledName(state)) {
1956
0
    if (RemainingInput(state)[0] != '\0') {
1957
      // Drop trailing function clone suffix, if any.
1958
0
      if (IsFunctionCloneSuffix(RemainingInput(state))) {
1959
0
        return true;
1960
0
      }
1961
      // Append trailing version suffix if any.
1962
      // ex. _Z3foo@@GLIBCXX_3.4
1963
0
      if (RemainingInput(state)[0] == '@') {
1964
0
        MaybeAppend(state, RemainingInput(state));
1965
0
        return true;
1966
0
      }
1967
0
      return false;  // Unconsumed suffix.
1968
0
    }
1969
0
    return true;
1970
0
  }
1971
0
  return false;
1972
0
}
1973
1974
0
static bool Overflowed(const State *state) {
1975
0
  return state->parse_state.out_cur_idx >= state->out_end_idx;
1976
0
}
1977
1978
// The demangler entry point.
1979
0
bool Demangle(const char* mangled, char* out, size_t out_size) {
1980
0
  State state;
1981
0
  InitState(&state, mangled, out, out_size);
1982
0
  return ParseTopLevelMangledName(&state) && !Overflowed(&state) &&
1983
0
         state.parse_state.out_cur_idx > 0;
1984
0
}
1985
1986
}  // namespace debugging_internal
1987
ABSL_NAMESPACE_END
1988
}  // namespace absl