/src/abseil-cpp/absl/debugging/internal/demangle_rust.cc

Source (jump to first uncovered line)
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/debugging/internal/demangle_rust.h"

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/debugging/internal/decode_rust_punycode.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {

namespace {

// Same step limit as the C++ demangler in demangle.cc uses.
constexpr int kMaxReturns = 1 << 17;

bool IsDigit(char c) { return '0' <= c && c <= '9'; }
bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; }
bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); }
bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; }
bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); }

const char* BasicTypeName(char c) {
  switch (c) {
    case 'a': return "i8";
    case 'b': return "bool";
    case 'c': return "char";
    case 'd': return "f64";
    case 'e': return "str";
    case 'f': return "f32";
    case 'h': return "u8";
    case 'i': return "isize";
    case 'j': return "usize";
    case 'l': return "i32";
    case 'm': return "u32";
    case 'n': return "i128";
    case 'o': return "u128";
    case 'p': return "_";
    case 's': return "i16";
    case 't': return "u16";
    case 'u': return "()";
    case 'v': return "...";
    case 'x': return "i64";
    case 'y': return "u64";
    case 'z': return "!";
  }
  return nullptr;
}

// Parser for Rust symbol mangling v0, whose grammar is defined here:
//
// https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary
class RustSymbolParser {
 public:
  // Prepares to demangle the given encoding, a Rust symbol name starting with
  // _R, into the output buffer [out, out_end).  The caller is expected to
  // continue by calling the new object's Parse function.
  RustSymbolParser(const char* encoding, char* out, char* const out_end)
      : encoding_(encoding), out_(out), out_end_(out_end) {
    if (out_ != out_end_) *out_ = '\0';
  }

  // Parses the constructor's encoding argument, writing output into the range
  // [out, out_end).  Returns true on success and false for input whose
  // structure was not recognized or exceeded implementation limits, such as by
  // nesting structures too deep.  In either case *this should not be used
  // again.
  ABSL_MUST_USE_RESULT bool Parse() && {
    // Recursively parses the grammar production named by callee, then resumes
    // execution at the next statement.
    //
    // Recursive-descent parsing is a beautifully readable translation of a
    // grammar, but it risks stack overflow if implemented by naive recursion on
    // the C++ call stack.  So we simulate recursion by goto and switch instead,
    // keeping a bounded stack of "return addresses" in the recursion_stack_
    // member.
    //
    // The callee argument is a statement label.  We goto that label after
    // saving the "return address" on recursion_stack_.  The next continue
    // statement in the for loop below "returns" from this "call".
    //
    // The caller argument names the return point.  Each value of caller must
    // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the
    // definition of enum ReturnAddress.  The switch implements the control
    // transfer from the end of a "called" subroutine back to the statement
    // after the "call".
    //
    // Note that not all the grammar productions have to be packed into the
    // switch, but only those which appear in a cycle in the grammar.  Anything
    // acyclic can be written as ordinary functions and function calls, e.g.,
    // ParseIdentifier.
#define ABSL_DEMANGLER_RECURSE(callee, caller) \
    do { \
      if (recursion_depth_ == kStackSize) return false; \
      /* The next continue will switch on this saved value ... */ \
      recursion_stack_[recursion_depth_++] = caller; \
      goto callee; \
      /* ... and will land here, resuming the suspended code. */ \
      case caller: {} \
    } while (0)

    // Parse the encoding, counting completed recursive calls to guard against
    // excessively complex input and infinite-loop bugs.
    int iter = 0;
    goto whole_encoding;
    for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) {
      // This switch resumes the code path most recently suspended by
      // ABSL_DEMANGLER_RECURSE.
      switch (recursion_stack_[--recursion_depth_]) {
        //
        // symbol-name ->
        // _R decimal-number? path instantiating-crate? vendor-specific-suffix?
        whole_encoding:
          if (!Eat('_') || !Eat('R')) return false;
          // decimal-number? is always empty today, so proceed to path, which
          // can't start with a decimal digit.
          ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate);
          if (IsAlpha(Peek())) {
            ++silence_depth_;  // Print nothing more from here on.
            ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix);
          }
          switch (Take()) {
            case '.': case '$': case '\0': return true;
          }
          return false;  // unexpected trailing content

        // path -> crate-root | inherent-impl | trait-impl | trait-definition |
        //         nested-path | generic-args | backref
        //
        // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch
        // (which would hide the generated case label).  Thus we jump out of the
        // inner switch with gotos before performing any fake recursion.
        path:
          switch (Take()) {
            case 'C': goto crate_root;
            case 'M': goto inherent_impl;
            case 'X': goto trait_impl;
            case 'Y': goto trait_definition;
            case 'N': goto nested_path;
            case 'I': goto generic_args;
            case 'B': goto path_backref;
            default: return false;
          }

        // crate-root -> C identifier (C consumed above)
        crate_root:
          if (!ParseIdentifier()) return false;
          continue;

        // inherent-impl -> M impl-path type (M already consumed)
        inherent_impl:
          if (!Emit("<")) return false;
          ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType);
          ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding);
          if (!Emit(">")) return false;
          continue;

        // trait-impl -> X impl-path type path (X already consumed)
        trait_impl:
          if (!Emit("<")) return false;
          ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType);
          ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix);
          if (!Emit(" as ")) return false;
          ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding);
          if (!Emit(">")) return false;
          continue;

        // impl-path -> disambiguator? path (but never print it!)
        impl_path:
          ++silence_depth_;
          {
            int ignored_disambiguator;
            if (!ParseDisambiguator(ignored_disambiguator)) return false;
          }
          ABSL_DEMANGLER_RECURSE(path, kImplPathEnding);
          --silence_depth_;
          continue;

        // trait-definition -> Y type path (Y already consumed)
        trait_definition:
          if (!Emit("<")) return false;
          ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix);
          if (!Emit(" as ")) return false;
          ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding);
          if (!Emit(">")) return false;
          continue;

        // nested-path -> N namespace path identifier (N already consumed)
        // namespace -> lower | upper
        nested_path:
          // Uppercase namespaces must be saved on a stack so we can print
          // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed.
          if (IsUpper(Peek())) {
            if (!PushNamespace(Take())) return false;
            ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace);
            if (!Emit("::")) return false;
            if (!ParseIdentifier(PopNamespace())) return false;
            continue;
          }

          // Lowercase namespaces, however, are never represented in the output;
          // they all emit just ::name.
          if (IsLower(Take())) {
            ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace);
            if (!Emit("::")) return false;
            if (!ParseIdentifier()) return false;
            continue;
          }

          // Neither upper or lower
          return false;

        // type -> basic-type | array-type | slice-type | tuple-type |
        //         ref-type | mut-ref-type | const-ptr-type | mut-ptr-type |
        //         fn-type | dyn-trait-type | path | backref
        //
        // We use ifs instead of switch (Take()) because the default case jumps
        // to path, which will need to see the first character not yet Taken
        // from the input.  Because we do not use a nested switch here,
        // ABSL_DEMANGLER_RECURSE works fine in the 'S' case.
        type:
          if (IsLower(Peek())) {
            const char* type_name = BasicTypeName(Take());
            if (type_name == nullptr || !Emit(type_name)) return false;
            continue;
          }
          if (Eat('A')) {
            // array-type = A type const
            if (!Emit("[")) return false;
            ABSL_DEMANGLER_RECURSE(type, kArraySize);
            if (!Emit("; ")) return false;
            ABSL_DEMANGLER_RECURSE(constant, kFinishArray);
            if (!Emit("]")) return false;
            continue;
          }
          if (Eat('S')) {
            if (!Emit("[")) return false;
            ABSL_DEMANGLER_RECURSE(type, kSliceEnding);
            if (!Emit("]")) return false;
            continue;
          }
          if (Eat('T')) goto tuple_type;
          if (Eat('R')) {
            if (!Emit("&")) return false;
            if (!ParseOptionalLifetime()) return false;
            goto type;
          }
          if (Eat('Q')) {
            if (!Emit("&mut ")) return false;
            if (!ParseOptionalLifetime()) return false;
            goto type;
          }
          if (Eat('P')) {
            if (!Emit("*const ")) return false;
            goto type;
          }
          if (Eat('O')) {
            if (!Emit("*mut ")) return false;
            goto type;
          }
          if (Eat('F')) goto fn_type;
          if (Eat('D')) goto dyn_trait_type;
          if (Eat('B')) goto type_backref;
          goto path;

        // tuple-type -> T type* E (T already consumed)
        tuple_type:
          if (!Emit("(")) return false;

          // The toolchain should call the unit type u instead of TE, but the
          // grammar and other demanglers also recognize TE, so we do too.
          if (Eat('E')) {
            if (!Emit(")")) return false;
            continue;
          }

          // A tuple with one element is rendered (type,) instead of (type).
          ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement);
          if (Eat('E')) {
            if (!Emit(",)")) return false;
            continue;
          }

          // A tuple with two elements is of course (x, y).
          if (!Emit(", ")) return false;
          ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement);
          if (Eat('E')) {
            if (!Emit(")")) return false;
            continue;
          }

          // And (x, y, z) for three elements.
          if (!Emit(", ")) return false;
          ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement);
          if (Eat('E')) {
            if (!Emit(")")) return false;
            continue;
          }

          // For longer tuples we write (x, y, z, ...), printing none of the
          // content of the fourth and later types.  Thus we avoid exhausting
          // output buffers and human readers' patience when some library has a
          // long tuple as an implementation detail, without having to
          // completely obfuscate all tuples.
          if (!Emit(", ...)")) return false;
          ++silence_depth_;
          while (!Eat('E')) {
            ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement);
          }
          --silence_depth_;
          continue;

        // fn-type -> F fn-sig (F already consumed)
        // fn-sig -> binder? U? (K abi)? type* E type
        // abi -> C | undisambiguated-identifier
        //
        // We follow the C++ demangler in suppressing details of function
        // signatures.  Every function type is rendered "fn...".
        fn_type:
          if (!Emit("fn...")) return false;
          ++silence_depth_;
          if (!ParseOptionalBinder()) return false;
          (void)Eat('U');
          if (Eat('K')) {
            if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false;
          }
          while (!Eat('E')) {
            ABSL_DEMANGLER_RECURSE(type, kContinueParameterList);
          }
          ABSL_DEMANGLER_RECURSE(type, kFinishFn);
          --silence_depth_;
          continue;

        // dyn-trait-type -> D dyn-bounds lifetime (D already consumed)
        // dyn-bounds -> binder? dyn-trait* E
        //
        // The grammar strangely allows an empty trait list, even though the
        // compiler should never output one.  We follow existing demanglers in
        // rendering DEL_ as "dyn ".
        //
        // Because auto traits lengthen a type name considerably without
        // providing much value to a search for related source code, it would be
        // desirable to abbreviate
        //     dyn main::Trait + std::marker::Copy + std::marker::Send
        // to
        //     dyn main::Trait + ...,
        // eliding the auto traits.  But it is difficult to do so correctly, in
        // part because there is no guarantee that the mangling will list the
        // main trait first.  So we just print all the traits in their order of
        // appearance in the mangled name.
        dyn_trait_type:
          if (!Emit("dyn ")) return false;
          if (!ParseOptionalBinder()) return false;
          if (!Eat('E')) {
            ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits);
            while (!Eat('E')) {
              if (!Emit(" + ")) return false;
              ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits);
            }
          }
          if (!ParseRequiredLifetime()) return false;
          continue;

        // dyn-trait -> path dyn-trait-assoc-binding*
        // dyn-trait-assoc-binding -> p undisambiguated-identifier type
        //
        // We render nonempty binding lists as <>, omitting their contents as
        // for generic-args.
        dyn_trait:
          ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait);
          if (Peek() == 'p') {
            if (!Emit("<>")) return false;
            ++silence_depth_;
            while (Eat('p')) {
              if (!ParseUndisambiguatedIdentifier()) return false;
              ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding);
            }
            --silence_depth_;
          }
          continue;

        // const -> type const-data | p | backref
        //
        // const is a C++ keyword, so we use the label `constant` instead.
        constant:
          if (Eat('B')) goto const_backref;
          if (Eat('p')) {
            if (!Emit("_")) return false;
            continue;
          }

          // Scan the type without printing it.
          //
          // The Rust language restricts the type of a const generic argument
          // much more than the mangling grammar does.  We do not enforce this.
          //
          // We also do not bother printing false, true, 'A', and '\u{abcd}' for
          // the types bool and char.  Because we do not print generic-args
          // contents, we expect to print constants only in array sizes, and
          // those should not be bool or char.
          ++silence_depth_;
          ABSL_DEMANGLER_RECURSE(type, kConstData);
          --silence_depth_;

          // const-data -> n? hex-digit* _
          //
          // Although the grammar doesn't say this, existing demanglers expect
          // that zero is 0, not an empty digit sequence, and no nonzero value
          // may have leading zero digits.  Also n0_ is accepted and printed as
          // -0, though a toolchain will probably never write that encoding.
          if (Eat('n') && !EmitChar('-')) return false;
          if (!Emit("0x")) return false;
          if (Eat('0')) {
            if (!EmitChar('0')) return false;
            if (!Eat('_')) return false;
            continue;
          }
          while (IsLowerHexDigit(Peek())) {
            if (!EmitChar(Take())) return false;
          }
          if (!Eat('_')) return false;
          continue;

        // generic-args -> I path generic-arg* E (I already consumed)
        //
        // We follow the C++ demangler in omitting all the arguments from the
        // output, printing only the list opening and closing tokens.
        generic_args:
          ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList);
          if (!Emit("::<>")) return false;
          ++silence_depth_;
          while (!Eat('E')) {
            ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList);
          }
          --silence_depth_;
          continue;

        // generic-arg -> lifetime | type | K const
        generic_arg:
          if (Peek() == 'L') {
            if (!ParseOptionalLifetime()) return false;
            continue;
          }
          if (Eat('K')) goto constant;
          goto type;

        // backref -> B base-62-number (B already consumed)
        //
        // The BeginBackref call parses and range-checks the base-62-number.  We
        // always do that much.
        //
        // The recursive call parses and prints what the backref points at.  We
        // save CPU and stack by skipping this work if the output would be
        // suppressed anyway.
        path_backref:
          if (!BeginBackref()) return false;
          if (silence_depth_ == 0) {
            ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding);
          }
          EndBackref();
          continue;

        // This represents the same backref production as in path_backref but
        // parses the target as a type instead of a path.
        type_backref:
          if (!BeginBackref()) return false;
          if (silence_depth_ == 0) {
            ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding);
          }
          EndBackref();
          continue;

        const_backref:
          if (!BeginBackref()) return false;
          if (silence_depth_ == 0) {
            ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding);
          }
          EndBackref();
          continue;
      }
    }

    return false;  // hit iteration limit or a bug in our stack handling
  }

 private:
  // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls.
  enum ReturnAddress : uint8_t {
    kInstantiatingCrate,
    kVendorSpecificSuffix,
    kIdentifierInUppercaseNamespace,
    kIdentifierInLowercaseNamespace,
    kInherentImplType,
    kInherentImplEnding,
    kTraitImplType,
    kTraitImplInfix,
    kTraitImplEnding,
    kImplPathEnding,
    kTraitDefinitionInfix,
    kTraitDefinitionEnding,
    kArraySize,
    kFinishArray,
    kSliceEnding,
    kAfterFirstTupleElement,
    kAfterSecondTupleElement,
    kAfterThirdTupleElement,
    kAfterSubsequentTupleElement,
    kContinueParameterList,
    kFinishFn,
    kBeginAutoTraits,
    kContinueAutoTraits,
    kContinueDynTrait,
    kContinueAssocBinding,
    kConstData,
    kBeginGenericArgList,
    kContinueGenericArgList,
    kPathBackrefEnding,
    kTypeBackrefEnding,
    kConstantBackrefEnding,
  };

  // Element counts for the stack arrays.  Larger stack sizes accommodate more
  // deeply nested names at the cost of a larger footprint on the C++ call
  // stack.
  enum {
    // Maximum recursive calls outstanding at one time.
    kStackSize = 256,

    // Maximum N<uppercase> nested-paths open at once.  We do not expect
    // closures inside closures inside closures as much as functions inside
    // modules inside other modules, so we can use a smaller array here.
    kNamespaceStackSize = 64,

    // Maximum number of nested backrefs.  We can keep this stack pretty small
    // because we do not follow backrefs inside generic-args or other contexts
    // that suppress printing, so deep stacking is unlikely in practice.
    kPositionStackSize = 16,
  };

  // Returns the next input character without consuming it.
  char Peek() const { return encoding_[pos_]; }

  // Consumes and returns the next input character.
  char Take() { return encoding_[pos_++]; }

  // If the next input character is the given character, consumes it and returns
  // true; otherwise returns false without consuming a character.
  ABSL_MUST_USE_RESULT bool Eat(char want) {
    if (encoding_[pos_] != want) return false;
    ++pos_;
    return true;
  }

  // Provided there is enough remaining output space, appends c to the output,
  // writing a fresh NUL terminator afterward, and returns true.  Returns false
  // if the output buffer had less than two bytes free.
  ABSL_MUST_USE_RESULT bool EmitChar(char c) {
    if (silence_depth_ > 0) return true;
    if (out_end_ - out_ < 2) return false;
    *out_++ = c;
    *out_ = '\0';
    return true;
  }

  // Provided there is enough remaining output space, appends the C string token
  // to the output, followed by a NUL character, and returns true.  Returns
  // false if not everything fit into the output buffer.
  ABSL_MUST_USE_RESULT bool Emit(const char* token) {
    if (silence_depth_ > 0) return true;
    const size_t token_length = std::strlen(token);
    const size_t bytes_to_copy = token_length + 1;  // token and final NUL
    if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false;
    std::memcpy(out_, token, bytes_to_copy);
    out_ += token_length;
    return true;
  }

  // Provided there is enough remaining output space, appends the decimal form
  // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the
  // output, followed by a NUL character, and returns true.  Returns false if
  // not everything fit into the output buffer.
  ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) {
    if (disambiguator < 0) return EmitChar('?');  // parsed but too large
    if (disambiguator == 0) return EmitChar('0');
    // Convert disambiguator to decimal text.  Three digits per byte is enough
    // because 999 > 256.  The bound will remain correct even if future
    // maintenance changes the type of the disambiguator variable.
    char digits[3 * sizeof(disambiguator)] = {};
    size_t leading_digit_index = sizeof(digits) - 1;
    for (; disambiguator > 0; disambiguator /= 10) {
      digits[--leading_digit_index] =
          static_cast<char>('0' + disambiguator % 10);
    }
    return Emit(digits + leading_digit_index);
  }

  // Consumes an optional disambiguator (s123_) from the input.
  //
  // On success returns true and fills value with the encoded value if it was
  // not too big, otherwise with -1.  If the optional disambiguator was omitted,
  // value is 0.  On parse failure returns false and sets value to -1.
  ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) {
    value = -1;

    // disambiguator = s base-62-number
    //
    // Disambiguators are optional.  An omitted disambiguator is zero.
    if (!Eat('s')) {
      value = 0;
      return true;
    }
    int base_62_value = 0;
    if (!ParseBase62Number(base_62_value)) return false;
    value = base_62_value < 0 ? -1 : base_62_value + 1;
    return true;
  }

  // Consumes a base-62 number like _ or 123_ from the input.
  //
  // On success returns true and fills value with the encoded value if it was
  // not too big, otherwise with -1.  On parse failure returns false and sets
  // value to -1.
  ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) {
    value = -1;

    // base-62-number = (digit | lower | upper)* _
    //
    // An empty base-62 digit sequence means 0.
    if (Eat('_')) {
      value = 0;
      return true;
    }

    // A nonempty digit sequence denotes its base-62 value plus 1.
    int encoded_number = 0;
    bool overflowed = false;
    while (IsAlpha(Peek()) || IsDigit(Peek())) {
      const char c = Take();
      if (encoded_number >= std::numeric_limits<int>::max()/62) {
        // If we are close to overflowing an int, keep parsing but stop updating
        // encoded_number and remember to return -1 at the end.  The point is to
        // avoid undefined behavior while parsing crate-root disambiguators,
        // which are large in practice but not shown in demangling, while
        // successfully computing closure and shim disambiguators, which are
        // typically small and are printed out.
        overflowed = true;
      } else {
        int digit;
        if (IsDigit(c)) {
          digit = c - '0';
        } else if (IsLower(c)) {
          digit = c - 'a' + 10;
        } else {
          digit = c - 'A' + 36;
        }
        encoded_number = 62 * encoded_number + digit;
      }
    }

    if (!Eat('_')) return false;
    if (!overflowed) value = encoded_number + 1;
    return true;
  }

  // Consumes an identifier from the input, returning true on success.
  //
  // A nonzero uppercase_namespace specifies the character after the N in a
  // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to
  // write out the name with the conventional decoration for that namespace.
  ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') {
    // identifier -> disambiguator? undisambiguated-identifier
    int disambiguator = 0;
    if (!ParseDisambiguator(disambiguator)) return false;

    return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator);
  }

  // Consumes from the input an identifier with no preceding disambiguator,
  // returning true on success.
  //
  // When ParseIdentifier calls this, it passes the N<namespace> character and
  // disambiguator value so that "{closure#42}" and similar forms can be
  // rendered correctly.
  //
  // At other appearances of undisambiguated-identifier in the grammar, this
  // treatment is not applicable, and the call site omits both arguments.
  ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier(
      char uppercase_namespace = '\0', int disambiguator = 0) {
    // undisambiguated-identifier -> u? decimal-number _? bytes
    const bool is_punycoded = Eat('u');
    if (!IsDigit(Peek())) return false;
    int num_bytes = 0;
    if (!ParseDecimalNumber(num_bytes)) return false;
    (void)Eat('_');  // optional separator, needed if a digit follows
    if (is_punycoded) {
      DecodeRustPunycodeOptions options;
      options.punycode_begin = &encoding_[pos_];
      options.punycode_end = &encoding_[pos_] + num_bytes;
      options.out_begin = out_;
      options.out_end = out_end_;
      out_ = DecodeRustPunycode(options);
      if (out_ == nullptr) return false;
      pos_ += static_cast<size_t>(num_bytes);
    }

    // Emit the beginnings of braced forms like {shim:vtable#0}.
    if (uppercase_namespace != '\0') {
      switch (uppercase_namespace) {
        case 'C':
          if (!Emit("{closure")) return false;
          break;
        case 'S':
          if (!Emit("{shim")) return false;
          break;
        default:
          if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false;
          break;
      }
      if (num_bytes > 0 && !Emit(":")) return false;
    }

    // Emit the name itself.
    if (!is_punycoded) {
      for (int i = 0; i < num_bytes; ++i) {
        const char c = Take();
        if (!IsIdentifierChar(c) &&
            // The spec gives toolchains the choice of Punycode or raw UTF-8 for
            // identifiers containing code points above 0x7f, so accept bytes
            // with the high bit set.
            (c & 0x80) == 0) {
          return false;
        }
        if (!EmitChar(c)) return false;
      }
    }

    // Emit the endings of braced forms, e.g., "#42}".
    if (uppercase_namespace != '\0') {
      if (!EmitChar('#')) return false;
      if (!EmitDisambiguator(disambiguator)) return false;
      if (!EmitChar('}')) return false;
    }

    return true;
  }

  // Consumes a decimal number like 0 or 123 from the input.  On success returns
  // true and fills value with the encoded value.  If the encoded value is too
  // large or otherwise unparsable, returns false and sets value to -1.
  ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) {
    value = -1;
    if (!IsDigit(Peek())) return false;
    int encoded_number = Take() - '0';
    if (encoded_number == 0) {
      // Decimal numbers are never encoded with extra leading zeroes.
      value = 0;
      return true;
    }
    while (IsDigit(Peek()) &&
           // avoid overflow
           encoded_number < std::numeric_limits<int>::max()/10) {
      encoded_number = 10 * encoded_number + (Take() - '0');
    }
    if (IsDigit(Peek())) return false;  // too big
    value = encoded_number;
    return true;
  }

  // Consumes a binder of higher-ranked lifetimes if one is present.  On success
  // returns true and discards the encoded lifetime count.  On parse failure
  // returns false.
  ABSL_MUST_USE_RESULT bool ParseOptionalBinder() {
    // binder -> G base-62-number
    if (!Eat('G')) return true;
    int ignored_binding_count;
    return ParseBase62Number(ignored_binding_count);
  }

  // Consumes a lifetime if one is present.
  //
  // On success returns true and discards the lifetime index.  We do not print
  // or even range-check lifetimes because they are a finer detail than other
  // things we omit from output, such as the entire contents of generic-args.
  //
  // On parse failure returns false.
  ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() {
    // lifetime -> L base-62-number
    if (!Eat('L')) return true;
    int ignored_de_bruijn_index;
    return ParseBase62Number(ignored_de_bruijn_index);
  }

  // Consumes a lifetime just like ParseOptionalLifetime, but returns false if
  // there is no lifetime here.
  ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() {
    if (Peek() != 'L') return false;
    return ParseOptionalLifetime();
  }

  // Pushes ns onto the namespace stack and returns true if the stack is not
  // full, else returns false.
  ABSL_MUST_USE_RESULT bool PushNamespace(char ns) {
    if (namespace_depth_ == kNamespaceStackSize) return false;
    namespace_stack_[namespace_depth_++] = ns;
    return true;
  }

  // Pops the last pushed namespace.  Requires that the namespace stack is not
  // empty (namespace_depth_ > 0).
  char PopNamespace() { return namespace_stack_[--namespace_depth_]; }

  // Pushes position onto the position stack and returns true if the stack is
  // not full, else returns false.
  ABSL_MUST_USE_RESULT bool PushPosition(int position) {
    if (position_depth_ == kPositionStackSize) return false;
    position_stack_[position_depth_++] = position;
    return true;
  }

  // Pops the last pushed input position.  Requires that the position stack is
  // not empty (position_depth_ > 0).
  int PopPosition() { return position_stack_[--position_depth_]; }

  // Consumes a base-62-number denoting a backref target, pushes the current
  // input position on the data stack, and sets the input position to the
  // beginning of the backref target.  Returns true on success.  Returns false
  // if parsing failed, the stack is exhausted, or the backref target position
  // is out of range.
  ABSL_MUST_USE_RESULT bool BeginBackref() {
    // backref = B base-62-number (B already consumed)
    //
    // Reject backrefs that don't parse, overflow int, or don't point backward.
    // If the offset looks fine, adjust it to account for the _R prefix.
    int offset = 0;
    const int offset_of_this_backref =
        pos_ - 2 /* _R */ - 1 /* B already consumed */;
    if (!ParseBase62Number(offset) || offset < 0 ||
        offset >= offset_of_this_backref) {
      return false;
    }
    offset += 2;

    // Save the old position to restore later.
    if (!PushPosition(pos_)) return false;

    // Move the input position to the backref target.
    //
    // Note that we do not check whether the new position points to the
    // beginning of a construct matching the context in which the backref
    // appeared.  We just jump to it and see whether nested parsing succeeds.
    // We therefore accept various wrong manglings, e.g., a type backref
    // pointing to an 'l' character inside an identifier, which happens to mean
    // i32 when parsed as a type mangling.  This saves the complexity and RAM
    // footprint of remembering which offsets began which kinds of
    // substructures.  Existing demanglers take similar shortcuts.
    pos_ = offset;
    return true;
  }

  // Cleans up after a backref production by restoring the previous input
  // position from the data stack.
  void EndBackref() { pos_ = PopPosition(); }

  // The leftmost recursion_depth_ elements of recursion_stack_ contain the
  // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed.
  ReturnAddress recursion_stack_[kStackSize] = {};
  int recursion_depth_ = 0;

  // The leftmost namespace_depth_ elements of namespace_stack_ contain the
  // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a
  // closure.
  char namespace_stack_[kNamespaceStackSize] = {};
  int namespace_depth_ = 0;

  // The leftmost position_depth_ elements of position_stack_ contain the input
  // positions to return to after fully printing the targets of backrefs.
  int position_stack_[kPositionStackSize] = {};
  int position_depth_ = 0;

  // Anything parsed while silence_depth_ > 0 contributes nothing to the
  // demangled output.  For constructs omitted from the demangling, such as
  // impl-path and the contents of generic-args, we will increment
  // silence_depth_ on the way in and decrement silence_depth_ on the way out.
  int silence_depth_ = 0;

  // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is
  // the next input character to be scanned.
  int pos_ = 0;
  const char* encoding_ = nullptr;

  // Output: *out_ is where the next output character should be written, and
  // out_end_ points past the last byte of available space.
  char* out_ = nullptr;
  char* out_end_ = nullptr;
};

}  // namespace

bool DemangleRustSymbolEncoding(const char* mangled, char* out,
                                size_t out_size) {
  return RustSymbolParser(mangled, out, out + out_size).Parse();
}

}  // namespace debugging_internal
ABSL_NAMESPACE_END
}  // namespace absl

Coverage Report

Created: 2024-09-23 06:29