/src/llvm-project/llvm/lib/Demangle/RustDemangle.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file defines a demangler for Rust v0 mangled symbols as specified in |
10 | | // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "llvm/Demangle/Demangle.h" |
15 | | #include "llvm/Demangle/StringViewExtras.h" |
16 | | #include "llvm/Demangle/Utility.h" |
17 | | |
18 | | #include <algorithm> |
19 | | #include <cassert> |
20 | | #include <cstdint> |
21 | | #include <cstring> |
22 | | #include <limits> |
23 | | #include <string_view> |
24 | | |
25 | | using namespace llvm; |
26 | | |
27 | | using llvm::itanium_demangle::OutputBuffer; |
28 | | using llvm::itanium_demangle::ScopedOverride; |
29 | | using llvm::itanium_demangle::starts_with; |
30 | | |
31 | | namespace { |
32 | | |
// Result of parsing an <undisambiguated-identifier>: the identifier bytes as
// they appear in the input, and whether the "u" (Punycode) marker preceded
// them.
struct Identifier {
  std::string_view Name;
  bool Punycode;

  // True when the identifier has no bytes at all.
  bool empty() const { return Name.size() == 0; }
};
39 | | |
// Types that the mangling encodes with a single lowercase tag character
// (see parseBasicType for the tag of each enumerator).
enum class BasicType {
  Bool,        // bool
  Char,        // char
  I8,          // i8
  I16,         // i16
  I32,         // i32
  I64,         // i64
  I128,        // i128
  ISize,       // isize
  U8,          // u8
  U16,         // u16
  U32,         // u32
  U64,         // u64
  U128,        // u128
  USize,       // usize
  F32,         // f32
  F64,         // f64
  Str,         // str
  Placeholder, // printed as "_"
  Unit,        // printed as "()"
  Variadic,    // printed as "..."
  Never,       // printed as "!"
};
63 | | |
// Tells demanglePath whether the path being printed is nested inside a type.
// Inside a type the "::" before generic arguments is left out.
enum class IsInType {
  No,
  Yes,
};
68 | | |
// Tells demanglePath whether to omit the closing `>` after generic arguments,
// so that the caller can append further entries to the list.
enum class LeaveGenericsOpen {
  No,
  Yes,
};
73 | | |
74 | | class Demangler { |
75 | | // Maximum recursion level. Used to avoid stack overflow. |
76 | | size_t MaxRecursionLevel; |
77 | | // Current recursion level. |
78 | | size_t RecursionLevel; |
79 | | size_t BoundLifetimes; |
80 | | // Input string that is being demangled with "_R" prefix removed. |
81 | | std::string_view Input; |
82 | | // Position in the input string. |
83 | | size_t Position; |
84 | | // When true, print methods append the output to the stream. |
85 | | // When false, the output is suppressed. |
86 | | bool Print; |
87 | | // True if an error occurred. |
88 | | bool Error; |
89 | | |
90 | | public: |
91 | | // Demangled output. |
92 | | OutputBuffer Output; |
93 | | |
94 | | Demangler(size_t MaxRecursionLevel = 500); |
95 | | |
96 | | bool demangle(std::string_view MangledName); |
97 | | |
98 | | private: |
99 | | bool demanglePath(IsInType Type, |
100 | | LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); |
101 | | void demangleImplPath(IsInType InType); |
102 | | void demangleGenericArg(); |
103 | | void demangleType(); |
104 | | void demangleFnSig(); |
105 | | void demangleDynBounds(); |
106 | | void demangleDynTrait(); |
107 | | void demangleOptionalBinder(); |
108 | | void demangleConst(); |
109 | | void demangleConstInt(); |
110 | | void demangleConstBool(); |
111 | | void demangleConstChar(); |
112 | | |
113 | 16.1M | template <typename Callable> void demangleBackref(Callable Demangler) { |
114 | 16.1M | uint64_t Backref = parseBase62Number(); |
115 | 16.1M | if (Error || Backref >= Position) { |
116 | 985 | Error = true; |
117 | 985 | return; |
118 | 985 | } |
119 | | |
120 | 16.1M | if (!Print) |
121 | 8.21M | return; |
122 | | |
123 | 7.88M | ScopedOverride<size_t> SavePosition(Position, Position); |
124 | 7.88M | Position = Backref; |
125 | 7.88M | Demangler(); |
126 | 7.88M | } RustDemangle.cpp:void (anonymous namespace)::Demangler::demangleBackref<(anonymous namespace)::Demangler::demangleConst()::$_2>((anonymous namespace)::Demangler::demangleConst()::$_2) Line | Count | Source | 113 | 2.54k | template <typename Callable> void demangleBackref(Callable Demangler) { | 114 | 2.54k | uint64_t Backref = parseBase62Number(); | 115 | 2.54k | if (Error || Backref >= Position) { | 116 | 327 | Error = true; | 117 | 327 | return; | 118 | 327 | } | 119 | | | 120 | 2.21k | if (!Print) | 121 | 558 | return; | 122 | | | 123 | 1.65k | ScopedOverride<size_t> SavePosition(Position, Position); | 124 | 1.65k | Position = Backref; | 125 | 1.65k | Demangler(); | 126 | 1.65k | } |
RustDemangle.cpp:void (anonymous namespace)::Demangler::demangleBackref<(anonymous namespace)::Demangler::demangleType()::$_1>((anonymous namespace)::Demangler::demangleType()::$_1) Line | Count | Source | 113 | 11.8M | template <typename Callable> void demangleBackref(Callable Demangler) { | 114 | 11.8M | uint64_t Backref = parseBase62Number(); | 115 | 11.8M | if (Error || Backref >= Position) { | 116 | 339 | Error = true; | 117 | 339 | return; | 118 | 339 | } | 119 | | | 120 | 11.8M | if (!Print) | 121 | 4.09M | return; | 122 | | | 123 | 7.72M | ScopedOverride<size_t> SavePosition(Position, Position); | 124 | 7.72M | Position = Backref; | 125 | 7.72M | Demangler(); | 126 | 7.72M | } |
RustDemangle.cpp:void (anonymous namespace)::Demangler::demangleBackref<(anonymous namespace)::Demangler::demanglePath((anonymous namespace)::IsInType, (anonymous namespace)::LeaveGenericsOpen)::$_0>((anonymous namespace)::Demangler::demanglePath((anonymous namespace)::IsInType, (anonymous namespace)::LeaveGenericsOpen)::$_0) Line | Count | Source | 113 | 4.27M | template <typename Callable> void demangleBackref(Callable Demangler) { | 114 | 4.27M | uint64_t Backref = parseBase62Number(); | 115 | 4.27M | if (Error || Backref >= Position) { | 116 | 319 | Error = true; | 117 | 319 | return; | 118 | 319 | } | 119 | | | 120 | 4.27M | if (!Print) | 121 | 4.12M | return; | 122 | | | 123 | 151k | ScopedOverride<size_t> SavePosition(Position, Position); | 124 | 151k | Position = Backref; | 125 | 151k | Demangler(); | 126 | 151k | } |
|
127 | | |
128 | | Identifier parseIdentifier(); |
129 | | uint64_t parseOptionalBase62Number(char Tag); |
130 | | uint64_t parseBase62Number(); |
131 | | uint64_t parseDecimalNumber(); |
132 | | uint64_t parseHexNumber(std::string_view &HexDigits); |
133 | | |
134 | | void print(char C); |
135 | | void print(std::string_view S); |
136 | | void printDecimalNumber(uint64_t N); |
137 | | void printBasicType(BasicType); |
138 | | void printLifetime(uint64_t Index); |
139 | | void printIdentifier(Identifier Ident); |
140 | | |
141 | | char look() const; |
142 | | char consume(); |
143 | | bool consumeIf(char Prefix); |
144 | | |
145 | | bool addAssign(uint64_t &A, uint64_t B); |
146 | | bool mulAssign(uint64_t &A, uint64_t B); |
147 | | }; |
148 | | |
149 | | } // namespace |
150 | | |
151 | 8.54k | char *llvm::rustDemangle(std::string_view MangledName) { |
152 | | // Return early if mangled name doesn't look like a Rust symbol. |
153 | 8.54k | if (MangledName.empty() || !starts_with(MangledName, "_R")) |
154 | 40 | return nullptr; |
155 | | |
156 | 8.50k | Demangler D; |
157 | 8.50k | if (!D.demangle(MangledName)) { |
158 | 8.16k | std::free(D.Output.getBuffer()); |
159 | 8.16k | return nullptr; |
160 | 8.16k | } |
161 | | |
162 | 342 | D.Output += '\0'; |
163 | | |
164 | 342 | return D.Output.getBuffer(); |
165 | 8.50k | } |
166 | | |
167 | | Demangler::Demangler(size_t MaxRecursionLevel) |
168 | 8.50k | : MaxRecursionLevel(MaxRecursionLevel) {} |
169 | | |
/// Returns true if C is a decimal digit: <0-9>.
static inline bool isDigit(const char C) { return C >= '0' && C <= '9'; }
171 | | |
/// Returns true if C is a lowercase hexadecimal digit: <0-9a-f>.
/// Uppercase letters are not accepted.
static inline bool isHexDigit(const char C) {
  if (C >= '0' && C <= '9')
    return true;
  return C >= 'a' && C <= 'f';
}
175 | | |
/// Returns true if C is a lowercase ASCII letter: <a-z>.
static inline bool isLower(const char C) { return C >= 'a' && C <= 'z'; }
177 | | |
/// Returns true if C is an uppercase ASCII letter: <A-Z>.
static inline bool isUpper(const char C) { return C >= 'A' && C <= 'Z'; }
179 | | |
180 | | /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. |
181 | 1.05G | static inline bool isValid(const char C) { |
182 | 1.05G | return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; |
183 | 1.05G | } |
184 | | |
185 | | // Demangles Rust v0 mangled symbol. Returns true when successful, and false |
186 | | // otherwise. The demangled symbol is stored in Output field. It is |
187 | | // responsibility of the caller to free the memory behind the output stream. |
188 | | // |
189 | | // <symbol-name> = "_R" <path> [<instantiating-crate>] |
190 | 8.50k | bool Demangler::demangle(std::string_view Mangled) { |
191 | 8.50k | Position = 0; |
192 | 8.50k | Error = false; |
193 | 8.50k | Print = true; |
194 | 8.50k | RecursionLevel = 0; |
195 | 8.50k | BoundLifetimes = 0; |
196 | | |
197 | 8.50k | if (!starts_with(Mangled, "_R")) { |
198 | 0 | Error = true; |
199 | 0 | return false; |
200 | 0 | } |
201 | 8.50k | Mangled.remove_prefix(2); |
202 | 8.50k | size_t Dot = Mangled.find('.'); |
203 | 8.50k | Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot); |
204 | | |
205 | 8.50k | demanglePath(IsInType::No); |
206 | | |
207 | 8.50k | if (Position != Input.size()) { |
208 | 2.37k | ScopedOverride<bool> SavePrint(Print, false); |
209 | 2.37k | demanglePath(IsInType::No); |
210 | 2.37k | } |
211 | | |
212 | 8.50k | if (Position != Input.size()) |
213 | 2.35k | Error = true; |
214 | | |
215 | 8.50k | if (Dot != std::string_view::npos) { |
216 | 216 | print(" ("); |
217 | 216 | print(Mangled.substr(Dot)); |
218 | 216 | print(")"); |
219 | 216 | } |
220 | | |
221 | 8.50k | return !Error; |
222 | 8.50k | } |
223 | | |
224 | | // Demangles a path. InType indicates whether a path is inside a type. When |
225 | | // LeaveOpen is true, a closing `>` after generic arguments is omitted from the |
226 | | // output. Return value indicates whether generics arguments have been left |
227 | | // open. |
228 | | // |
229 | | // <path> = "C" <identifier> // crate root |
230 | | // | "M" <impl-path> <type> // <T> (inherent impl) |
231 | | // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) |
232 | | // | "Y" <type> <path> // <T as Trait> (trait definition) |
233 | | // | "N" <ns> <path> <identifier> // ...::ident (nested path) |
234 | | // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) |
235 | | // | <backref> |
236 | | // <identifier> = [<disambiguator>] <undisambiguated-identifier> |
237 | | // <ns> = "C" // closure |
238 | | // | "S" // shim |
239 | | // | <A-Z> // other special namespaces |
240 | | // | <a-z> // internal namespaces |
241 | 31.5M | bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { |
242 | 31.5M | if (Error || RecursionLevel >= MaxRecursionLevel) { |
243 | 96.3k | Error = true; |
244 | 96.3k | return false; |
245 | 96.3k | } |
246 | 31.4M | ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
247 | | |
248 | 31.4M | switch (consume()) { |
249 | 10.2M | case 'C': { |
250 | 10.2M | parseOptionalBase62Number('s'); |
251 | 10.2M | printIdentifier(parseIdentifier()); |
252 | 10.2M | break; |
253 | 0 | } |
254 | 2.78M | case 'M': { |
255 | 2.78M | demangleImplPath(InType); |
256 | 2.78M | print("<"); |
257 | 2.78M | demangleType(); |
258 | 2.78M | print(">"); |
259 | 2.78M | break; |
260 | 0 | } |
261 | 4.35M | case 'X': { |
262 | 4.35M | demangleImplPath(InType); |
263 | 4.35M | print("<"); |
264 | 4.35M | demangleType(); |
265 | 4.35M | print(" as "); |
266 | 4.35M | demanglePath(IsInType::Yes); |
267 | 4.35M | print(">"); |
268 | 4.35M | break; |
269 | 0 | } |
270 | 6.58M | case 'Y': { |
271 | 6.58M | print("<"); |
272 | 6.58M | demangleType(); |
273 | 6.58M | print(" as "); |
274 | 6.58M | demanglePath(IsInType::Yes); |
275 | 6.58M | print(">"); |
276 | 6.58M | break; |
277 | 0 | } |
278 | 2.71M | case 'N': { |
279 | 2.71M | char NS = consume(); |
280 | 2.71M | if (!isLower(NS) && !isUpper(NS)) { |
281 | 15 | Error = true; |
282 | 15 | break; |
283 | 15 | } |
284 | 2.71M | demanglePath(InType); |
285 | | |
286 | 2.71M | uint64_t Disambiguator = parseOptionalBase62Number('s'); |
287 | 2.71M | Identifier Ident = parseIdentifier(); |
288 | | |
289 | 2.71M | if (isUpper(NS)) { |
290 | | // Special namespaces |
291 | 2.46M | print("::{"); |
292 | 2.46M | if (NS == 'C') |
293 | 2.06M | print("closure"); |
294 | 407k | else if (NS == 'S') |
295 | 460 | print("shim"); |
296 | 406k | else |
297 | 406k | print(NS); |
298 | 2.46M | if (!Ident.empty()) { |
299 | 1.72M | print(":"); |
300 | 1.72M | printIdentifier(Ident); |
301 | 1.72M | } |
302 | 2.46M | print('#'); |
303 | 2.46M | printDecimalNumber(Disambiguator); |
304 | 2.46M | print('}'); |
305 | 2.46M | } else { |
306 | | // Implementation internal namespaces. |
307 | 242k | if (!Ident.empty()) { |
308 | 672 | print("::"); |
309 | 672 | printIdentifier(Ident); |
310 | 672 | } |
311 | 242k | } |
312 | 2.71M | break; |
313 | 2.71M | } |
314 | 488k | case 'I': { |
315 | 488k | demanglePath(InType); |
316 | | // Omit "::" when in a type, where it is optional. |
317 | 488k | if (InType == IsInType::No) |
318 | 3.37k | print("::"); |
319 | 488k | print("<"); |
320 | 753M | for (size_t I = 0; !Error && !consumeIf('E'); ++I) { |
321 | 752M | if (I > 0) |
322 | 752M | print(", "); |
323 | 752M | demangleGenericArg(); |
324 | 752M | } |
325 | 488k | if (LeaveOpen == LeaveGenericsOpen::Yes) |
326 | 33.7k | return true; |
327 | 454k | else |
328 | 454k | print(">"); |
329 | 454k | break; |
330 | 488k | } |
331 | 4.27M | case 'B': { |
332 | 4.27M | bool IsOpen = false; |
333 | 4.27M | demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); |
334 | 4.27M | return IsOpen; |
335 | 488k | } |
336 | 541 | default: |
337 | 541 | Error = true; |
338 | 541 | break; |
339 | 31.4M | } |
340 | | |
341 | 27.1M | return false; |
342 | 31.4M | } |
343 | | |
344 | | // <impl-path> = [<disambiguator>] <path> |
345 | | // <disambiguator> = "s" <base-62-number> |
346 | 7.13M | void Demangler::demangleImplPath(IsInType InType) { |
347 | 7.13M | ScopedOverride<bool> SavePrint(Print, false); |
348 | 7.13M | parseOptionalBase62Number('s'); |
349 | 7.13M | demanglePath(InType); |
350 | 7.13M | } |
351 | | |
352 | | // <generic-arg> = <lifetime> |
353 | | // | <type> |
354 | | // | "K" <const> |
355 | | // <lifetime> = "L" <base-62-number> |
356 | 752M | void Demangler::demangleGenericArg() { |
357 | 752M | if (consumeIf('L')) |
358 | 216k | printLifetime(parseBase62Number()); |
359 | 752M | else if (consumeIf('K')) |
360 | 744 | demangleConst(); |
361 | 752M | else |
362 | 752M | demangleType(); |
363 | 752M | } |
364 | | |
365 | | // <basic-type> = "a" // i8 |
366 | | // | "b" // bool |
367 | | // | "c" // char |
368 | | // | "d" // f64 |
369 | | // | "e" // str |
370 | | // | "f" // f32 |
371 | | // | "h" // u8 |
372 | | // | "i" // isize |
373 | | // | "j" // usize |
374 | | // | "l" // i32 |
375 | | // | "m" // u32 |
376 | | // | "n" // i128 |
377 | | // | "o" // u128 |
378 | | // | "s" // i16 |
379 | | // | "t" // u16 |
380 | | // | "u" // () |
381 | | // | "v" // ... |
382 | | // | "x" // i64 |
383 | | // | "y" // u64 |
384 | | // | "z" // ! |
385 | | // | "p" // placeholder (e.g. for generic params), shown as _ |
386 | 5.41G | static bool parseBasicType(char C, BasicType &Type) { |
387 | 5.41G | switch (C) { |
388 | 244M | case 'a': |
389 | 244M | Type = BasicType::I8; |
390 | 244M | return true; |
391 | 458M | case 'b': |
392 | 458M | Type = BasicType::Bool; |
393 | 458M | return true; |
394 | 355M | case 'c': |
395 | 355M | Type = BasicType::Char; |
396 | 355M | return true; |
397 | 381M | case 'd': |
398 | 381M | Type = BasicType::F64; |
399 | 381M | return true; |
400 | 195M | case 'e': |
401 | 195M | Type = BasicType::Str; |
402 | 195M | return true; |
403 | 20.5M | case 'f': |
404 | 20.5M | Type = BasicType::F32; |
405 | 20.5M | return true; |
406 | 329M | case 'h': |
407 | 329M | Type = BasicType::U8; |
408 | 329M | return true; |
409 | 644M | case 'i': |
410 | 644M | Type = BasicType::ISize; |
411 | 644M | return true; |
412 | 103M | case 'j': |
413 | 103M | Type = BasicType::USize; |
414 | 103M | return true; |
415 | 246M | case 'l': |
416 | 246M | Type = BasicType::I32; |
417 | 246M | return true; |
418 | 29.6M | case 'm': |
419 | 29.6M | Type = BasicType::U32; |
420 | 29.6M | return true; |
421 | 79.7M | case 'n': |
422 | 79.7M | Type = BasicType::I128; |
423 | 79.7M | return true; |
424 | 315M | case 'o': |
425 | 315M | Type = BasicType::U128; |
426 | 315M | return true; |
427 | 77.2M | case 'p': |
428 | 77.2M | Type = BasicType::Placeholder; |
429 | 77.2M | return true; |
430 | 72.2M | case 's': |
431 | 72.2M | Type = BasicType::I16; |
432 | 72.2M | return true; |
433 | 219M | case 't': |
434 | 219M | Type = BasicType::U16; |
435 | 219M | return true; |
436 | 229M | case 'u': |
437 | 229M | Type = BasicType::Unit; |
438 | 229M | return true; |
439 | 296M | case 'v': |
440 | 296M | Type = BasicType::Variadic; |
441 | 296M | return true; |
442 | 130M | case 'x': |
443 | 130M | Type = BasicType::I64; |
444 | 130M | return true; |
445 | 95.3M | case 'y': |
446 | 95.3M | Type = BasicType::U64; |
447 | 95.3M | return true; |
448 | 413M | case 'z': |
449 | 413M | Type = BasicType::Never; |
450 | 413M | return true; |
451 | 470M | default: |
452 | 470M | return false; |
453 | 5.41G | } |
454 | 5.41G | } |
455 | | |
456 | 4.90G | void Demangler::printBasicType(BasicType Type) { |
457 | 4.90G | switch (Type) { |
458 | 442M | case BasicType::Bool: |
459 | 442M | print("bool"); |
460 | 442M | break; |
461 | 350M | case BasicType::Char: |
462 | 350M | print("char"); |
463 | 350M | break; |
464 | 242M | case BasicType::I8: |
465 | 242M | print("i8"); |
466 | 242M | break; |
467 | 72.1M | case BasicType::I16: |
468 | 72.1M | print("i16"); |
469 | 72.1M | break; |
470 | 246M | case BasicType::I32: |
471 | 246M | print("i32"); |
472 | 246M | break; |
473 | 130M | case BasicType::I64: |
474 | 130M | print("i64"); |
475 | 130M | break; |
476 | 79.7M | case BasicType::I128: |
477 | 79.7M | print("i128"); |
478 | 79.7M | break; |
479 | 644M | case BasicType::ISize: |
480 | 644M | print("isize"); |
481 | 644M | break; |
482 | 329M | case BasicType::U8: |
483 | 329M | print("u8"); |
484 | 329M | break; |
485 | 219M | case BasicType::U16: |
486 | 219M | print("u16"); |
487 | 219M | break; |
488 | 29.6M | case BasicType::U32: |
489 | 29.6M | print("u32"); |
490 | 29.6M | break; |
491 | 95.3M | case BasicType::U64: |
492 | 95.3M | print("u64"); |
493 | 95.3M | break; |
494 | 315M | case BasicType::U128: |
495 | 315M | print("u128"); |
496 | 315M | break; |
497 | 102M | case BasicType::USize: |
498 | 102M | print("usize"); |
499 | 102M | break; |
500 | 20.5M | case BasicType::F32: |
501 | 20.5M | print("f32"); |
502 | 20.5M | break; |
503 | 381M | case BasicType::F64: |
504 | 381M | print("f64"); |
505 | 381M | break; |
506 | 195M | case BasicType::Str: |
507 | 195M | print("str"); |
508 | 195M | break; |
509 | 68.2M | case BasicType::Placeholder: |
510 | 68.2M | print("_"); |
511 | 68.2M | break; |
512 | 229M | case BasicType::Unit: |
513 | 229M | print("()"); |
514 | 229M | break; |
515 | 296M | case BasicType::Variadic: |
516 | 296M | print("..."); |
517 | 296M | break; |
518 | 413M | case BasicType::Never: |
519 | 413M | print("!"); |
520 | 413M | break; |
521 | 4.90G | } |
522 | 4.90G | } |
523 | | |
524 | | // <type> = | <basic-type> |
525 | | // | <path> // named type |
526 | | // | "A" <type> <const> // [T; N] |
527 | | // | "S" <type> // [T] |
528 | | // | "T" {<type>} "E" // (T1, T2, T3, ...) |
529 | | // | "R" [<lifetime>] <type> // &T |
530 | | // | "Q" [<lifetime>] <type> // &mut T |
531 | | // | "P" <type> // *const T |
532 | | // | "O" <type> // *mut T |
533 | | // | "F" <fn-sig> // fn(...) -> ... |
534 | | // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a |
535 | | // | <backref> // backref |
536 | 5.37G | void Demangler::demangleType() { |
537 | 5.37G | if (Error || RecursionLevel >= MaxRecursionLevel) { |
538 | 197k | Error = true; |
539 | 197k | return; |
540 | 197k | } |
541 | 5.37G | ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
542 | | |
543 | 5.37G | size_t Start = Position; |
544 | 5.37G | char C = consume(); |
545 | 5.37G | BasicType Type; |
546 | 5.37G | if (parseBasicType(C, Type)) |
547 | 4.90G | return printBasicType(Type); |
548 | | |
549 | 470M | switch (C) { |
550 | 32.3M | case 'A': |
551 | 32.3M | print("["); |
552 | 32.3M | demangleType(); |
553 | 32.3M | print("; "); |
554 | 32.3M | demangleConst(); |
555 | 32.3M | print("]"); |
556 | 32.3M | break; |
557 | 1.68M | case 'S': |
558 | 1.68M | print("["); |
559 | 1.68M | demangleType(); |
560 | 1.68M | print("]"); |
561 | 1.68M | break; |
562 | 9.20M | case 'T': { |
563 | 9.20M | print("("); |
564 | 9.20M | size_t I = 0; |
565 | 2.67G | for (; !Error && !consumeIf('E'); ++I) { |
566 | 2.66G | if (I > 0) |
567 | 2.65G | print(", "); |
568 | 2.66G | demangleType(); |
569 | 2.66G | } |
570 | 9.20M | if (I == 1) |
571 | 8.12k | print(","); |
572 | 9.20M | print(")"); |
573 | 9.20M | break; |
574 | 0 | } |
575 | 34.0M | case 'R': |
576 | 219M | case 'Q': |
577 | 219M | print('&'); |
578 | 219M | if (consumeIf('L')) { |
579 | 4.34k | if (auto Lifetime = parseBase62Number()) { |
580 | 2.64k | printLifetime(Lifetime); |
581 | 2.64k | print(' '); |
582 | 2.64k | } |
583 | 4.34k | } |
584 | 219M | if (C == 'Q') |
585 | 185M | print("mut "); |
586 | 219M | demangleType(); |
587 | 219M | break; |
588 | 109M | case 'P': |
589 | 109M | print("*const "); |
590 | 109M | demangleType(); |
591 | 109M | break; |
592 | 38.6M | case 'O': |
593 | 38.6M | print("*mut "); |
594 | 38.6M | demangleType(); |
595 | 38.6M | break; |
596 | 37.7M | case 'F': |
597 | 37.7M | demangleFnSig(); |
598 | 37.7M | break; |
599 | 59.9k | case 'D': |
600 | 59.9k | demangleDynBounds(); |
601 | 59.9k | if (consumeIf('L')) { |
602 | 3.71k | if (auto Lifetime = parseBase62Number()) { |
603 | 1.33k | print(" + "); |
604 | 1.33k | printLifetime(Lifetime); |
605 | 1.33k | } |
606 | 56.2k | } else { |
607 | 56.2k | Error = true; |
608 | 56.2k | } |
609 | 59.9k | break; |
610 | 11.8M | case 'B': |
611 | 11.8M | demangleBackref([&] { demangleType(); }); |
612 | 11.8M | break; |
613 | 10.0M | default: |
614 | 10.0M | Position = Start; |
615 | 10.0M | demanglePath(IsInType::Yes); |
616 | 10.0M | break; |
617 | 470M | } |
618 | 470M | } |
619 | | |
620 | | // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> |
621 | | // <abi> = "C" |
622 | | // | <undisambiguated-identifier> |
623 | 37.7M | void Demangler::demangleFnSig() { |
624 | 37.7M | ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); |
625 | 37.7M | demangleOptionalBinder(); |
626 | | |
627 | 37.7M | if (consumeIf('U')) |
628 | 104k | print("unsafe "); |
629 | | |
630 | 37.7M | if (consumeIf('K')) { |
631 | 3.05M | print("extern \""); |
632 | 3.05M | if (consumeIf('C')) { |
633 | 6.48k | print("C"); |
634 | 3.04M | } else { |
635 | 3.04M | Identifier Ident = parseIdentifier(); |
636 | 3.04M | if (Ident.Punycode) |
637 | 10 | Error = true; |
638 | 4.12M | for (char C : Ident.Name) { |
639 | | // When mangling ABI string, the "-" is replaced with "_". |
640 | 4.12M | if (C == '_') |
641 | 53.9k | C = '-'; |
642 | 4.12M | print(C); |
643 | 4.12M | } |
644 | 3.04M | } |
645 | 3.05M | print("\" "); |
646 | 3.05M | } |
647 | | |
648 | 37.7M | print("fn("); |
649 | 1.53G | for (size_t I = 0; !Error && !consumeIf('E'); ++I) { |
650 | 1.49G | if (I > 0) |
651 | 1.47G | print(", "); |
652 | 1.49G | demangleType(); |
653 | 1.49G | } |
654 | 37.7M | print(")"); |
655 | | |
656 | 37.7M | if (consumeIf('u')) { |
657 | | // Skip the unit type from the output. |
658 | 37.2M | } else { |
659 | 37.2M | print(" -> "); |
660 | 37.2M | demangleType(); |
661 | 37.2M | } |
662 | 37.7M | } |
663 | | |
664 | | // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" |
665 | 59.9k | void Demangler::demangleDynBounds() { |
666 | 59.9k | ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); |
667 | 59.9k | print("dyn "); |
668 | 59.9k | demangleOptionalBinder(); |
669 | 160k | for (size_t I = 0; !Error && !consumeIf('E'); ++I) { |
670 | 100k | if (I > 0) |
671 | 43.5k | print(" + "); |
672 | 100k | demangleDynTrait(); |
673 | 100k | } |
674 | 59.9k | } |
675 | | |
676 | | // <dyn-trait> = <path> {<dyn-trait-assoc-binding>} |
677 | | // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> |
678 | 100k | void Demangler::demangleDynTrait() { |
679 | 100k | bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); |
680 | 177k | while (!Error && consumeIf('p')) { |
681 | 77.0k | if (!IsOpen) { |
682 | 50.6k | IsOpen = true; |
683 | 50.6k | print('<'); |
684 | 50.6k | } else { |
685 | 26.3k | print(", "); |
686 | 26.3k | } |
687 | 77.0k | print(parseIdentifier().Name); |
688 | 77.0k | print(" = "); |
689 | 77.0k | demangleType(); |
690 | 77.0k | } |
691 | 100k | if (IsOpen) |
692 | 84.4k | print(">"); |
693 | 100k | } |
694 | | |
695 | | // Demangles optional binder and updates the number of bound lifetimes. |
696 | | // |
697 | | // <binder> = "G" <base-62-number> |
698 | 37.8M | void Demangler::demangleOptionalBinder() { |
699 | 37.8M | uint64_t Binder = parseOptionalBase62Number('G'); |
700 | 37.8M | if (Error || Binder == 0) |
701 | 18.3M | return; |
702 | | |
703 | | // In valid inputs each bound lifetime is referenced later. Referencing a |
704 | | // lifetime requires at least one byte of input. Reject inputs that are too |
705 | | // short to reference all bound lifetimes. Otherwise demangling of invalid |
706 | | // binders could generate excessive amounts of output. |
707 | 19.5M | if (Binder >= Input.size() - BoundLifetimes) { |
708 | 617 | Error = true; |
709 | 617 | return; |
710 | 617 | } |
711 | | |
712 | 19.5M | print("for<"); |
713 | 4.52G | for (size_t I = 0; I != Binder; ++I) { |
714 | 4.50G | BoundLifetimes += 1; |
715 | 4.50G | if (I > 0) |
716 | 4.48G | print(", "); |
717 | 4.50G | printLifetime(1); |
718 | 4.50G | } |
719 | 19.5M | print("> "); |
720 | 19.5M | } |
721 | | |
722 | | // <const> = <basic-type> <const-data> |
723 | | // | "p" // placeholder |
724 | | // | <backref> |
725 | 32.3M | void Demangler::demangleConst() { |
726 | 32.3M | if (Error || RecursionLevel >= MaxRecursionLevel) { |
727 | 4.61k | Error = true; |
728 | 4.61k | return; |
729 | 4.61k | } |
730 | 32.3M | ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
731 | | |
732 | 32.3M | char C = consume(); |
733 | 32.3M | BasicType Type; |
734 | 32.3M | if (parseBasicType(C, Type)) { |
735 | 32.3M | switch (Type) { |
736 | 2.14M | case BasicType::I8: |
737 | 2.21M | case BasicType::I16: |
738 | 2.62M | case BasicType::I32: |
739 | 2.63M | case BasicType::I64: |
740 | 2.66M | case BasicType::I128: |
741 | 2.67M | case BasicType::ISize: |
742 | 2.68M | case BasicType::U8: |
743 | 2.83M | case BasicType::U16: |
744 | 2.83M | case BasicType::U32: |
745 | 2.84M | case BasicType::U64: |
746 | 2.84M | case BasicType::U128: |
747 | 2.95M | case BasicType::USize: |
748 | 2.95M | demangleConstInt(); |
749 | 2.95M | break; |
750 | 15.3M | case BasicType::Bool: |
751 | 15.3M | demangleConstBool(); |
752 | 15.3M | break; |
753 | 5.02M | case BasicType::Char: |
754 | 5.02M | demangleConstChar(); |
755 | 5.02M | break; |
756 | 8.94M | case BasicType::Placeholder: |
757 | 8.94M | print('_'); |
758 | 8.94M | break; |
759 | 6 | default: |
760 | 6 | Error = true; |
761 | 6 | break; |
762 | 32.3M | } |
763 | 32.3M | } else if (C == 'B') { |
764 | 2.54k | demangleBackref([&] { demangleConst(); }); |
765 | 2.54k | } else { |
766 | 35 | Error = true; |
767 | 35 | } |
768 | 32.3M | } |
769 | | |
770 | | // <const-data> = ["n"] <hex-number> |
771 | 2.95M | void Demangler::demangleConstInt() { |
772 | 2.95M | if (consumeIf('n')) |
773 | 375k | print('-'); |
774 | | |
775 | 2.95M | std::string_view HexDigits; |
776 | 2.95M | uint64_t Value = parseHexNumber(HexDigits); |
777 | 2.95M | if (HexDigits.size() <= 16) { |
778 | 2.71M | printDecimalNumber(Value); |
779 | 2.71M | } else { |
780 | 244k | print("0x"); |
781 | 244k | print(HexDigits); |
782 | 244k | } |
783 | 2.95M | } |
784 | | |
785 | | // <const-data> = "0_" // false |
786 | | // | "1_" // true |
787 | 15.3M | void Demangler::demangleConstBool() { |
788 | 15.3M | std::string_view HexDigits; |
789 | 15.3M | parseHexNumber(HexDigits); |
790 | 15.3M | if (HexDigits == "0") |
791 | 11.0M | print("false"); |
792 | 4.38M | else if (HexDigits == "1") |
793 | 4.38M | print("true"); |
794 | 20 | else |
795 | 20 | Error = true; |
796 | 15.3M | } |
797 | | |
/// Returns true if CodePoint represents a printable ASCII character, i.e.
/// lies in the range from space (0x20) to tilde (0x7e), both inclusive.
static bool isAsciiPrintable(uint64_t CodePoint) {
  return CodePoint >= 0x20 && CodePoint <= 0x7e;
}
802 | | |
803 | | // <const-data> = <hex-number> |
804 | 5.02M | void Demangler::demangleConstChar() { |
805 | 5.02M | std::string_view HexDigits; |
806 | 5.02M | uint64_t CodePoint = parseHexNumber(HexDigits); |
807 | 5.02M | if (Error || HexDigits.size() > 6) { |
808 | 41 | Error = true; |
809 | 41 | return; |
810 | 41 | } |
811 | | |
812 | 5.02M | print("'"); |
813 | 5.02M | switch (CodePoint) { |
814 | 94.3k | case '\t': |
815 | 94.3k | print(R"(\t)"); |
816 | 94.3k | break; |
817 | 80.3k | case '\r': |
818 | 80.3k | print(R"(\r)"); |
819 | 80.3k | break; |
820 | 2.00M | case '\n': |
821 | 2.00M | print(R"(\n)"); |
822 | 2.00M | break; |
823 | 6.55k | case '\\': |
824 | 6.55k | print(R"(\\)"); |
825 | 6.55k | break; |
826 | 486k | case '"': |
827 | 486k | print(R"(")"); |
828 | 486k | break; |
829 | 149k | case '\'': |
830 | 149k | print(R"(\')"); |
831 | 149k | break; |
832 | 2.20M | default: |
833 | 2.20M | if (isAsciiPrintable(CodePoint)) { |
834 | 981k | char C = CodePoint; |
835 | 981k | print(C); |
836 | 1.22M | } else { |
837 | 1.22M | print(R"(\u{)"); |
838 | 1.22M | print(HexDigits); |
839 | 1.22M | print('}'); |
840 | 1.22M | } |
841 | 2.20M | break; |
842 | 5.02M | } |
843 | 5.02M | print('\''); |
844 | 5.02M | } |
845 | | |
846 | | // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> |
847 | 16.1M | Identifier Demangler::parseIdentifier() { |
848 | 16.1M | bool Punycode = consumeIf('u'); |
849 | 16.1M | uint64_t Bytes = parseDecimalNumber(); |
850 | | |
851 | | // Underscore resolves the ambiguity when identifier starts with a decimal |
852 | | // digit or another underscore. |
853 | 16.1M | consumeIf('_'); |
854 | | |
855 | 16.1M | if (Error || Bytes > Input.size() - Position) { |
856 | 3.06k | Error = true; |
857 | 3.06k | return {}; |
858 | 3.06k | } |
859 | 16.1M | std::string_view S = Input.substr(Position, Bytes); |
860 | 16.1M | Position += Bytes; |
861 | | |
862 | 16.1M | if (!std::all_of(S.begin(), S.end(), isValid)) { |
863 | 53 | Error = true; |
864 | 53 | return {}; |
865 | 53 | } |
866 | | |
867 | 16.1M | return {S, Punycode}; |
868 | 16.1M | } |
869 | | |
870 | | // Parses optional base 62 number. The presence of a number is determined using |
871 | | // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise |
872 | | // |
873 | | // This function is indended for parsing disambiguators and binders which when |
874 | | // not present have their value interpreted as 0, and otherwise as decoded |
875 | | // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is |
876 | | // 2. When "G" is absent value is 0. |
877 | 57.9M | uint64_t Demangler::parseOptionalBase62Number(char Tag) { |
878 | 57.9M | if (!consumeIf(Tag)) |
879 | 38.4M | return 0; |
880 | | |
881 | 19.5M | uint64_t N = parseBase62Number(); |
882 | 19.5M | if (Error || !addAssign(N, 1)) |
883 | 156 | return 0; |
884 | | |
885 | 19.5M | return N; |
886 | 19.5M | } |
887 | | |
888 | | // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by |
889 | | // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, |
890 | | // "1_" encodes 2, etc. |
891 | | // |
892 | | // <base-62-number> = {<0-9a-zA-Z>} "_" |
893 | 35.8M | uint64_t Demangler::parseBase62Number() { |
894 | 35.8M | if (consumeIf('_')) |
895 | 8.71M | return 0; |
896 | | |
897 | 27.1M | uint64_t Value = 0; |
898 | | |
899 | 67.5M | while (true) { |
900 | 67.5M | uint64_t Digit; |
901 | 67.5M | char C = consume(); |
902 | | |
903 | 67.5M | if (C == '_') { |
904 | 27.1M | break; |
905 | 40.4M | } else if (isDigit(C)) { |
906 | 21.7M | Digit = C - '0'; |
907 | 21.7M | } else if (isLower(C)) { |
908 | 674k | Digit = 10 + (C - 'a'); |
909 | 17.9M | } else if (isUpper(C)) { |
910 | 17.9M | Digit = 10 + 26 + (C - 'A'); |
911 | 17.9M | } else { |
912 | 857 | Error = true; |
913 | 857 | return 0; |
914 | 857 | } |
915 | | |
916 | 40.4M | if (!mulAssign(Value, 62)) |
917 | 129 | return 0; |
918 | | |
919 | 40.4M | if (!addAssign(Value, Digit)) |
920 | 7 | return 0; |
921 | 40.4M | } |
922 | | |
923 | 27.1M | if (!addAssign(Value, 1)) |
924 | 6 | return 0; |
925 | | |
926 | 27.1M | return Value; |
927 | 27.1M | } |
928 | | |
929 | | // Parses a decimal number that had been encoded without any leading zeros. |
930 | | // |
931 | | // <decimal-number> = "0" |
932 | | // | <1-9> {<0-9>} |
933 | 16.1M | uint64_t Demangler::parseDecimalNumber() { |
934 | 16.1M | char C = look(); |
935 | 16.1M | if (!isDigit(C)) { |
936 | 2.66k | Error = true; |
937 | 2.66k | return 0; |
938 | 2.66k | } |
939 | | |
940 | 16.1M | if (C == '0') { |
941 | 5.27M | consume(); |
942 | 5.27M | return 0; |
943 | 5.27M | } |
944 | | |
945 | 10.8M | uint64_t Value = 0; |
946 | | |
947 | 24.8M | while (isDigit(look())) { |
948 | 13.9M | if (!mulAssign(Value, 10)) { |
949 | 22 | Error = true; |
950 | 22 | return 0; |
951 | 22 | } |
952 | | |
953 | 13.9M | uint64_t D = consume() - '0'; |
954 | 13.9M | if (!addAssign(Value, D)) |
955 | 2 | return 0; |
956 | 13.9M | } |
957 | | |
958 | 10.8M | return Value; |
959 | 10.8M | } |
960 | | |
961 | | // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed |
962 | | // value and stores hex digits in HexDigits. The return value is unspecified if |
963 | | // HexDigits.size() > 16. |
964 | | // |
965 | | // <hex-number> = "0_" |
966 | | // | <1-9a-f> {<0-9a-f>} "_" |
967 | 23.3M | uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) { |
968 | 23.3M | size_t Start = Position; |
969 | 23.3M | uint64_t Value = 0; |
970 | | |
971 | 23.3M | if (!isHexDigit(look())) |
972 | 62 | Error = true; |
973 | | |
974 | 23.3M | if (consumeIf('0')) { |
975 | 11.1M | if (!consumeIf('_')) |
976 | 5 | Error = true; |
977 | 12.2M | } else { |
978 | 130M | while (!Error && !consumeIf('_')) { |
979 | 118M | char C = consume(); |
980 | 118M | Value *= 16; |
981 | 118M | if (isDigit(C)) |
982 | 29.9M | Value += C - '0'; |
983 | 88.0M | else if ('a' <= C && C <= 'f') |
984 | 88.0M | Value += 10 + (C - 'a'); |
985 | 45 | else |
986 | 45 | Error = true; |
987 | 118M | } |
988 | 12.2M | } |
989 | | |
990 | 23.3M | if (Error) { |
991 | 112 | HexDigits = std::string_view(); |
992 | 112 | return 0; |
993 | 112 | } |
994 | | |
995 | 23.3M | size_t End = Position - 1; |
996 | 23.3M | assert(Start < End); |
997 | 0 | HexDigits = Input.substr(Start, End - Start); |
998 | 23.3M | return Value; |
999 | 23.3M | } |
1000 | | |
1001 | 9.25G | void Demangler::print(char C) { |
1002 | 9.25G | if (Error || !Print) |
1003 | 4.17M | return; |
1004 | | |
1005 | 9.25G | Output += C; |
1006 | 9.25G | } |
1007 | | |
1008 | 14.9G | void Demangler::print(std::string_view S) { |
1009 | 14.9G | if (Error || !Print) |
1010 | 26.5M | return; |
1011 | | |
1012 | 14.9G | Output += S; |
1013 | 14.9G | } |
1014 | | |
1015 | 4.06G | void Demangler::printDecimalNumber(uint64_t N) { |
1016 | 4.06G | if (Error || !Print) |
1017 | 24.1k | return; |
1018 | | |
1019 | 4.06G | Output << N; |
1020 | 4.06G | } |
1021 | | |
1022 | | // Prints a lifetime. An index 0 always represents an erased lifetime. Indices |
1023 | | // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes |
1024 | | // bound by one of the enclosing binders. |
1025 | 4.50G | void Demangler::printLifetime(uint64_t Index) { |
1026 | 4.50G | if (Index == 0) { |
1027 | 205k | print("'_"); |
1028 | 205k | return; |
1029 | 205k | } |
1030 | | |
1031 | 4.50G | if (Index - 1 >= BoundLifetimes) { |
1032 | 336 | Error = true; |
1033 | 336 | return; |
1034 | 336 | } |
1035 | | |
1036 | 4.50G | uint64_t Depth = BoundLifetimes - Index; |
1037 | 4.50G | print('\''); |
1038 | 4.50G | if (Depth < 26) { |
1039 | 447M | char C = 'a' + Depth; |
1040 | 447M | print(C); |
1041 | 4.05G | } else { |
1042 | 4.05G | print('z'); |
1043 | 4.05G | printDecimalNumber(Depth - 26 + 1); |
1044 | 4.05G | } |
1045 | 4.50G | } |
1046 | | |
1047 | 12.8M | static inline bool decodePunycodeDigit(char C, size_t &Value) { |
1048 | 12.8M | if (isLower(C)) { |
1049 | 12.3M | Value = C - 'a'; |
1050 | 12.3M | return true; |
1051 | 12.3M | } |
1052 | | |
1053 | 474k | if (isDigit(C)) { |
1054 | 474k | Value = 26 + (C - '0'); |
1055 | 474k | return true; |
1056 | 474k | } |
1057 | | |
1058 | 45 | return false; |
1059 | 474k | } |
1060 | | |
1061 | 824k | static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { |
1062 | 824k | char *Buffer = Output.getBuffer(); |
1063 | 824k | char *Start = Buffer + StartIdx; |
1064 | 824k | char *End = Buffer + Output.getCurrentPosition(); |
1065 | 824k | Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); |
1066 | 824k | } |
1067 | | |
// Encodes CodePoint as UTF-8 into Output, writing between one and four bytes
// starting at Output[0]. Returns false, writing nothing, if CodePoint is not a
// valid unicode scalar value (a surrogate, or greater than 0x10FFFF).
static inline bool encodeUTF8(size_t CodePoint, char *Output) {
  const bool IsSurrogate = CodePoint >= 0xD800 && CodePoint <= 0xDFFF;
  if (IsSurrogate || CodePoint > 0x10FFFF)
    return false;

  // Builds a continuation byte from the low six bits of V.
  const auto Continuation = [](size_t V) -> char {
    return static_cast<char>(0x80 | (V & 0x3F));
  };

  if (CodePoint <= 0x7F) {
    Output[0] = static_cast<char>(CodePoint);
  } else if (CodePoint <= 0x7FF) {
    Output[0] = static_cast<char>(0xC0 | ((CodePoint >> 6) & 0x3F));
    Output[1] = Continuation(CodePoint);
  } else if (CodePoint <= 0xFFFF) {
    Output[0] = static_cast<char>(0xE0 | (CodePoint >> 12));
    Output[1] = Continuation(CodePoint >> 6);
    Output[2] = Continuation(CodePoint);
  } else {
    Output[0] = static_cast<char>(0xF0 | (CodePoint >> 18));
    Output[1] = Continuation(CodePoint >> 12);
    Output[2] = Continuation(CodePoint >> 6);
    Output[3] = Continuation(CodePoint);
  }

  return true;
}
1102 | | |
1103 | | // Decodes string encoded using punycode and appends results to Output. |
1104 | | // Returns true if decoding was successful. |
1105 | 824k | static bool decodePunycode(std::string_view Input, OutputBuffer &Output) { |
1106 | 824k | size_t OutputSize = Output.getCurrentPosition(); |
1107 | 824k | size_t InputIdx = 0; |
1108 | | |
1109 | | // Rust uses an underscore as a delimiter. |
1110 | 824k | size_t DelimiterPos = std::string_view::npos; |
1111 | 34.5M | for (size_t I = 0; I != Input.size(); ++I) |
1112 | 33.7M | if (Input[I] == '_') |
1113 | 973k | DelimiterPos = I; |
1114 | | |
1115 | 824k | if (DelimiterPos != std::string_view::npos) { |
1116 | | // Copy basic code points before the last delimiter to the output. |
1117 | 20.7M | for (; InputIdx != DelimiterPos; ++InputIdx) { |
1118 | 20.1M | char C = Input[InputIdx]; |
1119 | 20.1M | if (!isValid(C)) |
1120 | 0 | return false; |
1121 | | // Code points are padded with zeros while decoding is in progress. |
1122 | 20.1M | char UTF8[4] = {C}; |
1123 | 20.1M | Output += std::string_view(UTF8, 4); |
1124 | 20.1M | } |
1125 | | // Skip over the delimiter. |
1126 | 580k | ++InputIdx; |
1127 | 580k | } |
1128 | | |
1129 | 824k | size_t Base = 36; |
1130 | 824k | size_t Skew = 38; |
1131 | 824k | size_t Bias = 72; |
1132 | 824k | size_t N = 0x80; |
1133 | 824k | size_t TMin = 1; |
1134 | 824k | size_t TMax = 26; |
1135 | 824k | size_t Damp = 700; |
1136 | | |
1137 | 9.49M | auto Adapt = [&](size_t Delta, size_t NumPoints) { |
1138 | 9.49M | Delta /= Damp; |
1139 | 9.49M | Delta += Delta / NumPoints; |
1140 | 9.49M | Damp = 2; |
1141 | | |
1142 | 9.49M | size_t K = 0; |
1143 | 9.51M | while (Delta > (Base - TMin) * TMax / 2) { |
1144 | 16.9k | Delta /= Base - TMin; |
1145 | 16.9k | K += Base; |
1146 | 16.9k | } |
1147 | 9.49M | return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); |
1148 | 9.49M | }; |
1149 | | |
1150 | | // Main decoding loop. |
1151 | 10.3M | for (size_t I = 0; InputIdx != Input.size(); I += 1) { |
1152 | 9.49M | size_t OldI = I; |
1153 | 9.49M | size_t W = 1; |
1154 | 9.49M | size_t Max = std::numeric_limits<size_t>::max(); |
1155 | 12.8M | for (size_t K = Base; true; K += Base) { |
1156 | 12.8M | if (InputIdx == Input.size()) |
1157 | 39 | return false; |
1158 | 12.8M | char C = Input[InputIdx++]; |
1159 | 12.8M | size_t Digit = 0; |
1160 | 12.8M | if (!decodePunycodeDigit(C, Digit)) |
1161 | 45 | return false; |
1162 | | |
1163 | 12.8M | if (Digit > (Max - I) / W) |
1164 | 1 | return false; |
1165 | 12.8M | I += Digit * W; |
1166 | | |
1167 | 12.8M | size_t T; |
1168 | 12.8M | if (K <= Bias) |
1169 | 1.59M | T = TMin; |
1170 | 11.2M | else if (K >= Bias + TMax) |
1171 | 8.90M | T = TMax; |
1172 | 2.36M | else |
1173 | 2.36M | T = K - Bias; |
1174 | | |
1175 | 12.8M | if (Digit < T) |
1176 | 9.49M | break; |
1177 | | |
1178 | 3.36M | if (W > Max / (Base - T)) |
1179 | 0 | return false; |
1180 | 3.36M | W *= (Base - T); |
1181 | 3.36M | } |
1182 | 9.49M | size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; |
1183 | 9.49M | Bias = Adapt(I - OldI, NumPoints); |
1184 | | |
1185 | 9.49M | if (I / NumPoints > Max - N) |
1186 | 0 | return false; |
1187 | 9.49M | N += I / NumPoints; |
1188 | 9.49M | I = I % NumPoints; |
1189 | | |
1190 | | // Insert N at position I in the output. |
1191 | 9.49M | char UTF8[4] = {}; |
1192 | 9.49M | if (!encodeUTF8(N, UTF8)) |
1193 | 163 | return false; |
1194 | 9.49M | Output.insert(OutputSize + I * 4, UTF8, 4); |
1195 | 9.49M | } |
1196 | | |
1197 | 824k | removeNullBytes(Output, OutputSize); |
1198 | 824k | return true; |
1199 | 824k | } |
1200 | | |
1201 | 11.9M | void Demangler::printIdentifier(Identifier Ident) { |
1202 | 11.9M | if (Error || !Print) |
1203 | 2.84M | return; |
1204 | | |
1205 | 9.15M | if (Ident.Punycode) { |
1206 | 824k | if (!decodePunycode(Ident.Name, Output)) |
1207 | 248 | Error = true; |
1208 | 8.32M | } else { |
1209 | 8.32M | print(Ident.Name); |
1210 | 8.32M | } |
1211 | 9.15M | } |
1212 | | |
1213 | 64.2M | char Demangler::look() const { |
1214 | 64.2M | if (Error || Position >= Input.size()) |
1215 | 2.94k | return 0; |
1216 | | |
1217 | 64.2M | return Input[Position]; |
1218 | 64.2M | } |
1219 | | |
1220 | 5.65G | char Demangler::consume() { |
1221 | 5.65G | if (Error || Position >= Input.size()) { |
1222 | 1.52k | Error = true; |
1223 | 1.52k | return 0; |
1224 | 1.52k | } |
1225 | | |
1226 | 5.65G | return Input[Position++]; |
1227 | 5.65G | } |
1228 | | |
1229 | 7.10G | bool Demangler::consumeIf(char Prefix) { |
1230 | 7.10G | if (Error || Position >= Input.size() || Input[Position] != Prefix) |
1231 | 6.98G | return false; |
1232 | | |
1233 | 119M | Position += 1; |
1234 | 119M | return true; |
1235 | 7.10G | } |
1236 | | |
1237 | | /// Computes A + B. When computation wraps around sets the error and returns |
1238 | | /// false. Otherwise assigns the result to A and returns true. |
1239 | 101M | bool Demangler::addAssign(uint64_t &A, uint64_t B) { |
1240 | 101M | if (A > std::numeric_limits<uint64_t>::max() - B) { |
1241 | 16 | Error = true; |
1242 | 16 | return false; |
1243 | 16 | } |
1244 | | |
1245 | 101M | A += B; |
1246 | 101M | return true; |
1247 | 101M | } |
1248 | | |
1249 | | /// Computes A * B. When computation wraps around sets the error and returns |
1250 | | /// false. Otherwise assigns the result to A and returns true. |
1251 | 54.4M | bool Demangler::mulAssign(uint64_t &A, uint64_t B) { |
1252 | 54.4M | if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { |
1253 | 151 | Error = true; |
1254 | 151 | return false; |
1255 | 151 | } |
1256 | | |
1257 | 54.4M | A *= B; |
1258 | 54.4M | return true; |
1259 | 54.4M | } |