1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringViewExtras.h"
16 #include "llvm/Demangle/Utility.h"
17
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 #include <string_view>
24
25 using namespace llvm;
26
27 using llvm::itanium_demangle::OutputBuffer;
28 using llvm::itanium_demangle::ScopedOverride;
29 using llvm::itanium_demangle::starts_with;
30
31 namespace {
32
33 struct Identifier {
34 std::string_view Name;
35 bool Punycode;
36
empty__anon406d1ba70111::Identifier37 bool empty() const { return Name.empty(); }
38 };
39
40 enum class BasicType {
41 Bool,
42 Char,
43 I8,
44 I16,
45 I32,
46 I64,
47 I128,
48 ISize,
49 U8,
50 U16,
51 U32,
52 U64,
53 U128,
54 USize,
55 F32,
56 F64,
57 Str,
58 Placeholder,
59 Unit,
60 Variadic,
61 Never,
62 };
63
64 enum class IsInType {
65 No,
66 Yes,
67 };
68
69 enum class LeaveGenericsOpen {
70 No,
71 Yes,
72 };
73
74 class Demangler {
75 // Maximum recursion level. Used to avoid stack overflow.
76 size_t MaxRecursionLevel;
77 // Current recursion level.
78 size_t RecursionLevel;
79 size_t BoundLifetimes;
80 // Input string that is being demangled with "_R" prefix removed.
81 std::string_view Input;
82 // Position in the input string.
83 size_t Position;
84 // When true, print methods append the output to the stream.
85 // When false, the output is suppressed.
86 bool Print;
87 // True if an error occurred.
88 bool Error;
89
90 public:
91 // Demangled output.
92 OutputBuffer Output;
93
94 Demangler(size_t MaxRecursionLevel = 500);
95
96 bool demangle(std::string_view MangledName);
97
98 private:
99 bool demanglePath(IsInType Type,
100 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
101 void demangleImplPath(IsInType InType);
102 void demangleGenericArg();
103 void demangleType();
104 void demangleFnSig();
105 void demangleDynBounds();
106 void demangleDynTrait();
107 void demangleOptionalBinder();
108 void demangleConst();
109 void demangleConstInt();
110 void demangleConstBool();
111 void demangleConstChar();
112
demangleBackref(Callable Demangler)113 template <typename Callable> void demangleBackref(Callable Demangler) {
114 uint64_t Backref = parseBase62Number();
115 if (Error || Backref >= Position) {
116 Error = true;
117 return;
118 }
119
120 if (!Print)
121 return;
122
123 ScopedOverride<size_t> SavePosition(Position, Position);
124 Position = Backref;
125 Demangler();
126 }
127
128 Identifier parseIdentifier();
129 uint64_t parseOptionalBase62Number(char Tag);
130 uint64_t parseBase62Number();
131 uint64_t parseDecimalNumber();
132 uint64_t parseHexNumber(std::string_view &HexDigits);
133
134 void print(char C);
135 void print(std::string_view S);
136 void printDecimalNumber(uint64_t N);
137 void printBasicType(BasicType);
138 void printLifetime(uint64_t Index);
139 void printIdentifier(Identifier Ident);
140
141 char look() const;
142 char consume();
143 bool consumeIf(char Prefix);
144
145 bool addAssign(uint64_t &A, uint64_t B);
146 bool mulAssign(uint64_t &A, uint64_t B);
147 };
148
149 } // namespace
150
rustDemangle(std::string_view MangledName)151 char *llvm::rustDemangle(std::string_view MangledName) {
152 // Return early if mangled name doesn't look like a Rust symbol.
153 if (MangledName.empty() || !starts_with(MangledName, "_R"))
154 return nullptr;
155
156 Demangler D;
157 if (!D.demangle(MangledName)) {
158 std::free(D.Output.getBuffer());
159 return nullptr;
160 }
161
162 D.Output += '\0';
163
164 return D.Output.getBuffer();
165 }
166
Demangler(size_t MaxRecursionLevel)167 Demangler::Demangler(size_t MaxRecursionLevel)
168 : MaxRecursionLevel(MaxRecursionLevel) {}
169
isDigit(const char C)170 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
171
isHexDigit(const char C)172 static inline bool isHexDigit(const char C) {
173 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
174 }
175
isLower(const char C)176 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
177
isUpper(const char C)178 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
179
180 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)181 static inline bool isValid(const char C) {
182 return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
183 }
184
185 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
186 // otherwise. The demangled symbol is stored in Output field. It is
187 // responsibility of the caller to free the memory behind the output stream.
188 //
189 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(std::string_view Mangled)190 bool Demangler::demangle(std::string_view Mangled) {
191 Position = 0;
192 Error = false;
193 Print = true;
194 RecursionLevel = 0;
195 BoundLifetimes = 0;
196
197 if (!starts_with(Mangled, "_R")) {
198 Error = true;
199 return false;
200 }
201 Mangled.remove_prefix(2);
202 size_t Dot = Mangled.find('.');
203 Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
204
205 demanglePath(IsInType::No);
206
207 if (Position != Input.size()) {
208 ScopedOverride<bool> SavePrint(Print, false);
209 demanglePath(IsInType::No);
210 }
211
212 if (Position != Input.size())
213 Error = true;
214
215 if (Dot != std::string_view::npos) {
216 print(" (");
217 print(Mangled.substr(Dot));
218 print(")");
219 }
220
221 return !Error;
222 }
223
224 // Demangles a path. InType indicates whether a path is inside a type. When
225 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
226 // output. Return value indicates whether generics arguments have been left
227 // open.
228 //
229 // <path> = "C" <identifier> // crate root
230 // | "M" <impl-path> <type> // <T> (inherent impl)
231 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
232 // | "Y" <type> <path> // <T as Trait> (trait definition)
233 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
234 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
235 // | <backref>
236 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
237 // <ns> = "C" // closure
238 // | "S" // shim
239 // | <A-Z> // other special namespaces
240 // | <a-z> // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)241 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
242 if (Error || RecursionLevel >= MaxRecursionLevel) {
243 Error = true;
244 return false;
245 }
246 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
247
248 switch (consume()) {
249 case 'C': {
250 parseOptionalBase62Number('s');
251 printIdentifier(parseIdentifier());
252 break;
253 }
254 case 'M': {
255 demangleImplPath(InType);
256 print("<");
257 demangleType();
258 print(">");
259 break;
260 }
261 case 'X': {
262 demangleImplPath(InType);
263 print("<");
264 demangleType();
265 print(" as ");
266 demanglePath(IsInType::Yes);
267 print(">");
268 break;
269 }
270 case 'Y': {
271 print("<");
272 demangleType();
273 print(" as ");
274 demanglePath(IsInType::Yes);
275 print(">");
276 break;
277 }
278 case 'N': {
279 char NS = consume();
280 if (!isLower(NS) && !isUpper(NS)) {
281 Error = true;
282 break;
283 }
284 demanglePath(InType);
285
286 uint64_t Disambiguator = parseOptionalBase62Number('s');
287 Identifier Ident = parseIdentifier();
288
289 if (isUpper(NS)) {
290 // Special namespaces
291 print("::{");
292 if (NS == 'C')
293 print("closure");
294 else if (NS == 'S')
295 print("shim");
296 else
297 print(NS);
298 if (!Ident.empty()) {
299 print(":");
300 printIdentifier(Ident);
301 }
302 print('#');
303 printDecimalNumber(Disambiguator);
304 print('}');
305 } else {
306 // Implementation internal namespaces.
307 if (!Ident.empty()) {
308 print("::");
309 printIdentifier(Ident);
310 }
311 }
312 break;
313 }
314 case 'I': {
315 demanglePath(InType);
316 // Omit "::" when in a type, where it is optional.
317 if (InType == IsInType::No)
318 print("::");
319 print("<");
320 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
321 if (I > 0)
322 print(", ");
323 demangleGenericArg();
324 }
325 if (LeaveOpen == LeaveGenericsOpen::Yes)
326 return true;
327 else
328 print(">");
329 break;
330 }
331 case 'B': {
332 bool IsOpen = false;
333 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
334 return IsOpen;
335 }
336 default:
337 Error = true;
338 break;
339 }
340
341 return false;
342 }
343
344 // <impl-path> = [<disambiguator>] <path>
345 // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)346 void Demangler::demangleImplPath(IsInType InType) {
347 ScopedOverride<bool> SavePrint(Print, false);
348 parseOptionalBase62Number('s');
349 demanglePath(InType);
350 }
351
352 // <generic-arg> = <lifetime>
353 // | <type>
354 // | "K" <const>
355 // <lifetime> = "L" <base-62-number>
demangleGenericArg()356 void Demangler::demangleGenericArg() {
357 if (consumeIf('L'))
358 printLifetime(parseBase62Number());
359 else if (consumeIf('K'))
360 demangleConst();
361 else
362 demangleType();
363 }
364
365 // <basic-type> = "a" // i8
366 // | "b" // bool
367 // | "c" // char
368 // | "d" // f64
369 // | "e" // str
370 // | "f" // f32
371 // | "h" // u8
372 // | "i" // isize
373 // | "j" // usize
374 // | "l" // i32
375 // | "m" // u32
376 // | "n" // i128
377 // | "o" // u128
378 // | "s" // i16
379 // | "t" // u16
380 // | "u" // ()
381 // | "v" // ...
382 // | "x" // i64
383 // | "y" // u64
384 // | "z" // !
385 // | "p" // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)386 static bool parseBasicType(char C, BasicType &Type) {
387 switch (C) {
388 case 'a':
389 Type = BasicType::I8;
390 return true;
391 case 'b':
392 Type = BasicType::Bool;
393 return true;
394 case 'c':
395 Type = BasicType::Char;
396 return true;
397 case 'd':
398 Type = BasicType::F64;
399 return true;
400 case 'e':
401 Type = BasicType::Str;
402 return true;
403 case 'f':
404 Type = BasicType::F32;
405 return true;
406 case 'h':
407 Type = BasicType::U8;
408 return true;
409 case 'i':
410 Type = BasicType::ISize;
411 return true;
412 case 'j':
413 Type = BasicType::USize;
414 return true;
415 case 'l':
416 Type = BasicType::I32;
417 return true;
418 case 'm':
419 Type = BasicType::U32;
420 return true;
421 case 'n':
422 Type = BasicType::I128;
423 return true;
424 case 'o':
425 Type = BasicType::U128;
426 return true;
427 case 'p':
428 Type = BasicType::Placeholder;
429 return true;
430 case 's':
431 Type = BasicType::I16;
432 return true;
433 case 't':
434 Type = BasicType::U16;
435 return true;
436 case 'u':
437 Type = BasicType::Unit;
438 return true;
439 case 'v':
440 Type = BasicType::Variadic;
441 return true;
442 case 'x':
443 Type = BasicType::I64;
444 return true;
445 case 'y':
446 Type = BasicType::U64;
447 return true;
448 case 'z':
449 Type = BasicType::Never;
450 return true;
451 default:
452 return false;
453 }
454 }
455
printBasicType(BasicType Type)456 void Demangler::printBasicType(BasicType Type) {
457 switch (Type) {
458 case BasicType::Bool:
459 print("bool");
460 break;
461 case BasicType::Char:
462 print("char");
463 break;
464 case BasicType::I8:
465 print("i8");
466 break;
467 case BasicType::I16:
468 print("i16");
469 break;
470 case BasicType::I32:
471 print("i32");
472 break;
473 case BasicType::I64:
474 print("i64");
475 break;
476 case BasicType::I128:
477 print("i128");
478 break;
479 case BasicType::ISize:
480 print("isize");
481 break;
482 case BasicType::U8:
483 print("u8");
484 break;
485 case BasicType::U16:
486 print("u16");
487 break;
488 case BasicType::U32:
489 print("u32");
490 break;
491 case BasicType::U64:
492 print("u64");
493 break;
494 case BasicType::U128:
495 print("u128");
496 break;
497 case BasicType::USize:
498 print("usize");
499 break;
500 case BasicType::F32:
501 print("f32");
502 break;
503 case BasicType::F64:
504 print("f64");
505 break;
506 case BasicType::Str:
507 print("str");
508 break;
509 case BasicType::Placeholder:
510 print("_");
511 break;
512 case BasicType::Unit:
513 print("()");
514 break;
515 case BasicType::Variadic:
516 print("...");
517 break;
518 case BasicType::Never:
519 print("!");
520 break;
521 }
522 }
523
524 // <type> = | <basic-type>
525 // | <path> // named type
526 // | "A" <type> <const> // [T; N]
527 // | "S" <type> // [T]
528 // | "T" {<type>} "E" // (T1, T2, T3, ...)
529 // | "R" [<lifetime>] <type> // &T
530 // | "Q" [<lifetime>] <type> // &mut T
531 // | "P" <type> // *const T
532 // | "O" <type> // *mut T
533 // | "F" <fn-sig> // fn(...) -> ...
534 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
535 // | <backref> // backref
demangleType()536 void Demangler::demangleType() {
537 if (Error || RecursionLevel >= MaxRecursionLevel) {
538 Error = true;
539 return;
540 }
541 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
542
543 size_t Start = Position;
544 char C = consume();
545 BasicType Type;
546 if (parseBasicType(C, Type))
547 return printBasicType(Type);
548
549 switch (C) {
550 case 'A':
551 print("[");
552 demangleType();
553 print("; ");
554 demangleConst();
555 print("]");
556 break;
557 case 'S':
558 print("[");
559 demangleType();
560 print("]");
561 break;
562 case 'T': {
563 print("(");
564 size_t I = 0;
565 for (; !Error && !consumeIf('E'); ++I) {
566 if (I > 0)
567 print(", ");
568 demangleType();
569 }
570 if (I == 1)
571 print(",");
572 print(")");
573 break;
574 }
575 case 'R':
576 case 'Q':
577 print('&');
578 if (consumeIf('L')) {
579 if (auto Lifetime = parseBase62Number()) {
580 printLifetime(Lifetime);
581 print(' ');
582 }
583 }
584 if (C == 'Q')
585 print("mut ");
586 demangleType();
587 break;
588 case 'P':
589 print("*const ");
590 demangleType();
591 break;
592 case 'O':
593 print("*mut ");
594 demangleType();
595 break;
596 case 'F':
597 demangleFnSig();
598 break;
599 case 'D':
600 demangleDynBounds();
601 if (consumeIf('L')) {
602 if (auto Lifetime = parseBase62Number()) {
603 print(" + ");
604 printLifetime(Lifetime);
605 }
606 } else {
607 Error = true;
608 }
609 break;
610 case 'B':
611 demangleBackref([&] { demangleType(); });
612 break;
613 default:
614 Position = Start;
615 demanglePath(IsInType::Yes);
616 break;
617 }
618 }
619
620 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
621 // <abi> = "C"
622 // | <undisambiguated-identifier>
demangleFnSig()623 void Demangler::demangleFnSig() {
624 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
625 demangleOptionalBinder();
626
627 if (consumeIf('U'))
628 print("unsafe ");
629
630 if (consumeIf('K')) {
631 print("extern \"");
632 if (consumeIf('C')) {
633 print("C");
634 } else {
635 Identifier Ident = parseIdentifier();
636 if (Ident.Punycode)
637 Error = true;
638 for (char C : Ident.Name) {
639 // When mangling ABI string, the "-" is replaced with "_".
640 if (C == '_')
641 C = '-';
642 print(C);
643 }
644 }
645 print("\" ");
646 }
647
648 print("fn(");
649 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
650 if (I > 0)
651 print(", ");
652 demangleType();
653 }
654 print(")");
655
656 if (consumeIf('u')) {
657 // Skip the unit type from the output.
658 } else {
659 print(" -> ");
660 demangleType();
661 }
662 }
663
664 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()665 void Demangler::demangleDynBounds() {
666 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
667 print("dyn ");
668 demangleOptionalBinder();
669 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
670 if (I > 0)
671 print(" + ");
672 demangleDynTrait();
673 }
674 }
675
676 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
677 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()678 void Demangler::demangleDynTrait() {
679 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
680 while (!Error && consumeIf('p')) {
681 if (!IsOpen) {
682 IsOpen = true;
683 print('<');
684 } else {
685 print(", ");
686 }
687 print(parseIdentifier().Name);
688 print(" = ");
689 demangleType();
690 }
691 if (IsOpen)
692 print(">");
693 }
694
695 // Demangles optional binder and updates the number of bound lifetimes.
696 //
697 // <binder> = "G" <base-62-number>
demangleOptionalBinder()698 void Demangler::demangleOptionalBinder() {
699 uint64_t Binder = parseOptionalBase62Number('G');
700 if (Error || Binder == 0)
701 return;
702
703 // In valid inputs each bound lifetime is referenced later. Referencing a
704 // lifetime requires at least one byte of input. Reject inputs that are too
705 // short to reference all bound lifetimes. Otherwise demangling of invalid
706 // binders could generate excessive amounts of output.
707 if (Binder >= Input.size() - BoundLifetimes) {
708 Error = true;
709 return;
710 }
711
712 print("for<");
713 for (size_t I = 0; I != Binder; ++I) {
714 BoundLifetimes += 1;
715 if (I > 0)
716 print(", ");
717 printLifetime(1);
718 }
719 print("> ");
720 }
721
722 // <const> = <basic-type> <const-data>
723 // | "p" // placeholder
724 // | <backref>
demangleConst()725 void Demangler::demangleConst() {
726 if (Error || RecursionLevel >= MaxRecursionLevel) {
727 Error = true;
728 return;
729 }
730 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
731
732 char C = consume();
733 BasicType Type;
734 if (parseBasicType(C, Type)) {
735 switch (Type) {
736 case BasicType::I8:
737 case BasicType::I16:
738 case BasicType::I32:
739 case BasicType::I64:
740 case BasicType::I128:
741 case BasicType::ISize:
742 case BasicType::U8:
743 case BasicType::U16:
744 case BasicType::U32:
745 case BasicType::U64:
746 case BasicType::U128:
747 case BasicType::USize:
748 demangleConstInt();
749 break;
750 case BasicType::Bool:
751 demangleConstBool();
752 break;
753 case BasicType::Char:
754 demangleConstChar();
755 break;
756 case BasicType::Placeholder:
757 print('_');
758 break;
759 default:
760 Error = true;
761 break;
762 }
763 } else if (C == 'B') {
764 demangleBackref([&] { demangleConst(); });
765 } else {
766 Error = true;
767 }
768 }
769
770 // <const-data> = ["n"] <hex-number>
demangleConstInt()771 void Demangler::demangleConstInt() {
772 if (consumeIf('n'))
773 print('-');
774
775 std::string_view HexDigits;
776 uint64_t Value = parseHexNumber(HexDigits);
777 if (HexDigits.size() <= 16) {
778 printDecimalNumber(Value);
779 } else {
780 print("0x");
781 print(HexDigits);
782 }
783 }
784
785 // <const-data> = "0_" // false
786 // | "1_" // true
demangleConstBool()787 void Demangler::demangleConstBool() {
788 std::string_view HexDigits;
789 parseHexNumber(HexDigits);
790 if (HexDigits == "0")
791 print("false");
792 else if (HexDigits == "1")
793 print("true");
794 else
795 Error = true;
796 }
797
798 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)799 static bool isAsciiPrintable(uint64_t CodePoint) {
800 return 0x20 <= CodePoint && CodePoint <= 0x7e;
801 }
802
803 // <const-data> = <hex-number>
demangleConstChar()804 void Demangler::demangleConstChar() {
805 std::string_view HexDigits;
806 uint64_t CodePoint = parseHexNumber(HexDigits);
807 if (Error || HexDigits.size() > 6) {
808 Error = true;
809 return;
810 }
811
812 print("'");
813 switch (CodePoint) {
814 case '\t':
815 print(R"(\t)");
816 break;
817 case '\r':
818 print(R"(\r)");
819 break;
820 case '\n':
821 print(R"(\n)");
822 break;
823 case '\\':
824 print(R"(\\)");
825 break;
826 case '"':
827 print(R"(")");
828 break;
829 case '\'':
830 print(R"(\')");
831 break;
832 default:
833 if (isAsciiPrintable(CodePoint)) {
834 char C = CodePoint;
835 print(C);
836 } else {
837 print(R"(\u{)");
838 print(HexDigits);
839 print('}');
840 }
841 break;
842 }
843 print('\'');
844 }
845
846 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()847 Identifier Demangler::parseIdentifier() {
848 bool Punycode = consumeIf('u');
849 uint64_t Bytes = parseDecimalNumber();
850
851 // Underscore resolves the ambiguity when identifier starts with a decimal
852 // digit or another underscore.
853 consumeIf('_');
854
855 if (Error || Bytes > Input.size() - Position) {
856 Error = true;
857 return {};
858 }
859 std::string_view S = Input.substr(Position, Bytes);
860 Position += Bytes;
861
862 if (!std::all_of(S.begin(), S.end(), isValid)) {
863 Error = true;
864 return {};
865 }
866
867 return {S, Punycode};
868 }
869
870 // Parses optional base 62 number. The presence of a number is determined using
871 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
872 //
873 // This function is indended for parsing disambiguators and binders which when
874 // not present have their value interpreted as 0, and otherwise as decoded
875 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
876 // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)877 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
878 if (!consumeIf(Tag))
879 return 0;
880
881 uint64_t N = parseBase62Number();
882 if (Error || !addAssign(N, 1))
883 return 0;
884
885 return N;
886 }
887
888 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
889 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
890 // "1_" encodes 2, etc.
891 //
892 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()893 uint64_t Demangler::parseBase62Number() {
894 if (consumeIf('_'))
895 return 0;
896
897 uint64_t Value = 0;
898
899 while (true) {
900 uint64_t Digit;
901 char C = consume();
902
903 if (C == '_') {
904 break;
905 } else if (isDigit(C)) {
906 Digit = C - '0';
907 } else if (isLower(C)) {
908 Digit = 10 + (C - 'a');
909 } else if (isUpper(C)) {
910 Digit = 10 + 26 + (C - 'A');
911 } else {
912 Error = true;
913 return 0;
914 }
915
916 if (!mulAssign(Value, 62))
917 return 0;
918
919 if (!addAssign(Value, Digit))
920 return 0;
921 }
922
923 if (!addAssign(Value, 1))
924 return 0;
925
926 return Value;
927 }
928
929 // Parses a decimal number that had been encoded without any leading zeros.
930 //
931 // <decimal-number> = "0"
932 // | <1-9> {<0-9>}
parseDecimalNumber()933 uint64_t Demangler::parseDecimalNumber() {
934 char C = look();
935 if (!isDigit(C)) {
936 Error = true;
937 return 0;
938 }
939
940 if (C == '0') {
941 consume();
942 return 0;
943 }
944
945 uint64_t Value = 0;
946
947 while (isDigit(look())) {
948 if (!mulAssign(Value, 10)) {
949 Error = true;
950 return 0;
951 }
952
953 uint64_t D = consume() - '0';
954 if (!addAssign(Value, D))
955 return 0;
956 }
957
958 return Value;
959 }
960
961 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
962 // value and stores hex digits in HexDigits. The return value is unspecified if
963 // HexDigits.size() > 16.
964 //
965 // <hex-number> = "0_"
966 // | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(std::string_view & HexDigits)967 uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
968 size_t Start = Position;
969 uint64_t Value = 0;
970
971 if (!isHexDigit(look()))
972 Error = true;
973
974 if (consumeIf('0')) {
975 if (!consumeIf('_'))
976 Error = true;
977 } else {
978 while (!Error && !consumeIf('_')) {
979 char C = consume();
980 Value *= 16;
981 if (isDigit(C))
982 Value += C - '0';
983 else if ('a' <= C && C <= 'f')
984 Value += 10 + (C - 'a');
985 else
986 Error = true;
987 }
988 }
989
990 if (Error) {
991 HexDigits = std::string_view();
992 return 0;
993 }
994
995 size_t End = Position - 1;
996 assert(Start < End);
997 HexDigits = Input.substr(Start, End - Start);
998 return Value;
999 }
1000
print(char C)1001 void Demangler::print(char C) {
1002 if (Error || !Print)
1003 return;
1004
1005 Output += C;
1006 }
1007
print(std::string_view S)1008 void Demangler::print(std::string_view S) {
1009 if (Error || !Print)
1010 return;
1011
1012 Output += S;
1013 }
1014
printDecimalNumber(uint64_t N)1015 void Demangler::printDecimalNumber(uint64_t N) {
1016 if (Error || !Print)
1017 return;
1018
1019 Output << N;
1020 }
1021
1022 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1023 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1024 // bound by one of the enclosing binders.
printLifetime(uint64_t Index)1025 void Demangler::printLifetime(uint64_t Index) {
1026 if (Index == 0) {
1027 print("'_");
1028 return;
1029 }
1030
1031 if (Index - 1 >= BoundLifetimes) {
1032 Error = true;
1033 return;
1034 }
1035
1036 uint64_t Depth = BoundLifetimes - Index;
1037 print('\'');
1038 if (Depth < 26) {
1039 char C = 'a' + Depth;
1040 print(C);
1041 } else {
1042 print('z');
1043 printDecimalNumber(Depth - 26 + 1);
1044 }
1045 }
1046
decodePunycodeDigit(char C,size_t & Value)1047 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1048 if (isLower(C)) {
1049 Value = C - 'a';
1050 return true;
1051 }
1052
1053 if (isDigit(C)) {
1054 Value = 26 + (C - '0');
1055 return true;
1056 }
1057
1058 return false;
1059 }
1060
removeNullBytes(OutputBuffer & Output,size_t StartIdx)1061 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1062 char *Buffer = Output.getBuffer();
1063 char *Start = Buffer + StartIdx;
1064 char *End = Buffer + Output.getCurrentPosition();
1065 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1066 }
1067
1068 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1069 // CodePoint is not a valid unicode scalar value.
encodeUTF8(size_t CodePoint,char * Output)1070 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1071 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1072 return false;
1073
1074 if (CodePoint <= 0x7F) {
1075 Output[0] = CodePoint;
1076 return true;
1077 }
1078
1079 if (CodePoint <= 0x7FF) {
1080 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1081 Output[1] = 0x80 | (CodePoint & 0x3F);
1082 return true;
1083 }
1084
1085 if (CodePoint <= 0xFFFF) {
1086 Output[0] = 0xE0 | (CodePoint >> 12);
1087 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1088 Output[2] = 0x80 | (CodePoint & 0x3F);
1089 return true;
1090 }
1091
1092 if (CodePoint <= 0x10FFFF) {
1093 Output[0] = 0xF0 | (CodePoint >> 18);
1094 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1095 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1096 Output[3] = 0x80 | (CodePoint & 0x3F);
1097 return true;
1098 }
1099
1100 return false;
1101 }
1102
1103 // Decodes string encoded using punycode and appends results to Output.
1104 // Returns true if decoding was successful.
decodePunycode(std::string_view Input,OutputBuffer & Output)1105 static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
1106 size_t OutputSize = Output.getCurrentPosition();
1107 size_t InputIdx = 0;
1108
1109 // Rust uses an underscore as a delimiter.
1110 size_t DelimiterPos = std::string_view::npos;
1111 for (size_t I = 0; I != Input.size(); ++I)
1112 if (Input[I] == '_')
1113 DelimiterPos = I;
1114
1115 if (DelimiterPos != std::string_view::npos) {
1116 // Copy basic code points before the last delimiter to the output.
1117 for (; InputIdx != DelimiterPos; ++InputIdx) {
1118 char C = Input[InputIdx];
1119 if (!isValid(C))
1120 return false;
1121 // Code points are padded with zeros while decoding is in progress.
1122 char UTF8[4] = {C};
1123 Output += std::string_view(UTF8, 4);
1124 }
1125 // Skip over the delimiter.
1126 ++InputIdx;
1127 }
1128
1129 size_t Base = 36;
1130 size_t Skew = 38;
1131 size_t Bias = 72;
1132 size_t N = 0x80;
1133 size_t TMin = 1;
1134 size_t TMax = 26;
1135 size_t Damp = 700;
1136
1137 auto Adapt = [&](size_t Delta, size_t NumPoints) {
1138 Delta /= Damp;
1139 Delta += Delta / NumPoints;
1140 Damp = 2;
1141
1142 size_t K = 0;
1143 while (Delta > (Base - TMin) * TMax / 2) {
1144 Delta /= Base - TMin;
1145 K += Base;
1146 }
1147 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1148 };
1149
1150 // Main decoding loop.
1151 for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1152 size_t OldI = I;
1153 size_t W = 1;
1154 size_t Max = std::numeric_limits<size_t>::max();
1155 for (size_t K = Base; true; K += Base) {
1156 if (InputIdx == Input.size())
1157 return false;
1158 char C = Input[InputIdx++];
1159 size_t Digit = 0;
1160 if (!decodePunycodeDigit(C, Digit))
1161 return false;
1162
1163 if (Digit > (Max - I) / W)
1164 return false;
1165 I += Digit * W;
1166
1167 size_t T;
1168 if (K <= Bias)
1169 T = TMin;
1170 else if (K >= Bias + TMax)
1171 T = TMax;
1172 else
1173 T = K - Bias;
1174
1175 if (Digit < T)
1176 break;
1177
1178 if (W > Max / (Base - T))
1179 return false;
1180 W *= (Base - T);
1181 }
1182 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1183 Bias = Adapt(I - OldI, NumPoints);
1184
1185 if (I / NumPoints > Max - N)
1186 return false;
1187 N += I / NumPoints;
1188 I = I % NumPoints;
1189
1190 // Insert N at position I in the output.
1191 char UTF8[4] = {};
1192 if (!encodeUTF8(N, UTF8))
1193 return false;
1194 Output.insert(OutputSize + I * 4, UTF8, 4);
1195 }
1196
1197 removeNullBytes(Output, OutputSize);
1198 return true;
1199 }
1200
printIdentifier(Identifier Ident)1201 void Demangler::printIdentifier(Identifier Ident) {
1202 if (Error || !Print)
1203 return;
1204
1205 if (Ident.Punycode) {
1206 if (!decodePunycode(Ident.Name, Output))
1207 Error = true;
1208 } else {
1209 print(Ident.Name);
1210 }
1211 }
1212
look() const1213 char Demangler::look() const {
1214 if (Error || Position >= Input.size())
1215 return 0;
1216
1217 return Input[Position];
1218 }
1219
consume()1220 char Demangler::consume() {
1221 if (Error || Position >= Input.size()) {
1222 Error = true;
1223 return 0;
1224 }
1225
1226 return Input[Position++];
1227 }
1228
consumeIf(char Prefix)1229 bool Demangler::consumeIf(char Prefix) {
1230 if (Error || Position >= Input.size() || Input[Position] != Prefix)
1231 return false;
1232
1233 Position += 1;
1234 return true;
1235 }
1236
1237 /// Computes A + B. When computation wraps around sets the error and returns
1238 /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)1239 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1240 if (A > std::numeric_limits<uint64_t>::max() - B) {
1241 Error = true;
1242 return false;
1243 }
1244
1245 A += B;
1246 return true;
1247 }
1248
1249 /// Computes A * B. When computation wraps around sets the error and returns
1250 /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)1251 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1252 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1253 Error = true;
1254 return false;
1255 }
1256
1257 A *= B;
1258 return true;
1259 }
1260