xref: /freebsd/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringViewExtras.h"
16 #include "llvm/Demangle/Utility.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 #include <string_view>
24 
25 using namespace llvm;
26 
27 using llvm::itanium_demangle::OutputBuffer;
28 using llvm::itanium_demangle::ScopedOverride;
29 using llvm::itanium_demangle::starts_with;
30 
31 namespace {
32 
33 struct Identifier {
34   std::string_view Name;
35   bool Punycode;
36 
empty__anon406d1ba70111::Identifier37   bool empty() const { return Name.empty(); }
38 };
39 
40 enum class BasicType {
41   Bool,
42   Char,
43   I8,
44   I16,
45   I32,
46   I64,
47   I128,
48   ISize,
49   U8,
50   U16,
51   U32,
52   U64,
53   U128,
54   USize,
55   F32,
56   F64,
57   Str,
58   Placeholder,
59   Unit,
60   Variadic,
61   Never,
62 };
63 
64 enum class IsInType {
65   No,
66   Yes,
67 };
68 
69 enum class LeaveGenericsOpen {
70   No,
71   Yes,
72 };
73 
74 class Demangler {
75   // Maximum recursion level. Used to avoid stack overflow.
76   size_t MaxRecursionLevel;
77   // Current recursion level.
78   size_t RecursionLevel;
79   size_t BoundLifetimes;
80   // Input string that is being demangled with "_R" prefix removed.
81   std::string_view Input;
82   // Position in the input string.
83   size_t Position;
84   // When true, print methods append the output to the stream.
85   // When false, the output is suppressed.
86   bool Print;
87   // True if an error occurred.
88   bool Error;
89 
90 public:
91   // Demangled output.
92   OutputBuffer Output;
93 
94   Demangler(size_t MaxRecursionLevel = 500);
95 
96   bool demangle(std::string_view MangledName);
97 
98 private:
99   bool demanglePath(IsInType Type,
100                     LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
101   void demangleImplPath(IsInType InType);
102   void demangleGenericArg();
103   void demangleType();
104   void demangleFnSig();
105   void demangleDynBounds();
106   void demangleDynTrait();
107   void demangleOptionalBinder();
108   void demangleConst();
109   void demangleConstInt();
110   void demangleConstBool();
111   void demangleConstChar();
112 
demangleBackref(Callable Demangler)113   template <typename Callable> void demangleBackref(Callable Demangler) {
114     uint64_t Backref = parseBase62Number();
115     if (Error || Backref >= Position) {
116       Error = true;
117       return;
118     }
119 
120     if (!Print)
121       return;
122 
123     ScopedOverride<size_t> SavePosition(Position, Position);
124     Position = Backref;
125     Demangler();
126   }
127 
128   Identifier parseIdentifier();
129   uint64_t parseOptionalBase62Number(char Tag);
130   uint64_t parseBase62Number();
131   uint64_t parseDecimalNumber();
132   uint64_t parseHexNumber(std::string_view &HexDigits);
133 
134   void print(char C);
135   void print(std::string_view S);
136   void printDecimalNumber(uint64_t N);
137   void printBasicType(BasicType);
138   void printLifetime(uint64_t Index);
139   void printIdentifier(Identifier Ident);
140 
141   char look() const;
142   char consume();
143   bool consumeIf(char Prefix);
144 
145   bool addAssign(uint64_t &A, uint64_t B);
146   bool mulAssign(uint64_t &A, uint64_t B);
147 };
148 
149 } // namespace
150 
rustDemangle(std::string_view MangledName)151 char *llvm::rustDemangle(std::string_view MangledName) {
152   // Return early if mangled name doesn't look like a Rust symbol.
153   if (MangledName.empty() || !starts_with(MangledName, "_R"))
154     return nullptr;
155 
156   Demangler D;
157   if (!D.demangle(MangledName)) {
158     std::free(D.Output.getBuffer());
159     return nullptr;
160   }
161 
162   D.Output += '\0';
163 
164   return D.Output.getBuffer();
165 }
166 
Demangler(size_t MaxRecursionLevel)167 Demangler::Demangler(size_t MaxRecursionLevel)
168     : MaxRecursionLevel(MaxRecursionLevel) {}
169 
isDigit(const char C)170 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
171 
isHexDigit(const char C)172 static inline bool isHexDigit(const char C) {
173   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
174 }
175 
isLower(const char C)176 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
177 
isUpper(const char C)178 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
179 
180 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)181 static inline bool isValid(const char C) {
182   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
183 }
184 
185 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
186 // otherwise. The demangled symbol is stored in Output field. It is
187 // responsibility of the caller to free the memory behind the output stream.
188 //
189 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(std::string_view Mangled)190 bool Demangler::demangle(std::string_view Mangled) {
191   Position = 0;
192   Error = false;
193   Print = true;
194   RecursionLevel = 0;
195   BoundLifetimes = 0;
196 
197   if (!starts_with(Mangled, "_R")) {
198     Error = true;
199     return false;
200   }
201   Mangled.remove_prefix(2);
202   size_t Dot = Mangled.find('.');
203   Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
204 
205   demanglePath(IsInType::No);
206 
207   if (Position != Input.size()) {
208     ScopedOverride<bool> SavePrint(Print, false);
209     demanglePath(IsInType::No);
210   }
211 
212   if (Position != Input.size())
213     Error = true;
214 
215   if (Dot != std::string_view::npos) {
216     print(" (");
217     print(Mangled.substr(Dot));
218     print(")");
219   }
220 
221   return !Error;
222 }
223 
224 // Demangles a path. InType indicates whether a path is inside a type. When
225 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
226 // output. Return value indicates whether generics arguments have been left
227 // open.
228 //
229 // <path> = "C" <identifier>               // crate root
230 //        | "M" <impl-path> <type>         // <T> (inherent impl)
231 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
232 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
233 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
234 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
235 //        | <backref>
236 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
237 // <ns> = "C"      // closure
238 //      | "S"      // shim
239 //      | <A-Z>    // other special namespaces
240 //      | <a-z>    // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)241 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
242   if (Error || RecursionLevel >= MaxRecursionLevel) {
243     Error = true;
244     return false;
245   }
246   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
247 
248   switch (consume()) {
249   case 'C': {
250     parseOptionalBase62Number('s');
251     printIdentifier(parseIdentifier());
252     break;
253   }
254   case 'M': {
255     demangleImplPath(InType);
256     print("<");
257     demangleType();
258     print(">");
259     break;
260   }
261   case 'X': {
262     demangleImplPath(InType);
263     print("<");
264     demangleType();
265     print(" as ");
266     demanglePath(IsInType::Yes);
267     print(">");
268     break;
269   }
270   case 'Y': {
271     print("<");
272     demangleType();
273     print(" as ");
274     demanglePath(IsInType::Yes);
275     print(">");
276     break;
277   }
278   case 'N': {
279     char NS = consume();
280     if (!isLower(NS) && !isUpper(NS)) {
281       Error = true;
282       break;
283     }
284     demanglePath(InType);
285 
286     uint64_t Disambiguator = parseOptionalBase62Number('s');
287     Identifier Ident = parseIdentifier();
288 
289     if (isUpper(NS)) {
290       // Special namespaces
291       print("::{");
292       if (NS == 'C')
293         print("closure");
294       else if (NS == 'S')
295         print("shim");
296       else
297         print(NS);
298       if (!Ident.empty()) {
299         print(":");
300         printIdentifier(Ident);
301       }
302       print('#');
303       printDecimalNumber(Disambiguator);
304       print('}');
305     } else {
306       // Implementation internal namespaces.
307       if (!Ident.empty()) {
308         print("::");
309         printIdentifier(Ident);
310       }
311     }
312     break;
313   }
314   case 'I': {
315     demanglePath(InType);
316     // Omit "::" when in a type, where it is optional.
317     if (InType == IsInType::No)
318       print("::");
319     print("<");
320     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
321       if (I > 0)
322         print(", ");
323       demangleGenericArg();
324     }
325     if (LeaveOpen == LeaveGenericsOpen::Yes)
326       return true;
327     else
328       print(">");
329     break;
330   }
331   case 'B': {
332     bool IsOpen = false;
333     demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
334     return IsOpen;
335   }
336   default:
337     Error = true;
338     break;
339   }
340 
341   return false;
342 }
343 
344 // <impl-path> = [<disambiguator>] <path>
345 // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)346 void Demangler::demangleImplPath(IsInType InType) {
347   ScopedOverride<bool> SavePrint(Print, false);
348   parseOptionalBase62Number('s');
349   demanglePath(InType);
350 }
351 
352 // <generic-arg> = <lifetime>
353 //               | <type>
354 //               | "K" <const>
355 // <lifetime> = "L" <base-62-number>
demangleGenericArg()356 void Demangler::demangleGenericArg() {
357   if (consumeIf('L'))
358     printLifetime(parseBase62Number());
359   else if (consumeIf('K'))
360     demangleConst();
361   else
362     demangleType();
363 }
364 
365 // <basic-type> = "a"      // i8
366 //              | "b"      // bool
367 //              | "c"      // char
368 //              | "d"      // f64
369 //              | "e"      // str
370 //              | "f"      // f32
371 //              | "h"      // u8
372 //              | "i"      // isize
373 //              | "j"      // usize
374 //              | "l"      // i32
375 //              | "m"      // u32
376 //              | "n"      // i128
377 //              | "o"      // u128
378 //              | "s"      // i16
379 //              | "t"      // u16
380 //              | "u"      // ()
381 //              | "v"      // ...
382 //              | "x"      // i64
383 //              | "y"      // u64
384 //              | "z"      // !
385 //              | "p"      // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)386 static bool parseBasicType(char C, BasicType &Type) {
387   switch (C) {
388   case 'a':
389     Type = BasicType::I8;
390     return true;
391   case 'b':
392     Type = BasicType::Bool;
393     return true;
394   case 'c':
395     Type = BasicType::Char;
396     return true;
397   case 'd':
398     Type = BasicType::F64;
399     return true;
400   case 'e':
401     Type = BasicType::Str;
402     return true;
403   case 'f':
404     Type = BasicType::F32;
405     return true;
406   case 'h':
407     Type = BasicType::U8;
408     return true;
409   case 'i':
410     Type = BasicType::ISize;
411     return true;
412   case 'j':
413     Type = BasicType::USize;
414     return true;
415   case 'l':
416     Type = BasicType::I32;
417     return true;
418   case 'm':
419     Type = BasicType::U32;
420     return true;
421   case 'n':
422     Type = BasicType::I128;
423     return true;
424   case 'o':
425     Type = BasicType::U128;
426     return true;
427   case 'p':
428     Type = BasicType::Placeholder;
429     return true;
430   case 's':
431     Type = BasicType::I16;
432     return true;
433   case 't':
434     Type = BasicType::U16;
435     return true;
436   case 'u':
437     Type = BasicType::Unit;
438     return true;
439   case 'v':
440     Type = BasicType::Variadic;
441     return true;
442   case 'x':
443     Type = BasicType::I64;
444     return true;
445   case 'y':
446     Type = BasicType::U64;
447     return true;
448   case 'z':
449     Type = BasicType::Never;
450     return true;
451   default:
452     return false;
453   }
454 }
455 
printBasicType(BasicType Type)456 void Demangler::printBasicType(BasicType Type) {
457   switch (Type) {
458   case BasicType::Bool:
459     print("bool");
460     break;
461   case BasicType::Char:
462     print("char");
463     break;
464   case BasicType::I8:
465     print("i8");
466     break;
467   case BasicType::I16:
468     print("i16");
469     break;
470   case BasicType::I32:
471     print("i32");
472     break;
473   case BasicType::I64:
474     print("i64");
475     break;
476   case BasicType::I128:
477     print("i128");
478     break;
479   case BasicType::ISize:
480     print("isize");
481     break;
482   case BasicType::U8:
483     print("u8");
484     break;
485   case BasicType::U16:
486     print("u16");
487     break;
488   case BasicType::U32:
489     print("u32");
490     break;
491   case BasicType::U64:
492     print("u64");
493     break;
494   case BasicType::U128:
495     print("u128");
496     break;
497   case BasicType::USize:
498     print("usize");
499     break;
500   case BasicType::F32:
501     print("f32");
502     break;
503   case BasicType::F64:
504     print("f64");
505     break;
506   case BasicType::Str:
507     print("str");
508     break;
509   case BasicType::Placeholder:
510     print("_");
511     break;
512   case BasicType::Unit:
513     print("()");
514     break;
515   case BasicType::Variadic:
516     print("...");
517     break;
518   case BasicType::Never:
519     print("!");
520     break;
521   }
522 }
523 
524 // <type> = | <basic-type>
525 //          | <path>                      // named type
526 //          | "A" <type> <const>          // [T; N]
527 //          | "S" <type>                  // [T]
528 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
529 //          | "R" [<lifetime>] <type>     // &T
530 //          | "Q" [<lifetime>] <type>     // &mut T
531 //          | "P" <type>                  // *const T
532 //          | "O" <type>                  // *mut T
533 //          | "F" <fn-sig>                // fn(...) -> ...
534 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
535 //          | <backref>                   // backref
demangleType()536 void Demangler::demangleType() {
537   if (Error || RecursionLevel >= MaxRecursionLevel) {
538     Error = true;
539     return;
540   }
541   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
542 
543   size_t Start = Position;
544   char C = consume();
545   BasicType Type;
546   if (parseBasicType(C, Type))
547     return printBasicType(Type);
548 
549   switch (C) {
550   case 'A':
551     print("[");
552     demangleType();
553     print("; ");
554     demangleConst();
555     print("]");
556     break;
557   case 'S':
558     print("[");
559     demangleType();
560     print("]");
561     break;
562   case 'T': {
563     print("(");
564     size_t I = 0;
565     for (; !Error && !consumeIf('E'); ++I) {
566       if (I > 0)
567         print(", ");
568       demangleType();
569     }
570     if (I == 1)
571       print(",");
572     print(")");
573     break;
574   }
575   case 'R':
576   case 'Q':
577     print('&');
578     if (consumeIf('L')) {
579       if (auto Lifetime = parseBase62Number()) {
580         printLifetime(Lifetime);
581         print(' ');
582       }
583     }
584     if (C == 'Q')
585       print("mut ");
586     demangleType();
587     break;
588   case 'P':
589     print("*const ");
590     demangleType();
591     break;
592   case 'O':
593     print("*mut ");
594     demangleType();
595     break;
596   case 'F':
597     demangleFnSig();
598     break;
599   case 'D':
600     demangleDynBounds();
601     if (consumeIf('L')) {
602       if (auto Lifetime = parseBase62Number()) {
603         print(" + ");
604         printLifetime(Lifetime);
605       }
606     } else {
607       Error = true;
608     }
609     break;
610   case 'B':
611     demangleBackref([&] { demangleType(); });
612     break;
613   default:
614     Position = Start;
615     demanglePath(IsInType::Yes);
616     break;
617   }
618 }
619 
620 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
621 // <abi> = "C"
622 //       | <undisambiguated-identifier>
demangleFnSig()623 void Demangler::demangleFnSig() {
624   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
625   demangleOptionalBinder();
626 
627   if (consumeIf('U'))
628     print("unsafe ");
629 
630   if (consumeIf('K')) {
631     print("extern \"");
632     if (consumeIf('C')) {
633       print("C");
634     } else {
635       Identifier Ident = parseIdentifier();
636       if (Ident.Punycode)
637         Error = true;
638       for (char C : Ident.Name) {
639         // When mangling ABI string, the "-" is replaced with "_".
640         if (C == '_')
641           C = '-';
642         print(C);
643       }
644     }
645     print("\" ");
646   }
647 
648   print("fn(");
649   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
650     if (I > 0)
651       print(", ");
652     demangleType();
653   }
654   print(")");
655 
656   if (consumeIf('u')) {
657     // Skip the unit type from the output.
658   } else {
659     print(" -> ");
660     demangleType();
661   }
662 }
663 
664 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()665 void Demangler::demangleDynBounds() {
666   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
667   print("dyn ");
668   demangleOptionalBinder();
669   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
670     if (I > 0)
671       print(" + ");
672     demangleDynTrait();
673   }
674 }
675 
676 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
677 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()678 void Demangler::demangleDynTrait() {
679   bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
680   while (!Error && consumeIf('p')) {
681     if (!IsOpen) {
682       IsOpen = true;
683       print('<');
684     } else {
685       print(", ");
686     }
687     print(parseIdentifier().Name);
688     print(" = ");
689     demangleType();
690   }
691   if (IsOpen)
692     print(">");
693 }
694 
695 // Demangles optional binder and updates the number of bound lifetimes.
696 //
697 // <binder> = "G" <base-62-number>
demangleOptionalBinder()698 void Demangler::demangleOptionalBinder() {
699   uint64_t Binder = parseOptionalBase62Number('G');
700   if (Error || Binder == 0)
701     return;
702 
703   // In valid inputs each bound lifetime is referenced later. Referencing a
704   // lifetime requires at least one byte of input. Reject inputs that are too
705   // short to reference all bound lifetimes. Otherwise demangling of invalid
706   // binders could generate excessive amounts of output.
707   if (Binder >= Input.size() - BoundLifetimes) {
708     Error = true;
709     return;
710   }
711 
712   print("for<");
713   for (size_t I = 0; I != Binder; ++I) {
714     BoundLifetimes += 1;
715     if (I > 0)
716       print(", ");
717     printLifetime(1);
718   }
719   print("> ");
720 }
721 
722 // <const> = <basic-type> <const-data>
723 //         | "p"                          // placeholder
724 //         | <backref>
demangleConst()725 void Demangler::demangleConst() {
726   if (Error || RecursionLevel >= MaxRecursionLevel) {
727     Error = true;
728     return;
729   }
730   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
731 
732   char C = consume();
733   BasicType Type;
734   if (parseBasicType(C, Type)) {
735     switch (Type) {
736     case BasicType::I8:
737     case BasicType::I16:
738     case BasicType::I32:
739     case BasicType::I64:
740     case BasicType::I128:
741     case BasicType::ISize:
742     case BasicType::U8:
743     case BasicType::U16:
744     case BasicType::U32:
745     case BasicType::U64:
746     case BasicType::U128:
747     case BasicType::USize:
748       demangleConstInt();
749       break;
750     case BasicType::Bool:
751       demangleConstBool();
752       break;
753     case BasicType::Char:
754       demangleConstChar();
755       break;
756     case BasicType::Placeholder:
757       print('_');
758       break;
759     default:
760       Error = true;
761       break;
762     }
763   } else if (C == 'B') {
764     demangleBackref([&] { demangleConst(); });
765   } else {
766     Error = true;
767   }
768 }
769 
770 // <const-data> = ["n"] <hex-number>
demangleConstInt()771 void Demangler::demangleConstInt() {
772   if (consumeIf('n'))
773     print('-');
774 
775   std::string_view HexDigits;
776   uint64_t Value = parseHexNumber(HexDigits);
777   if (HexDigits.size() <= 16) {
778     printDecimalNumber(Value);
779   } else {
780     print("0x");
781     print(HexDigits);
782   }
783 }
784 
785 // <const-data> = "0_" // false
786 //              | "1_" // true
demangleConstBool()787 void Demangler::demangleConstBool() {
788   std::string_view HexDigits;
789   parseHexNumber(HexDigits);
790   if (HexDigits == "0")
791     print("false");
792   else if (HexDigits == "1")
793     print("true");
794   else
795     Error = true;
796 }
797 
798 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)799 static bool isAsciiPrintable(uint64_t CodePoint) {
800   return 0x20 <= CodePoint && CodePoint <= 0x7e;
801 }
802 
803 // <const-data> = <hex-number>
demangleConstChar()804 void Demangler::demangleConstChar() {
805   std::string_view HexDigits;
806   uint64_t CodePoint = parseHexNumber(HexDigits);
807   if (Error || HexDigits.size() > 6) {
808     Error = true;
809     return;
810   }
811 
812   print("'");
813   switch (CodePoint) {
814   case '\t':
815     print(R"(\t)");
816     break;
817   case '\r':
818     print(R"(\r)");
819     break;
820   case '\n':
821     print(R"(\n)");
822     break;
823   case '\\':
824     print(R"(\\)");
825     break;
826   case '"':
827     print(R"(")");
828     break;
829   case '\'':
830     print(R"(\')");
831     break;
832   default:
833     if (isAsciiPrintable(CodePoint)) {
834       char C = CodePoint;
835       print(C);
836     } else {
837       print(R"(\u{)");
838       print(HexDigits);
839       print('}');
840     }
841     break;
842   }
843   print('\'');
844 }
845 
846 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()847 Identifier Demangler::parseIdentifier() {
848   bool Punycode = consumeIf('u');
849   uint64_t Bytes = parseDecimalNumber();
850 
851   // Underscore resolves the ambiguity when identifier starts with a decimal
852   // digit or another underscore.
853   consumeIf('_');
854 
855   if (Error || Bytes > Input.size() - Position) {
856     Error = true;
857     return {};
858   }
859   std::string_view S = Input.substr(Position, Bytes);
860   Position += Bytes;
861 
862   if (!std::all_of(S.begin(), S.end(), isValid)) {
863     Error = true;
864     return {};
865   }
866 
867   return {S, Punycode};
868 }
869 
870 // Parses optional base 62 number. The presence of a number is determined using
871 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
872 //
873 // This function is indended for parsing disambiguators and binders which when
874 // not present have their value interpreted as 0, and otherwise as decoded
875 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
876 // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)877 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
878   if (!consumeIf(Tag))
879     return 0;
880 
881   uint64_t N = parseBase62Number();
882   if (Error || !addAssign(N, 1))
883     return 0;
884 
885   return N;
886 }
887 
888 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
889 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
890 // "1_" encodes 2, etc.
891 //
892 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()893 uint64_t Demangler::parseBase62Number() {
894   if (consumeIf('_'))
895     return 0;
896 
897   uint64_t Value = 0;
898 
899   while (true) {
900     uint64_t Digit;
901     char C = consume();
902 
903     if (C == '_') {
904       break;
905     } else if (isDigit(C)) {
906       Digit = C - '0';
907     } else if (isLower(C)) {
908       Digit = 10 + (C - 'a');
909     } else if (isUpper(C)) {
910       Digit = 10 + 26 + (C - 'A');
911     } else {
912       Error = true;
913       return 0;
914     }
915 
916     if (!mulAssign(Value, 62))
917       return 0;
918 
919     if (!addAssign(Value, Digit))
920       return 0;
921   }
922 
923   if (!addAssign(Value, 1))
924     return 0;
925 
926   return Value;
927 }
928 
929 // Parses a decimal number that had been encoded without any leading zeros.
930 //
931 // <decimal-number> = "0"
932 //                  | <1-9> {<0-9>}
parseDecimalNumber()933 uint64_t Demangler::parseDecimalNumber() {
934   char C = look();
935   if (!isDigit(C)) {
936     Error = true;
937     return 0;
938   }
939 
940   if (C == '0') {
941     consume();
942     return 0;
943   }
944 
945   uint64_t Value = 0;
946 
947   while (isDigit(look())) {
948     if (!mulAssign(Value, 10)) {
949       Error = true;
950       return 0;
951     }
952 
953     uint64_t D = consume() - '0';
954     if (!addAssign(Value, D))
955       return 0;
956   }
957 
958   return Value;
959 }
960 
961 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
962 // value and stores hex digits in HexDigits. The return value is unspecified if
963 // HexDigits.size() > 16.
964 //
965 // <hex-number> = "0_"
966 //              | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(std::string_view & HexDigits)967 uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
968   size_t Start = Position;
969   uint64_t Value = 0;
970 
971   if (!isHexDigit(look()))
972     Error = true;
973 
974   if (consumeIf('0')) {
975     if (!consumeIf('_'))
976       Error = true;
977   } else {
978     while (!Error && !consumeIf('_')) {
979       char C = consume();
980       Value *= 16;
981       if (isDigit(C))
982         Value += C - '0';
983       else if ('a' <= C && C <= 'f')
984         Value += 10 + (C - 'a');
985       else
986         Error = true;
987     }
988   }
989 
990   if (Error) {
991     HexDigits = std::string_view();
992     return 0;
993   }
994 
995   size_t End = Position - 1;
996   assert(Start < End);
997   HexDigits = Input.substr(Start, End - Start);
998   return Value;
999 }
1000 
print(char C)1001 void Demangler::print(char C) {
1002   if (Error || !Print)
1003     return;
1004 
1005   Output += C;
1006 }
1007 
print(std::string_view S)1008 void Demangler::print(std::string_view S) {
1009   if (Error || !Print)
1010     return;
1011 
1012   Output += S;
1013 }
1014 
printDecimalNumber(uint64_t N)1015 void Demangler::printDecimalNumber(uint64_t N) {
1016   if (Error || !Print)
1017     return;
1018 
1019   Output << N;
1020 }
1021 
1022 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1023 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1024 // bound by one of the enclosing binders.
printLifetime(uint64_t Index)1025 void Demangler::printLifetime(uint64_t Index) {
1026   if (Index == 0) {
1027     print("'_");
1028     return;
1029   }
1030 
1031   if (Index - 1 >= BoundLifetimes) {
1032     Error = true;
1033     return;
1034   }
1035 
1036   uint64_t Depth = BoundLifetimes - Index;
1037   print('\'');
1038   if (Depth < 26) {
1039     char C = 'a' + Depth;
1040     print(C);
1041   } else {
1042     print('z');
1043     printDecimalNumber(Depth - 26 + 1);
1044   }
1045 }
1046 
decodePunycodeDigit(char C,size_t & Value)1047 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1048   if (isLower(C)) {
1049     Value = C - 'a';
1050     return true;
1051   }
1052 
1053   if (isDigit(C)) {
1054     Value = 26 + (C - '0');
1055     return true;
1056   }
1057 
1058   return false;
1059 }
1060 
removeNullBytes(OutputBuffer & Output,size_t StartIdx)1061 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1062   char *Buffer = Output.getBuffer();
1063   char *Start = Buffer + StartIdx;
1064   char *End = Buffer + Output.getCurrentPosition();
1065   Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1066 }
1067 
1068 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1069 // CodePoint is not a valid unicode scalar value.
encodeUTF8(size_t CodePoint,char * Output)1070 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1071   if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1072     return false;
1073 
1074   if (CodePoint <= 0x7F) {
1075     Output[0] = CodePoint;
1076     return true;
1077   }
1078 
1079   if (CodePoint <= 0x7FF) {
1080     Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1081     Output[1] = 0x80 | (CodePoint & 0x3F);
1082     return true;
1083   }
1084 
1085   if (CodePoint <= 0xFFFF) {
1086     Output[0] = 0xE0 | (CodePoint >> 12);
1087     Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1088     Output[2] = 0x80 | (CodePoint & 0x3F);
1089     return true;
1090   }
1091 
1092   if (CodePoint <= 0x10FFFF) {
1093     Output[0] = 0xF0 | (CodePoint >> 18);
1094     Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1095     Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1096     Output[3] = 0x80 | (CodePoint & 0x3F);
1097     return true;
1098   }
1099 
1100   return false;
1101 }
1102 
1103 // Decodes string encoded using punycode and appends results to Output.
1104 // Returns true if decoding was successful.
decodePunycode(std::string_view Input,OutputBuffer & Output)1105 static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
1106   size_t OutputSize = Output.getCurrentPosition();
1107   size_t InputIdx = 0;
1108 
1109   // Rust uses an underscore as a delimiter.
1110   size_t DelimiterPos = std::string_view::npos;
1111   for (size_t I = 0; I != Input.size(); ++I)
1112     if (Input[I] == '_')
1113       DelimiterPos = I;
1114 
1115   if (DelimiterPos != std::string_view::npos) {
1116     // Copy basic code points before the last delimiter to the output.
1117     for (; InputIdx != DelimiterPos; ++InputIdx) {
1118       char C = Input[InputIdx];
1119       if (!isValid(C))
1120         return false;
1121       // Code points are padded with zeros while decoding is in progress.
1122       char UTF8[4] = {C};
1123       Output += std::string_view(UTF8, 4);
1124     }
1125     // Skip over the delimiter.
1126     ++InputIdx;
1127   }
1128 
1129   size_t Base = 36;
1130   size_t Skew = 38;
1131   size_t Bias = 72;
1132   size_t N = 0x80;
1133   size_t TMin = 1;
1134   size_t TMax = 26;
1135   size_t Damp = 700;
1136 
1137   auto Adapt = [&](size_t Delta, size_t NumPoints) {
1138     Delta /= Damp;
1139     Delta += Delta / NumPoints;
1140     Damp = 2;
1141 
1142     size_t K = 0;
1143     while (Delta > (Base - TMin) * TMax / 2) {
1144       Delta /= Base - TMin;
1145       K += Base;
1146     }
1147     return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1148   };
1149 
1150   // Main decoding loop.
1151   for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1152     size_t OldI = I;
1153     size_t W = 1;
1154     size_t Max = std::numeric_limits<size_t>::max();
1155     for (size_t K = Base; true; K += Base) {
1156       if (InputIdx == Input.size())
1157         return false;
1158       char C = Input[InputIdx++];
1159       size_t Digit = 0;
1160       if (!decodePunycodeDigit(C, Digit))
1161         return false;
1162 
1163       if (Digit > (Max - I) / W)
1164         return false;
1165       I += Digit * W;
1166 
1167       size_t T;
1168       if (K <= Bias)
1169         T = TMin;
1170       else if (K >= Bias + TMax)
1171         T = TMax;
1172       else
1173         T = K - Bias;
1174 
1175       if (Digit < T)
1176         break;
1177 
1178       if (W > Max / (Base - T))
1179         return false;
1180       W *= (Base - T);
1181     }
1182     size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1183     Bias = Adapt(I - OldI, NumPoints);
1184 
1185     if (I / NumPoints > Max - N)
1186       return false;
1187     N += I / NumPoints;
1188     I = I % NumPoints;
1189 
1190     // Insert N at position I in the output.
1191     char UTF8[4] = {};
1192     if (!encodeUTF8(N, UTF8))
1193       return false;
1194     Output.insert(OutputSize + I * 4, UTF8, 4);
1195   }
1196 
1197   removeNullBytes(Output, OutputSize);
1198   return true;
1199 }
1200 
printIdentifier(Identifier Ident)1201 void Demangler::printIdentifier(Identifier Ident) {
1202   if (Error || !Print)
1203     return;
1204 
1205   if (Ident.Punycode) {
1206     if (!decodePunycode(Ident.Name, Output))
1207       Error = true;
1208   } else {
1209     print(Ident.Name);
1210   }
1211 }
1212 
look() const1213 char Demangler::look() const {
1214   if (Error || Position >= Input.size())
1215     return 0;
1216 
1217   return Input[Position];
1218 }
1219 
consume()1220 char Demangler::consume() {
1221   if (Error || Position >= Input.size()) {
1222     Error = true;
1223     return 0;
1224   }
1225 
1226   return Input[Position++];
1227 }
1228 
consumeIf(char Prefix)1229 bool Demangler::consumeIf(char Prefix) {
1230   if (Error || Position >= Input.size() || Input[Position] != Prefix)
1231     return false;
1232 
1233   Position += 1;
1234   return true;
1235 }
1236 
1237 /// Computes A + B. When computation wraps around sets the error and returns
1238 /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)1239 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1240   if (A > std::numeric_limits<uint64_t>::max() - B) {
1241     Error = true;
1242     return false;
1243   }
1244 
1245   A += B;
1246   return true;
1247 }
1248 
1249 /// Computes A * B. When computation wraps around sets the error and returns
1250 /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)1251 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1252   if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1253     Error = true;
1254     return false;
1255   }
1256 
1257   A *= B;
1258   return true;
1259 }
1260