xref: /freebsd/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision 56f3f2d2491e30f369f9461c3cb2a366bdffbe1d)
1  //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file defines a demangler for Rust v0 mangled symbols as specified in
10  // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "llvm/Demangle/Demangle.h"
15  #include "llvm/Demangle/StringView.h"
16  #include "llvm/Demangle/Utility.h"
17  
18  #include <algorithm>
19  #include <cassert>
20  #include <cstdint>
21  #include <cstring>
22  #include <limits>
23  
24  using namespace llvm;
25  
26  using llvm::itanium_demangle::OutputBuffer;
27  using llvm::itanium_demangle::ScopedOverride;
28  using llvm::itanium_demangle::StringView;
29  
30  namespace {
31  
32  struct Identifier {
33    StringView Name;
34    bool Punycode;
35  
36    bool empty() const { return Name.empty(); }
37  };
38  
39  enum class BasicType {
40    Bool,
41    Char,
42    I8,
43    I16,
44    I32,
45    I64,
46    I128,
47    ISize,
48    U8,
49    U16,
50    U32,
51    U64,
52    U128,
53    USize,
54    F32,
55    F64,
56    Str,
57    Placeholder,
58    Unit,
59    Variadic,
60    Never,
61  };
62  
63  enum class IsInType {
64    No,
65    Yes,
66  };
67  
68  enum class LeaveGenericsOpen {
69    No,
70    Yes,
71  };
72  
73  class Demangler {
74    // Maximum recursion level. Used to avoid stack overflow.
75    size_t MaxRecursionLevel;
76    // Current recursion level.
77    size_t RecursionLevel;
78    size_t BoundLifetimes;
79    // Input string that is being demangled with "_R" prefix removed.
80    StringView Input;
81    // Position in the input string.
82    size_t Position;
83    // When true, print methods append the output to the stream.
84    // When false, the output is suppressed.
85    bool Print;
86    // True if an error occurred.
87    bool Error;
88  
89  public:
90    // Demangled output.
91    OutputBuffer Output;
92  
93    Demangler(size_t MaxRecursionLevel = 500);
94  
95    bool demangle(StringView MangledName);
96  
97  private:
98    bool demanglePath(IsInType Type,
99                      LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100    void demangleImplPath(IsInType InType);
101    void demangleGenericArg();
102    void demangleType();
103    void demangleFnSig();
104    void demangleDynBounds();
105    void demangleDynTrait();
106    void demangleOptionalBinder();
107    void demangleConst();
108    void demangleConstInt();
109    void demangleConstBool();
110    void demangleConstChar();
111  
112    template <typename Callable> void demangleBackref(Callable Demangler) {
113      uint64_t Backref = parseBase62Number();
114      if (Error || Backref >= Position) {
115        Error = true;
116        return;
117      }
118  
119      if (!Print)
120        return;
121  
122      ScopedOverride<size_t> SavePosition(Position, Position);
123      Position = Backref;
124      Demangler();
125    }
126  
127    Identifier parseIdentifier();
128    uint64_t parseOptionalBase62Number(char Tag);
129    uint64_t parseBase62Number();
130    uint64_t parseDecimalNumber();
131    uint64_t parseHexNumber(StringView &HexDigits);
132  
133    void print(char C);
134    void print(StringView S);
135    void printDecimalNumber(uint64_t N);
136    void printBasicType(BasicType);
137    void printLifetime(uint64_t Index);
138    void printIdentifier(Identifier Ident);
139  
140    char look() const;
141    char consume();
142    bool consumeIf(char Prefix);
143  
144    bool addAssign(uint64_t &A, uint64_t B);
145    bool mulAssign(uint64_t &A, uint64_t B);
146  };
147  
148  } // namespace
149  
150  char *llvm::rustDemangle(const char *MangledName) {
151    if (MangledName == nullptr)
152      return nullptr;
153  
154    // Return early if mangled name doesn't look like a Rust symbol.
155    StringView Mangled(MangledName);
156    if (!Mangled.startsWith("_R"))
157      return nullptr;
158  
159    Demangler D;
160    if (!D.demangle(Mangled)) {
161      std::free(D.Output.getBuffer());
162      return nullptr;
163    }
164  
165    D.Output += '\0';
166  
167    return D.Output.getBuffer();
168  }
169  
170  Demangler::Demangler(size_t MaxRecursionLevel)
171      : MaxRecursionLevel(MaxRecursionLevel) {}
172  
173  static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
174  
175  static inline bool isHexDigit(const char C) {
176    return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
177  }
178  
179  static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
180  
181  static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
182  
183  /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
184  static inline bool isValid(const char C) {
185    return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
186  }
187  
188  // Demangles Rust v0 mangled symbol. Returns true when successful, and false
189  // otherwise. The demangled symbol is stored in Output field. It is
190  // responsibility of the caller to free the memory behind the output stream.
191  //
192  // <symbol-name> = "_R" <path> [<instantiating-crate>]
193  bool Demangler::demangle(StringView Mangled) {
194    Position = 0;
195    Error = false;
196    Print = true;
197    RecursionLevel = 0;
198    BoundLifetimes = 0;
199  
200    if (!Mangled.consumeFront("_R")) {
201      Error = true;
202      return false;
203    }
204    size_t Dot = Mangled.find('.');
205    Input = Mangled.substr(0, Dot);
206    StringView Suffix = Mangled.dropFront(Dot);
207  
208    demanglePath(IsInType::No);
209  
210    if (Position != Input.size()) {
211      ScopedOverride<bool> SavePrint(Print, false);
212      demanglePath(IsInType::No);
213    }
214  
215    if (Position != Input.size())
216      Error = true;
217  
218    if (!Suffix.empty()) {
219      print(" (");
220      print(Suffix);
221      print(")");
222    }
223  
224    return !Error;
225  }
226  
227  // Demangles a path. InType indicates whether a path is inside a type. When
228  // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
229  // output. Return value indicates whether generics arguments have been left
230  // open.
231  //
232  // <path> = "C" <identifier>               // crate root
233  //        | "M" <impl-path> <type>         // <T> (inherent impl)
234  //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
235  //        | "Y" <type> <path>              // <T as Trait> (trait definition)
236  //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
237  //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
238  //        | <backref>
239  // <identifier> = [<disambiguator>] <undisambiguated-identifier>
240  // <ns> = "C"      // closure
241  //      | "S"      // shim
242  //      | <A-Z>    // other special namespaces
243  //      | <a-z>    // internal namespaces
244  bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
245    if (Error || RecursionLevel >= MaxRecursionLevel) {
246      Error = true;
247      return false;
248    }
249    ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
250  
251    switch (consume()) {
252    case 'C': {
253      parseOptionalBase62Number('s');
254      printIdentifier(parseIdentifier());
255      break;
256    }
257    case 'M': {
258      demangleImplPath(InType);
259      print("<");
260      demangleType();
261      print(">");
262      break;
263    }
264    case 'X': {
265      demangleImplPath(InType);
266      print("<");
267      demangleType();
268      print(" as ");
269      demanglePath(IsInType::Yes);
270      print(">");
271      break;
272    }
273    case 'Y': {
274      print("<");
275      demangleType();
276      print(" as ");
277      demanglePath(IsInType::Yes);
278      print(">");
279      break;
280    }
281    case 'N': {
282      char NS = consume();
283      if (!isLower(NS) && !isUpper(NS)) {
284        Error = true;
285        break;
286      }
287      demanglePath(InType);
288  
289      uint64_t Disambiguator = parseOptionalBase62Number('s');
290      Identifier Ident = parseIdentifier();
291  
292      if (isUpper(NS)) {
293        // Special namespaces
294        print("::{");
295        if (NS == 'C')
296          print("closure");
297        else if (NS == 'S')
298          print("shim");
299        else
300          print(NS);
301        if (!Ident.empty()) {
302          print(":");
303          printIdentifier(Ident);
304        }
305        print('#');
306        printDecimalNumber(Disambiguator);
307        print('}');
308      } else {
309        // Implementation internal namespaces.
310        if (!Ident.empty()) {
311          print("::");
312          printIdentifier(Ident);
313        }
314      }
315      break;
316    }
317    case 'I': {
318      demanglePath(InType);
319      // Omit "::" when in a type, where it is optional.
320      if (InType == IsInType::No)
321        print("::");
322      print("<");
323      for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
324        if (I > 0)
325          print(", ");
326        demangleGenericArg();
327      }
328      if (LeaveOpen == LeaveGenericsOpen::Yes)
329        return true;
330      else
331        print(">");
332      break;
333    }
334    case 'B': {
335      bool IsOpen = false;
336      demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
337      return IsOpen;
338    }
339    default:
340      Error = true;
341      break;
342    }
343  
344    return false;
345  }
346  
347  // <impl-path> = [<disambiguator>] <path>
348  // <disambiguator> = "s" <base-62-number>
349  void Demangler::demangleImplPath(IsInType InType) {
350    ScopedOverride<bool> SavePrint(Print, false);
351    parseOptionalBase62Number('s');
352    demanglePath(InType);
353  }
354  
355  // <generic-arg> = <lifetime>
356  //               | <type>
357  //               | "K" <const>
358  // <lifetime> = "L" <base-62-number>
359  void Demangler::demangleGenericArg() {
360    if (consumeIf('L'))
361      printLifetime(parseBase62Number());
362    else if (consumeIf('K'))
363      demangleConst();
364    else
365      demangleType();
366  }
367  
368  // <basic-type> = "a"      // i8
369  //              | "b"      // bool
370  //              | "c"      // char
371  //              | "d"      // f64
372  //              | "e"      // str
373  //              | "f"      // f32
374  //              | "h"      // u8
375  //              | "i"      // isize
376  //              | "j"      // usize
377  //              | "l"      // i32
378  //              | "m"      // u32
379  //              | "n"      // i128
380  //              | "o"      // u128
381  //              | "s"      // i16
382  //              | "t"      // u16
383  //              | "u"      // ()
384  //              | "v"      // ...
385  //              | "x"      // i64
386  //              | "y"      // u64
387  //              | "z"      // !
388  //              | "p"      // placeholder (e.g. for generic params), shown as _
389  static bool parseBasicType(char C, BasicType &Type) {
390    switch (C) {
391    case 'a':
392      Type = BasicType::I8;
393      return true;
394    case 'b':
395      Type = BasicType::Bool;
396      return true;
397    case 'c':
398      Type = BasicType::Char;
399      return true;
400    case 'd':
401      Type = BasicType::F64;
402      return true;
403    case 'e':
404      Type = BasicType::Str;
405      return true;
406    case 'f':
407      Type = BasicType::F32;
408      return true;
409    case 'h':
410      Type = BasicType::U8;
411      return true;
412    case 'i':
413      Type = BasicType::ISize;
414      return true;
415    case 'j':
416      Type = BasicType::USize;
417      return true;
418    case 'l':
419      Type = BasicType::I32;
420      return true;
421    case 'm':
422      Type = BasicType::U32;
423      return true;
424    case 'n':
425      Type = BasicType::I128;
426      return true;
427    case 'o':
428      Type = BasicType::U128;
429      return true;
430    case 'p':
431      Type = BasicType::Placeholder;
432      return true;
433    case 's':
434      Type = BasicType::I16;
435      return true;
436    case 't':
437      Type = BasicType::U16;
438      return true;
439    case 'u':
440      Type = BasicType::Unit;
441      return true;
442    case 'v':
443      Type = BasicType::Variadic;
444      return true;
445    case 'x':
446      Type = BasicType::I64;
447      return true;
448    case 'y':
449      Type = BasicType::U64;
450      return true;
451    case 'z':
452      Type = BasicType::Never;
453      return true;
454    default:
455      return false;
456    }
457  }
458  
459  void Demangler::printBasicType(BasicType Type) {
460    switch (Type) {
461    case BasicType::Bool:
462      print("bool");
463      break;
464    case BasicType::Char:
465      print("char");
466      break;
467    case BasicType::I8:
468      print("i8");
469      break;
470    case BasicType::I16:
471      print("i16");
472      break;
473    case BasicType::I32:
474      print("i32");
475      break;
476    case BasicType::I64:
477      print("i64");
478      break;
479    case BasicType::I128:
480      print("i128");
481      break;
482    case BasicType::ISize:
483      print("isize");
484      break;
485    case BasicType::U8:
486      print("u8");
487      break;
488    case BasicType::U16:
489      print("u16");
490      break;
491    case BasicType::U32:
492      print("u32");
493      break;
494    case BasicType::U64:
495      print("u64");
496      break;
497    case BasicType::U128:
498      print("u128");
499      break;
500    case BasicType::USize:
501      print("usize");
502      break;
503    case BasicType::F32:
504      print("f32");
505      break;
506    case BasicType::F64:
507      print("f64");
508      break;
509    case BasicType::Str:
510      print("str");
511      break;
512    case BasicType::Placeholder:
513      print("_");
514      break;
515    case BasicType::Unit:
516      print("()");
517      break;
518    case BasicType::Variadic:
519      print("...");
520      break;
521    case BasicType::Never:
522      print("!");
523      break;
524    }
525  }
526  
527  // <type> = | <basic-type>
528  //          | <path>                      // named type
529  //          | "A" <type> <const>          // [T; N]
530  //          | "S" <type>                  // [T]
531  //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
532  //          | "R" [<lifetime>] <type>     // &T
533  //          | "Q" [<lifetime>] <type>     // &mut T
534  //          | "P" <type>                  // *const T
535  //          | "O" <type>                  // *mut T
536  //          | "F" <fn-sig>                // fn(...) -> ...
537  //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
538  //          | <backref>                   // backref
539  void Demangler::demangleType() {
540    if (Error || RecursionLevel >= MaxRecursionLevel) {
541      Error = true;
542      return;
543    }
544    ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
545  
546    size_t Start = Position;
547    char C = consume();
548    BasicType Type;
549    if (parseBasicType(C, Type))
550      return printBasicType(Type);
551  
552    switch (C) {
553    case 'A':
554      print("[");
555      demangleType();
556      print("; ");
557      demangleConst();
558      print("]");
559      break;
560    case 'S':
561      print("[");
562      demangleType();
563      print("]");
564      break;
565    case 'T': {
566      print("(");
567      size_t I = 0;
568      for (; !Error && !consumeIf('E'); ++I) {
569        if (I > 0)
570          print(", ");
571        demangleType();
572      }
573      if (I == 1)
574        print(",");
575      print(")");
576      break;
577    }
578    case 'R':
579    case 'Q':
580      print('&');
581      if (consumeIf('L')) {
582        if (auto Lifetime = parseBase62Number()) {
583          printLifetime(Lifetime);
584          print(' ');
585        }
586      }
587      if (C == 'Q')
588        print("mut ");
589      demangleType();
590      break;
591    case 'P':
592      print("*const ");
593      demangleType();
594      break;
595    case 'O':
596      print("*mut ");
597      demangleType();
598      break;
599    case 'F':
600      demangleFnSig();
601      break;
602    case 'D':
603      demangleDynBounds();
604      if (consumeIf('L')) {
605        if (auto Lifetime = parseBase62Number()) {
606          print(" + ");
607          printLifetime(Lifetime);
608        }
609      } else {
610        Error = true;
611      }
612      break;
613    case 'B':
614      demangleBackref([&] { demangleType(); });
615      break;
616    default:
617      Position = Start;
618      demanglePath(IsInType::Yes);
619      break;
620    }
621  }
622  
623  // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
624  // <abi> = "C"
625  //       | <undisambiguated-identifier>
626  void Demangler::demangleFnSig() {
627    ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
628    demangleOptionalBinder();
629  
630    if (consumeIf('U'))
631      print("unsafe ");
632  
633    if (consumeIf('K')) {
634      print("extern \"");
635      if (consumeIf('C')) {
636        print("C");
637      } else {
638        Identifier Ident = parseIdentifier();
639        if (Ident.Punycode)
640          Error = true;
641        for (char C : Ident.Name) {
642          // When mangling ABI string, the "-" is replaced with "_".
643          if (C == '_')
644            C = '-';
645          print(C);
646        }
647      }
648      print("\" ");
649    }
650  
651    print("fn(");
652    for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
653      if (I > 0)
654        print(", ");
655      demangleType();
656    }
657    print(")");
658  
659    if (consumeIf('u')) {
660      // Skip the unit type from the output.
661    } else {
662      print(" -> ");
663      demangleType();
664    }
665  }
666  
667  // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
668  void Demangler::demangleDynBounds() {
669    ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
670    print("dyn ");
671    demangleOptionalBinder();
672    for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
673      if (I > 0)
674        print(" + ");
675      demangleDynTrait();
676    }
677  }
678  
679  // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
680  // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
681  void Demangler::demangleDynTrait() {
682    bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
683    while (!Error && consumeIf('p')) {
684      if (!IsOpen) {
685        IsOpen = true;
686        print('<');
687      } else {
688        print(", ");
689      }
690      print(parseIdentifier().Name);
691      print(" = ");
692      demangleType();
693    }
694    if (IsOpen)
695      print(">");
696  }
697  
698  // Demangles optional binder and updates the number of bound lifetimes.
699  //
700  // <binder> = "G" <base-62-number>
701  void Demangler::demangleOptionalBinder() {
702    uint64_t Binder = parseOptionalBase62Number('G');
703    if (Error || Binder == 0)
704      return;
705  
706    // In valid inputs each bound lifetime is referenced later. Referencing a
707    // lifetime requires at least one byte of input. Reject inputs that are too
708    // short to reference all bound lifetimes. Otherwise demangling of invalid
709    // binders could generate excessive amounts of output.
710    if (Binder >= Input.size() - BoundLifetimes) {
711      Error = true;
712      return;
713    }
714  
715    print("for<");
716    for (size_t I = 0; I != Binder; ++I) {
717      BoundLifetimes += 1;
718      if (I > 0)
719        print(", ");
720      printLifetime(1);
721    }
722    print("> ");
723  }
724  
725  // <const> = <basic-type> <const-data>
726  //         | "p"                          // placeholder
727  //         | <backref>
728  void Demangler::demangleConst() {
729    if (Error || RecursionLevel >= MaxRecursionLevel) {
730      Error = true;
731      return;
732    }
733    ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
734  
735    char C = consume();
736    BasicType Type;
737    if (parseBasicType(C, Type)) {
738      switch (Type) {
739      case BasicType::I8:
740      case BasicType::I16:
741      case BasicType::I32:
742      case BasicType::I64:
743      case BasicType::I128:
744      case BasicType::ISize:
745      case BasicType::U8:
746      case BasicType::U16:
747      case BasicType::U32:
748      case BasicType::U64:
749      case BasicType::U128:
750      case BasicType::USize:
751        demangleConstInt();
752        break;
753      case BasicType::Bool:
754        demangleConstBool();
755        break;
756      case BasicType::Char:
757        demangleConstChar();
758        break;
759      case BasicType::Placeholder:
760        print('_');
761        break;
762      default:
763        Error = true;
764        break;
765      }
766    } else if (C == 'B') {
767      demangleBackref([&] { demangleConst(); });
768    } else {
769      Error = true;
770    }
771  }
772  
773  // <const-data> = ["n"] <hex-number>
774  void Demangler::demangleConstInt() {
775    if (consumeIf('n'))
776      print('-');
777  
778    StringView HexDigits;
779    uint64_t Value = parseHexNumber(HexDigits);
780    if (HexDigits.size() <= 16) {
781      printDecimalNumber(Value);
782    } else {
783      print("0x");
784      print(HexDigits);
785    }
786  }
787  
788  // <const-data> = "0_" // false
789  //              | "1_" // true
790  void Demangler::demangleConstBool() {
791    StringView HexDigits;
792    parseHexNumber(HexDigits);
793    if (HexDigits == "0")
794      print("false");
795    else if (HexDigits == "1")
796      print("true");
797    else
798      Error = true;
799  }
800  
801  /// Returns true if CodePoint represents a printable ASCII character.
802  static bool isAsciiPrintable(uint64_t CodePoint) {
803    return 0x20 <= CodePoint && CodePoint <= 0x7e;
804  }
805  
806  // <const-data> = <hex-number>
807  void Demangler::demangleConstChar() {
808    StringView HexDigits;
809    uint64_t CodePoint = parseHexNumber(HexDigits);
810    if (Error || HexDigits.size() > 6) {
811      Error = true;
812      return;
813    }
814  
815    print("'");
816    switch (CodePoint) {
817    case '\t':
818      print(R"(\t)");
819      break;
820    case '\r':
821      print(R"(\r)");
822      break;
823    case '\n':
824      print(R"(\n)");
825      break;
826    case '\\':
827      print(R"(\\)");
828      break;
829    case '"':
830      print(R"(")");
831      break;
832    case '\'':
833      print(R"(\')");
834      break;
835    default:
836      if (isAsciiPrintable(CodePoint)) {
837        char C = CodePoint;
838        print(C);
839      } else {
840        print(R"(\u{)");
841        print(HexDigits);
842        print('}');
843      }
844      break;
845    }
846    print('\'');
847  }
848  
849  // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
850  Identifier Demangler::parseIdentifier() {
851    bool Punycode = consumeIf('u');
852    uint64_t Bytes = parseDecimalNumber();
853  
854    // Underscore resolves the ambiguity when identifier starts with a decimal
855    // digit or another underscore.
856    consumeIf('_');
857  
858    if (Error || Bytes > Input.size() - Position) {
859      Error = true;
860      return {};
861    }
862    StringView S = Input.substr(Position, Bytes);
863    Position += Bytes;
864  
865    if (!std::all_of(S.begin(), S.end(), isValid)) {
866      Error = true;
867      return {};
868    }
869  
870    return {S, Punycode};
871  }
872  
873  // Parses optional base 62 number. The presence of a number is determined using
874  // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
875  //
876  // This function is indended for parsing disambiguators and binders which when
877  // not present have their value interpreted as 0, and otherwise as decoded
878  // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
879  // 2. When "G" is absent value is 0.
880  uint64_t Demangler::parseOptionalBase62Number(char Tag) {
881    if (!consumeIf(Tag))
882      return 0;
883  
884    uint64_t N = parseBase62Number();
885    if (Error || !addAssign(N, 1))
886      return 0;
887  
888    return N;
889  }
890  
891  // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
892  // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
893  // "1_" encodes 2, etc.
894  //
895  // <base-62-number> = {<0-9a-zA-Z>} "_"
896  uint64_t Demangler::parseBase62Number() {
897    if (consumeIf('_'))
898      return 0;
899  
900    uint64_t Value = 0;
901  
902    while (true) {
903      uint64_t Digit;
904      char C = consume();
905  
906      if (C == '_') {
907        break;
908      } else if (isDigit(C)) {
909        Digit = C - '0';
910      } else if (isLower(C)) {
911        Digit = 10 + (C - 'a');
912      } else if (isUpper(C)) {
913        Digit = 10 + 26 + (C - 'A');
914      } else {
915        Error = true;
916        return 0;
917      }
918  
919      if (!mulAssign(Value, 62))
920        return 0;
921  
922      if (!addAssign(Value, Digit))
923        return 0;
924    }
925  
926    if (!addAssign(Value, 1))
927      return 0;
928  
929    return Value;
930  }
931  
932  // Parses a decimal number that had been encoded without any leading zeros.
933  //
934  // <decimal-number> = "0"
935  //                  | <1-9> {<0-9>}
936  uint64_t Demangler::parseDecimalNumber() {
937    char C = look();
938    if (!isDigit(C)) {
939      Error = true;
940      return 0;
941    }
942  
943    if (C == '0') {
944      consume();
945      return 0;
946    }
947  
948    uint64_t Value = 0;
949  
950    while (isDigit(look())) {
951      if (!mulAssign(Value, 10)) {
952        Error = true;
953        return 0;
954      }
955  
956      uint64_t D = consume() - '0';
957      if (!addAssign(Value, D))
958        return 0;
959    }
960  
961    return Value;
962  }
963  
964  // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
965  // value and stores hex digits in HexDigits. The return value is unspecified if
966  // HexDigits.size() > 16.
967  //
968  // <hex-number> = "0_"
969  //              | <1-9a-f> {<0-9a-f>} "_"
970  uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
971    size_t Start = Position;
972    uint64_t Value = 0;
973  
974    if (!isHexDigit(look()))
975      Error = true;
976  
977    if (consumeIf('0')) {
978      if (!consumeIf('_'))
979        Error = true;
980    } else {
981      while (!Error && !consumeIf('_')) {
982        char C = consume();
983        Value *= 16;
984        if (isDigit(C))
985          Value += C - '0';
986        else if ('a' <= C && C <= 'f')
987          Value += 10 + (C - 'a');
988        else
989          Error = true;
990      }
991    }
992  
993    if (Error) {
994      HexDigits = StringView();
995      return 0;
996    }
997  
998    size_t End = Position - 1;
999    assert(Start < End);
1000    HexDigits = Input.substr(Start, End - Start);
1001    return Value;
1002  }
1003  
1004  void Demangler::print(char C) {
1005    if (Error || !Print)
1006      return;
1007  
1008    Output += C;
1009  }
1010  
1011  void Demangler::print(StringView S) {
1012    if (Error || !Print)
1013      return;
1014  
1015    Output += S;
1016  }
1017  
1018  void Demangler::printDecimalNumber(uint64_t N) {
1019    if (Error || !Print)
1020      return;
1021  
1022    Output << N;
1023  }
1024  
1025  // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1026  // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1027  // bound by one of the enclosing binders.
1028  void Demangler::printLifetime(uint64_t Index) {
1029    if (Index == 0) {
1030      print("'_");
1031      return;
1032    }
1033  
1034    if (Index - 1 >= BoundLifetimes) {
1035      Error = true;
1036      return;
1037    }
1038  
1039    uint64_t Depth = BoundLifetimes - Index;
1040    print('\'');
1041    if (Depth < 26) {
1042      char C = 'a' + Depth;
1043      print(C);
1044    } else {
1045      print('z');
1046      printDecimalNumber(Depth - 26 + 1);
1047    }
1048  }
1049  
1050  static inline bool decodePunycodeDigit(char C, size_t &Value) {
1051    if (isLower(C)) {
1052      Value = C - 'a';
1053      return true;
1054    }
1055  
1056    if (isDigit(C)) {
1057      Value = 26 + (C - '0');
1058      return true;
1059    }
1060  
1061    return false;
1062  }
1063  
1064  static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1065    char *Buffer = Output.getBuffer();
1066    char *Start = Buffer + StartIdx;
1067    char *End = Buffer + Output.getCurrentPosition();
1068    Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1069  }
1070  
1071  // Encodes code point as UTF-8 and stores results in Output. Returns false if
1072  // CodePoint is not a valid unicode scalar value.
1073  static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1074    if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1075      return false;
1076  
1077    if (CodePoint <= 0x7F) {
1078      Output[0] = CodePoint;
1079      return true;
1080    }
1081  
1082    if (CodePoint <= 0x7FF) {
1083      Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1084      Output[1] = 0x80 | (CodePoint & 0x3F);
1085      return true;
1086    }
1087  
1088    if (CodePoint <= 0xFFFF) {
1089      Output[0] = 0xE0 | (CodePoint >> 12);
1090      Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1091      Output[2] = 0x80 | (CodePoint & 0x3F);
1092      return true;
1093    }
1094  
1095    if (CodePoint <= 0x10FFFF) {
1096      Output[0] = 0xF0 | (CodePoint >> 18);
1097      Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1098      Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1099      Output[3] = 0x80 | (CodePoint & 0x3F);
1100      return true;
1101    }
1102  
1103    return false;
1104  }
1105  
1106  // Decodes string encoded using punycode and appends results to Output.
1107  // Returns true if decoding was successful.
1108  static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1109    size_t OutputSize = Output.getCurrentPosition();
1110    size_t InputIdx = 0;
1111  
1112    // Rust uses an underscore as a delimiter.
1113    size_t DelimiterPos = StringView::npos;
1114    for (size_t I = 0; I != Input.size(); ++I)
1115      if (Input[I] == '_')
1116        DelimiterPos = I;
1117  
1118    if (DelimiterPos != StringView::npos) {
1119      // Copy basic code points before the last delimiter to the output.
1120      for (; InputIdx != DelimiterPos; ++InputIdx) {
1121        char C = Input[InputIdx];
1122        if (!isValid(C))
1123          return false;
1124        // Code points are padded with zeros while decoding is in progress.
1125        char UTF8[4] = {C};
1126        Output += StringView(UTF8, UTF8 + 4);
1127      }
1128      // Skip over the delimiter.
1129      ++InputIdx;
1130    }
1131  
1132    size_t Base = 36;
1133    size_t Skew = 38;
1134    size_t Bias = 72;
1135    size_t N = 0x80;
1136    size_t TMin = 1;
1137    size_t TMax = 26;
1138    size_t Damp = 700;
1139  
1140    auto Adapt = [&](size_t Delta, size_t NumPoints) {
1141      Delta /= Damp;
1142      Delta += Delta / NumPoints;
1143      Damp = 2;
1144  
1145      size_t K = 0;
1146      while (Delta > (Base - TMin) * TMax / 2) {
1147        Delta /= Base - TMin;
1148        K += Base;
1149      }
1150      return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1151    };
1152  
1153    // Main decoding loop.
1154    for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1155      size_t OldI = I;
1156      size_t W = 1;
1157      size_t Max = std::numeric_limits<size_t>::max();
1158      for (size_t K = Base; true; K += Base) {
1159        if (InputIdx == Input.size())
1160          return false;
1161        char C = Input[InputIdx++];
1162        size_t Digit = 0;
1163        if (!decodePunycodeDigit(C, Digit))
1164          return false;
1165  
1166        if (Digit > (Max - I) / W)
1167          return false;
1168        I += Digit * W;
1169  
1170        size_t T;
1171        if (K <= Bias)
1172          T = TMin;
1173        else if (K >= Bias + TMax)
1174          T = TMax;
1175        else
1176          T = K - Bias;
1177  
1178        if (Digit < T)
1179          break;
1180  
1181        if (W > Max / (Base - T))
1182          return false;
1183        W *= (Base - T);
1184      }
1185      size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1186      Bias = Adapt(I - OldI, NumPoints);
1187  
1188      if (I / NumPoints > Max - N)
1189        return false;
1190      N += I / NumPoints;
1191      I = I % NumPoints;
1192  
1193      // Insert N at position I in the output.
1194      char UTF8[4] = {};
1195      if (!encodeUTF8(N, UTF8))
1196        return false;
1197      Output.insert(OutputSize + I * 4, UTF8, 4);
1198    }
1199  
1200    removeNullBytes(Output, OutputSize);
1201    return true;
1202  }
1203  
1204  void Demangler::printIdentifier(Identifier Ident) {
1205    if (Error || !Print)
1206      return;
1207  
1208    if (Ident.Punycode) {
1209      if (!decodePunycode(Ident.Name, Output))
1210        Error = true;
1211    } else {
1212      print(Ident.Name);
1213    }
1214  }
1215  
1216  char Demangler::look() const {
1217    if (Error || Position >= Input.size())
1218      return 0;
1219  
1220    return Input[Position];
1221  }
1222  
1223  char Demangler::consume() {
1224    if (Error || Position >= Input.size()) {
1225      Error = true;
1226      return 0;
1227    }
1228  
1229    return Input[Position++];
1230  }
1231  
1232  bool Demangler::consumeIf(char Prefix) {
1233    if (Error || Position >= Input.size() || Input[Position] != Prefix)
1234      return false;
1235  
1236    Position += 1;
1237    return true;
1238  }
1239  
1240  /// Computes A + B. When computation wraps around sets the error and returns
1241  /// false. Otherwise assigns the result to A and returns true.
1242  bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1243    if (A > std::numeric_limits<uint64_t>::max() - B) {
1244      Error = true;
1245      return false;
1246    }
1247  
1248    A += B;
1249    return true;
1250  }
1251  
1252  /// Computes A * B. When computation wraps around sets the error and returns
1253  /// false. Otherwise assigns the result to A and returns true.
1254  bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1255    if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1256      Error = true;
1257      return false;
1258    }
1259  
1260    A *= B;
1261    return true;
1262  }
1263