xref: /freebsd/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision 96b917bfcf2c558340f8f6e620e64efededa1daf)
1  //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file defines a demangler for Rust v0 mangled symbols as specified in
10  // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "llvm/Demangle/Demangle.h"
15  #include "llvm/Demangle/StringView.h"
16  #include "llvm/Demangle/Utility.h"
17  
18  #include <algorithm>
19  #include <cassert>
20  #include <cstdint>
21  #include <cstring>
22  #include <limits>
23  
24  using namespace llvm;
25  
26  using llvm::itanium_demangle::OutputBuffer;
27  using llvm::itanium_demangle::StringView;
28  using llvm::itanium_demangle::SwapAndRestore;
29  
30  namespace {
31  
32  struct Identifier {
33    StringView Name;
34    bool Punycode;
35  
36    bool empty() const { return Name.empty(); }
37  };
38  
39  enum class BasicType {
40    Bool,
41    Char,
42    I8,
43    I16,
44    I32,
45    I64,
46    I128,
47    ISize,
48    U8,
49    U16,
50    U32,
51    U64,
52    U128,
53    USize,
54    F32,
55    F64,
56    Str,
57    Placeholder,
58    Unit,
59    Variadic,
60    Never,
61  };
62  
63  enum class IsInType {
64    No,
65    Yes,
66  };
67  
68  enum class LeaveGenericsOpen {
69    No,
70    Yes,
71  };
72  
73  class Demangler {
74    // Maximum recursion level. Used to avoid stack overflow.
75    size_t MaxRecursionLevel;
76    // Current recursion level.
77    size_t RecursionLevel;
78    size_t BoundLifetimes;
79    // Input string that is being demangled with "_R" prefix removed.
80    StringView Input;
81    // Position in the input string.
82    size_t Position;
83    // When true, print methods append the output to the stream.
84    // When false, the output is suppressed.
85    bool Print;
86    // True if an error occurred.
87    bool Error;
88  
89  public:
90    // Demangled output.
91    OutputBuffer Output;
92  
93    Demangler(size_t MaxRecursionLevel = 500);
94  
95    bool demangle(StringView MangledName);
96  
97  private:
98    bool demanglePath(IsInType Type,
99                      LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100    void demangleImplPath(IsInType InType);
101    void demangleGenericArg();
102    void demangleType();
103    void demangleFnSig();
104    void demangleDynBounds();
105    void demangleDynTrait();
106    void demangleOptionalBinder();
107    void demangleConst();
108    void demangleConstInt();
109    void demangleConstBool();
110    void demangleConstChar();
111  
112    template <typename Callable> void demangleBackref(Callable Demangler) {
113      uint64_t Backref = parseBase62Number();
114      if (Error || Backref >= Position) {
115        Error = true;
116        return;
117      }
118  
119      if (!Print)
120        return;
121  
122      SwapAndRestore<size_t> SavePosition(Position, Position);
123      Position = Backref;
124      Demangler();
125    }
126  
127    Identifier parseIdentifier();
128    uint64_t parseOptionalBase62Number(char Tag);
129    uint64_t parseBase62Number();
130    uint64_t parseDecimalNumber();
131    uint64_t parseHexNumber(StringView &HexDigits);
132  
133    void print(char C);
134    void print(StringView S);
135    void printDecimalNumber(uint64_t N);
136    void printBasicType(BasicType);
137    void printLifetime(uint64_t Index);
138    void printIdentifier(Identifier Ident);
139  
140    char look() const;
141    char consume();
142    bool consumeIf(char Prefix);
143  
144    bool addAssign(uint64_t &A, uint64_t B);
145    bool mulAssign(uint64_t &A, uint64_t B);
146  };
147  
148  } // namespace
149  
150  char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
151                           int *Status) {
152    if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
153      if (Status != nullptr)
154        *Status = demangle_invalid_args;
155      return nullptr;
156    }
157  
158    // Return early if mangled name doesn't look like a Rust symbol.
159    StringView Mangled(MangledName);
160    if (!Mangled.startsWith("_R")) {
161      if (Status != nullptr)
162        *Status = demangle_invalid_mangled_name;
163      return nullptr;
164    }
165  
166    Demangler D;
167    if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) {
168      if (Status != nullptr)
169        *Status = demangle_memory_alloc_failure;
170      return nullptr;
171    }
172  
173    if (!D.demangle(Mangled)) {
174      if (Status != nullptr)
175        *Status = demangle_invalid_mangled_name;
176      std::free(D.Output.getBuffer());
177      return nullptr;
178    }
179  
180    D.Output += '\0';
181    char *Demangled = D.Output.getBuffer();
182    size_t DemangledLen = D.Output.getCurrentPosition();
183  
184    if (Buf != nullptr) {
185      if (DemangledLen <= *N) {
186        std::memcpy(Buf, Demangled, DemangledLen);
187        std::free(Demangled);
188        Demangled = Buf;
189      } else {
190        std::free(Buf);
191      }
192    }
193  
194    if (N != nullptr)
195      *N = DemangledLen;
196  
197    if (Status != nullptr)
198      *Status = demangle_success;
199  
200    return Demangled;
201  }
202  
203  Demangler::Demangler(size_t MaxRecursionLevel)
204      : MaxRecursionLevel(MaxRecursionLevel) {}
205  
206  static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
207  
208  static inline bool isHexDigit(const char C) {
209    return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
210  }
211  
212  static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
213  
214  static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
215  
216  /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
217  static inline bool isValid(const char C) {
218    return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
219  }
220  
221  // Demangles Rust v0 mangled symbol. Returns true when successful, and false
222  // otherwise. The demangled symbol is stored in Output field. It is
223  // responsibility of the caller to free the memory behind the output stream.
224  //
225  // <symbol-name> = "_R" <path> [<instantiating-crate>]
226  bool Demangler::demangle(StringView Mangled) {
227    Position = 0;
228    Error = false;
229    Print = true;
230    RecursionLevel = 0;
231    BoundLifetimes = 0;
232  
233    if (!Mangled.consumeFront("_R")) {
234      Error = true;
235      return false;
236    }
237    size_t Dot = Mangled.find('.');
238    Input = Mangled.substr(0, Dot);
239    StringView Suffix = Mangled.dropFront(Dot);
240  
241    demanglePath(IsInType::No);
242  
243    if (Position != Input.size()) {
244      SwapAndRestore<bool> SavePrint(Print, false);
245      demanglePath(IsInType::No);
246    }
247  
248    if (Position != Input.size())
249      Error = true;
250  
251    if (!Suffix.empty()) {
252      print(" (");
253      print(Suffix);
254      print(")");
255    }
256  
257    return !Error;
258  }
259  
260  // Demangles a path. InType indicates whether a path is inside a type. When
261  // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
262  // output. Return value indicates whether generics arguments have been left
263  // open.
264  //
265  // <path> = "C" <identifier>               // crate root
266  //        | "M" <impl-path> <type>         // <T> (inherent impl)
267  //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
268  //        | "Y" <type> <path>              // <T as Trait> (trait definition)
269  //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
270  //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
271  //        | <backref>
272  // <identifier> = [<disambiguator>] <undisambiguated-identifier>
273  // <ns> = "C"      // closure
274  //      | "S"      // shim
275  //      | <A-Z>    // other special namespaces
276  //      | <a-z>    // internal namespaces
277  bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
278    if (Error || RecursionLevel >= MaxRecursionLevel) {
279      Error = true;
280      return false;
281    }
282    SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
283  
284    switch (consume()) {
285    case 'C': {
286      parseOptionalBase62Number('s');
287      printIdentifier(parseIdentifier());
288      break;
289    }
290    case 'M': {
291      demangleImplPath(InType);
292      print("<");
293      demangleType();
294      print(">");
295      break;
296    }
297    case 'X': {
298      demangleImplPath(InType);
299      print("<");
300      demangleType();
301      print(" as ");
302      demanglePath(IsInType::Yes);
303      print(">");
304      break;
305    }
306    case 'Y': {
307      print("<");
308      demangleType();
309      print(" as ");
310      demanglePath(IsInType::Yes);
311      print(">");
312      break;
313    }
314    case 'N': {
315      char NS = consume();
316      if (!isLower(NS) && !isUpper(NS)) {
317        Error = true;
318        break;
319      }
320      demanglePath(InType);
321  
322      uint64_t Disambiguator = parseOptionalBase62Number('s');
323      Identifier Ident = parseIdentifier();
324  
325      if (isUpper(NS)) {
326        // Special namespaces
327        print("::{");
328        if (NS == 'C')
329          print("closure");
330        else if (NS == 'S')
331          print("shim");
332        else
333          print(NS);
334        if (!Ident.empty()) {
335          print(":");
336          printIdentifier(Ident);
337        }
338        print('#');
339        printDecimalNumber(Disambiguator);
340        print('}');
341      } else {
342        // Implementation internal namespaces.
343        if (!Ident.empty()) {
344          print("::");
345          printIdentifier(Ident);
346        }
347      }
348      break;
349    }
350    case 'I': {
351      demanglePath(InType);
352      // Omit "::" when in a type, where it is optional.
353      if (InType == IsInType::No)
354        print("::");
355      print("<");
356      for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
357        if (I > 0)
358          print(", ");
359        demangleGenericArg();
360      }
361      if (LeaveOpen == LeaveGenericsOpen::Yes)
362        return true;
363      else
364        print(">");
365      break;
366    }
367    case 'B': {
368      bool IsOpen = false;
369      demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
370      return IsOpen;
371    }
372    default:
373      Error = true;
374      break;
375    }
376  
377    return false;
378  }
379  
380  // <impl-path> = [<disambiguator>] <path>
381  // <disambiguator> = "s" <base-62-number>
382  void Demangler::demangleImplPath(IsInType InType) {
383    SwapAndRestore<bool> SavePrint(Print, false);
384    parseOptionalBase62Number('s');
385    demanglePath(InType);
386  }
387  
388  // <generic-arg> = <lifetime>
389  //               | <type>
390  //               | "K" <const>
391  // <lifetime> = "L" <base-62-number>
392  void Demangler::demangleGenericArg() {
393    if (consumeIf('L'))
394      printLifetime(parseBase62Number());
395    else if (consumeIf('K'))
396      demangleConst();
397    else
398      demangleType();
399  }
400  
401  // <basic-type> = "a"      // i8
402  //              | "b"      // bool
403  //              | "c"      // char
404  //              | "d"      // f64
405  //              | "e"      // str
406  //              | "f"      // f32
407  //              | "h"      // u8
408  //              | "i"      // isize
409  //              | "j"      // usize
410  //              | "l"      // i32
411  //              | "m"      // u32
412  //              | "n"      // i128
413  //              | "o"      // u128
414  //              | "s"      // i16
415  //              | "t"      // u16
416  //              | "u"      // ()
417  //              | "v"      // ...
418  //              | "x"      // i64
419  //              | "y"      // u64
420  //              | "z"      // !
421  //              | "p"      // placeholder (e.g. for generic params), shown as _
422  static bool parseBasicType(char C, BasicType &Type) {
423    switch (C) {
424    case 'a':
425      Type = BasicType::I8;
426      return true;
427    case 'b':
428      Type = BasicType::Bool;
429      return true;
430    case 'c':
431      Type = BasicType::Char;
432      return true;
433    case 'd':
434      Type = BasicType::F64;
435      return true;
436    case 'e':
437      Type = BasicType::Str;
438      return true;
439    case 'f':
440      Type = BasicType::F32;
441      return true;
442    case 'h':
443      Type = BasicType::U8;
444      return true;
445    case 'i':
446      Type = BasicType::ISize;
447      return true;
448    case 'j':
449      Type = BasicType::USize;
450      return true;
451    case 'l':
452      Type = BasicType::I32;
453      return true;
454    case 'm':
455      Type = BasicType::U32;
456      return true;
457    case 'n':
458      Type = BasicType::I128;
459      return true;
460    case 'o':
461      Type = BasicType::U128;
462      return true;
463    case 'p':
464      Type = BasicType::Placeholder;
465      return true;
466    case 's':
467      Type = BasicType::I16;
468      return true;
469    case 't':
470      Type = BasicType::U16;
471      return true;
472    case 'u':
473      Type = BasicType::Unit;
474      return true;
475    case 'v':
476      Type = BasicType::Variadic;
477      return true;
478    case 'x':
479      Type = BasicType::I64;
480      return true;
481    case 'y':
482      Type = BasicType::U64;
483      return true;
484    case 'z':
485      Type = BasicType::Never;
486      return true;
487    default:
488      return false;
489    }
490  }
491  
492  void Demangler::printBasicType(BasicType Type) {
493    switch (Type) {
494    case BasicType::Bool:
495      print("bool");
496      break;
497    case BasicType::Char:
498      print("char");
499      break;
500    case BasicType::I8:
501      print("i8");
502      break;
503    case BasicType::I16:
504      print("i16");
505      break;
506    case BasicType::I32:
507      print("i32");
508      break;
509    case BasicType::I64:
510      print("i64");
511      break;
512    case BasicType::I128:
513      print("i128");
514      break;
515    case BasicType::ISize:
516      print("isize");
517      break;
518    case BasicType::U8:
519      print("u8");
520      break;
521    case BasicType::U16:
522      print("u16");
523      break;
524    case BasicType::U32:
525      print("u32");
526      break;
527    case BasicType::U64:
528      print("u64");
529      break;
530    case BasicType::U128:
531      print("u128");
532      break;
533    case BasicType::USize:
534      print("usize");
535      break;
536    case BasicType::F32:
537      print("f32");
538      break;
539    case BasicType::F64:
540      print("f64");
541      break;
542    case BasicType::Str:
543      print("str");
544      break;
545    case BasicType::Placeholder:
546      print("_");
547      break;
548    case BasicType::Unit:
549      print("()");
550      break;
551    case BasicType::Variadic:
552      print("...");
553      break;
554    case BasicType::Never:
555      print("!");
556      break;
557    }
558  }
559  
560  // <type> = | <basic-type>
561  //          | <path>                      // named type
562  //          | "A" <type> <const>          // [T; N]
563  //          | "S" <type>                  // [T]
564  //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
565  //          | "R" [<lifetime>] <type>     // &T
566  //          | "Q" [<lifetime>] <type>     // &mut T
567  //          | "P" <type>                  // *const T
568  //          | "O" <type>                  // *mut T
569  //          | "F" <fn-sig>                // fn(...) -> ...
570  //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
571  //          | <backref>                   // backref
572  void Demangler::demangleType() {
573    if (Error || RecursionLevel >= MaxRecursionLevel) {
574      Error = true;
575      return;
576    }
577    SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
578  
579    size_t Start = Position;
580    char C = consume();
581    BasicType Type;
582    if (parseBasicType(C, Type))
583      return printBasicType(Type);
584  
585    switch (C) {
586    case 'A':
587      print("[");
588      demangleType();
589      print("; ");
590      demangleConst();
591      print("]");
592      break;
593    case 'S':
594      print("[");
595      demangleType();
596      print("]");
597      break;
598    case 'T': {
599      print("(");
600      size_t I = 0;
601      for (; !Error && !consumeIf('E'); ++I) {
602        if (I > 0)
603          print(", ");
604        demangleType();
605      }
606      if (I == 1)
607        print(",");
608      print(")");
609      break;
610    }
611    case 'R':
612    case 'Q':
613      print('&');
614      if (consumeIf('L')) {
615        if (auto Lifetime = parseBase62Number()) {
616          printLifetime(Lifetime);
617          print(' ');
618        }
619      }
620      if (C == 'Q')
621        print("mut ");
622      demangleType();
623      break;
624    case 'P':
625      print("*const ");
626      demangleType();
627      break;
628    case 'O':
629      print("*mut ");
630      demangleType();
631      break;
632    case 'F':
633      demangleFnSig();
634      break;
635    case 'D':
636      demangleDynBounds();
637      if (consumeIf('L')) {
638        if (auto Lifetime = parseBase62Number()) {
639          print(" + ");
640          printLifetime(Lifetime);
641        }
642      } else {
643        Error = true;
644      }
645      break;
646    case 'B':
647      demangleBackref([&] { demangleType(); });
648      break;
649    default:
650      Position = Start;
651      demanglePath(IsInType::Yes);
652      break;
653    }
654  }
655  
656  // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
657  // <abi> = "C"
658  //       | <undisambiguated-identifier>
659  void Demangler::demangleFnSig() {
660    SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
661    demangleOptionalBinder();
662  
663    if (consumeIf('U'))
664      print("unsafe ");
665  
666    if (consumeIf('K')) {
667      print("extern \"");
668      if (consumeIf('C')) {
669        print("C");
670      } else {
671        Identifier Ident = parseIdentifier();
672        if (Ident.Punycode)
673          Error = true;
674        for (char C : Ident.Name) {
675          // When mangling ABI string, the "-" is replaced with "_".
676          if (C == '_')
677            C = '-';
678          print(C);
679        }
680      }
681      print("\" ");
682    }
683  
684    print("fn(");
685    for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
686      if (I > 0)
687        print(", ");
688      demangleType();
689    }
690    print(")");
691  
692    if (consumeIf('u')) {
693      // Skip the unit type from the output.
694    } else {
695      print(" -> ");
696      demangleType();
697    }
698  }
699  
700  // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
701  void Demangler::demangleDynBounds() {
702    SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
703    print("dyn ");
704    demangleOptionalBinder();
705    for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
706      if (I > 0)
707        print(" + ");
708      demangleDynTrait();
709    }
710  }
711  
712  // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
713  // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
714  void Demangler::demangleDynTrait() {
715    bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
716    while (!Error && consumeIf('p')) {
717      if (!IsOpen) {
718        IsOpen = true;
719        print('<');
720      } else {
721        print(", ");
722      }
723      print(parseIdentifier().Name);
724      print(" = ");
725      demangleType();
726    }
727    if (IsOpen)
728      print(">");
729  }
730  
731  // Demangles optional binder and updates the number of bound lifetimes.
732  //
733  // <binder> = "G" <base-62-number>
734  void Demangler::demangleOptionalBinder() {
735    uint64_t Binder = parseOptionalBase62Number('G');
736    if (Error || Binder == 0)
737      return;
738  
739    // In valid inputs each bound lifetime is referenced later. Referencing a
740    // lifetime requires at least one byte of input. Reject inputs that are too
741    // short to reference all bound lifetimes. Otherwise demangling of invalid
742    // binders could generate excessive amounts of output.
743    if (Binder >= Input.size() - BoundLifetimes) {
744      Error = true;
745      return;
746    }
747  
748    print("for<");
749    for (size_t I = 0; I != Binder; ++I) {
750      BoundLifetimes += 1;
751      if (I > 0)
752        print(", ");
753      printLifetime(1);
754    }
755    print("> ");
756  }
757  
758  // <const> = <basic-type> <const-data>
759  //         | "p"                          // placeholder
760  //         | <backref>
761  void Demangler::demangleConst() {
762    if (Error || RecursionLevel >= MaxRecursionLevel) {
763      Error = true;
764      return;
765    }
766    SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
767  
768    char C = consume();
769    BasicType Type;
770    if (parseBasicType(C, Type)) {
771      switch (Type) {
772      case BasicType::I8:
773      case BasicType::I16:
774      case BasicType::I32:
775      case BasicType::I64:
776      case BasicType::I128:
777      case BasicType::ISize:
778      case BasicType::U8:
779      case BasicType::U16:
780      case BasicType::U32:
781      case BasicType::U64:
782      case BasicType::U128:
783      case BasicType::USize:
784        demangleConstInt();
785        break;
786      case BasicType::Bool:
787        demangleConstBool();
788        break;
789      case BasicType::Char:
790        demangleConstChar();
791        break;
792      case BasicType::Placeholder:
793        print('_');
794        break;
795      default:
796        Error = true;
797        break;
798      }
799    } else if (C == 'B') {
800      demangleBackref([&] { demangleConst(); });
801    } else {
802      Error = true;
803    }
804  }
805  
806  // <const-data> = ["n"] <hex-number>
807  void Demangler::demangleConstInt() {
808    if (consumeIf('n'))
809      print('-');
810  
811    StringView HexDigits;
812    uint64_t Value = parseHexNumber(HexDigits);
813    if (HexDigits.size() <= 16) {
814      printDecimalNumber(Value);
815    } else {
816      print("0x");
817      print(HexDigits);
818    }
819  }
820  
821  // <const-data> = "0_" // false
822  //              | "1_" // true
823  void Demangler::demangleConstBool() {
824    StringView HexDigits;
825    parseHexNumber(HexDigits);
826    if (HexDigits == "0")
827      print("false");
828    else if (HexDigits == "1")
829      print("true");
830    else
831      Error = true;
832  }
833  
834  /// Returns true if CodePoint represents a printable ASCII character.
835  static bool isAsciiPrintable(uint64_t CodePoint) {
836    return 0x20 <= CodePoint && CodePoint <= 0x7e;
837  }
838  
839  // <const-data> = <hex-number>
840  void Demangler::demangleConstChar() {
841    StringView HexDigits;
842    uint64_t CodePoint = parseHexNumber(HexDigits);
843    if (Error || HexDigits.size() > 6) {
844      Error = true;
845      return;
846    }
847  
848    print("'");
849    switch (CodePoint) {
850    case '\t':
851      print(R"(\t)");
852      break;
853    case '\r':
854      print(R"(\r)");
855      break;
856    case '\n':
857      print(R"(\n)");
858      break;
859    case '\\':
860      print(R"(\\)");
861      break;
862    case '"':
863      print(R"(")");
864      break;
865    case '\'':
866      print(R"(\')");
867      break;
868    default:
869      if (isAsciiPrintable(CodePoint)) {
870        char C = CodePoint;
871        print(C);
872      } else {
873        print(R"(\u{)");
874        print(HexDigits);
875        print('}');
876      }
877      break;
878    }
879    print('\'');
880  }
881  
882  // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
883  Identifier Demangler::parseIdentifier() {
884    bool Punycode = consumeIf('u');
885    uint64_t Bytes = parseDecimalNumber();
886  
887    // Underscore resolves the ambiguity when identifier starts with a decimal
888    // digit or another underscore.
889    consumeIf('_');
890  
891    if (Error || Bytes > Input.size() - Position) {
892      Error = true;
893      return {};
894    }
895    StringView S = Input.substr(Position, Bytes);
896    Position += Bytes;
897  
898    if (!std::all_of(S.begin(), S.end(), isValid)) {
899      Error = true;
900      return {};
901    }
902  
903    return {S, Punycode};
904  }
905  
906  // Parses optional base 62 number. The presence of a number is determined using
907  // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
908  //
909  // This function is indended for parsing disambiguators and binders which when
910  // not present have their value interpreted as 0, and otherwise as decoded
911  // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
912  // 2. When "G" is absent value is 0.
913  uint64_t Demangler::parseOptionalBase62Number(char Tag) {
914    if (!consumeIf(Tag))
915      return 0;
916  
917    uint64_t N = parseBase62Number();
918    if (Error || !addAssign(N, 1))
919      return 0;
920  
921    return N;
922  }
923  
924  // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
925  // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
926  // "1_" encodes 2, etc.
927  //
928  // <base-62-number> = {<0-9a-zA-Z>} "_"
929  uint64_t Demangler::parseBase62Number() {
930    if (consumeIf('_'))
931      return 0;
932  
933    uint64_t Value = 0;
934  
935    while (true) {
936      uint64_t Digit;
937      char C = consume();
938  
939      if (C == '_') {
940        break;
941      } else if (isDigit(C)) {
942        Digit = C - '0';
943      } else if (isLower(C)) {
944        Digit = 10 + (C - 'a');
945      } else if (isUpper(C)) {
946        Digit = 10 + 26 + (C - 'A');
947      } else {
948        Error = true;
949        return 0;
950      }
951  
952      if (!mulAssign(Value, 62))
953        return 0;
954  
955      if (!addAssign(Value, Digit))
956        return 0;
957    }
958  
959    if (!addAssign(Value, 1))
960      return 0;
961  
962    return Value;
963  }
964  
965  // Parses a decimal number that had been encoded without any leading zeros.
966  //
967  // <decimal-number> = "0"
968  //                  | <1-9> {<0-9>}
969  uint64_t Demangler::parseDecimalNumber() {
970    char C = look();
971    if (!isDigit(C)) {
972      Error = true;
973      return 0;
974    }
975  
976    if (C == '0') {
977      consume();
978      return 0;
979    }
980  
981    uint64_t Value = 0;
982  
983    while (isDigit(look())) {
984      if (!mulAssign(Value, 10)) {
985        Error = true;
986        return 0;
987      }
988  
989      uint64_t D = consume() - '0';
990      if (!addAssign(Value, D))
991        return 0;
992    }
993  
994    return Value;
995  }
996  
997  // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
998  // value and stores hex digits in HexDigits. The return value is unspecified if
999  // HexDigits.size() > 16.
1000  //
1001  // <hex-number> = "0_"
1002  //              | <1-9a-f> {<0-9a-f>} "_"
1003  uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
1004    size_t Start = Position;
1005    uint64_t Value = 0;
1006  
1007    if (!isHexDigit(look()))
1008      Error = true;
1009  
1010    if (consumeIf('0')) {
1011      if (!consumeIf('_'))
1012        Error = true;
1013    } else {
1014      while (!Error && !consumeIf('_')) {
1015        char C = consume();
1016        Value *= 16;
1017        if (isDigit(C))
1018          Value += C - '0';
1019        else if ('a' <= C && C <= 'f')
1020          Value += 10 + (C - 'a');
1021        else
1022          Error = true;
1023      }
1024    }
1025  
1026    if (Error) {
1027      HexDigits = StringView();
1028      return 0;
1029    }
1030  
1031    size_t End = Position - 1;
1032    assert(Start < End);
1033    HexDigits = Input.substr(Start, End - Start);
1034    return Value;
1035  }
1036  
1037  void Demangler::print(char C) {
1038    if (Error || !Print)
1039      return;
1040  
1041    Output += C;
1042  }
1043  
1044  void Demangler::print(StringView S) {
1045    if (Error || !Print)
1046      return;
1047  
1048    Output += S;
1049  }
1050  
1051  void Demangler::printDecimalNumber(uint64_t N) {
1052    if (Error || !Print)
1053      return;
1054  
1055    Output << N;
1056  }
1057  
1058  // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1059  // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1060  // bound by one of the enclosing binders.
1061  void Demangler::printLifetime(uint64_t Index) {
1062    if (Index == 0) {
1063      print("'_");
1064      return;
1065    }
1066  
1067    if (Index - 1 >= BoundLifetimes) {
1068      Error = true;
1069      return;
1070    }
1071  
1072    uint64_t Depth = BoundLifetimes - Index;
1073    print('\'');
1074    if (Depth < 26) {
1075      char C = 'a' + Depth;
1076      print(C);
1077    } else {
1078      print('z');
1079      printDecimalNumber(Depth - 26 + 1);
1080    }
1081  }
1082  
1083  static inline bool decodePunycodeDigit(char C, size_t &Value) {
1084    if (isLower(C)) {
1085      Value = C - 'a';
1086      return true;
1087    }
1088  
1089    if (isDigit(C)) {
1090      Value = 26 + (C - '0');
1091      return true;
1092    }
1093  
1094    return false;
1095  }
1096  
1097  static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1098    char *Buffer = Output.getBuffer();
1099    char *Start = Buffer + StartIdx;
1100    char *End = Buffer + Output.getCurrentPosition();
1101    Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1102  }
1103  
1104  // Encodes code point as UTF-8 and stores results in Output. Returns false if
1105  // CodePoint is not a valid unicode scalar value.
1106  static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1107    if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1108      return false;
1109  
1110    if (CodePoint <= 0x7F) {
1111      Output[0] = CodePoint;
1112      return true;
1113    }
1114  
1115    if (CodePoint <= 0x7FF) {
1116      Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1117      Output[1] = 0x80 | (CodePoint & 0x3F);
1118      return true;
1119    }
1120  
1121    if (CodePoint <= 0xFFFF) {
1122      Output[0] = 0xE0 | (CodePoint >> 12);
1123      Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1124      Output[2] = 0x80 | (CodePoint & 0x3F);
1125      return true;
1126    }
1127  
1128    if (CodePoint <= 0x10FFFF) {
1129      Output[0] = 0xF0 | (CodePoint >> 18);
1130      Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1131      Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1132      Output[3] = 0x80 | (CodePoint & 0x3F);
1133      return true;
1134    }
1135  
1136    return false;
1137  }
1138  
1139  // Decodes string encoded using punycode and appends results to Output.
1140  // Returns true if decoding was successful.
1141  static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1142    size_t OutputSize = Output.getCurrentPosition();
1143    size_t InputIdx = 0;
1144  
1145    // Rust uses an underscore as a delimiter.
1146    size_t DelimiterPos = StringView::npos;
1147    for (size_t I = 0; I != Input.size(); ++I)
1148      if (Input[I] == '_')
1149        DelimiterPos = I;
1150  
1151    if (DelimiterPos != StringView::npos) {
1152      // Copy basic code points before the last delimiter to the output.
1153      for (; InputIdx != DelimiterPos; ++InputIdx) {
1154        char C = Input[InputIdx];
1155        if (!isValid(C))
1156          return false;
1157        // Code points are padded with zeros while decoding is in progress.
1158        char UTF8[4] = {C};
1159        Output += StringView(UTF8, UTF8 + 4);
1160      }
1161      // Skip over the delimiter.
1162      ++InputIdx;
1163    }
1164  
1165    size_t Base = 36;
1166    size_t Skew = 38;
1167    size_t Bias = 72;
1168    size_t N = 0x80;
1169    size_t TMin = 1;
1170    size_t TMax = 26;
1171    size_t Damp = 700;
1172  
1173    auto Adapt = [&](size_t Delta, size_t NumPoints) {
1174      Delta /= Damp;
1175      Delta += Delta / NumPoints;
1176      Damp = 2;
1177  
1178      size_t K = 0;
1179      while (Delta > (Base - TMin) * TMax / 2) {
1180        Delta /= Base - TMin;
1181        K += Base;
1182      }
1183      return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1184    };
1185  
1186    // Main decoding loop.
1187    for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1188      size_t OldI = I;
1189      size_t W = 1;
1190      size_t Max = std::numeric_limits<size_t>::max();
1191      for (size_t K = Base; true; K += Base) {
1192        if (InputIdx == Input.size())
1193          return false;
1194        char C = Input[InputIdx++];
1195        size_t Digit = 0;
1196        if (!decodePunycodeDigit(C, Digit))
1197          return false;
1198  
1199        if (Digit > (Max - I) / W)
1200          return false;
1201        I += Digit * W;
1202  
1203        size_t T;
1204        if (K <= Bias)
1205          T = TMin;
1206        else if (K >= Bias + TMax)
1207          T = TMax;
1208        else
1209          T = K - Bias;
1210  
1211        if (Digit < T)
1212          break;
1213  
1214        if (W > Max / (Base - T))
1215          return false;
1216        W *= (Base - T);
1217      }
1218      size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1219      Bias = Adapt(I - OldI, NumPoints);
1220  
1221      if (I / NumPoints > Max - N)
1222        return false;
1223      N += I / NumPoints;
1224      I = I % NumPoints;
1225  
1226      // Insert N at position I in the output.
1227      char UTF8[4] = {};
1228      if (!encodeUTF8(N, UTF8))
1229        return false;
1230      Output.insert(OutputSize + I * 4, UTF8, 4);
1231    }
1232  
1233    removeNullBytes(Output, OutputSize);
1234    return true;
1235  }
1236  
1237  void Demangler::printIdentifier(Identifier Ident) {
1238    if (Error || !Print)
1239      return;
1240  
1241    if (Ident.Punycode) {
1242      if (!decodePunycode(Ident.Name, Output))
1243        Error = true;
1244    } else {
1245      print(Ident.Name);
1246    }
1247  }
1248  
1249  char Demangler::look() const {
1250    if (Error || Position >= Input.size())
1251      return 0;
1252  
1253    return Input[Position];
1254  }
1255  
1256  char Demangler::consume() {
1257    if (Error || Position >= Input.size()) {
1258      Error = true;
1259      return 0;
1260    }
1261  
1262    return Input[Position++];
1263  }
1264  
1265  bool Demangler::consumeIf(char Prefix) {
1266    if (Error || Position >= Input.size() || Input[Position] != Prefix)
1267      return false;
1268  
1269    Position += 1;
1270    return true;
1271  }
1272  
1273  /// Computes A + B. When computation wraps around sets the error and returns
1274  /// false. Otherwise assigns the result to A and returns true.
1275  bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1276    if (A > std::numeric_limits<uint64_t>::max() - B) {
1277      Error = true;
1278      return false;
1279    }
1280  
1281    A += B;
1282    return true;
1283  }
1284  
1285  /// Computes A * B. When computation wraps around sets the error and returns
1286  /// false. Otherwise assigns the result to A and returns true.
1287  bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1288    if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1289      Error = true;
1290      return false;
1291    }
1292  
1293    A *= B;
1294    return true;
1295  }
1296