xref: /freebsd/contrib/llvm-project/llvm/lib/Demangle/MicrosoftDemangle.cpp (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for MSVC-style mangled symbols.
10 //
11 // This file has no dependencies on the rest of LLVM so that it can be
12 // easily reused in other programs such as libcxxabi.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Demangle/MicrosoftDemangle.h"
17 #include "llvm/Demangle/Demangle.h"
18 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
19 
20 #include "llvm/Demangle/DemangleConfig.h"
21 #include "llvm/Demangle/StringView.h"
22 #include "llvm/Demangle/Utility.h"
23 
24 #include <array>
25 #include <cctype>
26 #include <cstdio>
27 #include <tuple>
28 
29 using namespace llvm;
30 using namespace ms_demangle;
31 
32 static bool startsWithDigit(StringView S) {
33   return !S.empty() && std::isdigit(S.front());
34 }
35 
36 
37 struct NodeList {
38   Node *N = nullptr;
39   NodeList *Next = nullptr;
40 };
41 
42 static bool isMemberPointer(StringView MangledName, bool &Error) {
43   Error = false;
44   switch (MangledName.popFront()) {
45   case '$':
46     // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
47     // rvalue reference to a member.
48     return false;
49   case 'A':
50     // 'A' indicates a reference, and you cannot have a reference to a member
51     // function or member.
52     return false;
53   case 'P':
54   case 'Q':
55   case 'R':
56   case 'S':
57     // These 4 values indicate some kind of pointer, but we still don't know
58     // what.
59     break;
60   default:
61     // isMemberPointer() is called only if isPointerType() returns true,
62     // and it rejects other prefixes.
63     DEMANGLE_UNREACHABLE;
64   }
65 
66   // If it starts with a number, then 6 indicates a non-member function
67   // pointer, and 8 indicates a member function pointer.
68   if (startsWithDigit(MangledName)) {
69     if (MangledName[0] != '6' && MangledName[0] != '8') {
70       Error = true;
71       return false;
72     }
73     return (MangledName[0] == '8');
74   }
75 
76   // Remove ext qualifiers since those can appear on either type and are
77   // therefore not indicative.
78   MangledName.consumeFront('E'); // 64-bit
79   MangledName.consumeFront('I'); // restrict
80   MangledName.consumeFront('F'); // unaligned
81 
82   if (MangledName.empty()) {
83     Error = true;
84     return false;
85   }
86 
87   // The next value should be either ABCD (non-member) or QRST (member).
88   switch (MangledName.front()) {
89   case 'A':
90   case 'B':
91   case 'C':
92   case 'D':
93     return false;
94   case 'Q':
95   case 'R':
96   case 'S':
97   case 'T':
98     return true;
99   default:
100     Error = true;
101     return false;
102   }
103 }
104 
105 static SpecialIntrinsicKind
106 consumeSpecialIntrinsicKind(StringView &MangledName) {
107   if (MangledName.consumeFront("?_7"))
108     return SpecialIntrinsicKind::Vftable;
109   if (MangledName.consumeFront("?_8"))
110     return SpecialIntrinsicKind::Vbtable;
111   if (MangledName.consumeFront("?_9"))
112     return SpecialIntrinsicKind::VcallThunk;
113   if (MangledName.consumeFront("?_A"))
114     return SpecialIntrinsicKind::Typeof;
115   if (MangledName.consumeFront("?_B"))
116     return SpecialIntrinsicKind::LocalStaticGuard;
117   if (MangledName.consumeFront("?_C"))
118     return SpecialIntrinsicKind::StringLiteralSymbol;
119   if (MangledName.consumeFront("?_P"))
120     return SpecialIntrinsicKind::UdtReturning;
121   if (MangledName.consumeFront("?_R0"))
122     return SpecialIntrinsicKind::RttiTypeDescriptor;
123   if (MangledName.consumeFront("?_R1"))
124     return SpecialIntrinsicKind::RttiBaseClassDescriptor;
125   if (MangledName.consumeFront("?_R2"))
126     return SpecialIntrinsicKind::RttiBaseClassArray;
127   if (MangledName.consumeFront("?_R3"))
128     return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
129   if (MangledName.consumeFront("?_R4"))
130     return SpecialIntrinsicKind::RttiCompleteObjLocator;
131   if (MangledName.consumeFront("?_S"))
132     return SpecialIntrinsicKind::LocalVftable;
133   if (MangledName.consumeFront("?__E"))
134     return SpecialIntrinsicKind::DynamicInitializer;
135   if (MangledName.consumeFront("?__F"))
136     return SpecialIntrinsicKind::DynamicAtexitDestructor;
137   if (MangledName.consumeFront("?__J"))
138     return SpecialIntrinsicKind::LocalStaticThreadGuard;
139   return SpecialIntrinsicKind::None;
140 }
141 
142 static bool startsWithLocalScopePattern(StringView S) {
143   if (!S.consumeFront('?'))
144     return false;
145 
146   size_t End = S.find('?');
147   if (End == StringView::npos)
148     return false;
149   StringView Candidate = S.substr(0, End);
150   if (Candidate.empty())
151     return false;
152 
153   // \?[0-9]\?
154   // ?@? is the discriminator 0.
155   if (Candidate.size() == 1)
156     return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
157 
158   // If it's not 0-9, then it's an encoded number terminated with an @
159   if (Candidate.back() != '@')
160     return false;
161   Candidate = Candidate.dropBack();
162 
163   // An encoded number starts with B-P and all subsequent digits are in A-P.
164   // Note that the reason the first digit cannot be A is two fold.  First, it
165   // would create an ambiguity with ?A which delimits the beginning of an
166   // anonymous namespace.  Second, A represents 0, and you don't start a multi
167   // digit number with a leading 0.  Presumably the anonymous namespace
168   // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
169   if (Candidate[0] < 'B' || Candidate[0] > 'P')
170     return false;
171   Candidate = Candidate.dropFront();
172   while (!Candidate.empty()) {
173     if (Candidate[0] < 'A' || Candidate[0] > 'P')
174       return false;
175     Candidate = Candidate.dropFront();
176   }
177 
178   return true;
179 }
180 
181 static bool isTagType(StringView S) {
182   switch (S.front()) {
183   case 'T': // union
184   case 'U': // struct
185   case 'V': // class
186   case 'W': // enum
187     return true;
188   }
189   return false;
190 }
191 
192 static bool isCustomType(StringView S) { return S[0] == '?'; }
193 
194 static bool isPointerType(StringView S) {
195   if (S.startsWith("$$Q")) // foo &&
196     return true;
197 
198   switch (S.front()) {
199   case 'A': // foo &
200   case 'P': // foo *
201   case 'Q': // foo *const
202   case 'R': // foo *volatile
203   case 'S': // foo *const volatile
204     return true;
205   }
206   return false;
207 }
208 
209 static bool isArrayType(StringView S) { return S[0] == 'Y'; }
210 
211 static bool isFunctionType(StringView S) {
212   return S.startsWith("$$A8@@") || S.startsWith("$$A6");
213 }
214 
215 static FunctionRefQualifier
216 demangleFunctionRefQualifier(StringView &MangledName) {
217   if (MangledName.consumeFront('G'))
218     return FunctionRefQualifier::Reference;
219   else if (MangledName.consumeFront('H'))
220     return FunctionRefQualifier::RValueReference;
221   return FunctionRefQualifier::None;
222 }
223 
224 static std::pair<Qualifiers, PointerAffinity>
225 demanglePointerCVQualifiers(StringView &MangledName) {
226   if (MangledName.consumeFront("$$Q"))
227     return std::make_pair(Q_None, PointerAffinity::RValueReference);
228 
229   switch (MangledName.popFront()) {
230   case 'A':
231     return std::make_pair(Q_None, PointerAffinity::Reference);
232   case 'P':
233     return std::make_pair(Q_None, PointerAffinity::Pointer);
234   case 'Q':
235     return std::make_pair(Q_Const, PointerAffinity::Pointer);
236   case 'R':
237     return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
238   case 'S':
239     return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
240                           PointerAffinity::Pointer);
241   }
242   // This function is only called if isPointerType() returns true,
243   // and it only returns true for the six cases listed above.
244   DEMANGLE_UNREACHABLE;
245 }
246 
247 StringView Demangler::copyString(StringView Borrowed) {
248   char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
249   // This is not a micro-optimization, it avoids UB, should Borrowed be an null
250   // buffer.
251   if (Borrowed.size())
252     std::memcpy(Stable, Borrowed.begin(), Borrowed.size());
253 
254   return {Stable, Borrowed.size()};
255 }
256 
257 SpecialTableSymbolNode *
258 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
259                                           SpecialIntrinsicKind K) {
260   NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
261   switch (K) {
262   case SpecialIntrinsicKind::Vftable:
263     NI->Name = "`vftable'";
264     break;
265   case SpecialIntrinsicKind::Vbtable:
266     NI->Name = "`vbtable'";
267     break;
268   case SpecialIntrinsicKind::LocalVftable:
269     NI->Name = "`local vftable'";
270     break;
271   case SpecialIntrinsicKind::RttiCompleteObjLocator:
272     NI->Name = "`RTTI Complete Object Locator'";
273     break;
274   default:
275     DEMANGLE_UNREACHABLE;
276   }
277   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
278   SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
279   STSN->Name = QN;
280   bool IsMember = false;
281   if (MangledName.empty()) {
282     Error = true;
283     return nullptr;
284   }
285   char Front = MangledName.popFront();
286   if (Front != '6' && Front != '7') {
287     Error = true;
288     return nullptr;
289   }
290 
291   std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
292   if (!MangledName.consumeFront('@'))
293     STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
294   return STSN;
295 }
296 
297 LocalStaticGuardVariableNode *
298 Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
299   LocalStaticGuardIdentifierNode *LSGI =
300       Arena.alloc<LocalStaticGuardIdentifierNode>();
301   LSGI->IsThread = IsThread;
302   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
303   LocalStaticGuardVariableNode *LSGVN =
304       Arena.alloc<LocalStaticGuardVariableNode>();
305   LSGVN->Name = QN;
306 
307   if (MangledName.consumeFront("4IA"))
308     LSGVN->IsVisible = false;
309   else if (MangledName.consumeFront("5"))
310     LSGVN->IsVisible = true;
311   else {
312     Error = true;
313     return nullptr;
314   }
315 
316   if (!MangledName.empty())
317     LSGI->ScopeIndex = demangleUnsigned(MangledName);
318   return LSGVN;
319 }
320 
321 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
322                                                       StringView Name) {
323   NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
324   Id->Name = Name;
325   return Id;
326 }
327 
328 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
329                                                   IdentifierNode *Identifier) {
330   QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
331   QN->Components = Arena.alloc<NodeArrayNode>();
332   QN->Components->Count = 1;
333   QN->Components->Nodes = Arena.allocArray<Node *>(1);
334   QN->Components->Nodes[0] = Identifier;
335   return QN;
336 }
337 
338 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
339                                                   StringView Name) {
340   NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
341   return synthesizeQualifiedName(Arena, Id);
342 }
343 
344 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
345                                               TypeNode *Type,
346                                               StringView VariableName) {
347   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
348   VSN->Type = Type;
349   VSN->Name = synthesizeQualifiedName(Arena, VariableName);
350   return VSN;
351 }
352 
353 VariableSymbolNode *Demangler::demangleUntypedVariable(
354     ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) {
355   NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
356   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
357   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
358   VSN->Name = QN;
359   if (MangledName.consumeFront("8"))
360     return VSN;
361 
362   Error = true;
363   return nullptr;
364 }
365 
366 VariableSymbolNode *
367 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
368                                                StringView &MangledName) {
369   RttiBaseClassDescriptorNode *RBCDN =
370       Arena.alloc<RttiBaseClassDescriptorNode>();
371   RBCDN->NVOffset = demangleUnsigned(MangledName);
372   RBCDN->VBPtrOffset = demangleSigned(MangledName);
373   RBCDN->VBTableOffset = demangleUnsigned(MangledName);
374   RBCDN->Flags = demangleUnsigned(MangledName);
375   if (Error)
376     return nullptr;
377 
378   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
379   VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
380   MangledName.consumeFront('8');
381   return VSN;
382 }
383 
384 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
385                                                     bool IsDestructor) {
386   DynamicStructorIdentifierNode *DSIN =
387       Arena.alloc<DynamicStructorIdentifierNode>();
388   DSIN->IsDestructor = IsDestructor;
389 
390   bool IsKnownStaticDataMember = false;
391   if (MangledName.consumeFront('?'))
392     IsKnownStaticDataMember = true;
393 
394   SymbolNode *Symbol = demangleDeclarator(MangledName);
395   if (Error)
396     return nullptr;
397 
398   FunctionSymbolNode *FSN = nullptr;
399 
400   if (Symbol->kind() == NodeKind::VariableSymbol) {
401     DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
402 
403     // Older versions of clang mangled this type of symbol incorrectly.  They
404     // would omit the leading ? and they would only emit a single @ at the end.
405     // The correct mangling is a leading ? and 2 trailing @ signs.  Handle
406     // both cases.
407     int AtCount = IsKnownStaticDataMember ? 2 : 1;
408     for (int I = 0; I < AtCount; ++I) {
409       if (MangledName.consumeFront('@'))
410         continue;
411       Error = true;
412       return nullptr;
413     }
414 
415     FSN = demangleFunctionEncoding(MangledName);
416     if (FSN)
417       FSN->Name = synthesizeQualifiedName(Arena, DSIN);
418   } else {
419     if (IsKnownStaticDataMember) {
420       // This was supposed to be a static data member, but we got a function.
421       Error = true;
422       return nullptr;
423     }
424 
425     FSN = static_cast<FunctionSymbolNode *>(Symbol);
426     DSIN->Name = Symbol->Name;
427     FSN->Name = synthesizeQualifiedName(Arena, DSIN);
428   }
429 
430   return FSN;
431 }
432 
433 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
434   SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
435 
436   switch (SIK) {
437   case SpecialIntrinsicKind::None:
438     return nullptr;
439   case SpecialIntrinsicKind::StringLiteralSymbol:
440     return demangleStringLiteral(MangledName);
441   case SpecialIntrinsicKind::Vftable:
442   case SpecialIntrinsicKind::Vbtable:
443   case SpecialIntrinsicKind::LocalVftable:
444   case SpecialIntrinsicKind::RttiCompleteObjLocator:
445     return demangleSpecialTableSymbolNode(MangledName, SIK);
446   case SpecialIntrinsicKind::VcallThunk:
447     return demangleVcallThunkNode(MangledName);
448   case SpecialIntrinsicKind::LocalStaticGuard:
449     return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
450   case SpecialIntrinsicKind::LocalStaticThreadGuard:
451     return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
452   case SpecialIntrinsicKind::RttiTypeDescriptor: {
453     TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
454     if (Error)
455       break;
456     if (!MangledName.consumeFront("@8"))
457       break;
458     if (!MangledName.empty())
459       break;
460     return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
461   }
462   case SpecialIntrinsicKind::RttiBaseClassArray:
463     return demangleUntypedVariable(Arena, MangledName,
464                                    "`RTTI Base Class Array'");
465   case SpecialIntrinsicKind::RttiClassHierarchyDescriptor:
466     return demangleUntypedVariable(Arena, MangledName,
467                                    "`RTTI Class Hierarchy Descriptor'");
468   case SpecialIntrinsicKind::RttiBaseClassDescriptor:
469     return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
470   case SpecialIntrinsicKind::DynamicInitializer:
471     return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
472   case SpecialIntrinsicKind::DynamicAtexitDestructor:
473     return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
474   case SpecialIntrinsicKind::Typeof:
475   case SpecialIntrinsicKind::UdtReturning:
476     // It's unclear which tools produces these manglings, so demangling
477     // support is not (yet?) implemented.
478     break;
479   case SpecialIntrinsicKind::Unknown:
480     DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
481   }
482   Error = true;
483   return nullptr;
484 }
485 
486 IdentifierNode *
487 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
488   assert(MangledName.startsWith('?'));
489   MangledName = MangledName.dropFront();
490   if (MangledName.empty()) {
491     Error = true;
492     return nullptr;
493   }
494 
495   if (MangledName.consumeFront("__"))
496     return demangleFunctionIdentifierCode(
497         MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
498   if (MangledName.consumeFront("_"))
499     return demangleFunctionIdentifierCode(MangledName,
500                                           FunctionIdentifierCodeGroup::Under);
501   return demangleFunctionIdentifierCode(MangledName,
502                                         FunctionIdentifierCodeGroup::Basic);
503 }
504 
505 StructorIdentifierNode *
506 Demangler::demangleStructorIdentifier(StringView &MangledName,
507                                       bool IsDestructor) {
508   StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
509   N->IsDestructor = IsDestructor;
510   return N;
511 }
512 
513 ConversionOperatorIdentifierNode *
514 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) {
515   ConversionOperatorIdentifierNode *N =
516       Arena.alloc<ConversionOperatorIdentifierNode>();
517   return N;
518 }
519 
520 LiteralOperatorIdentifierNode *
521 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
522   LiteralOperatorIdentifierNode *N =
523       Arena.alloc<LiteralOperatorIdentifierNode>();
524   N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
525   return N;
526 }
527 
528 IntrinsicFunctionKind
529 Demangler::translateIntrinsicFunctionCode(char CH,
530                                           FunctionIdentifierCodeGroup Group) {
531   using IFK = IntrinsicFunctionKind;
532   if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
533     Error = true;
534     return IFK::None;
535   }
536 
537   // Not all ? identifiers are intrinsics *functions*.  This function only maps
538   // operator codes for the special functions, all others are handled elsewhere,
539   // hence the IFK::None entries in the table.
540   static IFK Basic[36] = {
541       IFK::None,             // ?0 # Foo::Foo()
542       IFK::None,             // ?1 # Foo::~Foo()
543       IFK::New,              // ?2 # operator new
544       IFK::Delete,           // ?3 # operator delete
545       IFK::Assign,           // ?4 # operator=
546       IFK::RightShift,       // ?5 # operator>>
547       IFK::LeftShift,        // ?6 # operator<<
548       IFK::LogicalNot,       // ?7 # operator!
549       IFK::Equals,           // ?8 # operator==
550       IFK::NotEquals,        // ?9 # operator!=
551       IFK::ArraySubscript,   // ?A # operator[]
552       IFK::None,             // ?B # Foo::operator <type>()
553       IFK::Pointer,          // ?C # operator->
554       IFK::Dereference,      // ?D # operator*
555       IFK::Increment,        // ?E # operator++
556       IFK::Decrement,        // ?F # operator--
557       IFK::Minus,            // ?G # operator-
558       IFK::Plus,             // ?H # operator+
559       IFK::BitwiseAnd,       // ?I # operator&
560       IFK::MemberPointer,    // ?J # operator->*
561       IFK::Divide,           // ?K # operator/
562       IFK::Modulus,          // ?L # operator%
563       IFK::LessThan,         // ?M operator<
564       IFK::LessThanEqual,    // ?N operator<=
565       IFK::GreaterThan,      // ?O operator>
566       IFK::GreaterThanEqual, // ?P operator>=
567       IFK::Comma,            // ?Q operator,
568       IFK::Parens,           // ?R operator()
569       IFK::BitwiseNot,       // ?S operator~
570       IFK::BitwiseXor,       // ?T operator^
571       IFK::BitwiseOr,        // ?U operator|
572       IFK::LogicalAnd,       // ?V operator&&
573       IFK::LogicalOr,        // ?W operator||
574       IFK::TimesEqual,       // ?X operator*=
575       IFK::PlusEqual,        // ?Y operator+=
576       IFK::MinusEqual,       // ?Z operator-=
577   };
578   static IFK Under[36] = {
579       IFK::DivEqual,           // ?_0 operator/=
580       IFK::ModEqual,           // ?_1 operator%=
581       IFK::RshEqual,           // ?_2 operator>>=
582       IFK::LshEqual,           // ?_3 operator<<=
583       IFK::BitwiseAndEqual,    // ?_4 operator&=
584       IFK::BitwiseOrEqual,     // ?_5 operator|=
585       IFK::BitwiseXorEqual,    // ?_6 operator^=
586       IFK::None,               // ?_7 # vftable
587       IFK::None,               // ?_8 # vbtable
588       IFK::None,               // ?_9 # vcall
589       IFK::None,               // ?_A # typeof
590       IFK::None,               // ?_B # local static guard
591       IFK::None,               // ?_C # string literal
592       IFK::VbaseDtor,          // ?_D # vbase destructor
593       IFK::VecDelDtor,         // ?_E # vector deleting destructor
594       IFK::DefaultCtorClosure, // ?_F # default constructor closure
595       IFK::ScalarDelDtor,      // ?_G # scalar deleting destructor
596       IFK::VecCtorIter,        // ?_H # vector constructor iterator
597       IFK::VecDtorIter,        // ?_I # vector destructor iterator
598       IFK::VecVbaseCtorIter,   // ?_J # vector vbase constructor iterator
599       IFK::VdispMap,           // ?_K # virtual displacement map
600       IFK::EHVecCtorIter,      // ?_L # eh vector constructor iterator
601       IFK::EHVecDtorIter,      // ?_M # eh vector destructor iterator
602       IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
603       IFK::CopyCtorClosure,    // ?_O # copy constructor closure
604       IFK::None,               // ?_P<name> # udt returning <name>
605       IFK::None,               // ?_Q # <unknown>
606       IFK::None,               // ?_R0 - ?_R4 # RTTI Codes
607       IFK::None,               // ?_S # local vftable
608       IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
609       IFK::ArrayNew,                // ?_U operator new[]
610       IFK::ArrayDelete,             // ?_V operator delete[]
611       IFK::None,                    // ?_W <unused>
612       IFK::None,                    // ?_X <unused>
613       IFK::None,                    // ?_Y <unused>
614       IFK::None,                    // ?_Z <unused>
615   };
616   static IFK DoubleUnder[36] = {
617       IFK::None,                       // ?__0 <unused>
618       IFK::None,                       // ?__1 <unused>
619       IFK::None,                       // ?__2 <unused>
620       IFK::None,                       // ?__3 <unused>
621       IFK::None,                       // ?__4 <unused>
622       IFK::None,                       // ?__5 <unused>
623       IFK::None,                       // ?__6 <unused>
624       IFK::None,                       // ?__7 <unused>
625       IFK::None,                       // ?__8 <unused>
626       IFK::None,                       // ?__9 <unused>
627       IFK::ManVectorCtorIter,          // ?__A managed vector ctor iterator
628       IFK::ManVectorDtorIter,          // ?__B managed vector dtor iterator
629       IFK::EHVectorCopyCtorIter,       // ?__C EH vector copy ctor iterator
630       IFK::EHVectorVbaseCopyCtorIter,  // ?__D EH vector vbase copy ctor iter
631       IFK::None,                       // ?__E dynamic initializer for `T'
632       IFK::None,                       // ?__F dynamic atexit destructor for `T'
633       IFK::VectorCopyCtorIter,         // ?__G vector copy constructor iter
634       IFK::VectorVbaseCopyCtorIter,    // ?__H vector vbase copy ctor iter
635       IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
636                                        // iter
637       IFK::None,                       // ?__J local static thread guard
638       IFK::None,                       // ?__K operator ""_name
639       IFK::CoAwait,                    // ?__L operator co_await
640       IFK::Spaceship,                  // ?__M operator<=>
641       IFK::None,                       // ?__N <unused>
642       IFK::None,                       // ?__O <unused>
643       IFK::None,                       // ?__P <unused>
644       IFK::None,                       // ?__Q <unused>
645       IFK::None,                       // ?__R <unused>
646       IFK::None,                       // ?__S <unused>
647       IFK::None,                       // ?__T <unused>
648       IFK::None,                       // ?__U <unused>
649       IFK::None,                       // ?__V <unused>
650       IFK::None,                       // ?__W <unused>
651       IFK::None,                       // ?__X <unused>
652       IFK::None,                       // ?__Y <unused>
653       IFK::None,                       // ?__Z <unused>
654   };
655 
656   int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
657   switch (Group) {
658   case FunctionIdentifierCodeGroup::Basic:
659     return Basic[Index];
660   case FunctionIdentifierCodeGroup::Under:
661     return Under[Index];
662   case FunctionIdentifierCodeGroup::DoubleUnder:
663     return DoubleUnder[Index];
664   }
665   DEMANGLE_UNREACHABLE;
666 }
667 
668 IdentifierNode *
669 Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
670                                           FunctionIdentifierCodeGroup Group) {
671   if (MangledName.empty()) {
672     Error = true;
673     return nullptr;
674   }
675   switch (Group) {
676   case FunctionIdentifierCodeGroup::Basic:
677     switch (char CH = MangledName.popFront()) {
678     case '0':
679     case '1':
680       return demangleStructorIdentifier(MangledName, CH == '1');
681     case 'B':
682       return demangleConversionOperatorIdentifier(MangledName);
683     default:
684       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
685           translateIntrinsicFunctionCode(CH, Group));
686     }
687   case FunctionIdentifierCodeGroup::Under:
688     return Arena.alloc<IntrinsicFunctionIdentifierNode>(
689         translateIntrinsicFunctionCode(MangledName.popFront(), Group));
690   case FunctionIdentifierCodeGroup::DoubleUnder:
691     switch (char CH = MangledName.popFront()) {
692     case 'K':
693       return demangleLiteralOperatorIdentifier(MangledName);
694     default:
695       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
696           translateIntrinsicFunctionCode(CH, Group));
697     }
698   }
699 
700   DEMANGLE_UNREACHABLE;
701 }
702 
703 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
704                                              QualifiedNameNode *Name) {
705   if (MangledName.empty()) {
706     Error = true;
707     return nullptr;
708   }
709 
710   // Read a variable.
711   switch (MangledName.front()) {
712   case '0':
713   case '1':
714   case '2':
715   case '3':
716   case '4': {
717     StorageClass SC = demangleVariableStorageClass(MangledName);
718     return demangleVariableEncoding(MangledName, SC);
719   }
720   }
721   FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
722 
723   IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
724   if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
725     ConversionOperatorIdentifierNode *COIN =
726         static_cast<ConversionOperatorIdentifierNode *>(UQN);
727     if (FSN)
728       COIN->TargetType = FSN->Signature->ReturnType;
729   }
730   return FSN;
731 }
732 
733 SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
734   // What follows is a main symbol name. This may include namespaces or class
735   // back references.
736   QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
737   if (Error)
738     return nullptr;
739 
740   SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
741   if (Error)
742     return nullptr;
743   Symbol->Name = QN;
744 
745   IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
746   if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
747     ConversionOperatorIdentifierNode *COIN =
748         static_cast<ConversionOperatorIdentifierNode *>(UQN);
749     if (!COIN->TargetType) {
750       Error = true;
751       return nullptr;
752     }
753   }
754   return Symbol;
755 }
756 
757 SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
758   assert(MangledName.startsWith("??@"));
759   // This is an MD5 mangled name.  We can't demangle it, just return the
760   // mangled name.
761   // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
762   size_t MD5Last = MangledName.find('@', strlen("??@"));
763   if (MD5Last == StringView::npos) {
764     Error = true;
765     return nullptr;
766   }
767   const char *Start = MangledName.begin();
768   MangledName = MangledName.dropFront(MD5Last + 1);
769 
770   // There are two additional special cases for MD5 names:
771   // 1. For complete object locators where the object name is long enough
772   //    for the object to have an MD5 name, the complete object locator is
773   //    called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
774   //    leading "??_R4". This is handled here.
775   // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
776   //    2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
777   //    instead of_CT??@...@8 with just one MD5 name. Since we don't yet
778   //    demangle catchable types anywhere, this isn't handled for MD5 names
779   //    either.
780   MangledName.consumeFront("??_R4@");
781 
782   StringView MD5(Start, MangledName.begin());
783   SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
784   S->Name = synthesizeQualifiedName(Arena, MD5);
785 
786   return S;
787 }
788 
789 SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) {
790   assert(MangledName.startsWith('.'));
791   MangledName.consumeFront('.');
792 
793   TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
794   if (Error || !MangledName.empty()) {
795     Error = true;
796     return nullptr;
797   }
798   return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'");
799 }
800 
801 // Parser entry point.
802 SymbolNode *Demangler::parse(StringView &MangledName) {
803   // Typeinfo names are strings stored in RTTI data. They're not symbol names.
804   // It's still useful to demangle them. They're the only demangled entity
805   // that doesn't start with a "?" but a ".".
806   if (MangledName.startsWith('.'))
807     return demangleTypeinfoName(MangledName);
808 
809   if (MangledName.startsWith("??@"))
810     return demangleMD5Name(MangledName);
811 
812   // MSVC-style mangled symbols must start with '?'.
813   if (!MangledName.startsWith('?')) {
814     Error = true;
815     return nullptr;
816   }
817 
818   MangledName.consumeFront('?');
819 
820   // ?$ is a template instantiation, but all other names that start with ? are
821   // operators / special names.
822   if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
823     return SI;
824 
825   return demangleDeclarator(MangledName);
826 }
827 
828 TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
829   if (!MangledName.consumeFront(".?A")) {
830     Error = true;
831     return nullptr;
832   }
833   MangledName.consumeFront(".?A");
834   if (MangledName.empty()) {
835     Error = true;
836     return nullptr;
837   }
838 
839   return demangleClassType(MangledName);
840 }
841 
842 // <type-encoding> ::= <storage-class> <variable-type>
843 // <storage-class> ::= 0  # private static member
844 //                 ::= 1  # protected static member
845 //                 ::= 2  # public static member
846 //                 ::= 3  # global
847 //                 ::= 4  # static local
848 
849 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
850                                                         StorageClass SC) {
851   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
852 
853   VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
854   VSN->SC = SC;
855 
856   if (Error)
857     return nullptr;
858 
859   // <variable-type> ::= <type> <cvr-qualifiers>
860   //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
861   switch (VSN->Type->kind()) {
862   case NodeKind::PointerType: {
863     PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
864 
865     Qualifiers ExtraChildQuals = Q_None;
866     PTN->Quals = Qualifiers(VSN->Type->Quals |
867                             demanglePointerExtQualifiers(MangledName));
868 
869     bool IsMember = false;
870     std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
871 
872     if (PTN->ClassParent) {
873       QualifiedNameNode *BackRefName =
874           demangleFullyQualifiedTypeName(MangledName);
875       (void)BackRefName;
876     }
877     PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
878 
879     break;
880   }
881   default:
882     VSN->Type->Quals = demangleQualifiers(MangledName).first;
883     break;
884   }
885 
886   return VSN;
887 }
888 
889 // Sometimes numbers are encoded in mangled symbols. For example,
890 // "int (*x)[20]" is a valid C type (x is a pointer to an array of
891 // length 20), so we need some way to embed numbers as part of symbols.
892 // This function parses it.
893 //
894 // <number>               ::= [?] <non-negative integer>
895 //
896 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
897 //                        ::= <hex digit>+ @  # when Number == 0 or >= 10
898 //
899 // <hex-digit>            ::= [A-P]           # A = 0, B = 1, ...
900 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
901   bool IsNegative = MangledName.consumeFront('?');
902 
903   if (startsWithDigit(MangledName)) {
904     uint64_t Ret = MangledName[0] - '0' + 1;
905     MangledName = MangledName.dropFront(1);
906     return {Ret, IsNegative};
907   }
908 
909   uint64_t Ret = 0;
910   for (size_t i = 0; i < MangledName.size(); ++i) {
911     char C = MangledName[i];
912     if (C == '@') {
913       MangledName = MangledName.dropFront(i + 1);
914       return {Ret, IsNegative};
915     }
916     if ('A' <= C && C <= 'P') {
917       Ret = (Ret << 4) + (C - 'A');
918       continue;
919     }
920     break;
921   }
922 
923   Error = true;
924   return {0ULL, false};
925 }
926 
927 uint64_t Demangler::demangleUnsigned(StringView &MangledName) {
928   bool IsNegative = false;
929   uint64_t Number = 0;
930   std::tie(Number, IsNegative) = demangleNumber(MangledName);
931   if (IsNegative)
932     Error = true;
933   return Number;
934 }
935 
936 int64_t Demangler::demangleSigned(StringView &MangledName) {
937   bool IsNegative = false;
938   uint64_t Number = 0;
939   std::tie(Number, IsNegative) = demangleNumber(MangledName);
940   if (Number > INT64_MAX)
941     Error = true;
942   int64_t I = static_cast<int64_t>(Number);
943   return IsNegative ? -I : I;
944 }
945 
946 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
947 // Memorize it.
948 void Demangler::memorizeString(StringView S) {
949   if (Backrefs.NamesCount >= BackrefContext::Max)
950     return;
951   for (size_t i = 0; i < Backrefs.NamesCount; ++i)
952     if (S == Backrefs.Names[i]->Name)
953       return;
954   NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
955   N->Name = S;
956   Backrefs.Names[Backrefs.NamesCount++] = N;
957 }
958 
959 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
960   assert(startsWithDigit(MangledName));
961 
962   size_t I = MangledName[0] - '0';
963   if (I >= Backrefs.NamesCount) {
964     Error = true;
965     return nullptr;
966   }
967 
968   MangledName = MangledName.dropFront();
969   return Backrefs.Names[I];
970 }
971 
972 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
973   // Render this class template name into a string buffer so that we can
974   // memorize it for the purpose of back-referencing.
975   OutputBuffer OB;
976   Identifier->output(OB, OF_Default);
977   StringView Owned = copyString(OB);
978   memorizeString(Owned);
979   std::free(OB.getBuffer());
980 }
981 
982 IdentifierNode *
983 Demangler::demangleTemplateInstantiationName(StringView &MangledName,
984                                              NameBackrefBehavior NBB) {
985   assert(MangledName.startsWith("?$"));
986   MangledName.consumeFront("?$");
987 
988   BackrefContext OuterContext;
989   std::swap(OuterContext, Backrefs);
990 
991   IdentifierNode *Identifier =
992       demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
993   if (!Error)
994     Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
995 
996   std::swap(OuterContext, Backrefs);
997   if (Error)
998     return nullptr;
999 
1000   if (NBB & NBB_Template) {
1001     // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
1002     // Structors and conversion operators only makes sense in a leaf name, so
1003     // reject them in NBB_Template contexts.
1004     if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier ||
1005         Identifier->kind() == NodeKind::StructorIdentifier) {
1006       Error = true;
1007       return nullptr;
1008     }
1009 
1010     memorizeIdentifier(Identifier);
1011   }
1012 
1013   return Identifier;
1014 }
1015 
1016 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName,
1017                                                    bool Memorize) {
1018   StringView S = demangleSimpleString(MangledName, Memorize);
1019   if (Error)
1020     return nullptr;
1021 
1022   NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
1023   Name->Name = S;
1024   return Name;
1025 }
1026 
// Hex digits in mangled names are written as the letters 'A' (0) to 'P' (15).
static bool isRebasedHexDigit(char C) { return 'A' <= C && C <= 'P'; }

// Converts a rebased hex digit to its numeric value (0..15).
static uint8_t rebasedHexDigitToNumber(char C) {
  assert(isRebasedHexDigit(C));
  // The letters are consecutive, so the value is the offset from 'A'.
  return static_cast<uint8_t>(C - 'A');
}
1033 
1034 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
1035   assert(!MangledName.empty());
1036   if (!MangledName.startsWith('?'))
1037     return MangledName.popFront();
1038 
1039   MangledName = MangledName.dropFront();
1040   if (MangledName.empty())
1041     goto CharLiteralError;
1042 
1043   if (MangledName.consumeFront('$')) {
1044     // Two hex digits
1045     if (MangledName.size() < 2)
1046       goto CharLiteralError;
1047     StringView Nibbles = MangledName.substr(0, 2);
1048     if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
1049       goto CharLiteralError;
1050     // Don't append the null terminator.
1051     uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
1052     uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
1053     MangledName = MangledName.dropFront(2);
1054     return (C1 << 4) | C2;
1055   }
1056 
1057   if (startsWithDigit(MangledName)) {
1058     const char *Lookup = ",/\\:. \n\t'-";
1059     char C = Lookup[MangledName[0] - '0'];
1060     MangledName = MangledName.dropFront();
1061     return C;
1062   }
1063 
1064   if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
1065     char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
1066                        '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
1067                        '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
1068                        '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
1069     char C = Lookup[MangledName[0] - 'a'];
1070     MangledName = MangledName.dropFront();
1071     return C;
1072   }
1073 
1074   if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
1075     char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
1076                        '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
1077                        '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
1078                        '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
1079     char C = Lookup[MangledName[0] - 'A'];
1080     MangledName = MangledName.dropFront();
1081     return C;
1082   }
1083 
1084 CharLiteralError:
1085   Error = true;
1086   return '\0';
1087 }
1088 
1089 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
1090   uint8_t C1, C2;
1091 
1092   C1 = demangleCharLiteral(MangledName);
1093   if (Error || MangledName.empty())
1094     goto WCharLiteralError;
1095   C2 = demangleCharLiteral(MangledName);
1096   if (Error)
1097     goto WCharLiteralError;
1098 
1099   return ((wchar_t)C1 << 8) | (wchar_t)C2;
1100 
1101 WCharLiteralError:
1102   Error = true;
1103   return L'\0';
1104 }
1105 
// Stores the upper-case hexadecimal character for Digit (0..15) in *Buffer.
static void writeHexDigit(char *Buffer, uint8_t Digit) {
  assert(Digit <= 15);
  if (Digit < 10)
    *Buffer = static_cast<char>('0' + Digit);
  else
    *Buffer = static_cast<char>('A' + Digit - 10);
}
1110 
1111 static void outputHex(OutputBuffer &OB, unsigned C) {
1112   assert (C != 0);
1113 
1114   // It's easier to do the math if we can work from right to left, but we need
1115   // to print the numbers from left to right.  So render this into a temporary
1116   // buffer first, then output the temporary buffer.  Each byte is of the form
1117   // \xAB, which means that each byte needs 4 characters.  Since there are at
1118   // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
1119   char TempBuffer[17];
1120 
1121   ::memset(TempBuffer, 0, sizeof(TempBuffer));
1122   constexpr int MaxPos = sizeof(TempBuffer) - 1;
1123 
1124   int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
1125   while (C != 0) {
1126     for (int I = 0; I < 2; ++I) {
1127       writeHexDigit(&TempBuffer[Pos--], C % 16);
1128       C /= 16;
1129     }
1130   }
1131   TempBuffer[Pos--] = 'x';
1132   assert(Pos >= 0);
1133   TempBuffer[Pos--] = '\\';
1134   OB << StringView(&TempBuffer[Pos + 1]);
1135 }
1136 
1137 static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
1138   switch (C) {
1139   case '\0': // nul
1140     OB << "\\0";
1141     return;
1142   case '\'': // single quote
1143     OB << "\\\'";
1144     return;
1145   case '\"': // double quote
1146     OB << "\\\"";
1147     return;
1148   case '\\': // backslash
1149     OB << "\\\\";
1150     return;
1151   case '\a': // bell
1152     OB << "\\a";
1153     return;
1154   case '\b': // backspace
1155     OB << "\\b";
1156     return;
1157   case '\f': // form feed
1158     OB << "\\f";
1159     return;
1160   case '\n': // new line
1161     OB << "\\n";
1162     return;
1163   case '\r': // carriage return
1164     OB << "\\r";
1165     return;
1166   case '\t': // tab
1167     OB << "\\t";
1168     return;
1169   case '\v': // vertical tab
1170     OB << "\\v";
1171     return;
1172   default:
1173     break;
1174   }
1175 
1176   if (C > 0x1F && C < 0x7F) {
1177     // Standard ascii char.
1178     OB << (char)C;
1179     return;
1180   }
1181 
1182   outputHex(OB, C);
1183 }
1184 
// Returns the number of consecutive zero bytes at the end of the first Length
// bytes of StringBytes. Returns 0 when Length is not positive.
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
  unsigned Count = 0;
  // Index from the back instead of walking a pointer, so that no pointer
  // before the start of the array is ever formed (which would happen for an
  // empty or all-zero input).
  while (Length > 0 && StringBytes[Length - 1] == 0) {
    --Length;
    ++Count;
  }
  return Count;
}
1195 
// Returns how many of the first Length bytes of StringBytes are zero.
static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
                                   unsigned Length) {
  unsigned Nulls = 0;
  const uint8_t *const End = StringBytes + Length;
  for (const uint8_t *Cur = StringBytes; Cur != End; ++Cur)
    Nulls += (*Cur == 0) ? 1u : 0u;
  return Nulls;
}
1205 
1206 // A mangled (non-wide) string literal stores the total length of the string it
1207 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text
1208 // (passed in StringBytes, NumChars).
1209 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
1210                                   uint64_t NumBytes) {
1211   assert(NumBytes > 0);
1212 
1213   // If the number of bytes is odd, this is guaranteed to be a char string.
1214   if (NumBytes % 2 == 1)
1215     return 1;
1216 
1217   // All strings can encode at most 32 bytes of data.  If it's less than that,
1218   // then we encoded the entire string.  In this case we check for a 1-byte,
1219   // 2-byte, or 4-byte null terminator.
1220   if (NumBytes < 32) {
1221     unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
1222     if (TrailingNulls >= 4 && NumBytes % 4 == 0)
1223       return 4;
1224     if (TrailingNulls >= 2)
1225       return 2;
1226     return 1;
1227   }
1228 
1229   // The whole string was not able to be encoded.  Try to look at embedded null
1230   // terminators to guess.  The heuristic is that we count all embedded null
1231   // terminators.  If more than 2/3 are null, it's a char32.  If more than 1/3
1232   // are null, it's a char16.  Otherwise it's a char8.  This obviously isn't
1233   // perfect and is biased towards languages that have ascii alphabets, but this
1234   // was always going to be best effort since the encoding is lossy.
1235   unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
1236   if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
1237     return 4;
1238   if (Nulls >= NumChars / 3)
1239     return 2;
1240   return 1;
1241 }
1242 
// Assembles the CharIndex-th character of StringBytes, where every character
// occupies CharBytes bytes stored least-significant byte first.
static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
                                    unsigned CharIndex, unsigned CharBytes) {
  assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
  const uint8_t *Cur = StringBytes + CharIndex * CharBytes;
  unsigned Value = 0;
  unsigned Shift = 0;
  for (unsigned Remaining = CharBytes; Remaining != 0; --Remaining) {
    Value |= static_cast<unsigned>(*Cur++) << Shift;
    Shift += 8;
  }
  return Value;
}
1255 
1256 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
1257   FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
1258   VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
1259   FSN->Signature = Arena.alloc<ThunkSignatureNode>();
1260   FSN->Signature->FunctionClass = FC_NoParameterList;
1261 
1262   FSN->Name = demangleNameScopeChain(MangledName, VTIN);
1263   if (!Error)
1264     Error = !MangledName.consumeFront("$B");
1265   if (!Error)
1266     VTIN->OffsetInVTable = demangleUnsigned(MangledName);
1267   if (!Error)
1268     Error = !MangledName.consumeFront('A');
1269   if (!Error)
1270     FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
1271   return (Error) ? nullptr : FSN;
1272 }
1273 
1274 EncodedStringLiteralNode *
1275 Demangler::demangleStringLiteral(StringView &MangledName) {
1276   // This function uses goto, so declare all variables up front.
1277   OutputBuffer OB;
1278   StringView CRC;
1279   uint64_t StringByteSize;
1280   bool IsWcharT = false;
1281   bool IsNegative = false;
1282   size_t CrcEndPos = 0;
1283 
1284   EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
1285 
1286   // Prefix indicating the beginning of a string literal
1287   if (!MangledName.consumeFront("@_"))
1288     goto StringLiteralError;
1289   if (MangledName.empty())
1290     goto StringLiteralError;
1291 
1292   // Char Type (regular or wchar_t)
1293   switch (MangledName.popFront()) {
1294   case '1':
1295     IsWcharT = true;
1296     DEMANGLE_FALLTHROUGH;
1297   case '0':
1298     break;
1299   default:
1300     goto StringLiteralError;
1301   }
1302 
1303   // Encoded Length
1304   std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
1305   if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1))
1306     goto StringLiteralError;
1307 
1308   // CRC 32 (always 8 characters plus a terminator)
1309   CrcEndPos = MangledName.find('@');
1310   if (CrcEndPos == StringView::npos)
1311     goto StringLiteralError;
1312   CRC = MangledName.substr(0, CrcEndPos);
1313   MangledName = MangledName.dropFront(CrcEndPos + 1);
1314   if (MangledName.empty())
1315     goto StringLiteralError;
1316 
1317   if (IsWcharT) {
1318     Result->Char = CharKind::Wchar;
1319     if (StringByteSize > 64)
1320       Result->IsTruncated = true;
1321 
1322     while (!MangledName.consumeFront('@')) {
1323       if (MangledName.size() < 2)
1324         goto StringLiteralError;
1325       wchar_t W = demangleWcharLiteral(MangledName);
1326       if (StringByteSize != 2 || Result->IsTruncated)
1327         outputEscapedChar(OB, W);
1328       StringByteSize -= 2;
1329       if (Error)
1330         goto StringLiteralError;
1331     }
1332   } else {
1333     // The max byte length is actually 32, but some compilers mangled strings
1334     // incorrectly, so we have to assume it can go higher.
1335     constexpr unsigned MaxStringByteLength = 32 * 4;
1336     uint8_t StringBytes[MaxStringByteLength];
1337 
1338     unsigned BytesDecoded = 0;
1339     while (!MangledName.consumeFront('@')) {
1340       if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
1341         goto StringLiteralError;
1342       StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
1343     }
1344 
1345     if (StringByteSize > BytesDecoded)
1346       Result->IsTruncated = true;
1347 
1348     unsigned CharBytes =
1349         guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
1350     assert(StringByteSize % CharBytes == 0);
1351     switch (CharBytes) {
1352     case 1:
1353       Result->Char = CharKind::Char;
1354       break;
1355     case 2:
1356       Result->Char = CharKind::Char16;
1357       break;
1358     case 4:
1359       Result->Char = CharKind::Char32;
1360       break;
1361     default:
1362       DEMANGLE_UNREACHABLE;
1363     }
1364     const unsigned NumChars = BytesDecoded / CharBytes;
1365     for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
1366       unsigned NextChar =
1367           decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
1368       if (CharIndex + 1 < NumChars || Result->IsTruncated)
1369         outputEscapedChar(OB, NextChar);
1370     }
1371   }
1372 
1373   Result->DecodedString = copyString(OB);
1374   std::free(OB.getBuffer());
1375   return Result;
1376 
1377 StringLiteralError:
1378   Error = true;
1379   std::free(OB.getBuffer());
1380   return nullptr;
1381 }
1382 
1383 // Returns MangledName's prefix before the first '@', or an error if
1384 // MangledName contains no '@' or the prefix has length 0.
1385 StringView Demangler::demangleSimpleString(StringView &MangledName,
1386                                            bool Memorize) {
1387   StringView S;
1388   for (size_t i = 0; i < MangledName.size(); ++i) {
1389     if (MangledName[i] != '@')
1390       continue;
1391     if (i == 0)
1392       break;
1393     S = MangledName.substr(0, i);
1394     MangledName = MangledName.dropFront(i + 1);
1395 
1396     if (Memorize)
1397       memorizeString(S);
1398     return S;
1399   }
1400 
1401   Error = true;
1402   return {};
1403 }
1404 
1405 NamedIdentifierNode *
1406 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
1407   assert(MangledName.startsWith("?A"));
1408   MangledName.consumeFront("?A");
1409 
1410   NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
1411   Node->Name = "`anonymous namespace'";
1412   size_t EndPos = MangledName.find('@');
1413   if (EndPos == StringView::npos) {
1414     Error = true;
1415     return nullptr;
1416   }
1417   StringView NamespaceKey = MangledName.substr(0, EndPos);
1418   memorizeString(NamespaceKey);
1419   MangledName = MangledName.substr(EndPos + 1);
1420   return Node;
1421 }
1422 
1423 NamedIdentifierNode *
1424 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
1425   assert(startsWithLocalScopePattern(MangledName));
1426 
1427   NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
1428   MangledName.consumeFront('?');
1429   uint64_t Number = 0;
1430   bool IsNegative = false;
1431   std::tie(Number, IsNegative) = demangleNumber(MangledName);
1432   assert(!IsNegative);
1433 
1434   // One ? to terminate the number
1435   MangledName.consumeFront('?');
1436 
1437   assert(!Error);
1438   Node *Scope = parse(MangledName);
1439   if (Error)
1440     return nullptr;
1441 
1442   // Render the parent symbol's name into a buffer.
1443   OutputBuffer OB;
1444   OB << '`';
1445   Scope->output(OB, OF_Default);
1446   OB << '\'';
1447   OB << "::`" << Number << "'";
1448 
1449   Identifier->Name = copyString(OB);
1450   std::free(OB.getBuffer());
1451   return Identifier;
1452 }
1453 
1454 // Parses a type name in the form of A@B@C@@ which represents C::B::A.
1455 QualifiedNameNode *
1456 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
1457   IdentifierNode *Identifier =
1458       demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1459   if (Error)
1460     return nullptr;
1461   assert(Identifier);
1462 
1463   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1464   if (Error)
1465     return nullptr;
1466   assert(QN);
1467   return QN;
1468 }
1469 
1470 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1471 // Symbol names have slightly different rules regarding what can appear
1472 // so we separate out the implementations for flexibility.
1473 QualifiedNameNode *
1474 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
1475   // This is the final component of a symbol name (i.e. the leftmost component
1476   // of a mangled name.  Since the only possible template instantiation that
1477   // can appear in this context is a function template, and since those are
1478   // not saved for the purposes of name backreferences, only backref simple
1479   // names.
1480   IdentifierNode *Identifier =
1481       demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1482   if (Error)
1483     return nullptr;
1484 
1485   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1486   if (Error)
1487     return nullptr;
1488 
1489   if (Identifier->kind() == NodeKind::StructorIdentifier) {
1490     if (QN->Components->Count < 2) {
1491       Error = true;
1492       return nullptr;
1493     }
1494     StructorIdentifierNode *SIN =
1495         static_cast<StructorIdentifierNode *>(Identifier);
1496     Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
1497     SIN->Class = static_cast<IdentifierNode *>(ClassNode);
1498   }
1499   assert(QN);
1500   return QN;
1501 }
1502 
1503 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName,
1504                                                        bool Memorize) {
1505   // An inner-most name can be a back-reference, because a fully-qualified name
1506   // (e.g. Scope + Inner) can contain other fully qualified names inside of
1507   // them (for example template parameters), and these nested parameters can
1508   // refer to previously mangled types.
1509   if (startsWithDigit(MangledName))
1510     return demangleBackRefName(MangledName);
1511 
1512   if (MangledName.startsWith("?$"))
1513     return demangleTemplateInstantiationName(MangledName, NBB_Template);
1514 
1515   return demangleSimpleName(MangledName, Memorize);
1516 }
1517 
1518 IdentifierNode *
1519 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
1520                                          NameBackrefBehavior NBB) {
1521   if (startsWithDigit(MangledName))
1522     return demangleBackRefName(MangledName);
1523   if (MangledName.startsWith("?$"))
1524     return demangleTemplateInstantiationName(MangledName, NBB);
1525   if (MangledName.startsWith('?'))
1526     return demangleFunctionIdentifierCode(MangledName);
1527   return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
1528 }
1529 
1530 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
1531   if (startsWithDigit(MangledName))
1532     return demangleBackRefName(MangledName);
1533 
1534   if (MangledName.startsWith("?$"))
1535     return demangleTemplateInstantiationName(MangledName, NBB_Template);
1536 
1537   if (MangledName.startsWith("?A"))
1538     return demangleAnonymousNamespaceName(MangledName);
1539 
1540   if (startsWithLocalScopePattern(MangledName))
1541     return demangleLocallyScopedNamePiece(MangledName);
1542 
1543   return demangleSimpleName(MangledName, /*Memorize=*/true);
1544 }
1545 
1546 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
1547                                           size_t Count) {
1548   NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
1549   N->Count = Count;
1550   N->Nodes = Arena.allocArray<Node *>(Count);
1551   for (size_t I = 0; I < Count; ++I) {
1552     N->Nodes[I] = Head->N;
1553     Head = Head->Next;
1554   }
1555   return N;
1556 }
1557 
1558 QualifiedNameNode *
1559 Demangler::demangleNameScopeChain(StringView &MangledName,
1560                                   IdentifierNode *UnqualifiedName) {
1561   NodeList *Head = Arena.alloc<NodeList>();
1562 
1563   Head->N = UnqualifiedName;
1564 
1565   size_t Count = 1;
1566   while (!MangledName.consumeFront("@")) {
1567     ++Count;
1568     NodeList *NewHead = Arena.alloc<NodeList>();
1569     NewHead->Next = Head;
1570     Head = NewHead;
1571 
1572     if (MangledName.empty()) {
1573       Error = true;
1574       return nullptr;
1575     }
1576 
1577     assert(!Error);
1578     IdentifierNode *Elem = demangleNameScopePiece(MangledName);
1579     if (Error)
1580       return nullptr;
1581 
1582     Head->N = Elem;
1583   }
1584 
1585   QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
1586   QN->Components = nodeListToNodeArray(Arena, Head, Count);
1587   return QN;
1588 }
1589 
1590 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
1591   switch (MangledName.popFront()) {
1592   case '9':
1593     return FuncClass(FC_ExternC | FC_NoParameterList);
1594   case 'A':
1595     return FC_Private;
1596   case 'B':
1597     return FuncClass(FC_Private | FC_Far);
1598   case 'C':
1599     return FuncClass(FC_Private | FC_Static);
1600   case 'D':
1601     return FuncClass(FC_Private | FC_Static | FC_Far);
1602   case 'E':
1603     return FuncClass(FC_Private | FC_Virtual);
1604   case 'F':
1605     return FuncClass(FC_Private | FC_Virtual | FC_Far);
1606   case 'G':
1607     return FuncClass(FC_Private | FC_StaticThisAdjust);
1608   case 'H':
1609     return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
1610   case 'I':
1611     return FuncClass(FC_Protected);
1612   case 'J':
1613     return FuncClass(FC_Protected | FC_Far);
1614   case 'K':
1615     return FuncClass(FC_Protected | FC_Static);
1616   case 'L':
1617     return FuncClass(FC_Protected | FC_Static | FC_Far);
1618   case 'M':
1619     return FuncClass(FC_Protected | FC_Virtual);
1620   case 'N':
1621     return FuncClass(FC_Protected | FC_Virtual | FC_Far);
1622   case 'O':
1623     return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
1624   case 'P':
1625     return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1626   case 'Q':
1627     return FuncClass(FC_Public);
1628   case 'R':
1629     return FuncClass(FC_Public | FC_Far);
1630   case 'S':
1631     return FuncClass(FC_Public | FC_Static);
1632   case 'T':
1633     return FuncClass(FC_Public | FC_Static | FC_Far);
1634   case 'U':
1635     return FuncClass(FC_Public | FC_Virtual);
1636   case 'V':
1637     return FuncClass(FC_Public | FC_Virtual | FC_Far);
1638   case 'W':
1639     return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
1640   case 'X':
1641     return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1642   case 'Y':
1643     return FuncClass(FC_Global);
1644   case 'Z':
1645     return FuncClass(FC_Global | FC_Far);
1646   case '$': {
1647     FuncClass VFlag = FC_VirtualThisAdjust;
1648     if (MangledName.consumeFront('R'))
1649       VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
1650     if (MangledName.empty())
1651       break;
1652     switch (MangledName.popFront()) {
1653     case '0':
1654       return FuncClass(FC_Private | FC_Virtual | VFlag);
1655     case '1':
1656       return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
1657     case '2':
1658       return FuncClass(FC_Protected | FC_Virtual | VFlag);
1659     case '3':
1660       return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
1661     case '4':
1662       return FuncClass(FC_Public | FC_Virtual | VFlag);
1663     case '5':
1664       return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
1665     }
1666   }
1667   }
1668 
1669   Error = true;
1670   return FC_Public;
1671 }
1672 
1673 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
1674   if (MangledName.empty()) {
1675     Error = true;
1676     return CallingConv::None;
1677   }
1678 
1679   switch (MangledName.popFront()) {
1680   case 'A':
1681   case 'B':
1682     return CallingConv::Cdecl;
1683   case 'C':
1684   case 'D':
1685     return CallingConv::Pascal;
1686   case 'E':
1687   case 'F':
1688     return CallingConv::Thiscall;
1689   case 'G':
1690   case 'H':
1691     return CallingConv::Stdcall;
1692   case 'I':
1693   case 'J':
1694     return CallingConv::Fastcall;
1695   case 'M':
1696   case 'N':
1697     return CallingConv::Clrcall;
1698   case 'O':
1699   case 'P':
1700     return CallingConv::Eabi;
1701   case 'Q':
1702     return CallingConv::Vectorcall;
1703   case 'S':
1704     return CallingConv::Swift;
1705   case 'W':
1706     return CallingConv::SwiftAsync;
1707   }
1708 
1709   return CallingConv::None;
1710 }
1711 
1712 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
1713   assert(MangledName.front() >= '0' && MangledName.front() <= '4');
1714 
1715   switch (MangledName.popFront()) {
1716   case '0':
1717     return StorageClass::PrivateStatic;
1718   case '1':
1719     return StorageClass::ProtectedStatic;
1720   case '2':
1721     return StorageClass::PublicStatic;
1722   case '3':
1723     return StorageClass::Global;
1724   case '4':
1725     return StorageClass::FunctionLocalStatic;
1726   }
1727   DEMANGLE_UNREACHABLE;
1728 }
1729 
1730 std::pair<Qualifiers, bool>
1731 Demangler::demangleQualifiers(StringView &MangledName) {
1732   if (MangledName.empty()) {
1733     Error = true;
1734     return std::make_pair(Q_None, false);
1735   }
1736 
1737   switch (MangledName.popFront()) {
1738   // Member qualifiers
1739   case 'Q':
1740     return std::make_pair(Q_None, true);
1741   case 'R':
1742     return std::make_pair(Q_Const, true);
1743   case 'S':
1744     return std::make_pair(Q_Volatile, true);
1745   case 'T':
1746     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1747   // Non-Member qualifiers
1748   case 'A':
1749     return std::make_pair(Q_None, false);
1750   case 'B':
1751     return std::make_pair(Q_Const, false);
1752   case 'C':
1753     return std::make_pair(Q_Volatile, false);
1754   case 'D':
1755     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1756   }
1757   Error = true;
1758   return std::make_pair(Q_None, false);
1759 }
1760 
1761 // <variable-type> ::= <type> <cvr-qualifiers>
1762 //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
1763 TypeNode *Demangler::demangleType(StringView &MangledName,
1764                                   QualifierMangleMode QMM) {
1765   Qualifiers Quals = Q_None;
1766   bool IsMember = false;
1767   if (QMM == QualifierMangleMode::Mangle) {
1768     std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1769   } else if (QMM == QualifierMangleMode::Result) {
1770     if (MangledName.consumeFront('?'))
1771       std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1772   }
1773 
1774   if (MangledName.empty()) {
1775     Error = true;
1776     return nullptr;
1777   }
1778 
1779   TypeNode *Ty = nullptr;
1780   if (isTagType(MangledName))
1781     Ty = demangleClassType(MangledName);
1782   else if (isPointerType(MangledName)) {
1783     if (isMemberPointer(MangledName, Error))
1784       Ty = demangleMemberPointerType(MangledName);
1785     else if (!Error)
1786       Ty = demanglePointerType(MangledName);
1787     else
1788       return nullptr;
1789   } else if (isArrayType(MangledName))
1790     Ty = demangleArrayType(MangledName);
1791   else if (isFunctionType(MangledName)) {
1792     if (MangledName.consumeFront("$$A8@@"))
1793       Ty = demangleFunctionType(MangledName, true);
1794     else {
1795       assert(MangledName.startsWith("$$A6"));
1796       MangledName.consumeFront("$$A6");
1797       Ty = demangleFunctionType(MangledName, false);
1798     }
1799   } else if (isCustomType(MangledName)) {
1800     Ty = demangleCustomType(MangledName);
1801   } else {
1802     Ty = demanglePrimitiveType(MangledName);
1803   }
1804 
1805   if (!Ty || Error)
1806     return Ty;
1807   Ty->Quals = Qualifiers(Ty->Quals | Quals);
1808   return Ty;
1809 }
1810 
1811 bool Demangler::demangleThrowSpecification(StringView &MangledName) {
1812   if (MangledName.consumeFront("_E"))
1813     return true;
1814   if (MangledName.consumeFront('Z'))
1815     return false;
1816 
1817   Error = true;
1818   return false;
1819 }
1820 
1821 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
1822                                                        bool HasThisQuals) {
1823   FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
1824 
1825   if (HasThisQuals) {
1826     FTy->Quals = demanglePointerExtQualifiers(MangledName);
1827     FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
1828     FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1829   }
1830 
1831   // Fields that appear on both member and non-member functions.
1832   FTy->CallConvention = demangleCallingConvention(MangledName);
1833 
1834   // <return-type> ::= <type>
1835   //               ::= @ # structors (they have no declared return type)
1836   bool IsStructor = MangledName.consumeFront('@');
1837   if (!IsStructor)
1838     FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1839 
1840   FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
1841 
1842   FTy->IsNoexcept = demangleThrowSpecification(MangledName);
1843 
1844   return FTy;
1845 }
1846 
1847 FunctionSymbolNode *
1848 Demangler::demangleFunctionEncoding(StringView &MangledName) {
1849   FuncClass ExtraFlags = FC_None;
1850   if (MangledName.consumeFront("$$J0"))
1851     ExtraFlags = FC_ExternC;
1852 
1853   if (MangledName.empty()) {
1854     Error = true;
1855     return nullptr;
1856   }
1857 
1858   FuncClass FC = demangleFunctionClass(MangledName);
1859   FC = FuncClass(ExtraFlags | FC);
1860 
1861   FunctionSignatureNode *FSN = nullptr;
1862   ThunkSignatureNode *TTN = nullptr;
1863   if (FC & FC_StaticThisAdjust) {
1864     TTN = Arena.alloc<ThunkSignatureNode>();
1865     TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1866   } else if (FC & FC_VirtualThisAdjust) {
1867     TTN = Arena.alloc<ThunkSignatureNode>();
1868     if (FC & FC_VirtualThisAdjustEx) {
1869       TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
1870       TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
1871     }
1872     TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
1873     TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1874   }
1875 
1876   if (FC & FC_NoParameterList) {
1877     // This is an extern "C" function whose full signature hasn't been mangled.
1878     // This happens when we need to mangle a local symbol inside of an extern
1879     // "C" function.
1880     FSN = Arena.alloc<FunctionSignatureNode>();
1881   } else {
1882     bool HasThisQuals = !(FC & (FC_Global | FC_Static));
1883     FSN = demangleFunctionType(MangledName, HasThisQuals);
1884   }
1885 
1886   if (Error)
1887     return nullptr;
1888 
1889   if (TTN) {
1890     *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
1891     FSN = TTN;
1892   }
1893   FSN->FunctionClass = FC;
1894 
1895   FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
1896   Symbol->Signature = FSN;
1897   return Symbol;
1898 }
1899 
1900 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
1901   assert(MangledName.startsWith('?'));
1902   MangledName.popFront();
1903 
1904   CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
1905   CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1906   if (!MangledName.consumeFront('@'))
1907     Error = true;
1908   if (Error)
1909     return nullptr;
1910   return CTN;
1911 }
1912 
1913 // Reads a primitive type.
1914 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
1915   if (MangledName.consumeFront("$$T"))
1916     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
1917 
1918   switch (MangledName.popFront()) {
1919   case 'X':
1920     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
1921   case 'D':
1922     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
1923   case 'C':
1924     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
1925   case 'E':
1926     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
1927   case 'F':
1928     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
1929   case 'G':
1930     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
1931   case 'H':
1932     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
1933   case 'I':
1934     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
1935   case 'J':
1936     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
1937   case 'K':
1938     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
1939   case 'M':
1940     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
1941   case 'N':
1942     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
1943   case 'O':
1944     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
1945   case '_': {
1946     if (MangledName.empty()) {
1947       Error = true;
1948       return nullptr;
1949     }
1950     switch (MangledName.popFront()) {
1951     case 'N':
1952       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
1953     case 'J':
1954       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
1955     case 'K':
1956       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
1957     case 'W':
1958       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
1959     case 'Q':
1960       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
1961     case 'S':
1962       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
1963     case 'U':
1964       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
1965     }
1966     break;
1967   }
1968   }
1969   Error = true;
1970   return nullptr;
1971 }
1972 
1973 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
1974   TagTypeNode *TT = nullptr;
1975 
1976   switch (MangledName.popFront()) {
1977   case 'T':
1978     TT = Arena.alloc<TagTypeNode>(TagKind::Union);
1979     break;
1980   case 'U':
1981     TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
1982     break;
1983   case 'V':
1984     TT = Arena.alloc<TagTypeNode>(TagKind::Class);
1985     break;
1986   case 'W':
1987     if (!MangledName.consumeFront('4')) {
1988       Error = true;
1989       return nullptr;
1990     }
1991     TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
1992     break;
1993   default:
1994     assert(false);
1995   }
1996 
1997   TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
1998   return TT;
1999 }
2000 
2001 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
2002 //                       # the E is required for 64-bit non-static pointers
2003 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) {
2004   PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2005 
2006   std::tie(Pointer->Quals, Pointer->Affinity) =
2007       demanglePointerCVQualifiers(MangledName);
2008 
2009   if (MangledName.consumeFront("6")) {
2010     Pointer->Pointee = demangleFunctionType(MangledName, false);
2011     return Pointer;
2012   }
2013 
2014   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2015   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2016 
2017   Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
2018   return Pointer;
2019 }
2020 
2021 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
2022   PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2023 
2024   std::tie(Pointer->Quals, Pointer->Affinity) =
2025       demanglePointerCVQualifiers(MangledName);
2026   assert(Pointer->Affinity == PointerAffinity::Pointer);
2027 
2028   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2029   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2030 
2031   // isMemberPointer() only returns true if there is at least one character
2032   // after the qualifiers.
2033   if (MangledName.consumeFront("8")) {
2034     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2035     Pointer->Pointee = demangleFunctionType(MangledName, true);
2036   } else {
2037     Qualifiers PointeeQuals = Q_None;
2038     bool IsMember = false;
2039     std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
2040     assert(IsMember || Error);
2041     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2042 
2043     Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
2044     if (Pointer->Pointee)
2045       Pointer->Pointee->Quals = PointeeQuals;
2046   }
2047 
2048   return Pointer;
2049 }
2050 
2051 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
2052   Qualifiers Quals = Q_None;
2053   if (MangledName.consumeFront('E'))
2054     Quals = Qualifiers(Quals | Q_Pointer64);
2055   if (MangledName.consumeFront('I'))
2056     Quals = Qualifiers(Quals | Q_Restrict);
2057   if (MangledName.consumeFront('F'))
2058     Quals = Qualifiers(Quals | Q_Unaligned);
2059 
2060   return Quals;
2061 }
2062 
2063 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
2064   assert(MangledName.front() == 'Y');
2065   MangledName.popFront();
2066 
2067   uint64_t Rank = 0;
2068   bool IsNegative = false;
2069   std::tie(Rank, IsNegative) = demangleNumber(MangledName);
2070   if (IsNegative || Rank == 0) {
2071     Error = true;
2072     return nullptr;
2073   }
2074 
2075   ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
2076   NodeList *Head = Arena.alloc<NodeList>();
2077   NodeList *Tail = Head;
2078 
2079   for (uint64_t I = 0; I < Rank; ++I) {
2080     uint64_t D = 0;
2081     std::tie(D, IsNegative) = demangleNumber(MangledName);
2082     if (Error || IsNegative) {
2083       Error = true;
2084       return nullptr;
2085     }
2086     Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
2087     if (I + 1 < Rank) {
2088       Tail->Next = Arena.alloc<NodeList>();
2089       Tail = Tail->Next;
2090     }
2091   }
2092   ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
2093 
2094   if (MangledName.consumeFront("$$C")) {
2095     bool IsMember = false;
2096     std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
2097     if (IsMember) {
2098       Error = true;
2099       return nullptr;
2100     }
2101   }
2102 
2103   ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
2104   return ATy;
2105 }
2106 
2107 // Reads a function's parameters.
2108 NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
2109                                                         bool &IsVariadic) {
2110   // Empty parameter list.
2111   if (MangledName.consumeFront('X'))
2112     return nullptr;
2113 
2114   NodeList *Head = Arena.alloc<NodeList>();
2115   NodeList **Current = &Head;
2116   size_t Count = 0;
2117   while (!Error && !MangledName.startsWith('@') &&
2118          !MangledName.startsWith('Z')) {
2119     ++Count;
2120 
2121     if (startsWithDigit(MangledName)) {
2122       size_t N = MangledName[0] - '0';
2123       if (N >= Backrefs.FunctionParamCount) {
2124         Error = true;
2125         return nullptr;
2126       }
2127       MangledName = MangledName.dropFront();
2128 
2129       *Current = Arena.alloc<NodeList>();
2130       (*Current)->N = Backrefs.FunctionParams[N];
2131       Current = &(*Current)->Next;
2132       continue;
2133     }
2134 
2135     size_t OldSize = MangledName.size();
2136 
2137     *Current = Arena.alloc<NodeList>();
2138     TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
2139     if (!TN || Error)
2140       return nullptr;
2141 
2142     (*Current)->N = TN;
2143 
2144     size_t CharsConsumed = OldSize - MangledName.size();
2145     assert(CharsConsumed != 0);
2146 
2147     // Single-letter types are ignored for backreferences because memorizing
2148     // them doesn't save anything.
2149     if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
2150       Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
2151 
2152     Current = &(*Current)->Next;
2153   }
2154 
2155   if (Error)
2156     return nullptr;
2157 
2158   NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
2159   // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
2160   // list or '@' (non variadic).  Careful not to consume "@Z", as in that case
2161   // the following Z could be a throw specifier.
2162   if (MangledName.consumeFront('@'))
2163     return NA;
2164 
2165   if (MangledName.consumeFront('Z')) {
2166     IsVariadic = true;
2167     return NA;
2168   }
2169 
2170   DEMANGLE_UNREACHABLE;
2171 }
2172 
2173 NodeArrayNode *
2174 Demangler::demangleTemplateParameterList(StringView &MangledName) {
2175   NodeList *Head = nullptr;
2176   NodeList **Current = &Head;
2177   size_t Count = 0;
2178 
2179   while (!MangledName.startsWith('@')) {
2180     if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
2181         MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
2182       // parameter pack separator
2183       continue;
2184     }
2185 
2186     ++Count;
2187 
2188     // Template parameter lists don't participate in back-referencing.
2189     *Current = Arena.alloc<NodeList>();
2190 
2191     NodeList &TP = **Current;
2192 
2193     TemplateParameterReferenceNode *TPRN = nullptr;
2194     if (MangledName.consumeFront("$$Y")) {
2195       // Template alias
2196       TP.N = demangleFullyQualifiedTypeName(MangledName);
2197     } else if (MangledName.consumeFront("$$B")) {
2198       // Array
2199       TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2200     } else if (MangledName.consumeFront("$$C")) {
2201       // Type has qualifiers.
2202       TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
2203     } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") ||
2204                MangledName.startsWith("$I") || MangledName.startsWith("$J")) {
2205       // Pointer to member
2206       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2207       TPRN->IsMemberPointer = true;
2208 
2209       MangledName = MangledName.dropFront();
2210       // 1 - single inheritance       <name>
2211       // H - multiple inheritance     <name> <number>
2212       // I - virtual inheritance      <name> <number> <number>
2213       // J - unspecified inheritance  <name> <number> <number> <number>
2214       char InheritanceSpecifier = MangledName.popFront();
2215       SymbolNode *S = nullptr;
2216       if (MangledName.startsWith('?')) {
2217         S = parse(MangledName);
2218         if (Error || !S->Name) {
2219           Error = true;
2220           return nullptr;
2221         }
2222         memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
2223       }
2224 
2225       switch (InheritanceSpecifier) {
2226       case 'J':
2227         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2228             demangleSigned(MangledName);
2229         DEMANGLE_FALLTHROUGH;
2230       case 'I':
2231         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2232             demangleSigned(MangledName);
2233         DEMANGLE_FALLTHROUGH;
2234       case 'H':
2235         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2236             demangleSigned(MangledName);
2237         DEMANGLE_FALLTHROUGH;
2238       case '1':
2239         break;
2240       default:
2241         DEMANGLE_UNREACHABLE;
2242       }
2243       TPRN->Affinity = PointerAffinity::Pointer;
2244       TPRN->Symbol = S;
2245     } else if (MangledName.startsWith("$E?")) {
2246       MangledName.consumeFront("$E");
2247       // Reference to symbol
2248       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2249       TPRN->Symbol = parse(MangledName);
2250       TPRN->Affinity = PointerAffinity::Reference;
2251     } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) {
2252       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2253 
2254       // Data member pointer.
2255       MangledName = MangledName.dropFront();
2256       char InheritanceSpecifier = MangledName.popFront();
2257 
2258       switch (InheritanceSpecifier) {
2259       case 'G':
2260         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2261             demangleSigned(MangledName);
2262         DEMANGLE_FALLTHROUGH;
2263       case 'F':
2264         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2265             demangleSigned(MangledName);
2266         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2267             demangleSigned(MangledName);
2268         break;
2269       default:
2270         DEMANGLE_UNREACHABLE;
2271       }
2272       TPRN->IsMemberPointer = true;
2273 
2274     } else if (MangledName.consumeFront("$0")) {
2275       // Integral non-type template parameter
2276       bool IsNegative = false;
2277       uint64_t Value = 0;
2278       std::tie(Value, IsNegative) = demangleNumber(MangledName);
2279 
2280       TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
2281     } else {
2282       TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2283     }
2284     if (Error)
2285       return nullptr;
2286 
2287     Current = &TP.Next;
2288   }
2289 
2290   // The loop above returns nullptr on Error.
2291   assert(!Error);
2292 
2293   // Template parameter lists cannot be variadic, so it can only be terminated
2294   // by @ (as opposed to 'Z' in the function parameter case).
2295   assert(MangledName.startsWith('@')); // The above loop exits only on '@'.
2296   MangledName.consumeFront('@');
2297   return nodeListToNodeArray(Arena, Head, Count);
2298 }
2299 
2300 void Demangler::dumpBackReferences() {
2301   std::printf("%d function parameter backreferences\n",
2302               (int)Backrefs.FunctionParamCount);
2303 
2304   // Create an output stream so we can render each type.
2305   OutputBuffer OB;
2306   for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
2307     OB.setCurrentPosition(0);
2308 
2309     TypeNode *T = Backrefs.FunctionParams[I];
2310     T->output(OB, OF_Default);
2311 
2312     StringView B = OB;
2313     std::printf("  [%d] - %.*s\n", (int)I, (int)B.size(), B.begin());
2314   }
2315   std::free(OB.getBuffer());
2316 
2317   if (Backrefs.FunctionParamCount > 0)
2318     std::printf("\n");
2319   std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
2320   for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
2321     std::printf("  [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
2322                 Backrefs.Names[I]->Name.begin());
2323   }
2324   if (Backrefs.NamesCount > 0)
2325     std::printf("\n");
2326 }
2327 
2328 char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
2329                               char *Buf, size_t *N,
2330                               int *Status, MSDemangleFlags Flags) {
2331   Demangler D;
2332 
2333   StringView Name{MangledName};
2334   SymbolNode *AST = D.parse(Name);
2335   if (!D.Error && NMangled)
2336     *NMangled = Name.begin() - MangledName;
2337 
2338   if (Flags & MSDF_DumpBackrefs)
2339     D.dumpBackReferences();
2340 
2341   OutputFlags OF = OF_Default;
2342   if (Flags & MSDF_NoCallingConvention)
2343     OF = OutputFlags(OF | OF_NoCallingConvention);
2344   if (Flags & MSDF_NoAccessSpecifier)
2345     OF = OutputFlags(OF | OF_NoAccessSpecifier);
2346   if (Flags & MSDF_NoReturnType)
2347     OF = OutputFlags(OF | OF_NoReturnType);
2348   if (Flags & MSDF_NoMemberType)
2349     OF = OutputFlags(OF | OF_NoMemberType);
2350   if (Flags & MSDF_NoVariableType)
2351     OF = OutputFlags(OF | OF_NoVariableType);
2352 
2353   int InternalStatus = demangle_success;
2354   if (D.Error)
2355     InternalStatus = demangle_invalid_mangled_name;
2356   else {
2357     OutputBuffer OB(Buf, N);
2358     AST->output(OB, OF);
2359     OB += '\0';
2360     if (N != nullptr)
2361       *N = OB.getCurrentPosition();
2362     Buf = OB.getBuffer();
2363   }
2364 
2365   if (Status)
2366     *Status = InternalStatus;
2367   return InternalStatus == demangle_success ? Buf : nullptr;
2368 }
2369