xref: /freebsd/contrib/llvm-project/llvm/lib/Demangle/MicrosoftDemangle.cpp (revision 3e8eb5c7f4909209c042403ddee340b2ee7003a5)
1 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for MSVC-style mangled symbols.
10 //
11 // This file has no dependencies on the rest of LLVM so that it can be
12 // easily reused in other programs such as libcxxabi.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Demangle/MicrosoftDemangle.h"
17 #include "llvm/Demangle/Demangle.h"
18 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
19 
20 #include "llvm/Demangle/DemangleConfig.h"
21 #include "llvm/Demangle/StringView.h"
22 #include "llvm/Demangle/Utility.h"
23 
24 #include <array>
25 #include <cctype>
26 #include <cstdio>
27 #include <tuple>
28 
29 using namespace llvm;
30 using namespace ms_demangle;
31 
32 static bool startsWithDigit(StringView S) {
33   return !S.empty() && std::isdigit(S.front());
34 }
35 
36 
37 struct NodeList {
38   Node *N = nullptr;
39   NodeList *Next = nullptr;
40 };
41 
42 static bool isMemberPointer(StringView MangledName, bool &Error) {
43   Error = false;
44   switch (MangledName.popFront()) {
45   case '$':
46     // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
47     // rvalue reference to a member.
48     return false;
49   case 'A':
50     // 'A' indicates a reference, and you cannot have a reference to a member
51     // function or member.
52     return false;
53   case 'P':
54   case 'Q':
55   case 'R':
56   case 'S':
57     // These 4 values indicate some kind of pointer, but we still don't know
58     // what.
59     break;
60   default:
61     // isMemberPointer() is called only if isPointerType() returns true,
62     // and it rejects other prefixes.
63     DEMANGLE_UNREACHABLE;
64   }
65 
66   // If it starts with a number, then 6 indicates a non-member function
67   // pointer, and 8 indicates a member function pointer.
68   if (startsWithDigit(MangledName)) {
69     if (MangledName[0] != '6' && MangledName[0] != '8') {
70       Error = true;
71       return false;
72     }
73     return (MangledName[0] == '8');
74   }
75 
76   // Remove ext qualifiers since those can appear on either type and are
77   // therefore not indicative.
78   MangledName.consumeFront('E'); // 64-bit
79   MangledName.consumeFront('I'); // restrict
80   MangledName.consumeFront('F'); // unaligned
81 
82   if (MangledName.empty()) {
83     Error = true;
84     return false;
85   }
86 
87   // The next value should be either ABCD (non-member) or QRST (member).
88   switch (MangledName.front()) {
89   case 'A':
90   case 'B':
91   case 'C':
92   case 'D':
93     return false;
94   case 'Q':
95   case 'R':
96   case 'S':
97   case 'T':
98     return true;
99   default:
100     Error = true;
101     return false;
102   }
103 }
104 
105 static SpecialIntrinsicKind
106 consumeSpecialIntrinsicKind(StringView &MangledName) {
107   if (MangledName.consumeFront("?_7"))
108     return SpecialIntrinsicKind::Vftable;
109   if (MangledName.consumeFront("?_8"))
110     return SpecialIntrinsicKind::Vbtable;
111   if (MangledName.consumeFront("?_9"))
112     return SpecialIntrinsicKind::VcallThunk;
113   if (MangledName.consumeFront("?_A"))
114     return SpecialIntrinsicKind::Typeof;
115   if (MangledName.consumeFront("?_B"))
116     return SpecialIntrinsicKind::LocalStaticGuard;
117   if (MangledName.consumeFront("?_C"))
118     return SpecialIntrinsicKind::StringLiteralSymbol;
119   if (MangledName.consumeFront("?_P"))
120     return SpecialIntrinsicKind::UdtReturning;
121   if (MangledName.consumeFront("?_R0"))
122     return SpecialIntrinsicKind::RttiTypeDescriptor;
123   if (MangledName.consumeFront("?_R1"))
124     return SpecialIntrinsicKind::RttiBaseClassDescriptor;
125   if (MangledName.consumeFront("?_R2"))
126     return SpecialIntrinsicKind::RttiBaseClassArray;
127   if (MangledName.consumeFront("?_R3"))
128     return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
129   if (MangledName.consumeFront("?_R4"))
130     return SpecialIntrinsicKind::RttiCompleteObjLocator;
131   if (MangledName.consumeFront("?_S"))
132     return SpecialIntrinsicKind::LocalVftable;
133   if (MangledName.consumeFront("?__E"))
134     return SpecialIntrinsicKind::DynamicInitializer;
135   if (MangledName.consumeFront("?__F"))
136     return SpecialIntrinsicKind::DynamicAtexitDestructor;
137   if (MangledName.consumeFront("?__J"))
138     return SpecialIntrinsicKind::LocalStaticThreadGuard;
139   return SpecialIntrinsicKind::None;
140 }
141 
142 static bool startsWithLocalScopePattern(StringView S) {
143   if (!S.consumeFront('?'))
144     return false;
145 
146   size_t End = S.find('?');
147   if (End == StringView::npos)
148     return false;
149   StringView Candidate = S.substr(0, End);
150   if (Candidate.empty())
151     return false;
152 
153   // \?[0-9]\?
154   // ?@? is the discriminator 0.
155   if (Candidate.size() == 1)
156     return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
157 
158   // If it's not 0-9, then it's an encoded number terminated with an @
159   if (Candidate.back() != '@')
160     return false;
161   Candidate = Candidate.dropBack();
162 
163   // An encoded number starts with B-P and all subsequent digits are in A-P.
164   // Note that the reason the first digit cannot be A is two fold.  First, it
165   // would create an ambiguity with ?A which delimits the beginning of an
166   // anonymous namespace.  Second, A represents 0, and you don't start a multi
167   // digit number with a leading 0.  Presumably the anonymous namespace
168   // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
169   if (Candidate[0] < 'B' || Candidate[0] > 'P')
170     return false;
171   Candidate = Candidate.dropFront();
172   while (!Candidate.empty()) {
173     if (Candidate[0] < 'A' || Candidate[0] > 'P')
174       return false;
175     Candidate = Candidate.dropFront();
176   }
177 
178   return true;
179 }
180 
181 static bool isTagType(StringView S) {
182   switch (S.front()) {
183   case 'T': // union
184   case 'U': // struct
185   case 'V': // class
186   case 'W': // enum
187     return true;
188   }
189   return false;
190 }
191 
192 static bool isCustomType(StringView S) { return S[0] == '?'; }
193 
194 static bool isPointerType(StringView S) {
195   if (S.startsWith("$$Q")) // foo &&
196     return true;
197 
198   switch (S.front()) {
199   case 'A': // foo &
200   case 'P': // foo *
201   case 'Q': // foo *const
202   case 'R': // foo *volatile
203   case 'S': // foo *const volatile
204     return true;
205   }
206   return false;
207 }
208 
209 static bool isArrayType(StringView S) { return S[0] == 'Y'; }
210 
211 static bool isFunctionType(StringView S) {
212   return S.startsWith("$$A8@@") || S.startsWith("$$A6");
213 }
214 
215 static FunctionRefQualifier
216 demangleFunctionRefQualifier(StringView &MangledName) {
217   if (MangledName.consumeFront('G'))
218     return FunctionRefQualifier::Reference;
219   else if (MangledName.consumeFront('H'))
220     return FunctionRefQualifier::RValueReference;
221   return FunctionRefQualifier::None;
222 }
223 
224 static std::pair<Qualifiers, PointerAffinity>
225 demanglePointerCVQualifiers(StringView &MangledName) {
226   if (MangledName.consumeFront("$$Q"))
227     return std::make_pair(Q_None, PointerAffinity::RValueReference);
228 
229   switch (MangledName.popFront()) {
230   case 'A':
231     return std::make_pair(Q_None, PointerAffinity::Reference);
232   case 'P':
233     return std::make_pair(Q_None, PointerAffinity::Pointer);
234   case 'Q':
235     return std::make_pair(Q_Const, PointerAffinity::Pointer);
236   case 'R':
237     return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
238   case 'S':
239     return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
240                           PointerAffinity::Pointer);
241   }
242   // This function is only called if isPointerType() returns true,
243   // and it only returns true for the six cases listed above.
244   DEMANGLE_UNREACHABLE;
245 }
246 
247 StringView Demangler::copyString(StringView Borrowed) {
248   char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
249   std::strcpy(Stable, Borrowed.begin());
250 
251   return {Stable, Borrowed.size()};
252 }
253 
254 SpecialTableSymbolNode *
255 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
256                                           SpecialIntrinsicKind K) {
257   NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
258   switch (K) {
259   case SpecialIntrinsicKind::Vftable:
260     NI->Name = "`vftable'";
261     break;
262   case SpecialIntrinsicKind::Vbtable:
263     NI->Name = "`vbtable'";
264     break;
265   case SpecialIntrinsicKind::LocalVftable:
266     NI->Name = "`local vftable'";
267     break;
268   case SpecialIntrinsicKind::RttiCompleteObjLocator:
269     NI->Name = "`RTTI Complete Object Locator'";
270     break;
271   default:
272     DEMANGLE_UNREACHABLE;
273   }
274   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
275   SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
276   STSN->Name = QN;
277   bool IsMember = false;
278   if (MangledName.empty()) {
279     Error = true;
280     return nullptr;
281   }
282   char Front = MangledName.popFront();
283   if (Front != '6' && Front != '7') {
284     Error = true;
285     return nullptr;
286   }
287 
288   std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
289   if (!MangledName.consumeFront('@'))
290     STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
291   return STSN;
292 }
293 
294 LocalStaticGuardVariableNode *
295 Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
296   LocalStaticGuardIdentifierNode *LSGI =
297       Arena.alloc<LocalStaticGuardIdentifierNode>();
298   LSGI->IsThread = IsThread;
299   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
300   LocalStaticGuardVariableNode *LSGVN =
301       Arena.alloc<LocalStaticGuardVariableNode>();
302   LSGVN->Name = QN;
303 
304   if (MangledName.consumeFront("4IA"))
305     LSGVN->IsVisible = false;
306   else if (MangledName.consumeFront("5"))
307     LSGVN->IsVisible = true;
308   else {
309     Error = true;
310     return nullptr;
311   }
312 
313   if (!MangledName.empty())
314     LSGI->ScopeIndex = demangleUnsigned(MangledName);
315   return LSGVN;
316 }
317 
318 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
319                                                       StringView Name) {
320   NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
321   Id->Name = Name;
322   return Id;
323 }
324 
325 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
326                                                   IdentifierNode *Identifier) {
327   QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
328   QN->Components = Arena.alloc<NodeArrayNode>();
329   QN->Components->Count = 1;
330   QN->Components->Nodes = Arena.allocArray<Node *>(1);
331   QN->Components->Nodes[0] = Identifier;
332   return QN;
333 }
334 
335 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
336                                                   StringView Name) {
337   NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
338   return synthesizeQualifiedName(Arena, Id);
339 }
340 
341 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
342                                               TypeNode *Type,
343                                               StringView VariableName) {
344   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
345   VSN->Type = Type;
346   VSN->Name = synthesizeQualifiedName(Arena, VariableName);
347   return VSN;
348 }
349 
350 VariableSymbolNode *Demangler::demangleUntypedVariable(
351     ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) {
352   NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
353   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
354   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
355   VSN->Name = QN;
356   if (MangledName.consumeFront("8"))
357     return VSN;
358 
359   Error = true;
360   return nullptr;
361 }
362 
363 VariableSymbolNode *
364 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
365                                                StringView &MangledName) {
366   RttiBaseClassDescriptorNode *RBCDN =
367       Arena.alloc<RttiBaseClassDescriptorNode>();
368   RBCDN->NVOffset = demangleUnsigned(MangledName);
369   RBCDN->VBPtrOffset = demangleSigned(MangledName);
370   RBCDN->VBTableOffset = demangleUnsigned(MangledName);
371   RBCDN->Flags = demangleUnsigned(MangledName);
372   if (Error)
373     return nullptr;
374 
375   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
376   VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
377   MangledName.consumeFront('8');
378   return VSN;
379 }
380 
381 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
382                                                     bool IsDestructor) {
383   DynamicStructorIdentifierNode *DSIN =
384       Arena.alloc<DynamicStructorIdentifierNode>();
385   DSIN->IsDestructor = IsDestructor;
386 
387   bool IsKnownStaticDataMember = false;
388   if (MangledName.consumeFront('?'))
389     IsKnownStaticDataMember = true;
390 
391   SymbolNode *Symbol = demangleDeclarator(MangledName);
392   if (Error)
393     return nullptr;
394 
395   FunctionSymbolNode *FSN = nullptr;
396 
397   if (Symbol->kind() == NodeKind::VariableSymbol) {
398     DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
399 
400     // Older versions of clang mangled this type of symbol incorrectly.  They
401     // would omit the leading ? and they would only emit a single @ at the end.
402     // The correct mangling is a leading ? and 2 trailing @ signs.  Handle
403     // both cases.
404     int AtCount = IsKnownStaticDataMember ? 2 : 1;
405     for (int I = 0; I < AtCount; ++I) {
406       if (MangledName.consumeFront('@'))
407         continue;
408       Error = true;
409       return nullptr;
410     }
411 
412     FSN = demangleFunctionEncoding(MangledName);
413     if (FSN)
414       FSN->Name = synthesizeQualifiedName(Arena, DSIN);
415   } else {
416     if (IsKnownStaticDataMember) {
417       // This was supposed to be a static data member, but we got a function.
418       Error = true;
419       return nullptr;
420     }
421 
422     FSN = static_cast<FunctionSymbolNode *>(Symbol);
423     DSIN->Name = Symbol->Name;
424     FSN->Name = synthesizeQualifiedName(Arena, DSIN);
425   }
426 
427   return FSN;
428 }
429 
430 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
431   SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
432 
433   switch (SIK) {
434   case SpecialIntrinsicKind::None:
435     return nullptr;
436   case SpecialIntrinsicKind::StringLiteralSymbol:
437     return demangleStringLiteral(MangledName);
438   case SpecialIntrinsicKind::Vftable:
439   case SpecialIntrinsicKind::Vbtable:
440   case SpecialIntrinsicKind::LocalVftable:
441   case SpecialIntrinsicKind::RttiCompleteObjLocator:
442     return demangleSpecialTableSymbolNode(MangledName, SIK);
443   case SpecialIntrinsicKind::VcallThunk:
444     return demangleVcallThunkNode(MangledName);
445   case SpecialIntrinsicKind::LocalStaticGuard:
446     return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
447   case SpecialIntrinsicKind::LocalStaticThreadGuard:
448     return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
449   case SpecialIntrinsicKind::RttiTypeDescriptor: {
450     TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
451     if (Error)
452       break;
453     if (!MangledName.consumeFront("@8"))
454       break;
455     if (!MangledName.empty())
456       break;
457     return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
458   }
459   case SpecialIntrinsicKind::RttiBaseClassArray:
460     return demangleUntypedVariable(Arena, MangledName,
461                                    "`RTTI Base Class Array'");
462   case SpecialIntrinsicKind::RttiClassHierarchyDescriptor:
463     return demangleUntypedVariable(Arena, MangledName,
464                                    "`RTTI Class Hierarchy Descriptor'");
465   case SpecialIntrinsicKind::RttiBaseClassDescriptor:
466     return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
467   case SpecialIntrinsicKind::DynamicInitializer:
468     return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
469   case SpecialIntrinsicKind::DynamicAtexitDestructor:
470     return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
471   case SpecialIntrinsicKind::Typeof:
472   case SpecialIntrinsicKind::UdtReturning:
473     // It's unclear which tools produces these manglings, so demangling
474     // support is not (yet?) implemented.
475     break;
476   case SpecialIntrinsicKind::Unknown:
477     DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
478   }
479   Error = true;
480   return nullptr;
481 }
482 
483 IdentifierNode *
484 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
485   assert(MangledName.startsWith('?'));
486   MangledName = MangledName.dropFront();
487   if (MangledName.empty()) {
488     Error = true;
489     return nullptr;
490   }
491 
492   if (MangledName.consumeFront("__"))
493     return demangleFunctionIdentifierCode(
494         MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
495   if (MangledName.consumeFront("_"))
496     return demangleFunctionIdentifierCode(MangledName,
497                                           FunctionIdentifierCodeGroup::Under);
498   return demangleFunctionIdentifierCode(MangledName,
499                                         FunctionIdentifierCodeGroup::Basic);
500 }
501 
502 StructorIdentifierNode *
503 Demangler::demangleStructorIdentifier(StringView &MangledName,
504                                       bool IsDestructor) {
505   StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
506   N->IsDestructor = IsDestructor;
507   return N;
508 }
509 
510 ConversionOperatorIdentifierNode *
511 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) {
512   ConversionOperatorIdentifierNode *N =
513       Arena.alloc<ConversionOperatorIdentifierNode>();
514   return N;
515 }
516 
517 LiteralOperatorIdentifierNode *
518 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
519   LiteralOperatorIdentifierNode *N =
520       Arena.alloc<LiteralOperatorIdentifierNode>();
521   N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
522   return N;
523 }
524 
525 IntrinsicFunctionKind
526 Demangler::translateIntrinsicFunctionCode(char CH,
527                                           FunctionIdentifierCodeGroup Group) {
528   using IFK = IntrinsicFunctionKind;
529   if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
530     Error = true;
531     return IFK::None;
532   }
533 
534   // Not all ? identifiers are intrinsics *functions*.  This function only maps
535   // operator codes for the special functions, all others are handled elsewhere,
536   // hence the IFK::None entries in the table.
537   static IFK Basic[36] = {
538       IFK::None,             // ?0 # Foo::Foo()
539       IFK::None,             // ?1 # Foo::~Foo()
540       IFK::New,              // ?2 # operator new
541       IFK::Delete,           // ?3 # operator delete
542       IFK::Assign,           // ?4 # operator=
543       IFK::RightShift,       // ?5 # operator>>
544       IFK::LeftShift,        // ?6 # operator<<
545       IFK::LogicalNot,       // ?7 # operator!
546       IFK::Equals,           // ?8 # operator==
547       IFK::NotEquals,        // ?9 # operator!=
548       IFK::ArraySubscript,   // ?A # operator[]
549       IFK::None,             // ?B # Foo::operator <type>()
550       IFK::Pointer,          // ?C # operator->
551       IFK::Dereference,      // ?D # operator*
552       IFK::Increment,        // ?E # operator++
553       IFK::Decrement,        // ?F # operator--
554       IFK::Minus,            // ?G # operator-
555       IFK::Plus,             // ?H # operator+
556       IFK::BitwiseAnd,       // ?I # operator&
557       IFK::MemberPointer,    // ?J # operator->*
558       IFK::Divide,           // ?K # operator/
559       IFK::Modulus,          // ?L # operator%
560       IFK::LessThan,         // ?M operator<
561       IFK::LessThanEqual,    // ?N operator<=
562       IFK::GreaterThan,      // ?O operator>
563       IFK::GreaterThanEqual, // ?P operator>=
564       IFK::Comma,            // ?Q operator,
565       IFK::Parens,           // ?R operator()
566       IFK::BitwiseNot,       // ?S operator~
567       IFK::BitwiseXor,       // ?T operator^
568       IFK::BitwiseOr,        // ?U operator|
569       IFK::LogicalAnd,       // ?V operator&&
570       IFK::LogicalOr,        // ?W operator||
571       IFK::TimesEqual,       // ?X operator*=
572       IFK::PlusEqual,        // ?Y operator+=
573       IFK::MinusEqual,       // ?Z operator-=
574   };
575   static IFK Under[36] = {
576       IFK::DivEqual,           // ?_0 operator/=
577       IFK::ModEqual,           // ?_1 operator%=
578       IFK::RshEqual,           // ?_2 operator>>=
579       IFK::LshEqual,           // ?_3 operator<<=
580       IFK::BitwiseAndEqual,    // ?_4 operator&=
581       IFK::BitwiseOrEqual,     // ?_5 operator|=
582       IFK::BitwiseXorEqual,    // ?_6 operator^=
583       IFK::None,               // ?_7 # vftable
584       IFK::None,               // ?_8 # vbtable
585       IFK::None,               // ?_9 # vcall
586       IFK::None,               // ?_A # typeof
587       IFK::None,               // ?_B # local static guard
588       IFK::None,               // ?_C # string literal
589       IFK::VbaseDtor,          // ?_D # vbase destructor
590       IFK::VecDelDtor,         // ?_E # vector deleting destructor
591       IFK::DefaultCtorClosure, // ?_F # default constructor closure
592       IFK::ScalarDelDtor,      // ?_G # scalar deleting destructor
593       IFK::VecCtorIter,        // ?_H # vector constructor iterator
594       IFK::VecDtorIter,        // ?_I # vector destructor iterator
595       IFK::VecVbaseCtorIter,   // ?_J # vector vbase constructor iterator
596       IFK::VdispMap,           // ?_K # virtual displacement map
597       IFK::EHVecCtorIter,      // ?_L # eh vector constructor iterator
598       IFK::EHVecDtorIter,      // ?_M # eh vector destructor iterator
599       IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
600       IFK::CopyCtorClosure,    // ?_O # copy constructor closure
601       IFK::None,               // ?_P<name> # udt returning <name>
602       IFK::None,               // ?_Q # <unknown>
603       IFK::None,               // ?_R0 - ?_R4 # RTTI Codes
604       IFK::None,               // ?_S # local vftable
605       IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
606       IFK::ArrayNew,                // ?_U operator new[]
607       IFK::ArrayDelete,             // ?_V operator delete[]
608       IFK::None,                    // ?_W <unused>
609       IFK::None,                    // ?_X <unused>
610       IFK::None,                    // ?_Y <unused>
611       IFK::None,                    // ?_Z <unused>
612   };
613   static IFK DoubleUnder[36] = {
614       IFK::None,                       // ?__0 <unused>
615       IFK::None,                       // ?__1 <unused>
616       IFK::None,                       // ?__2 <unused>
617       IFK::None,                       // ?__3 <unused>
618       IFK::None,                       // ?__4 <unused>
619       IFK::None,                       // ?__5 <unused>
620       IFK::None,                       // ?__6 <unused>
621       IFK::None,                       // ?__7 <unused>
622       IFK::None,                       // ?__8 <unused>
623       IFK::None,                       // ?__9 <unused>
624       IFK::ManVectorCtorIter,          // ?__A managed vector ctor iterator
625       IFK::ManVectorDtorIter,          // ?__B managed vector dtor iterator
626       IFK::EHVectorCopyCtorIter,       // ?__C EH vector copy ctor iterator
627       IFK::EHVectorVbaseCopyCtorIter,  // ?__D EH vector vbase copy ctor iter
628       IFK::None,                       // ?__E dynamic initializer for `T'
629       IFK::None,                       // ?__F dynamic atexit destructor for `T'
630       IFK::VectorCopyCtorIter,         // ?__G vector copy constructor iter
631       IFK::VectorVbaseCopyCtorIter,    // ?__H vector vbase copy ctor iter
632       IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
633                                        // iter
634       IFK::None,                       // ?__J local static thread guard
635       IFK::None,                       // ?__K operator ""_name
636       IFK::CoAwait,                    // ?__L operator co_await
637       IFK::Spaceship,                  // ?__M operator<=>
638       IFK::None,                       // ?__N <unused>
639       IFK::None,                       // ?__O <unused>
640       IFK::None,                       // ?__P <unused>
641       IFK::None,                       // ?__Q <unused>
642       IFK::None,                       // ?__R <unused>
643       IFK::None,                       // ?__S <unused>
644       IFK::None,                       // ?__T <unused>
645       IFK::None,                       // ?__U <unused>
646       IFK::None,                       // ?__V <unused>
647       IFK::None,                       // ?__W <unused>
648       IFK::None,                       // ?__X <unused>
649       IFK::None,                       // ?__Y <unused>
650       IFK::None,                       // ?__Z <unused>
651   };
652 
653   int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
654   switch (Group) {
655   case FunctionIdentifierCodeGroup::Basic:
656     return Basic[Index];
657   case FunctionIdentifierCodeGroup::Under:
658     return Under[Index];
659   case FunctionIdentifierCodeGroup::DoubleUnder:
660     return DoubleUnder[Index];
661   }
662   DEMANGLE_UNREACHABLE;
663 }
664 
665 IdentifierNode *
666 Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
667                                           FunctionIdentifierCodeGroup Group) {
668   if (MangledName.empty()) {
669     Error = true;
670     return nullptr;
671   }
672   switch (Group) {
673   case FunctionIdentifierCodeGroup::Basic:
674     switch (char CH = MangledName.popFront()) {
675     case '0':
676     case '1':
677       return demangleStructorIdentifier(MangledName, CH == '1');
678     case 'B':
679       return demangleConversionOperatorIdentifier(MangledName);
680     default:
681       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
682           translateIntrinsicFunctionCode(CH, Group));
683     }
684   case FunctionIdentifierCodeGroup::Under:
685     return Arena.alloc<IntrinsicFunctionIdentifierNode>(
686         translateIntrinsicFunctionCode(MangledName.popFront(), Group));
687   case FunctionIdentifierCodeGroup::DoubleUnder:
688     switch (char CH = MangledName.popFront()) {
689     case 'K':
690       return demangleLiteralOperatorIdentifier(MangledName);
691     default:
692       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
693           translateIntrinsicFunctionCode(CH, Group));
694     }
695   }
696 
697   DEMANGLE_UNREACHABLE;
698 }
699 
700 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
701                                              QualifiedNameNode *Name) {
702   if (MangledName.empty()) {
703     Error = true;
704     return nullptr;
705   }
706 
707   // Read a variable.
708   switch (MangledName.front()) {
709   case '0':
710   case '1':
711   case '2':
712   case '3':
713   case '4': {
714     StorageClass SC = demangleVariableStorageClass(MangledName);
715     return demangleVariableEncoding(MangledName, SC);
716   }
717   }
718   FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
719 
720   IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
721   if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
722     ConversionOperatorIdentifierNode *COIN =
723         static_cast<ConversionOperatorIdentifierNode *>(UQN);
724     if (FSN)
725       COIN->TargetType = FSN->Signature->ReturnType;
726   }
727   return FSN;
728 }
729 
730 SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
731   // What follows is a main symbol name. This may include namespaces or class
732   // back references.
733   QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
734   if (Error)
735     return nullptr;
736 
737   SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
738   if (Error)
739     return nullptr;
740   Symbol->Name = QN;
741 
742   IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
743   if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
744     ConversionOperatorIdentifierNode *COIN =
745         static_cast<ConversionOperatorIdentifierNode *>(UQN);
746     if (!COIN->TargetType) {
747       Error = true;
748       return nullptr;
749     }
750   }
751   return Symbol;
752 }
753 
754 SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
755   assert(MangledName.startsWith("??@"));
756   // This is an MD5 mangled name.  We can't demangle it, just return the
757   // mangled name.
758   // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
759   size_t MD5Last = MangledName.find('@', strlen("??@"));
760   if (MD5Last == StringView::npos) {
761     Error = true;
762     return nullptr;
763   }
764   const char *Start = MangledName.begin();
765   MangledName = MangledName.dropFront(MD5Last + 1);
766 
767   // There are two additional special cases for MD5 names:
768   // 1. For complete object locators where the object name is long enough
769   //    for the object to have an MD5 name, the complete object locator is
770   //    called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
771   //    leading "??_R4". This is handled here.
772   // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
773   //    2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
774   //    instead of_CT??@...@8 with just one MD5 name. Since we don't yet
775   //    demangle catchable types anywhere, this isn't handled for MD5 names
776   //    either.
777   MangledName.consumeFront("??_R4@");
778 
779   StringView MD5(Start, MangledName.begin());
780   SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
781   S->Name = synthesizeQualifiedName(Arena, MD5);
782 
783   return S;
784 }
785 
786 SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) {
787   assert(MangledName.startsWith('.'));
788   MangledName.consumeFront('.');
789 
790   TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
791   if (Error || !MangledName.empty()) {
792     Error = true;
793     return nullptr;
794   }
795   return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'");
796 }
797 
798 // Parser entry point.
799 SymbolNode *Demangler::parse(StringView &MangledName) {
800   // Typeinfo names are strings stored in RTTI data. They're not symbol names.
801   // It's still useful to demangle them. They're the only demangled entity
802   // that doesn't start with a "?" but a ".".
803   if (MangledName.startsWith('.'))
804     return demangleTypeinfoName(MangledName);
805 
806   if (MangledName.startsWith("??@"))
807     return demangleMD5Name(MangledName);
808 
809   // MSVC-style mangled symbols must start with '?'.
810   if (!MangledName.startsWith('?')) {
811     Error = true;
812     return nullptr;
813   }
814 
815   MangledName.consumeFront('?');
816 
817   // ?$ is a template instantiation, but all other names that start with ? are
818   // operators / special names.
819   if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
820     return SI;
821 
822   return demangleDeclarator(MangledName);
823 }
824 
825 TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
826   if (!MangledName.consumeFront(".?A"))
827     return nullptr;
828   MangledName.consumeFront(".?A");
829   if (MangledName.empty())
830     return nullptr;
831 
832   return demangleClassType(MangledName);
833 }
834 
835 // <type-encoding> ::= <storage-class> <variable-type>
836 // <storage-class> ::= 0  # private static member
837 //                 ::= 1  # protected static member
838 //                 ::= 2  # public static member
839 //                 ::= 3  # global
840 //                 ::= 4  # static local
841 
842 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
843                                                         StorageClass SC) {
844   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
845 
846   VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
847   VSN->SC = SC;
848 
849   if (Error)
850     return nullptr;
851 
852   // <variable-type> ::= <type> <cvr-qualifiers>
853   //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
854   switch (VSN->Type->kind()) {
855   case NodeKind::PointerType: {
856     PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
857 
858     Qualifiers ExtraChildQuals = Q_None;
859     PTN->Quals = Qualifiers(VSN->Type->Quals |
860                             demanglePointerExtQualifiers(MangledName));
861 
862     bool IsMember = false;
863     std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
864 
865     if (PTN->ClassParent) {
866       QualifiedNameNode *BackRefName =
867           demangleFullyQualifiedTypeName(MangledName);
868       (void)BackRefName;
869     }
870     PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
871 
872     break;
873   }
874   default:
875     VSN->Type->Quals = demangleQualifiers(MangledName).first;
876     break;
877   }
878 
879   return VSN;
880 }
881 
882 // Sometimes numbers are encoded in mangled symbols. For example,
883 // "int (*x)[20]" is a valid C type (x is a pointer to an array of
884 // length 20), so we need some way to embed numbers as part of symbols.
885 // This function parses it.
886 //
887 // <number>               ::= [?] <non-negative integer>
888 //
889 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
890 //                        ::= <hex digit>+ @  # when Number == 0 or >= 10
891 //
892 // <hex-digit>            ::= [A-P]           # A = 0, B = 1, ...
893 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
894   bool IsNegative = MangledName.consumeFront('?');
895 
896   if (startsWithDigit(MangledName)) {
897     uint64_t Ret = MangledName[0] - '0' + 1;
898     MangledName = MangledName.dropFront(1);
899     return {Ret, IsNegative};
900   }
901 
902   uint64_t Ret = 0;
903   for (size_t i = 0; i < MangledName.size(); ++i) {
904     char C = MangledName[i];
905     if (C == '@') {
906       MangledName = MangledName.dropFront(i + 1);
907       return {Ret, IsNegative};
908     }
909     if ('A' <= C && C <= 'P') {
910       Ret = (Ret << 4) + (C - 'A');
911       continue;
912     }
913     break;
914   }
915 
916   Error = true;
917   return {0ULL, false};
918 }
919 
920 uint64_t Demangler::demangleUnsigned(StringView &MangledName) {
921   bool IsNegative = false;
922   uint64_t Number = 0;
923   std::tie(Number, IsNegative) = demangleNumber(MangledName);
924   if (IsNegative)
925     Error = true;
926   return Number;
927 }
928 
929 int64_t Demangler::demangleSigned(StringView &MangledName) {
930   bool IsNegative = false;
931   uint64_t Number = 0;
932   std::tie(Number, IsNegative) = demangleNumber(MangledName);
933   if (Number > INT64_MAX)
934     Error = true;
935   int64_t I = static_cast<int64_t>(Number);
936   return IsNegative ? -I : I;
937 }
938 
939 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
940 // Memorize it.
941 void Demangler::memorizeString(StringView S) {
942   if (Backrefs.NamesCount >= BackrefContext::Max)
943     return;
944   for (size_t i = 0; i < Backrefs.NamesCount; ++i)
945     if (S == Backrefs.Names[i]->Name)
946       return;
947   NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
948   N->Name = S;
949   Backrefs.Names[Backrefs.NamesCount++] = N;
950 }
951 
952 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
953   assert(startsWithDigit(MangledName));
954 
955   size_t I = MangledName[0] - '0';
956   if (I >= Backrefs.NamesCount) {
957     Error = true;
958     return nullptr;
959   }
960 
961   MangledName = MangledName.dropFront();
962   return Backrefs.Names[I];
963 }
964 
965 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
966   // Render this class template name into a string buffer so that we can
967   // memorize it for the purpose of back-referencing.
968   OutputBuffer OB;
969   if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
970     // FIXME: Propagate out-of-memory as an error?
971     std::terminate();
972   Identifier->output(OB, OF_Default);
973   OB << '\0';
974   char *Name = OB.getBuffer();
975 
976   StringView Owned = copyString(Name);
977   memorizeString(Owned);
978   std::free(Name);
979 }
980 
981 IdentifierNode *
982 Demangler::demangleTemplateInstantiationName(StringView &MangledName,
983                                              NameBackrefBehavior NBB) {
984   assert(MangledName.startsWith("?$"));
985   MangledName.consumeFront("?$");
986 
987   BackrefContext OuterContext;
988   std::swap(OuterContext, Backrefs);
989 
990   IdentifierNode *Identifier =
991       demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
992   if (!Error)
993     Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
994 
995   std::swap(OuterContext, Backrefs);
996   if (Error)
997     return nullptr;
998 
999   if (NBB & NBB_Template) {
1000     // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
1001     // Structors and conversion operators only makes sense in a leaf name, so
1002     // reject them in NBB_Template contexts.
1003     if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier ||
1004         Identifier->kind() == NodeKind::StructorIdentifier) {
1005       Error = true;
1006       return nullptr;
1007     }
1008 
1009     memorizeIdentifier(Identifier);
1010   }
1011 
1012   return Identifier;
1013 }
1014 
1015 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName,
1016                                                    bool Memorize) {
1017   StringView S = demangleSimpleString(MangledName, Memorize);
1018   if (Error)
1019     return nullptr;
1020 
1021   NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
1022   Name->Name = S;
1023   return Name;
1024 }
1025 
// True for the sixteen "rebased" hex digits 'A' through 'P'.
static bool isRebasedHexDigit(char C) { return !(C < 'A' || C > 'P'); }
1027 
1028 static uint8_t rebasedHexDigitToNumber(char C) {
1029   assert(isRebasedHexDigit(C));
1030   return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
1031 }
1032 
1033 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
1034   assert(!MangledName.empty());
1035   if (!MangledName.startsWith('?'))
1036     return MangledName.popFront();
1037 
1038   MangledName = MangledName.dropFront();
1039   if (MangledName.empty())
1040     goto CharLiteralError;
1041 
1042   if (MangledName.consumeFront('$')) {
1043     // Two hex digits
1044     if (MangledName.size() < 2)
1045       goto CharLiteralError;
1046     StringView Nibbles = MangledName.substr(0, 2);
1047     if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
1048       goto CharLiteralError;
1049     // Don't append the null terminator.
1050     uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
1051     uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
1052     MangledName = MangledName.dropFront(2);
1053     return (C1 << 4) | C2;
1054   }
1055 
1056   if (startsWithDigit(MangledName)) {
1057     const char *Lookup = ",/\\:. \n\t'-";
1058     char C = Lookup[MangledName[0] - '0'];
1059     MangledName = MangledName.dropFront();
1060     return C;
1061   }
1062 
1063   if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
1064     char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
1065                        '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
1066                        '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
1067                        '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
1068     char C = Lookup[MangledName[0] - 'a'];
1069     MangledName = MangledName.dropFront();
1070     return C;
1071   }
1072 
1073   if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
1074     char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
1075                        '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
1076                        '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
1077                        '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
1078     char C = Lookup[MangledName[0] - 'A'];
1079     MangledName = MangledName.dropFront();
1080     return C;
1081   }
1082 
1083 CharLiteralError:
1084   Error = true;
1085   return '\0';
1086 }
1087 
1088 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
1089   uint8_t C1, C2;
1090 
1091   C1 = demangleCharLiteral(MangledName);
1092   if (Error || MangledName.empty())
1093     goto WCharLiteralError;
1094   C2 = demangleCharLiteral(MangledName);
1095   if (Error)
1096     goto WCharLiteralError;
1097 
1098   return ((wchar_t)C1 << 8) | (wchar_t)C2;
1099 
1100 WCharLiteralError:
1101   Error = true;
1102   return L'\0';
1103 }
1104 
// Stores the upper-case hexadecimal character for Digit (0..15) in *Buffer.
static void writeHexDigit(char *Buffer, uint8_t Digit) {
  assert(Digit <= 15);
  if (Digit < 10)
    *Buffer = '0' + Digit;
  else
    *Buffer = 'A' + (Digit - 10);
}
1109 
1110 static void outputHex(OutputBuffer &OB, unsigned C) {
1111   assert (C != 0);
1112 
1113   // It's easier to do the math if we can work from right to left, but we need
1114   // to print the numbers from left to right.  So render this into a temporary
1115   // buffer first, then output the temporary buffer.  Each byte is of the form
1116   // \xAB, which means that each byte needs 4 characters.  Since there are at
1117   // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
1118   char TempBuffer[17];
1119 
1120   ::memset(TempBuffer, 0, sizeof(TempBuffer));
1121   constexpr int MaxPos = sizeof(TempBuffer) - 1;
1122 
1123   int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
1124   while (C != 0) {
1125     for (int I = 0; I < 2; ++I) {
1126       writeHexDigit(&TempBuffer[Pos--], C % 16);
1127       C /= 16;
1128     }
1129   }
1130   TempBuffer[Pos--] = 'x';
1131   assert(Pos >= 0);
1132   TempBuffer[Pos--] = '\\';
1133   OB << StringView(&TempBuffer[Pos + 1]);
1134 }
1135 
1136 static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
1137   switch (C) {
1138   case '\0': // nul
1139     OB << "\\0";
1140     return;
1141   case '\'': // single quote
1142     OB << "\\\'";
1143     return;
1144   case '\"': // double quote
1145     OB << "\\\"";
1146     return;
1147   case '\\': // backslash
1148     OB << "\\\\";
1149     return;
1150   case '\a': // bell
1151     OB << "\\a";
1152     return;
1153   case '\b': // backspace
1154     OB << "\\b";
1155     return;
1156   case '\f': // form feed
1157     OB << "\\f";
1158     return;
1159   case '\n': // new line
1160     OB << "\\n";
1161     return;
1162   case '\r': // carriage return
1163     OB << "\\r";
1164     return;
1165   case '\t': // tab
1166     OB << "\\t";
1167     return;
1168   case '\v': // vertical tab
1169     OB << "\\v";
1170     return;
1171   default:
1172     break;
1173   }
1174 
1175   if (C > 0x1F && C < 0x7F) {
1176     // Standard ascii char.
1177     OB << (char)C;
1178     return;
1179   }
1180 
1181   outputHex(OB, C);
1182 }
1183 
// Returns how many consecutive zero bytes sit at the end of the first Length
// bytes of StringBytes. A Length of zero (or less) yields 0.
//
// Bytes are reached by index only after Length has been checked, so no
// pointer before the start of the buffer is ever formed, not even for an
// empty range.
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
  unsigned Count = 0;
  while (Length > 0 && StringBytes[Length - 1] == 0) {
    --Length;
    ++Count;
  }
  return Count;
}
1194 
// Returns the number of zero bytes among the first Length bytes of
// StringBytes.
static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
                                   unsigned Length) {
  unsigned NullCount = 0;
  for (const uint8_t *Cur = StringBytes, *End = StringBytes + Length;
       Cur != End; ++Cur)
    NullCount += (*Cur == 0) ? 1u : 0u;
  return NullCount;
}
1204 
1205 // A mangled (non-wide) string literal stores the total length of the string it
1206 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text
1207 // (passed in StringBytes, NumChars).
1208 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
1209                                   uint64_t NumBytes) {
1210   assert(NumBytes > 0);
1211 
1212   // If the number of bytes is odd, this is guaranteed to be a char string.
1213   if (NumBytes % 2 == 1)
1214     return 1;
1215 
1216   // All strings can encode at most 32 bytes of data.  If it's less than that,
1217   // then we encoded the entire string.  In this case we check for a 1-byte,
1218   // 2-byte, or 4-byte null terminator.
1219   if (NumBytes < 32) {
1220     unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
1221     if (TrailingNulls >= 4 && NumBytes % 4 == 0)
1222       return 4;
1223     if (TrailingNulls >= 2)
1224       return 2;
1225     return 1;
1226   }
1227 
1228   // The whole string was not able to be encoded.  Try to look at embedded null
1229   // terminators to guess.  The heuristic is that we count all embedded null
1230   // terminators.  If more than 2/3 are null, it's a char32.  If more than 1/3
1231   // are null, it's a char16.  Otherwise it's a char8.  This obviously isn't
1232   // perfect and is biased towards languages that have ascii alphabets, but this
1233   // was always going to be best effort since the encoding is lossy.
1234   unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
1235   if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
1236     return 4;
1237   if (Nulls >= NumChars / 3)
1238     return 2;
1239   return 1;
1240 }
1241 
// Reads character number CharIndex from StringBytes, where every character
// occupies CharBytes (1, 2 or 4) bytes stored least-significant byte first.
static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
                                    unsigned CharIndex, unsigned CharBytes) {
  assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
  const uint8_t *Unit = StringBytes + CharIndex * CharBytes;

  // Walk from the most significant byte down, shifting the accumulated value
  // up by one byte per step.
  unsigned Value = 0;
  for (unsigned I = CharBytes; I > 0; --I)
    Value = (Value << 8) | static_cast<unsigned>(Unit[I - 1]);
  return Value;
}
1254 
1255 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
1256   FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
1257   VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
1258   FSN->Signature = Arena.alloc<ThunkSignatureNode>();
1259   FSN->Signature->FunctionClass = FC_NoParameterList;
1260 
1261   FSN->Name = demangleNameScopeChain(MangledName, VTIN);
1262   if (!Error)
1263     Error = !MangledName.consumeFront("$B");
1264   if (!Error)
1265     VTIN->OffsetInVTable = demangleUnsigned(MangledName);
1266   if (!Error)
1267     Error = !MangledName.consumeFront('A');
1268   if (!Error)
1269     FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
1270   return (Error) ? nullptr : FSN;
1271 }
1272 
1273 EncodedStringLiteralNode *
1274 Demangler::demangleStringLiteral(StringView &MangledName) {
1275   // This function uses goto, so declare all variables up front.
1276   OutputBuffer OB;
1277   StringView CRC;
1278   uint64_t StringByteSize;
1279   bool IsWcharT = false;
1280   bool IsNegative = false;
1281   size_t CrcEndPos = 0;
1282   char *ResultBuffer = nullptr;
1283 
1284   EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
1285 
1286   // Must happen before the first `goto StringLiteralError`.
1287   if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
1288     // FIXME: Propagate out-of-memory as an error?
1289     std::terminate();
1290 
1291   // Prefix indicating the beginning of a string literal
1292   if (!MangledName.consumeFront("@_"))
1293     goto StringLiteralError;
1294   if (MangledName.empty())
1295     goto StringLiteralError;
1296 
1297   // Char Type (regular or wchar_t)
1298   switch (MangledName.popFront()) {
1299   case '1':
1300     IsWcharT = true;
1301     DEMANGLE_FALLTHROUGH;
1302   case '0':
1303     break;
1304   default:
1305     goto StringLiteralError;
1306   }
1307 
1308   // Encoded Length
1309   std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
1310   if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1))
1311     goto StringLiteralError;
1312 
1313   // CRC 32 (always 8 characters plus a terminator)
1314   CrcEndPos = MangledName.find('@');
1315   if (CrcEndPos == StringView::npos)
1316     goto StringLiteralError;
1317   CRC = MangledName.substr(0, CrcEndPos);
1318   MangledName = MangledName.dropFront(CrcEndPos + 1);
1319   if (MangledName.empty())
1320     goto StringLiteralError;
1321 
1322   if (IsWcharT) {
1323     Result->Char = CharKind::Wchar;
1324     if (StringByteSize > 64)
1325       Result->IsTruncated = true;
1326 
1327     while (!MangledName.consumeFront('@')) {
1328       if (MangledName.size() < 2)
1329         goto StringLiteralError;
1330       wchar_t W = demangleWcharLiteral(MangledName);
1331       if (StringByteSize != 2 || Result->IsTruncated)
1332         outputEscapedChar(OB, W);
1333       StringByteSize -= 2;
1334       if (Error)
1335         goto StringLiteralError;
1336     }
1337   } else {
1338     // The max byte length is actually 32, but some compilers mangled strings
1339     // incorrectly, so we have to assume it can go higher.
1340     constexpr unsigned MaxStringByteLength = 32 * 4;
1341     uint8_t StringBytes[MaxStringByteLength];
1342 
1343     unsigned BytesDecoded = 0;
1344     while (!MangledName.consumeFront('@')) {
1345       if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
1346         goto StringLiteralError;
1347       StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
1348     }
1349 
1350     if (StringByteSize > BytesDecoded)
1351       Result->IsTruncated = true;
1352 
1353     unsigned CharBytes =
1354         guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
1355     assert(StringByteSize % CharBytes == 0);
1356     switch (CharBytes) {
1357     case 1:
1358       Result->Char = CharKind::Char;
1359       break;
1360     case 2:
1361       Result->Char = CharKind::Char16;
1362       break;
1363     case 4:
1364       Result->Char = CharKind::Char32;
1365       break;
1366     default:
1367       DEMANGLE_UNREACHABLE;
1368     }
1369     const unsigned NumChars = BytesDecoded / CharBytes;
1370     for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
1371       unsigned NextChar =
1372           decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
1373       if (CharIndex + 1 < NumChars || Result->IsTruncated)
1374         outputEscapedChar(OB, NextChar);
1375     }
1376   }
1377 
1378   OB << '\0';
1379   ResultBuffer = OB.getBuffer();
1380   Result->DecodedString = copyString(ResultBuffer);
1381   std::free(ResultBuffer);
1382   return Result;
1383 
1384 StringLiteralError:
1385   Error = true;
1386   std::free(OB.getBuffer());
1387   return nullptr;
1388 }
1389 
1390 // Returns MangledName's prefix before the first '@', or an error if
1391 // MangledName contains no '@' or the prefix has length 0.
1392 StringView Demangler::demangleSimpleString(StringView &MangledName,
1393                                            bool Memorize) {
1394   StringView S;
1395   for (size_t i = 0; i < MangledName.size(); ++i) {
1396     if (MangledName[i] != '@')
1397       continue;
1398     if (i == 0)
1399       break;
1400     S = MangledName.substr(0, i);
1401     MangledName = MangledName.dropFront(i + 1);
1402 
1403     if (Memorize)
1404       memorizeString(S);
1405     return S;
1406   }
1407 
1408   Error = true;
1409   return {};
1410 }
1411 
1412 NamedIdentifierNode *
1413 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
1414   assert(MangledName.startsWith("?A"));
1415   MangledName.consumeFront("?A");
1416 
1417   NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
1418   Node->Name = "`anonymous namespace'";
1419   size_t EndPos = MangledName.find('@');
1420   if (EndPos == StringView::npos) {
1421     Error = true;
1422     return nullptr;
1423   }
1424   StringView NamespaceKey = MangledName.substr(0, EndPos);
1425   memorizeString(NamespaceKey);
1426   MangledName = MangledName.substr(EndPos + 1);
1427   return Node;
1428 }
1429 
1430 NamedIdentifierNode *
1431 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
1432   assert(startsWithLocalScopePattern(MangledName));
1433 
1434   NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
1435   MangledName.consumeFront('?');
1436   uint64_t Number = 0;
1437   bool IsNegative = false;
1438   std::tie(Number, IsNegative) = demangleNumber(MangledName);
1439   assert(!IsNegative);
1440 
1441   // One ? to terminate the number
1442   MangledName.consumeFront('?');
1443 
1444   assert(!Error);
1445   Node *Scope = parse(MangledName);
1446   if (Error)
1447     return nullptr;
1448 
1449   // Render the parent symbol's name into a buffer.
1450   OutputBuffer OB;
1451   if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
1452     // FIXME: Propagate out-of-memory as an error?
1453     std::terminate();
1454   OB << '`';
1455   Scope->output(OB, OF_Default);
1456   OB << '\'';
1457   OB << "::`" << Number << "'";
1458   OB << '\0';
1459   char *Result = OB.getBuffer();
1460   Identifier->Name = copyString(Result);
1461   std::free(Result);
1462   return Identifier;
1463 }
1464 
1465 // Parses a type name in the form of A@B@C@@ which represents C::B::A.
1466 QualifiedNameNode *
1467 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
1468   IdentifierNode *Identifier =
1469       demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1470   if (Error)
1471     return nullptr;
1472   assert(Identifier);
1473 
1474   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1475   if (Error)
1476     return nullptr;
1477   assert(QN);
1478   return QN;
1479 }
1480 
1481 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1482 // Symbol names have slightly different rules regarding what can appear
1483 // so we separate out the implementations for flexibility.
1484 QualifiedNameNode *
1485 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
1486   // This is the final component of a symbol name (i.e. the leftmost component
1487   // of a mangled name.  Since the only possible template instantiation that
1488   // can appear in this context is a function template, and since those are
1489   // not saved for the purposes of name backreferences, only backref simple
1490   // names.
1491   IdentifierNode *Identifier =
1492       demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1493   if (Error)
1494     return nullptr;
1495 
1496   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1497   if (Error)
1498     return nullptr;
1499 
1500   if (Identifier->kind() == NodeKind::StructorIdentifier) {
1501     if (QN->Components->Count < 2) {
1502       Error = true;
1503       return nullptr;
1504     }
1505     StructorIdentifierNode *SIN =
1506         static_cast<StructorIdentifierNode *>(Identifier);
1507     Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
1508     SIN->Class = static_cast<IdentifierNode *>(ClassNode);
1509   }
1510   assert(QN);
1511   return QN;
1512 }
1513 
1514 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName,
1515                                                        bool Memorize) {
1516   // An inner-most name can be a back-reference, because a fully-qualified name
1517   // (e.g. Scope + Inner) can contain other fully qualified names inside of
1518   // them (for example template parameters), and these nested parameters can
1519   // refer to previously mangled types.
1520   if (startsWithDigit(MangledName))
1521     return demangleBackRefName(MangledName);
1522 
1523   if (MangledName.startsWith("?$"))
1524     return demangleTemplateInstantiationName(MangledName, NBB_Template);
1525 
1526   return demangleSimpleName(MangledName, Memorize);
1527 }
1528 
1529 IdentifierNode *
1530 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
1531                                          NameBackrefBehavior NBB) {
1532   if (startsWithDigit(MangledName))
1533     return demangleBackRefName(MangledName);
1534   if (MangledName.startsWith("?$"))
1535     return demangleTemplateInstantiationName(MangledName, NBB);
1536   if (MangledName.startsWith('?'))
1537     return demangleFunctionIdentifierCode(MangledName);
1538   return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
1539 }
1540 
1541 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
1542   if (startsWithDigit(MangledName))
1543     return demangleBackRefName(MangledName);
1544 
1545   if (MangledName.startsWith("?$"))
1546     return demangleTemplateInstantiationName(MangledName, NBB_Template);
1547 
1548   if (MangledName.startsWith("?A"))
1549     return demangleAnonymousNamespaceName(MangledName);
1550 
1551   if (startsWithLocalScopePattern(MangledName))
1552     return demangleLocallyScopedNamePiece(MangledName);
1553 
1554   return demangleSimpleName(MangledName, /*Memorize=*/true);
1555 }
1556 
1557 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
1558                                           size_t Count) {
1559   NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
1560   N->Count = Count;
1561   N->Nodes = Arena.allocArray<Node *>(Count);
1562   for (size_t I = 0; I < Count; ++I) {
1563     N->Nodes[I] = Head->N;
1564     Head = Head->Next;
1565   }
1566   return N;
1567 }
1568 
1569 QualifiedNameNode *
1570 Demangler::demangleNameScopeChain(StringView &MangledName,
1571                                   IdentifierNode *UnqualifiedName) {
1572   NodeList *Head = Arena.alloc<NodeList>();
1573 
1574   Head->N = UnqualifiedName;
1575 
1576   size_t Count = 1;
1577   while (!MangledName.consumeFront("@")) {
1578     ++Count;
1579     NodeList *NewHead = Arena.alloc<NodeList>();
1580     NewHead->Next = Head;
1581     Head = NewHead;
1582 
1583     if (MangledName.empty()) {
1584       Error = true;
1585       return nullptr;
1586     }
1587 
1588     assert(!Error);
1589     IdentifierNode *Elem = demangleNameScopePiece(MangledName);
1590     if (Error)
1591       return nullptr;
1592 
1593     Head->N = Elem;
1594   }
1595 
1596   QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
1597   QN->Components = nodeListToNodeArray(Arena, Head, Count);
1598   return QN;
1599 }
1600 
1601 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
1602   switch (MangledName.popFront()) {
1603   case '9':
1604     return FuncClass(FC_ExternC | FC_NoParameterList);
1605   case 'A':
1606     return FC_Private;
1607   case 'B':
1608     return FuncClass(FC_Private | FC_Far);
1609   case 'C':
1610     return FuncClass(FC_Private | FC_Static);
1611   case 'D':
1612     return FuncClass(FC_Private | FC_Static | FC_Far);
1613   case 'E':
1614     return FuncClass(FC_Private | FC_Virtual);
1615   case 'F':
1616     return FuncClass(FC_Private | FC_Virtual | FC_Far);
1617   case 'G':
1618     return FuncClass(FC_Private | FC_StaticThisAdjust);
1619   case 'H':
1620     return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
1621   case 'I':
1622     return FuncClass(FC_Protected);
1623   case 'J':
1624     return FuncClass(FC_Protected | FC_Far);
1625   case 'K':
1626     return FuncClass(FC_Protected | FC_Static);
1627   case 'L':
1628     return FuncClass(FC_Protected | FC_Static | FC_Far);
1629   case 'M':
1630     return FuncClass(FC_Protected | FC_Virtual);
1631   case 'N':
1632     return FuncClass(FC_Protected | FC_Virtual | FC_Far);
1633   case 'O':
1634     return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
1635   case 'P':
1636     return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1637   case 'Q':
1638     return FuncClass(FC_Public);
1639   case 'R':
1640     return FuncClass(FC_Public | FC_Far);
1641   case 'S':
1642     return FuncClass(FC_Public | FC_Static);
1643   case 'T':
1644     return FuncClass(FC_Public | FC_Static | FC_Far);
1645   case 'U':
1646     return FuncClass(FC_Public | FC_Virtual);
1647   case 'V':
1648     return FuncClass(FC_Public | FC_Virtual | FC_Far);
1649   case 'W':
1650     return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
1651   case 'X':
1652     return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1653   case 'Y':
1654     return FuncClass(FC_Global);
1655   case 'Z':
1656     return FuncClass(FC_Global | FC_Far);
1657   case '$': {
1658     FuncClass VFlag = FC_VirtualThisAdjust;
1659     if (MangledName.consumeFront('R'))
1660       VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
1661     if (MangledName.empty())
1662       break;
1663     switch (MangledName.popFront()) {
1664     case '0':
1665       return FuncClass(FC_Private | FC_Virtual | VFlag);
1666     case '1':
1667       return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
1668     case '2':
1669       return FuncClass(FC_Protected | FC_Virtual | VFlag);
1670     case '3':
1671       return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
1672     case '4':
1673       return FuncClass(FC_Public | FC_Virtual | VFlag);
1674     case '5':
1675       return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
1676     }
1677   }
1678   }
1679 
1680   Error = true;
1681   return FC_Public;
1682 }
1683 
1684 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
1685   if (MangledName.empty()) {
1686     Error = true;
1687     return CallingConv::None;
1688   }
1689 
1690   switch (MangledName.popFront()) {
1691   case 'A':
1692   case 'B':
1693     return CallingConv::Cdecl;
1694   case 'C':
1695   case 'D':
1696     return CallingConv::Pascal;
1697   case 'E':
1698   case 'F':
1699     return CallingConv::Thiscall;
1700   case 'G':
1701   case 'H':
1702     return CallingConv::Stdcall;
1703   case 'I':
1704   case 'J':
1705     return CallingConv::Fastcall;
1706   case 'M':
1707   case 'N':
1708     return CallingConv::Clrcall;
1709   case 'O':
1710   case 'P':
1711     return CallingConv::Eabi;
1712   case 'Q':
1713     return CallingConv::Vectorcall;
1714   case 'S':
1715     return CallingConv::Swift;
1716   case 'W':
1717     return CallingConv::SwiftAsync;
1718   }
1719 
1720   return CallingConv::None;
1721 }
1722 
1723 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
1724   assert(MangledName.front() >= '0' && MangledName.front() <= '4');
1725 
1726   switch (MangledName.popFront()) {
1727   case '0':
1728     return StorageClass::PrivateStatic;
1729   case '1':
1730     return StorageClass::ProtectedStatic;
1731   case '2':
1732     return StorageClass::PublicStatic;
1733   case '3':
1734     return StorageClass::Global;
1735   case '4':
1736     return StorageClass::FunctionLocalStatic;
1737   }
1738   DEMANGLE_UNREACHABLE;
1739 }
1740 
1741 std::pair<Qualifiers, bool>
1742 Demangler::demangleQualifiers(StringView &MangledName) {
1743   if (MangledName.empty()) {
1744     Error = true;
1745     return std::make_pair(Q_None, false);
1746   }
1747 
1748   switch (MangledName.popFront()) {
1749   // Member qualifiers
1750   case 'Q':
1751     return std::make_pair(Q_None, true);
1752   case 'R':
1753     return std::make_pair(Q_Const, true);
1754   case 'S':
1755     return std::make_pair(Q_Volatile, true);
1756   case 'T':
1757     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1758   // Non-Member qualifiers
1759   case 'A':
1760     return std::make_pair(Q_None, false);
1761   case 'B':
1762     return std::make_pair(Q_Const, false);
1763   case 'C':
1764     return std::make_pair(Q_Volatile, false);
1765   case 'D':
1766     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1767   }
1768   Error = true;
1769   return std::make_pair(Q_None, false);
1770 }
1771 
1772 // <variable-type> ::= <type> <cvr-qualifiers>
1773 //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
1774 TypeNode *Demangler::demangleType(StringView &MangledName,
1775                                   QualifierMangleMode QMM) {
1776   Qualifiers Quals = Q_None;
1777   bool IsMember = false;
1778   if (QMM == QualifierMangleMode::Mangle) {
1779     std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1780   } else if (QMM == QualifierMangleMode::Result) {
1781     if (MangledName.consumeFront('?'))
1782       std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1783   }
1784 
1785   if (MangledName.empty()) {
1786     Error = true;
1787     return nullptr;
1788   }
1789 
1790   TypeNode *Ty = nullptr;
1791   if (isTagType(MangledName))
1792     Ty = demangleClassType(MangledName);
1793   else if (isPointerType(MangledName)) {
1794     if (isMemberPointer(MangledName, Error))
1795       Ty = demangleMemberPointerType(MangledName);
1796     else if (!Error)
1797       Ty = demanglePointerType(MangledName);
1798     else
1799       return nullptr;
1800   } else if (isArrayType(MangledName))
1801     Ty = demangleArrayType(MangledName);
1802   else if (isFunctionType(MangledName)) {
1803     if (MangledName.consumeFront("$$A8@@"))
1804       Ty = demangleFunctionType(MangledName, true);
1805     else {
1806       assert(MangledName.startsWith("$$A6"));
1807       MangledName.consumeFront("$$A6");
1808       Ty = demangleFunctionType(MangledName, false);
1809     }
1810   } else if (isCustomType(MangledName)) {
1811     Ty = demangleCustomType(MangledName);
1812   } else {
1813     Ty = demanglePrimitiveType(MangledName);
1814   }
1815 
1816   if (!Ty || Error)
1817     return Ty;
1818   Ty->Quals = Qualifiers(Ty->Quals | Quals);
1819   return Ty;
1820 }
1821 
1822 bool Demangler::demangleThrowSpecification(StringView &MangledName) {
1823   if (MangledName.consumeFront("_E"))
1824     return true;
1825   if (MangledName.consumeFront('Z'))
1826     return false;
1827 
1828   Error = true;
1829   return false;
1830 }
1831 
1832 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
1833                                                        bool HasThisQuals) {
1834   FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
1835 
1836   if (HasThisQuals) {
1837     FTy->Quals = demanglePointerExtQualifiers(MangledName);
1838     FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
1839     FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1840   }
1841 
1842   // Fields that appear on both member and non-member functions.
1843   FTy->CallConvention = demangleCallingConvention(MangledName);
1844 
1845   // <return-type> ::= <type>
1846   //               ::= @ # structors (they have no declared return type)
1847   bool IsStructor = MangledName.consumeFront('@');
1848   if (!IsStructor)
1849     FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1850 
1851   FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
1852 
1853   FTy->IsNoexcept = demangleThrowSpecification(MangledName);
1854 
1855   return FTy;
1856 }
1857 
1858 FunctionSymbolNode *
1859 Demangler::demangleFunctionEncoding(StringView &MangledName) {
1860   FuncClass ExtraFlags = FC_None;
1861   if (MangledName.consumeFront("$$J0"))
1862     ExtraFlags = FC_ExternC;
1863 
1864   if (MangledName.empty()) {
1865     Error = true;
1866     return nullptr;
1867   }
1868 
1869   FuncClass FC = demangleFunctionClass(MangledName);
1870   FC = FuncClass(ExtraFlags | FC);
1871 
1872   FunctionSignatureNode *FSN = nullptr;
1873   ThunkSignatureNode *TTN = nullptr;
1874   if (FC & FC_StaticThisAdjust) {
1875     TTN = Arena.alloc<ThunkSignatureNode>();
1876     TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1877   } else if (FC & FC_VirtualThisAdjust) {
1878     TTN = Arena.alloc<ThunkSignatureNode>();
1879     if (FC & FC_VirtualThisAdjustEx) {
1880       TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
1881       TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
1882     }
1883     TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
1884     TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1885   }
1886 
1887   if (FC & FC_NoParameterList) {
1888     // This is an extern "C" function whose full signature hasn't been mangled.
1889     // This happens when we need to mangle a local symbol inside of an extern
1890     // "C" function.
1891     FSN = Arena.alloc<FunctionSignatureNode>();
1892   } else {
1893     bool HasThisQuals = !(FC & (FC_Global | FC_Static));
1894     FSN = demangleFunctionType(MangledName, HasThisQuals);
1895   }
1896 
1897   if (Error)
1898     return nullptr;
1899 
1900   if (TTN) {
1901     *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
1902     FSN = TTN;
1903   }
1904   FSN->FunctionClass = FC;
1905 
1906   FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
1907   Symbol->Signature = FSN;
1908   return Symbol;
1909 }
1910 
1911 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
1912   assert(MangledName.startsWith('?'));
1913   MangledName.popFront();
1914 
1915   CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
1916   CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1917   if (!MangledName.consumeFront('@'))
1918     Error = true;
1919   if (Error)
1920     return nullptr;
1921   return CTN;
1922 }
1923 
1924 // Reads a primitive type.
1925 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
1926   if (MangledName.consumeFront("$$T"))
1927     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
1928 
1929   switch (MangledName.popFront()) {
1930   case 'X':
1931     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
1932   case 'D':
1933     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
1934   case 'C':
1935     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
1936   case 'E':
1937     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
1938   case 'F':
1939     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
1940   case 'G':
1941     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
1942   case 'H':
1943     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
1944   case 'I':
1945     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
1946   case 'J':
1947     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
1948   case 'K':
1949     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
1950   case 'M':
1951     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
1952   case 'N':
1953     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
1954   case 'O':
1955     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
1956   case '_': {
1957     if (MangledName.empty()) {
1958       Error = true;
1959       return nullptr;
1960     }
1961     switch (MangledName.popFront()) {
1962     case 'N':
1963       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
1964     case 'J':
1965       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
1966     case 'K':
1967       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
1968     case 'W':
1969       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
1970     case 'Q':
1971       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
1972     case 'S':
1973       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
1974     case 'U':
1975       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
1976     }
1977     break;
1978   }
1979   }
1980   Error = true;
1981   return nullptr;
1982 }
1983 
1984 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
1985   TagTypeNode *TT = nullptr;
1986 
1987   switch (MangledName.popFront()) {
1988   case 'T':
1989     TT = Arena.alloc<TagTypeNode>(TagKind::Union);
1990     break;
1991   case 'U':
1992     TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
1993     break;
1994   case 'V':
1995     TT = Arena.alloc<TagTypeNode>(TagKind::Class);
1996     break;
1997   case 'W':
1998     if (!MangledName.consumeFront('4')) {
1999       Error = true;
2000       return nullptr;
2001     }
2002     TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
2003     break;
2004   default:
2005     assert(false);
2006   }
2007 
2008   TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
2009   return TT;
2010 }
2011 
2012 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
2013 //                       # the E is required for 64-bit non-static pointers
2014 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) {
2015   PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2016 
2017   std::tie(Pointer->Quals, Pointer->Affinity) =
2018       demanglePointerCVQualifiers(MangledName);
2019 
2020   if (MangledName.consumeFront("6")) {
2021     Pointer->Pointee = demangleFunctionType(MangledName, false);
2022     return Pointer;
2023   }
2024 
2025   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2026   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2027 
2028   Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
2029   return Pointer;
2030 }
2031 
2032 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
2033   PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2034 
2035   std::tie(Pointer->Quals, Pointer->Affinity) =
2036       demanglePointerCVQualifiers(MangledName);
2037   assert(Pointer->Affinity == PointerAffinity::Pointer);
2038 
2039   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2040   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2041 
2042   // isMemberPointer() only returns true if there is at least one character
2043   // after the qualifiers.
2044   if (MangledName.consumeFront("8")) {
2045     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2046     Pointer->Pointee = demangleFunctionType(MangledName, true);
2047   } else {
2048     Qualifiers PointeeQuals = Q_None;
2049     bool IsMember = false;
2050     std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
2051     assert(IsMember || Error);
2052     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2053 
2054     Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
2055     if (Pointer->Pointee)
2056       Pointer->Pointee->Quals = PointeeQuals;
2057   }
2058 
2059   return Pointer;
2060 }
2061 
2062 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
2063   Qualifiers Quals = Q_None;
2064   if (MangledName.consumeFront('E'))
2065     Quals = Qualifiers(Quals | Q_Pointer64);
2066   if (MangledName.consumeFront('I'))
2067     Quals = Qualifiers(Quals | Q_Restrict);
2068   if (MangledName.consumeFront('F'))
2069     Quals = Qualifiers(Quals | Q_Unaligned);
2070 
2071   return Quals;
2072 }
2073 
2074 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
2075   assert(MangledName.front() == 'Y');
2076   MangledName.popFront();
2077 
2078   uint64_t Rank = 0;
2079   bool IsNegative = false;
2080   std::tie(Rank, IsNegative) = demangleNumber(MangledName);
2081   if (IsNegative || Rank == 0) {
2082     Error = true;
2083     return nullptr;
2084   }
2085 
2086   ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
2087   NodeList *Head = Arena.alloc<NodeList>();
2088   NodeList *Tail = Head;
2089 
2090   for (uint64_t I = 0; I < Rank; ++I) {
2091     uint64_t D = 0;
2092     std::tie(D, IsNegative) = demangleNumber(MangledName);
2093     if (Error || IsNegative) {
2094       Error = true;
2095       return nullptr;
2096     }
2097     Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
2098     if (I + 1 < Rank) {
2099       Tail->Next = Arena.alloc<NodeList>();
2100       Tail = Tail->Next;
2101     }
2102   }
2103   ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
2104 
2105   if (MangledName.consumeFront("$$C")) {
2106     bool IsMember = false;
2107     std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
2108     if (IsMember) {
2109       Error = true;
2110       return nullptr;
2111     }
2112   }
2113 
2114   ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
2115   return ATy;
2116 }
2117 
2118 // Reads a function's parameters.
2119 NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
2120                                                         bool &IsVariadic) {
2121   // Empty parameter list.
2122   if (MangledName.consumeFront('X'))
2123     return nullptr;
2124 
2125   NodeList *Head = Arena.alloc<NodeList>();
2126   NodeList **Current = &Head;
2127   size_t Count = 0;
2128   while (!Error && !MangledName.startsWith('@') &&
2129          !MangledName.startsWith('Z')) {
2130     ++Count;
2131 
2132     if (startsWithDigit(MangledName)) {
2133       size_t N = MangledName[0] - '0';
2134       if (N >= Backrefs.FunctionParamCount) {
2135         Error = true;
2136         return nullptr;
2137       }
2138       MangledName = MangledName.dropFront();
2139 
2140       *Current = Arena.alloc<NodeList>();
2141       (*Current)->N = Backrefs.FunctionParams[N];
2142       Current = &(*Current)->Next;
2143       continue;
2144     }
2145 
2146     size_t OldSize = MangledName.size();
2147 
2148     *Current = Arena.alloc<NodeList>();
2149     TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
2150     if (!TN || Error)
2151       return nullptr;
2152 
2153     (*Current)->N = TN;
2154 
2155     size_t CharsConsumed = OldSize - MangledName.size();
2156     assert(CharsConsumed != 0);
2157 
2158     // Single-letter types are ignored for backreferences because memorizing
2159     // them doesn't save anything.
2160     if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
2161       Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
2162 
2163     Current = &(*Current)->Next;
2164   }
2165 
2166   if (Error)
2167     return nullptr;
2168 
2169   NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
2170   // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
2171   // list or '@' (non variadic).  Careful not to consume "@Z", as in that case
2172   // the following Z could be a throw specifier.
2173   if (MangledName.consumeFront('@'))
2174     return NA;
2175 
2176   if (MangledName.consumeFront('Z')) {
2177     IsVariadic = true;
2178     return NA;
2179   }
2180 
2181   DEMANGLE_UNREACHABLE;
2182 }
2183 
2184 NodeArrayNode *
2185 Demangler::demangleTemplateParameterList(StringView &MangledName) {
2186   NodeList *Head = nullptr;
2187   NodeList **Current = &Head;
2188   size_t Count = 0;
2189 
2190   while (!MangledName.startsWith('@')) {
2191     if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
2192         MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
2193       // parameter pack separator
2194       continue;
2195     }
2196 
2197     ++Count;
2198 
2199     // Template parameter lists don't participate in back-referencing.
2200     *Current = Arena.alloc<NodeList>();
2201 
2202     NodeList &TP = **Current;
2203 
2204     TemplateParameterReferenceNode *TPRN = nullptr;
2205     if (MangledName.consumeFront("$$Y")) {
2206       // Template alias
2207       TP.N = demangleFullyQualifiedTypeName(MangledName);
2208     } else if (MangledName.consumeFront("$$B")) {
2209       // Array
2210       TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2211     } else if (MangledName.consumeFront("$$C")) {
2212       // Type has qualifiers.
2213       TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
2214     } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") ||
2215                MangledName.startsWith("$I") || MangledName.startsWith("$J")) {
2216       // Pointer to member
2217       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2218       TPRN->IsMemberPointer = true;
2219 
2220       MangledName = MangledName.dropFront();
2221       // 1 - single inheritance       <name>
2222       // H - multiple inheritance     <name> <number>
2223       // I - virtual inheritance      <name> <number> <number>
2224       // J - unspecified inheritance  <name> <number> <number> <number>
2225       char InheritanceSpecifier = MangledName.popFront();
2226       SymbolNode *S = nullptr;
2227       if (MangledName.startsWith('?')) {
2228         S = parse(MangledName);
2229         if (Error || !S->Name) {
2230           Error = true;
2231           return nullptr;
2232         }
2233         memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
2234       }
2235 
2236       switch (InheritanceSpecifier) {
2237       case 'J':
2238         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2239             demangleSigned(MangledName);
2240         DEMANGLE_FALLTHROUGH;
2241       case 'I':
2242         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2243             demangleSigned(MangledName);
2244         DEMANGLE_FALLTHROUGH;
2245       case 'H':
2246         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2247             demangleSigned(MangledName);
2248         DEMANGLE_FALLTHROUGH;
2249       case '1':
2250         break;
2251       default:
2252         DEMANGLE_UNREACHABLE;
2253       }
2254       TPRN->Affinity = PointerAffinity::Pointer;
2255       TPRN->Symbol = S;
2256     } else if (MangledName.startsWith("$E?")) {
2257       MangledName.consumeFront("$E");
2258       // Reference to symbol
2259       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2260       TPRN->Symbol = parse(MangledName);
2261       TPRN->Affinity = PointerAffinity::Reference;
2262     } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) {
2263       TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2264 
2265       // Data member pointer.
2266       MangledName = MangledName.dropFront();
2267       char InheritanceSpecifier = MangledName.popFront();
2268 
2269       switch (InheritanceSpecifier) {
2270       case 'G':
2271         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2272             demangleSigned(MangledName);
2273         DEMANGLE_FALLTHROUGH;
2274       case 'F':
2275         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2276             demangleSigned(MangledName);
2277         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2278             demangleSigned(MangledName);
2279         break;
2280       default:
2281         DEMANGLE_UNREACHABLE;
2282       }
2283       TPRN->IsMemberPointer = true;
2284 
2285     } else if (MangledName.consumeFront("$0")) {
2286       // Integral non-type template parameter
2287       bool IsNegative = false;
2288       uint64_t Value = 0;
2289       std::tie(Value, IsNegative) = demangleNumber(MangledName);
2290 
2291       TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
2292     } else {
2293       TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2294     }
2295     if (Error)
2296       return nullptr;
2297 
2298     Current = &TP.Next;
2299   }
2300 
2301   // The loop above returns nullptr on Error.
2302   assert(!Error);
2303 
2304   // Template parameter lists cannot be variadic, so it can only be terminated
2305   // by @ (as opposed to 'Z' in the function parameter case).
2306   assert(MangledName.startsWith('@')); // The above loop exits only on '@'.
2307   MangledName.consumeFront('@');
2308   return nodeListToNodeArray(Arena, Head, Count);
2309 }
2310 
2311 void Demangler::dumpBackReferences() {
2312   std::printf("%d function parameter backreferences\n",
2313               (int)Backrefs.FunctionParamCount);
2314 
2315   // Create an output stream so we can render each type.
2316   OutputBuffer OB;
2317   if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
2318     std::terminate();
2319   for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
2320     OB.setCurrentPosition(0);
2321 
2322     TypeNode *T = Backrefs.FunctionParams[I];
2323     T->output(OB, OF_Default);
2324 
2325     std::printf("  [%d] - %.*s\n", (int)I, (int)OB.getCurrentPosition(),
2326                 OB.getBuffer());
2327   }
2328   std::free(OB.getBuffer());
2329 
2330   if (Backrefs.FunctionParamCount > 0)
2331     std::printf("\n");
2332   std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
2333   for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
2334     std::printf("  [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
2335                 Backrefs.Names[I]->Name.begin());
2336   }
2337   if (Backrefs.NamesCount > 0)
2338     std::printf("\n");
2339 }
2340 
2341 char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
2342                               char *Buf, size_t *N,
2343                               int *Status, MSDemangleFlags Flags) {
2344   Demangler D;
2345   OutputBuffer OB;
2346 
2347   StringView Name{MangledName};
2348   SymbolNode *AST = D.parse(Name);
2349   if (!D.Error && NMangled)
2350     *NMangled = Name.begin() - MangledName;
2351 
2352   if (Flags & MSDF_DumpBackrefs)
2353     D.dumpBackReferences();
2354 
2355   OutputFlags OF = OF_Default;
2356   if (Flags & MSDF_NoCallingConvention)
2357     OF = OutputFlags(OF | OF_NoCallingConvention);
2358   if (Flags & MSDF_NoAccessSpecifier)
2359     OF = OutputFlags(OF | OF_NoAccessSpecifier);
2360   if (Flags & MSDF_NoReturnType)
2361     OF = OutputFlags(OF | OF_NoReturnType);
2362   if (Flags & MSDF_NoMemberType)
2363     OF = OutputFlags(OF | OF_NoMemberType);
2364   if (Flags & MSDF_NoVariableType)
2365     OF = OutputFlags(OF | OF_NoVariableType);
2366 
2367   int InternalStatus = demangle_success;
2368   if (D.Error)
2369     InternalStatus = demangle_invalid_mangled_name;
2370   else if (!initializeOutputBuffer(Buf, N, OB, 1024))
2371     InternalStatus = demangle_memory_alloc_failure;
2372   else {
2373     AST->output(OB, OF);
2374     OB += '\0';
2375     if (N != nullptr)
2376       *N = OB.getCurrentPosition();
2377     Buf = OB.getBuffer();
2378   }
2379 
2380   if (Status)
2381     *Status = InternalStatus;
2382   return InternalStatus == demangle_success ? Buf : nullptr;
2383 }
2384