1 //===- MicrosoftDemangle.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for MSVC-style mangled symbols. 10 // 11 // This file has no dependencies on the rest of LLVM so that it can be 12 // easily reused in other programs such as libcxxabi. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/MicrosoftDemangle.h" 17 #include "llvm/Demangle/Demangle.h" 18 #include "llvm/Demangle/MicrosoftDemangleNodes.h" 19 20 #include "llvm/Demangle/DemangleConfig.h" 21 #include "llvm/Demangle/StringView.h" 22 #include "llvm/Demangle/Utility.h" 23 24 #include <array> 25 #include <cctype> 26 #include <cstdio> 27 #include <tuple> 28 29 using namespace llvm; 30 using namespace ms_demangle; 31 32 static bool startsWithDigit(StringView S) { 33 return !S.empty() && std::isdigit(S.front()); 34 } 35 36 37 struct NodeList { 38 Node *N = nullptr; 39 NodeList *Next = nullptr; 40 }; 41 42 static bool isMemberPointer(StringView MangledName, bool &Error) { 43 Error = false; 44 switch (MangledName.popFront()) { 45 case '$': 46 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an 47 // rvalue reference to a member. 48 return false; 49 case 'A': 50 // 'A' indicates a reference, and you cannot have a reference to a member 51 // function or member. 52 return false; 53 case 'P': 54 case 'Q': 55 case 'R': 56 case 'S': 57 // These 4 values indicate some kind of pointer, but we still don't know 58 // what. 59 break; 60 default: 61 // isMemberPointer() is called only if isPointerType() returns true, 62 // and it rejects other prefixes. 
63 DEMANGLE_UNREACHABLE; 64 } 65 66 // If it starts with a number, then 6 indicates a non-member function 67 // pointer, and 8 indicates a member function pointer. 68 if (startsWithDigit(MangledName)) { 69 if (MangledName[0] != '6' && MangledName[0] != '8') { 70 Error = true; 71 return false; 72 } 73 return (MangledName[0] == '8'); 74 } 75 76 // Remove ext qualifiers since those can appear on either type and are 77 // therefore not indicative. 78 MangledName.consumeFront('E'); // 64-bit 79 MangledName.consumeFront('I'); // restrict 80 MangledName.consumeFront('F'); // unaligned 81 82 if (MangledName.empty()) { 83 Error = true; 84 return false; 85 } 86 87 // The next value should be either ABCD (non-member) or QRST (member). 88 switch (MangledName.front()) { 89 case 'A': 90 case 'B': 91 case 'C': 92 case 'D': 93 return false; 94 case 'Q': 95 case 'R': 96 case 'S': 97 case 'T': 98 return true; 99 default: 100 Error = true; 101 return false; 102 } 103 } 104 105 static SpecialIntrinsicKind 106 consumeSpecialIntrinsicKind(StringView &MangledName) { 107 if (MangledName.consumeFront("?_7")) 108 return SpecialIntrinsicKind::Vftable; 109 if (MangledName.consumeFront("?_8")) 110 return SpecialIntrinsicKind::Vbtable; 111 if (MangledName.consumeFront("?_9")) 112 return SpecialIntrinsicKind::VcallThunk; 113 if (MangledName.consumeFront("?_A")) 114 return SpecialIntrinsicKind::Typeof; 115 if (MangledName.consumeFront("?_B")) 116 return SpecialIntrinsicKind::LocalStaticGuard; 117 if (MangledName.consumeFront("?_C")) 118 return SpecialIntrinsicKind::StringLiteralSymbol; 119 if (MangledName.consumeFront("?_P")) 120 return SpecialIntrinsicKind::UdtReturning; 121 if (MangledName.consumeFront("?_R0")) 122 return SpecialIntrinsicKind::RttiTypeDescriptor; 123 if (MangledName.consumeFront("?_R1")) 124 return SpecialIntrinsicKind::RttiBaseClassDescriptor; 125 if (MangledName.consumeFront("?_R2")) 126 return SpecialIntrinsicKind::RttiBaseClassArray; 127 if 
(MangledName.consumeFront("?_R3")) 128 return SpecialIntrinsicKind::RttiClassHierarchyDescriptor; 129 if (MangledName.consumeFront("?_R4")) 130 return SpecialIntrinsicKind::RttiCompleteObjLocator; 131 if (MangledName.consumeFront("?_S")) 132 return SpecialIntrinsicKind::LocalVftable; 133 if (MangledName.consumeFront("?__E")) 134 return SpecialIntrinsicKind::DynamicInitializer; 135 if (MangledName.consumeFront("?__F")) 136 return SpecialIntrinsicKind::DynamicAtexitDestructor; 137 if (MangledName.consumeFront("?__J")) 138 return SpecialIntrinsicKind::LocalStaticThreadGuard; 139 return SpecialIntrinsicKind::None; 140 } 141 142 static bool startsWithLocalScopePattern(StringView S) { 143 if (!S.consumeFront('?')) 144 return false; 145 146 size_t End = S.find('?'); 147 if (End == StringView::npos) 148 return false; 149 StringView Candidate = S.substr(0, End); 150 if (Candidate.empty()) 151 return false; 152 153 // \?[0-9]\? 154 // ?@? is the discriminator 0. 155 if (Candidate.size() == 1) 156 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); 157 158 // If it's not 0-9, then it's an encoded number terminated with an @ 159 if (Candidate.back() != '@') 160 return false; 161 Candidate = Candidate.dropBack(); 162 163 // An encoded number starts with B-P and all subsequent digits are in A-P. 164 // Note that the reason the first digit cannot be A is two fold. First, it 165 // would create an ambiguity with ?A which delimits the beginning of an 166 // anonymous namespace. Second, A represents 0, and you don't start a multi 167 // digit number with a leading 0. Presumably the anonymous namespace 168 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 
169 if (Candidate[0] < 'B' || Candidate[0] > 'P') 170 return false; 171 Candidate = Candidate.dropFront(); 172 while (!Candidate.empty()) { 173 if (Candidate[0] < 'A' || Candidate[0] > 'P') 174 return false; 175 Candidate = Candidate.dropFront(); 176 } 177 178 return true; 179 } 180 181 static bool isTagType(StringView S) { 182 switch (S.front()) { 183 case 'T': // union 184 case 'U': // struct 185 case 'V': // class 186 case 'W': // enum 187 return true; 188 } 189 return false; 190 } 191 192 static bool isCustomType(StringView S) { return S[0] == '?'; } 193 194 static bool isPointerType(StringView S) { 195 if (S.startsWith("$$Q")) // foo && 196 return true; 197 198 switch (S.front()) { 199 case 'A': // foo & 200 case 'P': // foo * 201 case 'Q': // foo *const 202 case 'R': // foo *volatile 203 case 'S': // foo *const volatile 204 return true; 205 } 206 return false; 207 } 208 209 static bool isArrayType(StringView S) { return S[0] == 'Y'; } 210 211 static bool isFunctionType(StringView S) { 212 return S.startsWith("$$A8@@") || S.startsWith("$$A6"); 213 } 214 215 static FunctionRefQualifier 216 demangleFunctionRefQualifier(StringView &MangledName) { 217 if (MangledName.consumeFront('G')) 218 return FunctionRefQualifier::Reference; 219 else if (MangledName.consumeFront('H')) 220 return FunctionRefQualifier::RValueReference; 221 return FunctionRefQualifier::None; 222 } 223 224 static std::pair<Qualifiers, PointerAffinity> 225 demanglePointerCVQualifiers(StringView &MangledName) { 226 if (MangledName.consumeFront("$$Q")) 227 return std::make_pair(Q_None, PointerAffinity::RValueReference); 228 229 switch (MangledName.popFront()) { 230 case 'A': 231 return std::make_pair(Q_None, PointerAffinity::Reference); 232 case 'P': 233 return std::make_pair(Q_None, PointerAffinity::Pointer); 234 case 'Q': 235 return std::make_pair(Q_Const, PointerAffinity::Pointer); 236 case 'R': 237 return std::make_pair(Q_Volatile, PointerAffinity::Pointer); 238 case 'S': 239 return 
std::make_pair(Qualifiers(Q_Const | Q_Volatile), 240 PointerAffinity::Pointer); 241 } 242 // This function is only called if isPointerType() returns true, 243 // and it only returns true for the six cases listed above. 244 DEMANGLE_UNREACHABLE; 245 } 246 247 StringView Demangler::copyString(StringView Borrowed) { 248 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1); 249 std::strcpy(Stable, Borrowed.begin()); 250 251 return {Stable, Borrowed.size()}; 252 } 253 254 SpecialTableSymbolNode * 255 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName, 256 SpecialIntrinsicKind K) { 257 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>(); 258 switch (K) { 259 case SpecialIntrinsicKind::Vftable: 260 NI->Name = "`vftable'"; 261 break; 262 case SpecialIntrinsicKind::Vbtable: 263 NI->Name = "`vbtable'"; 264 break; 265 case SpecialIntrinsicKind::LocalVftable: 266 NI->Name = "`local vftable'"; 267 break; 268 case SpecialIntrinsicKind::RttiCompleteObjLocator: 269 NI->Name = "`RTTI Complete Object Locator'"; 270 break; 271 default: 272 DEMANGLE_UNREACHABLE; 273 } 274 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 275 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>(); 276 STSN->Name = QN; 277 bool IsMember = false; 278 if (MangledName.empty()) { 279 Error = true; 280 return nullptr; 281 } 282 char Front = MangledName.popFront(); 283 if (Front != '6' && Front != '7') { 284 Error = true; 285 return nullptr; 286 } 287 288 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); 289 if (!MangledName.consumeFront('@')) 290 STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); 291 return STSN; 292 } 293 294 LocalStaticGuardVariableNode * 295 Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) { 296 LocalStaticGuardIdentifierNode *LSGI = 297 Arena.alloc<LocalStaticGuardIdentifierNode>(); 298 LSGI->IsThread = IsThread; 299 QualifiedNameNode *QN = 
demangleNameScopeChain(MangledName, LSGI); 300 LocalStaticGuardVariableNode *LSGVN = 301 Arena.alloc<LocalStaticGuardVariableNode>(); 302 LSGVN->Name = QN; 303 304 if (MangledName.consumeFront("4IA")) 305 LSGVN->IsVisible = false; 306 else if (MangledName.consumeFront("5")) 307 LSGVN->IsVisible = true; 308 else { 309 Error = true; 310 return nullptr; 311 } 312 313 if (!MangledName.empty()) 314 LSGI->ScopeIndex = demangleUnsigned(MangledName); 315 return LSGVN; 316 } 317 318 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena, 319 StringView Name) { 320 NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>(); 321 Id->Name = Name; 322 return Id; 323 } 324 325 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 326 IdentifierNode *Identifier) { 327 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 328 QN->Components = Arena.alloc<NodeArrayNode>(); 329 QN->Components->Count = 1; 330 QN->Components->Nodes = Arena.allocArray<Node *>(1); 331 QN->Components->Nodes[0] = Identifier; 332 return QN; 333 } 334 335 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 336 StringView Name) { 337 NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name); 338 return synthesizeQualifiedName(Arena, Id); 339 } 340 341 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena, 342 TypeNode *Type, 343 StringView VariableName) { 344 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 345 VSN->Type = Type; 346 VSN->Name = synthesizeQualifiedName(Arena, VariableName); 347 return VSN; 348 } 349 350 VariableSymbolNode *Demangler::demangleUntypedVariable( 351 ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) { 352 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName); 353 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 354 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 355 VSN->Name = QN; 356 if 
(MangledName.consumeFront("8")) 357 return VSN; 358 359 Error = true; 360 return nullptr; 361 } 362 363 VariableSymbolNode * 364 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 365 StringView &MangledName) { 366 RttiBaseClassDescriptorNode *RBCDN = 367 Arena.alloc<RttiBaseClassDescriptorNode>(); 368 RBCDN->NVOffset = demangleUnsigned(MangledName); 369 RBCDN->VBPtrOffset = demangleSigned(MangledName); 370 RBCDN->VBTableOffset = demangleUnsigned(MangledName); 371 RBCDN->Flags = demangleUnsigned(MangledName); 372 if (Error) 373 return nullptr; 374 375 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 376 VSN->Name = demangleNameScopeChain(MangledName, RBCDN); 377 MangledName.consumeFront('8'); 378 return VSN; 379 } 380 381 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName, 382 bool IsDestructor) { 383 DynamicStructorIdentifierNode *DSIN = 384 Arena.alloc<DynamicStructorIdentifierNode>(); 385 DSIN->IsDestructor = IsDestructor; 386 387 bool IsKnownStaticDataMember = false; 388 if (MangledName.consumeFront('?')) 389 IsKnownStaticDataMember = true; 390 391 SymbolNode *Symbol = demangleDeclarator(MangledName); 392 if (Error) 393 return nullptr; 394 395 FunctionSymbolNode *FSN = nullptr; 396 397 if (Symbol->kind() == NodeKind::VariableSymbol) { 398 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol); 399 400 // Older versions of clang mangled this type of symbol incorrectly. They 401 // would omit the leading ? and they would only emit a single @ at the end. 402 // The correct mangling is a leading ? and 2 trailing @ signs. Handle 403 // both cases. 404 int AtCount = IsKnownStaticDataMember ? 
2 : 1; 405 for (int I = 0; I < AtCount; ++I) { 406 if (MangledName.consumeFront('@')) 407 continue; 408 Error = true; 409 return nullptr; 410 } 411 412 FSN = demangleFunctionEncoding(MangledName); 413 if (FSN) 414 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 415 } else { 416 if (IsKnownStaticDataMember) { 417 // This was supposed to be a static data member, but we got a function. 418 Error = true; 419 return nullptr; 420 } 421 422 FSN = static_cast<FunctionSymbolNode *>(Symbol); 423 DSIN->Name = Symbol->Name; 424 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 425 } 426 427 return FSN; 428 } 429 430 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) { 431 SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName); 432 433 switch (SIK) { 434 case SpecialIntrinsicKind::None: 435 return nullptr; 436 case SpecialIntrinsicKind::StringLiteralSymbol: 437 return demangleStringLiteral(MangledName); 438 case SpecialIntrinsicKind::Vftable: 439 case SpecialIntrinsicKind::Vbtable: 440 case SpecialIntrinsicKind::LocalVftable: 441 case SpecialIntrinsicKind::RttiCompleteObjLocator: 442 return demangleSpecialTableSymbolNode(MangledName, SIK); 443 case SpecialIntrinsicKind::VcallThunk: 444 return demangleVcallThunkNode(MangledName); 445 case SpecialIntrinsicKind::LocalStaticGuard: 446 return demangleLocalStaticGuard(MangledName, /*IsThread=*/false); 447 case SpecialIntrinsicKind::LocalStaticThreadGuard: 448 return demangleLocalStaticGuard(MangledName, /*IsThread=*/true); 449 case SpecialIntrinsicKind::RttiTypeDescriptor: { 450 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 451 if (Error) 452 break; 453 if (!MangledName.consumeFront("@8")) 454 break; 455 if (!MangledName.empty()) 456 break; 457 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'"); 458 } 459 case SpecialIntrinsicKind::RttiBaseClassArray: 460 return demangleUntypedVariable(Arena, MangledName, 461 "`RTTI Base Class Array'"); 462 case 
SpecialIntrinsicKind::RttiClassHierarchyDescriptor: 463 return demangleUntypedVariable(Arena, MangledName, 464 "`RTTI Class Hierarchy Descriptor'"); 465 case SpecialIntrinsicKind::RttiBaseClassDescriptor: 466 return demangleRttiBaseClassDescriptorNode(Arena, MangledName); 467 case SpecialIntrinsicKind::DynamicInitializer: 468 return demangleInitFiniStub(MangledName, /*IsDestructor=*/false); 469 case SpecialIntrinsicKind::DynamicAtexitDestructor: 470 return demangleInitFiniStub(MangledName, /*IsDestructor=*/true); 471 case SpecialIntrinsicKind::Typeof: 472 case SpecialIntrinsicKind::UdtReturning: 473 // It's unclear which tools produces these manglings, so demangling 474 // support is not (yet?) implemented. 475 break; 476 case SpecialIntrinsicKind::Unknown: 477 DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind. 478 } 479 Error = true; 480 return nullptr; 481 } 482 483 IdentifierNode * 484 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) { 485 assert(MangledName.startsWith('?')); 486 MangledName = MangledName.dropFront(); 487 if (MangledName.empty()) { 488 Error = true; 489 return nullptr; 490 } 491 492 if (MangledName.consumeFront("__")) 493 return demangleFunctionIdentifierCode( 494 MangledName, FunctionIdentifierCodeGroup::DoubleUnder); 495 if (MangledName.consumeFront("_")) 496 return demangleFunctionIdentifierCode(MangledName, 497 FunctionIdentifierCodeGroup::Under); 498 return demangleFunctionIdentifierCode(MangledName, 499 FunctionIdentifierCodeGroup::Basic); 500 } 501 502 StructorIdentifierNode * 503 Demangler::demangleStructorIdentifier(StringView &MangledName, 504 bool IsDestructor) { 505 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>(); 506 N->IsDestructor = IsDestructor; 507 return N; 508 } 509 510 ConversionOperatorIdentifierNode * 511 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) { 512 ConversionOperatorIdentifierNode *N = 513 
Arena.alloc<ConversionOperatorIdentifierNode>(); 514 return N; 515 } 516 517 LiteralOperatorIdentifierNode * 518 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) { 519 LiteralOperatorIdentifierNode *N = 520 Arena.alloc<LiteralOperatorIdentifierNode>(); 521 N->Name = demangleSimpleString(MangledName, /*Memorize=*/false); 522 return N; 523 } 524 525 IntrinsicFunctionKind 526 Demangler::translateIntrinsicFunctionCode(char CH, 527 FunctionIdentifierCodeGroup Group) { 528 using IFK = IntrinsicFunctionKind; 529 if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) { 530 Error = true; 531 return IFK::None; 532 } 533 534 // Not all ? identifiers are intrinsics *functions*. This function only maps 535 // operator codes for the special functions, all others are handled elsewhere, 536 // hence the IFK::None entries in the table. 537 static IFK Basic[36] = { 538 IFK::None, // ?0 # Foo::Foo() 539 IFK::None, // ?1 # Foo::~Foo() 540 IFK::New, // ?2 # operator new 541 IFK::Delete, // ?3 # operator delete 542 IFK::Assign, // ?4 # operator= 543 IFK::RightShift, // ?5 # operator>> 544 IFK::LeftShift, // ?6 # operator<< 545 IFK::LogicalNot, // ?7 # operator! 
546 IFK::Equals, // ?8 # operator== 547 IFK::NotEquals, // ?9 # operator!= 548 IFK::ArraySubscript, // ?A # operator[] 549 IFK::None, // ?B # Foo::operator <type>() 550 IFK::Pointer, // ?C # operator-> 551 IFK::Dereference, // ?D # operator* 552 IFK::Increment, // ?E # operator++ 553 IFK::Decrement, // ?F # operator-- 554 IFK::Minus, // ?G # operator- 555 IFK::Plus, // ?H # operator+ 556 IFK::BitwiseAnd, // ?I # operator& 557 IFK::MemberPointer, // ?J # operator->* 558 IFK::Divide, // ?K # operator/ 559 IFK::Modulus, // ?L # operator% 560 IFK::LessThan, // ?M operator< 561 IFK::LessThanEqual, // ?N operator<= 562 IFK::GreaterThan, // ?O operator> 563 IFK::GreaterThanEqual, // ?P operator>= 564 IFK::Comma, // ?Q operator, 565 IFK::Parens, // ?R operator() 566 IFK::BitwiseNot, // ?S operator~ 567 IFK::BitwiseXor, // ?T operator^ 568 IFK::BitwiseOr, // ?U operator| 569 IFK::LogicalAnd, // ?V operator&& 570 IFK::LogicalOr, // ?W operator|| 571 IFK::TimesEqual, // ?X operator*= 572 IFK::PlusEqual, // ?Y operator+= 573 IFK::MinusEqual, // ?Z operator-= 574 }; 575 static IFK Under[36] = { 576 IFK::DivEqual, // ?_0 operator/= 577 IFK::ModEqual, // ?_1 operator%= 578 IFK::RshEqual, // ?_2 operator>>= 579 IFK::LshEqual, // ?_3 operator<<= 580 IFK::BitwiseAndEqual, // ?_4 operator&= 581 IFK::BitwiseOrEqual, // ?_5 operator|= 582 IFK::BitwiseXorEqual, // ?_6 operator^= 583 IFK::None, // ?_7 # vftable 584 IFK::None, // ?_8 # vbtable 585 IFK::None, // ?_9 # vcall 586 IFK::None, // ?_A # typeof 587 IFK::None, // ?_B # local static guard 588 IFK::None, // ?_C # string literal 589 IFK::VbaseDtor, // ?_D # vbase destructor 590 IFK::VecDelDtor, // ?_E # vector deleting destructor 591 IFK::DefaultCtorClosure, // ?_F # default constructor closure 592 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor 593 IFK::VecCtorIter, // ?_H # vector constructor iterator 594 IFK::VecDtorIter, // ?_I # vector destructor iterator 595 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor 
iterator 596 IFK::VdispMap, // ?_K # virtual displacement map 597 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator 598 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator 599 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator 600 IFK::CopyCtorClosure, // ?_O # copy constructor closure 601 IFK::None, // ?_P<name> # udt returning <name> 602 IFK::None, // ?_Q # <unknown> 603 IFK::None, // ?_R0 - ?_R4 # RTTI Codes 604 IFK::None, // ?_S # local vftable 605 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure 606 IFK::ArrayNew, // ?_U operator new[] 607 IFK::ArrayDelete, // ?_V operator delete[] 608 IFK::None, // ?_W <unused> 609 IFK::None, // ?_X <unused> 610 IFK::None, // ?_Y <unused> 611 IFK::None, // ?_Z <unused> 612 }; 613 static IFK DoubleUnder[36] = { 614 IFK::None, // ?__0 <unused> 615 IFK::None, // ?__1 <unused> 616 IFK::None, // ?__2 <unused> 617 IFK::None, // ?__3 <unused> 618 IFK::None, // ?__4 <unused> 619 IFK::None, // ?__5 <unused> 620 IFK::None, // ?__6 <unused> 621 IFK::None, // ?__7 <unused> 622 IFK::None, // ?__8 <unused> 623 IFK::None, // ?__9 <unused> 624 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator 625 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator 626 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator 627 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter 628 IFK::None, // ?__E dynamic initializer for `T' 629 IFK::None, // ?__F dynamic atexit destructor for `T' 630 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter 631 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter 632 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor 633 // iter 634 IFK::None, // ?__J local static thread guard 635 IFK::None, // ?__K operator ""_name 636 IFK::CoAwait, // ?__L operator co_await 637 IFK::Spaceship, // ?__M operator<=> 638 IFK::None, // ?__N <unused> 639 IFK::None, // ?__O <unused> 640 IFK::None, 
// ?__P <unused> 641 IFK::None, // ?__Q <unused> 642 IFK::None, // ?__R <unused> 643 IFK::None, // ?__S <unused> 644 IFK::None, // ?__T <unused> 645 IFK::None, // ?__U <unused> 646 IFK::None, // ?__V <unused> 647 IFK::None, // ?__W <unused> 648 IFK::None, // ?__X <unused> 649 IFK::None, // ?__Y <unused> 650 IFK::None, // ?__Z <unused> 651 }; 652 653 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10); 654 switch (Group) { 655 case FunctionIdentifierCodeGroup::Basic: 656 return Basic[Index]; 657 case FunctionIdentifierCodeGroup::Under: 658 return Under[Index]; 659 case FunctionIdentifierCodeGroup::DoubleUnder: 660 return DoubleUnder[Index]; 661 } 662 DEMANGLE_UNREACHABLE; 663 } 664 665 IdentifierNode * 666 Demangler::demangleFunctionIdentifierCode(StringView &MangledName, 667 FunctionIdentifierCodeGroup Group) { 668 if (MangledName.empty()) { 669 Error = true; 670 return nullptr; 671 } 672 switch (Group) { 673 case FunctionIdentifierCodeGroup::Basic: 674 switch (char CH = MangledName.popFront()) { 675 case '0': 676 case '1': 677 return demangleStructorIdentifier(MangledName, CH == '1'); 678 case 'B': 679 return demangleConversionOperatorIdentifier(MangledName); 680 default: 681 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 682 translateIntrinsicFunctionCode(CH, Group)); 683 } 684 case FunctionIdentifierCodeGroup::Under: 685 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 686 translateIntrinsicFunctionCode(MangledName.popFront(), Group)); 687 case FunctionIdentifierCodeGroup::DoubleUnder: 688 switch (char CH = MangledName.popFront()) { 689 case 'K': 690 return demangleLiteralOperatorIdentifier(MangledName); 691 default: 692 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 693 translateIntrinsicFunctionCode(CH, Group)); 694 } 695 } 696 697 DEMANGLE_UNREACHABLE; 698 } 699 700 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName, 701 QualifiedNameNode *Name) { 702 if (MangledName.empty()) { 703 Error = true; 704 return 
nullptr; 705 } 706 707 // Read a variable. 708 switch (MangledName.front()) { 709 case '0': 710 case '1': 711 case '2': 712 case '3': 713 case '4': { 714 StorageClass SC = demangleVariableStorageClass(MangledName); 715 return demangleVariableEncoding(MangledName, SC); 716 } 717 } 718 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName); 719 720 IdentifierNode *UQN = Name->getUnqualifiedIdentifier(); 721 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 722 ConversionOperatorIdentifierNode *COIN = 723 static_cast<ConversionOperatorIdentifierNode *>(UQN); 724 if (FSN) 725 COIN->TargetType = FSN->Signature->ReturnType; 726 } 727 return FSN; 728 } 729 730 SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) { 731 // What follows is a main symbol name. This may include namespaces or class 732 // back references. 733 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 734 if (Error) 735 return nullptr; 736 737 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 738 if (Error) 739 return nullptr; 740 Symbol->Name = QN; 741 742 IdentifierNode *UQN = QN->getUnqualifiedIdentifier(); 743 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 744 ConversionOperatorIdentifierNode *COIN = 745 static_cast<ConversionOperatorIdentifierNode *>(UQN); 746 if (!COIN->TargetType) { 747 Error = true; 748 return nullptr; 749 } 750 } 751 return Symbol; 752 } 753 754 SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) { 755 assert(MangledName.startsWith("??@")); 756 // This is an MD5 mangled name. We can't demangle it, just return the 757 // mangled name. 758 // An MD5 mangled name is ??@ followed by 32 characters and a terminating @. 
759 size_t MD5Last = MangledName.find('@', strlen("??@")); 760 if (MD5Last == StringView::npos) { 761 Error = true; 762 return nullptr; 763 } 764 const char *Start = MangledName.begin(); 765 MangledName = MangledName.dropFront(MD5Last + 1); 766 767 // There are two additional special cases for MD5 names: 768 // 1. For complete object locators where the object name is long enough 769 // for the object to have an MD5 name, the complete object locator is 770 // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual 771 // leading "??_R4". This is handled here. 772 // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after 773 // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8 774 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet 775 // demangle catchable types anywhere, this isn't handled for MD5 names 776 // either. 777 MangledName.consumeFront("??_R4@"); 778 779 StringView MD5(Start, MangledName.begin()); 780 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol); 781 S->Name = synthesizeQualifiedName(Arena, MD5); 782 783 return S; 784 } 785 786 SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) { 787 assert(MangledName.startsWith('.')); 788 MangledName.consumeFront('.'); 789 790 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 791 if (Error || !MangledName.empty()) { 792 Error = true; 793 return nullptr; 794 } 795 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'"); 796 } 797 798 // Parser entry point. 799 SymbolNode *Demangler::parse(StringView &MangledName) { 800 // Typeinfo names are strings stored in RTTI data. They're not symbol names. 801 // It's still useful to demangle them. They're the only demangled entity 802 // that doesn't start with a "?" but a ".". 
803 if (MangledName.startsWith('.')) 804 return demangleTypeinfoName(MangledName); 805 806 if (MangledName.startsWith("??@")) 807 return demangleMD5Name(MangledName); 808 809 // MSVC-style mangled symbols must start with '?'. 810 if (!MangledName.startsWith('?')) { 811 Error = true; 812 return nullptr; 813 } 814 815 MangledName.consumeFront('?'); 816 817 // ?$ is a template instantiation, but all other names that start with ? are 818 // operators / special names. 819 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName)) 820 return SI; 821 822 return demangleDeclarator(MangledName); 823 } 824 825 TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) { 826 if (!MangledName.consumeFront(".?A")) 827 return nullptr; 828 MangledName.consumeFront(".?A"); 829 if (MangledName.empty()) 830 return nullptr; 831 832 return demangleClassType(MangledName); 833 } 834 835 // <type-encoding> ::= <storage-class> <variable-type> 836 // <storage-class> ::= 0 # private static member 837 // ::= 1 # protected static member 838 // ::= 2 # public static member 839 // ::= 3 # global 840 // ::= 4 # static local 841 842 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName, 843 StorageClass SC) { 844 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 845 846 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop); 847 VSN->SC = SC; 848 849 if (Error) 850 return nullptr; 851 852 // <variable-type> ::= <type> <cvr-qualifiers> 853 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 854 switch (VSN->Type->kind()) { 855 case NodeKind::PointerType: { 856 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type); 857 858 Qualifiers ExtraChildQuals = Q_None; 859 PTN->Quals = Qualifiers(VSN->Type->Quals | 860 demanglePointerExtQualifiers(MangledName)); 861 862 bool IsMember = false; 863 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); 864 865 if (PTN->ClassParent) { 866 QualifiedNameNode *BackRefName 
= 867 demangleFullyQualifiedTypeName(MangledName); 868 (void)BackRefName; 869 } 870 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals); 871 872 break; 873 } 874 default: 875 VSN->Type->Quals = demangleQualifiers(MangledName).first; 876 break; 877 } 878 879 return VSN; 880 } 881 882 // Sometimes numbers are encoded in mangled symbols. For example, 883 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 884 // length 20), so we need some way to embed numbers as part of symbols. 885 // This function parses it. 886 // 887 // <number> ::= [?] <non-negative integer> 888 // 889 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 890 // ::= <hex digit>+ @ # when Number == 0 or >= 10 891 // 892 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 893 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) { 894 bool IsNegative = MangledName.consumeFront('?'); 895 896 if (startsWithDigit(MangledName)) { 897 uint64_t Ret = MangledName[0] - '0' + 1; 898 MangledName = MangledName.dropFront(1); 899 return {Ret, IsNegative}; 900 } 901 902 uint64_t Ret = 0; 903 for (size_t i = 0; i < MangledName.size(); ++i) { 904 char C = MangledName[i]; 905 if (C == '@') { 906 MangledName = MangledName.dropFront(i + 1); 907 return {Ret, IsNegative}; 908 } 909 if ('A' <= C && C <= 'P') { 910 Ret = (Ret << 4) + (C - 'A'); 911 continue; 912 } 913 break; 914 } 915 916 Error = true; 917 return {0ULL, false}; 918 } 919 920 uint64_t Demangler::demangleUnsigned(StringView &MangledName) { 921 bool IsNegative = false; 922 uint64_t Number = 0; 923 std::tie(Number, IsNegative) = demangleNumber(MangledName); 924 if (IsNegative) 925 Error = true; 926 return Number; 927 } 928 929 int64_t Demangler::demangleSigned(StringView &MangledName) { 930 bool IsNegative = false; 931 uint64_t Number = 0; 932 std::tie(Number, IsNegative) = demangleNumber(MangledName); 933 if (Number > INT64_MAX) 934 Error = true; 935 int64_t I = 
static_cast<int64_t>(Number); 936 return IsNegative ? -I : I; 937 } 938 939 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 940 // Memorize it. 941 void Demangler::memorizeString(StringView S) { 942 if (Backrefs.NamesCount >= BackrefContext::Max) 943 return; 944 for (size_t i = 0; i < Backrefs.NamesCount; ++i) 945 if (S == Backrefs.Names[i]->Name) 946 return; 947 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>(); 948 N->Name = S; 949 Backrefs.Names[Backrefs.NamesCount++] = N; 950 } 951 952 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) { 953 assert(startsWithDigit(MangledName)); 954 955 size_t I = MangledName[0] - '0'; 956 if (I >= Backrefs.NamesCount) { 957 Error = true; 958 return nullptr; 959 } 960 961 MangledName = MangledName.dropFront(); 962 return Backrefs.Names[I]; 963 } 964 965 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { 966 // Render this class template name into a string buffer so that we can 967 // memorize it for the purpose of back-referencing. 968 OutputBuffer OB; 969 if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024)) 970 // FIXME: Propagate out-of-memory as an error? 
std::terminate();
  Identifier->output(OB, OF_Default);
  // NUL-terminate so the rendered text can be handed to copyString() as a C
  // string.
  OB << '\0';
  char *Name = OB.getBuffer();

  // Memorize a copy made by copyString(); the temporary buffer is released
  // right after.
  StringView Owned = copyString(Name);
  memorizeString(Owned);
  std::free(Name);
}

// Parses a template instantiation name. MangledName must start with "?$",
// which is consumed here.
//
// The template name and its parameter list are parsed against a fresh, empty
// back-reference table: the current table is swapped out before parsing and
// swapped back afterwards, regardless of success.
//
// Returns nullptr if Error is set. When NBB contains NBB_Template the result
// is additionally memorized for name back-referencing, and structor /
// conversion-operator identifiers are rejected (Error set, nullptr returned).
IdentifierNode *
Demangler::demangleTemplateInstantiationName(StringView &MangledName,
                                             NameBackrefBehavior NBB) {
  assert(MangledName.startsWith("?$"));
  MangledName.consumeFront("?$");

  BackrefContext OuterContext;
  std::swap(OuterContext, Backrefs);

  IdentifierNode *Identifier =
      demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
  if (!Error)
    Identifier->TemplateParams = demangleTemplateParameterList(MangledName);

  // Restore the enclosing back-reference table before any early return.
  std::swap(OuterContext, Backrefs);
  if (Error)
    return nullptr;

  if (NBB & NBB_Template) {
    // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
    // Structors and conversion operators only make sense in a leaf name, so
    // reject them in NBB_Template contexts.
    if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier ||
        Identifier->kind() == NodeKind::StructorIdentifier) {
      Error = true;
      return nullptr;
    }

    memorizeIdentifier(Identifier);
  }

  return Identifier;
}

// Parses a simple '@'-terminated name via demangleSimpleString() and wraps it
// in a newly allocated NamedIdentifierNode. Returns nullptr if Error is set
// after parsing the string.
NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName,
                                                   bool Memorize) {
  StringView S = demangleSimpleString(MangledName, Memorize);
  if (Error)
    return nullptr;

  NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
  Name->Name = S;
  return Name;
}

// True if C is one of the 16 "rebased" hex digits 'A'..'P'.
static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); }

// Maps a rebased hex digit to its value: 'A' -> 0 ... 'P' -> 15. With ASCII
// character codes both arms of the conditional evaluate to C - 'A'.
static uint8_t rebasedHexDigitToNumber(char C) {
  assert(isRebasedHexDigit(C));
  return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
}

// Decodes one byte of a mangled string literal and consumes its encoding.
// MangledName must not be empty. Recognized encodings:
//
//   <any char other than '?'>  the character itself
//   ?$<X><Y>                   byte from two rebased hex digits [A-P]
//   ?<digit>                   one of , / \ : . space \n \t ' -  (for 0..9)
//   ?a .. ?z                   0xE1 .. 0xFA
//   ?A .. ?Z                   0xC1 .. 0xDA
//
// Anything else sets Error and returns 0. On the error path the '?' (and a
// following '$', if present) has already been consumed.
uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
  assert(!MangledName.empty());
  if (!MangledName.startsWith('?'))
    return MangledName.popFront();

  MangledName = MangledName.dropFront();
  if (MangledName.empty())
    goto CharLiteralError;

  if (MangledName.consumeFront('$')) {
    // Two hex digits
    if (MangledName.size() < 2)
      goto CharLiteralError;
    StringView Nibbles = MangledName.substr(0, 2);
    if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
      goto CharLiteralError;
    // Don't append the null terminator.
    uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
    uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
    MangledName = MangledName.dropFront(2);
    return (C1 << 4) | C2;
  }

  if (startsWithDigit(MangledName)) {
    // Indexed by the digit's value, 0..9.
    const char *Lookup = ",/\\:. \n\t'-";
    char C = Lookup[MangledName[0] - '0'];
    MangledName = MangledName.dropFront();
    return C;
  }

  if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
    // Indexed by letter: 'a' -> 0xE1 ... 'z' -> 0xFA.
    char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
                       '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
                       '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
                       '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
    char C = Lookup[MangledName[0] - 'a'];
    MangledName = MangledName.dropFront();
    return C;
  }

  if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
    // Indexed by letter: 'A' -> 0xC1 ... 'Z' -> 0xDA.
    char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
                       '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
                       '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
                       '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
    char C = Lookup[MangledName[0] - 'A'];
    MangledName = MangledName.dropFront();
    return C;
  }

CharLiteralError:
  Error = true;
  return '\0';
}

wchar_t
Demangler::demangleWcharLiteral(StringView &MangledName) { 1089 uint8_t C1, C2; 1090 1091 C1 = demangleCharLiteral(MangledName); 1092 if (Error || MangledName.empty()) 1093 goto WCharLiteralError; 1094 C2 = demangleCharLiteral(MangledName); 1095 if (Error) 1096 goto WCharLiteralError; 1097 1098 return ((wchar_t)C1 << 8) | (wchar_t)C2; 1099 1100 WCharLiteralError: 1101 Error = true; 1102 return L'\0'; 1103 } 1104 1105 static void writeHexDigit(char *Buffer, uint8_t Digit) { 1106 assert(Digit <= 15); 1107 *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); 1108 } 1109 1110 static void outputHex(OutputBuffer &OB, unsigned C) { 1111 assert (C != 0); 1112 1113 // It's easier to do the math if we can work from right to left, but we need 1114 // to print the numbers from left to right. So render this into a temporary 1115 // buffer first, then output the temporary buffer. Each byte is of the form 1116 // \xAB, which means that each byte needs 4 characters. Since there are at 1117 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. 1118 char TempBuffer[17]; 1119 1120 ::memset(TempBuffer, 0, sizeof(TempBuffer)); 1121 constexpr int MaxPos = sizeof(TempBuffer) - 1; 1122 1123 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0. 
1124 while (C != 0) { 1125 for (int I = 0; I < 2; ++I) { 1126 writeHexDigit(&TempBuffer[Pos--], C % 16); 1127 C /= 16; 1128 } 1129 } 1130 TempBuffer[Pos--] = 'x'; 1131 assert(Pos >= 0); 1132 TempBuffer[Pos--] = '\\'; 1133 OB << StringView(&TempBuffer[Pos + 1]); 1134 } 1135 1136 static void outputEscapedChar(OutputBuffer &OB, unsigned C) { 1137 switch (C) { 1138 case '\0': // nul 1139 OB << "\\0"; 1140 return; 1141 case '\'': // single quote 1142 OB << "\\\'"; 1143 return; 1144 case '\"': // double quote 1145 OB << "\\\""; 1146 return; 1147 case '\\': // backslash 1148 OB << "\\\\"; 1149 return; 1150 case '\a': // bell 1151 OB << "\\a"; 1152 return; 1153 case '\b': // backspace 1154 OB << "\\b"; 1155 return; 1156 case '\f': // form feed 1157 OB << "\\f"; 1158 return; 1159 case '\n': // new line 1160 OB << "\\n"; 1161 return; 1162 case '\r': // carriage return 1163 OB << "\\r"; 1164 return; 1165 case '\t': // tab 1166 OB << "\\t"; 1167 return; 1168 case '\v': // vertical tab 1169 OB << "\\v"; 1170 return; 1171 default: 1172 break; 1173 } 1174 1175 if (C > 0x1F && C < 0x7F) { 1176 // Standard ascii char. 1177 OB << (char)C; 1178 return; 1179 } 1180 1181 outputHex(OB, C); 1182 } 1183 1184 static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) { 1185 const uint8_t *End = StringBytes + Length - 1; 1186 unsigned Count = 0; 1187 while (Length > 0 && *End == 0) { 1188 --Length; 1189 --End; 1190 ++Count; 1191 } 1192 return Count; 1193 } 1194 1195 static unsigned countEmbeddedNulls(const uint8_t *StringBytes, 1196 unsigned Length) { 1197 unsigned Result = 0; 1198 for (unsigned I = 0; I < Length; ++I) { 1199 if (*StringBytes++ == 0) 1200 ++Result; 1201 } 1202 return Result; 1203 } 1204 1205 // A mangled (non-wide) string literal stores the total length of the string it 1206 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text 1207 // (passed in StringBytes, NumChars). 
1208 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, 1209 uint64_t NumBytes) { 1210 assert(NumBytes > 0); 1211 1212 // If the number of bytes is odd, this is guaranteed to be a char string. 1213 if (NumBytes % 2 == 1) 1214 return 1; 1215 1216 // All strings can encode at most 32 bytes of data. If it's less than that, 1217 // then we encoded the entire string. In this case we check for a 1-byte, 1218 // 2-byte, or 4-byte null terminator. 1219 if (NumBytes < 32) { 1220 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); 1221 if (TrailingNulls >= 4 && NumBytes % 4 == 0) 1222 return 4; 1223 if (TrailingNulls >= 2) 1224 return 2; 1225 return 1; 1226 } 1227 1228 // The whole string was not able to be encoded. Try to look at embedded null 1229 // terminators to guess. The heuristic is that we count all embedded null 1230 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3 1231 // are null, it's a char16. Otherwise it's a char8. This obviously isn't 1232 // perfect and is biased towards languages that have ascii alphabets, but this 1233 // was always going to be best effort since the encoding is lossy. 
1234 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); 1235 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0) 1236 return 4; 1237 if (Nulls >= NumChars / 3) 1238 return 2; 1239 return 1; 1240 } 1241 1242 static unsigned decodeMultiByteChar(const uint8_t *StringBytes, 1243 unsigned CharIndex, unsigned CharBytes) { 1244 assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); 1245 unsigned Offset = CharIndex * CharBytes; 1246 unsigned Result = 0; 1247 StringBytes = StringBytes + Offset; 1248 for (unsigned I = 0; I < CharBytes; ++I) { 1249 unsigned C = static_cast<unsigned>(StringBytes[I]); 1250 Result |= C << (8 * I); 1251 } 1252 return Result; 1253 } 1254 1255 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) { 1256 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>(); 1257 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>(); 1258 FSN->Signature = Arena.alloc<ThunkSignatureNode>(); 1259 FSN->Signature->FunctionClass = FC_NoParameterList; 1260 1261 FSN->Name = demangleNameScopeChain(MangledName, VTIN); 1262 if (!Error) 1263 Error = !MangledName.consumeFront("$B"); 1264 if (!Error) 1265 VTIN->OffsetInVTable = demangleUnsigned(MangledName); 1266 if (!Error) 1267 Error = !MangledName.consumeFront('A'); 1268 if (!Error) 1269 FSN->Signature->CallConvention = demangleCallingConvention(MangledName); 1270 return (Error) ? nullptr : FSN; 1271 } 1272 1273 EncodedStringLiteralNode * 1274 Demangler::demangleStringLiteral(StringView &MangledName) { 1275 // This function uses goto, so declare all variables up front. 1276 OutputBuffer OB; 1277 StringView CRC; 1278 uint64_t StringByteSize; 1279 bool IsWcharT = false; 1280 bool IsNegative = false; 1281 size_t CrcEndPos = 0; 1282 char *ResultBuffer = nullptr; 1283 1284 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>(); 1285 1286 // Must happen before the first `goto StringLiteralError`. 
1287 if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024)) 1288 // FIXME: Propagate out-of-memory as an error? 1289 std::terminate(); 1290 1291 // Prefix indicating the beginning of a string literal 1292 if (!MangledName.consumeFront("@_")) 1293 goto StringLiteralError; 1294 if (MangledName.empty()) 1295 goto StringLiteralError; 1296 1297 // Char Type (regular or wchar_t) 1298 switch (MangledName.popFront()) { 1299 case '1': 1300 IsWcharT = true; 1301 DEMANGLE_FALLTHROUGH; 1302 case '0': 1303 break; 1304 default: 1305 goto StringLiteralError; 1306 } 1307 1308 // Encoded Length 1309 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); 1310 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1)) 1311 goto StringLiteralError; 1312 1313 // CRC 32 (always 8 characters plus a terminator) 1314 CrcEndPos = MangledName.find('@'); 1315 if (CrcEndPos == StringView::npos) 1316 goto StringLiteralError; 1317 CRC = MangledName.substr(0, CrcEndPos); 1318 MangledName = MangledName.dropFront(CrcEndPos + 1); 1319 if (MangledName.empty()) 1320 goto StringLiteralError; 1321 1322 if (IsWcharT) { 1323 Result->Char = CharKind::Wchar; 1324 if (StringByteSize > 64) 1325 Result->IsTruncated = true; 1326 1327 while (!MangledName.consumeFront('@')) { 1328 if (MangledName.size() < 2) 1329 goto StringLiteralError; 1330 wchar_t W = demangleWcharLiteral(MangledName); 1331 if (StringByteSize != 2 || Result->IsTruncated) 1332 outputEscapedChar(OB, W); 1333 StringByteSize -= 2; 1334 if (Error) 1335 goto StringLiteralError; 1336 } 1337 } else { 1338 // The max byte length is actually 32, but some compilers mangled strings 1339 // incorrectly, so we have to assume it can go higher. 
1340 constexpr unsigned MaxStringByteLength = 32 * 4; 1341 uint8_t StringBytes[MaxStringByteLength]; 1342 1343 unsigned BytesDecoded = 0; 1344 while (!MangledName.consumeFront('@')) { 1345 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength) 1346 goto StringLiteralError; 1347 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); 1348 } 1349 1350 if (StringByteSize > BytesDecoded) 1351 Result->IsTruncated = true; 1352 1353 unsigned CharBytes = 1354 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); 1355 assert(StringByteSize % CharBytes == 0); 1356 switch (CharBytes) { 1357 case 1: 1358 Result->Char = CharKind::Char; 1359 break; 1360 case 2: 1361 Result->Char = CharKind::Char16; 1362 break; 1363 case 4: 1364 Result->Char = CharKind::Char32; 1365 break; 1366 default: 1367 DEMANGLE_UNREACHABLE; 1368 } 1369 const unsigned NumChars = BytesDecoded / CharBytes; 1370 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { 1371 unsigned NextChar = 1372 decodeMultiByteChar(StringBytes, CharIndex, CharBytes); 1373 if (CharIndex + 1 < NumChars || Result->IsTruncated) 1374 outputEscapedChar(OB, NextChar); 1375 } 1376 } 1377 1378 OB << '\0'; 1379 ResultBuffer = OB.getBuffer(); 1380 Result->DecodedString = copyString(ResultBuffer); 1381 std::free(ResultBuffer); 1382 return Result; 1383 1384 StringLiteralError: 1385 Error = true; 1386 std::free(OB.getBuffer()); 1387 return nullptr; 1388 } 1389 1390 // Returns MangledName's prefix before the first '@', or an error if 1391 // MangledName contains no '@' or the prefix has length 0. 
1392 StringView Demangler::demangleSimpleString(StringView &MangledName, 1393 bool Memorize) { 1394 StringView S; 1395 for (size_t i = 0; i < MangledName.size(); ++i) { 1396 if (MangledName[i] != '@') 1397 continue; 1398 if (i == 0) 1399 break; 1400 S = MangledName.substr(0, i); 1401 MangledName = MangledName.dropFront(i + 1); 1402 1403 if (Memorize) 1404 memorizeString(S); 1405 return S; 1406 } 1407 1408 Error = true; 1409 return {}; 1410 } 1411 1412 NamedIdentifierNode * 1413 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { 1414 assert(MangledName.startsWith("?A")); 1415 MangledName.consumeFront("?A"); 1416 1417 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>(); 1418 Node->Name = "`anonymous namespace'"; 1419 size_t EndPos = MangledName.find('@'); 1420 if (EndPos == StringView::npos) { 1421 Error = true; 1422 return nullptr; 1423 } 1424 StringView NamespaceKey = MangledName.substr(0, EndPos); 1425 memorizeString(NamespaceKey); 1426 MangledName = MangledName.substr(EndPos + 1); 1427 return Node; 1428 } 1429 1430 NamedIdentifierNode * 1431 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { 1432 assert(startsWithLocalScopePattern(MangledName)); 1433 1434 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>(); 1435 MangledName.consumeFront('?'); 1436 uint64_t Number = 0; 1437 bool IsNegative = false; 1438 std::tie(Number, IsNegative) = demangleNumber(MangledName); 1439 assert(!IsNegative); 1440 1441 // One ? to terminate the number 1442 MangledName.consumeFront('?'); 1443 1444 assert(!Error); 1445 Node *Scope = parse(MangledName); 1446 if (Error) 1447 return nullptr; 1448 1449 // Render the parent symbol's name into a buffer. 1450 OutputBuffer OB; 1451 if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024)) 1452 // FIXME: Propagate out-of-memory as an error? 
1453 std::terminate(); 1454 OB << '`'; 1455 Scope->output(OB, OF_Default); 1456 OB << '\''; 1457 OB << "::`" << Number << "'"; 1458 OB << '\0'; 1459 char *Result = OB.getBuffer(); 1460 Identifier->Name = copyString(Result); 1461 std::free(Result); 1462 return Identifier; 1463 } 1464 1465 // Parses a type name in the form of A@B@C@@ which represents C::B::A. 1466 QualifiedNameNode * 1467 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { 1468 IdentifierNode *Identifier = 1469 demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1470 if (Error) 1471 return nullptr; 1472 assert(Identifier); 1473 1474 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1475 if (Error) 1476 return nullptr; 1477 assert(QN); 1478 return QN; 1479 } 1480 1481 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. 1482 // Symbol names have slightly different rules regarding what can appear 1483 // so we separate out the implementations for flexibility. 1484 QualifiedNameNode * 1485 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { 1486 // This is the final component of a symbol name (i.e. the leftmost component 1487 // of a mangled name. Since the only possible template instantiation that 1488 // can appear in this context is a function template, and since those are 1489 // not saved for the purposes of name backreferences, only backref simple 1490 // names. 
1491 IdentifierNode *Identifier = 1492 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1493 if (Error) 1494 return nullptr; 1495 1496 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1497 if (Error) 1498 return nullptr; 1499 1500 if (Identifier->kind() == NodeKind::StructorIdentifier) { 1501 if (QN->Components->Count < 2) { 1502 Error = true; 1503 return nullptr; 1504 } 1505 StructorIdentifierNode *SIN = 1506 static_cast<StructorIdentifierNode *>(Identifier); 1507 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2]; 1508 SIN->Class = static_cast<IdentifierNode *>(ClassNode); 1509 } 1510 assert(QN); 1511 return QN; 1512 } 1513 1514 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName, 1515 bool Memorize) { 1516 // An inner-most name can be a back-reference, because a fully-qualified name 1517 // (e.g. Scope + Inner) can contain other fully qualified names inside of 1518 // them (for example template parameters), and these nested parameters can 1519 // refer to previously mangled types. 
if (startsWithDigit(MangledName))
    return demangleBackRefName(MangledName);

  if (MangledName.startsWith("?$"))
    return demangleTemplateInstantiationName(MangledName, NBB_Template);

  return demangleSimpleName(MangledName, Memorize);
}

// Parses the innermost component of a symbol name. The checks are ordered:
// a leading digit is a back-reference, "?$" a template instantiation, any
// other '?' a function identifier code, and everything else a simple name
// (memorized only if NBB contains NBB_Simple).
IdentifierNode *
Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
                                         NameBackrefBehavior NBB) {
  if (startsWithDigit(MangledName))
    return demangleBackRefName(MangledName);
  if (MangledName.startsWith("?$"))
    return demangleTemplateInstantiationName(MangledName, NBB);
  if (MangledName.startsWith('?'))
    return demangleFunctionIdentifierCode(MangledName);
  return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
}

// Parses one enclosing-scope component of a qualified name. The checks are
// ordered: back-reference, template instantiation ("?$"), anonymous namespace
// ("?A"), locally scoped name, and finally a simple name, which is always
// memorized.
IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
  if (startsWithDigit(MangledName))
    return demangleBackRefName(MangledName);

  if (MangledName.startsWith("?$"))
    return demangleTemplateInstantiationName(MangledName, NBB_Template);

  if (MangledName.startsWith("?A"))
    return demangleAnonymousNamespaceName(MangledName);

  if (startsWithLocalScopePattern(MangledName))
    return demangleLocallyScopedNamePiece(MangledName);

  return demangleSimpleName(MangledName, /*Memorize=*/true);
}

// Copies the first Count nodes of the singly linked list starting at Head
// into a newly arena-allocated NodeArrayNode, preserving list order. The list
// must contain at least Count entries.
static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
                                          size_t Count) {
  NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
  N->Count = Count;
  N->Nodes = Arena.allocArray<Node *>(Count);
  for (size_t I = 0; I < Count; ++I) {
    N->Nodes[I] = Head->N;
    Head = Head->Next;
  }
  return N;
}

// Parses the enclosing scopes that follow an already parsed unqualified name,
// up to and including the terminating '@', and returns the complete qualified
// name. Each scope piece is pushed onto the front of the list, so the
// resulting component array runs from the outermost scope to UnqualifiedName.
// Sets Error and returns nullptr if the input ends before the terminator;
// returns nullptr if a scope piece fails to parse.
QualifiedNameNode *
Demangler::demangleNameScopeChain(StringView &MangledName,
                                  IdentifierNode *UnqualifiedName) {
  NodeList *Head = Arena.alloc<NodeList>();

  Head->N = UnqualifiedName;

  size_t Count = 1;
  while (!MangledName.consumeFront("@")) {
    ++Count;
    NodeList *NewHead = Arena.alloc<NodeList>();
    NewHead->Next = Head;
    Head = NewHead;

    if (MangledName.empty()) {
      Error = true;
      return nullptr;
    }

    assert(!Error);
    IdentifierNode *Elem = demangleNameScopePiece(MangledName);
    if (Error)
      return nullptr;

    Head->N = Elem;
  }

  QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
  QN->Components = nodeListToNodeArray(Arena, Head, Count);
  return QN;
}

// Decodes the function-class code at the front of MangledName into a set of
// FuncClass flags. The first character is popped without an emptiness check,
// so MangledName must not be empty on entry.
FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
  switch (MangledName.popFront()) {
  case '9':
    return FuncClass(FC_ExternC | FC_NoParameterList);
  case 'A':
    return FC_Private;
  case 'B':
    return FuncClass(FC_Private | FC_Far);
  case 'C':
    return FuncClass(FC_Private | FC_Static);
  case 'D':
    return FuncClass(FC_Private | FC_Static | FC_Far);
  case 'E':
    return FuncClass(FC_Private | FC_Virtual);
  case 'F':
    return FuncClass(FC_Private | FC_Virtual | FC_Far);
  case 'G':
    return FuncClass(FC_Private | FC_StaticThisAdjust);
  case 'H':
    return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
  case 'I':
    return FuncClass(FC_Protected);
  case 'J':
    return FuncClass(FC_Protected | FC_Far);
  case 'K':
    return FuncClass(FC_Protected | FC_Static);
  case 'L':
    return FuncClass(FC_Protected | FC_Static | FC_Far);
  case 'M':
    return FuncClass(FC_Protected | FC_Virtual);
  case 'N':
    return FuncClass(FC_Protected | FC_Virtual | FC_Far);
  case 'O':
    return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
  case 'P':
    return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
  case 'Q':
    return FuncClass(FC_Public);
  case 'R':
    return FuncClass(FC_Public | FC_Far);
  case 'S':
    return FuncClass(FC_Public | FC_Static);
case 'T':
    return FuncClass(FC_Public | FC_Static | FC_Far);
  case 'U':
    return FuncClass(FC_Public | FC_Virtual);
  case 'V':
    return FuncClass(FC_Public | FC_Virtual | FC_Far);
  case 'W':
    return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
  case 'X':
    return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
  case 'Y':
    return FuncClass(FC_Global);
  case 'Z':
    return FuncClass(FC_Global | FC_Far);
  case '$': {
    // '$' ['R'] <digit 0-5>: virtual this-adjusting forms; the optional 'R'
    // adds FC_VirtualThisAdjustEx.
    FuncClass VFlag = FC_VirtualThisAdjust;
    if (MangledName.consumeFront('R'))
      VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
    if (MangledName.empty())
      break;
    switch (MangledName.popFront()) {
    case '0':
      return FuncClass(FC_Private | FC_Virtual | VFlag);
    case '1':
      return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
    case '2':
      return FuncClass(FC_Protected | FC_Virtual | VFlag);
    case '3':
      return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
    case '4':
      return FuncClass(FC_Public | FC_Virtual | VFlag);
    case '5':
      return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
    }
  }
  }

  // Any code not handled above is malformed.
  Error = true;
  return FC_Public;
}

// Decodes a one-character calling-convention code. Empty input sets Error.
// Note that an unrecognized character is consumed and yields
// CallingConv::None *without* setting Error.
CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
  if (MangledName.empty()) {
    Error = true;
    return CallingConv::None;
  }

  switch (MangledName.popFront()) {
  case 'A':
  case 'B':
    return CallingConv::Cdecl;
  case 'C':
  case 'D':
    return CallingConv::Pascal;
  case 'E':
  case 'F':
    return CallingConv::Thiscall;
  case 'G':
  case 'H':
    return CallingConv::Stdcall;
  case 'I':
  case 'J':
    return CallingConv::Fastcall;
  case 'M':
  case 'N':
    return CallingConv::Clrcall;
  case 'O':
  case 'P':
    return CallingConv::Eabi;
  case 'Q':
    return CallingConv::Vectorcall;
  case 'S':
    return CallingConv::Swift;
  case 'W':
    return CallingConv::SwiftAsync;
  }

  return CallingConv::None;
}

// Decodes a variable storage-class digit. The caller must guarantee that
// MangledName starts with a character in '0'..'4'; nothing else is handled.
StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
  assert(MangledName.front() >= '0' && MangledName.front() <= '4');

  switch (MangledName.popFront()) {
  case '0':
    return StorageClass::PrivateStatic;
  case '1':
    return StorageClass::ProtectedStatic;
  case '2':
    return StorageClass::PublicStatic;
  case '3':
    return StorageClass::Global;
  case '4':
    return StorageClass::FunctionLocalStatic;
  }
  DEMANGLE_UNREACHABLE;
}

// Decodes a one-character cv-qualifier code and returns
// {qualifiers, is-member}: 'Q'..'T' are the member forms and 'A'..'D' the
// non-member forms of none / const / volatile / const volatile. Empty input
// or any other character sets Error and yields {Q_None, false}; an
// unrecognized character is still consumed.
std::pair<Qualifiers, bool>
Demangler::demangleQualifiers(StringView &MangledName) {
  if (MangledName.empty()) {
    Error = true;
    return std::make_pair(Q_None, false);
  }

  switch (MangledName.popFront()) {
  // Member qualifiers
  case 'Q':
    return std::make_pair(Q_None, true);
  case 'R':
    return std::make_pair(Q_Const, true);
  case 'S':
    return std::make_pair(Q_Volatile, true);
  case 'T':
    return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
  // Non-Member qualifiers
  case 'A':
    return std::make_pair(Q_None, false);
  case 'B':
    return std::make_pair(Q_Const, false);
  case 'C':
    return std::make_pair(Q_Volatile, false);
  case 'D':
    return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
  }
  Error = true;
  return std::make_pair(Q_None, false);
}

// <variable-type> ::= <type> <cvr-qualifiers>
//                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
TypeNode *Demangler::demangleType(StringView &MangledName,
                                  QualifierMangleMode QMM) {
  Qualifiers Quals = Q_None;
  bool IsMember = false;
  if (QMM == QualifierMangleMode::Mangle) {
    std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
  } else if (QMM == QualifierMangleMode::Result) {
if (MangledName.consumeFront('?'))
      std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
  }

  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }

  // Dispatch on the kind of type that the remaining input starts with.
  TypeNode *Ty = nullptr;
  if (isTagType(MangledName))
    Ty = demangleClassType(MangledName);
  else if (isPointerType(MangledName)) {
    // isMemberPointer() reports malformed input through Error.
    if (isMemberPointer(MangledName, Error))
      Ty = demangleMemberPointerType(MangledName);
    else if (!Error)
      Ty = demanglePointerType(MangledName);
    else
      return nullptr;
  } else if (isArrayType(MangledName))
    Ty = demangleArrayType(MangledName);
  else if (isFunctionType(MangledName)) {
    // "$$A8@@" introduces a function type parsed with this-qualifiers,
    // "$$A6" one without.
    if (MangledName.consumeFront("$$A8@@"))
      Ty = demangleFunctionType(MangledName, true);
    else {
      assert(MangledName.startsWith("$$A6"));
      MangledName.consumeFront("$$A6");
      Ty = demangleFunctionType(MangledName, false);
    }
  } else if (isCustomType(MangledName)) {
    Ty = demangleCustomType(MangledName);
  } else {
    Ty = demanglePrimitiveType(MangledName);
  }

  // Merge the qualifiers parsed up front into the type, unless parsing failed.
  if (!Ty || Error)
    return Ty;
  Ty->Quals = Qualifiers(Ty->Quals | Quals);
  return Ty;
}

// Parses the throw specification that ends a function type: "_E" returns true
// (stored by the caller as IsNoexcept), 'Z' returns false. Anything else sets
// Error and returns false.
bool Demangler::demangleThrowSpecification(StringView &MangledName) {
  if (MangledName.consumeFront("_E"))
    return true;
  if (MangledName.consumeFront('Z'))
    return false;

  Error = true;
  return false;
}

// Parses a function signature: optional this-qualifiers (only when
// HasThisQuals), calling convention, return type (or '@' for structors),
// parameter list and throw specification. The node is returned even if one of
// the steps recorded an error, so callers must check Error.
FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
                                                       bool HasThisQuals) {
  FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();

  if (HasThisQuals) {
    FTy->Quals = demanglePointerExtQualifiers(MangledName);
    FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
    FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
  }

  // Fields that appear on both member and non-member functions.
  FTy->CallConvention = demangleCallingConvention(MangledName);

  // <return-type> ::= <type>
  //               ::= @ # structors (they have no declared return type)
  bool IsStructor = MangledName.consumeFront('@');
  if (!IsStructor)
    FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);

  FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);

  FTy->IsNoexcept = demangleThrowSpecification(MangledName);

  return FTy;
}

// Parses a function encoding: an optional "$$J0" marker (adds FC_ExternC), the
// function class, any this-adjustment offsets required by that class, and,
// unless the class has FC_NoParameterList, the function type. Returns a
// FunctionSymbolNode carrying the signature, or nullptr on error.
FunctionSymbolNode *
Demangler::demangleFunctionEncoding(StringView &MangledName) {
  FuncClass ExtraFlags = FC_None;
  if (MangledName.consumeFront("$$J0"))
    ExtraFlags = FC_ExternC;

  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }

  FuncClass FC = demangleFunctionClass(MangledName);
  FC = FuncClass(ExtraFlags | FC);

  FunctionSignatureNode *FSN = nullptr;
  ThunkSignatureNode *TTN = nullptr;
  if (FC & FC_StaticThisAdjust) {
    TTN = Arena.alloc<ThunkSignatureNode>();
    TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
  } else if (FC & FC_VirtualThisAdjust) {
    TTN = Arena.alloc<ThunkSignatureNode>();
    if (FC & FC_VirtualThisAdjustEx) {
      TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
      TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
    }
    TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
    TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
  }

  if (FC & FC_NoParameterList) {
    // This is an extern "C" function whose full signature hasn't been mangled.
    // This happens when we need to mangle a local symbol inside of an extern
    // "C" function.
    FSN = Arena.alloc<FunctionSignatureNode>();
  } else {
    // This-qualifiers are parsed unless the class is global or static.
    bool HasThisQuals = !(FC & (FC_Global | FC_Static));
    FSN = demangleFunctionType(MangledName, HasThisQuals);
  }

  if (Error)
    return nullptr;

  if (TTN) {
    // Copy the parsed signature into the FunctionSignatureNode part of the
    // thunk node, which already holds the this-adjustment offsets.
    *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
    FSN = TTN;
  }
  FSN->FunctionClass = FC;

  FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
  Symbol->Signature = FSN;
  return Symbol;
}

// Parses a custom type: '?' <unqualified type name> '@'. MangledName must
// start with '?'. A missing '@' terminator sets Error; returns nullptr
// whenever Error is set.
CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
  assert(MangledName.startsWith('?'));
  MangledName.popFront();

  CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
  CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
  if (!MangledName.consumeFront('@'))
    Error = true;
  if (Error)
    return nullptr;
  return CTN;
}

// Reads a primitive type.
PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
  if (MangledName.consumeFront("$$T"))
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);

  switch (MangledName.popFront()) {
  case 'X':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
  case 'D':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
  case 'C':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
  case 'E':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
  case 'F':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
  case 'G':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
  case 'H':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
  case 'I':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
  case 'J':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
  case 'K':
    return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
case 'M': 1951 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float); 1952 case 'N': 1953 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double); 1954 case 'O': 1955 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble); 1956 case '_': { 1957 if (MangledName.empty()) { 1958 Error = true; 1959 return nullptr; 1960 } 1961 switch (MangledName.popFront()) { 1962 case 'N': 1963 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool); 1964 case 'J': 1965 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64); 1966 case 'K': 1967 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64); 1968 case 'W': 1969 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar); 1970 case 'Q': 1971 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8); 1972 case 'S': 1973 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); 1974 case 'U': 1975 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); 1976 } 1977 break; 1978 } 1979 } 1980 Error = true; 1981 return nullptr; 1982 } 1983 1984 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) { 1985 TagTypeNode *TT = nullptr; 1986 1987 switch (MangledName.popFront()) { 1988 case 'T': 1989 TT = Arena.alloc<TagTypeNode>(TagKind::Union); 1990 break; 1991 case 'U': 1992 TT = Arena.alloc<TagTypeNode>(TagKind::Struct); 1993 break; 1994 case 'V': 1995 TT = Arena.alloc<TagTypeNode>(TagKind::Class); 1996 break; 1997 case 'W': 1998 if (!MangledName.consumeFront('4')) { 1999 Error = true; 2000 return nullptr; 2001 } 2002 TT = Arena.alloc<TagTypeNode>(TagKind::Enum); 2003 break; 2004 default: 2005 assert(false); 2006 } 2007 2008 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); 2009 return TT; 2010 } 2011 2012 // <pointer-type> ::= E? 
<pointer-cvr-qualifiers> <ext-qualifiers> <type> 2013 // # the E is required for 64-bit non-static pointers 2014 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) { 2015 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2016 2017 std::tie(Pointer->Quals, Pointer->Affinity) = 2018 demanglePointerCVQualifiers(MangledName); 2019 2020 if (MangledName.consumeFront("6")) { 2021 Pointer->Pointee = demangleFunctionType(MangledName, false); 2022 return Pointer; 2023 } 2024 2025 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2026 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2027 2028 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 2029 return Pointer; 2030 } 2031 2032 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) { 2033 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2034 2035 std::tie(Pointer->Quals, Pointer->Affinity) = 2036 demanglePointerCVQualifiers(MangledName); 2037 assert(Pointer->Affinity == PointerAffinity::Pointer); 2038 2039 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2040 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2041 2042 // isMemberPointer() only returns true if there is at least one character 2043 // after the qualifiers. 
2044 if (MangledName.consumeFront("8")) { 2045 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2046 Pointer->Pointee = demangleFunctionType(MangledName, true); 2047 } else { 2048 Qualifiers PointeeQuals = Q_None; 2049 bool IsMember = false; 2050 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 2051 assert(IsMember || Error); 2052 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2053 2054 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 2055 if (Pointer->Pointee) 2056 Pointer->Pointee->Quals = PointeeQuals; 2057 } 2058 2059 return Pointer; 2060 } 2061 2062 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { 2063 Qualifiers Quals = Q_None; 2064 if (MangledName.consumeFront('E')) 2065 Quals = Qualifiers(Quals | Q_Pointer64); 2066 if (MangledName.consumeFront('I')) 2067 Quals = Qualifiers(Quals | Q_Restrict); 2068 if (MangledName.consumeFront('F')) 2069 Quals = Qualifiers(Quals | Q_Unaligned); 2070 2071 return Quals; 2072 } 2073 2074 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) { 2075 assert(MangledName.front() == 'Y'); 2076 MangledName.popFront(); 2077 2078 uint64_t Rank = 0; 2079 bool IsNegative = false; 2080 std::tie(Rank, IsNegative) = demangleNumber(MangledName); 2081 if (IsNegative || Rank == 0) { 2082 Error = true; 2083 return nullptr; 2084 } 2085 2086 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>(); 2087 NodeList *Head = Arena.alloc<NodeList>(); 2088 NodeList *Tail = Head; 2089 2090 for (uint64_t I = 0; I < Rank; ++I) { 2091 uint64_t D = 0; 2092 std::tie(D, IsNegative) = demangleNumber(MangledName); 2093 if (Error || IsNegative) { 2094 Error = true; 2095 return nullptr; 2096 } 2097 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative); 2098 if (I + 1 < Rank) { 2099 Tail->Next = Arena.alloc<NodeList>(); 2100 Tail = Tail->Next; 2101 } 2102 } 2103 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank); 2104 2105 if 
(MangledName.consumeFront("$$C")) { 2106 bool IsMember = false; 2107 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); 2108 if (IsMember) { 2109 Error = true; 2110 return nullptr; 2111 } 2112 } 2113 2114 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 2115 return ATy; 2116 } 2117 2118 // Reads a function's parameters. 2119 NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName, 2120 bool &IsVariadic) { 2121 // Empty parameter list. 2122 if (MangledName.consumeFront('X')) 2123 return nullptr; 2124 2125 NodeList *Head = Arena.alloc<NodeList>(); 2126 NodeList **Current = &Head; 2127 size_t Count = 0; 2128 while (!Error && !MangledName.startsWith('@') && 2129 !MangledName.startsWith('Z')) { 2130 ++Count; 2131 2132 if (startsWithDigit(MangledName)) { 2133 size_t N = MangledName[0] - '0'; 2134 if (N >= Backrefs.FunctionParamCount) { 2135 Error = true; 2136 return nullptr; 2137 } 2138 MangledName = MangledName.dropFront(); 2139 2140 *Current = Arena.alloc<NodeList>(); 2141 (*Current)->N = Backrefs.FunctionParams[N]; 2142 Current = &(*Current)->Next; 2143 continue; 2144 } 2145 2146 size_t OldSize = MangledName.size(); 2147 2148 *Current = Arena.alloc<NodeList>(); 2149 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); 2150 if (!TN || Error) 2151 return nullptr; 2152 2153 (*Current)->N = TN; 2154 2155 size_t CharsConsumed = OldSize - MangledName.size(); 2156 assert(CharsConsumed != 0); 2157 2158 // Single-letter types are ignored for backreferences because memorizing 2159 // them doesn't save anything. 
2160 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) 2161 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; 2162 2163 Current = &(*Current)->Next; 2164 } 2165 2166 if (Error) 2167 return nullptr; 2168 2169 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); 2170 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2171 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2172 // the following Z could be a throw specifier. 2173 if (MangledName.consumeFront('@')) 2174 return NA; 2175 2176 if (MangledName.consumeFront('Z')) { 2177 IsVariadic = true; 2178 return NA; 2179 } 2180 2181 DEMANGLE_UNREACHABLE; 2182 } 2183 2184 NodeArrayNode * 2185 Demangler::demangleTemplateParameterList(StringView &MangledName) { 2186 NodeList *Head = nullptr; 2187 NodeList **Current = &Head; 2188 size_t Count = 0; 2189 2190 while (!MangledName.startsWith('@')) { 2191 if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") || 2192 MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) { 2193 // parameter pack separator 2194 continue; 2195 } 2196 2197 ++Count; 2198 2199 // Template parameter lists don't participate in back-referencing. 2200 *Current = Arena.alloc<NodeList>(); 2201 2202 NodeList &TP = **Current; 2203 2204 TemplateParameterReferenceNode *TPRN = nullptr; 2205 if (MangledName.consumeFront("$$Y")) { 2206 // Template alias 2207 TP.N = demangleFullyQualifiedTypeName(MangledName); 2208 } else if (MangledName.consumeFront("$$B")) { 2209 // Array 2210 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2211 } else if (MangledName.consumeFront("$$C")) { 2212 // Type has qualifiers. 
2213 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); 2214 } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") || 2215 MangledName.startsWith("$I") || MangledName.startsWith("$J")) { 2216 // Pointer to member 2217 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2218 TPRN->IsMemberPointer = true; 2219 2220 MangledName = MangledName.dropFront(); 2221 // 1 - single inheritance <name> 2222 // H - multiple inheritance <name> <number> 2223 // I - virtual inheritance <name> <number> <number> 2224 // J - unspecified inheritance <name> <number> <number> <number> 2225 char InheritanceSpecifier = MangledName.popFront(); 2226 SymbolNode *S = nullptr; 2227 if (MangledName.startsWith('?')) { 2228 S = parse(MangledName); 2229 if (Error || !S->Name) { 2230 Error = true; 2231 return nullptr; 2232 } 2233 memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); 2234 } 2235 2236 switch (InheritanceSpecifier) { 2237 case 'J': 2238 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2239 demangleSigned(MangledName); 2240 DEMANGLE_FALLTHROUGH; 2241 case 'I': 2242 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2243 demangleSigned(MangledName); 2244 DEMANGLE_FALLTHROUGH; 2245 case 'H': 2246 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2247 demangleSigned(MangledName); 2248 DEMANGLE_FALLTHROUGH; 2249 case '1': 2250 break; 2251 default: 2252 DEMANGLE_UNREACHABLE; 2253 } 2254 TPRN->Affinity = PointerAffinity::Pointer; 2255 TPRN->Symbol = S; 2256 } else if (MangledName.startsWith("$E?")) { 2257 MangledName.consumeFront("$E"); 2258 // Reference to symbol 2259 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2260 TPRN->Symbol = parse(MangledName); 2261 TPRN->Affinity = PointerAffinity::Reference; 2262 } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) { 2263 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2264 2265 // Data member pointer. 
2266 MangledName = MangledName.dropFront(); 2267 char InheritanceSpecifier = MangledName.popFront(); 2268 2269 switch (InheritanceSpecifier) { 2270 case 'G': 2271 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2272 demangleSigned(MangledName); 2273 DEMANGLE_FALLTHROUGH; 2274 case 'F': 2275 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2276 demangleSigned(MangledName); 2277 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2278 demangleSigned(MangledName); 2279 break; 2280 default: 2281 DEMANGLE_UNREACHABLE; 2282 } 2283 TPRN->IsMemberPointer = true; 2284 2285 } else if (MangledName.consumeFront("$0")) { 2286 // Integral non-type template parameter 2287 bool IsNegative = false; 2288 uint64_t Value = 0; 2289 std::tie(Value, IsNegative) = demangleNumber(MangledName); 2290 2291 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative); 2292 } else { 2293 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2294 } 2295 if (Error) 2296 return nullptr; 2297 2298 Current = &TP.Next; 2299 } 2300 2301 // The loop above returns nullptr on Error. 2302 assert(!Error); 2303 2304 // Template parameter lists cannot be variadic, so it can only be terminated 2305 // by @ (as opposed to 'Z' in the function parameter case). 2306 assert(MangledName.startsWith('@')); // The above loop exits only on '@'. 2307 MangledName.consumeFront('@'); 2308 return nodeListToNodeArray(Arena, Head, Count); 2309 } 2310 2311 void Demangler::dumpBackReferences() { 2312 std::printf("%d function parameter backreferences\n", 2313 (int)Backrefs.FunctionParamCount); 2314 2315 // Create an output stream so we can render each type. 
2316 OutputBuffer OB; 2317 if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024)) 2318 std::terminate(); 2319 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { 2320 OB.setCurrentPosition(0); 2321 2322 TypeNode *T = Backrefs.FunctionParams[I]; 2323 T->output(OB, OF_Default); 2324 2325 std::printf(" [%d] - %.*s\n", (int)I, (int)OB.getCurrentPosition(), 2326 OB.getBuffer()); 2327 } 2328 std::free(OB.getBuffer()); 2329 2330 if (Backrefs.FunctionParamCount > 0) 2331 std::printf("\n"); 2332 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); 2333 for (size_t I = 0; I < Backrefs.NamesCount; ++I) { 2334 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), 2335 Backrefs.Names[I]->Name.begin()); 2336 } 2337 if (Backrefs.NamesCount > 0) 2338 std::printf("\n"); 2339 } 2340 2341 char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled, 2342 char *Buf, size_t *N, 2343 int *Status, MSDemangleFlags Flags) { 2344 Demangler D; 2345 OutputBuffer OB; 2346 2347 StringView Name{MangledName}; 2348 SymbolNode *AST = D.parse(Name); 2349 if (!D.Error && NMangled) 2350 *NMangled = Name.begin() - MangledName; 2351 2352 if (Flags & MSDF_DumpBackrefs) 2353 D.dumpBackReferences(); 2354 2355 OutputFlags OF = OF_Default; 2356 if (Flags & MSDF_NoCallingConvention) 2357 OF = OutputFlags(OF | OF_NoCallingConvention); 2358 if (Flags & MSDF_NoAccessSpecifier) 2359 OF = OutputFlags(OF | OF_NoAccessSpecifier); 2360 if (Flags & MSDF_NoReturnType) 2361 OF = OutputFlags(OF | OF_NoReturnType); 2362 if (Flags & MSDF_NoMemberType) 2363 OF = OutputFlags(OF | OF_NoMemberType); 2364 if (Flags & MSDF_NoVariableType) 2365 OF = OutputFlags(OF | OF_NoVariableType); 2366 2367 int InternalStatus = demangle_success; 2368 if (D.Error) 2369 InternalStatus = demangle_invalid_mangled_name; 2370 else if (!initializeOutputBuffer(Buf, N, OB, 1024)) 2371 InternalStatus = demangle_memory_alloc_failure; 2372 else { 2373 AST->output(OB, OF); 2374 OB += '\0'; 2375 if (N 
!= nullptr) 2376 *N = OB.getCurrentPosition(); 2377 Buf = OB.getBuffer(); 2378 } 2379 2380 if (Status) 2381 *Status = InternalStatus; 2382 return InternalStatus == demangle_success ? Buf : nullptr; 2383 } 2384