1 //===- MicrosoftDemangle.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for MSVC-style mangled symbols. 10 // 11 // This file has no dependencies on the rest of LLVM so that it can be 12 // easily reused in other programs such as libcxxabi. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/MicrosoftDemangle.h" 17 #include "llvm/Demangle/Demangle.h" 18 #include "llvm/Demangle/MicrosoftDemangleNodes.h" 19 20 #include "llvm/Demangle/DemangleConfig.h" 21 #include "llvm/Demangle/StringView.h" 22 #include "llvm/Demangle/Utility.h" 23 24 #include <array> 25 #include <cctype> 26 #include <cstdio> 27 #include <tuple> 28 29 using namespace llvm; 30 using namespace ms_demangle; 31 32 static bool startsWithDigit(StringView S) { 33 return !S.empty() && std::isdigit(S.front()); 34 } 35 36 37 struct NodeList { 38 Node *N = nullptr; 39 NodeList *Next = nullptr; 40 }; 41 42 static bool isMemberPointer(StringView MangledName, bool &Error) { 43 Error = false; 44 switch (MangledName.popFront()) { 45 case '$': 46 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an 47 // rvalue reference to a member. 48 return false; 49 case 'A': 50 // 'A' indicates a reference, and you cannot have a reference to a member 51 // function or member. 52 return false; 53 case 'P': 54 case 'Q': 55 case 'R': 56 case 'S': 57 // These 4 values indicate some kind of pointer, but we still don't know 58 // what. 59 break; 60 default: 61 // isMemberPointer() is called only if isPointerType() returns true, 62 // and it rejects other prefixes. 
63 DEMANGLE_UNREACHABLE; 64 } 65 66 // If it starts with a number, then 6 indicates a non-member function 67 // pointer, and 8 indicates a member function pointer. 68 if (startsWithDigit(MangledName)) { 69 if (MangledName[0] != '6' && MangledName[0] != '8') { 70 Error = true; 71 return false; 72 } 73 return (MangledName[0] == '8'); 74 } 75 76 // Remove ext qualifiers since those can appear on either type and are 77 // therefore not indicative. 78 MangledName.consumeFront('E'); // 64-bit 79 MangledName.consumeFront('I'); // restrict 80 MangledName.consumeFront('F'); // unaligned 81 82 if (MangledName.empty()) { 83 Error = true; 84 return false; 85 } 86 87 // The next value should be either ABCD (non-member) or QRST (member). 88 switch (MangledName.front()) { 89 case 'A': 90 case 'B': 91 case 'C': 92 case 'D': 93 return false; 94 case 'Q': 95 case 'R': 96 case 'S': 97 case 'T': 98 return true; 99 default: 100 Error = true; 101 return false; 102 } 103 } 104 105 static SpecialIntrinsicKind 106 consumeSpecialIntrinsicKind(StringView &MangledName) { 107 if (MangledName.consumeFront("?_7")) 108 return SpecialIntrinsicKind::Vftable; 109 if (MangledName.consumeFront("?_8")) 110 return SpecialIntrinsicKind::Vbtable; 111 if (MangledName.consumeFront("?_9")) 112 return SpecialIntrinsicKind::VcallThunk; 113 if (MangledName.consumeFront("?_A")) 114 return SpecialIntrinsicKind::Typeof; 115 if (MangledName.consumeFront("?_B")) 116 return SpecialIntrinsicKind::LocalStaticGuard; 117 if (MangledName.consumeFront("?_C")) 118 return SpecialIntrinsicKind::StringLiteralSymbol; 119 if (MangledName.consumeFront("?_P")) 120 return SpecialIntrinsicKind::UdtReturning; 121 if (MangledName.consumeFront("?_R0")) 122 return SpecialIntrinsicKind::RttiTypeDescriptor; 123 if (MangledName.consumeFront("?_R1")) 124 return SpecialIntrinsicKind::RttiBaseClassDescriptor; 125 if (MangledName.consumeFront("?_R2")) 126 return SpecialIntrinsicKind::RttiBaseClassArray; 127 if 
(MangledName.consumeFront("?_R3")) 128 return SpecialIntrinsicKind::RttiClassHierarchyDescriptor; 129 if (MangledName.consumeFront("?_R4")) 130 return SpecialIntrinsicKind::RttiCompleteObjLocator; 131 if (MangledName.consumeFront("?_S")) 132 return SpecialIntrinsicKind::LocalVftable; 133 if (MangledName.consumeFront("?__E")) 134 return SpecialIntrinsicKind::DynamicInitializer; 135 if (MangledName.consumeFront("?__F")) 136 return SpecialIntrinsicKind::DynamicAtexitDestructor; 137 if (MangledName.consumeFront("?__J")) 138 return SpecialIntrinsicKind::LocalStaticThreadGuard; 139 return SpecialIntrinsicKind::None; 140 } 141 142 static bool startsWithLocalScopePattern(StringView S) { 143 if (!S.consumeFront('?')) 144 return false; 145 146 size_t End = S.find('?'); 147 if (End == StringView::npos) 148 return false; 149 StringView Candidate = S.substr(0, End); 150 if (Candidate.empty()) 151 return false; 152 153 // \?[0-9]\? 154 // ?@? is the discriminator 0. 155 if (Candidate.size() == 1) 156 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); 157 158 // If it's not 0-9, then it's an encoded number terminated with an @ 159 if (Candidate.back() != '@') 160 return false; 161 Candidate = Candidate.dropBack(); 162 163 // An encoded number starts with B-P and all subsequent digits are in A-P. 164 // Note that the reason the first digit cannot be A is two fold. First, it 165 // would create an ambiguity with ?A which delimits the beginning of an 166 // anonymous namespace. Second, A represents 0, and you don't start a multi 167 // digit number with a leading 0. Presumably the anonymous namespace 168 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 
169 if (Candidate[0] < 'B' || Candidate[0] > 'P') 170 return false; 171 Candidate = Candidate.dropFront(); 172 while (!Candidate.empty()) { 173 if (Candidate[0] < 'A' || Candidate[0] > 'P') 174 return false; 175 Candidate = Candidate.dropFront(); 176 } 177 178 return true; 179 } 180 181 static bool isTagType(StringView S) { 182 switch (S.front()) { 183 case 'T': // union 184 case 'U': // struct 185 case 'V': // class 186 case 'W': // enum 187 return true; 188 } 189 return false; 190 } 191 192 static bool isCustomType(StringView S) { return S[0] == '?'; } 193 194 static bool isPointerType(StringView S) { 195 if (S.startsWith("$$Q")) // foo && 196 return true; 197 198 switch (S.front()) { 199 case 'A': // foo & 200 case 'P': // foo * 201 case 'Q': // foo *const 202 case 'R': // foo *volatile 203 case 'S': // foo *const volatile 204 return true; 205 } 206 return false; 207 } 208 209 static bool isArrayType(StringView S) { return S[0] == 'Y'; } 210 211 static bool isFunctionType(StringView S) { 212 return S.startsWith("$$A8@@") || S.startsWith("$$A6"); 213 } 214 215 static FunctionRefQualifier 216 demangleFunctionRefQualifier(StringView &MangledName) { 217 if (MangledName.consumeFront('G')) 218 return FunctionRefQualifier::Reference; 219 else if (MangledName.consumeFront('H')) 220 return FunctionRefQualifier::RValueReference; 221 return FunctionRefQualifier::None; 222 } 223 224 static std::pair<Qualifiers, PointerAffinity> 225 demanglePointerCVQualifiers(StringView &MangledName) { 226 if (MangledName.consumeFront("$$Q")) 227 return std::make_pair(Q_None, PointerAffinity::RValueReference); 228 229 switch (MangledName.popFront()) { 230 case 'A': 231 return std::make_pair(Q_None, PointerAffinity::Reference); 232 case 'P': 233 return std::make_pair(Q_None, PointerAffinity::Pointer); 234 case 'Q': 235 return std::make_pair(Q_Const, PointerAffinity::Pointer); 236 case 'R': 237 return std::make_pair(Q_Volatile, PointerAffinity::Pointer); 238 case 'S': 239 return 
std::make_pair(Qualifiers(Q_Const | Q_Volatile), 240 PointerAffinity::Pointer); 241 } 242 // This function is only called if isPointerType() returns true, 243 // and it only returns true for the six cases listed above. 244 DEMANGLE_UNREACHABLE; 245 } 246 247 StringView Demangler::copyString(StringView Borrowed) { 248 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); 249 // This is not a micro-optimization, it avoids UB, should Borrowed be an null 250 // buffer. 251 if (Borrowed.size()) 252 std::memcpy(Stable, Borrowed.begin(), Borrowed.size()); 253 254 return {Stable, Borrowed.size()}; 255 } 256 257 SpecialTableSymbolNode * 258 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName, 259 SpecialIntrinsicKind K) { 260 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>(); 261 switch (K) { 262 case SpecialIntrinsicKind::Vftable: 263 NI->Name = "`vftable'"; 264 break; 265 case SpecialIntrinsicKind::Vbtable: 266 NI->Name = "`vbtable'"; 267 break; 268 case SpecialIntrinsicKind::LocalVftable: 269 NI->Name = "`local vftable'"; 270 break; 271 case SpecialIntrinsicKind::RttiCompleteObjLocator: 272 NI->Name = "`RTTI Complete Object Locator'"; 273 break; 274 default: 275 DEMANGLE_UNREACHABLE; 276 } 277 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 278 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>(); 279 STSN->Name = QN; 280 bool IsMember = false; 281 if (MangledName.empty()) { 282 Error = true; 283 return nullptr; 284 } 285 char Front = MangledName.popFront(); 286 if (Front != '6' && Front != '7') { 287 Error = true; 288 return nullptr; 289 } 290 291 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); 292 if (!MangledName.consumeFront('@')) 293 STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); 294 return STSN; 295 } 296 297 LocalStaticGuardVariableNode * 298 Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) { 299 LocalStaticGuardIdentifierNode *LSGI = 300 
Arena.alloc<LocalStaticGuardIdentifierNode>(); 301 LSGI->IsThread = IsThread; 302 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI); 303 LocalStaticGuardVariableNode *LSGVN = 304 Arena.alloc<LocalStaticGuardVariableNode>(); 305 LSGVN->Name = QN; 306 307 if (MangledName.consumeFront("4IA")) 308 LSGVN->IsVisible = false; 309 else if (MangledName.consumeFront("5")) 310 LSGVN->IsVisible = true; 311 else { 312 Error = true; 313 return nullptr; 314 } 315 316 if (!MangledName.empty()) 317 LSGI->ScopeIndex = demangleUnsigned(MangledName); 318 return LSGVN; 319 } 320 321 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena, 322 StringView Name) { 323 NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>(); 324 Id->Name = Name; 325 return Id; 326 } 327 328 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 329 IdentifierNode *Identifier) { 330 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 331 QN->Components = Arena.alloc<NodeArrayNode>(); 332 QN->Components->Count = 1; 333 QN->Components->Nodes = Arena.allocArray<Node *>(1); 334 QN->Components->Nodes[0] = Identifier; 335 return QN; 336 } 337 338 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 339 StringView Name) { 340 NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name); 341 return synthesizeQualifiedName(Arena, Id); 342 } 343 344 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena, 345 TypeNode *Type, 346 StringView VariableName) { 347 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 348 VSN->Type = Type; 349 VSN->Name = synthesizeQualifiedName(Arena, VariableName); 350 return VSN; 351 } 352 353 VariableSymbolNode *Demangler::demangleUntypedVariable( 354 ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) { 355 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName); 356 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 357 
VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 358 VSN->Name = QN; 359 if (MangledName.consumeFront("8")) 360 return VSN; 361 362 Error = true; 363 return nullptr; 364 } 365 366 VariableSymbolNode * 367 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 368 StringView &MangledName) { 369 RttiBaseClassDescriptorNode *RBCDN = 370 Arena.alloc<RttiBaseClassDescriptorNode>(); 371 RBCDN->NVOffset = demangleUnsigned(MangledName); 372 RBCDN->VBPtrOffset = demangleSigned(MangledName); 373 RBCDN->VBTableOffset = demangleUnsigned(MangledName); 374 RBCDN->Flags = demangleUnsigned(MangledName); 375 if (Error) 376 return nullptr; 377 378 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 379 VSN->Name = demangleNameScopeChain(MangledName, RBCDN); 380 MangledName.consumeFront('8'); 381 return VSN; 382 } 383 384 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName, 385 bool IsDestructor) { 386 DynamicStructorIdentifierNode *DSIN = 387 Arena.alloc<DynamicStructorIdentifierNode>(); 388 DSIN->IsDestructor = IsDestructor; 389 390 bool IsKnownStaticDataMember = false; 391 if (MangledName.consumeFront('?')) 392 IsKnownStaticDataMember = true; 393 394 SymbolNode *Symbol = demangleDeclarator(MangledName); 395 if (Error) 396 return nullptr; 397 398 FunctionSymbolNode *FSN = nullptr; 399 400 if (Symbol->kind() == NodeKind::VariableSymbol) { 401 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol); 402 403 // Older versions of clang mangled this type of symbol incorrectly. They 404 // would omit the leading ? and they would only emit a single @ at the end. 405 // The correct mangling is a leading ? and 2 trailing @ signs. Handle 406 // both cases. 407 int AtCount = IsKnownStaticDataMember ? 
2 : 1; 408 for (int I = 0; I < AtCount; ++I) { 409 if (MangledName.consumeFront('@')) 410 continue; 411 Error = true; 412 return nullptr; 413 } 414 415 FSN = demangleFunctionEncoding(MangledName); 416 if (FSN) 417 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 418 } else { 419 if (IsKnownStaticDataMember) { 420 // This was supposed to be a static data member, but we got a function. 421 Error = true; 422 return nullptr; 423 } 424 425 FSN = static_cast<FunctionSymbolNode *>(Symbol); 426 DSIN->Name = Symbol->Name; 427 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 428 } 429 430 return FSN; 431 } 432 433 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) { 434 SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName); 435 436 switch (SIK) { 437 case SpecialIntrinsicKind::None: 438 return nullptr; 439 case SpecialIntrinsicKind::StringLiteralSymbol: 440 return demangleStringLiteral(MangledName); 441 case SpecialIntrinsicKind::Vftable: 442 case SpecialIntrinsicKind::Vbtable: 443 case SpecialIntrinsicKind::LocalVftable: 444 case SpecialIntrinsicKind::RttiCompleteObjLocator: 445 return demangleSpecialTableSymbolNode(MangledName, SIK); 446 case SpecialIntrinsicKind::VcallThunk: 447 return demangleVcallThunkNode(MangledName); 448 case SpecialIntrinsicKind::LocalStaticGuard: 449 return demangleLocalStaticGuard(MangledName, /*IsThread=*/false); 450 case SpecialIntrinsicKind::LocalStaticThreadGuard: 451 return demangleLocalStaticGuard(MangledName, /*IsThread=*/true); 452 case SpecialIntrinsicKind::RttiTypeDescriptor: { 453 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 454 if (Error) 455 break; 456 if (!MangledName.consumeFront("@8")) 457 break; 458 if (!MangledName.empty()) 459 break; 460 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'"); 461 } 462 case SpecialIntrinsicKind::RttiBaseClassArray: 463 return demangleUntypedVariable(Arena, MangledName, 464 "`RTTI Base Class Array'"); 465 case 
SpecialIntrinsicKind::RttiClassHierarchyDescriptor: 466 return demangleUntypedVariable(Arena, MangledName, 467 "`RTTI Class Hierarchy Descriptor'"); 468 case SpecialIntrinsicKind::RttiBaseClassDescriptor: 469 return demangleRttiBaseClassDescriptorNode(Arena, MangledName); 470 case SpecialIntrinsicKind::DynamicInitializer: 471 return demangleInitFiniStub(MangledName, /*IsDestructor=*/false); 472 case SpecialIntrinsicKind::DynamicAtexitDestructor: 473 return demangleInitFiniStub(MangledName, /*IsDestructor=*/true); 474 case SpecialIntrinsicKind::Typeof: 475 case SpecialIntrinsicKind::UdtReturning: 476 // It's unclear which tools produces these manglings, so demangling 477 // support is not (yet?) implemented. 478 break; 479 case SpecialIntrinsicKind::Unknown: 480 DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind. 481 } 482 Error = true; 483 return nullptr; 484 } 485 486 IdentifierNode * 487 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) { 488 assert(MangledName.startsWith('?')); 489 MangledName = MangledName.dropFront(); 490 if (MangledName.empty()) { 491 Error = true; 492 return nullptr; 493 } 494 495 if (MangledName.consumeFront("__")) 496 return demangleFunctionIdentifierCode( 497 MangledName, FunctionIdentifierCodeGroup::DoubleUnder); 498 if (MangledName.consumeFront("_")) 499 return demangleFunctionIdentifierCode(MangledName, 500 FunctionIdentifierCodeGroup::Under); 501 return demangleFunctionIdentifierCode(MangledName, 502 FunctionIdentifierCodeGroup::Basic); 503 } 504 505 StructorIdentifierNode * 506 Demangler::demangleStructorIdentifier(StringView &MangledName, 507 bool IsDestructor) { 508 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>(); 509 N->IsDestructor = IsDestructor; 510 return N; 511 } 512 513 ConversionOperatorIdentifierNode * 514 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) { 515 ConversionOperatorIdentifierNode *N = 516 
Arena.alloc<ConversionOperatorIdentifierNode>(); 517 return N; 518 } 519 520 LiteralOperatorIdentifierNode * 521 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) { 522 LiteralOperatorIdentifierNode *N = 523 Arena.alloc<LiteralOperatorIdentifierNode>(); 524 N->Name = demangleSimpleString(MangledName, /*Memorize=*/false); 525 return N; 526 } 527 528 IntrinsicFunctionKind 529 Demangler::translateIntrinsicFunctionCode(char CH, 530 FunctionIdentifierCodeGroup Group) { 531 using IFK = IntrinsicFunctionKind; 532 if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) { 533 Error = true; 534 return IFK::None; 535 } 536 537 // Not all ? identifiers are intrinsics *functions*. This function only maps 538 // operator codes for the special functions, all others are handled elsewhere, 539 // hence the IFK::None entries in the table. 540 static IFK Basic[36] = { 541 IFK::None, // ?0 # Foo::Foo() 542 IFK::None, // ?1 # Foo::~Foo() 543 IFK::New, // ?2 # operator new 544 IFK::Delete, // ?3 # operator delete 545 IFK::Assign, // ?4 # operator= 546 IFK::RightShift, // ?5 # operator>> 547 IFK::LeftShift, // ?6 # operator<< 548 IFK::LogicalNot, // ?7 # operator! 
549 IFK::Equals, // ?8 # operator== 550 IFK::NotEquals, // ?9 # operator!= 551 IFK::ArraySubscript, // ?A # operator[] 552 IFK::None, // ?B # Foo::operator <type>() 553 IFK::Pointer, // ?C # operator-> 554 IFK::Dereference, // ?D # operator* 555 IFK::Increment, // ?E # operator++ 556 IFK::Decrement, // ?F # operator-- 557 IFK::Minus, // ?G # operator- 558 IFK::Plus, // ?H # operator+ 559 IFK::BitwiseAnd, // ?I # operator& 560 IFK::MemberPointer, // ?J # operator->* 561 IFK::Divide, // ?K # operator/ 562 IFK::Modulus, // ?L # operator% 563 IFK::LessThan, // ?M operator< 564 IFK::LessThanEqual, // ?N operator<= 565 IFK::GreaterThan, // ?O operator> 566 IFK::GreaterThanEqual, // ?P operator>= 567 IFK::Comma, // ?Q operator, 568 IFK::Parens, // ?R operator() 569 IFK::BitwiseNot, // ?S operator~ 570 IFK::BitwiseXor, // ?T operator^ 571 IFK::BitwiseOr, // ?U operator| 572 IFK::LogicalAnd, // ?V operator&& 573 IFK::LogicalOr, // ?W operator|| 574 IFK::TimesEqual, // ?X operator*= 575 IFK::PlusEqual, // ?Y operator+= 576 IFK::MinusEqual, // ?Z operator-= 577 }; 578 static IFK Under[36] = { 579 IFK::DivEqual, // ?_0 operator/= 580 IFK::ModEqual, // ?_1 operator%= 581 IFK::RshEqual, // ?_2 operator>>= 582 IFK::LshEqual, // ?_3 operator<<= 583 IFK::BitwiseAndEqual, // ?_4 operator&= 584 IFK::BitwiseOrEqual, // ?_5 operator|= 585 IFK::BitwiseXorEqual, // ?_6 operator^= 586 IFK::None, // ?_7 # vftable 587 IFK::None, // ?_8 # vbtable 588 IFK::None, // ?_9 # vcall 589 IFK::None, // ?_A # typeof 590 IFK::None, // ?_B # local static guard 591 IFK::None, // ?_C # string literal 592 IFK::VbaseDtor, // ?_D # vbase destructor 593 IFK::VecDelDtor, // ?_E # vector deleting destructor 594 IFK::DefaultCtorClosure, // ?_F # default constructor closure 595 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor 596 IFK::VecCtorIter, // ?_H # vector constructor iterator 597 IFK::VecDtorIter, // ?_I # vector destructor iterator 598 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor 
iterator 599 IFK::VdispMap, // ?_K # virtual displacement map 600 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator 601 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator 602 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator 603 IFK::CopyCtorClosure, // ?_O # copy constructor closure 604 IFK::None, // ?_P<name> # udt returning <name> 605 IFK::None, // ?_Q # <unknown> 606 IFK::None, // ?_R0 - ?_R4 # RTTI Codes 607 IFK::None, // ?_S # local vftable 608 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure 609 IFK::ArrayNew, // ?_U operator new[] 610 IFK::ArrayDelete, // ?_V operator delete[] 611 IFK::None, // ?_W <unused> 612 IFK::None, // ?_X <unused> 613 IFK::None, // ?_Y <unused> 614 IFK::None, // ?_Z <unused> 615 }; 616 static IFK DoubleUnder[36] = { 617 IFK::None, // ?__0 <unused> 618 IFK::None, // ?__1 <unused> 619 IFK::None, // ?__2 <unused> 620 IFK::None, // ?__3 <unused> 621 IFK::None, // ?__4 <unused> 622 IFK::None, // ?__5 <unused> 623 IFK::None, // ?__6 <unused> 624 IFK::None, // ?__7 <unused> 625 IFK::None, // ?__8 <unused> 626 IFK::None, // ?__9 <unused> 627 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator 628 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator 629 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator 630 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter 631 IFK::None, // ?__E dynamic initializer for `T' 632 IFK::None, // ?__F dynamic atexit destructor for `T' 633 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter 634 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter 635 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor 636 // iter 637 IFK::None, // ?__J local static thread guard 638 IFK::None, // ?__K operator ""_name 639 IFK::CoAwait, // ?__L operator co_await 640 IFK::Spaceship, // ?__M operator<=> 641 IFK::None, // ?__N <unused> 642 IFK::None, // ?__O <unused> 643 IFK::None, 
// ?__P <unused> 644 IFK::None, // ?__Q <unused> 645 IFK::None, // ?__R <unused> 646 IFK::None, // ?__S <unused> 647 IFK::None, // ?__T <unused> 648 IFK::None, // ?__U <unused> 649 IFK::None, // ?__V <unused> 650 IFK::None, // ?__W <unused> 651 IFK::None, // ?__X <unused> 652 IFK::None, // ?__Y <unused> 653 IFK::None, // ?__Z <unused> 654 }; 655 656 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10); 657 switch (Group) { 658 case FunctionIdentifierCodeGroup::Basic: 659 return Basic[Index]; 660 case FunctionIdentifierCodeGroup::Under: 661 return Under[Index]; 662 case FunctionIdentifierCodeGroup::DoubleUnder: 663 return DoubleUnder[Index]; 664 } 665 DEMANGLE_UNREACHABLE; 666 } 667 668 IdentifierNode * 669 Demangler::demangleFunctionIdentifierCode(StringView &MangledName, 670 FunctionIdentifierCodeGroup Group) { 671 if (MangledName.empty()) { 672 Error = true; 673 return nullptr; 674 } 675 switch (Group) { 676 case FunctionIdentifierCodeGroup::Basic: 677 switch (char CH = MangledName.popFront()) { 678 case '0': 679 case '1': 680 return demangleStructorIdentifier(MangledName, CH == '1'); 681 case 'B': 682 return demangleConversionOperatorIdentifier(MangledName); 683 default: 684 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 685 translateIntrinsicFunctionCode(CH, Group)); 686 } 687 case FunctionIdentifierCodeGroup::Under: 688 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 689 translateIntrinsicFunctionCode(MangledName.popFront(), Group)); 690 case FunctionIdentifierCodeGroup::DoubleUnder: 691 switch (char CH = MangledName.popFront()) { 692 case 'K': 693 return demangleLiteralOperatorIdentifier(MangledName); 694 default: 695 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 696 translateIntrinsicFunctionCode(CH, Group)); 697 } 698 } 699 700 DEMANGLE_UNREACHABLE; 701 } 702 703 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName, 704 QualifiedNameNode *Name) { 705 if (MangledName.empty()) { 706 Error = true; 707 return 
nullptr; 708 } 709 710 // Read a variable. 711 switch (MangledName.front()) { 712 case '0': 713 case '1': 714 case '2': 715 case '3': 716 case '4': { 717 StorageClass SC = demangleVariableStorageClass(MangledName); 718 return demangleVariableEncoding(MangledName, SC); 719 } 720 } 721 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName); 722 723 IdentifierNode *UQN = Name->getUnqualifiedIdentifier(); 724 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 725 ConversionOperatorIdentifierNode *COIN = 726 static_cast<ConversionOperatorIdentifierNode *>(UQN); 727 if (FSN) 728 COIN->TargetType = FSN->Signature->ReturnType; 729 } 730 return FSN; 731 } 732 733 SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) { 734 // What follows is a main symbol name. This may include namespaces or class 735 // back references. 736 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 737 if (Error) 738 return nullptr; 739 740 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 741 if (Error) 742 return nullptr; 743 Symbol->Name = QN; 744 745 IdentifierNode *UQN = QN->getUnqualifiedIdentifier(); 746 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 747 ConversionOperatorIdentifierNode *COIN = 748 static_cast<ConversionOperatorIdentifierNode *>(UQN); 749 if (!COIN->TargetType) { 750 Error = true; 751 return nullptr; 752 } 753 } 754 return Symbol; 755 } 756 757 SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) { 758 assert(MangledName.startsWith("??@")); 759 // This is an MD5 mangled name. We can't demangle it, just return the 760 // mangled name. 761 // An MD5 mangled name is ??@ followed by 32 characters and a terminating @. 
762 size_t MD5Last = MangledName.find('@', strlen("??@")); 763 if (MD5Last == StringView::npos) { 764 Error = true; 765 return nullptr; 766 } 767 const char *Start = MangledName.begin(); 768 MangledName = MangledName.dropFront(MD5Last + 1); 769 770 // There are two additional special cases for MD5 names: 771 // 1. For complete object locators where the object name is long enough 772 // for the object to have an MD5 name, the complete object locator is 773 // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual 774 // leading "??_R4". This is handled here. 775 // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after 776 // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8 777 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet 778 // demangle catchable types anywhere, this isn't handled for MD5 names 779 // either. 780 MangledName.consumeFront("??_R4@"); 781 782 StringView MD5(Start, MangledName.begin()); 783 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol); 784 S->Name = synthesizeQualifiedName(Arena, MD5); 785 786 return S; 787 } 788 789 SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) { 790 assert(MangledName.startsWith('.')); 791 MangledName.consumeFront('.'); 792 793 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 794 if (Error || !MangledName.empty()) { 795 Error = true; 796 return nullptr; 797 } 798 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'"); 799 } 800 801 // Parser entry point. 802 SymbolNode *Demangler::parse(StringView &MangledName) { 803 // Typeinfo names are strings stored in RTTI data. They're not symbol names. 804 // It's still useful to demangle them. They're the only demangled entity 805 // that doesn't start with a "?" but a ".". 
806 if (MangledName.startsWith('.')) 807 return demangleTypeinfoName(MangledName); 808 809 if (MangledName.startsWith("??@")) 810 return demangleMD5Name(MangledName); 811 812 // MSVC-style mangled symbols must start with '?'. 813 if (!MangledName.startsWith('?')) { 814 Error = true; 815 return nullptr; 816 } 817 818 MangledName.consumeFront('?'); 819 820 // ?$ is a template instantiation, but all other names that start with ? are 821 // operators / special names. 822 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName)) 823 return SI; 824 825 return demangleDeclarator(MangledName); 826 } 827 828 TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) { 829 if (!MangledName.consumeFront(".?A")) { 830 Error = true; 831 return nullptr; 832 } 833 MangledName.consumeFront(".?A"); 834 if (MangledName.empty()) { 835 Error = true; 836 return nullptr; 837 } 838 839 return demangleClassType(MangledName); 840 } 841 842 // <type-encoding> ::= <storage-class> <variable-type> 843 // <storage-class> ::= 0 # private static member 844 // ::= 1 # protected static member 845 // ::= 2 # public static member 846 // ::= 3 # global 847 // ::= 4 # static local 848 849 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName, 850 StorageClass SC) { 851 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 852 853 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop); 854 VSN->SC = SC; 855 856 if (Error) 857 return nullptr; 858 859 // <variable-type> ::= <type> <cvr-qualifiers> 860 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 861 switch (VSN->Type->kind()) { 862 case NodeKind::PointerType: { 863 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type); 864 865 Qualifiers ExtraChildQuals = Q_None; 866 PTN->Quals = Qualifiers(VSN->Type->Quals | 867 demanglePointerExtQualifiers(MangledName)); 868 869 bool IsMember = false; 870 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); 871 872 if 
(PTN->ClassParent) { 873 QualifiedNameNode *BackRefName = 874 demangleFullyQualifiedTypeName(MangledName); 875 (void)BackRefName; 876 } 877 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals); 878 879 break; 880 } 881 default: 882 VSN->Type->Quals = demangleQualifiers(MangledName).first; 883 break; 884 } 885 886 return VSN; 887 } 888 889 // Sometimes numbers are encoded in mangled symbols. For example, 890 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 891 // length 20), so we need some way to embed numbers as part of symbols. 892 // This function parses it. 893 // 894 // <number> ::= [?] <non-negative integer> 895 // 896 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 897 // ::= <hex digit>+ @ # when Number == 0 or >= 10 898 // 899 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 900 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) { 901 bool IsNegative = MangledName.consumeFront('?'); 902 903 if (startsWithDigit(MangledName)) { 904 uint64_t Ret = MangledName[0] - '0' + 1; 905 MangledName = MangledName.dropFront(1); 906 return {Ret, IsNegative}; 907 } 908 909 uint64_t Ret = 0; 910 for (size_t i = 0; i < MangledName.size(); ++i) { 911 char C = MangledName[i]; 912 if (C == '@') { 913 MangledName = MangledName.dropFront(i + 1); 914 return {Ret, IsNegative}; 915 } 916 if ('A' <= C && C <= 'P') { 917 Ret = (Ret << 4) + (C - 'A'); 918 continue; 919 } 920 break; 921 } 922 923 Error = true; 924 return {0ULL, false}; 925 } 926 927 uint64_t Demangler::demangleUnsigned(StringView &MangledName) { 928 bool IsNegative = false; 929 uint64_t Number = 0; 930 std::tie(Number, IsNegative) = demangleNumber(MangledName); 931 if (IsNegative) 932 Error = true; 933 return Number; 934 } 935 936 int64_t Demangler::demangleSigned(StringView &MangledName) { 937 bool IsNegative = false; 938 uint64_t Number = 0; 939 std::tie(Number, IsNegative) = demangleNumber(MangledName); 940 if (Number > INT64_MAX) 941 
Error = true; 942 int64_t I = static_cast<int64_t>(Number); 943 return IsNegative ? -I : I; 944 } 945 946 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 947 // Memorize it. 948 void Demangler::memorizeString(StringView S) { 949 if (Backrefs.NamesCount >= BackrefContext::Max) 950 return; 951 for (size_t i = 0; i < Backrefs.NamesCount; ++i) 952 if (S == Backrefs.Names[i]->Name) 953 return; 954 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>(); 955 N->Name = S; 956 Backrefs.Names[Backrefs.NamesCount++] = N; 957 } 958 959 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) { 960 assert(startsWithDigit(MangledName)); 961 962 size_t I = MangledName[0] - '0'; 963 if (I >= Backrefs.NamesCount) { 964 Error = true; 965 return nullptr; 966 } 967 968 MangledName = MangledName.dropFront(); 969 return Backrefs.Names[I]; 970 } 971 972 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { 973 // Render this class template name into a string buffer so that we can 974 // memorize it for the purpose of back-referencing. 975 OutputBuffer OB; 976 Identifier->output(OB, OF_Default); 977 StringView Owned = copyString(OB); 978 memorizeString(Owned); 979 std::free(OB.getBuffer()); 980 } 981 982 IdentifierNode * 983 Demangler::demangleTemplateInstantiationName(StringView &MangledName, 984 NameBackrefBehavior NBB) { 985 assert(MangledName.startsWith("?$")); 986 MangledName.consumeFront("?$"); 987 988 BackrefContext OuterContext; 989 std::swap(OuterContext, Backrefs); 990 991 IdentifierNode *Identifier = 992 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 993 if (!Error) 994 Identifier->TemplateParams = demangleTemplateParameterList(MangledName); 995 996 std::swap(OuterContext, Backrefs); 997 if (Error) 998 return nullptr; 999 1000 if (NBB & NBB_Template) { 1001 // NBB_Template is only set for types and non-leaf names ("a::" in "a::b"). 
1002 // Structors and conversion operators only makes sense in a leaf name, so 1003 // reject them in NBB_Template contexts. 1004 if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier || 1005 Identifier->kind() == NodeKind::StructorIdentifier) { 1006 Error = true; 1007 return nullptr; 1008 } 1009 1010 memorizeIdentifier(Identifier); 1011 } 1012 1013 return Identifier; 1014 } 1015 1016 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName, 1017 bool Memorize) { 1018 StringView S = demangleSimpleString(MangledName, Memorize); 1019 if (Error) 1020 return nullptr; 1021 1022 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>(); 1023 Name->Name = S; 1024 return Name; 1025 } 1026 1027 static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); } 1028 1029 static uint8_t rebasedHexDigitToNumber(char C) { 1030 assert(isRebasedHexDigit(C)); 1031 return (C <= 'J') ? (C - 'A') : (10 + C - 'K'); 1032 } 1033 1034 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) { 1035 assert(!MangledName.empty()); 1036 if (!MangledName.startsWith('?')) 1037 return MangledName.popFront(); 1038 1039 MangledName = MangledName.dropFront(); 1040 if (MangledName.empty()) 1041 goto CharLiteralError; 1042 1043 if (MangledName.consumeFront('$')) { 1044 // Two hex digits 1045 if (MangledName.size() < 2) 1046 goto CharLiteralError; 1047 StringView Nibbles = MangledName.substr(0, 2); 1048 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1])) 1049 goto CharLiteralError; 1050 // Don't append the null terminator. 1051 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]); 1052 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]); 1053 MangledName = MangledName.dropFront(2); 1054 return (C1 << 4) | C2; 1055 } 1056 1057 if (startsWithDigit(MangledName)) { 1058 const char *Lookup = ",/\\:. 
\n\t'-"; 1059 char C = Lookup[MangledName[0] - '0']; 1060 MangledName = MangledName.dropFront(); 1061 return C; 1062 } 1063 1064 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') { 1065 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 1066 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', 1067 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', 1068 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'}; 1069 char C = Lookup[MangledName[0] - 'a']; 1070 MangledName = MangledName.dropFront(); 1071 return C; 1072 } 1073 1074 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') { 1075 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 1076 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', 1077 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', 1078 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'}; 1079 char C = Lookup[MangledName[0] - 'A']; 1080 MangledName = MangledName.dropFront(); 1081 return C; 1082 } 1083 1084 CharLiteralError: 1085 Error = true; 1086 return '\0'; 1087 } 1088 1089 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) { 1090 uint8_t C1, C2; 1091 1092 C1 = demangleCharLiteral(MangledName); 1093 if (Error || MangledName.empty()) 1094 goto WCharLiteralError; 1095 C2 = demangleCharLiteral(MangledName); 1096 if (Error) 1097 goto WCharLiteralError; 1098 1099 return ((wchar_t)C1 << 8) | (wchar_t)C2; 1100 1101 WCharLiteralError: 1102 Error = true; 1103 return L'\0'; 1104 } 1105 1106 static void writeHexDigit(char *Buffer, uint8_t Digit) { 1107 assert(Digit <= 15); 1108 *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); 1109 } 1110 1111 static void outputHex(OutputBuffer &OB, unsigned C) { 1112 assert (C != 0); 1113 1114 // It's easier to do the math if we can work from right to left, but we need 1115 // to print the numbers from left to right. So render this into a temporary 1116 // buffer first, then output the temporary buffer. 
Each byte is of the form 1117 // \xAB, which means that each byte needs 4 characters. Since there are at 1118 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. 1119 char TempBuffer[17]; 1120 1121 ::memset(TempBuffer, 0, sizeof(TempBuffer)); 1122 constexpr int MaxPos = sizeof(TempBuffer) - 1; 1123 1124 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0. 1125 while (C != 0) { 1126 for (int I = 0; I < 2; ++I) { 1127 writeHexDigit(&TempBuffer[Pos--], C % 16); 1128 C /= 16; 1129 } 1130 } 1131 TempBuffer[Pos--] = 'x'; 1132 assert(Pos >= 0); 1133 TempBuffer[Pos--] = '\\'; 1134 OB << StringView(&TempBuffer[Pos + 1]); 1135 } 1136 1137 static void outputEscapedChar(OutputBuffer &OB, unsigned C) { 1138 switch (C) { 1139 case '\0': // nul 1140 OB << "\\0"; 1141 return; 1142 case '\'': // single quote 1143 OB << "\\\'"; 1144 return; 1145 case '\"': // double quote 1146 OB << "\\\""; 1147 return; 1148 case '\\': // backslash 1149 OB << "\\\\"; 1150 return; 1151 case '\a': // bell 1152 OB << "\\a"; 1153 return; 1154 case '\b': // backspace 1155 OB << "\\b"; 1156 return; 1157 case '\f': // form feed 1158 OB << "\\f"; 1159 return; 1160 case '\n': // new line 1161 OB << "\\n"; 1162 return; 1163 case '\r': // carriage return 1164 OB << "\\r"; 1165 return; 1166 case '\t': // tab 1167 OB << "\\t"; 1168 return; 1169 case '\v': // vertical tab 1170 OB << "\\v"; 1171 return; 1172 default: 1173 break; 1174 } 1175 1176 if (C > 0x1F && C < 0x7F) { 1177 // Standard ascii char. 
// Returns how many consecutive zero bytes end the range
// StringBytes[0, Length).  A Length of zero or less yields 0.
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
  unsigned Count = 0;
  // Index from the back rather than walking a pointer: a pointer to
  // "StringBytes + Length - 1" would point before the array when Length is 0,
  // and would be stepped before it when every byte is zero.
  for (int I = Length - 1; I >= 0 && StringBytes[I] == 0; --I)
    ++Count;
  return Count;
}

// Returns the number of zero bytes anywhere in StringBytes[0, Length).
static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
                                   unsigned Length) {
  unsigned Result = 0;
  for (unsigned I = 0; I != Length; ++I) {
    if (StringBytes[I] == 0)
      ++Result;
  }
  return Result;
}
This obviously isn't 1233 // perfect and is biased towards languages that have ascii alphabets, but this 1234 // was always going to be best effort since the encoding is lossy. 1235 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); 1236 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0) 1237 return 4; 1238 if (Nulls >= NumChars / 3) 1239 return 2; 1240 return 1; 1241 } 1242 1243 static unsigned decodeMultiByteChar(const uint8_t *StringBytes, 1244 unsigned CharIndex, unsigned CharBytes) { 1245 assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); 1246 unsigned Offset = CharIndex * CharBytes; 1247 unsigned Result = 0; 1248 StringBytes = StringBytes + Offset; 1249 for (unsigned I = 0; I < CharBytes; ++I) { 1250 unsigned C = static_cast<unsigned>(StringBytes[I]); 1251 Result |= C << (8 * I); 1252 } 1253 return Result; 1254 } 1255 1256 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) { 1257 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>(); 1258 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>(); 1259 FSN->Signature = Arena.alloc<ThunkSignatureNode>(); 1260 FSN->Signature->FunctionClass = FC_NoParameterList; 1261 1262 FSN->Name = demangleNameScopeChain(MangledName, VTIN); 1263 if (!Error) 1264 Error = !MangledName.consumeFront("$B"); 1265 if (!Error) 1266 VTIN->OffsetInVTable = demangleUnsigned(MangledName); 1267 if (!Error) 1268 Error = !MangledName.consumeFront('A'); 1269 if (!Error) 1270 FSN->Signature->CallConvention = demangleCallingConvention(MangledName); 1271 return (Error) ? nullptr : FSN; 1272 } 1273 1274 EncodedStringLiteralNode * 1275 Demangler::demangleStringLiteral(StringView &MangledName) { 1276 // This function uses goto, so declare all variables up front. 
1277 OutputBuffer OB; 1278 StringView CRC; 1279 uint64_t StringByteSize; 1280 bool IsWcharT = false; 1281 bool IsNegative = false; 1282 size_t CrcEndPos = 0; 1283 1284 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>(); 1285 1286 // Prefix indicating the beginning of a string literal 1287 if (!MangledName.consumeFront("@_")) 1288 goto StringLiteralError; 1289 if (MangledName.empty()) 1290 goto StringLiteralError; 1291 1292 // Char Type (regular or wchar_t) 1293 switch (MangledName.popFront()) { 1294 case '1': 1295 IsWcharT = true; 1296 DEMANGLE_FALLTHROUGH; 1297 case '0': 1298 break; 1299 default: 1300 goto StringLiteralError; 1301 } 1302 1303 // Encoded Length 1304 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); 1305 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1)) 1306 goto StringLiteralError; 1307 1308 // CRC 32 (always 8 characters plus a terminator) 1309 CrcEndPos = MangledName.find('@'); 1310 if (CrcEndPos == StringView::npos) 1311 goto StringLiteralError; 1312 CRC = MangledName.substr(0, CrcEndPos); 1313 MangledName = MangledName.dropFront(CrcEndPos + 1); 1314 if (MangledName.empty()) 1315 goto StringLiteralError; 1316 1317 if (IsWcharT) { 1318 Result->Char = CharKind::Wchar; 1319 if (StringByteSize > 64) 1320 Result->IsTruncated = true; 1321 1322 while (!MangledName.consumeFront('@')) { 1323 if (MangledName.size() < 2) 1324 goto StringLiteralError; 1325 wchar_t W = demangleWcharLiteral(MangledName); 1326 if (StringByteSize != 2 || Result->IsTruncated) 1327 outputEscapedChar(OB, W); 1328 StringByteSize -= 2; 1329 if (Error) 1330 goto StringLiteralError; 1331 } 1332 } else { 1333 // The max byte length is actually 32, but some compilers mangled strings 1334 // incorrectly, so we have to assume it can go higher. 
1335 constexpr unsigned MaxStringByteLength = 32 * 4; 1336 uint8_t StringBytes[MaxStringByteLength]; 1337 1338 unsigned BytesDecoded = 0; 1339 while (!MangledName.consumeFront('@')) { 1340 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength) 1341 goto StringLiteralError; 1342 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); 1343 } 1344 1345 if (StringByteSize > BytesDecoded) 1346 Result->IsTruncated = true; 1347 1348 unsigned CharBytes = 1349 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); 1350 assert(StringByteSize % CharBytes == 0); 1351 switch (CharBytes) { 1352 case 1: 1353 Result->Char = CharKind::Char; 1354 break; 1355 case 2: 1356 Result->Char = CharKind::Char16; 1357 break; 1358 case 4: 1359 Result->Char = CharKind::Char32; 1360 break; 1361 default: 1362 DEMANGLE_UNREACHABLE; 1363 } 1364 const unsigned NumChars = BytesDecoded / CharBytes; 1365 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { 1366 unsigned NextChar = 1367 decodeMultiByteChar(StringBytes, CharIndex, CharBytes); 1368 if (CharIndex + 1 < NumChars || Result->IsTruncated) 1369 outputEscapedChar(OB, NextChar); 1370 } 1371 } 1372 1373 Result->DecodedString = copyString(OB); 1374 std::free(OB.getBuffer()); 1375 return Result; 1376 1377 StringLiteralError: 1378 Error = true; 1379 std::free(OB.getBuffer()); 1380 return nullptr; 1381 } 1382 1383 // Returns MangledName's prefix before the first '@', or an error if 1384 // MangledName contains no '@' or the prefix has length 0. 
1385 StringView Demangler::demangleSimpleString(StringView &MangledName, 1386 bool Memorize) { 1387 StringView S; 1388 for (size_t i = 0; i < MangledName.size(); ++i) { 1389 if (MangledName[i] != '@') 1390 continue; 1391 if (i == 0) 1392 break; 1393 S = MangledName.substr(0, i); 1394 MangledName = MangledName.dropFront(i + 1); 1395 1396 if (Memorize) 1397 memorizeString(S); 1398 return S; 1399 } 1400 1401 Error = true; 1402 return {}; 1403 } 1404 1405 NamedIdentifierNode * 1406 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { 1407 assert(MangledName.startsWith("?A")); 1408 MangledName.consumeFront("?A"); 1409 1410 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>(); 1411 Node->Name = "`anonymous namespace'"; 1412 size_t EndPos = MangledName.find('@'); 1413 if (EndPos == StringView::npos) { 1414 Error = true; 1415 return nullptr; 1416 } 1417 StringView NamespaceKey = MangledName.substr(0, EndPos); 1418 memorizeString(NamespaceKey); 1419 MangledName = MangledName.substr(EndPos + 1); 1420 return Node; 1421 } 1422 1423 NamedIdentifierNode * 1424 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { 1425 assert(startsWithLocalScopePattern(MangledName)); 1426 1427 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>(); 1428 MangledName.consumeFront('?'); 1429 uint64_t Number = 0; 1430 bool IsNegative = false; 1431 std::tie(Number, IsNegative) = demangleNumber(MangledName); 1432 assert(!IsNegative); 1433 1434 // One ? to terminate the number 1435 MangledName.consumeFront('?'); 1436 1437 assert(!Error); 1438 Node *Scope = parse(MangledName); 1439 if (Error) 1440 return nullptr; 1441 1442 // Render the parent symbol's name into a buffer. 
1443 OutputBuffer OB; 1444 OB << '`'; 1445 Scope->output(OB, OF_Default); 1446 OB << '\''; 1447 OB << "::`" << Number << "'"; 1448 1449 Identifier->Name = copyString(OB); 1450 std::free(OB.getBuffer()); 1451 return Identifier; 1452 } 1453 1454 // Parses a type name in the form of A@B@C@@ which represents C::B::A. 1455 QualifiedNameNode * 1456 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { 1457 IdentifierNode *Identifier = 1458 demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1459 if (Error) 1460 return nullptr; 1461 assert(Identifier); 1462 1463 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1464 if (Error) 1465 return nullptr; 1466 assert(QN); 1467 return QN; 1468 } 1469 1470 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. 1471 // Symbol names have slightly different rules regarding what can appear 1472 // so we separate out the implementations for flexibility. 1473 QualifiedNameNode * 1474 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { 1475 // This is the final component of a symbol name (i.e. the leftmost component 1476 // of a mangled name. Since the only possible template instantiation that 1477 // can appear in this context is a function template, and since those are 1478 // not saved for the purposes of name backreferences, only backref simple 1479 // names. 
1480 IdentifierNode *Identifier = 1481 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1482 if (Error) 1483 return nullptr; 1484 1485 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1486 if (Error) 1487 return nullptr; 1488 1489 if (Identifier->kind() == NodeKind::StructorIdentifier) { 1490 if (QN->Components->Count < 2) { 1491 Error = true; 1492 return nullptr; 1493 } 1494 StructorIdentifierNode *SIN = 1495 static_cast<StructorIdentifierNode *>(Identifier); 1496 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2]; 1497 SIN->Class = static_cast<IdentifierNode *>(ClassNode); 1498 } 1499 assert(QN); 1500 return QN; 1501 } 1502 1503 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName, 1504 bool Memorize) { 1505 // An inner-most name can be a back-reference, because a fully-qualified name 1506 // (e.g. Scope + Inner) can contain other fully qualified names inside of 1507 // them (for example template parameters), and these nested parameters can 1508 // refer to previously mangled types. 
1509 if (startsWithDigit(MangledName)) 1510 return demangleBackRefName(MangledName); 1511 1512 if (MangledName.startsWith("?$")) 1513 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1514 1515 return demangleSimpleName(MangledName, Memorize); 1516 } 1517 1518 IdentifierNode * 1519 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName, 1520 NameBackrefBehavior NBB) { 1521 if (startsWithDigit(MangledName)) 1522 return demangleBackRefName(MangledName); 1523 if (MangledName.startsWith("?$")) 1524 return demangleTemplateInstantiationName(MangledName, NBB); 1525 if (MangledName.startsWith('?')) 1526 return demangleFunctionIdentifierCode(MangledName); 1527 return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0); 1528 } 1529 1530 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) { 1531 if (startsWithDigit(MangledName)) 1532 return demangleBackRefName(MangledName); 1533 1534 if (MangledName.startsWith("?$")) 1535 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1536 1537 if (MangledName.startsWith("?A")) 1538 return demangleAnonymousNamespaceName(MangledName); 1539 1540 if (startsWithLocalScopePattern(MangledName)) 1541 return demangleLocallyScopedNamePiece(MangledName); 1542 1543 return demangleSimpleName(MangledName, /*Memorize=*/true); 1544 } 1545 1546 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, 1547 size_t Count) { 1548 NodeArrayNode *N = Arena.alloc<NodeArrayNode>(); 1549 N->Count = Count; 1550 N->Nodes = Arena.allocArray<Node *>(Count); 1551 for (size_t I = 0; I < Count; ++I) { 1552 N->Nodes[I] = Head->N; 1553 Head = Head->Next; 1554 } 1555 return N; 1556 } 1557 1558 QualifiedNameNode * 1559 Demangler::demangleNameScopeChain(StringView &MangledName, 1560 IdentifierNode *UnqualifiedName) { 1561 NodeList *Head = Arena.alloc<NodeList>(); 1562 1563 Head->N = UnqualifiedName; 1564 1565 size_t Count = 1; 1566 while 
(!MangledName.consumeFront("@")) { 1567 ++Count; 1568 NodeList *NewHead = Arena.alloc<NodeList>(); 1569 NewHead->Next = Head; 1570 Head = NewHead; 1571 1572 if (MangledName.empty()) { 1573 Error = true; 1574 return nullptr; 1575 } 1576 1577 assert(!Error); 1578 IdentifierNode *Elem = demangleNameScopePiece(MangledName); 1579 if (Error) 1580 return nullptr; 1581 1582 Head->N = Elem; 1583 } 1584 1585 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 1586 QN->Components = nodeListToNodeArray(Arena, Head, Count); 1587 return QN; 1588 } 1589 1590 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) { 1591 switch (MangledName.popFront()) { 1592 case '9': 1593 return FuncClass(FC_ExternC | FC_NoParameterList); 1594 case 'A': 1595 return FC_Private; 1596 case 'B': 1597 return FuncClass(FC_Private | FC_Far); 1598 case 'C': 1599 return FuncClass(FC_Private | FC_Static); 1600 case 'D': 1601 return FuncClass(FC_Private | FC_Static | FC_Far); 1602 case 'E': 1603 return FuncClass(FC_Private | FC_Virtual); 1604 case 'F': 1605 return FuncClass(FC_Private | FC_Virtual | FC_Far); 1606 case 'G': 1607 return FuncClass(FC_Private | FC_StaticThisAdjust); 1608 case 'H': 1609 return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far); 1610 case 'I': 1611 return FuncClass(FC_Protected); 1612 case 'J': 1613 return FuncClass(FC_Protected | FC_Far); 1614 case 'K': 1615 return FuncClass(FC_Protected | FC_Static); 1616 case 'L': 1617 return FuncClass(FC_Protected | FC_Static | FC_Far); 1618 case 'M': 1619 return FuncClass(FC_Protected | FC_Virtual); 1620 case 'N': 1621 return FuncClass(FC_Protected | FC_Virtual | FC_Far); 1622 case 'O': 1623 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust); 1624 case 'P': 1625 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1626 case 'Q': 1627 return FuncClass(FC_Public); 1628 case 'R': 1629 return FuncClass(FC_Public | FC_Far); 1630 case 'S': 1631 return FuncClass(FC_Public | FC_Static); 1632 
case 'T': 1633 return FuncClass(FC_Public | FC_Static | FC_Far); 1634 case 'U': 1635 return FuncClass(FC_Public | FC_Virtual); 1636 case 'V': 1637 return FuncClass(FC_Public | FC_Virtual | FC_Far); 1638 case 'W': 1639 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust); 1640 case 'X': 1641 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1642 case 'Y': 1643 return FuncClass(FC_Global); 1644 case 'Z': 1645 return FuncClass(FC_Global | FC_Far); 1646 case '$': { 1647 FuncClass VFlag = FC_VirtualThisAdjust; 1648 if (MangledName.consumeFront('R')) 1649 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx); 1650 if (MangledName.empty()) 1651 break; 1652 switch (MangledName.popFront()) { 1653 case '0': 1654 return FuncClass(FC_Private | FC_Virtual | VFlag); 1655 case '1': 1656 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far); 1657 case '2': 1658 return FuncClass(FC_Protected | FC_Virtual | VFlag); 1659 case '3': 1660 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far); 1661 case '4': 1662 return FuncClass(FC_Public | FC_Virtual | VFlag); 1663 case '5': 1664 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far); 1665 } 1666 } 1667 } 1668 1669 Error = true; 1670 return FC_Public; 1671 } 1672 1673 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) { 1674 if (MangledName.empty()) { 1675 Error = true; 1676 return CallingConv::None; 1677 } 1678 1679 switch (MangledName.popFront()) { 1680 case 'A': 1681 case 'B': 1682 return CallingConv::Cdecl; 1683 case 'C': 1684 case 'D': 1685 return CallingConv::Pascal; 1686 case 'E': 1687 case 'F': 1688 return CallingConv::Thiscall; 1689 case 'G': 1690 case 'H': 1691 return CallingConv::Stdcall; 1692 case 'I': 1693 case 'J': 1694 return CallingConv::Fastcall; 1695 case 'M': 1696 case 'N': 1697 return CallingConv::Clrcall; 1698 case 'O': 1699 case 'P': 1700 return CallingConv::Eabi; 1701 case 'Q': 1702 return CallingConv::Vectorcall; 1703 case 'S': 1704 return 
CallingConv::Swift; 1705 case 'W': 1706 return CallingConv::SwiftAsync; 1707 } 1708 1709 return CallingConv::None; 1710 } 1711 1712 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) { 1713 assert(MangledName.front() >= '0' && MangledName.front() <= '4'); 1714 1715 switch (MangledName.popFront()) { 1716 case '0': 1717 return StorageClass::PrivateStatic; 1718 case '1': 1719 return StorageClass::ProtectedStatic; 1720 case '2': 1721 return StorageClass::PublicStatic; 1722 case '3': 1723 return StorageClass::Global; 1724 case '4': 1725 return StorageClass::FunctionLocalStatic; 1726 } 1727 DEMANGLE_UNREACHABLE; 1728 } 1729 1730 std::pair<Qualifiers, bool> 1731 Demangler::demangleQualifiers(StringView &MangledName) { 1732 if (MangledName.empty()) { 1733 Error = true; 1734 return std::make_pair(Q_None, false); 1735 } 1736 1737 switch (MangledName.popFront()) { 1738 // Member qualifiers 1739 case 'Q': 1740 return std::make_pair(Q_None, true); 1741 case 'R': 1742 return std::make_pair(Q_Const, true); 1743 case 'S': 1744 return std::make_pair(Q_Volatile, true); 1745 case 'T': 1746 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true); 1747 // Non-Member qualifiers 1748 case 'A': 1749 return std::make_pair(Q_None, false); 1750 case 'B': 1751 return std::make_pair(Q_Const, false); 1752 case 'C': 1753 return std::make_pair(Q_Volatile, false); 1754 case 'D': 1755 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false); 1756 } 1757 Error = true; 1758 return std::make_pair(Q_None, false); 1759 } 1760 1761 // <variable-type> ::= <type> <cvr-qualifiers> 1762 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 1763 TypeNode *Demangler::demangleType(StringView &MangledName, 1764 QualifierMangleMode QMM) { 1765 Qualifiers Quals = Q_None; 1766 bool IsMember = false; 1767 if (QMM == QualifierMangleMode::Mangle) { 1768 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1769 } else if (QMM == QualifierMangleMode::Result) { 1770 
if (MangledName.consumeFront('?')) 1771 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1772 } 1773 1774 if (MangledName.empty()) { 1775 Error = true; 1776 return nullptr; 1777 } 1778 1779 TypeNode *Ty = nullptr; 1780 if (isTagType(MangledName)) 1781 Ty = demangleClassType(MangledName); 1782 else if (isPointerType(MangledName)) { 1783 if (isMemberPointer(MangledName, Error)) 1784 Ty = demangleMemberPointerType(MangledName); 1785 else if (!Error) 1786 Ty = demanglePointerType(MangledName); 1787 else 1788 return nullptr; 1789 } else if (isArrayType(MangledName)) 1790 Ty = demangleArrayType(MangledName); 1791 else if (isFunctionType(MangledName)) { 1792 if (MangledName.consumeFront("$$A8@@")) 1793 Ty = demangleFunctionType(MangledName, true); 1794 else { 1795 assert(MangledName.startsWith("$$A6")); 1796 MangledName.consumeFront("$$A6"); 1797 Ty = demangleFunctionType(MangledName, false); 1798 } 1799 } else if (isCustomType(MangledName)) { 1800 Ty = demangleCustomType(MangledName); 1801 } else { 1802 Ty = demanglePrimitiveType(MangledName); 1803 } 1804 1805 if (!Ty || Error) 1806 return Ty; 1807 Ty->Quals = Qualifiers(Ty->Quals | Quals); 1808 return Ty; 1809 } 1810 1811 bool Demangler::demangleThrowSpecification(StringView &MangledName) { 1812 if (MangledName.consumeFront("_E")) 1813 return true; 1814 if (MangledName.consumeFront('Z')) 1815 return false; 1816 1817 Error = true; 1818 return false; 1819 } 1820 1821 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName, 1822 bool HasThisQuals) { 1823 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>(); 1824 1825 if (HasThisQuals) { 1826 FTy->Quals = demanglePointerExtQualifiers(MangledName); 1827 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName); 1828 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); 1829 } 1830 1831 // Fields that appear on both member and non-member functions. 
1832 FTy->CallConvention = demangleCallingConvention(MangledName); 1833 1834 // <return-type> ::= <type> 1835 // ::= @ # structors (they have no declared return type) 1836 bool IsStructor = MangledName.consumeFront('@'); 1837 if (!IsStructor) 1838 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); 1839 1840 FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic); 1841 1842 FTy->IsNoexcept = demangleThrowSpecification(MangledName); 1843 1844 return FTy; 1845 } 1846 1847 FunctionSymbolNode * 1848 Demangler::demangleFunctionEncoding(StringView &MangledName) { 1849 FuncClass ExtraFlags = FC_None; 1850 if (MangledName.consumeFront("$$J0")) 1851 ExtraFlags = FC_ExternC; 1852 1853 if (MangledName.empty()) { 1854 Error = true; 1855 return nullptr; 1856 } 1857 1858 FuncClass FC = demangleFunctionClass(MangledName); 1859 FC = FuncClass(ExtraFlags | FC); 1860 1861 FunctionSignatureNode *FSN = nullptr; 1862 ThunkSignatureNode *TTN = nullptr; 1863 if (FC & FC_StaticThisAdjust) { 1864 TTN = Arena.alloc<ThunkSignatureNode>(); 1865 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1866 } else if (FC & FC_VirtualThisAdjust) { 1867 TTN = Arena.alloc<ThunkSignatureNode>(); 1868 if (FC & FC_VirtualThisAdjustEx) { 1869 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName); 1870 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName); 1871 } 1872 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName); 1873 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1874 } 1875 1876 if (FC & FC_NoParameterList) { 1877 // This is an extern "C" function whose full signature hasn't been mangled. 1878 // This happens when we need to mangle a local symbol inside of an extern 1879 // "C" function. 
1880 FSN = Arena.alloc<FunctionSignatureNode>(); 1881 } else { 1882 bool HasThisQuals = !(FC & (FC_Global | FC_Static)); 1883 FSN = demangleFunctionType(MangledName, HasThisQuals); 1884 } 1885 1886 if (Error) 1887 return nullptr; 1888 1889 if (TTN) { 1890 *static_cast<FunctionSignatureNode *>(TTN) = *FSN; 1891 FSN = TTN; 1892 } 1893 FSN->FunctionClass = FC; 1894 1895 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>(); 1896 Symbol->Signature = FSN; 1897 return Symbol; 1898 } 1899 1900 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) { 1901 assert(MangledName.startsWith('?')); 1902 MangledName.popFront(); 1903 1904 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>(); 1905 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1906 if (!MangledName.consumeFront('@')) 1907 Error = true; 1908 if (Error) 1909 return nullptr; 1910 return CTN; 1911 } 1912 1913 // Reads a primitive type. 1914 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) { 1915 if (MangledName.consumeFront("$$T")) 1916 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr); 1917 1918 switch (MangledName.popFront()) { 1919 case 'X': 1920 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void); 1921 case 'D': 1922 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char); 1923 case 'C': 1924 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar); 1925 case 'E': 1926 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar); 1927 case 'F': 1928 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short); 1929 case 'G': 1930 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort); 1931 case 'H': 1932 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int); 1933 case 'I': 1934 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint); 1935 case 'J': 1936 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long); 1937 case 'K': 1938 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong); 1939 
case 'M': 1940 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float); 1941 case 'N': 1942 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double); 1943 case 'O': 1944 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble); 1945 case '_': { 1946 if (MangledName.empty()) { 1947 Error = true; 1948 return nullptr; 1949 } 1950 switch (MangledName.popFront()) { 1951 case 'N': 1952 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool); 1953 case 'J': 1954 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64); 1955 case 'K': 1956 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64); 1957 case 'W': 1958 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar); 1959 case 'Q': 1960 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8); 1961 case 'S': 1962 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); 1963 case 'U': 1964 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); 1965 } 1966 break; 1967 } 1968 } 1969 Error = true; 1970 return nullptr; 1971 } 1972 1973 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) { 1974 TagTypeNode *TT = nullptr; 1975 1976 switch (MangledName.popFront()) { 1977 case 'T': 1978 TT = Arena.alloc<TagTypeNode>(TagKind::Union); 1979 break; 1980 case 'U': 1981 TT = Arena.alloc<TagTypeNode>(TagKind::Struct); 1982 break; 1983 case 'V': 1984 TT = Arena.alloc<TagTypeNode>(TagKind::Class); 1985 break; 1986 case 'W': 1987 if (!MangledName.consumeFront('4')) { 1988 Error = true; 1989 return nullptr; 1990 } 1991 TT = Arena.alloc<TagTypeNode>(TagKind::Enum); 1992 break; 1993 default: 1994 assert(false); 1995 } 1996 1997 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); 1998 return TT; 1999 } 2000 2001 // <pointer-type> ::= E? 
<pointer-cvr-qualifiers> <ext-qualifiers> <type> 2002 // # the E is required for 64-bit non-static pointers 2003 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) { 2004 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2005 2006 std::tie(Pointer->Quals, Pointer->Affinity) = 2007 demanglePointerCVQualifiers(MangledName); 2008 2009 if (MangledName.consumeFront("6")) { 2010 Pointer->Pointee = demangleFunctionType(MangledName, false); 2011 return Pointer; 2012 } 2013 2014 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2015 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2016 2017 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 2018 return Pointer; 2019 } 2020 2021 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) { 2022 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2023 2024 std::tie(Pointer->Quals, Pointer->Affinity) = 2025 demanglePointerCVQualifiers(MangledName); 2026 assert(Pointer->Affinity == PointerAffinity::Pointer); 2027 2028 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2029 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2030 2031 // isMemberPointer() only returns true if there is at least one character 2032 // after the qualifiers. 
2033 if (MangledName.consumeFront("8")) { 2034 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2035 Pointer->Pointee = demangleFunctionType(MangledName, true); 2036 } else { 2037 Qualifiers PointeeQuals = Q_None; 2038 bool IsMember = false; 2039 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 2040 assert(IsMember || Error); 2041 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2042 2043 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 2044 if (Pointer->Pointee) 2045 Pointer->Pointee->Quals = PointeeQuals; 2046 } 2047 2048 return Pointer; 2049 } 2050 2051 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { 2052 Qualifiers Quals = Q_None; 2053 if (MangledName.consumeFront('E')) 2054 Quals = Qualifiers(Quals | Q_Pointer64); 2055 if (MangledName.consumeFront('I')) 2056 Quals = Qualifiers(Quals | Q_Restrict); 2057 if (MangledName.consumeFront('F')) 2058 Quals = Qualifiers(Quals | Q_Unaligned); 2059 2060 return Quals; 2061 } 2062 2063 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) { 2064 assert(MangledName.front() == 'Y'); 2065 MangledName.popFront(); 2066 2067 uint64_t Rank = 0; 2068 bool IsNegative = false; 2069 std::tie(Rank, IsNegative) = demangleNumber(MangledName); 2070 if (IsNegative || Rank == 0) { 2071 Error = true; 2072 return nullptr; 2073 } 2074 2075 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>(); 2076 NodeList *Head = Arena.alloc<NodeList>(); 2077 NodeList *Tail = Head; 2078 2079 for (uint64_t I = 0; I < Rank; ++I) { 2080 uint64_t D = 0; 2081 std::tie(D, IsNegative) = demangleNumber(MangledName); 2082 if (Error || IsNegative) { 2083 Error = true; 2084 return nullptr; 2085 } 2086 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative); 2087 if (I + 1 < Rank) { 2088 Tail->Next = Arena.alloc<NodeList>(); 2089 Tail = Tail->Next; 2090 } 2091 } 2092 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank); 2093 2094 if 
(MangledName.consumeFront("$$C")) { 2095 bool IsMember = false; 2096 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); 2097 if (IsMember) { 2098 Error = true; 2099 return nullptr; 2100 } 2101 } 2102 2103 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 2104 return ATy; 2105 } 2106 2107 // Reads a function's parameters. 2108 NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName, 2109 bool &IsVariadic) { 2110 // Empty parameter list. 2111 if (MangledName.consumeFront('X')) 2112 return nullptr; 2113 2114 NodeList *Head = Arena.alloc<NodeList>(); 2115 NodeList **Current = &Head; 2116 size_t Count = 0; 2117 while (!Error && !MangledName.startsWith('@') && 2118 !MangledName.startsWith('Z')) { 2119 ++Count; 2120 2121 if (startsWithDigit(MangledName)) { 2122 size_t N = MangledName[0] - '0'; 2123 if (N >= Backrefs.FunctionParamCount) { 2124 Error = true; 2125 return nullptr; 2126 } 2127 MangledName = MangledName.dropFront(); 2128 2129 *Current = Arena.alloc<NodeList>(); 2130 (*Current)->N = Backrefs.FunctionParams[N]; 2131 Current = &(*Current)->Next; 2132 continue; 2133 } 2134 2135 size_t OldSize = MangledName.size(); 2136 2137 *Current = Arena.alloc<NodeList>(); 2138 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); 2139 if (!TN || Error) 2140 return nullptr; 2141 2142 (*Current)->N = TN; 2143 2144 size_t CharsConsumed = OldSize - MangledName.size(); 2145 assert(CharsConsumed != 0); 2146 2147 // Single-letter types are ignored for backreferences because memorizing 2148 // them doesn't save anything. 
2149 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) 2150 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; 2151 2152 Current = &(*Current)->Next; 2153 } 2154 2155 if (Error) 2156 return nullptr; 2157 2158 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); 2159 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2160 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2161 // the following Z could be a throw specifier. 2162 if (MangledName.consumeFront('@')) 2163 return NA; 2164 2165 if (MangledName.consumeFront('Z')) { 2166 IsVariadic = true; 2167 return NA; 2168 } 2169 2170 DEMANGLE_UNREACHABLE; 2171 } 2172 2173 NodeArrayNode * 2174 Demangler::demangleTemplateParameterList(StringView &MangledName) { 2175 NodeList *Head = nullptr; 2176 NodeList **Current = &Head; 2177 size_t Count = 0; 2178 2179 while (!MangledName.startsWith('@')) { 2180 if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") || 2181 MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) { 2182 // parameter pack separator 2183 continue; 2184 } 2185 2186 ++Count; 2187 2188 // Template parameter lists don't participate in back-referencing. 2189 *Current = Arena.alloc<NodeList>(); 2190 2191 NodeList &TP = **Current; 2192 2193 TemplateParameterReferenceNode *TPRN = nullptr; 2194 if (MangledName.consumeFront("$$Y")) { 2195 // Template alias 2196 TP.N = demangleFullyQualifiedTypeName(MangledName); 2197 } else if (MangledName.consumeFront("$$B")) { 2198 // Array 2199 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2200 } else if (MangledName.consumeFront("$$C")) { 2201 // Type has qualifiers. 
2202 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); 2203 } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") || 2204 MangledName.startsWith("$I") || MangledName.startsWith("$J")) { 2205 // Pointer to member 2206 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2207 TPRN->IsMemberPointer = true; 2208 2209 MangledName = MangledName.dropFront(); 2210 // 1 - single inheritance <name> 2211 // H - multiple inheritance <name> <number> 2212 // I - virtual inheritance <name> <number> <number> 2213 // J - unspecified inheritance <name> <number> <number> <number> 2214 char InheritanceSpecifier = MangledName.popFront(); 2215 SymbolNode *S = nullptr; 2216 if (MangledName.startsWith('?')) { 2217 S = parse(MangledName); 2218 if (Error || !S->Name) { 2219 Error = true; 2220 return nullptr; 2221 } 2222 memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); 2223 } 2224 2225 switch (InheritanceSpecifier) { 2226 case 'J': 2227 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2228 demangleSigned(MangledName); 2229 DEMANGLE_FALLTHROUGH; 2230 case 'I': 2231 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2232 demangleSigned(MangledName); 2233 DEMANGLE_FALLTHROUGH; 2234 case 'H': 2235 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2236 demangleSigned(MangledName); 2237 DEMANGLE_FALLTHROUGH; 2238 case '1': 2239 break; 2240 default: 2241 DEMANGLE_UNREACHABLE; 2242 } 2243 TPRN->Affinity = PointerAffinity::Pointer; 2244 TPRN->Symbol = S; 2245 } else if (MangledName.startsWith("$E?")) { 2246 MangledName.consumeFront("$E"); 2247 // Reference to symbol 2248 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2249 TPRN->Symbol = parse(MangledName); 2250 TPRN->Affinity = PointerAffinity::Reference; 2251 } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) { 2252 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2253 2254 // Data member pointer. 
2255 MangledName = MangledName.dropFront(); 2256 char InheritanceSpecifier = MangledName.popFront(); 2257 2258 switch (InheritanceSpecifier) { 2259 case 'G': 2260 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2261 demangleSigned(MangledName); 2262 DEMANGLE_FALLTHROUGH; 2263 case 'F': 2264 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2265 demangleSigned(MangledName); 2266 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2267 demangleSigned(MangledName); 2268 break; 2269 default: 2270 DEMANGLE_UNREACHABLE; 2271 } 2272 TPRN->IsMemberPointer = true; 2273 2274 } else if (MangledName.consumeFront("$0")) { 2275 // Integral non-type template parameter 2276 bool IsNegative = false; 2277 uint64_t Value = 0; 2278 std::tie(Value, IsNegative) = demangleNumber(MangledName); 2279 2280 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative); 2281 } else { 2282 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2283 } 2284 if (Error) 2285 return nullptr; 2286 2287 Current = &TP.Next; 2288 } 2289 2290 // The loop above returns nullptr on Error. 2291 assert(!Error); 2292 2293 // Template parameter lists cannot be variadic, so it can only be terminated 2294 // by @ (as opposed to 'Z' in the function parameter case). 2295 assert(MangledName.startsWith('@')); // The above loop exits only on '@'. 2296 MangledName.consumeFront('@'); 2297 return nodeListToNodeArray(Arena, Head, Count); 2298 } 2299 2300 void Demangler::dumpBackReferences() { 2301 std::printf("%d function parameter backreferences\n", 2302 (int)Backrefs.FunctionParamCount); 2303 2304 // Create an output stream so we can render each type. 
2305 OutputBuffer OB; 2306 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { 2307 OB.setCurrentPosition(0); 2308 2309 TypeNode *T = Backrefs.FunctionParams[I]; 2310 T->output(OB, OF_Default); 2311 2312 StringView B = OB; 2313 std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.begin()); 2314 } 2315 std::free(OB.getBuffer()); 2316 2317 if (Backrefs.FunctionParamCount > 0) 2318 std::printf("\n"); 2319 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); 2320 for (size_t I = 0; I < Backrefs.NamesCount; ++I) { 2321 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), 2322 Backrefs.Names[I]->Name.begin()); 2323 } 2324 if (Backrefs.NamesCount > 0) 2325 std::printf("\n"); 2326 } 2327 2328 char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled, 2329 char *Buf, size_t *N, 2330 int *Status, MSDemangleFlags Flags) { 2331 Demangler D; 2332 2333 StringView Name{MangledName}; 2334 SymbolNode *AST = D.parse(Name); 2335 if (!D.Error && NMangled) 2336 *NMangled = Name.begin() - MangledName; 2337 2338 if (Flags & MSDF_DumpBackrefs) 2339 D.dumpBackReferences(); 2340 2341 OutputFlags OF = OF_Default; 2342 if (Flags & MSDF_NoCallingConvention) 2343 OF = OutputFlags(OF | OF_NoCallingConvention); 2344 if (Flags & MSDF_NoAccessSpecifier) 2345 OF = OutputFlags(OF | OF_NoAccessSpecifier); 2346 if (Flags & MSDF_NoReturnType) 2347 OF = OutputFlags(OF | OF_NoReturnType); 2348 if (Flags & MSDF_NoMemberType) 2349 OF = OutputFlags(OF | OF_NoMemberType); 2350 if (Flags & MSDF_NoVariableType) 2351 OF = OutputFlags(OF | OF_NoVariableType); 2352 2353 int InternalStatus = demangle_success; 2354 if (D.Error) 2355 InternalStatus = demangle_invalid_mangled_name; 2356 else { 2357 OutputBuffer OB(Buf, N); 2358 AST->output(OB, OF); 2359 OB += '\0'; 2360 if (N != nullptr) 2361 *N = OB.getCurrentPosition(); 2362 Buf = OB.getBuffer(); 2363 } 2364 2365 if (Status) 2366 *Status = InternalStatus; 2367 return InternalStatus == demangle_success ? 
Buf : nullptr; 2368 } 2369