1 //===--- DLangDemangle.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines a demangler for the D programming language as specified 11 /// in the ABI specification, available at: 12 /// https://dlang.org/spec/abi.html#name_mangling 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/Demangle.h" 17 #include "llvm/Demangle/StringViewExtras.h" 18 #include "llvm/Demangle/Utility.h" 19 20 #include <cctype> 21 #include <cstring> 22 #include <limits> 23 #include <string_view> 24 25 using namespace llvm; 26 using llvm::itanium_demangle::OutputBuffer; 27 using llvm::itanium_demangle::starts_with; 28 29 namespace { 30 31 /// Demangle information structure. 32 struct Demangler { 33 /// Initialize the information structure we use to pass around information. 34 /// 35 /// \param Mangled String to demangle. 36 Demangler(std::string_view Mangled); 37 38 /// Extract and demangle the mangled symbol and append it to the output 39 /// string. 40 /// 41 /// \param Demangled Output buffer to write the demangled name. 42 /// 43 /// \return The remaining string on success or nullptr on failure. 44 /// 45 /// \see https://dlang.org/spec/abi.html#name_mangling . 46 /// \see https://dlang.org/spec/abi.html#MangledName . 47 const char *parseMangle(OutputBuffer *Demangled); 48 49 private: 50 /// Extract and demangle a given mangled symbol and append it to the output 51 /// string. 52 /// 53 /// \param Demangled output buffer to write the demangled name. 54 /// \param Mangled mangled symbol to be demangled. 55 /// 56 /// \see https://dlang.org/spec/abi.html#name_mangling . 57 /// \see https://dlang.org/spec/abi.html#MangledName . 58 void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled); 59 60 /// Extract the number from a given string. 61 /// 62 /// \param Mangled string to extract the number. 63 /// \param Ret assigned result value. 64 /// 65 /// \note Ret larger than UINT_MAX is considered a failure. 66 /// 67 /// \see https://dlang.org/spec/abi.html#Number . 68 void decodeNumber(std::string_view &Mangled, unsigned long &Ret); 69 70 /// Extract the back reference position from a given string. 71 /// 72 /// \param Mangled string to extract the back reference position. 73 /// \param Ret assigned result value. 74 /// 75 /// \return true on success, false on error. 76 /// 77 /// \note Ret is always >= 0 on success, and unspecified on failure 78 /// 79 /// \see https://dlang.org/spec/abi.html#back_ref . 80 /// \see https://dlang.org/spec/abi.html#NumberBackRef . 81 bool decodeBackrefPos(std::string_view &Mangled, long &Ret); 82 83 /// Extract the symbol pointed by the back reference form a given string. 84 /// 85 /// \param Mangled string to extract the back reference position. 86 /// \param Ret assigned result value. 87 /// 88 /// \return true on success, false on error. 89 /// 90 /// \see https://dlang.org/spec/abi.html#back_ref . 91 bool decodeBackref(std::string_view &Mangled, std::string_view &Ret); 92 93 /// Extract and demangle backreferenced symbol from a given mangled symbol 94 /// and append it to the output string. 95 /// 96 /// \param Demangled output buffer to write the demangled name. 97 /// \param Mangled mangled symbol to be demangled. 98 /// 99 /// \see https://dlang.org/spec/abi.html#back_ref . 100 /// \see https://dlang.org/spec/abi.html#IdentifierBackRef . 101 void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled); 102 103 /// Extract and demangle backreferenced type from a given mangled symbol 104 /// and append it to the output string. 105 /// 106 /// \param Mangled mangled symbol to be demangled. 107 /// 108 /// \see https://dlang.org/spec/abi.html#back_ref . 109 /// \see https://dlang.org/spec/abi.html#TypeBackRef . 110 void parseTypeBackref(std::string_view &Mangled); 111 112 /// Check whether it is the beginning of a symbol name. 113 /// 114 /// \param Mangled string to extract the symbol name. 115 /// 116 /// \return true on success, false otherwise. 117 /// 118 /// \see https://dlang.org/spec/abi.html#SymbolName . 119 bool isSymbolName(std::string_view Mangled); 120 121 /// Extract and demangle an identifier from a given mangled symbol append it 122 /// to the output string. 123 /// 124 /// \param Demangled Output buffer to write the demangled name. 125 /// \param Mangled Mangled symbol to be demangled. 126 /// 127 /// \see https://dlang.org/spec/abi.html#SymbolName . 128 void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled); 129 130 /// Extract and demangle the plain identifier from a given mangled symbol and 131 /// prepend/append it to the output string, with a special treatment for some 132 /// magic compiler generated symbols. 133 /// 134 /// \param Demangled Output buffer to write the demangled name. 135 /// \param Mangled Mangled symbol to be demangled. 136 /// \param Len Length of the mangled symbol name. 137 /// 138 /// \see https://dlang.org/spec/abi.html#LName . 139 void parseLName(OutputBuffer *Demangled, std::string_view &Mangled, 140 unsigned long Len); 141 142 /// Extract and demangle the qualified symbol from a given mangled symbol 143 /// append it to the output string. 144 /// 145 /// \param Demangled Output buffer to write the demangled name. 146 /// \param Mangled Mangled symbol to be demangled. 147 /// 148 /// \see https://dlang.org/spec/abi.html#QualifiedName . 149 void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled); 150 151 /// Extract and demangle a type from a given mangled symbol append it to 152 /// the output string. 153 /// 154 /// \param Mangled mangled symbol to be demangled. 155 /// 156 /// \return true on success, false on error. 157 /// 158 /// \see https://dlang.org/spec/abi.html#Type . 159 bool parseType(std::string_view &Mangled); 160 161 /// An immutable view of the string we are demangling. 162 const std::string_view Str; 163 /// The index of the last back reference. 164 int LastBackref; 165 }; 166 167 } // namespace 168 169 void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) { 170 // Clear Mangled if trying to extract something that isn't a digit. 171 if (Mangled.empty()) { 172 Mangled = {}; 173 return; 174 } 175 176 if (!std::isdigit(Mangled.front())) { 177 Mangled = {}; 178 return; 179 } 180 181 unsigned long Val = 0; 182 183 do { 184 unsigned long Digit = Mangled[0] - '0'; 185 186 // Check for overflow. 187 if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) { 188 Mangled = {}; 189 return; 190 } 191 192 Val = Val * 10 + Digit; 193 Mangled.remove_prefix(1); 194 } while (!Mangled.empty() && std::isdigit(Mangled.front())); 195 196 if (Mangled.empty()) { 197 Mangled = {}; 198 return; 199 } 200 201 Ret = Val; 202 } 203 204 bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) { 205 // Return nullptr if trying to extract something that isn't a digit 206 if (Mangled.empty()) { 207 Mangled = {}; 208 return false; 209 } 210 // Any identifier or non-basic type that has been emitted to the mangled 211 // symbol before will not be emitted again, but is referenced by a special 212 // sequence encoding the relative position of the original occurrence in the 213 // mangled symbol name. 214 // Numbers in back references are encoded with base 26 by upper case letters 215 // A-Z for higher digits but lower case letters a-z for the last digit. 216 // NumberBackRef: 217 // [a-z] 218 // [A-Z] NumberBackRef 219 // ^ 220 unsigned long Val = 0; 221 222 while (!Mangled.empty() && std::isalpha(Mangled.front())) { 223 // Check for overflow 224 if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26) 225 break; 226 227 Val *= 26; 228 229 if (Mangled[0] >= 'a' && Mangled[0] <= 'z') { 230 Val += Mangled[0] - 'a'; 231 if ((long)Val <= 0) 232 break; 233 Ret = Val; 234 Mangled.remove_prefix(1); 235 return true; 236 } 237 238 Val += Mangled[0] - 'A'; 239 Mangled.remove_prefix(1); 240 } 241 242 Mangled = {}; 243 return false; 244 } 245 246 bool Demangler::decodeBackref(std::string_view &Mangled, 247 std::string_view &Ret) { 248 assert(!Mangled.empty() && Mangled.front() == 'Q' && 249 "Invalid back reference!"); 250 Ret = {}; 251 252 // Position of 'Q' 253 const char *Qpos = Mangled.data(); 254 long RefPos; 255 Mangled.remove_prefix(1); 256 257 if (!decodeBackrefPos(Mangled, RefPos)) { 258 Mangled = {}; 259 return false; 260 } 261 262 if (RefPos > Qpos - Str.data()) { 263 Mangled = {}; 264 return false; 265 } 266 267 // Set the position of the back reference. 268 Ret = Qpos - RefPos; 269 270 return true; 271 } 272 273 void Demangler::parseSymbolBackref(OutputBuffer *Demangled, 274 std::string_view &Mangled) { 275 // An identifier back reference always points to a digit 0 to 9. 276 // IdentifierBackRef: 277 // Q NumberBackRef 278 // ^ 279 unsigned long Len; 280 281 // Get position of the back reference 282 std::string_view Backref; 283 if (!decodeBackref(Mangled, Backref)) { 284 Mangled = {}; 285 return; 286 } 287 288 // Must point to a simple identifier 289 decodeNumber(Backref, Len); 290 if (Backref.empty() || Backref.length() < Len) { 291 Mangled = {}; 292 return; 293 } 294 295 parseLName(Demangled, Backref, Len); 296 if (Backref.empty()) 297 Mangled = {}; 298 } 299 300 void Demangler::parseTypeBackref(std::string_view &Mangled) { 301 // A type back reference always points to a letter. 302 // TypeBackRef: 303 // Q NumberBackRef 304 // ^ 305 306 // If we appear to be moving backwards through the mangle string, then 307 // bail as this may be a recursive back reference. 308 if (Mangled.data() - Str.data() >= LastBackref) { 309 Mangled = {}; 310 return; 311 } 312 313 int SaveRefPos = LastBackref; 314 LastBackref = Mangled.data() - Str.data(); 315 316 // Get position of the back reference. 317 std::string_view Backref; 318 if (!decodeBackref(Mangled, Backref)) { 319 Mangled = {}; 320 return; 321 } 322 323 // Can't decode back reference. 324 if (Backref.empty()) { 325 Mangled = {}; 326 return; 327 } 328 329 // TODO: Add support for function type back references. 330 if (!parseType(Backref)) 331 Mangled = {}; 332 333 LastBackref = SaveRefPos; 334 335 if (Backref.empty()) 336 Mangled = {}; 337 } 338 339 bool Demangler::isSymbolName(std::string_view Mangled) { 340 long Ret; 341 const char *Qref = Mangled.data(); 342 343 if (std::isdigit(Mangled.front())) 344 return true; 345 346 // TODO: Handle template instances. 347 348 if (Mangled.front() != 'Q') 349 return false; 350 351 Mangled.remove_prefix(1); 352 bool Valid = decodeBackrefPos(Mangled, Ret); 353 if (!Valid || Ret > Qref - Str.data()) 354 return false; 355 356 return std::isdigit(Qref[-Ret]); 357 } 358 359 void Demangler::parseMangle(OutputBuffer *Demangled, 360 std::string_view &Mangled) { 361 // A D mangled symbol is comprised of both scope and type information. 362 // MangleName: 363 // _D QualifiedName Type 364 // _D QualifiedName Z 365 // ^ 366 // The caller should have guaranteed that the start pointer is at the 367 // above location. 368 // Note that type is never a function type, but only the return type of 369 // a function or the type of a variable. 370 Mangled.remove_prefix(2); 371 372 parseQualified(Demangled, Mangled); 373 374 if (Mangled.empty()) { 375 Mangled = {}; 376 return; 377 } 378 379 // Artificial symbols end with 'Z' and have no type. 380 if (Mangled.front() == 'Z') { 381 Mangled.remove_prefix(1); 382 } else if (!parseType(Mangled)) 383 Mangled = {}; 384 } 385 386 void Demangler::parseQualified(OutputBuffer *Demangled, 387 std::string_view &Mangled) { 388 // Qualified names are identifiers separated by their encoded length. 389 // Nested functions also encode their argument types without specifying 390 // what they return. 391 // QualifiedName: 392 // SymbolFunctionName 393 // SymbolFunctionName QualifiedName 394 // ^ 395 // SymbolFunctionName: 396 // SymbolName 397 // SymbolName TypeFunctionNoReturn 398 // SymbolName M TypeFunctionNoReturn 399 // SymbolName M TypeModifiers TypeFunctionNoReturn 400 // The start pointer should be at the above location. 401 402 // Whether it has more than one symbol 403 size_t NotFirst = false; 404 do { 405 // Skip over anonymous symbols. 406 if (!Mangled.empty() && Mangled.front() == '0') { 407 do 408 Mangled.remove_prefix(1); 409 while (!Mangled.empty() && Mangled.front() == '0'); 410 411 continue; 412 } 413 414 if (NotFirst) 415 *Demangled << '.'; 416 NotFirst = true; 417 418 parseIdentifier(Demangled, Mangled); 419 } while (!Mangled.empty() && isSymbolName(Mangled)); 420 } 421 422 void Demangler::parseIdentifier(OutputBuffer *Demangled, 423 std::string_view &Mangled) { 424 if (Mangled.empty()) { 425 Mangled = {}; 426 return; 427 } 428 429 if (Mangled.front() == 'Q') 430 return parseSymbolBackref(Demangled, Mangled); 431 432 // TODO: Parse lengthless template instances. 433 434 unsigned long Len; 435 decodeNumber(Mangled, Len); 436 437 if (Mangled.empty()) { 438 Mangled = {}; 439 return; 440 } 441 if (!Len || Mangled.length() < Len) { 442 Mangled = {}; 443 return; 444 } 445 446 // TODO: Parse template instances with a length prefix. 447 448 // There can be multiple different declarations in the same function that 449 // have the same mangled name. To make the mangled names unique, a fake 450 // parent in the form `__Sddd' is added to the symbol. 451 if (Len >= 4 && starts_with(Mangled, "__S")) { 452 const size_t SuffixLen = Mangled.length() - Len; 453 std::string_view P = Mangled.substr(3); 454 while (P.length() > SuffixLen && std::isdigit(P.front())) 455 P.remove_prefix(1); 456 if (P.length() == SuffixLen) { 457 // Skip over the fake parent. 458 Mangled.remove_prefix(Len); 459 return parseIdentifier(Demangled, Mangled); 460 } 461 462 // Else demangle it as a plain identifier. 463 } 464 465 parseLName(Demangled, Mangled, Len); 466 } 467 468 bool Demangler::parseType(std::string_view &Mangled) { 469 if (Mangled.empty()) { 470 Mangled = {}; 471 return false; 472 } 473 474 switch (Mangled.front()) { 475 // TODO: Parse type qualifiers. 476 // TODO: Parse function types. 477 // TODO: Parse compound types. 478 // TODO: Parse delegate types. 479 // TODO: Parse tuple types. 480 481 // Basic types. 482 case 'i': 483 Mangled.remove_prefix(1); 484 // TODO: Add type name dumping 485 return true; 486 487 // TODO: Add support for the rest of the basic types. 488 489 // Back referenced type. 490 case 'Q': { 491 parseTypeBackref(Mangled); 492 return true; 493 } 494 495 default: // unhandled. 496 Mangled = {}; 497 return false; 498 } 499 } 500 501 void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled, 502 unsigned long Len) { 503 switch (Len) { 504 case 6: 505 if (starts_with(Mangled, "__initZ")) { 506 // The static initializer for a given symbol. 507 Demangled->prepend("initializer for "); 508 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 509 Mangled.remove_prefix(Len); 510 return; 511 } 512 if (starts_with(Mangled, "__vtblZ")) { 513 // The vtable symbol for a given class. 514 Demangled->prepend("vtable for "); 515 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 516 Mangled.remove_prefix(Len); 517 return; 518 } 519 break; 520 521 case 7: 522 if (starts_with(Mangled, "__ClassZ")) { 523 // The classinfo symbol for a given class. 524 Demangled->prepend("ClassInfo for "); 525 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 526 Mangled.remove_prefix(Len); 527 return; 528 } 529 break; 530 531 case 11: 532 if (starts_with(Mangled, "__InterfaceZ")) { 533 // The interface symbol for a given class. 534 Demangled->prepend("Interface for "); 535 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 536 Mangled.remove_prefix(Len); 537 return; 538 } 539 break; 540 541 case 12: 542 if (starts_with(Mangled, "__ModuleInfoZ")) { 543 // The ModuleInfo symbol for a given module. 544 Demangled->prepend("ModuleInfo for "); 545 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 546 Mangled.remove_prefix(Len); 547 return; 548 } 549 break; 550 } 551 552 *Demangled << Mangled.substr(0, Len); 553 Mangled.remove_prefix(Len); 554 } 555 556 Demangler::Demangler(std::string_view Mangled) 557 : Str(Mangled), LastBackref(Mangled.length()) {} 558 559 const char *Demangler::parseMangle(OutputBuffer *Demangled) { 560 std::string_view M(this->Str); 561 parseMangle(Demangled, M); 562 return M.data(); 563 } 564 565 char *llvm::dlangDemangle(std::string_view MangledName) { 566 if (MangledName.empty() || !starts_with(MangledName, "_D")) 567 return nullptr; 568 569 OutputBuffer Demangled; 570 if (MangledName == "_Dmain") { 571 Demangled << "D main"; 572 } else { 573 574 Demangler D(MangledName); 575 const char *M = D.parseMangle(&Demangled); 576 577 // Check that the entire symbol was successfully demangled. 578 if (M == nullptr || *M != '\0') { 579 std::free(Demangled.getBuffer()); 580 return nullptr; 581 } 582 } 583 584 // OutputBuffer's internal buffer is not null terminated and therefore we need 585 // to add it to comply with C null terminated strings. 586 if (Demangled.getCurrentPosition() > 0) { 587 Demangled << '\0'; 588 Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); 589 return Demangled.getBuffer(); 590 } 591 592 std::free(Demangled.getBuffer()); 593 return nullptr; 594 } 595