1 //===--- DLangDemangle.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines a demangler for the D programming language as specified 11 /// in the ABI specification, available at: 12 /// https://dlang.org/spec/abi.html#name_mangling 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/Demangle.h" 17 #include "llvm/Demangle/StringView.h" 18 #include "llvm/Demangle/Utility.h" 19 20 #include <cctype> 21 #include <cstring> 22 #include <limits> 23 24 using namespace llvm; 25 using llvm::itanium_demangle::OutputBuffer; 26 using llvm::itanium_demangle::StringView; 27 28 namespace { 29 30 /// Demangle information structure. 31 struct Demangler { 32 /// Initialize the information structure we use to pass around information. 33 /// 34 /// \param Mangled String to demangle. 35 Demangler(const char *Mangled); 36 37 /// Extract and demangle the mangled symbol and append it to the output 38 /// string. 39 /// 40 /// \param Demangled Output buffer to write the demangled name. 41 /// 42 /// \return The remaining string on success or nullptr on failure. 43 /// 44 /// \see https://dlang.org/spec/abi.html#name_mangling . 45 /// \see https://dlang.org/spec/abi.html#MangledName . 46 const char *parseMangle(OutputBuffer *Demangled); 47 48 private: 49 /// Extract and demangle a given mangled symbol and append it to the output 50 /// string. 51 /// 52 /// \param Demangled output buffer to write the demangled name. 53 /// \param Mangled mangled symbol to be demangled. 54 /// 55 /// \return The remaining string on success or nullptr on failure. 56 /// 57 /// \see https://dlang.org/spec/abi.html#name_mangling . 58 /// \see https://dlang.org/spec/abi.html#MangledName . 59 const char *parseMangle(OutputBuffer *Demangled, const char *Mangled); 60 61 /// Extract the number from a given string. 62 /// 63 /// \param Mangled string to extract the number. 64 /// \param Ret assigned result value. 65 /// 66 /// \return The remaining string on success or nullptr on failure. 67 /// 68 /// \note A result larger than UINT_MAX is considered a failure. 69 /// 70 /// \see https://dlang.org/spec/abi.html#Number . 71 const char *decodeNumber(const char *Mangled, unsigned long &Ret); 72 73 /// Extract the back reference position from a given string. 74 /// 75 /// \param Mangled string to extract the back reference position. 76 /// \param Ret assigned result value. 77 /// 78 /// \return the remaining string on success or nullptr on failure. 79 /// 80 /// \note Ret is always >= 0 on success, and unspecified on failure 81 /// 82 /// \see https://dlang.org/spec/abi.html#back_ref . 83 /// \see https://dlang.org/spec/abi.html#NumberBackRef . 84 const char *decodeBackrefPos(const char *Mangled, long &Ret); 85 86 /// Extract the symbol pointed by the back reference form a given string. 87 /// 88 /// \param Mangled string to extract the back reference position. 89 /// \param Ret assigned result value. 90 /// 91 /// \return the remaining string on success or nullptr on failure. 92 /// 93 /// \see https://dlang.org/spec/abi.html#back_ref . 94 const char *decodeBackref(const char *Mangled, const char *&Ret); 95 96 /// Extract and demangle backreferenced symbol from a given mangled symbol 97 /// and append it to the output string. 98 /// 99 /// \param Demangled output buffer to write the demangled name. 100 /// \param Mangled mangled symbol to be demangled. 101 /// 102 /// \return the remaining string on success or nullptr on failure. 103 /// 104 /// \see https://dlang.org/spec/abi.html#back_ref . 105 /// \see https://dlang.org/spec/abi.html#IdentifierBackRef . 106 const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled); 107 108 /// Extract and demangle backreferenced type from a given mangled symbol 109 /// and append it to the output string. 110 /// 111 /// \param Mangled mangled symbol to be demangled. 112 /// 113 /// \return the remaining string on success or nullptr on failure. 114 /// 115 /// \see https://dlang.org/spec/abi.html#back_ref . 116 /// \see https://dlang.org/spec/abi.html#TypeBackRef . 117 const char *parseTypeBackref(const char *Mangled); 118 119 /// Check whether it is the beginning of a symbol name. 120 /// 121 /// \param Mangled string to extract the symbol name. 122 /// 123 /// \return true on success, false otherwise. 124 /// 125 /// \see https://dlang.org/spec/abi.html#SymbolName . 126 bool isSymbolName(const char *Mangled); 127 128 /// Extract and demangle an identifier from a given mangled symbol append it 129 /// to the output string. 130 /// 131 /// \param Demangled Output buffer to write the demangled name. 132 /// \param Mangled Mangled symbol to be demangled. 133 /// 134 /// \return The remaining string on success or nullptr on failure. 135 /// 136 /// \see https://dlang.org/spec/abi.html#SymbolName . 137 const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled); 138 139 /// Extract and demangle the plain identifier from a given mangled symbol and 140 /// prepend/append it to the output string, with a special treatment for some 141 /// magic compiler generated symbols. 142 /// 143 /// \param Demangled Output buffer to write the demangled name. 144 /// \param Mangled Mangled symbol to be demangled. 145 /// \param Len Length of the mangled symbol name. 146 /// 147 /// \return The remaining string on success or nullptr on failure. 148 /// 149 /// \see https://dlang.org/spec/abi.html#LName . 150 const char *parseLName(OutputBuffer *Demangled, const char *Mangled, 151 unsigned long Len); 152 153 /// Extract and demangle the qualified symbol from a given mangled symbol 154 /// append it to the output string. 155 /// 156 /// \param Demangled Output buffer to write the demangled name. 157 /// \param Mangled Mangled symbol to be demangled. 158 /// 159 /// \return The remaining string on success or nullptr on failure. 160 /// 161 /// \see https://dlang.org/spec/abi.html#QualifiedName . 162 const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); 163 164 /// Extract and demangle a type from a given mangled symbol append it to 165 /// the output string. 166 /// 167 /// \param Mangled mangled symbol to be demangled. 168 /// 169 /// \return the remaining string on success or nullptr on failure. 170 /// 171 /// \see https://dlang.org/spec/abi.html#Type . 172 const char *parseType(const char *Mangled); 173 174 /// The string we are demangling. 175 const char *Str; 176 /// The index of the last back reference. 177 int LastBackref; 178 }; 179 180 } // namespace 181 182 const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) { 183 // Return nullptr if trying to extract something that isn't a digit. 184 if (Mangled == nullptr || !std::isdigit(*Mangled)) 185 return nullptr; 186 187 unsigned long Val = 0; 188 189 do { 190 unsigned long Digit = Mangled[0] - '0'; 191 192 // Check for overflow. 193 if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) 194 return nullptr; 195 196 Val = Val * 10 + Digit; 197 ++Mangled; 198 } while (std::isdigit(*Mangled)); 199 200 if (*Mangled == '\0') 201 return nullptr; 202 203 Ret = Val; 204 return Mangled; 205 } 206 207 const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) { 208 // Return nullptr if trying to extract something that isn't a digit 209 if (Mangled == nullptr || !std::isalpha(*Mangled)) 210 return nullptr; 211 212 // Any identifier or non-basic type that has been emitted to the mangled 213 // symbol before will not be emitted again, but is referenced by a special 214 // sequence encoding the relative position of the original occurrence in the 215 // mangled symbol name. 216 // Numbers in back references are encoded with base 26 by upper case letters 217 // A-Z for higher digits but lower case letters a-z for the last digit. 218 // NumberBackRef: 219 // [a-z] 220 // [A-Z] NumberBackRef 221 // ^ 222 unsigned long Val = 0; 223 224 while (std::isalpha(*Mangled)) { 225 // Check for overflow 226 if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26) 227 break; 228 229 Val *= 26; 230 231 if (Mangled[0] >= 'a' && Mangled[0] <= 'z') { 232 Val += Mangled[0] - 'a'; 233 if ((long)Val <= 0) 234 break; 235 Ret = Val; 236 return Mangled + 1; 237 } 238 239 Val += Mangled[0] - 'A'; 240 ++Mangled; 241 } 242 243 return nullptr; 244 } 245 246 const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) { 247 assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!"); 248 Ret = nullptr; 249 250 // Position of 'Q' 251 const char *Qpos = Mangled; 252 long RefPos; 253 ++Mangled; 254 255 Mangled = decodeBackrefPos(Mangled, RefPos); 256 if (Mangled == nullptr) 257 return nullptr; 258 259 if (RefPos > Qpos - Str) 260 return nullptr; 261 262 // Set the position of the back reference. 263 Ret = Qpos - RefPos; 264 265 return Mangled; 266 } 267 268 const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled, 269 const char *Mangled) { 270 // An identifier back reference always points to a digit 0 to 9. 271 // IdentifierBackRef: 272 // Q NumberBackRef 273 // ^ 274 const char *Backref; 275 unsigned long Len; 276 277 // Get position of the back reference 278 Mangled = decodeBackref(Mangled, Backref); 279 280 // Must point to a simple identifier 281 Backref = decodeNumber(Backref, Len); 282 if (Backref == nullptr || strlen(Backref) < Len) 283 return nullptr; 284 285 Backref = parseLName(Demangled, Backref, Len); 286 if (Backref == nullptr) 287 return nullptr; 288 289 return Mangled; 290 } 291 292 const char *Demangler::parseTypeBackref(const char *Mangled) { 293 // A type back reference always points to a letter. 294 // TypeBackRef: 295 // Q NumberBackRef 296 // ^ 297 const char *Backref; 298 299 // If we appear to be moving backwards through the mangle string, then 300 // bail as this may be a recursive back reference. 301 if (Mangled - Str >= LastBackref) 302 return nullptr; 303 304 int SaveRefPos = LastBackref; 305 LastBackref = Mangled - Str; 306 307 // Get position of the back reference. 308 Mangled = decodeBackref(Mangled, Backref); 309 310 // Can't decode back reference. 311 if (Backref == nullptr) 312 return nullptr; 313 314 // TODO: Add support for function type back references. 315 Backref = parseType(Backref); 316 317 LastBackref = SaveRefPos; 318 319 if (Backref == nullptr) 320 return nullptr; 321 322 return Mangled; 323 } 324 325 bool Demangler::isSymbolName(const char *Mangled) { 326 long Ret; 327 const char *Qref = Mangled; 328 329 if (std::isdigit(*Mangled)) 330 return true; 331 332 // TODO: Handle template instances. 333 334 if (*Mangled != 'Q') 335 return false; 336 337 Mangled = decodeBackrefPos(Mangled + 1, Ret); 338 if (Mangled == nullptr || Ret > Qref - Str) 339 return false; 340 341 return std::isdigit(Qref[-Ret]); 342 } 343 344 const char *Demangler::parseMangle(OutputBuffer *Demangled, 345 const char *Mangled) { 346 // A D mangled symbol is comprised of both scope and type information. 347 // MangleName: 348 // _D QualifiedName Type 349 // _D QualifiedName Z 350 // ^ 351 // The caller should have guaranteed that the start pointer is at the 352 // above location. 353 // Note that type is never a function type, but only the return type of 354 // a function or the type of a variable. 355 Mangled += 2; 356 357 Mangled = parseQualified(Demangled, Mangled); 358 359 if (Mangled != nullptr) { 360 // Artificial symbols end with 'Z' and have no type. 361 if (*Mangled == 'Z') 362 ++Mangled; 363 else { 364 Mangled = parseType(Mangled); 365 } 366 } 367 368 return Mangled; 369 } 370 371 const char *Demangler::parseQualified(OutputBuffer *Demangled, 372 const char *Mangled) { 373 // Qualified names are identifiers separated by their encoded length. 374 // Nested functions also encode their argument types without specifying 375 // what they return. 376 // QualifiedName: 377 // SymbolFunctionName 378 // SymbolFunctionName QualifiedName 379 // ^ 380 // SymbolFunctionName: 381 // SymbolName 382 // SymbolName TypeFunctionNoReturn 383 // SymbolName M TypeFunctionNoReturn 384 // SymbolName M TypeModifiers TypeFunctionNoReturn 385 // The start pointer should be at the above location. 386 387 // Whether it has more than one symbol 388 size_t NotFirst = false; 389 do { 390 // Skip over anonymous symbols. 391 if (*Mangled == '0') { 392 do 393 ++Mangled; 394 while (*Mangled == '0'); 395 396 continue; 397 } 398 399 if (NotFirst) 400 *Demangled << '.'; 401 NotFirst = true; 402 403 Mangled = parseIdentifier(Demangled, Mangled); 404 405 } while (Mangled && isSymbolName(Mangled)); 406 407 return Mangled; 408 } 409 410 const char *Demangler::parseIdentifier(OutputBuffer *Demangled, 411 const char *Mangled) { 412 unsigned long Len; 413 414 if (Mangled == nullptr || *Mangled == '\0') 415 return nullptr; 416 417 if (*Mangled == 'Q') 418 return parseSymbolBackref(Demangled, Mangled); 419 420 // TODO: Parse lengthless template instances. 421 422 const char *Endptr = decodeNumber(Mangled, Len); 423 424 if (Endptr == nullptr || Len == 0) 425 return nullptr; 426 427 if (strlen(Endptr) < Len) 428 return nullptr; 429 430 Mangled = Endptr; 431 432 // TODO: Parse template instances with a length prefix. 433 434 // There can be multiple different declarations in the same function that 435 // have the same mangled name. To make the mangled names unique, a fake 436 // parent in the form `__Sddd' is added to the symbol. 437 if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') { 438 const char *NumPtr = Mangled + 3; 439 while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr)) 440 ++NumPtr; 441 442 if (Mangled + Len == NumPtr) { 443 // Skip over the fake parent. 444 Mangled += Len; 445 return parseIdentifier(Demangled, Mangled); 446 } 447 448 // Else demangle it as a plain identifier. 449 } 450 451 return parseLName(Demangled, Mangled, Len); 452 } 453 454 const char *Demangler::parseType(const char *Mangled) { 455 if (*Mangled == '\0') 456 return nullptr; 457 458 switch (*Mangled) { 459 // TODO: Parse type qualifiers. 460 // TODO: Parse function types. 461 // TODO: Parse compound types. 462 // TODO: Parse delegate types. 463 // TODO: Parse tuple types. 464 465 // Basic types. 466 case 'i': 467 ++Mangled; 468 // TODO: Add type name dumping 469 return Mangled; 470 471 // TODO: Add support for the rest of the basic types. 472 473 // Back referenced type. 474 case 'Q': 475 return parseTypeBackref(Mangled); 476 477 default: // unhandled. 478 return nullptr; 479 } 480 } 481 482 const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, 483 unsigned long Len) { 484 switch (Len) { 485 case 6: 486 if (strncmp(Mangled, "__initZ", Len + 1) == 0) { 487 // The static initializer for a given symbol. 488 Demangled->prepend("initializer for "); 489 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 490 Mangled += Len; 491 return Mangled; 492 } 493 if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) { 494 // The vtable symbol for a given class. 495 Demangled->prepend("vtable for "); 496 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 497 Mangled += Len; 498 return Mangled; 499 } 500 break; 501 502 case 7: 503 if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) { 504 // The classinfo symbol for a given class. 505 Demangled->prepend("ClassInfo for "); 506 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 507 Mangled += Len; 508 return Mangled; 509 } 510 break; 511 512 case 11: 513 if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) { 514 // The interface symbol for a given class. 515 Demangled->prepend("Interface for "); 516 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 517 Mangled += Len; 518 return Mangled; 519 } 520 break; 521 522 case 12: 523 if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) { 524 // The ModuleInfo symbol for a given module. 525 Demangled->prepend("ModuleInfo for "); 526 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 527 Mangled += Len; 528 return Mangled; 529 } 530 break; 531 } 532 533 *Demangled << StringView(Mangled, Len); 534 Mangled += Len; 535 536 return Mangled; 537 } 538 539 Demangler::Demangler(const char *Mangled) 540 : Str(Mangled), LastBackref(strlen(Mangled)) {} 541 542 const char *Demangler::parseMangle(OutputBuffer *Demangled) { 543 return parseMangle(Demangled, this->Str); 544 } 545 546 char *llvm::dlangDemangle(const char *MangledName) { 547 if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0) 548 return nullptr; 549 550 OutputBuffer Demangled; 551 if (strcmp(MangledName, "_Dmain") == 0) { 552 Demangled << "D main"; 553 } else { 554 555 Demangler D = Demangler(MangledName); 556 MangledName = D.parseMangle(&Demangled); 557 558 // Check that the entire symbol was successfully demangled. 559 if (MangledName == nullptr || *MangledName != '\0') { 560 std::free(Demangled.getBuffer()); 561 return nullptr; 562 } 563 } 564 565 // OutputBuffer's internal buffer is not null terminated and therefore we need 566 // to add it to comply with C null terminated strings. 567 if (Demangled.getCurrentPosition() > 0) { 568 Demangled << '\0'; 569 Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); 570 return Demangled.getBuffer(); 571 } 572 573 std::free(Demangled.getBuffer()); 574 return nullptr; 575 } 576