1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/Demangle.h" 15 #include "llvm/Demangle/StringView.h" 16 #include "llvm/Demangle/Utility.h" 17 18 #include <algorithm> 19 #include <cassert> 20 #include <cstdint> 21 #include <cstring> 22 #include <limits> 23 24 using namespace llvm; 25 26 using llvm::itanium_demangle::OutputBuffer; 27 using llvm::itanium_demangle::ScopedOverride; 28 using llvm::itanium_demangle::StringView; 29 30 namespace { 31 32 struct Identifier { 33 StringView Name; 34 bool Punycode; 35 36 bool empty() const { return Name.empty(); } 37 }; 38 39 enum class BasicType { 40 Bool, 41 Char, 42 I8, 43 I16, 44 I32, 45 I64, 46 I128, 47 ISize, 48 U8, 49 U16, 50 U32, 51 U64, 52 U128, 53 USize, 54 F32, 55 F64, 56 Str, 57 Placeholder, 58 Unit, 59 Variadic, 60 Never, 61 }; 62 63 enum class IsInType { 64 No, 65 Yes, 66 }; 67 68 enum class LeaveGenericsOpen { 69 No, 70 Yes, 71 }; 72 73 class Demangler { 74 // Maximum recursion level. Used to avoid stack overflow. 75 size_t MaxRecursionLevel; 76 // Current recursion level. 77 size_t RecursionLevel; 78 size_t BoundLifetimes; 79 // Input string that is being demangled with "_R" prefix removed. 80 StringView Input; 81 // Position in the input string. 82 size_t Position; 83 // When true, print methods append the output to the stream. 84 // When false, the output is suppressed. 85 bool Print; 86 // True if an error occurred. 87 bool Error; 88 89 public: 90 // Demangled output. 91 OutputBuffer Output; 92 93 Demangler(size_t MaxRecursionLevel = 500); 94 95 bool demangle(StringView MangledName); 96 97 private: 98 bool demanglePath(IsInType Type, 99 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); 100 void demangleImplPath(IsInType InType); 101 void demangleGenericArg(); 102 void demangleType(); 103 void demangleFnSig(); 104 void demangleDynBounds(); 105 void demangleDynTrait(); 106 void demangleOptionalBinder(); 107 void demangleConst(); 108 void demangleConstInt(); 109 void demangleConstBool(); 110 void demangleConstChar(); 111 112 template <typename Callable> void demangleBackref(Callable Demangler) { 113 uint64_t Backref = parseBase62Number(); 114 if (Error || Backref >= Position) { 115 Error = true; 116 return; 117 } 118 119 if (!Print) 120 return; 121 122 ScopedOverride<size_t> SavePosition(Position, Position); 123 Position = Backref; 124 Demangler(); 125 } 126 127 Identifier parseIdentifier(); 128 uint64_t parseOptionalBase62Number(char Tag); 129 uint64_t parseBase62Number(); 130 uint64_t parseDecimalNumber(); 131 uint64_t parseHexNumber(StringView &HexDigits); 132 133 void print(char C); 134 void print(StringView S); 135 void printDecimalNumber(uint64_t N); 136 void printBasicType(BasicType); 137 void printLifetime(uint64_t Index); 138 void printIdentifier(Identifier Ident); 139 140 char look() const; 141 char consume(); 142 bool consumeIf(char Prefix); 143 144 bool addAssign(uint64_t &A, uint64_t B); 145 bool mulAssign(uint64_t &A, uint64_t B); 146 }; 147 148 } // namespace 149 150 char *llvm::rustDemangle(const char *MangledName) { 151 if (MangledName == nullptr) 152 return nullptr; 153 154 // Return early if mangled name doesn't look like a Rust symbol. 155 StringView Mangled(MangledName); 156 if (!Mangled.startsWith("_R")) 157 return nullptr; 158 159 Demangler D; 160 if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) 161 return nullptr; 162 163 if (!D.demangle(Mangled)) { 164 std::free(D.Output.getBuffer()); 165 return nullptr; 166 } 167 168 D.Output += '\0'; 169 170 return D.Output.getBuffer(); 171 } 172 173 Demangler::Demangler(size_t MaxRecursionLevel) 174 : MaxRecursionLevel(MaxRecursionLevel) {} 175 176 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 177 178 static inline bool isHexDigit(const char C) { 179 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 180 } 181 182 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 183 184 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 185 186 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 187 static inline bool isValid(const char C) { 188 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 189 } 190 191 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 192 // otherwise. The demangled symbol is stored in Output field. It is 193 // responsibility of the caller to free the memory behind the output stream. 194 // 195 // <symbol-name> = "_R" <path> [<instantiating-crate>] 196 bool Demangler::demangle(StringView Mangled) { 197 Position = 0; 198 Error = false; 199 Print = true; 200 RecursionLevel = 0; 201 BoundLifetimes = 0; 202 203 if (!Mangled.consumeFront("_R")) { 204 Error = true; 205 return false; 206 } 207 size_t Dot = Mangled.find('.'); 208 Input = Mangled.substr(0, Dot); 209 StringView Suffix = Mangled.dropFront(Dot); 210 211 demanglePath(IsInType::No); 212 213 if (Position != Input.size()) { 214 ScopedOverride<bool> SavePrint(Print, false); 215 demanglePath(IsInType::No); 216 } 217 218 if (Position != Input.size()) 219 Error = true; 220 221 if (!Suffix.empty()) { 222 print(" ("); 223 print(Suffix); 224 print(")"); 225 } 226 227 return !Error; 228 } 229 230 // Demangles a path. InType indicates whether a path is inside a type. When 231 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the 232 // output. Return value indicates whether generics arguments have been left 233 // open. 234 // 235 // <path> = "C" <identifier> // crate root 236 // | "M" <impl-path> <type> // <T> (inherent impl) 237 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 238 // | "Y" <type> <path> // <T as Trait> (trait definition) 239 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 240 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 241 // | <backref> 242 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 243 // <ns> = "C" // closure 244 // | "S" // shim 245 // | <A-Z> // other special namespaces 246 // | <a-z> // internal namespaces 247 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { 248 if (Error || RecursionLevel >= MaxRecursionLevel) { 249 Error = true; 250 return false; 251 } 252 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 253 254 switch (consume()) { 255 case 'C': { 256 parseOptionalBase62Number('s'); 257 printIdentifier(parseIdentifier()); 258 break; 259 } 260 case 'M': { 261 demangleImplPath(InType); 262 print("<"); 263 demangleType(); 264 print(">"); 265 break; 266 } 267 case 'X': { 268 demangleImplPath(InType); 269 print("<"); 270 demangleType(); 271 print(" as "); 272 demanglePath(IsInType::Yes); 273 print(">"); 274 break; 275 } 276 case 'Y': { 277 print("<"); 278 demangleType(); 279 print(" as "); 280 demanglePath(IsInType::Yes); 281 print(">"); 282 break; 283 } 284 case 'N': { 285 char NS = consume(); 286 if (!isLower(NS) && !isUpper(NS)) { 287 Error = true; 288 break; 289 } 290 demanglePath(InType); 291 292 uint64_t Disambiguator = parseOptionalBase62Number('s'); 293 Identifier Ident = parseIdentifier(); 294 295 if (isUpper(NS)) { 296 // Special namespaces 297 print("::{"); 298 if (NS == 'C') 299 print("closure"); 300 else if (NS == 'S') 301 print("shim"); 302 else 303 print(NS); 304 if (!Ident.empty()) { 305 print(":"); 306 printIdentifier(Ident); 307 } 308 print('#'); 309 printDecimalNumber(Disambiguator); 310 print('}'); 311 } else { 312 // Implementation internal namespaces. 313 if (!Ident.empty()) { 314 print("::"); 315 printIdentifier(Ident); 316 } 317 } 318 break; 319 } 320 case 'I': { 321 demanglePath(InType); 322 // Omit "::" when in a type, where it is optional. 323 if (InType == IsInType::No) 324 print("::"); 325 print("<"); 326 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 327 if (I > 0) 328 print(", "); 329 demangleGenericArg(); 330 } 331 if (LeaveOpen == LeaveGenericsOpen::Yes) 332 return true; 333 else 334 print(">"); 335 break; 336 } 337 case 'B': { 338 bool IsOpen = false; 339 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); 340 return IsOpen; 341 } 342 default: 343 Error = true; 344 break; 345 } 346 347 return false; 348 } 349 350 // <impl-path> = [<disambiguator>] <path> 351 // <disambiguator> = "s" <base-62-number> 352 void Demangler::demangleImplPath(IsInType InType) { 353 ScopedOverride<bool> SavePrint(Print, false); 354 parseOptionalBase62Number('s'); 355 demanglePath(InType); 356 } 357 358 // <generic-arg> = <lifetime> 359 // | <type> 360 // | "K" <const> 361 // <lifetime> = "L" <base-62-number> 362 void Demangler::demangleGenericArg() { 363 if (consumeIf('L')) 364 printLifetime(parseBase62Number()); 365 else if (consumeIf('K')) 366 demangleConst(); 367 else 368 demangleType(); 369 } 370 371 // <basic-type> = "a" // i8 372 // | "b" // bool 373 // | "c" // char 374 // | "d" // f64 375 // | "e" // str 376 // | "f" // f32 377 // | "h" // u8 378 // | "i" // isize 379 // | "j" // usize 380 // | "l" // i32 381 // | "m" // u32 382 // | "n" // i128 383 // | "o" // u128 384 // | "s" // i16 385 // | "t" // u16 386 // | "u" // () 387 // | "v" // ... 388 // | "x" // i64 389 // | "y" // u64 390 // | "z" // ! 391 // | "p" // placeholder (e.g. for generic params), shown as _ 392 static bool parseBasicType(char C, BasicType &Type) { 393 switch (C) { 394 case 'a': 395 Type = BasicType::I8; 396 return true; 397 case 'b': 398 Type = BasicType::Bool; 399 return true; 400 case 'c': 401 Type = BasicType::Char; 402 return true; 403 case 'd': 404 Type = BasicType::F64; 405 return true; 406 case 'e': 407 Type = BasicType::Str; 408 return true; 409 case 'f': 410 Type = BasicType::F32; 411 return true; 412 case 'h': 413 Type = BasicType::U8; 414 return true; 415 case 'i': 416 Type = BasicType::ISize; 417 return true; 418 case 'j': 419 Type = BasicType::USize; 420 return true; 421 case 'l': 422 Type = BasicType::I32; 423 return true; 424 case 'm': 425 Type = BasicType::U32; 426 return true; 427 case 'n': 428 Type = BasicType::I128; 429 return true; 430 case 'o': 431 Type = BasicType::U128; 432 return true; 433 case 'p': 434 Type = BasicType::Placeholder; 435 return true; 436 case 's': 437 Type = BasicType::I16; 438 return true; 439 case 't': 440 Type = BasicType::U16; 441 return true; 442 case 'u': 443 Type = BasicType::Unit; 444 return true; 445 case 'v': 446 Type = BasicType::Variadic; 447 return true; 448 case 'x': 449 Type = BasicType::I64; 450 return true; 451 case 'y': 452 Type = BasicType::U64; 453 return true; 454 case 'z': 455 Type = BasicType::Never; 456 return true; 457 default: 458 return false; 459 } 460 } 461 462 void Demangler::printBasicType(BasicType Type) { 463 switch (Type) { 464 case BasicType::Bool: 465 print("bool"); 466 break; 467 case BasicType::Char: 468 print("char"); 469 break; 470 case BasicType::I8: 471 print("i8"); 472 break; 473 case BasicType::I16: 474 print("i16"); 475 break; 476 case BasicType::I32: 477 print("i32"); 478 break; 479 case BasicType::I64: 480 print("i64"); 481 break; 482 case BasicType::I128: 483 print("i128"); 484 break; 485 case BasicType::ISize: 486 print("isize"); 487 break; 488 case BasicType::U8: 489 print("u8"); 490 break; 491 case BasicType::U16: 492 print("u16"); 493 break; 494 case BasicType::U32: 495 print("u32"); 496 break; 497 case BasicType::U64: 498 print("u64"); 499 break; 500 case BasicType::U128: 501 print("u128"); 502 break; 503 case BasicType::USize: 504 print("usize"); 505 break; 506 case BasicType::F32: 507 print("f32"); 508 break; 509 case BasicType::F64: 510 print("f64"); 511 break; 512 case BasicType::Str: 513 print("str"); 514 break; 515 case BasicType::Placeholder: 516 print("_"); 517 break; 518 case BasicType::Unit: 519 print("()"); 520 break; 521 case BasicType::Variadic: 522 print("..."); 523 break; 524 case BasicType::Never: 525 print("!"); 526 break; 527 } 528 } 529 530 // <type> = | <basic-type> 531 // | <path> // named type 532 // | "A" <type> <const> // [T; N] 533 // | "S" <type> // [T] 534 // | "T" {<type>} "E" // (T1, T2, T3, ...) 535 // | "R" [<lifetime>] <type> // &T 536 // | "Q" [<lifetime>] <type> // &mut T 537 // | "P" <type> // *const T 538 // | "O" <type> // *mut T 539 // | "F" <fn-sig> // fn(...) -> ... 540 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 541 // | <backref> // backref 542 void Demangler::demangleType() { 543 if (Error || RecursionLevel >= MaxRecursionLevel) { 544 Error = true; 545 return; 546 } 547 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 548 549 size_t Start = Position; 550 char C = consume(); 551 BasicType Type; 552 if (parseBasicType(C, Type)) 553 return printBasicType(Type); 554 555 switch (C) { 556 case 'A': 557 print("["); 558 demangleType(); 559 print("; "); 560 demangleConst(); 561 print("]"); 562 break; 563 case 'S': 564 print("["); 565 demangleType(); 566 print("]"); 567 break; 568 case 'T': { 569 print("("); 570 size_t I = 0; 571 for (; !Error && !consumeIf('E'); ++I) { 572 if (I > 0) 573 print(", "); 574 demangleType(); 575 } 576 if (I == 1) 577 print(","); 578 print(")"); 579 break; 580 } 581 case 'R': 582 case 'Q': 583 print('&'); 584 if (consumeIf('L')) { 585 if (auto Lifetime = parseBase62Number()) { 586 printLifetime(Lifetime); 587 print(' '); 588 } 589 } 590 if (C == 'Q') 591 print("mut "); 592 demangleType(); 593 break; 594 case 'P': 595 print("*const "); 596 demangleType(); 597 break; 598 case 'O': 599 print("*mut "); 600 demangleType(); 601 break; 602 case 'F': 603 demangleFnSig(); 604 break; 605 case 'D': 606 demangleDynBounds(); 607 if (consumeIf('L')) { 608 if (auto Lifetime = parseBase62Number()) { 609 print(" + "); 610 printLifetime(Lifetime); 611 } 612 } else { 613 Error = true; 614 } 615 break; 616 case 'B': 617 demangleBackref([&] { demangleType(); }); 618 break; 619 default: 620 Position = Start; 621 demanglePath(IsInType::Yes); 622 break; 623 } 624 } 625 626 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 627 // <abi> = "C" 628 // | <undisambiguated-identifier> 629 void Demangler::demangleFnSig() { 630 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 631 demangleOptionalBinder(); 632 633 if (consumeIf('U')) 634 print("unsafe "); 635 636 if (consumeIf('K')) { 637 print("extern \""); 638 if (consumeIf('C')) { 639 print("C"); 640 } else { 641 Identifier Ident = parseIdentifier(); 642 if (Ident.Punycode) 643 Error = true; 644 for (char C : Ident.Name) { 645 // When mangling ABI string, the "-" is replaced with "_". 646 if (C == '_') 647 C = '-'; 648 print(C); 649 } 650 } 651 print("\" "); 652 } 653 654 print("fn("); 655 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 656 if (I > 0) 657 print(", "); 658 demangleType(); 659 } 660 print(")"); 661 662 if (consumeIf('u')) { 663 // Skip the unit type from the output. 664 } else { 665 print(" -> "); 666 demangleType(); 667 } 668 } 669 670 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 671 void Demangler::demangleDynBounds() { 672 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 673 print("dyn "); 674 demangleOptionalBinder(); 675 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 676 if (I > 0) 677 print(" + "); 678 demangleDynTrait(); 679 } 680 } 681 682 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>} 683 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 684 void Demangler::demangleDynTrait() { 685 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); 686 while (!Error && consumeIf('p')) { 687 if (!IsOpen) { 688 IsOpen = true; 689 print('<'); 690 } else { 691 print(", "); 692 } 693 print(parseIdentifier().Name); 694 print(" = "); 695 demangleType(); 696 } 697 if (IsOpen) 698 print(">"); 699 } 700 701 // Demangles optional binder and updates the number of bound lifetimes. 702 // 703 // <binder> = "G" <base-62-number> 704 void Demangler::demangleOptionalBinder() { 705 uint64_t Binder = parseOptionalBase62Number('G'); 706 if (Error || Binder == 0) 707 return; 708 709 // In valid inputs each bound lifetime is referenced later. Referencing a 710 // lifetime requires at least one byte of input. Reject inputs that are too 711 // short to reference all bound lifetimes. Otherwise demangling of invalid 712 // binders could generate excessive amounts of output. 713 if (Binder >= Input.size() - BoundLifetimes) { 714 Error = true; 715 return; 716 } 717 718 print("for<"); 719 for (size_t I = 0; I != Binder; ++I) { 720 BoundLifetimes += 1; 721 if (I > 0) 722 print(", "); 723 printLifetime(1); 724 } 725 print("> "); 726 } 727 728 // <const> = <basic-type> <const-data> 729 // | "p" // placeholder 730 // | <backref> 731 void Demangler::demangleConst() { 732 if (Error || RecursionLevel >= MaxRecursionLevel) { 733 Error = true; 734 return; 735 } 736 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 737 738 char C = consume(); 739 BasicType Type; 740 if (parseBasicType(C, Type)) { 741 switch (Type) { 742 case BasicType::I8: 743 case BasicType::I16: 744 case BasicType::I32: 745 case BasicType::I64: 746 case BasicType::I128: 747 case BasicType::ISize: 748 case BasicType::U8: 749 case BasicType::U16: 750 case BasicType::U32: 751 case BasicType::U64: 752 case BasicType::U128: 753 case BasicType::USize: 754 demangleConstInt(); 755 break; 756 case BasicType::Bool: 757 demangleConstBool(); 758 break; 759 case BasicType::Char: 760 demangleConstChar(); 761 break; 762 case BasicType::Placeholder: 763 print('_'); 764 break; 765 default: 766 Error = true; 767 break; 768 } 769 } else if (C == 'B') { 770 demangleBackref([&] { demangleConst(); }); 771 } else { 772 Error = true; 773 } 774 } 775 776 // <const-data> = ["n"] <hex-number> 777 void Demangler::demangleConstInt() { 778 if (consumeIf('n')) 779 print('-'); 780 781 StringView HexDigits; 782 uint64_t Value = parseHexNumber(HexDigits); 783 if (HexDigits.size() <= 16) { 784 printDecimalNumber(Value); 785 } else { 786 print("0x"); 787 print(HexDigits); 788 } 789 } 790 791 // <const-data> = "0_" // false 792 // | "1_" // true 793 void Demangler::demangleConstBool() { 794 StringView HexDigits; 795 parseHexNumber(HexDigits); 796 if (HexDigits == "0") 797 print("false"); 798 else if (HexDigits == "1") 799 print("true"); 800 else 801 Error = true; 802 } 803 804 /// Returns true if CodePoint represents a printable ASCII character. 805 static bool isAsciiPrintable(uint64_t CodePoint) { 806 return 0x20 <= CodePoint && CodePoint <= 0x7e; 807 } 808 809 // <const-data> = <hex-number> 810 void Demangler::demangleConstChar() { 811 StringView HexDigits; 812 uint64_t CodePoint = parseHexNumber(HexDigits); 813 if (Error || HexDigits.size() > 6) { 814 Error = true; 815 return; 816 } 817 818 print("'"); 819 switch (CodePoint) { 820 case '\t': 821 print(R"(\t)"); 822 break; 823 case '\r': 824 print(R"(\r)"); 825 break; 826 case '\n': 827 print(R"(\n)"); 828 break; 829 case '\\': 830 print(R"(\\)"); 831 break; 832 case '"': 833 print(R"(")"); 834 break; 835 case '\'': 836 print(R"(\')"); 837 break; 838 default: 839 if (isAsciiPrintable(CodePoint)) { 840 char C = CodePoint; 841 print(C); 842 } else { 843 print(R"(\u{)"); 844 print(HexDigits); 845 print('}'); 846 } 847 break; 848 } 849 print('\''); 850 } 851 852 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 853 Identifier Demangler::parseIdentifier() { 854 bool Punycode = consumeIf('u'); 855 uint64_t Bytes = parseDecimalNumber(); 856 857 // Underscore resolves the ambiguity when identifier starts with a decimal 858 // digit or another underscore. 859 consumeIf('_'); 860 861 if (Error || Bytes > Input.size() - Position) { 862 Error = true; 863 return {}; 864 } 865 StringView S = Input.substr(Position, Bytes); 866 Position += Bytes; 867 868 if (!std::all_of(S.begin(), S.end(), isValid)) { 869 Error = true; 870 return {}; 871 } 872 873 return {S, Punycode}; 874 } 875 876 // Parses optional base 62 number. The presence of a number is determined using 877 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise 878 // 879 // This function is indended for parsing disambiguators and binders which when 880 // not present have their value interpreted as 0, and otherwise as decoded 881 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is 882 // 2. When "G" is absent value is 0. 883 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 884 if (!consumeIf(Tag)) 885 return 0; 886 887 uint64_t N = parseBase62Number(); 888 if (Error || !addAssign(N, 1)) 889 return 0; 890 891 return N; 892 } 893 894 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 895 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 896 // "1_" encodes 2, etc. 897 // 898 // <base-62-number> = {<0-9a-zA-Z>} "_" 899 uint64_t Demangler::parseBase62Number() { 900 if (consumeIf('_')) 901 return 0; 902 903 uint64_t Value = 0; 904 905 while (true) { 906 uint64_t Digit; 907 char C = consume(); 908 909 if (C == '_') { 910 break; 911 } else if (isDigit(C)) { 912 Digit = C - '0'; 913 } else if (isLower(C)) { 914 Digit = 10 + (C - 'a'); 915 } else if (isUpper(C)) { 916 Digit = 10 + 26 + (C - 'A'); 917 } else { 918 Error = true; 919 return 0; 920 } 921 922 if (!mulAssign(Value, 62)) 923 return 0; 924 925 if (!addAssign(Value, Digit)) 926 return 0; 927 } 928 929 if (!addAssign(Value, 1)) 930 return 0; 931 932 return Value; 933 } 934 935 // Parses a decimal number that had been encoded without any leading zeros. 936 // 937 // <decimal-number> = "0" 938 // | <1-9> {<0-9>} 939 uint64_t Demangler::parseDecimalNumber() { 940 char C = look(); 941 if (!isDigit(C)) { 942 Error = true; 943 return 0; 944 } 945 946 if (C == '0') { 947 consume(); 948 return 0; 949 } 950 951 uint64_t Value = 0; 952 953 while (isDigit(look())) { 954 if (!mulAssign(Value, 10)) { 955 Error = true; 956 return 0; 957 } 958 959 uint64_t D = consume() - '0'; 960 if (!addAssign(Value, D)) 961 return 0; 962 } 963 964 return Value; 965 } 966 967 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 968 // value and stores hex digits in HexDigits. The return value is unspecified if 969 // HexDigits.size() > 16. 970 // 971 // <hex-number> = "0_" 972 // | <1-9a-f> {<0-9a-f>} "_" 973 uint64_t Demangler::parseHexNumber(StringView &HexDigits) { 974 size_t Start = Position; 975 uint64_t Value = 0; 976 977 if (!isHexDigit(look())) 978 Error = true; 979 980 if (consumeIf('0')) { 981 if (!consumeIf('_')) 982 Error = true; 983 } else { 984 while (!Error && !consumeIf('_')) { 985 char C = consume(); 986 Value *= 16; 987 if (isDigit(C)) 988 Value += C - '0'; 989 else if ('a' <= C && C <= 'f') 990 Value += 10 + (C - 'a'); 991 else 992 Error = true; 993 } 994 } 995 996 if (Error) { 997 HexDigits = StringView(); 998 return 0; 999 } 1000 1001 size_t End = Position - 1; 1002 assert(Start < End); 1003 HexDigits = Input.substr(Start, End - Start); 1004 return Value; 1005 } 1006 1007 void Demangler::print(char C) { 1008 if (Error || !Print) 1009 return; 1010 1011 Output += C; 1012 } 1013 1014 void Demangler::print(StringView S) { 1015 if (Error || !Print) 1016 return; 1017 1018 Output += S; 1019 } 1020 1021 void Demangler::printDecimalNumber(uint64_t N) { 1022 if (Error || !Print) 1023 return; 1024 1025 Output << N; 1026 } 1027 1028 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices 1029 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes 1030 // bound by one of the enclosing binders. 1031 void Demangler::printLifetime(uint64_t Index) { 1032 if (Index == 0) { 1033 print("'_"); 1034 return; 1035 } 1036 1037 if (Index - 1 >= BoundLifetimes) { 1038 Error = true; 1039 return; 1040 } 1041 1042 uint64_t Depth = BoundLifetimes - Index; 1043 print('\''); 1044 if (Depth < 26) { 1045 char C = 'a' + Depth; 1046 print(C); 1047 } else { 1048 print('z'); 1049 printDecimalNumber(Depth - 26 + 1); 1050 } 1051 } 1052 1053 static inline bool decodePunycodeDigit(char C, size_t &Value) { 1054 if (isLower(C)) { 1055 Value = C - 'a'; 1056 return true; 1057 } 1058 1059 if (isDigit(C)) { 1060 Value = 26 + (C - '0'); 1061 return true; 1062 } 1063 1064 return false; 1065 } 1066 1067 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { 1068 char *Buffer = Output.getBuffer(); 1069 char *Start = Buffer + StartIdx; 1070 char *End = Buffer + Output.getCurrentPosition(); 1071 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); 1072 } 1073 1074 // Encodes code point as UTF-8 and stores results in Output. Returns false if 1075 // CodePoint is not a valid unicode scalar value. 1076 static inline bool encodeUTF8(size_t CodePoint, char *Output) { 1077 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) 1078 return false; 1079 1080 if (CodePoint <= 0x7F) { 1081 Output[0] = CodePoint; 1082 return true; 1083 } 1084 1085 if (CodePoint <= 0x7FF) { 1086 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); 1087 Output[1] = 0x80 | (CodePoint & 0x3F); 1088 return true; 1089 } 1090 1091 if (CodePoint <= 0xFFFF) { 1092 Output[0] = 0xE0 | (CodePoint >> 12); 1093 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); 1094 Output[2] = 0x80 | (CodePoint & 0x3F); 1095 return true; 1096 } 1097 1098 if (CodePoint <= 0x10FFFF) { 1099 Output[0] = 0xF0 | (CodePoint >> 18); 1100 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); 1101 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); 1102 Output[3] = 0x80 | (CodePoint & 0x3F); 1103 return true; 1104 } 1105 1106 return false; 1107 } 1108 1109 // Decodes string encoded using punycode and appends results to Output. 1110 // Returns true if decoding was successful. 1111 static bool decodePunycode(StringView Input, OutputBuffer &Output) { 1112 size_t OutputSize = Output.getCurrentPosition(); 1113 size_t InputIdx = 0; 1114 1115 // Rust uses an underscore as a delimiter. 1116 size_t DelimiterPos = StringView::npos; 1117 for (size_t I = 0; I != Input.size(); ++I) 1118 if (Input[I] == '_') 1119 DelimiterPos = I; 1120 1121 if (DelimiterPos != StringView::npos) { 1122 // Copy basic code points before the last delimiter to the output. 1123 for (; InputIdx != DelimiterPos; ++InputIdx) { 1124 char C = Input[InputIdx]; 1125 if (!isValid(C)) 1126 return false; 1127 // Code points are padded with zeros while decoding is in progress. 1128 char UTF8[4] = {C}; 1129 Output += StringView(UTF8, UTF8 + 4); 1130 } 1131 // Skip over the delimiter. 1132 ++InputIdx; 1133 } 1134 1135 size_t Base = 36; 1136 size_t Skew = 38; 1137 size_t Bias = 72; 1138 size_t N = 0x80; 1139 size_t TMin = 1; 1140 size_t TMax = 26; 1141 size_t Damp = 700; 1142 1143 auto Adapt = [&](size_t Delta, size_t NumPoints) { 1144 Delta /= Damp; 1145 Delta += Delta / NumPoints; 1146 Damp = 2; 1147 1148 size_t K = 0; 1149 while (Delta > (Base - TMin) * TMax / 2) { 1150 Delta /= Base - TMin; 1151 K += Base; 1152 } 1153 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); 1154 }; 1155 1156 // Main decoding loop. 1157 for (size_t I = 0; InputIdx != Input.size(); I += 1) { 1158 size_t OldI = I; 1159 size_t W = 1; 1160 size_t Max = std::numeric_limits<size_t>::max(); 1161 for (size_t K = Base; true; K += Base) { 1162 if (InputIdx == Input.size()) 1163 return false; 1164 char C = Input[InputIdx++]; 1165 size_t Digit = 0; 1166 if (!decodePunycodeDigit(C, Digit)) 1167 return false; 1168 1169 if (Digit > (Max - I) / W) 1170 return false; 1171 I += Digit * W; 1172 1173 size_t T; 1174 if (K <= Bias) 1175 T = TMin; 1176 else if (K >= Bias + TMax) 1177 T = TMax; 1178 else 1179 T = K - Bias; 1180 1181 if (Digit < T) 1182 break; 1183 1184 if (W > Max / (Base - T)) 1185 return false; 1186 W *= (Base - T); 1187 } 1188 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; 1189 Bias = Adapt(I - OldI, NumPoints); 1190 1191 if (I / NumPoints > Max - N) 1192 return false; 1193 N += I / NumPoints; 1194 I = I % NumPoints; 1195 1196 // Insert N at position I in the output. 1197 char UTF8[4] = {}; 1198 if (!encodeUTF8(N, UTF8)) 1199 return false; 1200 Output.insert(OutputSize + I * 4, UTF8, 4); 1201 } 1202 1203 removeNullBytes(Output, OutputSize); 1204 return true; 1205 } 1206 1207 void Demangler::printIdentifier(Identifier Ident) { 1208 if (Error || !Print) 1209 return; 1210 1211 if (Ident.Punycode) { 1212 if (!decodePunycode(Ident.Name, Output)) 1213 Error = true; 1214 } else { 1215 print(Ident.Name); 1216 } 1217 } 1218 1219 char Demangler::look() const { 1220 if (Error || Position >= Input.size()) 1221 return 0; 1222 1223 return Input[Position]; 1224 } 1225 1226 char Demangler::consume() { 1227 if (Error || Position >= Input.size()) { 1228 Error = true; 1229 return 0; 1230 } 1231 1232 return Input[Position++]; 1233 } 1234 1235 bool Demangler::consumeIf(char Prefix) { 1236 if (Error || Position >= Input.size() || Input[Position] != Prefix) 1237 return false; 1238 1239 Position += 1; 1240 return true; 1241 } 1242 1243 /// Computes A + B. When computation wraps around sets the error and returns 1244 /// false. Otherwise assigns the result to A and returns true. 1245 bool Demangler::addAssign(uint64_t &A, uint64_t B) { 1246 if (A > std::numeric_limits<uint64_t>::max() - B) { 1247 Error = true; 1248 return false; 1249 } 1250 1251 A += B; 1252 return true; 1253 } 1254 1255 /// Computes A * B. When computation wraps around sets the error and returns 1256 /// false. Otherwise assigns the result to A and returns true. 1257 bool Demangler::mulAssign(uint64_t &A, uint64_t B) { 1258 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { 1259 Error = true; 1260 return false; 1261 } 1262 1263 A *= B; 1264 return true; 1265 } 1266