1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/Demangle.h" 15 #include "llvm/Demangle/StringView.h" 16 #include "llvm/Demangle/Utility.h" 17 18 #include <algorithm> 19 #include <cassert> 20 #include <cstdint> 21 #include <cstring> 22 #include <limits> 23 24 using namespace llvm; 25 26 using llvm::itanium_demangle::OutputBuffer; 27 using llvm::itanium_demangle::ScopedOverride; 28 using llvm::itanium_demangle::StringView; 29 30 namespace { 31 32 struct Identifier { 33 StringView Name; 34 bool Punycode; 35 36 bool empty() const { return Name.empty(); } 37 }; 38 39 enum class BasicType { 40 Bool, 41 Char, 42 I8, 43 I16, 44 I32, 45 I64, 46 I128, 47 ISize, 48 U8, 49 U16, 50 U32, 51 U64, 52 U128, 53 USize, 54 F32, 55 F64, 56 Str, 57 Placeholder, 58 Unit, 59 Variadic, 60 Never, 61 }; 62 63 enum class IsInType { 64 No, 65 Yes, 66 }; 67 68 enum class LeaveGenericsOpen { 69 No, 70 Yes, 71 }; 72 73 class Demangler { 74 // Maximum recursion level. Used to avoid stack overflow. 75 size_t MaxRecursionLevel; 76 // Current recursion level. 77 size_t RecursionLevel; 78 size_t BoundLifetimes; 79 // Input string that is being demangled with "_R" prefix removed. 80 StringView Input; 81 // Position in the input string. 82 size_t Position; 83 // When true, print methods append the output to the stream. 84 // When false, the output is suppressed. 85 bool Print; 86 // True if an error occurred. 87 bool Error; 88 89 public: 90 // Demangled output. 91 OutputBuffer Output; 92 93 Demangler(size_t MaxRecursionLevel = 500); 94 95 bool demangle(StringView MangledName); 96 97 private: 98 bool demanglePath(IsInType Type, 99 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); 100 void demangleImplPath(IsInType InType); 101 void demangleGenericArg(); 102 void demangleType(); 103 void demangleFnSig(); 104 void demangleDynBounds(); 105 void demangleDynTrait(); 106 void demangleOptionalBinder(); 107 void demangleConst(); 108 void demangleConstInt(); 109 void demangleConstBool(); 110 void demangleConstChar(); 111 112 template <typename Callable> void demangleBackref(Callable Demangler) { 113 uint64_t Backref = parseBase62Number(); 114 if (Error || Backref >= Position) { 115 Error = true; 116 return; 117 } 118 119 if (!Print) 120 return; 121 122 ScopedOverride<size_t> SavePosition(Position, Position); 123 Position = Backref; 124 Demangler(); 125 } 126 127 Identifier parseIdentifier(); 128 uint64_t parseOptionalBase62Number(char Tag); 129 uint64_t parseBase62Number(); 130 uint64_t parseDecimalNumber(); 131 uint64_t parseHexNumber(StringView &HexDigits); 132 133 void print(char C); 134 void print(StringView S); 135 void printDecimalNumber(uint64_t N); 136 void printBasicType(BasicType); 137 void printLifetime(uint64_t Index); 138 void printIdentifier(Identifier Ident); 139 140 char look() const; 141 char consume(); 142 bool consumeIf(char Prefix); 143 144 bool addAssign(uint64_t &A, uint64_t B); 145 bool mulAssign(uint64_t &A, uint64_t B); 146 }; 147 148 } // namespace 149 150 char *llvm::rustDemangle(const char *MangledName) { 151 if (MangledName == nullptr) 152 return nullptr; 153 154 // Return early if mangled name doesn't look like a Rust symbol. 155 StringView Mangled(MangledName); 156 if (!Mangled.startsWith("_R")) 157 return nullptr; 158 159 Demangler D; 160 if (!D.demangle(Mangled)) { 161 std::free(D.Output.getBuffer()); 162 return nullptr; 163 } 164 165 D.Output += '\0'; 166 167 return D.Output.getBuffer(); 168 } 169 170 Demangler::Demangler(size_t MaxRecursionLevel) 171 : MaxRecursionLevel(MaxRecursionLevel) {} 172 173 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 174 175 static inline bool isHexDigit(const char C) { 176 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 177 } 178 179 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 180 181 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 182 183 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 184 static inline bool isValid(const char C) { 185 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 186 } 187 188 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 189 // otherwise. The demangled symbol is stored in Output field. It is 190 // responsibility of the caller to free the memory behind the output stream. 191 // 192 // <symbol-name> = "_R" <path> [<instantiating-crate>] 193 bool Demangler::demangle(StringView Mangled) { 194 Position = 0; 195 Error = false; 196 Print = true; 197 RecursionLevel = 0; 198 BoundLifetimes = 0; 199 200 if (!Mangled.consumeFront("_R")) { 201 Error = true; 202 return false; 203 } 204 size_t Dot = Mangled.find('.'); 205 Input = Mangled.substr(0, Dot); 206 StringView Suffix = Mangled.dropFront(Dot); 207 208 demanglePath(IsInType::No); 209 210 if (Position != Input.size()) { 211 ScopedOverride<bool> SavePrint(Print, false); 212 demanglePath(IsInType::No); 213 } 214 215 if (Position != Input.size()) 216 Error = true; 217 218 if (!Suffix.empty()) { 219 print(" ("); 220 print(Suffix); 221 print(")"); 222 } 223 224 return !Error; 225 } 226 227 // Demangles a path. InType indicates whether a path is inside a type. When 228 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the 229 // output. Return value indicates whether generics arguments have been left 230 // open. 231 // 232 // <path> = "C" <identifier> // crate root 233 // | "M" <impl-path> <type> // <T> (inherent impl) 234 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 235 // | "Y" <type> <path> // <T as Trait> (trait definition) 236 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 237 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 238 // | <backref> 239 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 240 // <ns> = "C" // closure 241 // | "S" // shim 242 // | <A-Z> // other special namespaces 243 // | <a-z> // internal namespaces 244 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { 245 if (Error || RecursionLevel >= MaxRecursionLevel) { 246 Error = true; 247 return false; 248 } 249 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 250 251 switch (consume()) { 252 case 'C': { 253 parseOptionalBase62Number('s'); 254 printIdentifier(parseIdentifier()); 255 break; 256 } 257 case 'M': { 258 demangleImplPath(InType); 259 print("<"); 260 demangleType(); 261 print(">"); 262 break; 263 } 264 case 'X': { 265 demangleImplPath(InType); 266 print("<"); 267 demangleType(); 268 print(" as "); 269 demanglePath(IsInType::Yes); 270 print(">"); 271 break; 272 } 273 case 'Y': { 274 print("<"); 275 demangleType(); 276 print(" as "); 277 demanglePath(IsInType::Yes); 278 print(">"); 279 break; 280 } 281 case 'N': { 282 char NS = consume(); 283 if (!isLower(NS) && !isUpper(NS)) { 284 Error = true; 285 break; 286 } 287 demanglePath(InType); 288 289 uint64_t Disambiguator = parseOptionalBase62Number('s'); 290 Identifier Ident = parseIdentifier(); 291 292 if (isUpper(NS)) { 293 // Special namespaces 294 print("::{"); 295 if (NS == 'C') 296 print("closure"); 297 else if (NS == 'S') 298 print("shim"); 299 else 300 print(NS); 301 if (!Ident.empty()) { 302 print(":"); 303 printIdentifier(Ident); 304 } 305 print('#'); 306 printDecimalNumber(Disambiguator); 307 print('}'); 308 } else { 309 // Implementation internal namespaces. 310 if (!Ident.empty()) { 311 print("::"); 312 printIdentifier(Ident); 313 } 314 } 315 break; 316 } 317 case 'I': { 318 demanglePath(InType); 319 // Omit "::" when in a type, where it is optional. 320 if (InType == IsInType::No) 321 print("::"); 322 print("<"); 323 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 324 if (I > 0) 325 print(", "); 326 demangleGenericArg(); 327 } 328 if (LeaveOpen == LeaveGenericsOpen::Yes) 329 return true; 330 else 331 print(">"); 332 break; 333 } 334 case 'B': { 335 bool IsOpen = false; 336 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); 337 return IsOpen; 338 } 339 default: 340 Error = true; 341 break; 342 } 343 344 return false; 345 } 346 347 // <impl-path> = [<disambiguator>] <path> 348 // <disambiguator> = "s" <base-62-number> 349 void Demangler::demangleImplPath(IsInType InType) { 350 ScopedOverride<bool> SavePrint(Print, false); 351 parseOptionalBase62Number('s'); 352 demanglePath(InType); 353 } 354 355 // <generic-arg> = <lifetime> 356 // | <type> 357 // | "K" <const> 358 // <lifetime> = "L" <base-62-number> 359 void Demangler::demangleGenericArg() { 360 if (consumeIf('L')) 361 printLifetime(parseBase62Number()); 362 else if (consumeIf('K')) 363 demangleConst(); 364 else 365 demangleType(); 366 } 367 368 // <basic-type> = "a" // i8 369 // | "b" // bool 370 // | "c" // char 371 // | "d" // f64 372 // | "e" // str 373 // | "f" // f32 374 // | "h" // u8 375 // | "i" // isize 376 // | "j" // usize 377 // | "l" // i32 378 // | "m" // u32 379 // | "n" // i128 380 // | "o" // u128 381 // | "s" // i16 382 // | "t" // u16 383 // | "u" // () 384 // | "v" // ... 385 // | "x" // i64 386 // | "y" // u64 387 // | "z" // ! 388 // | "p" // placeholder (e.g. for generic params), shown as _ 389 static bool parseBasicType(char C, BasicType &Type) { 390 switch (C) { 391 case 'a': 392 Type = BasicType::I8; 393 return true; 394 case 'b': 395 Type = BasicType::Bool; 396 return true; 397 case 'c': 398 Type = BasicType::Char; 399 return true; 400 case 'd': 401 Type = BasicType::F64; 402 return true; 403 case 'e': 404 Type = BasicType::Str; 405 return true; 406 case 'f': 407 Type = BasicType::F32; 408 return true; 409 case 'h': 410 Type = BasicType::U8; 411 return true; 412 case 'i': 413 Type = BasicType::ISize; 414 return true; 415 case 'j': 416 Type = BasicType::USize; 417 return true; 418 case 'l': 419 Type = BasicType::I32; 420 return true; 421 case 'm': 422 Type = BasicType::U32; 423 return true; 424 case 'n': 425 Type = BasicType::I128; 426 return true; 427 case 'o': 428 Type = BasicType::U128; 429 return true; 430 case 'p': 431 Type = BasicType::Placeholder; 432 return true; 433 case 's': 434 Type = BasicType::I16; 435 return true; 436 case 't': 437 Type = BasicType::U16; 438 return true; 439 case 'u': 440 Type = BasicType::Unit; 441 return true; 442 case 'v': 443 Type = BasicType::Variadic; 444 return true; 445 case 'x': 446 Type = BasicType::I64; 447 return true; 448 case 'y': 449 Type = BasicType::U64; 450 return true; 451 case 'z': 452 Type = BasicType::Never; 453 return true; 454 default: 455 return false; 456 } 457 } 458 459 void Demangler::printBasicType(BasicType Type) { 460 switch (Type) { 461 case BasicType::Bool: 462 print("bool"); 463 break; 464 case BasicType::Char: 465 print("char"); 466 break; 467 case BasicType::I8: 468 print("i8"); 469 break; 470 case BasicType::I16: 471 print("i16"); 472 break; 473 case BasicType::I32: 474 print("i32"); 475 break; 476 case BasicType::I64: 477 print("i64"); 478 break; 479 case BasicType::I128: 480 print("i128"); 481 break; 482 case BasicType::ISize: 483 print("isize"); 484 break; 485 case BasicType::U8: 486 print("u8"); 487 break; 488 case BasicType::U16: 489 print("u16"); 490 break; 491 case BasicType::U32: 492 print("u32"); 493 break; 494 case BasicType::U64: 495 print("u64"); 496 break; 497 case BasicType::U128: 498 print("u128"); 499 break; 500 case BasicType::USize: 501 print("usize"); 502 break; 503 case BasicType::F32: 504 print("f32"); 505 break; 506 case BasicType::F64: 507 print("f64"); 508 break; 509 case BasicType::Str: 510 print("str"); 511 break; 512 case BasicType::Placeholder: 513 print("_"); 514 break; 515 case BasicType::Unit: 516 print("()"); 517 break; 518 case BasicType::Variadic: 519 print("..."); 520 break; 521 case BasicType::Never: 522 print("!"); 523 break; 524 } 525 } 526 527 // <type> = | <basic-type> 528 // | <path> // named type 529 // | "A" <type> <const> // [T; N] 530 // | "S" <type> // [T] 531 // | "T" {<type>} "E" // (T1, T2, T3, ...) 532 // | "R" [<lifetime>] <type> // &T 533 // | "Q" [<lifetime>] <type> // &mut T 534 // | "P" <type> // *const T 535 // | "O" <type> // *mut T 536 // | "F" <fn-sig> // fn(...) -> ... 537 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 538 // | <backref> // backref 539 void Demangler::demangleType() { 540 if (Error || RecursionLevel >= MaxRecursionLevel) { 541 Error = true; 542 return; 543 } 544 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 545 546 size_t Start = Position; 547 char C = consume(); 548 BasicType Type; 549 if (parseBasicType(C, Type)) 550 return printBasicType(Type); 551 552 switch (C) { 553 case 'A': 554 print("["); 555 demangleType(); 556 print("; "); 557 demangleConst(); 558 print("]"); 559 break; 560 case 'S': 561 print("["); 562 demangleType(); 563 print("]"); 564 break; 565 case 'T': { 566 print("("); 567 size_t I = 0; 568 for (; !Error && !consumeIf('E'); ++I) { 569 if (I > 0) 570 print(", "); 571 demangleType(); 572 } 573 if (I == 1) 574 print(","); 575 print(")"); 576 break; 577 } 578 case 'R': 579 case 'Q': 580 print('&'); 581 if (consumeIf('L')) { 582 if (auto Lifetime = parseBase62Number()) { 583 printLifetime(Lifetime); 584 print(' '); 585 } 586 } 587 if (C == 'Q') 588 print("mut "); 589 demangleType(); 590 break; 591 case 'P': 592 print("*const "); 593 demangleType(); 594 break; 595 case 'O': 596 print("*mut "); 597 demangleType(); 598 break; 599 case 'F': 600 demangleFnSig(); 601 break; 602 case 'D': 603 demangleDynBounds(); 604 if (consumeIf('L')) { 605 if (auto Lifetime = parseBase62Number()) { 606 print(" + "); 607 printLifetime(Lifetime); 608 } 609 } else { 610 Error = true; 611 } 612 break; 613 case 'B': 614 demangleBackref([&] { demangleType(); }); 615 break; 616 default: 617 Position = Start; 618 demanglePath(IsInType::Yes); 619 break; 620 } 621 } 622 623 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 624 // <abi> = "C" 625 // | <undisambiguated-identifier> 626 void Demangler::demangleFnSig() { 627 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 628 demangleOptionalBinder(); 629 630 if (consumeIf('U')) 631 print("unsafe "); 632 633 if (consumeIf('K')) { 634 print("extern \""); 635 if (consumeIf('C')) { 636 print("C"); 637 } else { 638 Identifier Ident = parseIdentifier(); 639 if (Ident.Punycode) 640 Error = true; 641 for (char C : Ident.Name) { 642 // When mangling ABI string, the "-" is replaced with "_". 643 if (C == '_') 644 C = '-'; 645 print(C); 646 } 647 } 648 print("\" "); 649 } 650 651 print("fn("); 652 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 653 if (I > 0) 654 print(", "); 655 demangleType(); 656 } 657 print(")"); 658 659 if (consumeIf('u')) { 660 // Skip the unit type from the output. 661 } else { 662 print(" -> "); 663 demangleType(); 664 } 665 } 666 667 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 668 void Demangler::demangleDynBounds() { 669 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 670 print("dyn "); 671 demangleOptionalBinder(); 672 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 673 if (I > 0) 674 print(" + "); 675 demangleDynTrait(); 676 } 677 } 678 679 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>} 680 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 681 void Demangler::demangleDynTrait() { 682 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); 683 while (!Error && consumeIf('p')) { 684 if (!IsOpen) { 685 IsOpen = true; 686 print('<'); 687 } else { 688 print(", "); 689 } 690 print(parseIdentifier().Name); 691 print(" = "); 692 demangleType(); 693 } 694 if (IsOpen) 695 print(">"); 696 } 697 698 // Demangles optional binder and updates the number of bound lifetimes. 699 // 700 // <binder> = "G" <base-62-number> 701 void Demangler::demangleOptionalBinder() { 702 uint64_t Binder = parseOptionalBase62Number('G'); 703 if (Error || Binder == 0) 704 return; 705 706 // In valid inputs each bound lifetime is referenced later. Referencing a 707 // lifetime requires at least one byte of input. Reject inputs that are too 708 // short to reference all bound lifetimes. Otherwise demangling of invalid 709 // binders could generate excessive amounts of output. 710 if (Binder >= Input.size() - BoundLifetimes) { 711 Error = true; 712 return; 713 } 714 715 print("for<"); 716 for (size_t I = 0; I != Binder; ++I) { 717 BoundLifetimes += 1; 718 if (I > 0) 719 print(", "); 720 printLifetime(1); 721 } 722 print("> "); 723 } 724 725 // <const> = <basic-type> <const-data> 726 // | "p" // placeholder 727 // | <backref> 728 void Demangler::demangleConst() { 729 if (Error || RecursionLevel >= MaxRecursionLevel) { 730 Error = true; 731 return; 732 } 733 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 734 735 char C = consume(); 736 BasicType Type; 737 if (parseBasicType(C, Type)) { 738 switch (Type) { 739 case BasicType::I8: 740 case BasicType::I16: 741 case BasicType::I32: 742 case BasicType::I64: 743 case BasicType::I128: 744 case BasicType::ISize: 745 case BasicType::U8: 746 case BasicType::U16: 747 case BasicType::U32: 748 case BasicType::U64: 749 case BasicType::U128: 750 case BasicType::USize: 751 demangleConstInt(); 752 break; 753 case BasicType::Bool: 754 demangleConstBool(); 755 break; 756 case BasicType::Char: 757 demangleConstChar(); 758 break; 759 case BasicType::Placeholder: 760 print('_'); 761 break; 762 default: 763 Error = true; 764 break; 765 } 766 } else if (C == 'B') { 767 demangleBackref([&] { demangleConst(); }); 768 } else { 769 Error = true; 770 } 771 } 772 773 // <const-data> = ["n"] <hex-number> 774 void Demangler::demangleConstInt() { 775 if (consumeIf('n')) 776 print('-'); 777 778 StringView HexDigits; 779 uint64_t Value = parseHexNumber(HexDigits); 780 if (HexDigits.size() <= 16) { 781 printDecimalNumber(Value); 782 } else { 783 print("0x"); 784 print(HexDigits); 785 } 786 } 787 788 // <const-data> = "0_" // false 789 // | "1_" // true 790 void Demangler::demangleConstBool() { 791 StringView HexDigits; 792 parseHexNumber(HexDigits); 793 if (HexDigits == "0") 794 print("false"); 795 else if (HexDigits == "1") 796 print("true"); 797 else 798 Error = true; 799 } 800 801 /// Returns true if CodePoint represents a printable ASCII character. 802 static bool isAsciiPrintable(uint64_t CodePoint) { 803 return 0x20 <= CodePoint && CodePoint <= 0x7e; 804 } 805 806 // <const-data> = <hex-number> 807 void Demangler::demangleConstChar() { 808 StringView HexDigits; 809 uint64_t CodePoint = parseHexNumber(HexDigits); 810 if (Error || HexDigits.size() > 6) { 811 Error = true; 812 return; 813 } 814 815 print("'"); 816 switch (CodePoint) { 817 case '\t': 818 print(R"(\t)"); 819 break; 820 case '\r': 821 print(R"(\r)"); 822 break; 823 case '\n': 824 print(R"(\n)"); 825 break; 826 case '\\': 827 print(R"(\\)"); 828 break; 829 case '"': 830 print(R"(")"); 831 break; 832 case '\'': 833 print(R"(\')"); 834 break; 835 default: 836 if (isAsciiPrintable(CodePoint)) { 837 char C = CodePoint; 838 print(C); 839 } else { 840 print(R"(\u{)"); 841 print(HexDigits); 842 print('}'); 843 } 844 break; 845 } 846 print('\''); 847 } 848 849 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 850 Identifier Demangler::parseIdentifier() { 851 bool Punycode = consumeIf('u'); 852 uint64_t Bytes = parseDecimalNumber(); 853 854 // Underscore resolves the ambiguity when identifier starts with a decimal 855 // digit or another underscore. 856 consumeIf('_'); 857 858 if (Error || Bytes > Input.size() - Position) { 859 Error = true; 860 return {}; 861 } 862 StringView S = Input.substr(Position, Bytes); 863 Position += Bytes; 864 865 if (!std::all_of(S.begin(), S.end(), isValid)) { 866 Error = true; 867 return {}; 868 } 869 870 return {S, Punycode}; 871 } 872 873 // Parses optional base 62 number. The presence of a number is determined using 874 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise 875 // 876 // This function is indended for parsing disambiguators and binders which when 877 // not present have their value interpreted as 0, and otherwise as decoded 878 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is 879 // 2. When "G" is absent value is 0. 880 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 881 if (!consumeIf(Tag)) 882 return 0; 883 884 uint64_t N = parseBase62Number(); 885 if (Error || !addAssign(N, 1)) 886 return 0; 887 888 return N; 889 } 890 891 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 892 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 893 // "1_" encodes 2, etc. 894 // 895 // <base-62-number> = {<0-9a-zA-Z>} "_" 896 uint64_t Demangler::parseBase62Number() { 897 if (consumeIf('_')) 898 return 0; 899 900 uint64_t Value = 0; 901 902 while (true) { 903 uint64_t Digit; 904 char C = consume(); 905 906 if (C == '_') { 907 break; 908 } else if (isDigit(C)) { 909 Digit = C - '0'; 910 } else if (isLower(C)) { 911 Digit = 10 + (C - 'a'); 912 } else if (isUpper(C)) { 913 Digit = 10 + 26 + (C - 'A'); 914 } else { 915 Error = true; 916 return 0; 917 } 918 919 if (!mulAssign(Value, 62)) 920 return 0; 921 922 if (!addAssign(Value, Digit)) 923 return 0; 924 } 925 926 if (!addAssign(Value, 1)) 927 return 0; 928 929 return Value; 930 } 931 932 // Parses a decimal number that had been encoded without any leading zeros. 933 // 934 // <decimal-number> = "0" 935 // | <1-9> {<0-9>} 936 uint64_t Demangler::parseDecimalNumber() { 937 char C = look(); 938 if (!isDigit(C)) { 939 Error = true; 940 return 0; 941 } 942 943 if (C == '0') { 944 consume(); 945 return 0; 946 } 947 948 uint64_t Value = 0; 949 950 while (isDigit(look())) { 951 if (!mulAssign(Value, 10)) { 952 Error = true; 953 return 0; 954 } 955 956 uint64_t D = consume() - '0'; 957 if (!addAssign(Value, D)) 958 return 0; 959 } 960 961 return Value; 962 } 963 964 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 965 // value and stores hex digits in HexDigits. The return value is unspecified if 966 // HexDigits.size() > 16. 967 // 968 // <hex-number> = "0_" 969 // | <1-9a-f> {<0-9a-f>} "_" 970 uint64_t Demangler::parseHexNumber(StringView &HexDigits) { 971 size_t Start = Position; 972 uint64_t Value = 0; 973 974 if (!isHexDigit(look())) 975 Error = true; 976 977 if (consumeIf('0')) { 978 if (!consumeIf('_')) 979 Error = true; 980 } else { 981 while (!Error && !consumeIf('_')) { 982 char C = consume(); 983 Value *= 16; 984 if (isDigit(C)) 985 Value += C - '0'; 986 else if ('a' <= C && C <= 'f') 987 Value += 10 + (C - 'a'); 988 else 989 Error = true; 990 } 991 } 992 993 if (Error) { 994 HexDigits = StringView(); 995 return 0; 996 } 997 998 size_t End = Position - 1; 999 assert(Start < End); 1000 HexDigits = Input.substr(Start, End - Start); 1001 return Value; 1002 } 1003 1004 void Demangler::print(char C) { 1005 if (Error || !Print) 1006 return; 1007 1008 Output += C; 1009 } 1010 1011 void Demangler::print(StringView S) { 1012 if (Error || !Print) 1013 return; 1014 1015 Output += S; 1016 } 1017 1018 void Demangler::printDecimalNumber(uint64_t N) { 1019 if (Error || !Print) 1020 return; 1021 1022 Output << N; 1023 } 1024 1025 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices 1026 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes 1027 // bound by one of the enclosing binders. 1028 void Demangler::printLifetime(uint64_t Index) { 1029 if (Index == 0) { 1030 print("'_"); 1031 return; 1032 } 1033 1034 if (Index - 1 >= BoundLifetimes) { 1035 Error = true; 1036 return; 1037 } 1038 1039 uint64_t Depth = BoundLifetimes - Index; 1040 print('\''); 1041 if (Depth < 26) { 1042 char C = 'a' + Depth; 1043 print(C); 1044 } else { 1045 print('z'); 1046 printDecimalNumber(Depth - 26 + 1); 1047 } 1048 } 1049 1050 static inline bool decodePunycodeDigit(char C, size_t &Value) { 1051 if (isLower(C)) { 1052 Value = C - 'a'; 1053 return true; 1054 } 1055 1056 if (isDigit(C)) { 1057 Value = 26 + (C - '0'); 1058 return true; 1059 } 1060 1061 return false; 1062 } 1063 1064 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { 1065 char *Buffer = Output.getBuffer(); 1066 char *Start = Buffer + StartIdx; 1067 char *End = Buffer + Output.getCurrentPosition(); 1068 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); 1069 } 1070 1071 // Encodes code point as UTF-8 and stores results in Output. Returns false if 1072 // CodePoint is not a valid unicode scalar value. 1073 static inline bool encodeUTF8(size_t CodePoint, char *Output) { 1074 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) 1075 return false; 1076 1077 if (CodePoint <= 0x7F) { 1078 Output[0] = CodePoint; 1079 return true; 1080 } 1081 1082 if (CodePoint <= 0x7FF) { 1083 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); 1084 Output[1] = 0x80 | (CodePoint & 0x3F); 1085 return true; 1086 } 1087 1088 if (CodePoint <= 0xFFFF) { 1089 Output[0] = 0xE0 | (CodePoint >> 12); 1090 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); 1091 Output[2] = 0x80 | (CodePoint & 0x3F); 1092 return true; 1093 } 1094 1095 if (CodePoint <= 0x10FFFF) { 1096 Output[0] = 0xF0 | (CodePoint >> 18); 1097 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); 1098 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); 1099 Output[3] = 0x80 | (CodePoint & 0x3F); 1100 return true; 1101 } 1102 1103 return false; 1104 } 1105 1106 // Decodes string encoded using punycode and appends results to Output. 1107 // Returns true if decoding was successful. 1108 static bool decodePunycode(StringView Input, OutputBuffer &Output) { 1109 size_t OutputSize = Output.getCurrentPosition(); 1110 size_t InputIdx = 0; 1111 1112 // Rust uses an underscore as a delimiter. 1113 size_t DelimiterPos = StringView::npos; 1114 for (size_t I = 0; I != Input.size(); ++I) 1115 if (Input[I] == '_') 1116 DelimiterPos = I; 1117 1118 if (DelimiterPos != StringView::npos) { 1119 // Copy basic code points before the last delimiter to the output. 1120 for (; InputIdx != DelimiterPos; ++InputIdx) { 1121 char C = Input[InputIdx]; 1122 if (!isValid(C)) 1123 return false; 1124 // Code points are padded with zeros while decoding is in progress. 1125 char UTF8[4] = {C}; 1126 Output += StringView(UTF8, UTF8 + 4); 1127 } 1128 // Skip over the delimiter. 1129 ++InputIdx; 1130 } 1131 1132 size_t Base = 36; 1133 size_t Skew = 38; 1134 size_t Bias = 72; 1135 size_t N = 0x80; 1136 size_t TMin = 1; 1137 size_t TMax = 26; 1138 size_t Damp = 700; 1139 1140 auto Adapt = [&](size_t Delta, size_t NumPoints) { 1141 Delta /= Damp; 1142 Delta += Delta / NumPoints; 1143 Damp = 2; 1144 1145 size_t K = 0; 1146 while (Delta > (Base - TMin) * TMax / 2) { 1147 Delta /= Base - TMin; 1148 K += Base; 1149 } 1150 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); 1151 }; 1152 1153 // Main decoding loop. 1154 for (size_t I = 0; InputIdx != Input.size(); I += 1) { 1155 size_t OldI = I; 1156 size_t W = 1; 1157 size_t Max = std::numeric_limits<size_t>::max(); 1158 for (size_t K = Base; true; K += Base) { 1159 if (InputIdx == Input.size()) 1160 return false; 1161 char C = Input[InputIdx++]; 1162 size_t Digit = 0; 1163 if (!decodePunycodeDigit(C, Digit)) 1164 return false; 1165 1166 if (Digit > (Max - I) / W) 1167 return false; 1168 I += Digit * W; 1169 1170 size_t T; 1171 if (K <= Bias) 1172 T = TMin; 1173 else if (K >= Bias + TMax) 1174 T = TMax; 1175 else 1176 T = K - Bias; 1177 1178 if (Digit < T) 1179 break; 1180 1181 if (W > Max / (Base - T)) 1182 return false; 1183 W *= (Base - T); 1184 } 1185 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; 1186 Bias = Adapt(I - OldI, NumPoints); 1187 1188 if (I / NumPoints > Max - N) 1189 return false; 1190 N += I / NumPoints; 1191 I = I % NumPoints; 1192 1193 // Insert N at position I in the output. 1194 char UTF8[4] = {}; 1195 if (!encodeUTF8(N, UTF8)) 1196 return false; 1197 Output.insert(OutputSize + I * 4, UTF8, 4); 1198 } 1199 1200 removeNullBytes(Output, OutputSize); 1201 return true; 1202 } 1203 1204 void Demangler::printIdentifier(Identifier Ident) { 1205 if (Error || !Print) 1206 return; 1207 1208 if (Ident.Punycode) { 1209 if (!decodePunycode(Ident.Name, Output)) 1210 Error = true; 1211 } else { 1212 print(Ident.Name); 1213 } 1214 } 1215 1216 char Demangler::look() const { 1217 if (Error || Position >= Input.size()) 1218 return 0; 1219 1220 return Input[Position]; 1221 } 1222 1223 char Demangler::consume() { 1224 if (Error || Position >= Input.size()) { 1225 Error = true; 1226 return 0; 1227 } 1228 1229 return Input[Position++]; 1230 } 1231 1232 bool Demangler::consumeIf(char Prefix) { 1233 if (Error || Position >= Input.size() || Input[Position] != Prefix) 1234 return false; 1235 1236 Position += 1; 1237 return true; 1238 } 1239 1240 /// Computes A + B. When computation wraps around sets the error and returns 1241 /// false. Otherwise assigns the result to A and returns true. 1242 bool Demangler::addAssign(uint64_t &A, uint64_t B) { 1243 if (A > std::numeric_limits<uint64_t>::max() - B) { 1244 Error = true; 1245 return false; 1246 } 1247 1248 A += B; 1249 return true; 1250 } 1251 1252 /// Computes A * B. When computation wraps around sets the error and returns 1253 /// false. Otherwise assigns the result to A and returns true. 1254 bool Demangler::mulAssign(uint64_t &A, uint64_t B) { 1255 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { 1256 Error = true; 1257 return false; 1258 } 1259 1260 A *= B; 1261 return true; 1262 } 1263