1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/Demangle.h" 15 #include "llvm/Demangle/StringViewExtras.h" 16 #include "llvm/Demangle/Utility.h" 17 18 #include <algorithm> 19 #include <cassert> 20 #include <cstdint> 21 #include <cstring> 22 #include <limits> 23 #include <string_view> 24 25 using namespace llvm; 26 27 using llvm::itanium_demangle::OutputBuffer; 28 using llvm::itanium_demangle::ScopedOverride; 29 using llvm::itanium_demangle::starts_with; 30 31 namespace { 32 33 struct Identifier { 34 std::string_view Name; 35 bool Punycode; 36 37 bool empty() const { return Name.empty(); } 38 }; 39 40 enum class BasicType { 41 Bool, 42 Char, 43 I8, 44 I16, 45 I32, 46 I64, 47 I128, 48 ISize, 49 U8, 50 U16, 51 U32, 52 U64, 53 U128, 54 USize, 55 F32, 56 F64, 57 Str, 58 Placeholder, 59 Unit, 60 Variadic, 61 Never, 62 }; 63 64 enum class IsInType { 65 No, 66 Yes, 67 }; 68 69 enum class LeaveGenericsOpen { 70 No, 71 Yes, 72 }; 73 74 class Demangler { 75 // Maximum recursion level. Used to avoid stack overflow. 76 size_t MaxRecursionLevel; 77 // Current recursion level. 78 size_t RecursionLevel; 79 size_t BoundLifetimes; 80 // Input string that is being demangled with "_R" prefix removed. 81 std::string_view Input; 82 // Position in the input string. 83 size_t Position; 84 // When true, print methods append the output to the stream. 85 // When false, the output is suppressed. 86 bool Print; 87 // True if an error occurred. 88 bool Error; 89 90 public: 91 // Demangled output. 92 OutputBuffer Output; 93 94 Demangler(size_t MaxRecursionLevel = 500); 95 96 bool demangle(std::string_view MangledName); 97 98 private: 99 bool demanglePath(IsInType Type, 100 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); 101 void demangleImplPath(IsInType InType); 102 void demangleGenericArg(); 103 void demangleType(); 104 void demangleFnSig(); 105 void demangleDynBounds(); 106 void demangleDynTrait(); 107 void demangleOptionalBinder(); 108 void demangleConst(); 109 void demangleConstInt(); 110 void demangleConstBool(); 111 void demangleConstChar(); 112 113 template <typename Callable> void demangleBackref(Callable Demangler) { 114 uint64_t Backref = parseBase62Number(); 115 if (Error || Backref >= Position) { 116 Error = true; 117 return; 118 } 119 120 if (!Print) 121 return; 122 123 ScopedOverride<size_t> SavePosition(Position, Position); 124 Position = Backref; 125 Demangler(); 126 } 127 128 Identifier parseIdentifier(); 129 uint64_t parseOptionalBase62Number(char Tag); 130 uint64_t parseBase62Number(); 131 uint64_t parseDecimalNumber(); 132 uint64_t parseHexNumber(std::string_view &HexDigits); 133 134 void print(char C); 135 void print(std::string_view S); 136 void printDecimalNumber(uint64_t N); 137 void printBasicType(BasicType); 138 void printLifetime(uint64_t Index); 139 void printIdentifier(Identifier Ident); 140 141 char look() const; 142 char consume(); 143 bool consumeIf(char Prefix); 144 145 bool addAssign(uint64_t &A, uint64_t B); 146 bool mulAssign(uint64_t &A, uint64_t B); 147 }; 148 149 } // namespace 150 151 char *llvm::rustDemangle(std::string_view MangledName) { 152 // Return early if mangled name doesn't look like a Rust symbol. 153 if (MangledName.empty() || !starts_with(MangledName, "_R")) 154 return nullptr; 155 156 Demangler D; 157 if (!D.demangle(MangledName)) { 158 std::free(D.Output.getBuffer()); 159 return nullptr; 160 } 161 162 D.Output += '\0'; 163 164 return D.Output.getBuffer(); 165 } 166 167 Demangler::Demangler(size_t MaxRecursionLevel) 168 : MaxRecursionLevel(MaxRecursionLevel) {} 169 170 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 171 172 static inline bool isHexDigit(const char C) { 173 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 174 } 175 176 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 177 178 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 179 180 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 181 static inline bool isValid(const char C) { 182 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 183 } 184 185 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 186 // otherwise. The demangled symbol is stored in Output field. It is 187 // responsibility of the caller to free the memory behind the output stream. 188 // 189 // <symbol-name> = "_R" <path> [<instantiating-crate>] 190 bool Demangler::demangle(std::string_view Mangled) { 191 Position = 0; 192 Error = false; 193 Print = true; 194 RecursionLevel = 0; 195 BoundLifetimes = 0; 196 197 if (!starts_with(Mangled, "_R")) { 198 Error = true; 199 return false; 200 } 201 Mangled.remove_prefix(2); 202 size_t Dot = Mangled.find('.'); 203 Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot); 204 205 demanglePath(IsInType::No); 206 207 if (Position != Input.size()) { 208 ScopedOverride<bool> SavePrint(Print, false); 209 demanglePath(IsInType::No); 210 } 211 212 if (Position != Input.size()) 213 Error = true; 214 215 if (Dot != std::string_view::npos) { 216 print(" ("); 217 print(Mangled.substr(Dot)); 218 print(")"); 219 } 220 221 return !Error; 222 } 223 224 // Demangles a path. InType indicates whether a path is inside a type. When 225 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the 226 // output. Return value indicates whether generics arguments have been left 227 // open. 228 // 229 // <path> = "C" <identifier> // crate root 230 // | "M" <impl-path> <type> // <T> (inherent impl) 231 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 232 // | "Y" <type> <path> // <T as Trait> (trait definition) 233 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 234 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 235 // | <backref> 236 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 237 // <ns> = "C" // closure 238 // | "S" // shim 239 // | <A-Z> // other special namespaces 240 // | <a-z> // internal namespaces 241 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { 242 if (Error || RecursionLevel >= MaxRecursionLevel) { 243 Error = true; 244 return false; 245 } 246 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 247 248 switch (consume()) { 249 case 'C': { 250 parseOptionalBase62Number('s'); 251 printIdentifier(parseIdentifier()); 252 break; 253 } 254 case 'M': { 255 demangleImplPath(InType); 256 print("<"); 257 demangleType(); 258 print(">"); 259 break; 260 } 261 case 'X': { 262 demangleImplPath(InType); 263 print("<"); 264 demangleType(); 265 print(" as "); 266 demanglePath(IsInType::Yes); 267 print(">"); 268 break; 269 } 270 case 'Y': { 271 print("<"); 272 demangleType(); 273 print(" as "); 274 demanglePath(IsInType::Yes); 275 print(">"); 276 break; 277 } 278 case 'N': { 279 char NS = consume(); 280 if (!isLower(NS) && !isUpper(NS)) { 281 Error = true; 282 break; 283 } 284 demanglePath(InType); 285 286 uint64_t Disambiguator = parseOptionalBase62Number('s'); 287 Identifier Ident = parseIdentifier(); 288 289 if (isUpper(NS)) { 290 // Special namespaces 291 print("::{"); 292 if (NS == 'C') 293 print("closure"); 294 else if (NS == 'S') 295 print("shim"); 296 else 297 print(NS); 298 if (!Ident.empty()) { 299 print(":"); 300 printIdentifier(Ident); 301 } 302 print('#'); 303 printDecimalNumber(Disambiguator); 304 print('}'); 305 } else { 306 // Implementation internal namespaces. 307 if (!Ident.empty()) { 308 print("::"); 309 printIdentifier(Ident); 310 } 311 } 312 break; 313 } 314 case 'I': { 315 demanglePath(InType); 316 // Omit "::" when in a type, where it is optional. 317 if (InType == IsInType::No) 318 print("::"); 319 print("<"); 320 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 321 if (I > 0) 322 print(", "); 323 demangleGenericArg(); 324 } 325 if (LeaveOpen == LeaveGenericsOpen::Yes) 326 return true; 327 else 328 print(">"); 329 break; 330 } 331 case 'B': { 332 bool IsOpen = false; 333 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); 334 return IsOpen; 335 } 336 default: 337 Error = true; 338 break; 339 } 340 341 return false; 342 } 343 344 // <impl-path> = [<disambiguator>] <path> 345 // <disambiguator> = "s" <base-62-number> 346 void Demangler::demangleImplPath(IsInType InType) { 347 ScopedOverride<bool> SavePrint(Print, false); 348 parseOptionalBase62Number('s'); 349 demanglePath(InType); 350 } 351 352 // <generic-arg> = <lifetime> 353 // | <type> 354 // | "K" <const> 355 // <lifetime> = "L" <base-62-number> 356 void Demangler::demangleGenericArg() { 357 if (consumeIf('L')) 358 printLifetime(parseBase62Number()); 359 else if (consumeIf('K')) 360 demangleConst(); 361 else 362 demangleType(); 363 } 364 365 // <basic-type> = "a" // i8 366 // | "b" // bool 367 // | "c" // char 368 // | "d" // f64 369 // | "e" // str 370 // | "f" // f32 371 // | "h" // u8 372 // | "i" // isize 373 // | "j" // usize 374 // | "l" // i32 375 // | "m" // u32 376 // | "n" // i128 377 // | "o" // u128 378 // | "s" // i16 379 // | "t" // u16 380 // | "u" // () 381 // | "v" // ... 382 // | "x" // i64 383 // | "y" // u64 384 // | "z" // ! 385 // | "p" // placeholder (e.g. for generic params), shown as _ 386 static bool parseBasicType(char C, BasicType &Type) { 387 switch (C) { 388 case 'a': 389 Type = BasicType::I8; 390 return true; 391 case 'b': 392 Type = BasicType::Bool; 393 return true; 394 case 'c': 395 Type = BasicType::Char; 396 return true; 397 case 'd': 398 Type = BasicType::F64; 399 return true; 400 case 'e': 401 Type = BasicType::Str; 402 return true; 403 case 'f': 404 Type = BasicType::F32; 405 return true; 406 case 'h': 407 Type = BasicType::U8; 408 return true; 409 case 'i': 410 Type = BasicType::ISize; 411 return true; 412 case 'j': 413 Type = BasicType::USize; 414 return true; 415 case 'l': 416 Type = BasicType::I32; 417 return true; 418 case 'm': 419 Type = BasicType::U32; 420 return true; 421 case 'n': 422 Type = BasicType::I128; 423 return true; 424 case 'o': 425 Type = BasicType::U128; 426 return true; 427 case 'p': 428 Type = BasicType::Placeholder; 429 return true; 430 case 's': 431 Type = BasicType::I16; 432 return true; 433 case 't': 434 Type = BasicType::U16; 435 return true; 436 case 'u': 437 Type = BasicType::Unit; 438 return true; 439 case 'v': 440 Type = BasicType::Variadic; 441 return true; 442 case 'x': 443 Type = BasicType::I64; 444 return true; 445 case 'y': 446 Type = BasicType::U64; 447 return true; 448 case 'z': 449 Type = BasicType::Never; 450 return true; 451 default: 452 return false; 453 } 454 } 455 456 void Demangler::printBasicType(BasicType Type) { 457 switch (Type) { 458 case BasicType::Bool: 459 print("bool"); 460 break; 461 case BasicType::Char: 462 print("char"); 463 break; 464 case BasicType::I8: 465 print("i8"); 466 break; 467 case BasicType::I16: 468 print("i16"); 469 break; 470 case BasicType::I32: 471 print("i32"); 472 break; 473 case BasicType::I64: 474 print("i64"); 475 break; 476 case BasicType::I128: 477 print("i128"); 478 break; 479 case BasicType::ISize: 480 print("isize"); 481 break; 482 case BasicType::U8: 483 print("u8"); 484 break; 485 case BasicType::U16: 486 print("u16"); 487 break; 488 case BasicType::U32: 489 print("u32"); 490 break; 491 case BasicType::U64: 492 print("u64"); 493 break; 494 case BasicType::U128: 495 print("u128"); 496 break; 497 case BasicType::USize: 498 print("usize"); 499 break; 500 case BasicType::F32: 501 print("f32"); 502 break; 503 case BasicType::F64: 504 print("f64"); 505 break; 506 case BasicType::Str: 507 print("str"); 508 break; 509 case BasicType::Placeholder: 510 print("_"); 511 break; 512 case BasicType::Unit: 513 print("()"); 514 break; 515 case BasicType::Variadic: 516 print("..."); 517 break; 518 case BasicType::Never: 519 print("!"); 520 break; 521 } 522 } 523 524 // <type> = | <basic-type> 525 // | <path> // named type 526 // | "A" <type> <const> // [T; N] 527 // | "S" <type> // [T] 528 // | "T" {<type>} "E" // (T1, T2, T3, ...) 529 // | "R" [<lifetime>] <type> // &T 530 // | "Q" [<lifetime>] <type> // &mut T 531 // | "P" <type> // *const T 532 // | "O" <type> // *mut T 533 // | "F" <fn-sig> // fn(...) -> ... 534 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 535 // | <backref> // backref 536 void Demangler::demangleType() { 537 if (Error || RecursionLevel >= MaxRecursionLevel) { 538 Error = true; 539 return; 540 } 541 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 542 543 size_t Start = Position; 544 char C = consume(); 545 BasicType Type; 546 if (parseBasicType(C, Type)) 547 return printBasicType(Type); 548 549 switch (C) { 550 case 'A': 551 print("["); 552 demangleType(); 553 print("; "); 554 demangleConst(); 555 print("]"); 556 break; 557 case 'S': 558 print("["); 559 demangleType(); 560 print("]"); 561 break; 562 case 'T': { 563 print("("); 564 size_t I = 0; 565 for (; !Error && !consumeIf('E'); ++I) { 566 if (I > 0) 567 print(", "); 568 demangleType(); 569 } 570 if (I == 1) 571 print(","); 572 print(")"); 573 break; 574 } 575 case 'R': 576 case 'Q': 577 print('&'); 578 if (consumeIf('L')) { 579 if (auto Lifetime = parseBase62Number()) { 580 printLifetime(Lifetime); 581 print(' '); 582 } 583 } 584 if (C == 'Q') 585 print("mut "); 586 demangleType(); 587 break; 588 case 'P': 589 print("*const "); 590 demangleType(); 591 break; 592 case 'O': 593 print("*mut "); 594 demangleType(); 595 break; 596 case 'F': 597 demangleFnSig(); 598 break; 599 case 'D': 600 demangleDynBounds(); 601 if (consumeIf('L')) { 602 if (auto Lifetime = parseBase62Number()) { 603 print(" + "); 604 printLifetime(Lifetime); 605 } 606 } else { 607 Error = true; 608 } 609 break; 610 case 'B': 611 demangleBackref([&] { demangleType(); }); 612 break; 613 default: 614 Position = Start; 615 demanglePath(IsInType::Yes); 616 break; 617 } 618 } 619 620 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 621 // <abi> = "C" 622 // | <undisambiguated-identifier> 623 void Demangler::demangleFnSig() { 624 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 625 demangleOptionalBinder(); 626 627 if (consumeIf('U')) 628 print("unsafe "); 629 630 if (consumeIf('K')) { 631 print("extern \""); 632 if (consumeIf('C')) { 633 print("C"); 634 } else { 635 Identifier Ident = parseIdentifier(); 636 if (Ident.Punycode) 637 Error = true; 638 for (char C : Ident.Name) { 639 // When mangling ABI string, the "-" is replaced with "_". 640 if (C == '_') 641 C = '-'; 642 print(C); 643 } 644 } 645 print("\" "); 646 } 647 648 print("fn("); 649 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 650 if (I > 0) 651 print(", "); 652 demangleType(); 653 } 654 print(")"); 655 656 if (consumeIf('u')) { 657 // Skip the unit type from the output. 658 } else { 659 print(" -> "); 660 demangleType(); 661 } 662 } 663 664 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 665 void Demangler::demangleDynBounds() { 666 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 667 print("dyn "); 668 demangleOptionalBinder(); 669 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 670 if (I > 0) 671 print(" + "); 672 demangleDynTrait(); 673 } 674 } 675 676 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>} 677 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 678 void Demangler::demangleDynTrait() { 679 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); 680 while (!Error && consumeIf('p')) { 681 if (!IsOpen) { 682 IsOpen = true; 683 print('<'); 684 } else { 685 print(", "); 686 } 687 print(parseIdentifier().Name); 688 print(" = "); 689 demangleType(); 690 } 691 if (IsOpen) 692 print(">"); 693 } 694 695 // Demangles optional binder and updates the number of bound lifetimes. 696 // 697 // <binder> = "G" <base-62-number> 698 void Demangler::demangleOptionalBinder() { 699 uint64_t Binder = parseOptionalBase62Number('G'); 700 if (Error || Binder == 0) 701 return; 702 703 // In valid inputs each bound lifetime is referenced later. Referencing a 704 // lifetime requires at least one byte of input. Reject inputs that are too 705 // short to reference all bound lifetimes. Otherwise demangling of invalid 706 // binders could generate excessive amounts of output. 707 if (Binder >= Input.size() - BoundLifetimes) { 708 Error = true; 709 return; 710 } 711 712 print("for<"); 713 for (size_t I = 0; I != Binder; ++I) { 714 BoundLifetimes += 1; 715 if (I > 0) 716 print(", "); 717 printLifetime(1); 718 } 719 print("> "); 720 } 721 722 // <const> = <basic-type> <const-data> 723 // | "p" // placeholder 724 // | <backref> 725 void Demangler::demangleConst() { 726 if (Error || RecursionLevel >= MaxRecursionLevel) { 727 Error = true; 728 return; 729 } 730 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 731 732 char C = consume(); 733 BasicType Type; 734 if (parseBasicType(C, Type)) { 735 switch (Type) { 736 case BasicType::I8: 737 case BasicType::I16: 738 case BasicType::I32: 739 case BasicType::I64: 740 case BasicType::I128: 741 case BasicType::ISize: 742 case BasicType::U8: 743 case BasicType::U16: 744 case BasicType::U32: 745 case BasicType::U64: 746 case BasicType::U128: 747 case BasicType::USize: 748 demangleConstInt(); 749 break; 750 case BasicType::Bool: 751 demangleConstBool(); 752 break; 753 case BasicType::Char: 754 demangleConstChar(); 755 break; 756 case BasicType::Placeholder: 757 print('_'); 758 break; 759 default: 760 Error = true; 761 break; 762 } 763 } else if (C == 'B') { 764 demangleBackref([&] { demangleConst(); }); 765 } else { 766 Error = true; 767 } 768 } 769 770 // <const-data> = ["n"] <hex-number> 771 void Demangler::demangleConstInt() { 772 if (consumeIf('n')) 773 print('-'); 774 775 std::string_view HexDigits; 776 uint64_t Value = parseHexNumber(HexDigits); 777 if (HexDigits.size() <= 16) { 778 printDecimalNumber(Value); 779 } else { 780 print("0x"); 781 print(HexDigits); 782 } 783 } 784 785 // <const-data> = "0_" // false 786 // | "1_" // true 787 void Demangler::demangleConstBool() { 788 std::string_view HexDigits; 789 parseHexNumber(HexDigits); 790 if (HexDigits == "0") 791 print("false"); 792 else if (HexDigits == "1") 793 print("true"); 794 else 795 Error = true; 796 } 797 798 /// Returns true if CodePoint represents a printable ASCII character. 799 static bool isAsciiPrintable(uint64_t CodePoint) { 800 return 0x20 <= CodePoint && CodePoint <= 0x7e; 801 } 802 803 // <const-data> = <hex-number> 804 void Demangler::demangleConstChar() { 805 std::string_view HexDigits; 806 uint64_t CodePoint = parseHexNumber(HexDigits); 807 if (Error || HexDigits.size() > 6) { 808 Error = true; 809 return; 810 } 811 812 print("'"); 813 switch (CodePoint) { 814 case '\t': 815 print(R"(\t)"); 816 break; 817 case '\r': 818 print(R"(\r)"); 819 break; 820 case '\n': 821 print(R"(\n)"); 822 break; 823 case '\\': 824 print(R"(\\)"); 825 break; 826 case '"': 827 print(R"(")"); 828 break; 829 case '\'': 830 print(R"(\')"); 831 break; 832 default: 833 if (isAsciiPrintable(CodePoint)) { 834 char C = CodePoint; 835 print(C); 836 } else { 837 print(R"(\u{)"); 838 print(HexDigits); 839 print('}'); 840 } 841 break; 842 } 843 print('\''); 844 } 845 846 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 847 Identifier Demangler::parseIdentifier() { 848 bool Punycode = consumeIf('u'); 849 uint64_t Bytes = parseDecimalNumber(); 850 851 // Underscore resolves the ambiguity when identifier starts with a decimal 852 // digit or another underscore. 853 consumeIf('_'); 854 855 if (Error || Bytes > Input.size() - Position) { 856 Error = true; 857 return {}; 858 } 859 std::string_view S = Input.substr(Position, Bytes); 860 Position += Bytes; 861 862 if (!std::all_of(S.begin(), S.end(), isValid)) { 863 Error = true; 864 return {}; 865 } 866 867 return {S, Punycode}; 868 } 869 870 // Parses optional base 62 number. The presence of a number is determined using 871 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise 872 // 873 // This function is indended for parsing disambiguators and binders which when 874 // not present have their value interpreted as 0, and otherwise as decoded 875 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is 876 // 2. When "G" is absent value is 0. 877 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 878 if (!consumeIf(Tag)) 879 return 0; 880 881 uint64_t N = parseBase62Number(); 882 if (Error || !addAssign(N, 1)) 883 return 0; 884 885 return N; 886 } 887 888 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 889 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 890 // "1_" encodes 2, etc. 891 // 892 // <base-62-number> = {<0-9a-zA-Z>} "_" 893 uint64_t Demangler::parseBase62Number() { 894 if (consumeIf('_')) 895 return 0; 896 897 uint64_t Value = 0; 898 899 while (true) { 900 uint64_t Digit; 901 char C = consume(); 902 903 if (C == '_') { 904 break; 905 } else if (isDigit(C)) { 906 Digit = C - '0'; 907 } else if (isLower(C)) { 908 Digit = 10 + (C - 'a'); 909 } else if (isUpper(C)) { 910 Digit = 10 + 26 + (C - 'A'); 911 } else { 912 Error = true; 913 return 0; 914 } 915 916 if (!mulAssign(Value, 62)) 917 return 0; 918 919 if (!addAssign(Value, Digit)) 920 return 0; 921 } 922 923 if (!addAssign(Value, 1)) 924 return 0; 925 926 return Value; 927 } 928 929 // Parses a decimal number that had been encoded without any leading zeros. 930 // 931 // <decimal-number> = "0" 932 // | <1-9> {<0-9>} 933 uint64_t Demangler::parseDecimalNumber() { 934 char C = look(); 935 if (!isDigit(C)) { 936 Error = true; 937 return 0; 938 } 939 940 if (C == '0') { 941 consume(); 942 return 0; 943 } 944 945 uint64_t Value = 0; 946 947 while (isDigit(look())) { 948 if (!mulAssign(Value, 10)) { 949 Error = true; 950 return 0; 951 } 952 953 uint64_t D = consume() - '0'; 954 if (!addAssign(Value, D)) 955 return 0; 956 } 957 958 return Value; 959 } 960 961 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 962 // value and stores hex digits in HexDigits. The return value is unspecified if 963 // HexDigits.size() > 16. 964 // 965 // <hex-number> = "0_" 966 // | <1-9a-f> {<0-9a-f>} "_" 967 uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) { 968 size_t Start = Position; 969 uint64_t Value = 0; 970 971 if (!isHexDigit(look())) 972 Error = true; 973 974 if (consumeIf('0')) { 975 if (!consumeIf('_')) 976 Error = true; 977 } else { 978 while (!Error && !consumeIf('_')) { 979 char C = consume(); 980 Value *= 16; 981 if (isDigit(C)) 982 Value += C - '0'; 983 else if ('a' <= C && C <= 'f') 984 Value += 10 + (C - 'a'); 985 else 986 Error = true; 987 } 988 } 989 990 if (Error) { 991 HexDigits = std::string_view(); 992 return 0; 993 } 994 995 size_t End = Position - 1; 996 assert(Start < End); 997 HexDigits = Input.substr(Start, End - Start); 998 return Value; 999 } 1000 1001 void Demangler::print(char C) { 1002 if (Error || !Print) 1003 return; 1004 1005 Output += C; 1006 } 1007 1008 void Demangler::print(std::string_view S) { 1009 if (Error || !Print) 1010 return; 1011 1012 Output += S; 1013 } 1014 1015 void Demangler::printDecimalNumber(uint64_t N) { 1016 if (Error || !Print) 1017 return; 1018 1019 Output << N; 1020 } 1021 1022 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices 1023 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes 1024 // bound by one of the enclosing binders. 1025 void Demangler::printLifetime(uint64_t Index) { 1026 if (Index == 0) { 1027 print("'_"); 1028 return; 1029 } 1030 1031 if (Index - 1 >= BoundLifetimes) { 1032 Error = true; 1033 return; 1034 } 1035 1036 uint64_t Depth = BoundLifetimes - Index; 1037 print('\''); 1038 if (Depth < 26) { 1039 char C = 'a' + Depth; 1040 print(C); 1041 } else { 1042 print('z'); 1043 printDecimalNumber(Depth - 26 + 1); 1044 } 1045 } 1046 1047 static inline bool decodePunycodeDigit(char C, size_t &Value) { 1048 if (isLower(C)) { 1049 Value = C - 'a'; 1050 return true; 1051 } 1052 1053 if (isDigit(C)) { 1054 Value = 26 + (C - '0'); 1055 return true; 1056 } 1057 1058 return false; 1059 } 1060 1061 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { 1062 char *Buffer = Output.getBuffer(); 1063 char *Start = Buffer + StartIdx; 1064 char *End = Buffer + Output.getCurrentPosition(); 1065 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); 1066 } 1067 1068 // Encodes code point as UTF-8 and stores results in Output. Returns false if 1069 // CodePoint is not a valid unicode scalar value. 1070 static inline bool encodeUTF8(size_t CodePoint, char *Output) { 1071 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) 1072 return false; 1073 1074 if (CodePoint <= 0x7F) { 1075 Output[0] = CodePoint; 1076 return true; 1077 } 1078 1079 if (CodePoint <= 0x7FF) { 1080 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); 1081 Output[1] = 0x80 | (CodePoint & 0x3F); 1082 return true; 1083 } 1084 1085 if (CodePoint <= 0xFFFF) { 1086 Output[0] = 0xE0 | (CodePoint >> 12); 1087 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); 1088 Output[2] = 0x80 | (CodePoint & 0x3F); 1089 return true; 1090 } 1091 1092 if (CodePoint <= 0x10FFFF) { 1093 Output[0] = 0xF0 | (CodePoint >> 18); 1094 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); 1095 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); 1096 Output[3] = 0x80 | (CodePoint & 0x3F); 1097 return true; 1098 } 1099 1100 return false; 1101 } 1102 1103 // Decodes string encoded using punycode and appends results to Output. 1104 // Returns true if decoding was successful. 1105 static bool decodePunycode(std::string_view Input, OutputBuffer &Output) { 1106 size_t OutputSize = Output.getCurrentPosition(); 1107 size_t InputIdx = 0; 1108 1109 // Rust uses an underscore as a delimiter. 1110 size_t DelimiterPos = std::string_view::npos; 1111 for (size_t I = 0; I != Input.size(); ++I) 1112 if (Input[I] == '_') 1113 DelimiterPos = I; 1114 1115 if (DelimiterPos != std::string_view::npos) { 1116 // Copy basic code points before the last delimiter to the output. 1117 for (; InputIdx != DelimiterPos; ++InputIdx) { 1118 char C = Input[InputIdx]; 1119 if (!isValid(C)) 1120 return false; 1121 // Code points are padded with zeros while decoding is in progress. 1122 char UTF8[4] = {C}; 1123 Output += std::string_view(UTF8, 4); 1124 } 1125 // Skip over the delimiter. 1126 ++InputIdx; 1127 } 1128 1129 size_t Base = 36; 1130 size_t Skew = 38; 1131 size_t Bias = 72; 1132 size_t N = 0x80; 1133 size_t TMin = 1; 1134 size_t TMax = 26; 1135 size_t Damp = 700; 1136 1137 auto Adapt = [&](size_t Delta, size_t NumPoints) { 1138 Delta /= Damp; 1139 Delta += Delta / NumPoints; 1140 Damp = 2; 1141 1142 size_t K = 0; 1143 while (Delta > (Base - TMin) * TMax / 2) { 1144 Delta /= Base - TMin; 1145 K += Base; 1146 } 1147 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); 1148 }; 1149 1150 // Main decoding loop. 1151 for (size_t I = 0; InputIdx != Input.size(); I += 1) { 1152 size_t OldI = I; 1153 size_t W = 1; 1154 size_t Max = std::numeric_limits<size_t>::max(); 1155 for (size_t K = Base; true; K += Base) { 1156 if (InputIdx == Input.size()) 1157 return false; 1158 char C = Input[InputIdx++]; 1159 size_t Digit = 0; 1160 if (!decodePunycodeDigit(C, Digit)) 1161 return false; 1162 1163 if (Digit > (Max - I) / W) 1164 return false; 1165 I += Digit * W; 1166 1167 size_t T; 1168 if (K <= Bias) 1169 T = TMin; 1170 else if (K >= Bias + TMax) 1171 T = TMax; 1172 else 1173 T = K - Bias; 1174 1175 if (Digit < T) 1176 break; 1177 1178 if (W > Max / (Base - T)) 1179 return false; 1180 W *= (Base - T); 1181 } 1182 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; 1183 Bias = Adapt(I - OldI, NumPoints); 1184 1185 if (I / NumPoints > Max - N) 1186 return false; 1187 N += I / NumPoints; 1188 I = I % NumPoints; 1189 1190 // Insert N at position I in the output. 1191 char UTF8[4] = {}; 1192 if (!encodeUTF8(N, UTF8)) 1193 return false; 1194 Output.insert(OutputSize + I * 4, UTF8, 4); 1195 } 1196 1197 removeNullBytes(Output, OutputSize); 1198 return true; 1199 } 1200 1201 void Demangler::printIdentifier(Identifier Ident) { 1202 if (Error || !Print) 1203 return; 1204 1205 if (Ident.Punycode) { 1206 if (!decodePunycode(Ident.Name, Output)) 1207 Error = true; 1208 } else { 1209 print(Ident.Name); 1210 } 1211 } 1212 1213 char Demangler::look() const { 1214 if (Error || Position >= Input.size()) 1215 return 0; 1216 1217 return Input[Position]; 1218 } 1219 1220 char Demangler::consume() { 1221 if (Error || Position >= Input.size()) { 1222 Error = true; 1223 return 0; 1224 } 1225 1226 return Input[Position++]; 1227 } 1228 1229 bool Demangler::consumeIf(char Prefix) { 1230 if (Error || Position >= Input.size() || Input[Position] != Prefix) 1231 return false; 1232 1233 Position += 1; 1234 return true; 1235 } 1236 1237 /// Computes A + B. When computation wraps around sets the error and returns 1238 /// false. Otherwise assigns the result to A and returns true. 1239 bool Demangler::addAssign(uint64_t &A, uint64_t B) { 1240 if (A > std::numeric_limits<uint64_t>::max() - B) { 1241 Error = true; 1242 return false; 1243 } 1244 1245 A += B; 1246 return true; 1247 } 1248 1249 /// Computes A * B. When computation wraps around sets the error and returns 1250 /// false. Otherwise assigns the result to A and returns true. 1251 bool Demangler::mulAssign(uint64_t &A, uint64_t B) { 1252 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { 1253 Error = true; 1254 return false; 1255 } 1256 1257 A *= B; 1258 return true; 1259 } 1260