1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/Demangle.h" 15 #include "llvm/Demangle/StringView.h" 16 #include "llvm/Demangle/Utility.h" 17 18 #include <algorithm> 19 #include <cassert> 20 #include <cstdint> 21 #include <cstring> 22 #include <limits> 23 24 using namespace llvm; 25 26 using llvm::itanium_demangle::OutputBuffer; 27 using llvm::itanium_demangle::StringView; 28 using llvm::itanium_demangle::SwapAndRestore; 29 30 namespace { 31 32 struct Identifier { 33 StringView Name; 34 bool Punycode; 35 36 bool empty() const { return Name.empty(); } 37 }; 38 39 enum class BasicType { 40 Bool, 41 Char, 42 I8, 43 I16, 44 I32, 45 I64, 46 I128, 47 ISize, 48 U8, 49 U16, 50 U32, 51 U64, 52 U128, 53 USize, 54 F32, 55 F64, 56 Str, 57 Placeholder, 58 Unit, 59 Variadic, 60 Never, 61 }; 62 63 enum class IsInType { 64 No, 65 Yes, 66 }; 67 68 enum class LeaveGenericsOpen { 69 No, 70 Yes, 71 }; 72 73 class Demangler { 74 // Maximum recursion level. Used to avoid stack overflow. 75 size_t MaxRecursionLevel; 76 // Current recursion level. 77 size_t RecursionLevel; 78 size_t BoundLifetimes; 79 // Input string that is being demangled with "_R" prefix removed. 80 StringView Input; 81 // Position in the input string. 82 size_t Position; 83 // When true, print methods append the output to the stream. 84 // When false, the output is suppressed. 85 bool Print; 86 // True if an error occurred. 87 bool Error; 88 89 public: 90 // Demangled output. 91 OutputBuffer Output; 92 93 Demangler(size_t MaxRecursionLevel = 500); 94 95 bool demangle(StringView MangledName); 96 97 private: 98 bool demanglePath(IsInType Type, 99 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); 100 void demangleImplPath(IsInType InType); 101 void demangleGenericArg(); 102 void demangleType(); 103 void demangleFnSig(); 104 void demangleDynBounds(); 105 void demangleDynTrait(); 106 void demangleOptionalBinder(); 107 void demangleConst(); 108 void demangleConstInt(); 109 void demangleConstBool(); 110 void demangleConstChar(); 111 112 template <typename Callable> void demangleBackref(Callable Demangler) { 113 uint64_t Backref = parseBase62Number(); 114 if (Error || Backref >= Position) { 115 Error = true; 116 return; 117 } 118 119 if (!Print) 120 return; 121 122 SwapAndRestore<size_t> SavePosition(Position, Position); 123 Position = Backref; 124 Demangler(); 125 } 126 127 Identifier parseIdentifier(); 128 uint64_t parseOptionalBase62Number(char Tag); 129 uint64_t parseBase62Number(); 130 uint64_t parseDecimalNumber(); 131 uint64_t parseHexNumber(StringView &HexDigits); 132 133 void print(char C); 134 void print(StringView S); 135 void printDecimalNumber(uint64_t N); 136 void printBasicType(BasicType); 137 void printLifetime(uint64_t Index); 138 void printIdentifier(Identifier Ident); 139 140 char look() const; 141 char consume(); 142 bool consumeIf(char Prefix); 143 144 bool addAssign(uint64_t &A, uint64_t B); 145 bool mulAssign(uint64_t &A, uint64_t B); 146 }; 147 148 } // namespace 149 150 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, 151 int *Status) { 152 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { 153 if (Status != nullptr) 154 *Status = demangle_invalid_args; 155 return nullptr; 156 } 157 158 // Return early if mangled name doesn't look like a Rust symbol. 159 StringView Mangled(MangledName); 160 if (!Mangled.startsWith("_R")) { 161 if (Status != nullptr) 162 *Status = demangle_invalid_mangled_name; 163 return nullptr; 164 } 165 166 Demangler D; 167 if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) { 168 if (Status != nullptr) 169 *Status = demangle_memory_alloc_failure; 170 return nullptr; 171 } 172 173 if (!D.demangle(Mangled)) { 174 if (Status != nullptr) 175 *Status = demangle_invalid_mangled_name; 176 std::free(D.Output.getBuffer()); 177 return nullptr; 178 } 179 180 D.Output += '\0'; 181 char *Demangled = D.Output.getBuffer(); 182 size_t DemangledLen = D.Output.getCurrentPosition(); 183 184 if (Buf != nullptr) { 185 if (DemangledLen <= *N) { 186 std::memcpy(Buf, Demangled, DemangledLen); 187 std::free(Demangled); 188 Demangled = Buf; 189 } else { 190 std::free(Buf); 191 } 192 } 193 194 if (N != nullptr) 195 *N = DemangledLen; 196 197 if (Status != nullptr) 198 *Status = demangle_success; 199 200 return Demangled; 201 } 202 203 Demangler::Demangler(size_t MaxRecursionLevel) 204 : MaxRecursionLevel(MaxRecursionLevel) {} 205 206 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 207 208 static inline bool isHexDigit(const char C) { 209 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 210 } 211 212 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 213 214 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 215 216 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 217 static inline bool isValid(const char C) { 218 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 219 } 220 221 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 222 // otherwise. The demangled symbol is stored in Output field. It is 223 // responsibility of the caller to free the memory behind the output stream. 224 // 225 // <symbol-name> = "_R" <path> [<instantiating-crate>] 226 bool Demangler::demangle(StringView Mangled) { 227 Position = 0; 228 Error = false; 229 Print = true; 230 RecursionLevel = 0; 231 BoundLifetimes = 0; 232 233 if (!Mangled.consumeFront("_R")) { 234 Error = true; 235 return false; 236 } 237 size_t Dot = Mangled.find('.'); 238 Input = Mangled.substr(0, Dot); 239 StringView Suffix = Mangled.dropFront(Dot); 240 241 demanglePath(IsInType::No); 242 243 if (Position != Input.size()) { 244 SwapAndRestore<bool> SavePrint(Print, false); 245 demanglePath(IsInType::No); 246 } 247 248 if (Position != Input.size()) 249 Error = true; 250 251 if (!Suffix.empty()) { 252 print(" ("); 253 print(Suffix); 254 print(")"); 255 } 256 257 return !Error; 258 } 259 260 // Demangles a path. InType indicates whether a path is inside a type. When 261 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the 262 // output. Return value indicates whether generics arguments have been left 263 // open. 264 // 265 // <path> = "C" <identifier> // crate root 266 // | "M" <impl-path> <type> // <T> (inherent impl) 267 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 268 // | "Y" <type> <path> // <T as Trait> (trait definition) 269 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 270 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 271 // | <backref> 272 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 273 // <ns> = "C" // closure 274 // | "S" // shim 275 // | <A-Z> // other special namespaces 276 // | <a-z> // internal namespaces 277 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { 278 if (Error || RecursionLevel >= MaxRecursionLevel) { 279 Error = true; 280 return false; 281 } 282 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 283 284 switch (consume()) { 285 case 'C': { 286 parseOptionalBase62Number('s'); 287 printIdentifier(parseIdentifier()); 288 break; 289 } 290 case 'M': { 291 demangleImplPath(InType); 292 print("<"); 293 demangleType(); 294 print(">"); 295 break; 296 } 297 case 'X': { 298 demangleImplPath(InType); 299 print("<"); 300 demangleType(); 301 print(" as "); 302 demanglePath(IsInType::Yes); 303 print(">"); 304 break; 305 } 306 case 'Y': { 307 print("<"); 308 demangleType(); 309 print(" as "); 310 demanglePath(IsInType::Yes); 311 print(">"); 312 break; 313 } 314 case 'N': { 315 char NS = consume(); 316 if (!isLower(NS) && !isUpper(NS)) { 317 Error = true; 318 break; 319 } 320 demanglePath(InType); 321 322 uint64_t Disambiguator = parseOptionalBase62Number('s'); 323 Identifier Ident = parseIdentifier(); 324 325 if (isUpper(NS)) { 326 // Special namespaces 327 print("::{"); 328 if (NS == 'C') 329 print("closure"); 330 else if (NS == 'S') 331 print("shim"); 332 else 333 print(NS); 334 if (!Ident.empty()) { 335 print(":"); 336 printIdentifier(Ident); 337 } 338 print('#'); 339 printDecimalNumber(Disambiguator); 340 print('}'); 341 } else { 342 // Implementation internal namespaces. 343 if (!Ident.empty()) { 344 print("::"); 345 printIdentifier(Ident); 346 } 347 } 348 break; 349 } 350 case 'I': { 351 demanglePath(InType); 352 // Omit "::" when in a type, where it is optional. 353 if (InType == IsInType::No) 354 print("::"); 355 print("<"); 356 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 357 if (I > 0) 358 print(", "); 359 demangleGenericArg(); 360 } 361 if (LeaveOpen == LeaveGenericsOpen::Yes) 362 return true; 363 else 364 print(">"); 365 break; 366 } 367 case 'B': { 368 bool IsOpen = false; 369 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); 370 return IsOpen; 371 } 372 default: 373 Error = true; 374 break; 375 } 376 377 return false; 378 } 379 380 // <impl-path> = [<disambiguator>] <path> 381 // <disambiguator> = "s" <base-62-number> 382 void Demangler::demangleImplPath(IsInType InType) { 383 SwapAndRestore<bool> SavePrint(Print, false); 384 parseOptionalBase62Number('s'); 385 demanglePath(InType); 386 } 387 388 // <generic-arg> = <lifetime> 389 // | <type> 390 // | "K" <const> 391 // <lifetime> = "L" <base-62-number> 392 void Demangler::demangleGenericArg() { 393 if (consumeIf('L')) 394 printLifetime(parseBase62Number()); 395 else if (consumeIf('K')) 396 demangleConst(); 397 else 398 demangleType(); 399 } 400 401 // <basic-type> = "a" // i8 402 // | "b" // bool 403 // | "c" // char 404 // | "d" // f64 405 // | "e" // str 406 // | "f" // f32 407 // | "h" // u8 408 // | "i" // isize 409 // | "j" // usize 410 // | "l" // i32 411 // | "m" // u32 412 // | "n" // i128 413 // | "o" // u128 414 // | "s" // i16 415 // | "t" // u16 416 // | "u" // () 417 // | "v" // ... 418 // | "x" // i64 419 // | "y" // u64 420 // | "z" // ! 421 // | "p" // placeholder (e.g. for generic params), shown as _ 422 static bool parseBasicType(char C, BasicType &Type) { 423 switch (C) { 424 case 'a': 425 Type = BasicType::I8; 426 return true; 427 case 'b': 428 Type = BasicType::Bool; 429 return true; 430 case 'c': 431 Type = BasicType::Char; 432 return true; 433 case 'd': 434 Type = BasicType::F64; 435 return true; 436 case 'e': 437 Type = BasicType::Str; 438 return true; 439 case 'f': 440 Type = BasicType::F32; 441 return true; 442 case 'h': 443 Type = BasicType::U8; 444 return true; 445 case 'i': 446 Type = BasicType::ISize; 447 return true; 448 case 'j': 449 Type = BasicType::USize; 450 return true; 451 case 'l': 452 Type = BasicType::I32; 453 return true; 454 case 'm': 455 Type = BasicType::U32; 456 return true; 457 case 'n': 458 Type = BasicType::I128; 459 return true; 460 case 'o': 461 Type = BasicType::U128; 462 return true; 463 case 'p': 464 Type = BasicType::Placeholder; 465 return true; 466 case 's': 467 Type = BasicType::I16; 468 return true; 469 case 't': 470 Type = BasicType::U16; 471 return true; 472 case 'u': 473 Type = BasicType::Unit; 474 return true; 475 case 'v': 476 Type = BasicType::Variadic; 477 return true; 478 case 'x': 479 Type = BasicType::I64; 480 return true; 481 case 'y': 482 Type = BasicType::U64; 483 return true; 484 case 'z': 485 Type = BasicType::Never; 486 return true; 487 default: 488 return false; 489 } 490 } 491 492 void Demangler::printBasicType(BasicType Type) { 493 switch (Type) { 494 case BasicType::Bool: 495 print("bool"); 496 break; 497 case BasicType::Char: 498 print("char"); 499 break; 500 case BasicType::I8: 501 print("i8"); 502 break; 503 case BasicType::I16: 504 print("i16"); 505 break; 506 case BasicType::I32: 507 print("i32"); 508 break; 509 case BasicType::I64: 510 print("i64"); 511 break; 512 case BasicType::I128: 513 print("i128"); 514 break; 515 case BasicType::ISize: 516 print("isize"); 517 break; 518 case BasicType::U8: 519 print("u8"); 520 break; 521 case BasicType::U16: 522 print("u16"); 523 break; 524 case BasicType::U32: 525 print("u32"); 526 break; 527 case BasicType::U64: 528 print("u64"); 529 break; 530 case BasicType::U128: 531 print("u128"); 532 break; 533 case BasicType::USize: 534 print("usize"); 535 break; 536 case BasicType::F32: 537 print("f32"); 538 break; 539 case BasicType::F64: 540 print("f64"); 541 break; 542 case BasicType::Str: 543 print("str"); 544 break; 545 case BasicType::Placeholder: 546 print("_"); 547 break; 548 case BasicType::Unit: 549 print("()"); 550 break; 551 case BasicType::Variadic: 552 print("..."); 553 break; 554 case BasicType::Never: 555 print("!"); 556 break; 557 } 558 } 559 560 // <type> = | <basic-type> 561 // | <path> // named type 562 // | "A" <type> <const> // [T; N] 563 // | "S" <type> // [T] 564 // | "T" {<type>} "E" // (T1, T2, T3, ...) 565 // | "R" [<lifetime>] <type> // &T 566 // | "Q" [<lifetime>] <type> // &mut T 567 // | "P" <type> // *const T 568 // | "O" <type> // *mut T 569 // | "F" <fn-sig> // fn(...) -> ... 570 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 571 // | <backref> // backref 572 void Demangler::demangleType() { 573 if (Error || RecursionLevel >= MaxRecursionLevel) { 574 Error = true; 575 return; 576 } 577 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 578 579 size_t Start = Position; 580 char C = consume(); 581 BasicType Type; 582 if (parseBasicType(C, Type)) 583 return printBasicType(Type); 584 585 switch (C) { 586 case 'A': 587 print("["); 588 demangleType(); 589 print("; "); 590 demangleConst(); 591 print("]"); 592 break; 593 case 'S': 594 print("["); 595 demangleType(); 596 print("]"); 597 break; 598 case 'T': { 599 print("("); 600 size_t I = 0; 601 for (; !Error && !consumeIf('E'); ++I) { 602 if (I > 0) 603 print(", "); 604 demangleType(); 605 } 606 if (I == 1) 607 print(","); 608 print(")"); 609 break; 610 } 611 case 'R': 612 case 'Q': 613 print('&'); 614 if (consumeIf('L')) { 615 if (auto Lifetime = parseBase62Number()) { 616 printLifetime(Lifetime); 617 print(' '); 618 } 619 } 620 if (C == 'Q') 621 print("mut "); 622 demangleType(); 623 break; 624 case 'P': 625 print("*const "); 626 demangleType(); 627 break; 628 case 'O': 629 print("*mut "); 630 demangleType(); 631 break; 632 case 'F': 633 demangleFnSig(); 634 break; 635 case 'D': 636 demangleDynBounds(); 637 if (consumeIf('L')) { 638 if (auto Lifetime = parseBase62Number()) { 639 print(" + "); 640 printLifetime(Lifetime); 641 } 642 } else { 643 Error = true; 644 } 645 break; 646 case 'B': 647 demangleBackref([&] { demangleType(); }); 648 break; 649 default: 650 Position = Start; 651 demanglePath(IsInType::Yes); 652 break; 653 } 654 } 655 656 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 657 // <abi> = "C" 658 // | <undisambiguated-identifier> 659 void Demangler::demangleFnSig() { 660 SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 661 demangleOptionalBinder(); 662 663 if (consumeIf('U')) 664 print("unsafe "); 665 666 if (consumeIf('K')) { 667 print("extern \""); 668 if (consumeIf('C')) { 669 print("C"); 670 } else { 671 Identifier Ident = parseIdentifier(); 672 if (Ident.Punycode) 673 Error = true; 674 for (char C : Ident.Name) { 675 // When mangling ABI string, the "-" is replaced with "_". 676 if (C == '_') 677 C = '-'; 678 print(C); 679 } 680 } 681 print("\" "); 682 } 683 684 print("fn("); 685 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 686 if (I > 0) 687 print(", "); 688 demangleType(); 689 } 690 print(")"); 691 692 if (consumeIf('u')) { 693 // Skip the unit type from the output. 694 } else { 695 print(" -> "); 696 demangleType(); 697 } 698 } 699 700 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 701 void Demangler::demangleDynBounds() { 702 SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 703 print("dyn "); 704 demangleOptionalBinder(); 705 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 706 if (I > 0) 707 print(" + "); 708 demangleDynTrait(); 709 } 710 } 711 712 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>} 713 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 714 void Demangler::demangleDynTrait() { 715 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); 716 while (!Error && consumeIf('p')) { 717 if (!IsOpen) { 718 IsOpen = true; 719 print('<'); 720 } else { 721 print(", "); 722 } 723 print(parseIdentifier().Name); 724 print(" = "); 725 demangleType(); 726 } 727 if (IsOpen) 728 print(">"); 729 } 730 731 // Demangles optional binder and updates the number of bound lifetimes. 732 // 733 // <binder> = "G" <base-62-number> 734 void Demangler::demangleOptionalBinder() { 735 uint64_t Binder = parseOptionalBase62Number('G'); 736 if (Error || Binder == 0) 737 return; 738 739 // In valid inputs each bound lifetime is referenced later. Referencing a 740 // lifetime requires at least one byte of input. Reject inputs that are too 741 // short to reference all bound lifetimes. Otherwise demangling of invalid 742 // binders could generate excessive amounts of output. 743 if (Binder >= Input.size() - BoundLifetimes) { 744 Error = true; 745 return; 746 } 747 748 print("for<"); 749 for (size_t I = 0; I != Binder; ++I) { 750 BoundLifetimes += 1; 751 if (I > 0) 752 print(", "); 753 printLifetime(1); 754 } 755 print("> "); 756 } 757 758 // <const> = <basic-type> <const-data> 759 // | "p" // placeholder 760 // | <backref> 761 void Demangler::demangleConst() { 762 if (Error || RecursionLevel >= MaxRecursionLevel) { 763 Error = true; 764 return; 765 } 766 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 767 768 char C = consume(); 769 BasicType Type; 770 if (parseBasicType(C, Type)) { 771 switch (Type) { 772 case BasicType::I8: 773 case BasicType::I16: 774 case BasicType::I32: 775 case BasicType::I64: 776 case BasicType::I128: 777 case BasicType::ISize: 778 case BasicType::U8: 779 case BasicType::U16: 780 case BasicType::U32: 781 case BasicType::U64: 782 case BasicType::U128: 783 case BasicType::USize: 784 demangleConstInt(); 785 break; 786 case BasicType::Bool: 787 demangleConstBool(); 788 break; 789 case BasicType::Char: 790 demangleConstChar(); 791 break; 792 case BasicType::Placeholder: 793 print('_'); 794 break; 795 default: 796 Error = true; 797 break; 798 } 799 } else if (C == 'B') { 800 demangleBackref([&] { demangleConst(); }); 801 } else { 802 Error = true; 803 } 804 } 805 806 // <const-data> = ["n"] <hex-number> 807 void Demangler::demangleConstInt() { 808 if (consumeIf('n')) 809 print('-'); 810 811 StringView HexDigits; 812 uint64_t Value = parseHexNumber(HexDigits); 813 if (HexDigits.size() <= 16) { 814 printDecimalNumber(Value); 815 } else { 816 print("0x"); 817 print(HexDigits); 818 } 819 } 820 821 // <const-data> = "0_" // false 822 // | "1_" // true 823 void Demangler::demangleConstBool() { 824 StringView HexDigits; 825 parseHexNumber(HexDigits); 826 if (HexDigits == "0") 827 print("false"); 828 else if (HexDigits == "1") 829 print("true"); 830 else 831 Error = true; 832 } 833 834 /// Returns true if CodePoint represents a printable ASCII character. 835 static bool isAsciiPrintable(uint64_t CodePoint) { 836 return 0x20 <= CodePoint && CodePoint <= 0x7e; 837 } 838 839 // <const-data> = <hex-number> 840 void Demangler::demangleConstChar() { 841 StringView HexDigits; 842 uint64_t CodePoint = parseHexNumber(HexDigits); 843 if (Error || HexDigits.size() > 6) { 844 Error = true; 845 return; 846 } 847 848 print("'"); 849 switch (CodePoint) { 850 case '\t': 851 print(R"(\t)"); 852 break; 853 case '\r': 854 print(R"(\r)"); 855 break; 856 case '\n': 857 print(R"(\n)"); 858 break; 859 case '\\': 860 print(R"(\\)"); 861 break; 862 case '"': 863 print(R"(")"); 864 break; 865 case '\'': 866 print(R"(\')"); 867 break; 868 default: 869 if (isAsciiPrintable(CodePoint)) { 870 char C = CodePoint; 871 print(C); 872 } else { 873 print(R"(\u{)"); 874 print(HexDigits); 875 print('}'); 876 } 877 break; 878 } 879 print('\''); 880 } 881 882 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 883 Identifier Demangler::parseIdentifier() { 884 bool Punycode = consumeIf('u'); 885 uint64_t Bytes = parseDecimalNumber(); 886 887 // Underscore resolves the ambiguity when identifier starts with a decimal 888 // digit or another underscore. 889 consumeIf('_'); 890 891 if (Error || Bytes > Input.size() - Position) { 892 Error = true; 893 return {}; 894 } 895 StringView S = Input.substr(Position, Bytes); 896 Position += Bytes; 897 898 if (!std::all_of(S.begin(), S.end(), isValid)) { 899 Error = true; 900 return {}; 901 } 902 903 return {S, Punycode}; 904 } 905 906 // Parses optional base 62 number. The presence of a number is determined using 907 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise 908 // 909 // This function is indended for parsing disambiguators and binders which when 910 // not present have their value interpreted as 0, and otherwise as decoded 911 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is 912 // 2. When "G" is absent value is 0. 913 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 914 if (!consumeIf(Tag)) 915 return 0; 916 917 uint64_t N = parseBase62Number(); 918 if (Error || !addAssign(N, 1)) 919 return 0; 920 921 return N; 922 } 923 924 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 925 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 926 // "1_" encodes 2, etc. 927 // 928 // <base-62-number> = {<0-9a-zA-Z>} "_" 929 uint64_t Demangler::parseBase62Number() { 930 if (consumeIf('_')) 931 return 0; 932 933 uint64_t Value = 0; 934 935 while (true) { 936 uint64_t Digit; 937 char C = consume(); 938 939 if (C == '_') { 940 break; 941 } else if (isDigit(C)) { 942 Digit = C - '0'; 943 } else if (isLower(C)) { 944 Digit = 10 + (C - 'a'); 945 } else if (isUpper(C)) { 946 Digit = 10 + 26 + (C - 'A'); 947 } else { 948 Error = true; 949 return 0; 950 } 951 952 if (!mulAssign(Value, 62)) 953 return 0; 954 955 if (!addAssign(Value, Digit)) 956 return 0; 957 } 958 959 if (!addAssign(Value, 1)) 960 return 0; 961 962 return Value; 963 } 964 965 // Parses a decimal number that had been encoded without any leading zeros. 966 // 967 // <decimal-number> = "0" 968 // | <1-9> {<0-9>} 969 uint64_t Demangler::parseDecimalNumber() { 970 char C = look(); 971 if (!isDigit(C)) { 972 Error = true; 973 return 0; 974 } 975 976 if (C == '0') { 977 consume(); 978 return 0; 979 } 980 981 uint64_t Value = 0; 982 983 while (isDigit(look())) { 984 if (!mulAssign(Value, 10)) { 985 Error = true; 986 return 0; 987 } 988 989 uint64_t D = consume() - '0'; 990 if (!addAssign(Value, D)) 991 return 0; 992 } 993 994 return Value; 995 } 996 997 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 998 // value and stores hex digits in HexDigits. The return value is unspecified if 999 // HexDigits.size() > 16. 1000 // 1001 // <hex-number> = "0_" 1002 // | <1-9a-f> {<0-9a-f>} "_" 1003 uint64_t Demangler::parseHexNumber(StringView &HexDigits) { 1004 size_t Start = Position; 1005 uint64_t Value = 0; 1006 1007 if (!isHexDigit(look())) 1008 Error = true; 1009 1010 if (consumeIf('0')) { 1011 if (!consumeIf('_')) 1012 Error = true; 1013 } else { 1014 while (!Error && !consumeIf('_')) { 1015 char C = consume(); 1016 Value *= 16; 1017 if (isDigit(C)) 1018 Value += C - '0'; 1019 else if ('a' <= C && C <= 'f') 1020 Value += 10 + (C - 'a'); 1021 else 1022 Error = true; 1023 } 1024 } 1025 1026 if (Error) { 1027 HexDigits = StringView(); 1028 return 0; 1029 } 1030 1031 size_t End = Position - 1; 1032 assert(Start < End); 1033 HexDigits = Input.substr(Start, End - Start); 1034 return Value; 1035 } 1036 1037 void Demangler::print(char C) { 1038 if (Error || !Print) 1039 return; 1040 1041 Output += C; 1042 } 1043 1044 void Demangler::print(StringView S) { 1045 if (Error || !Print) 1046 return; 1047 1048 Output += S; 1049 } 1050 1051 void Demangler::printDecimalNumber(uint64_t N) { 1052 if (Error || !Print) 1053 return; 1054 1055 Output << N; 1056 } 1057 1058 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices 1059 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes 1060 // bound by one of the enclosing binders. 1061 void Demangler::printLifetime(uint64_t Index) { 1062 if (Index == 0) { 1063 print("'_"); 1064 return; 1065 } 1066 1067 if (Index - 1 >= BoundLifetimes) { 1068 Error = true; 1069 return; 1070 } 1071 1072 uint64_t Depth = BoundLifetimes - Index; 1073 print('\''); 1074 if (Depth < 26) { 1075 char C = 'a' + Depth; 1076 print(C); 1077 } else { 1078 print('z'); 1079 printDecimalNumber(Depth - 26 + 1); 1080 } 1081 } 1082 1083 static inline bool decodePunycodeDigit(char C, size_t &Value) { 1084 if (isLower(C)) { 1085 Value = C - 'a'; 1086 return true; 1087 } 1088 1089 if (isDigit(C)) { 1090 Value = 26 + (C - '0'); 1091 return true; 1092 } 1093 1094 return false; 1095 } 1096 1097 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { 1098 char *Buffer = Output.getBuffer(); 1099 char *Start = Buffer + StartIdx; 1100 char *End = Buffer + Output.getCurrentPosition(); 1101 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); 1102 } 1103 1104 // Encodes code point as UTF-8 and stores results in Output. Returns false if 1105 // CodePoint is not a valid unicode scalar value. 1106 static inline bool encodeUTF8(size_t CodePoint, char *Output) { 1107 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) 1108 return false; 1109 1110 if (CodePoint <= 0x7F) { 1111 Output[0] = CodePoint; 1112 return true; 1113 } 1114 1115 if (CodePoint <= 0x7FF) { 1116 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); 1117 Output[1] = 0x80 | (CodePoint & 0x3F); 1118 return true; 1119 } 1120 1121 if (CodePoint <= 0xFFFF) { 1122 Output[0] = 0xE0 | (CodePoint >> 12); 1123 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); 1124 Output[2] = 0x80 | (CodePoint & 0x3F); 1125 return true; 1126 } 1127 1128 if (CodePoint <= 0x10FFFF) { 1129 Output[0] = 0xF0 | (CodePoint >> 18); 1130 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); 1131 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); 1132 Output[3] = 0x80 | (CodePoint & 0x3F); 1133 return true; 1134 } 1135 1136 return false; 1137 } 1138 1139 // Decodes string encoded using punycode and appends results to Output. 1140 // Returns true if decoding was successful. 1141 static bool decodePunycode(StringView Input, OutputBuffer &Output) { 1142 size_t OutputSize = Output.getCurrentPosition(); 1143 size_t InputIdx = 0; 1144 1145 // Rust uses an underscore as a delimiter. 1146 size_t DelimiterPos = StringView::npos; 1147 for (size_t I = 0; I != Input.size(); ++I) 1148 if (Input[I] == '_') 1149 DelimiterPos = I; 1150 1151 if (DelimiterPos != StringView::npos) { 1152 // Copy basic code points before the last delimiter to the output. 1153 for (; InputIdx != DelimiterPos; ++InputIdx) { 1154 char C = Input[InputIdx]; 1155 if (!isValid(C)) 1156 return false; 1157 // Code points are padded with zeros while decoding is in progress. 1158 char UTF8[4] = {C}; 1159 Output += StringView(UTF8, UTF8 + 4); 1160 } 1161 // Skip over the delimiter. 1162 ++InputIdx; 1163 } 1164 1165 size_t Base = 36; 1166 size_t Skew = 38; 1167 size_t Bias = 72; 1168 size_t N = 0x80; 1169 size_t TMin = 1; 1170 size_t TMax = 26; 1171 size_t Damp = 700; 1172 1173 auto Adapt = [&](size_t Delta, size_t NumPoints) { 1174 Delta /= Damp; 1175 Delta += Delta / NumPoints; 1176 Damp = 2; 1177 1178 size_t K = 0; 1179 while (Delta > (Base - TMin) * TMax / 2) { 1180 Delta /= Base - TMin; 1181 K += Base; 1182 } 1183 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); 1184 }; 1185 1186 // Main decoding loop. 1187 for (size_t I = 0; InputIdx != Input.size(); I += 1) { 1188 size_t OldI = I; 1189 size_t W = 1; 1190 size_t Max = std::numeric_limits<size_t>::max(); 1191 for (size_t K = Base; true; K += Base) { 1192 if (InputIdx == Input.size()) 1193 return false; 1194 char C = Input[InputIdx++]; 1195 size_t Digit = 0; 1196 if (!decodePunycodeDigit(C, Digit)) 1197 return false; 1198 1199 if (Digit > (Max - I) / W) 1200 return false; 1201 I += Digit * W; 1202 1203 size_t T; 1204 if (K <= Bias) 1205 T = TMin; 1206 else if (K >= Bias + TMax) 1207 T = TMax; 1208 else 1209 T = K - Bias; 1210 1211 if (Digit < T) 1212 break; 1213 1214 if (W > Max / (Base - T)) 1215 return false; 1216 W *= (Base - T); 1217 } 1218 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; 1219 Bias = Adapt(I - OldI, NumPoints); 1220 1221 if (I / NumPoints > Max - N) 1222 return false; 1223 N += I / NumPoints; 1224 I = I % NumPoints; 1225 1226 // Insert N at position I in the output. 1227 char UTF8[4] = {}; 1228 if (!encodeUTF8(N, UTF8)) 1229 return false; 1230 Output.insert(OutputSize + I * 4, UTF8, 4); 1231 } 1232 1233 removeNullBytes(Output, OutputSize); 1234 return true; 1235 } 1236 1237 void Demangler::printIdentifier(Identifier Ident) { 1238 if (Error || !Print) 1239 return; 1240 1241 if (Ident.Punycode) { 1242 if (!decodePunycode(Ident.Name, Output)) 1243 Error = true; 1244 } else { 1245 print(Ident.Name); 1246 } 1247 } 1248 1249 char Demangler::look() const { 1250 if (Error || Position >= Input.size()) 1251 return 0; 1252 1253 return Input[Position]; 1254 } 1255 1256 char Demangler::consume() { 1257 if (Error || Position >= Input.size()) { 1258 Error = true; 1259 return 0; 1260 } 1261 1262 return Input[Position++]; 1263 } 1264 1265 bool Demangler::consumeIf(char Prefix) { 1266 if (Error || Position >= Input.size() || Input[Position] != Prefix) 1267 return false; 1268 1269 Position += 1; 1270 return true; 1271 } 1272 1273 /// Computes A + B. When computation wraps around sets the error and returns 1274 /// false. Otherwise assigns the result to A and returns true. 1275 bool Demangler::addAssign(uint64_t &A, uint64_t B) { 1276 if (A > std::numeric_limits<uint64_t>::max() - B) { 1277 Error = true; 1278 return false; 1279 } 1280 1281 A += B; 1282 return true; 1283 } 1284 1285 /// Computes A * B. When computation wraps around sets the error and returns 1286 /// false. Otherwise assigns the result to A and returns true. 1287 bool Demangler::mulAssign(uint64_t &A, uint64_t B) { 1288 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { 1289 Error = true; 1290 return false; 1291 } 1292 1293 A *= B; 1294 return true; 1295 } 1296