1 // SPDX-License-Identifier: Apache-2.0 OR MIT 2 3 // The contents of this file come from the Rust rustc-demangle library, hosted 4 // in the <https://github.com/rust-lang/rustc-demangle> repository, licensed 5 // under "Apache-2.0 OR MIT". For copyright details, see 6 // <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>. 7 // Please note that the file should be kept as close as possible to upstream. 8 9 // Code for demangling Rust symbols. This code is mostly 10 // a line-by-line translation of the Rust code in `rustc-demangle`. 11 12 // you can find the latest version of this code in https://github.com/rust-lang/rustc-demangle 13 14 #include <stdint.h> 15 #include <stddef.h> 16 #include <string.h> 17 #include <stdbool.h> 18 #include <sys/param.h> 19 #include <stdio.h> 20 21 #include "demangle-rust-v0.h" 22 23 #if defined(__GNUC__) || defined(__clang__) 24 #define NODISCARD __attribute__((warn_unused_result)) 25 #else 26 #define NODISCARD 27 #endif 28 29 #define MAX_DEPTH 500 30 31 typedef enum { 32 DemangleOk, 33 DemangleInvalid, 34 DemangleRecursed, 35 DemangleBug, 36 } demangle_status; 37 38 struct demangle_v0 { 39 const char *mangled; 40 size_t mangled_len; 41 }; 42 43 struct demangle_legacy { 44 const char *mangled; 45 size_t mangled_len; 46 size_t elements; 47 }; 48 49 // private version of memrchr to avoid _GNU_SOURCE 50 static void *demangle_memrchr(const void *s, int c, size_t n) { 51 const uint8_t *s_ = s; 52 for (; n != 0; n--) { 53 if (s_[n-1] == c) { 54 return (void*)&s_[n-1]; 55 } 56 } 57 return NULL; 58 } 59 60 61 static bool unicode_iscontrol(uint32_t ch) { 62 // this is *technically* a unicode table, but 63 // some unicode properties are simpler than you might think 64 return ch < 0x20 || (ch >= 0x7f && ch < 0xa0); 65 } 66 67 // "good enough" tables, the only consequence is that when printing 68 // *constant strings*, some characters are printed as `\u{abcd}` rather than themselves. 69 // 70 // I'm leaving these here to allow easily replacing them with actual 71 // tables if desired. 72 static bool unicode_isprint(uint32_t ch) { 73 if (ch < 0x20) { 74 return false; 75 } 76 if (ch < 0x7f) { 77 return true; 78 } 79 return false; 80 } 81 82 static bool unicode_isgraphemextend(uint32_t ch) { 83 (void)ch; 84 return false; 85 } 86 87 static bool str_isascii(const char *s, size_t s_len) { 88 for (size_t i = 0; i < s_len; i++) { 89 if (s[i] & 0x80) { 90 return false; 91 } 92 } 93 94 return true; 95 } 96 97 typedef enum { 98 PunycodeOk, 99 PunycodeError 100 } punycode_status; 101 102 struct parser { 103 // the parser assumes that `sym` has a safe "terminating byte". It might be NUL, 104 // but it might also be something else if a symbol is "truncated". 105 const char *sym; 106 size_t sym_len; 107 size_t next; 108 uint32_t depth; 109 }; 110 111 struct printer { 112 demangle_status status; // if status == 0 parser is valid 113 struct parser parser; 114 char *out; // NULL for no output [in which case out_len is not decremented] 115 size_t out_len; 116 uint32_t bound_lifetime_depth; 117 bool alternate; 118 }; 119 120 static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value); 121 static NODISCARD overflow_status printer_print_type(struct printer *printer); 122 static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value); 123 124 static NODISCARD demangle_status try_parse_path(struct parser *parser) { 125 struct printer printer = { 126 DemangleOk, 127 *parser, 128 NULL, 129 SIZE_MAX, 130 0, 131 false 132 }; 133 overflow_status ignore = printer_print_path(&printer, false); // can't fail since no output 134 (void)ignore; 135 *parser = printer.parser; 136 return printer.status; 137 } 138 139 NODISCARD static demangle_status rust_demangle_v0_demangle(const char *s, size_t s_len, struct demangle_v0 *res, const char **rest) { 140 if (s_len > strlen(s)) { 141 // s_len only exists to shorten the string, this is not a buffer API 142 return DemangleInvalid; 143 } 144 145 const char *inner; 146 size_t inner_len; 147 if (s_len >= 2 && !strncmp(s, "_R", strlen("_R"))) { 148 inner = s+2; 149 inner_len = s_len - 2; 150 } else if (s_len >= 1 && !strncmp(s, "R", strlen("R"))) { 151 // On Windows, dbghelp strips leading underscores, so we accept "R..." 152 // form too. 153 inner = s+1; 154 inner_len = s_len - 1; 155 } else if (s_len >= 3 && !strncmp(s, "__R", strlen("__R"))) { 156 // On OSX, symbols are prefixed with an extra _ 157 inner = s+3; 158 inner_len = s_len - 3; 159 } else { 160 return DemangleInvalid; 161 } 162 163 // Paths always start with uppercase characters. 164 if (*inner < 'A' || *inner > 'Z') { 165 return DemangleInvalid; 166 } 167 168 if (!str_isascii(inner, inner_len)) { 169 return DemangleInvalid; 170 } 171 172 struct parser parser = { inner, inner_len, 0, 0 }; 173 174 demangle_status status = try_parse_path(&parser); 175 if (status != DemangleOk) return status; 176 char next = parser.sym[parser.next]; 177 178 // Instantiating crate (paths always start with uppercase characters). 179 if (parser.next < parser.sym_len && next >= 'A' && next <= 'Z') { 180 status = try_parse_path(&parser); 181 if (status != DemangleOk) return status; 182 } 183 184 res->mangled = inner; 185 res->mangled_len = inner_len; 186 if (rest) { 187 *rest = parser.sym + parser.next; 188 } 189 190 return DemangleOk; 191 } 192 193 // This might require `len` to be up to 3 characters bigger than the real output len in case of utf-8 194 NODISCARD static overflow_status rust_demangle_v0_display_demangle(struct demangle_v0 res, char *out, size_t len, bool alternate) { 195 struct printer printer = { 196 DemangleOk, 197 { 198 res.mangled, 199 res.mangled_len, 200 0, 201 0 202 }, 203 out, 204 len, 205 0, 206 alternate 207 }; 208 if (printer_print_path(&printer, true) == OverflowOverflow) { 209 return OverflowOverflow; 210 } 211 if (printer.out_len < OVERFLOW_MARGIN) { 212 return OverflowOverflow; 213 } 214 *printer.out = '\0'; 215 return OverflowOk; 216 } 217 218 static size_t code_to_utf8(unsigned char *buffer, uint32_t code) 219 { 220 if (code <= 0x7F) { 221 buffer[0] = code; 222 return 1; 223 } 224 if (code <= 0x7FF) { 225 buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ 226 buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ 227 return 2; 228 } 229 if (code <= 0xFFFF) { 230 buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ 231 buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ 232 buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ 233 return 3; 234 } 235 if (code <= 0x10FFFF) { 236 buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ 237 buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ 238 buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ 239 buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ 240 return 4; 241 } 242 return 0; 243 } 244 245 246 // return length of char at byte, or SIZE_MAX if invalid. buf should have 4 valid characters 247 static NODISCARD size_t utf8_next_char(uint8_t *s, uint32_t *ch) { 248 uint8_t byte = *s; 249 // UTF8-1 = %x00-7F 250 // UTF8-2 = %xC2-DF UTF8-tail 251 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / 252 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) 253 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / 254 // %xF4 %x80-8F 2( UTF8-tail ) 255 if (byte < 0x80) { 256 *ch = byte; 257 return 1; 258 } else if (byte < 0xc2) { 259 return SIZE_MAX; 260 } else if (byte < 0xe0) { 261 if (s[1] >= 0x80 && s[1] < 0xc0) { 262 *ch = ((byte&0x1f)<<6) + (s[1] & 0x3f); 263 return 2; 264 } 265 return SIZE_MAX; 266 } if (byte < 0xf0) { 267 if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0)) { 268 return SIZE_MAX; // basic validation 269 } 270 if (byte == 0xe0 && s[1] < 0xa0) { 271 return SIZE_MAX; // overshort 272 } 273 if (byte == 0xed && s[1] >= 0xa0) { 274 return SIZE_MAX; // surrogate 275 } 276 *ch = ((byte&0x0f)<<12) + ((s[1] & 0x3f)<<6) + (s[2] & 0x3f); 277 return 3; 278 } else if (byte < 0xf5) { 279 if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0) || !(s[3] >= 0x80 && s[3] < 0xc0)) { 280 return SIZE_MAX; // basic validation 281 } 282 if (byte == 0xf0 && s[1] < 0x90) { 283 return SIZE_MAX; // overshort 284 } 285 if (byte == 0xf4 && s[1] >= 0x90) { 286 return SIZE_MAX; // over max 287 } 288 *ch = ((byte&0x07)<<18) + ((s[1] & 0x3f)<<12) + ((s[2] & 0x3f)<<6) + (s[3]&0x3f); 289 return 4; 290 } else { 291 return SIZE_MAX; 292 } 293 } 294 295 static NODISCARD bool validate_char(uint32_t n) { 296 return ((n ^ 0xd800) - 0x800) < 0x110000 - 0x800; 297 } 298 299 #define SMALL_PUNYCODE_LEN 128 300 301 static NODISCARD punycode_status punycode_decode(const char *start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint32_t (*out_)[SMALL_PUNYCODE_LEN], size_t *out_len) { 302 uint32_t *out = *out_; 303 304 if (punycode_len == 0) { 305 return PunycodeError; 306 } 307 308 if (ascii_len > SMALL_PUNYCODE_LEN) { 309 return PunycodeError; 310 } 311 for (size_t i = 0; i < ascii_len; i++) { 312 out[i] = start[i]; 313 } 314 size_t len = ascii_len; 315 316 size_t base = 36, t_min = 1, t_max = 26, skew = 38, damp = 700, bias = 72, i = 0, n = 0x80; 317 for (;;) { 318 size_t delta = 0, w = 1, k = 0; 319 for (;;) { 320 k += base; 321 size_t biased = k < bias ? 0 : k - bias; 322 size_t t = MIN(MAX(biased, t_min), t_max); 323 size_t d; 324 if (punycode_len == 0) { 325 return PunycodeError; 326 } 327 char nx = *punycode_start++; 328 punycode_len--; 329 if ('a' <= nx && nx <= 'z') { 330 d = nx - 'a'; 331 } else if ('0' <= nx && nx <= '9') { 332 d = 26 + (nx - '0'); 333 } else { 334 return PunycodeError; 335 } 336 if (w == 0 || d > SIZE_MAX / w || d*w > SIZE_MAX - delta) { 337 return PunycodeError; 338 } 339 delta += d * w; 340 if (d < t) { 341 break; 342 } 343 if (base < t || w == 0 || (base - t) > SIZE_MAX / w) { 344 return PunycodeError; 345 } 346 w *= (base - t); 347 } 348 349 len += 1; 350 if (i > SIZE_MAX - delta) { 351 return PunycodeError; 352 } 353 i += delta; 354 if (n > SIZE_MAX - i / len) { 355 return PunycodeError; 356 } 357 n += i / len; 358 i %= len; 359 360 // char validation 361 if (n > UINT32_MAX || !validate_char((uint32_t)n)) { 362 return PunycodeError; 363 } 364 365 // insert new character 366 if (len > SMALL_PUNYCODE_LEN) { 367 return PunycodeError; 368 } 369 memmove(out + i + 1, out + i, (len - i - 1) * sizeof(uint32_t)); 370 out[i] = (uint32_t)n; 371 372 // start i index at incremented position 373 i++; 374 375 // If there are no more deltas, decoding is complete. 376 if (punycode_len == 0) { 377 *out_len = len; 378 return PunycodeOk; 379 } 380 381 // Perform bias adaptation. 382 delta /= damp; 383 damp = 2; 384 385 delta += delta / len; 386 k = 0; 387 while (delta > ((base - t_min) * t_max) / 2) { 388 delta /= base - t_min; 389 k += base; 390 } 391 bias = k + ((base - t_min + 1) * delta) / (delta + skew); 392 } 393 } 394 395 struct ident { 396 const char *ascii_start; 397 size_t ascii_len; 398 const char *punycode_start; 399 size_t punycode_len; 400 }; 401 402 static NODISCARD overflow_status display_ident(const char *ascii_start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint8_t *out, size_t *out_len) { 403 uint32_t outbuf[SMALL_PUNYCODE_LEN]; 404 405 size_t wide_len; 406 size_t out_buflen = *out_len; 407 408 if (punycode_len == 0) { 409 if (ascii_len > out_buflen) { 410 return OverflowOverflow; 411 } 412 memcpy(out, ascii_start, ascii_len); 413 *out_len = ascii_len; 414 } else if (punycode_decode(ascii_start, ascii_len, punycode_start, punycode_len, &outbuf, &wide_len) == PunycodeOk) { 415 size_t narrow_len = 0; 416 for (size_t i = 0; i < wide_len; i++) { 417 if (out_buflen - narrow_len < 4) { 418 return OverflowOverflow; 419 } 420 unsigned char *pos = &out[narrow_len]; 421 narrow_len += code_to_utf8(pos, outbuf[i]); 422 } 423 *out_len = narrow_len; 424 } else { 425 size_t narrow_len = 0; 426 if (out_buflen < strlen("punycode{")) { 427 return OverflowOverflow; 428 } 429 memcpy(out, "punycode{", strlen("punycode{")); 430 narrow_len = strlen("punycode{"); 431 if (ascii_len > 0) { 432 if (out_buflen - narrow_len < ascii_len || out_buflen - narrow_len - ascii_len < 1) { 433 return OverflowOverflow; 434 } 435 memcpy(out + narrow_len, ascii_start, ascii_len); 436 narrow_len += ascii_len; 437 out[narrow_len] = '-'; 438 narrow_len++; 439 } 440 if (out_buflen - narrow_len < punycode_len || out_buflen - narrow_len - punycode_len < 1) { 441 return OverflowOverflow; 442 } 443 memcpy(out + narrow_len, punycode_start, punycode_len); 444 narrow_len += punycode_len; 445 out[narrow_len] = '}'; 446 narrow_len++; 447 *out_len = narrow_len; 448 } 449 450 return OverflowOk; 451 } 452 453 static NODISCARD bool try_parse_uint(const char *buf, size_t len, uint64_t *result) { 454 size_t cur = 0; 455 for(;cur < len && buf[cur] == '0';cur++); 456 uint64_t result_val = 0; 457 if (len - cur > 16) return false; 458 for(;cur < len;cur++) { 459 char c = buf[cur]; 460 result_val <<= 4; 461 if ('0' <= c && c <= '9') { 462 result_val += c - '0'; 463 } else if ('a' <= c && c <= 'f') { 464 result_val += 10 + (c - 'a'); 465 } else { 466 return false; 467 } 468 } 469 *result = result_val; 470 return true; 471 } 472 473 static NODISCARD bool dinibble2int(const char *buf, uint8_t *result) { 474 uint8_t result_val = 0; 475 for (int i = 0; i < 2; i++) { 476 char c = buf[i]; 477 result_val <<= 4; 478 if ('0' <= c && c <= '9') { 479 result_val += c - '0'; 480 } else if ('a' <= c && c <= 'f') { 481 result_val += 10 + (c - 'a'); 482 } else { 483 return false; 484 } 485 } 486 *result = result_val; 487 return true; 488 } 489 490 491 typedef enum { 492 NtsOk = 0, 493 NtsOverflow = 1, 494 NtsInvalid = 2 495 } nibbles_to_string_status; 496 497 // '\u{10ffff}', +margin 498 #define ESCAPED_SIZE 12 499 500 static NODISCARD size_t char_to_string(uint32_t ch, uint8_t quote, bool first, char (*buf)[ESCAPED_SIZE]) { 501 // encode the character 502 char *escaped_buf = *buf; 503 escaped_buf[0] = '\\'; 504 size_t escaped_len = 2; 505 switch (ch) { 506 case '\0': 507 escaped_buf[1] = '0'; 508 break; 509 case '\t': 510 escaped_buf[1] = 't'; 511 break; 512 case '\r': 513 escaped_buf[1] = 'r'; 514 break; 515 case '\n': 516 escaped_buf[1] = 'n'; 517 break; 518 case '\\': 519 escaped_buf[1] = '\\'; 520 break; 521 default: 522 if (ch == quote) { 523 escaped_buf[1] = ch; 524 } else if (!unicode_isprint(ch) || (first && unicode_isgraphemextend(ch))) { 525 int hexlen = snprintf(escaped_buf, ESCAPED_SIZE, "\\u{%x}", (unsigned int)ch); 526 if (hexlen < 0) { 527 return 0; // (snprintf shouldn't fail!) 528 } 529 escaped_len = hexlen; 530 } else { 531 // printable character 532 escaped_buf[0] = ch; 533 escaped_len = 1; 534 } 535 break; 536 } 537 538 return escaped_len; 539 } 540 541 // convert nibbles to a single/double-quoted string 542 static NODISCARD nibbles_to_string_status nibbles_to_string(const char *buf, size_t len, uint8_t *out, size_t *out_len) { 543 uint8_t quote = '"'; 544 bool first = true; 545 546 if ((len % 2) != 0) { 547 return NtsInvalid; // odd number of nibbles 548 } 549 550 size_t cur_out_len = 0; 551 552 // write starting quote 553 if (out != NULL) { 554 cur_out_len = *out_len; 555 if (cur_out_len == 0) { 556 return NtsOverflow; 557 } 558 *out++ = quote; 559 cur_out_len--; 560 } 561 562 uint8_t conv_buf[4] = {0}; 563 size_t conv_buf_len = 0; 564 while (len > 1 || conv_buf_len > 0) { 565 while (len > 1 && conv_buf_len < sizeof(conv_buf)) { 566 if (!dinibble2int(buf, &conv_buf[conv_buf_len])) { 567 return NtsInvalid; 568 } 569 conv_buf_len++; 570 buf += 2; 571 len -= 2; 572 } 573 574 // conv_buf is full here if possible, process 1 UTF-8 character 575 uint32_t ch = 0; 576 size_t consumed = utf8_next_char(conv_buf, &ch); 577 if (consumed > conv_buf_len) { 578 // either SIZE_MAX (invalid UTF-8) or finished input buffer and 579 // there are still bytes remaining, in both cases invalid 580 return NtsInvalid; 581 } 582 583 // "consume" the character 584 memmove(conv_buf, conv_buf+consumed, conv_buf_len-consumed); 585 conv_buf_len -= consumed; 586 587 char escaped_buf[ESCAPED_SIZE]; 588 size_t escaped_len = char_to_string(ch, '"', first, &escaped_buf); 589 if (out != NULL) { 590 if (cur_out_len < escaped_len) { 591 return NtsOverflow; 592 } 593 memcpy(out, escaped_buf, escaped_len); 594 out += escaped_len; 595 cur_out_len -= escaped_len; 596 } 597 first = false; 598 } 599 600 // write ending quote 601 if (out != NULL) { 602 if (cur_out_len == 0) { 603 return NtsOverflow; 604 } 605 *out++ = quote; 606 cur_out_len--; 607 *out_len -= cur_out_len; // subtract remaining space to get used space 608 } 609 610 return NtsOk; 611 } 612 613 static const char* basic_type(uint8_t tag) { 614 switch(tag) { 615 case 'b': 616 return "bool"; 617 case 'c': 618 return "char"; 619 case 'e': 620 return "str"; 621 case 'u': 622 return "()"; 623 case 'a': 624 return "i8"; 625 case 's': 626 return "i16"; 627 case 'l': 628 return "i32"; 629 case 'x': 630 return "i64"; 631 case 'n': 632 return "i128"; 633 case 'i': 634 return "isize"; 635 case 'h': 636 return "u8"; 637 case 't': 638 return "u16"; 639 case 'm': 640 return "u32"; 641 case 'y': 642 return "u64"; 643 case 'o': 644 return "u128"; 645 case 'j': 646 return "usize"; 647 case 'f': 648 return "f32"; 649 case 'd': 650 return "f64"; 651 case 'z': 652 return "!"; 653 case 'p': 654 return "_"; 655 case 'v': 656 return "..."; 657 default: 658 return NULL; 659 } 660 } 661 662 static NODISCARD demangle_status parser_push_depth(struct parser *parser) { 663 parser->depth++; 664 if (parser->depth > MAX_DEPTH) { 665 return DemangleRecursed; 666 } else { 667 return DemangleOk; 668 } 669 } 670 671 static demangle_status parser_pop_depth(struct parser *parser) { 672 parser->depth--; 673 return DemangleOk; 674 } 675 676 static uint8_t parser_peek(struct parser const *parser) { 677 if (parser->next == parser->sym_len) { 678 return 0; // add a "pseudo nul terminator" to avoid peeking past the end of a symbol 679 } else { 680 return parser->sym[parser->next]; 681 } 682 } 683 684 static bool parser_eat(struct parser *parser, uint8_t ch) { 685 if (parser_peek(parser) == ch) { 686 if (ch != 0) { // safety: make sure we don't skip past the NUL terminator 687 parser->next++; 688 } 689 return true; 690 } else { 691 return false; 692 } 693 } 694 695 static uint8_t parser_next(struct parser *parser) { 696 // don't advance after end of input, and return an imaginary NUL terminator 697 if (parser->next == parser->sym_len) { 698 return 0; 699 } else { 700 return parser->sym[parser->next++]; 701 } 702 } 703 704 static NODISCARD demangle_status parser_ch(struct parser *parser, uint8_t *next) { 705 // don't advance after end of input 706 if (parser->next == parser->sym_len) { 707 return DemangleInvalid; 708 } else { 709 *next = parser->sym[parser->next++]; 710 return DemangleOk; 711 } 712 } 713 714 struct buf { 715 const char *start; 716 size_t len; 717 }; 718 719 static NODISCARD demangle_status parser_hex_nibbles(struct parser *parser, struct buf *buf) { 720 size_t start = parser->next; 721 for (;;) { 722 uint8_t ch = parser_next(parser); 723 if (ch == '_') { 724 break; 725 } 726 if (!(('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f'))) { 727 return DemangleInvalid; 728 } 729 } 730 buf->start = parser->sym + start; 731 buf->len = parser->next - start - 1; // skip final _ 732 return DemangleOk; 733 } 734 735 static NODISCARD demangle_status parser_digit_10(struct parser *parser, uint8_t *out) { 736 uint8_t ch = parser_peek(parser); 737 if ('0' <= ch && ch <= '9') { 738 *out = ch - '0'; 739 parser->next++; 740 return DemangleOk; 741 } else { 742 return DemangleInvalid; 743 } 744 } 745 746 static NODISCARD demangle_status parser_digit_62(struct parser *parser, uint64_t *out) { 747 uint8_t ch = parser_peek(parser); 748 if ('0' <= ch && ch <= '9') { 749 *out = ch - '0'; 750 parser->next++; 751 return DemangleOk; 752 } else if ('a' <= ch && ch <= 'z') { 753 *out = 10 + (ch - 'a'); 754 parser->next++; 755 return DemangleOk; 756 } else if ('A' <= ch && ch <= 'Z') { 757 *out = 10 + 26 + (ch - 'A'); 758 parser->next++; 759 return DemangleOk; 760 } else { 761 return DemangleInvalid; 762 } 763 } 764 765 static NODISCARD demangle_status parser_integer_62(struct parser *parser, uint64_t *out) { 766 if (parser_eat(parser, '_')) { 767 *out = 0; 768 return DemangleOk; 769 } 770 771 uint64_t x = 0; 772 demangle_status status; 773 while (!parser_eat(parser, '_')) { 774 uint64_t d; 775 if ((status = parser_digit_62(parser, &d)) != DemangleOk) { 776 return status; 777 } 778 if (x > UINT64_MAX / 62) { 779 return DemangleInvalid; 780 } 781 x *= 62; 782 if (x > UINT64_MAX - d) { 783 return DemangleInvalid; 784 } 785 x += d; 786 } 787 if (x == UINT64_MAX) { 788 return DemangleInvalid; 789 } 790 *out = x + 1; 791 return DemangleOk; 792 } 793 794 static NODISCARD demangle_status parser_opt_integer_62(struct parser *parser, uint8_t tag, uint64_t *out) { 795 if (!parser_eat(parser, tag)) { 796 *out = 0; 797 return DemangleOk; 798 } 799 800 demangle_status status; 801 if ((status = parser_integer_62(parser, out)) != DemangleOk) { 802 return status; 803 } 804 if (*out == UINT64_MAX) { 805 return DemangleInvalid; 806 } 807 *out = *out + 1; 808 return DemangleOk; 809 } 810 811 static NODISCARD demangle_status parser_disambiguator(struct parser *parser, uint64_t *out) { 812 return parser_opt_integer_62(parser, 's', out); 813 } 814 815 typedef uint8_t parser_namespace_type; 816 817 static NODISCARD demangle_status parser_namespace(struct parser *parser, parser_namespace_type *out) { 818 uint8_t next = parser_next(parser); 819 if ('A' <= next && next <= 'Z') { 820 *out = next; 821 return DemangleOk; 822 } else if ('a' <= next && next <= 'z') { 823 *out = 0; 824 return DemangleOk; 825 } else { 826 return DemangleInvalid; 827 } 828 } 829 830 static NODISCARD demangle_status parser_backref(struct parser *parser, struct parser *out) { 831 size_t start = parser->next; 832 if (start == 0) { 833 return DemangleBug; 834 } 835 size_t s_start = start - 1; 836 uint64_t i; 837 demangle_status status = parser_integer_62(parser, &i); 838 if (status != DemangleOk) { 839 return status; 840 } 841 if (i >= s_start) { 842 return DemangleInvalid; 843 } 844 struct parser res = { 845 .sym = parser->sym, 846 .sym_len = parser->sym_len, 847 .next = (size_t)i, 848 .depth = parser->depth 849 }; 850 status = parser_push_depth(&res); 851 if (status != DemangleOk) { 852 return status; 853 } 854 *out = res; 855 return DemangleOk; 856 } 857 858 static NODISCARD demangle_status parser_ident(struct parser *parser, struct ident *out) { 859 bool is_punycode = parser_eat(parser, 'u'); 860 size_t len; 861 uint8_t d; 862 demangle_status status = parser_digit_10(parser, &d); 863 len = d; 864 if (status != DemangleOk) { 865 return status; 866 } 867 if (len) { 868 for (;;) { 869 status = parser_digit_10(parser, &d); 870 if (status != DemangleOk) { 871 break; 872 } 873 if (len > SIZE_MAX / 10) { 874 return DemangleInvalid; 875 } 876 len *= 10; 877 if (len > SIZE_MAX - d) { 878 return DemangleInvalid; 879 } 880 len += d; 881 } 882 } 883 884 // Skip past the optional `_` separator. 885 parser_eat(parser, '_'); 886 887 size_t start = parser->next; 888 if (parser->sym_len - parser->next < len) { 889 return DemangleInvalid; 890 } 891 parser->next += len; 892 893 const char *ident = &parser->sym[start]; 894 895 if (is_punycode) { 896 const char *underscore = demangle_memrchr(ident, '_', (size_t)len); 897 if (underscore == NULL) { 898 *out = (struct ident){ 899 .ascii_start="", 900 .ascii_len=0, 901 .punycode_start=ident, 902 .punycode_len=len 903 }; 904 } else { 905 size_t ascii_len = underscore - ident; 906 // ascii_len <= len - 1 since `_` is in the first len bytes 907 size_t punycode_len = len - 1 - ascii_len; 908 *out = (struct ident){ 909 .ascii_start=ident, 910 .ascii_len=ascii_len, 911 .punycode_start=underscore + 1, 912 .punycode_len=punycode_len 913 }; 914 } 915 if (out->punycode_len == 0) { 916 return DemangleInvalid; 917 } 918 return DemangleOk; 919 } else { 920 *out = (struct ident) { 921 .ascii_start=ident, 922 .ascii_len=(size_t)len, 923 .punycode_start="", 924 .punycode_len=0, 925 }; 926 return DemangleOk; 927 } 928 } 929 930 #define INVALID_SYNTAX "{invalid syntax}" 931 932 static const char *demangle_error_message(demangle_status status) { 933 switch (status) { 934 case DemangleInvalid: 935 return INVALID_SYNTAX; 936 case DemangleBug: 937 return "{bug}"; 938 case DemangleRecursed: 939 return "{recursion limit reached}"; 940 default: 941 return "{unknown error}"; 942 } 943 } 944 945 #define PRINT(print_fn) \ 946 do { \ 947 if ((print_fn) == OverflowOverflow) { \ 948 return OverflowOverflow; \ 949 } \ 950 } while(0) 951 952 #define PRINT_CH(printer, s) PRINT(printer_print_ch((printer), (s))) 953 #define PRINT_STR(printer, s) PRINT(printer_print_str((printer), (s))) 954 #define PRINT_U64(printer, s) PRINT(printer_print_u64((printer), (s))) 955 #define PRINT_IDENT(printer, s) PRINT(printer_print_ident((printer), (s))) 956 957 #define INVALID(printer) \ 958 do { \ 959 PRINT_STR((printer), INVALID_SYNTAX); \ 960 (printer)->status = DemangleInvalid; \ 961 return OverflowOk; \ 962 } while(0) 963 964 #define PARSE(printer, method, ...) \ 965 do { \ 966 if ((printer)->status != DemangleOk) { \ 967 PRINT_STR((printer), "?"); \ 968 return OverflowOk; \ 969 } else { \ 970 demangle_status _parse_status = method(&(printer)->parser, ## __VA_ARGS__); \ 971 if (_parse_status != DemangleOk) { \ 972 PRINT_STR((printer), demangle_error_message(_parse_status)); \ 973 (printer)->status = _parse_status; \ 974 return OverflowOk; \ 975 } \ 976 } \ 977 } while(0) 978 979 #define PRINT_SEP_LIST(printer, body, sep) \ 980 do { \ 981 size_t _sep_list_i; \ 982 PRINT_SEP_LIST_COUNT(printer, _sep_list_i, body, sep); \ 983 } while(0) 984 985 #define PRINT_SEP_LIST_COUNT(printer, count, body, sep) \ 986 do { \ 987 count = 0; \ 988 while ((printer)->status == DemangleOk && !printer_eat((printer), 'E')) { \ 989 if (count > 0) { PRINT_STR(printer, sep); } \ 990 body; \ 991 count++; \ 992 } \ 993 } while(0) 994 995 static bool printer_eat(struct printer *printer, uint8_t b) { 996 if (printer->status != DemangleOk) { 997 return false; 998 } 999 1000 return parser_eat(&printer->parser, b); 1001 } 1002 1003 static void printer_pop_depth(struct printer *printer) { 1004 if (printer->status == DemangleOk) { 1005 parser_pop_depth(&printer->parser); 1006 } 1007 } 1008 1009 static NODISCARD overflow_status printer_print_buf(struct printer *printer, const char *start, size_t len) { 1010 if (printer->out == NULL) { 1011 return OverflowOk; 1012 } 1013 if (printer->out_len < len) { 1014 return OverflowOverflow; 1015 } 1016 1017 memcpy(printer->out, start, len); 1018 printer->out += len; 1019 printer->out_len -= len; 1020 return OverflowOk; 1021 } 1022 1023 static NODISCARD overflow_status printer_print_str(struct printer *printer, const char *buf) { 1024 return printer_print_buf(printer, buf, strlen(buf)); 1025 } 1026 1027 static NODISCARD overflow_status printer_print_ch(struct printer *printer, char ch) { 1028 return printer_print_buf(printer, &ch, 1); 1029 } 1030 1031 static NODISCARD overflow_status printer_print_u64(struct printer *printer, uint64_t n) { 1032 char buf[32] = {0}; 1033 sprintf(buf, "%llu", (unsigned long long)n); // printing uint64 uses 21 < 32 chars 1034 return printer_print_str(printer, buf); 1035 } 1036 1037 static NODISCARD overflow_status printer_print_ident(struct printer *printer, struct ident *ident) { 1038 if (printer->out == NULL) { 1039 return OverflowOk; 1040 } 1041 1042 size_t out_len = printer->out_len; 1043 overflow_status status; 1044 if ((status = display_ident(ident->ascii_start, ident->ascii_len, ident->punycode_start, ident->punycode_len, (uint8_t*)printer->out, &out_len)) != OverflowOk) { 1045 return status; 1046 } 1047 printer->out += out_len; 1048 printer->out_len -= out_len; 1049 return OverflowOk; 1050 } 1051 1052 typedef overflow_status (*printer_fn)(struct printer *printer); 1053 typedef overflow_status (*backref_fn)(struct printer *printer, bool *arg); 1054 1055 static NODISCARD overflow_status printer_print_backref(struct printer *printer, backref_fn func, bool *arg) { 1056 struct parser backref; 1057 PARSE(printer, parser_backref, &backref); 1058 1059 if (printer->out == NULL) { 1060 return OverflowOk; 1061 } 1062 1063 struct parser orig_parser = printer->parser; 1064 demangle_status orig_status = printer->status; // fixme not sure this is needed match for Ok on the Rust side 1065 printer->parser = backref; 1066 printer->status = DemangleOk; 1067 overflow_status status = func(printer, arg); 1068 printer->parser = orig_parser; 1069 printer->status = orig_status; 1070 1071 return status; 1072 } 1073 1074 static NODISCARD overflow_status printer_print_lifetime_from_index(struct printer *printer, uint64_t lt) { 1075 // Bound lifetimes aren't tracked when skipping printing. 1076 if (printer->out == NULL) { 1077 return OverflowOk; 1078 } 1079 1080 PRINT_STR(printer, "'"); 1081 if (lt == 0) { 1082 PRINT_STR(printer, "_"); 1083 return OverflowOk; 1084 } 1085 1086 if (printer->bound_lifetime_depth < lt) { 1087 INVALID(printer); 1088 } else { 1089 uint64_t depth = printer->bound_lifetime_depth - lt; 1090 if (depth < 26) { 1091 PRINT_CH(printer, 'a' + depth); 1092 } else { 1093 PRINT_STR(printer, "_"); 1094 PRINT_U64(printer, depth); 1095 } 1096 1097 return OverflowOk; 1098 } 1099 } 1100 1101 static NODISCARD overflow_status printer_in_binder(struct printer *printer, printer_fn func) { 1102 uint64_t bound_lifetimes; 1103 PARSE(printer, parser_opt_integer_62, 'G', &bound_lifetimes); 1104 1105 // Don't track bound lifetimes when skipping printing. 1106 if (printer->out == NULL) { 1107 return func(printer); 1108 } 1109 1110 if (bound_lifetimes > 0) { 1111 PRINT_STR(printer, "for<"); 1112 for (uint64_t i = 0; i < bound_lifetimes; i++) { 1113 if (i > 0) { 1114 PRINT_STR(printer, ", "); 1115 } 1116 printer->bound_lifetime_depth++; 1117 PRINT(printer_print_lifetime_from_index(printer, 1)); 1118 } 1119 PRINT_STR(printer, "> "); 1120 } 1121 1122 overflow_status r = func(printer); 1123 printer->bound_lifetime_depth -= bound_lifetimes; 1124 1125 return r; 1126 } 1127 1128 static NODISCARD overflow_status printer_print_generic_arg(struct printer *printer) { 1129 if (printer_eat(printer, 'L')) { 1130 uint64_t lt; 1131 PARSE(printer, parser_integer_62, <); 1132 return printer_print_lifetime_from_index(printer, lt); 1133 } else if (printer_eat(printer, 'K')) { 1134 return printer_print_const(printer, false); 1135 } else { 1136 return printer_print_type(printer); 1137 } 1138 } 1139 1140 static NODISCARD overflow_status printer_print_generic_args(struct printer *printer) { 1141 PRINT_STR(printer, "<"); 1142 PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); 1143 PRINT_STR(printer, ">"); 1144 return OverflowOk; 1145 } 1146 1147 static NODISCARD overflow_status printer_print_path_out_of_value(struct printer *printer, bool *_arg) { 1148 (void)_arg; 1149 return printer_print_path(printer, false); 1150 } 1151 1152 static NODISCARD overflow_status printer_print_path_in_value(struct printer *printer, bool *_arg) { 1153 (void)_arg; 1154 return printer_print_path(printer, true); 1155 } 1156 1157 static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value) { 1158 PARSE(printer, parser_push_depth); 1159 uint8_t tag; 1160 PARSE(printer, parser_ch, &tag); 1161 1162 overflow_status st; 1163 uint64_t dis; 1164 struct ident name; 1165 parser_namespace_type ns; 1166 char *orig_out; 1167 1168 switch(tag) { 1169 case 'C': 1170 PARSE(printer, parser_disambiguator, &dis); 1171 PARSE(printer, parser_ident, &name); 1172 1173 PRINT_IDENT(printer, &name); 1174 1175 if (printer->out != NULL && !printer->alternate && dis != 0) { 1176 PRINT_STR(printer, "["); 1177 char buf[24] = {0}; 1178 sprintf(buf, "%llx", (unsigned long long)dis); 1179 PRINT_STR(printer, buf); 1180 PRINT_STR(printer, "]"); 1181 } 1182 break; 1183 case 'N': 1184 PARSE(printer, parser_namespace, &ns); 1185 if ((st = printer_print_path(printer, in_value)) != OverflowOk) { 1186 return st; 1187 } 1188 1189 // HACK(eddyb) if the parser is already marked as having errored, 1190 // `parse!` below will print a `?` without its preceding `::` 1191 // (because printing the `::` is skipped in certain conditions, 1192 // i.e. a lowercase namespace with an empty identifier), 1193 // so in order to get `::?`, the `::` has to be printed here. 1194 if (printer->status != DemangleOk) { 1195 PRINT_STR(printer, "::"); 1196 } 1197 1198 PARSE(printer, parser_disambiguator, &dis); 1199 PARSE(printer, parser_ident, &name); 1200 // Special namespace, like closures and shims 1201 if (ns) { 1202 PRINT_STR(printer, "::{"); 1203 if (ns == 'C') { 1204 PRINT_STR(printer, "closure"); 1205 } else if (ns == 'S') { 1206 PRINT_STR(printer, "shim"); 1207 } else { 1208 PRINT_CH(printer, ns); 1209 } 1210 if (name.ascii_len != 0 || name.punycode_len != 0) { 1211 PRINT_STR(printer, ":"); 1212 PRINT_IDENT(printer, &name); 1213 } 1214 PRINT_STR(printer, "#"); 1215 PRINT_U64(printer, dis); 1216 PRINT_STR(printer, "}"); 1217 } else { 1218 // Implementation-specific/unspecified namespaces 1219 if (name.ascii_len != 0 || name.punycode_len != 0) { 1220 PRINT_STR(printer, "::"); 1221 PRINT_IDENT(printer, &name); 1222 } 1223 } 1224 break; 1225 case 'M': 1226 case 'X': 1227 // for impls, ignore the impls own path 1228 PARSE(printer, parser_disambiguator, &dis); 1229 orig_out = printer->out; 1230 printer->out = NULL; 1231 PRINT(printer_print_path(printer, false)); 1232 printer->out = orig_out; 1233 1234 // fallthru 1235 case 'Y': 1236 PRINT_STR(printer, "<"); 1237 PRINT(printer_print_type(printer)); 1238 if (tag != 'M') { 1239 PRINT_STR(printer, " as "); 1240 PRINT(printer_print_path(printer, false)); 1241 } 1242 PRINT_STR(printer, ">"); 1243 break; 1244 case 'I': 1245 PRINT(printer_print_path(printer, in_value)); 1246 if (in_value) { 1247 PRINT_STR(printer, "::"); 1248 } 1249 PRINT(printer_print_generic_args(printer)); 1250 break; 1251 case 'B': 1252 PRINT(printer_print_backref(printer, in_value ? printer_print_path_in_value : printer_print_path_out_of_value, NULL)); 1253 break; 1254 default: 1255 INVALID(printer); 1256 break; 1257 } 1258 1259 printer_pop_depth(printer); 1260 return OverflowOk; 1261 } 1262 1263 static NODISCARD overflow_status printer_print_const_uint(struct printer *printer, uint8_t tag) { 1264 struct buf hex; 1265 PARSE(printer, parser_hex_nibbles, &hex); 1266 1267 uint64_t val; 1268 if (try_parse_uint(hex.start, hex.len, &val)) { 1269 PRINT_U64(printer, val); 1270 } else { 1271 PRINT_STR(printer, "0x"); 1272 PRINT(printer_print_buf(printer, hex.start, hex.len)); 1273 } 1274 1275 if (printer->out != NULL && !printer->alternate) { 1276 const char *ty = basic_type(tag); 1277 if (/* safety */ ty != NULL) { 1278 PRINT_STR(printer, ty); 1279 } 1280 } 1281 1282 return OverflowOk; 1283 } 1284 1285 static NODISCARD overflow_status printer_print_const_str_literal(struct printer *printer) { 1286 struct buf hex; 1287 PARSE(printer, parser_hex_nibbles, &hex); 1288 1289 size_t out_len = SIZE_MAX; 1290 nibbles_to_string_status nts_status = nibbles_to_string(hex.start, hex.len, NULL, &out_len); 1291 switch (nts_status) { 1292 case NtsOk: 1293 if (printer->out != NULL) { 1294 out_len = printer->out_len; 1295 nts_status = nibbles_to_string(hex.start, hex.len, (uint8_t*)printer->out, &out_len); 1296 if (nts_status != NtsOk) { 1297 return OverflowOverflow; 1298 } 1299 printer->out += out_len; 1300 printer->out_len -= out_len; 1301 } 1302 return OverflowOk; 1303 case NtsOverflow: 1304 // technically if there is a string of size `SIZE_MAX/6` whose escaped version overflows 1305 // SIZE_MAX but has an invalid char, this will be a "fake" overflow. In practice, 1306 // that is not going to happen and a fuzzer will not generate strings of this length. 1307 return OverflowOverflow; 1308 case NtsInvalid: 1309 default: 1310 INVALID(printer); 1311 } 1312 } 1313 1314 static NODISCARD overflow_status printer_print_const_struct(struct printer *printer) { 1315 uint64_t dis; 1316 struct ident name; 1317 PARSE(printer, parser_disambiguator, &dis); 1318 PARSE(printer, parser_ident, &name); 1319 PRINT_IDENT(printer, &name); 1320 PRINT_STR(printer, ": "); 1321 return printer_print_const(printer, true); 1322 } 1323 1324 static NODISCARD overflow_status printer_print_const_out_of_value(struct printer *printer, bool *_arg) { 1325 (void)_arg; 1326 return printer_print_const(printer, false); 1327 } 1328 1329 static NODISCARD overflow_status printer_print_const_in_value(struct printer *printer, bool *_arg) { 1330 (void)_arg; 1331 return printer_print_const(printer, true); 1332 } 1333 1334 static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value) { 1335 uint8_t tag; 1336 1337 PARSE(printer, parser_ch, &tag); 1338 PARSE(printer, parser_push_depth); 1339 1340 struct buf hex; 1341 uint64_t val; 1342 size_t count; 1343 1344 bool opened_brace = false; 1345 #define OPEN_BRACE_IF_OUTSIDE_EXPR \ 1346 do { if (!in_value) { \ 1347 opened_brace = true; \ 1348 PRINT_STR(printer, "{"); \ 1349 } } while(0) 1350 1351 switch(tag) { 1352 case 'p': 1353 PRINT_STR(printer, "_"); 1354 break; 1355 // Primitive leaves with hex-encoded values (see `basic_type`). 1356 case 'a': 1357 case 's': 1358 case 'l': 1359 case 'x': 1360 case 'n': 1361 case 'i': 1362 if (printer_eat(printer, 'n')) { 1363 PRINT_STR(printer, "-"); 1364 } 1365 /* fallthrough */ 1366 case 'h': 1367 case 't': 1368 case 'm': 1369 case 'y': 1370 case 'o': 1371 case 'j': 1372 PRINT(printer_print_const_uint(printer, tag)); 1373 break; 1374 case 'b': 1375 PARSE(printer, parser_hex_nibbles, &hex); 1376 if (try_parse_uint(hex.start, hex.len, &val)) { 1377 if (val == 0) { 1378 PRINT_STR(printer, "false"); 1379 } else if (val == 1) { 1380 PRINT_STR(printer, "true"); 1381 } else { 1382 INVALID(printer); 1383 } 1384 } else { 1385 INVALID(printer); 1386 } 1387 break; 1388 case 'c': 1389 PARSE(printer, parser_hex_nibbles, &hex); 1390 if (try_parse_uint(hex.start, hex.len, &val) 1391 && val < UINT32_MAX 1392 && validate_char((uint32_t)val)) 1393 { 1394 char escaped_buf[ESCAPED_SIZE]; 1395 size_t escaped_size = char_to_string((uint32_t)val, '\'', true, &escaped_buf); 1396 1397 PRINT_STR(printer, "'"); 1398 PRINT(printer_print_buf(printer, escaped_buf, escaped_size)); 1399 PRINT_STR(printer, "'"); 1400 } else { 1401 INVALID(printer); 1402 } 1403 break; 1404 case 'e': 1405 OPEN_BRACE_IF_OUTSIDE_EXPR; 1406 PRINT_STR(printer, "*"); 1407 PRINT(printer_print_const_str_literal(printer)); 1408 break; 1409 case 'R': 1410 case 'Q': 1411 if (tag == 'R' && printer_eat(printer, 'e')) { 1412 PRINT(printer_print_const_str_literal(printer)); 1413 } else { 1414 OPEN_BRACE_IF_OUTSIDE_EXPR; 1415 PRINT_STR(printer, "&"); 1416 if (tag != 'R') { 1417 PRINT_STR(printer, "mut "); 1418 } 1419 PRINT(printer_print_const(printer, true)); 1420 } 1421 break; 1422 case 'A': 1423 OPEN_BRACE_IF_OUTSIDE_EXPR; 1424 PRINT_STR(printer, "["); 1425 PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); 1426 PRINT_STR(printer, "]"); 1427 break; 1428 case 'T': 1429 OPEN_BRACE_IF_OUTSIDE_EXPR; 1430 PRINT_STR(printer, "("); 1431 PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_const(printer, true)), ", "); 1432 if (count == 1) { 1433 PRINT_STR(printer, ","); 1434 } 1435 PRINT_STR(printer, ")"); 1436 break; 1437 case 'V': 1438 OPEN_BRACE_IF_OUTSIDE_EXPR; 1439 PRINT(printer_print_path(printer, true)); 1440 PARSE(printer, parser_ch, &tag); 1441 switch(tag) { 1442 case 'U': 1443 break; 1444 case 'T': 1445 PRINT_STR(printer, "("); 1446 PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); 1447 PRINT_STR(printer, ")"); 1448 break; 1449 case 'S': 1450 PRINT_STR(printer, " { "); 1451 PRINT_SEP_LIST(printer, PRINT(printer_print_const_struct(printer)), ", "); 1452 PRINT_STR(printer, " }"); 1453 break; 1454 default: 1455 INVALID(printer); 1456 } 1457 break; 1458 case 'B': 1459 PRINT(printer_print_backref(printer, in_value ? printer_print_const_in_value : printer_print_const_out_of_value, NULL)); 1460 break; 1461 default: 1462 INVALID(printer); 1463 } 1464 #undef OPEN_BRACE_IF_OUTSIDE_EXPR 1465 1466 if (opened_brace) { 1467 PRINT_STR(printer, "}"); 1468 } 1469 printer_pop_depth(printer); 1470 1471 return OverflowOk; 1472 } 1473 1474 /// A trait in a trait object may have some "existential projections" 1475 /// (i.e. associated type bindings) after it, which should be printed 1476 /// in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`. 1477 /// To this end, this method will keep the `<...>` of an 'I' path 1478 /// open, by omitting the `>`, and return `Ok(true)` in that case. 1479 static NODISCARD overflow_status printer_print_maybe_open_generics(struct printer *printer, bool *open) { 1480 if (printer_eat(printer, 'B')) { 1481 // NOTE(eddyb) the closure may not run if printing is being skipped, 1482 // but in that case the returned boolean doesn't matter. 1483 *open = false; 1484 return printer_print_backref(printer, printer_print_maybe_open_generics, open); 1485 } else if(printer_eat(printer, 'I')) { 1486 PRINT(printer_print_path(printer, false)); 1487 PRINT_STR(printer, "<"); 1488 PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); 1489 *open = true; 1490 return OverflowOk; 1491 } else { 1492 PRINT(printer_print_path(printer, false)); 1493 *open = false; 1494 return OverflowOk; 1495 } 1496 } 1497 1498 static NODISCARD overflow_status printer_print_dyn_trait(struct printer *printer) { 1499 bool open; 1500 PRINT(printer_print_maybe_open_generics(printer, &open)); 1501 1502 while (printer_eat(printer, 'p')) { 1503 if (!open) { 1504 PRINT_STR(printer, "<"); 1505 open = true; 1506 } else { 1507 PRINT_STR(printer, ", "); 1508 } 1509 1510 struct ident name; 1511 PARSE(printer, parser_ident, &name); 1512 1513 PRINT_IDENT(printer, &name); 1514 PRINT_STR(printer, " = "); 1515 PRINT(printer_print_type(printer)); 1516 } 1517 1518 if (open) { 1519 PRINT_STR(printer, ">"); 1520 } 1521 1522 return OverflowOk; 1523 } 1524 1525 static NODISCARD overflow_status printer_print_object_bounds(struct printer *printer) { 1526 PRINT_SEP_LIST(printer, PRINT(printer_print_dyn_trait(printer)), " + "); 1527 return OverflowOk; 1528 } 1529 1530 static NODISCARD overflow_status printer_print_function_type(struct printer *printer) { 1531 bool is_unsafe = printer_eat(printer, 'U'); 1532 const char *abi; 1533 size_t abi_len; 1534 if (printer_eat(printer, 'K')) { 1535 if (printer_eat(printer, 'C')) { 1536 abi = "C"; 1537 abi_len = 1; 1538 } else { 1539 struct ident abi_ident; 1540 PARSE(printer, parser_ident, &abi_ident); 1541 if (abi_ident.ascii_len == 0 || abi_ident.punycode_len != 0) { 1542 INVALID(printer); 1543 } 1544 abi = abi_ident.ascii_start; 1545 abi_len = abi_ident.ascii_len; 1546 } 1547 } else { 1548 abi = NULL; 1549 abi_len = 0; 1550 } 1551 1552 if (is_unsafe) { 1553 PRINT_STR(printer, "unsafe "); 1554 } 1555 1556 if (abi != NULL) { 1557 PRINT_STR(printer, "extern \""); 1558 1559 // replace _ with - 1560 while (abi_len > 0) { 1561 const char *minus = memchr(abi, '_', abi_len); 1562 if (minus == NULL) { 1563 PRINT(printer_print_buf(printer, (const char*)abi, abi_len)); 1564 break; 1565 } else { 1566 size_t space_to_minus = minus - abi; 1567 PRINT(printer_print_buf(printer, (const char*)abi, space_to_minus)); 1568 PRINT_STR(printer, "-"); 1569 abi = minus + 1; 1570 abi_len -= (space_to_minus + 1); 1571 } 1572 } 1573 1574 PRINT_STR(printer, "\" "); 1575 } 1576 1577 PRINT_STR(printer, "fn("); 1578 PRINT_SEP_LIST(printer, PRINT(printer_print_type(printer)), ", "); 1579 PRINT_STR(printer, ")"); 1580 1581 if (printer_eat(printer, 'u')) { 1582 // Skip printing the return type if it's 'u', i.e. `()`. 1583 } else { 1584 PRINT_STR(printer, " -> "); 1585 PRINT(printer_print_type(printer)); 1586 } 1587 1588 return OverflowOk; 1589 } 1590 1591 static NODISCARD overflow_status printer_print_type_backref(struct printer *printer, bool *_arg) { 1592 (void)_arg; 1593 return printer_print_type(printer); 1594 } 1595 1596 static NODISCARD overflow_status printer_print_type(struct printer *printer) { 1597 uint8_t tag; 1598 PARSE(printer, parser_ch, &tag); 1599 1600 const char *basic_ty = basic_type(tag); 1601 if (basic_ty) { 1602 return printer_print_str(printer, basic_ty); 1603 } 1604 1605 uint64_t count; 1606 uint64_t lt; 1607 1608 PARSE(printer, parser_push_depth); 1609 switch (tag) { 1610 case 'R': 1611 case 'Q': 1612 PRINT_STR(printer, "&"); 1613 if (printer_eat(printer, 'L')) { 1614 PARSE(printer, parser_integer_62, <); 1615 if (lt != 0) { 1616 PRINT(printer_print_lifetime_from_index(printer, lt)); 1617 PRINT_STR(printer, " "); 1618 } 1619 } 1620 if (tag != 'R') { 1621 PRINT_STR(printer, "mut "); 1622 } 1623 PRINT(printer_print_type(printer)); 1624 break; 1625 case 'P': 1626 case 'O': 1627 PRINT_STR(printer, "*"); 1628 if (tag != 'P') { 1629 PRINT_STR(printer, "mut "); 1630 } else { 1631 PRINT_STR(printer, "const "); 1632 } 1633 PRINT(printer_print_type(printer)); 1634 break; 1635 case 'A': 1636 case 'S': 1637 PRINT_STR(printer, "["); 1638 PRINT(printer_print_type(printer)); 1639 if (tag == 'A') { 1640 PRINT_STR(printer, "; "); 1641 PRINT(printer_print_const(printer, true)); 1642 } 1643 PRINT_STR(printer, "]"); 1644 break; 1645 case 'T': 1646 PRINT_STR(printer, "("); 1647 PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_type(printer)), ", "); 1648 if (count == 1) { 1649 PRINT_STR(printer, ","); 1650 } 1651 PRINT_STR(printer, ")"); 1652 break; 1653 case 'F': 1654 PRINT(printer_in_binder(printer, printer_print_function_type)); 1655 break; 1656 case 'D': 1657 PRINT_STR(printer, "dyn "); 1658 PRINT(printer_in_binder(printer, printer_print_object_bounds)); 1659 1660 if (!printer_eat(printer, 'L')) { 1661 INVALID(printer); 1662 } 1663 PARSE(printer, parser_integer_62, <); 1664 1665 if (lt != 0) { 1666 PRINT_STR(printer, " + "); 1667 PRINT(printer_print_lifetime_from_index(printer, lt)); 1668 } 1669 break; 1670 case 'B': 1671 PRINT(printer_print_backref(printer, printer_print_type_backref, NULL)); 1672 break; 1673 default: 1674 // Go back to the tag, so `print_path` also sees it. 1675 if (printer->status == DemangleOk && /* safety */ printer->parser.next > 0) { 1676 printer->parser.next--; 1677 } 1678 PRINT(printer_print_path(printer, false)); 1679 } 1680 1681 printer_pop_depth(printer); 1682 return OverflowOk; 1683 } 1684 1685 NODISCARD static demangle_status rust_demangle_legacy_demangle(const char *s, size_t s_len, struct demangle_legacy *res, const char **rest) 1686 { 1687 if (s_len > strlen(s)) { 1688 // s_len only exists to shorten the string, this is not a buffer API 1689 return DemangleInvalid; 1690 } 1691 1692 const char *inner; 1693 size_t inner_len; 1694 if (s_len >= 3 && !strncmp(s, "_ZN", 3)) { 1695 inner = s + 3; 1696 inner_len = s_len - 3; 1697 } else if (s_len >= 2 && !strncmp(s, "ZN", 2)) { 1698 // On Windows, dbghelp strips leading underscores, so we accept "ZN...E" 1699 // form too. 1700 inner = s + 2; 1701 inner_len = s_len - 2; 1702 } else if (s_len >= 4 && !strncmp(s, "__ZN", 4)) { 1703 // On OSX, symbols are prefixed with an extra _ 1704 inner = s + 4; 1705 inner_len = s_len - 4; 1706 } else { 1707 return DemangleInvalid; 1708 } 1709 1710 if (!str_isascii(inner, inner_len)) { 1711 return DemangleInvalid; 1712 } 1713 1714 size_t elements = 0; 1715 const char *chars = inner; 1716 size_t chars_len = inner_len; 1717 if (chars_len == 0) { 1718 return DemangleInvalid; 1719 } 1720 char c; 1721 while ((c = *chars) != 'E') { 1722 // Decode an identifier element's length 1723 if (c < '0' || c > '9') { 1724 return DemangleInvalid; 1725 } 1726 size_t len = 0; 1727 while (c >= '0' && c <= '9') { 1728 size_t d = c - '0'; 1729 if (len > SIZE_MAX / 10) { 1730 return DemangleInvalid; 1731 } 1732 len *= 10; 1733 if (len > SIZE_MAX - d) { 1734 return DemangleInvalid; 1735 } 1736 len += d; 1737 1738 chars++; 1739 chars_len--; 1740 if (chars_len == 0) { 1741 return DemangleInvalid; 1742 } 1743 c = *chars; 1744 } 1745 1746 // Advance by the length 1747 if (chars_len <= len) { 1748 return DemangleInvalid; 1749 } 1750 chars += len; 1751 chars_len -= len; 1752 elements++; 1753 } 1754 *res = (struct demangle_legacy) { inner, inner_len, elements }; 1755 *rest = chars + 1; 1756 return DemangleOk; 1757 } 1758 1759 static bool is_rust_hash(const char *s, size_t len) { 1760 if (len == 0 || s[0] != 'h') { 1761 return false; 1762 } 1763 1764 for (size_t i = 1; i < len; i++) { 1765 if (!((s[i] >= '0' && s[i] <= '9') || (s[i] >= 'a' && s[i] <= 'f') || (s[i] >= 'A' && s[i] <= 'F'))) { 1766 return false; 1767 } 1768 } 1769 1770 return true; 1771 } 1772 1773 NODISCARD static overflow_status rust_demangle_legacy_display_demangle(struct demangle_legacy res, char *out, size_t len, bool alternate) 1774 { 1775 struct printer printer = { 1776 // not actually using the parser part of the printer, just keeping it to share the format functions 1777 DemangleOk, 1778 { NULL }, 1779 out, 1780 len, 1781 0, 1782 alternate 1783 }; 1784 const char *inner = res.mangled; 1785 for (size_t element = 0; element < res.elements; element++) { 1786 size_t i = 0; 1787 const char *rest; 1788 for (rest = inner; rest < res.mangled + res.mangled_len && *rest >= '0' && *rest <= '9'; rest++) { 1789 i *= 10; 1790 i += *rest - '0'; 1791 } 1792 if ((size_t)(res.mangled + res.mangled_len - rest) < i) { 1793 // safety: shouldn't reach this place if the input string is validated. bail out. 1794 // safety: we knwo rest <= res.mangled + res.mangled_len from the for-loop above 1795 break; 1796 } 1797 1798 size_t len = i; 1799 inner = rest + len; 1800 1801 // From here on, inner contains a pointer to the next element, rest[:len] to the current one 1802 if (alternate && element + 1 == res.elements && is_rust_hash(rest, i)) { 1803 break; 1804 } 1805 if (element != 0) { 1806 PRINT_STR(&printer, "::"); 1807 } 1808 1809 if (len >= 2 && !strncmp(rest, "_$", 2)) { 1810 rest++; 1811 len--; 1812 } 1813 1814 while (len > 0) { 1815 if (rest[0] == '.') { 1816 if (len >= 2 && rest[1] == '.') { 1817 PRINT_STR(&printer, "::"); 1818 rest += 2; 1819 len -= 2; 1820 } else { 1821 PRINT_STR(&printer, "."); 1822 rest += 1; 1823 len -= 1; 1824 } 1825 } else if (rest[0] == '$') { 1826 const char *escape = memchr(rest + 1, '$', len - 1); 1827 if (escape == NULL) { 1828 break; 1829 } 1830 const char *escape_start = rest + 1; 1831 size_t escape_len = escape - (rest + 1); 1832 1833 size_t next_len = len - (escape + 1 - rest); 1834 const char *next_rest = escape + 1; 1835 1836 char ch; 1837 if ((escape_len == 2 && escape_start[0] == 'S' && escape_start[1] == 'P')) { 1838 ch = '@'; 1839 } else if ((escape_len == 2 && escape_start[0] == 'B' && escape_start[1] == 'P')) { 1840 ch = '*'; 1841 } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'F')) { 1842 ch = '&'; 1843 } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'T')) { 1844 ch = '<'; 1845 } else if ((escape_len == 2 && escape_start[0] == 'G' && escape_start[1] == 'T')) { 1846 ch = '>'; 1847 } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'P')) { 1848 ch = '('; 1849 } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'P')) { 1850 ch = ')'; 1851 } else if ((escape_len == 1 && escape_start[0] == 'C')) { 1852 ch = ','; 1853 } else { 1854 if (escape_len > 1 && escape_start[0] == 'u') { 1855 escape_start++; 1856 escape_len--; 1857 uint64_t val; 1858 if (try_parse_uint(escape_start, escape_len, &val) 1859 && val < UINT32_MAX 1860 && validate_char((uint32_t)val)) 1861 { 1862 if (!unicode_iscontrol(val)) { 1863 uint8_t wchr[4]; 1864 size_t wchr_len = code_to_utf8(wchr, (uint32_t)val); 1865 PRINT(printer_print_buf(&printer, (const char*)wchr, wchr_len)); 1866 len = next_len; 1867 rest = next_rest; 1868 continue; 1869 } 1870 } 1871 } 1872 break; // print the rest of this element raw 1873 } 1874 PRINT_CH(&printer, ch); 1875 len = next_len; 1876 rest = next_rest; 1877 } else { 1878 size_t j = 0; 1879 for (;j < len && rest[j] != '$' && rest[j] != '.';j++); 1880 if (j == len) { 1881 break; 1882 } 1883 PRINT(printer_print_buf(&printer, rest, j)); 1884 rest += j; 1885 len -= j; 1886 } 1887 } 1888 PRINT(printer_print_buf(&printer, rest, len)); 1889 } 1890 1891 if (printer.out_len < OVERFLOW_MARGIN) { 1892 return OverflowOverflow; 1893 } 1894 *printer.out = '\0'; 1895 return OverflowOk; 1896 } 1897 1898 static bool is_symbol_like(const char *s, size_t len) { 1899 // rust-demangle definition of symbol like: control characters and space are not symbol-like, all else is 1900 for (size_t i = 0; i < len; i++) { 1901 char ch = s[i]; 1902 if (!(ch >= 0x21 && ch <= 0x7e)) { 1903 return false; 1904 } 1905 } 1906 return true; 1907 } 1908 1909 void rust_demangle_demangle(const char *s, struct demangle *res) 1910 { 1911 // During ThinLTO LLVM may import and rename internal symbols, so strip out 1912 // those endings first as they're one of the last manglings applied to symbol 1913 // names. 1914 const char *llvm = ".llvm."; 1915 const char *found_llvm = strstr(s, llvm); 1916 size_t s_len = strlen(s); 1917 if (found_llvm) { 1918 const char *all_hex_ptr = found_llvm + strlen(".llvm."); 1919 bool all_hex = true; 1920 for (;*all_hex_ptr;all_hex_ptr++) { 1921 if (!(('0' <= *all_hex_ptr && *all_hex_ptr <= '9') || 1922 ('A' <= *all_hex_ptr && *all_hex_ptr <= 'F') || 1923 *all_hex_ptr == '@')) { 1924 all_hex = false; 1925 break; 1926 } 1927 } 1928 1929 if (all_hex) { 1930 s_len = found_llvm - s; 1931 } 1932 } 1933 1934 const char *suffix; 1935 struct demangle_legacy legacy; 1936 demangle_status st = rust_demangle_legacy_demangle(s, s_len, &legacy, &suffix); 1937 if (st == DemangleOk) { 1938 *res = (struct demangle) { 1939 .style=DemangleStyleLegacy, 1940 .mangled=legacy.mangled, 1941 .mangled_len=legacy.mangled_len, 1942 .elements=legacy.elements, 1943 .original=s, 1944 .original_len=s_len, 1945 .suffix=suffix, 1946 .suffix_len=s_len - (suffix - s), 1947 }; 1948 } else { 1949 struct demangle_v0 v0; 1950 st = rust_demangle_v0_demangle(s, s_len, &v0, &suffix); 1951 if (st == DemangleOk) { 1952 *res = (struct demangle) { 1953 .style=DemangleStyleV0, 1954 .mangled=v0.mangled, 1955 .mangled_len=v0.mangled_len, 1956 .elements=0, 1957 .original=s, 1958 .original_len=s_len, 1959 .suffix=suffix, 1960 .suffix_len=s_len - (suffix - s), 1961 }; 1962 } else { 1963 *res = (struct demangle) { 1964 .style=DemangleStyleUnknown, 1965 .mangled=NULL, 1966 .mangled_len=0, 1967 .elements=0, 1968 .original=s, 1969 .original_len=s_len, 1970 .suffix=s, 1971 .suffix_len=0, 1972 }; 1973 } 1974 } 1975 1976 // Output like LLVM IR adds extra period-delimited words. See if 1977 // we are in that case and save the trailing words if so. 1978 if (res->suffix_len) { 1979 if (res->suffix[0] == '.' && is_symbol_like(res->suffix, res->suffix_len)) { 1980 // Keep the suffix 1981 } else { 1982 // Reset the suffix and invalidate the demangling 1983 res->style = DemangleStyleUnknown; 1984 res->suffix_len = 0; 1985 } 1986 } 1987 } 1988 1989 bool rust_demangle_is_known(struct demangle *res) { 1990 return res->style != DemangleStyleUnknown; 1991 } 1992 1993 overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate) { 1994 size_t original_len = res->original_len; 1995 size_t out_len; 1996 switch (res->style) { 1997 case DemangleStyleUnknown: 1998 if (len < original_len) { 1999 return OverflowOverflow; 2000 } else { 2001 memcpy(out, res->original, original_len); 2002 out += original_len; 2003 len -= original_len; 2004 break; 2005 } 2006 break; 2007 case DemangleStyleLegacy: { 2008 struct demangle_legacy legacy = { 2009 res->mangled, 2010 res->mangled_len, 2011 res->elements 2012 }; 2013 if (rust_demangle_legacy_display_demangle(legacy, out, len, alternate) == OverflowOverflow) { 2014 return OverflowOverflow; 2015 } 2016 out_len = strlen(out); 2017 out += out_len; 2018 len -= out_len; 2019 break; 2020 } 2021 case DemangleStyleV0: { 2022 struct demangle_v0 v0 = { 2023 res->mangled, 2024 res->mangled_len 2025 }; 2026 if (rust_demangle_v0_display_demangle(v0, out, len, alternate) == OverflowOverflow) { 2027 return OverflowOverflow; 2028 } 2029 out_len = strlen(out); 2030 out += out_len; 2031 len -= out_len; 2032 break; 2033 } 2034 } 2035 size_t suffix_len = res->suffix_len; 2036 if (len < suffix_len || len - suffix_len < OVERFLOW_MARGIN) { 2037 return OverflowOverflow; 2038 } 2039 memcpy(out, res->suffix, suffix_len); 2040 out[suffix_len] = 0; 2041 return OverflowOk; 2042 } 2043