1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2019 Joyent, Inc. 14 * Copyright 2021 Jason King 15 */ 16 17 /* BEGIN CSTYLED */ 18 19 /* 20 * This implements the 'symbol_name_mangling_v2' demangling for rust as 21 * described in Rust RFC 2603 as opposed to the original (now called 22 * legacy) mangling older versions of rust used (implemented in rust.c). 23 * 24 * The specification can be viewed at: 25 * https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md 26 */ 27 28 /* END CSTYLED */ 29 30 #include <errno.h> 31 #include <libcustr.h> 32 #include <stdarg.h> 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 37 #include "rust.h" 38 39 /* 40 * Help track amount of additional output added to rs_demangled across 41 * a function call (to allow that portion to be output for debugging) 42 */ 43 #define SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled) 44 #define CSTR_END(_st, _len) \ 45 ((int)(custr_len((_st)->rs_demangled) - (_len))), \ 46 custr_cstr((_st)->rs_demangled) + (_len) 47 48 typedef enum const_type_class { 49 CTC_INVALID = -1, 50 CTC_UNSIGNED, 51 CTC_SIGNED, 52 CTC_CHAR, 53 CTC_BOOL, 54 } const_type_class_t; 55 56 /* 57 * Sometimes, parsing something is optional. In this case a failure to 58 * parse is fine, however we still want to consider a fatal error as 59 * failure. 
60 */ 61 #define OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st)) 62 63 static boolean_t rustv0_valid_sym(const strview_t *); 64 static const_type_class_t rustv0_classify_const_type(char); 65 static boolean_t rustv0_parse_hex_num(rust_state_t *restrict, 66 strview_t *restrict, uint64_t *restrict); 67 static boolean_t rustv0_parse_base62(rust_state_t *restrict, 68 strview_t *restrict, uint64_t *restrict); 69 70 static boolean_t rustv0_parse_undisambiguated_identifier( 71 rust_state_t *restrict, strview_t *restrict, boolean_t); 72 static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict, 73 strview_t *restrict, uint64_t *restrict); 74 75 static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict, 76 boolean_t); 77 static boolean_t rustv0_parse_impl_path(rust_state_t *restrict, 78 strview_t *restrict, boolean_t); 79 static boolean_t rustv0_parse_nested_path(rust_state_t *restrict, 80 strview_t *restrict, boolean_t); 81 static boolean_t rustv0_parse_basic_type(rust_state_t *restrict, 82 strview_t *restrict); 83 static boolean_t rustv0_parse_backref(rust_state_t *restrict, 84 strview_t *restrict, 85 boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t), 86 boolean_t); 87 static boolean_t rustv0_parse_lifetime(rust_state_t *restrict, 88 strview_t *restrict); 89 static boolean_t rustv0_parse_const(rust_state_t *restrict, 90 strview_t *restrict, boolean_t); 91 static boolean_t rustv0_parse_fnsig(rust_state_t *restrict, 92 strview_t *restrict); 93 static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict, 94 strview_t *restrict); 95 static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict, 96 strview_t *restrict, boolean_t); 97 98 boolean_t 99 rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv) 100 { 101 boolean_t save_skip; 102 boolean_t ret; 103 104 /* Make sure all the characters are valid */ 105 if (!rustv0_valid_sym(sv)) { 106 st->rs_error = EINVAL; 107 return (B_FALSE); 108 } 109 110 /* 111 * 
<symbol-name> = "_R" [<decimal-number>] <path> 112 * [<instantiating-crate>] 113 * 114 * We've already parsed the prefix in rust_demangle(), as well 115 * as made sure there's no [<decimal-number>] present, so 116 * start with <path>. 117 */ 118 if (!rustv0_parse_path(st, sv, B_TRUE)) 119 return (B_FALSE); 120 121 /* [<instantiating crate>] -- parse but don't save */ 122 SKIP_BEGIN(st, save_skip); 123 ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE)); 124 SKIP_END(st, save_skip); 125 if (!ret) 126 return (B_FALSE); 127 128 /* If nothing's left, we know we're done */ 129 if (sv_remaining(sv) == 0) 130 return (!HAS_ERROR(st)); 131 132 /* 133 * LLVM sometimes will suffix symbols starting with a '.' 134 * followed by extra data. For things that start with 135 * ".llvm.", we discard the rest of the string. For 136 * other things that start with '.', we copy the 137 * results to the final string. This matches 138 * what the rust native demangler crate does, and 139 * we don't see a reason to deviate from their 140 * behavior. 141 */ 142 if (sv_consume_if(sv, ".llvm.")) 143 return (!HAS_ERROR(st)); 144 145 if (sv_peek(sv, 0) != '.') { 146 DEMDEBUG("%s: Unexpected trailing data at the end of the " 147 "name: '%.*s'", __func__, SV_PRINT(sv)); 148 st->rs_error = EINVAL; 149 return (B_FALSE); 150 } 151 152 return (rust_append_sv(st, sv_remaining(sv), sv)); 153 } 154 155 /* 156 * Parse an optional list terminated by 'E'. Each result of 'fn' is 157 * separated by 'sep' in the output. 
158 */ 159 static boolean_t 160 rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv, 161 boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t), 162 const char *restrict sep, boolean_t bval, size_t *restrict countp) 163 { 164 size_t count = 0; 165 166 DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); 167 168 while (sv_remaining(sv) > 0) { 169 if (sv_consume_if_c(sv, 'E')) { 170 if (countp != NULL) 171 *countp += count; 172 return (B_TRUE); 173 } 174 175 if (count > 0 && !rust_append(st, sep)) 176 return (B_FALSE); 177 178 if (!fn(st, sv, bval)) 179 return (B_FALSE); 180 181 count++; 182 } 183 184 /* 185 * An optional list should terminate with an 'E'. If we get here, 186 * we ran out of charaters and didn't terminate as we should. 187 */ 188 return (B_FALSE); 189 } 190 191 static boolean_t 192 rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv) 193 { 194 const char *str = NULL; 195 strview_t save; 196 char c; 197 198 if (HAS_ERROR(st) || sv_remaining(sv) == 0) 199 return (B_FALSE); 200 201 sv_init_sv(&save, sv); 202 203 switch (c = sv_consume_c(sv)) { 204 case 'h': 205 str = "u8"; 206 break; 207 case 't': 208 str = "u16"; 209 break; 210 case 'm': 211 str = "u32"; 212 break; 213 case 'y': 214 str = "u64"; 215 break; 216 case 'o': 217 str = "u128"; 218 break; 219 case 'j': /* usize */ 220 str = "usize"; 221 break; 222 default: 223 sv_init_sv(sv, &save); 224 return (B_FALSE); 225 } 226 227 DEMDEBUG("%s: %c -> %s", __func__, c, str); 228 return (rust_append(st, str)); 229 } 230 231 static boolean_t 232 rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv) 233 { 234 const char *str = NULL; 235 strview_t save; 236 char c; 237 238 if (HAS_ERROR(st) || sv_remaining(sv) == 0) 239 return (B_FALSE); 240 241 if (rustv0_parse_uint_type(st, sv)) 242 return (B_TRUE); 243 244 sv_init_sv(&save, sv); 245 246 switch (c = sv_consume_c(sv)) { 247 case 'a': 248 str = "i8"; 249 break; 250 case 'b': 251 str = 
"bool"; 252 break; 253 case 'c': 254 str = "char"; 255 break; 256 case 'd': 257 str = "f64"; 258 break; 259 case 'e': 260 str = "str"; 261 break; 262 case 'f': 263 str = "f32"; 264 break; 265 case 'i': 266 str = "isize"; 267 break; 268 case 'l': 269 str = "i32"; 270 break; 271 case 'n': 272 str = "i128"; 273 break; 274 case 'p': 275 str = "_"; 276 break; 277 case 's': 278 str = "i16"; 279 break; 280 case 'u': 281 str = "()"; 282 break; 283 case 'v': 284 str = "..."; 285 break; 286 case 'x': 287 str = "i64"; 288 break; 289 case 'z': 290 str = "!"; 291 break; 292 default: 293 sv_init_sv(sv, &save); 294 return (B_FALSE); 295 } 296 297 DEMDEBUG("%s: %c -> %s", __func__, c, str); 298 return (rust_append(st, str)); 299 } 300 301 static boolean_t 302 rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv, 303 boolean_t dummy __unused) 304 { 305 strview_t save; 306 size_t len, tuple_elem_count; 307 boolean_t ret; 308 char c; 309 310 if (HAS_ERROR(st)) 311 return (B_FALSE); 312 313 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 314 315 if (sv_remaining(sv) == 0) 316 return (B_FALSE); 317 318 SAVE_LEN(st, len); 319 sv_init_sv(&save, sv); 320 321 switch (c = sv_consume_c(sv)) { 322 case 'A': 323 ret = rust_appendc(st, '[') && 324 rustv0_parse_type(st, sv, B_FALSE) && 325 rust_append(st, "; ") && 326 rustv0_parse_const(st, sv, B_FALSE) && 327 rust_appendc(st, ']'); 328 break; 329 case 'S': 330 ret = rust_appendc(st, '[') && 331 rustv0_parse_type(st, sv, B_FALSE) && 332 rust_appendc(st, ']'); 333 break; 334 case 'T': 335 tuple_elem_count = 0; 336 ret = rust_appendc(st, '(') && 337 rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", 338 B_FALSE, &tuple_elem_count) && 339 rust_append(st, (tuple_elem_count == 1) ? ",)" : ")"); 340 break; 341 case 'R': 342 case 'Q': 343 /* `&mut T` or `&'... 
mut T` */ 344 if (!(ret = rust_appendc(st, '&'))) 345 break; 346 347 /* 348 * lifetime is optional, but we need to add a trailing 349 * space if present (so we cannot use the OPTIONAL macro). 350 */ 351 if (rustv0_parse_lifetime(st, sv)) { 352 if (!(ret = rust_appendc(st, ' '))) 353 break; 354 } else if (HAS_ERROR(st)) { 355 break; 356 } 357 358 ret = rust_append(st, (c == 'Q') ? "mut " : "") && 359 rustv0_parse_type(st, sv, B_FALSE); 360 break; 361 case 'P': 362 ret = rust_append(st, "*const ") && 363 rustv0_parse_type(st, sv, B_FALSE); 364 break; 365 case 'O': 366 ret = rust_append(st, "*mut ") && 367 rustv0_parse_type(st, sv, B_FALSE); 368 break; 369 case 'F': 370 ret = rustv0_parse_fnsig(st, sv); 371 break; 372 case 'D': 373 ret = rust_append(st, "dyn ") && 374 rustv0_parse_dynbounds(st, sv); 375 if (!ret) 376 break; 377 378 /* 379 * Rust RFC2603 shows the lifetime as required, however 380 * it appears this is optional. 381 */ 382 DEMDEBUG("%s: pre-lifetime: '%*s'", __func__, SV_PRINT(sv)); 383 384 /* 385 * We only want to print a non-zero (non "'_") 386 * lifetime. 387 */ 388 if (sv_consume_if(sv, "L_")) 389 break; 390 391 /* 392 * But if there is a lifetime we want to print, 393 * we want to prepend " + " before it. 394 */ 395 if (sv_peek(sv, 0) == 'L' && 396 !(ret = rust_append(st, " + "))) 397 break; 398 399 ret = rustv0_parse_lifetime(st, sv); 400 break; 401 default: 402 sv_init_sv(sv, &save); 403 404 ret = rustv0_parse_backref(st, sv, rustv0_parse_type, 405 B_FALSE) || 406 rustv0_parse_basic_type(st, sv); 407 if (ret) 408 break; 409 410 ret = rustv0_parse_path(st, sv, B_FALSE); 411 break; 412 } 413 414 DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len), 415 ret ? 
"success" : "fail"); 416 417 return (ret); 418 } 419 420 /* 421 * <path> = "C" <identifier> crate root 422 * | "M" <impl-path> <type> <T> 423 * | "X" <impl-path> <type> <path> <T as Trait> (trait impl) 424 * | "Y" <type> <path> <T as Trait> (trait definition) 425 * | "N" <ns> <path> <identifier> ...::ident (nested path) 426 * | "I" <path> {<generic-arg>} "E" ...<T, U> 427 * | <backref> 428 */ 429 static boolean_t 430 rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv, 431 boolean_t in_value) 432 { 433 strview_t save; 434 uint64_t disamb = 0; 435 size_t len; 436 boolean_t ret = B_FALSE; 437 boolean_t save_skip; 438 boolean_t args_stay_save = st->rs_args_stay_open; 439 boolean_t args_open_save = st->rs_args_is_open; 440 441 if (HAS_ERROR(st)) 442 return (B_FALSE); 443 444 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 445 446 if (sv_remaining(sv) == 0) 447 return (B_FALSE); 448 449 SAVE_LEN(st, len); 450 sv_init_sv(&save, sv); 451 452 switch (sv_consume_c(sv)) { 453 case 'C': 454 if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb))) 455 goto done; 456 457 if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) 458 goto done; 459 460 if (st->rs_verbose && 461 !rust_append_printf(st, "[%" PRIx64 "]", disamb)) 462 goto done; 463 break; 464 case 'M': 465 SKIP_BEGIN(st, save_skip); 466 if (!rustv0_parse_impl_path(st, sv, in_value)) { 467 SKIP_END(st, save_skip); 468 goto done; 469 } 470 SKIP_END(st, save_skip); 471 472 if (!rust_appendc(st, '<') || 473 !rustv0_parse_type(st, sv, B_FALSE) || 474 !rust_appendc(st, '>')) 475 goto done; 476 break; 477 case 'X': 478 SKIP_BEGIN(st, save_skip); 479 if (!rustv0_parse_impl_path(st, sv, in_value)) { 480 SKIP_END(st, save_skip); 481 goto done; 482 } 483 SKIP_END(st, save_skip); 484 /*FALLTHRU*/ 485 case 'Y': 486 if (!rust_appendc(st, '<') || 487 !rustv0_parse_type(st, sv, B_FALSE) || 488 !rust_append(st, " as ") || 489 !rustv0_parse_path(st, sv, B_FALSE) || 490 !rust_appendc(st, '>')) 491 goto 
done; 492 break; 493 case 'N': 494 if (!rustv0_parse_nested_path(st, sv, in_value)) 495 goto done; 496 break; 497 case 'I': 498 st->rs_args_stay_open = B_FALSE; 499 st->rs_args_is_open = B_FALSE; 500 501 if (!rustv0_parse_path(st, sv, in_value)) 502 goto done; 503 504 if (in_value && !rust_append(st, "::")) 505 goto done; 506 507 if (!rust_appendc(st, '<') || 508 !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg, 509 ", ", B_FALSE, NULL)) 510 goto done; 511 512 st->rs_args_stay_open = args_stay_save; 513 st->rs_args_is_open = args_open_save; 514 515 /* 516 * If we were asked to not close our list, then don't and 517 * indicate that the list is open. 518 */ 519 if (st->rs_args_stay_open) { 520 st->rs_args_stay_open = B_FALSE; 521 st->rs_args_is_open = B_TRUE; 522 } else if (!rust_appendc(st, '>')) { 523 goto done; 524 } 525 break; 526 default: 527 /* 528 * Didn't recognize the letter, so it has to be a path. Restore 529 * sv to state prior to switch and continue. 530 */ 531 sv_init_sv(sv, &save); 532 if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value)) 533 goto done; 534 } 535 536 ret = B_TRUE; 537 538 done: 539 DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len), 540 ret ? "success" : "fail"); 541 542 return (ret); 543 } 544 545 static boolean_t 546 rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv, 547 boolean_t in_value) 548 { 549 uint64_t val = 0; 550 551 return (OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &val)) && 552 rustv0_parse_path(st, sv, in_value)); 553 } 554 555 /* 556 * A bit of a hack -- when printing a nested path, we need to know 557 * if the identifier is there or not in order to correctly format 558 * the output preceeding it (when present). This peeks ahead and 559 * determines this. 
560 */ 561 static boolean_t 562 rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv, 563 boolean_t *has_namep) 564 { 565 strview_t save; 566 567 if (HAS_ERROR(st)) 568 return (B_FALSE); 569 570 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 571 572 if (sv_remaining(sv) == 0) 573 return (B_FALSE); 574 575 sv_init_sv(&save, sv); 576 577 /* For checking the length, we don't care if it's punycode or not */ 578 (void) sv_consume_if_c(&save, 'u'); 579 580 if (sv_remaining(sv) == 0) { 581 st->rs_error = EINVAL; 582 return (B_FALSE); 583 } 584 585 if (sv_consume_if_c(&save, '0')) { 586 *has_namep = B_FALSE; 587 return (B_TRUE); 588 } 589 590 *has_namep = B_TRUE; 591 return (B_TRUE); 592 } 593 594 static boolean_t 595 rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv, 596 boolean_t in_value) 597 { 598 uint64_t disambiguator = 0; 599 size_t len = 0; 600 char ns; 601 boolean_t ret = B_FALSE; 602 boolean_t has_name; 603 604 if (HAS_ERROR(st)) 605 return (B_FALSE); 606 607 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 608 609 if (sv_remaining(sv) == 0) 610 return (B_FALSE); 611 612 SAVE_LEN(st, len); 613 614 ns = sv_consume_c(sv); 615 616 if (!rustv0_parse_path(st, sv, in_value)) 617 goto done; 618 619 if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator))) 620 goto done; 621 622 if (!rustv0_has_name(st, sv, &has_name)) 623 goto done; 624 625 if (ISUPPER(ns)) { 626 if (!rust_append(st, "::{")) 627 goto done; 628 629 switch (ns) { 630 case 'C': 631 if (!rust_append(st, "closure")) 632 goto done; 633 break; 634 case 'S': 635 if (!rust_append(st, "shim")) 636 goto done; 637 break; 638 default: 639 if (!rust_appendc(st, ns)) 640 goto done; 641 break; 642 } 643 644 if (has_name && !rust_appendc(st, ':')) 645 goto done; 646 647 if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) 648 goto done; 649 650 ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator); 651 } else { 652 if (has_name) { 653 if (!(ret 
= rust_append(st, "::"))) 654 goto done; 655 } 656 ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE); 657 } 658 659 done: 660 DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len), 661 ret ? "success" : "fail"); 662 663 return (ret); 664 } 665 666 /* 667 * <disambiguator> = "s" <base-64-number> 668 * 669 */ 670 static boolean_t 671 rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv, 672 uint64_t *valp) 673 { 674 if (HAS_ERROR(st) || sv_remaining(sv) < 2) 675 return (B_FALSE); 676 677 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 678 679 *valp = 0; 680 681 if (!sv_consume_if_c(sv, 's')) 682 return (B_FALSE); 683 684 if (!rustv0_parse_base62(st, sv, valp)) { 685 st->rs_error = EINVAL; 686 return (B_FALSE); 687 } 688 689 /* 690 * Rust RFC 2603 details this in Appendix A, but not the main 691 * portion of the RFC. If no disambiguator is present, the value 692 * is 0, if the decoded value is 0, the index is 1, ... 693 * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we 694 * only need to add one here to complete the adjustment. 
695 */ 696 *valp = *valp + 1; 697 698 DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp); 699 return (B_TRUE); 700 } 701 702 /* <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> */ 703 static boolean_t 704 rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st, 705 strview_t *restrict sv, boolean_t repl_underscore) 706 { 707 uint64_t len = 0; 708 boolean_t puny = B_FALSE; 709 710 if (HAS_ERROR(st)) 711 return (B_FALSE); 712 713 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 714 715 if (sv_remaining(sv) == 0) 716 return (B_FALSE); 717 718 if (sv_consume_if_c(sv, 'u')) 719 puny = B_TRUE; 720 721 if (!rust_parse_base10(st, sv, &len)) 722 return (B_FALSE); 723 724 /* skip optional separator '_' */ 725 (void) sv_consume_if_c(sv, '_'); 726 727 if (sv_remaining(sv) < len) { 728 DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") " 729 "> remaining bytes (%zu)", __func__, len, 730 sv_remaining(sv)); 731 return (B_FALSE); 732 } 733 734 /* 0 length identifiers are acceptable */ 735 if (len == 0) 736 return (B_TRUE); 737 738 if (puny) { 739 strview_t ident; 740 741 sv_init_sv_range(&ident, sv, len); 742 if (!rustv0_puny_decode(st, &ident, repl_underscore)) 743 return (B_FALSE); 744 745 sv_consume_n(sv, len); 746 return (B_TRUE); 747 } 748 749 /* 750 * rust identifiers do not contain '-'. However ABI identifiers 751 * are allowed to contain them (e.g. extern "foo-bar" fn ...). 752 * They are substituted with '_' in the mangled output. If we 753 * do not need to reverse this, we can just append 'len' bytes 754 * of sv. Otherwise we need to go through and reverse this 755 * substitution. 756 */ 757 if (!repl_underscore) 758 return (rust_append_sv(st, len, sv)); 759 760 /* 761 * We checked earlier that len < sv_remaining(sv); so this loop 762 * cannot overrun. 
763 */ 764 for (size_t i = 0; i < len; i++) { 765 char c = sv_consume_c(sv); 766 767 if (c == '_') 768 c = '-'; 769 770 if (!rust_appendc(st, c)) 771 return (B_FALSE); 772 } 773 774 return (B_TRUE); 775 } 776 777 /* <backref> = "B" <base-62-number> */ 778 static boolean_t 779 rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv, 780 boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b), 781 boolean_t bval) 782 { 783 strview_t backref; 784 strview_t target; 785 uint64_t idx = 0; 786 size_t save_len; 787 size_t len; 788 789 if (HAS_ERROR(st)) 790 return (B_FALSE); 791 792 sv_init_sv(&backref, sv); 793 794 if (!sv_consume_if_c(sv, 'B')) 795 return (B_FALSE); 796 797 DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv)); 798 799 if (!rustv0_parse_base62(st, sv, &idx)) { 800 st->rs_error = EINVAL; 801 return (B_FALSE); 802 } 803 804 /* 805 * Determine how many bytes we've consumed (up to the start of 806 * the current backref token). 807 */ 808 VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first); 809 len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first); 810 811 /* 812 * The backref can only refer to an index prior to the start of 813 * the current backref token -- that is must always refer back in 814 * the string, never to the current position or beyond. 815 */ 816 if (idx >= len) { 817 DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") " 818 "is out of range [0, %zu)", __func__, idx, len); 819 st->rs_error = ERANGE; 820 return (B_FALSE); 821 } 822 823 /* 824 * Create a strview_t of the original string (sans prefix) by 825 * copying from st->rs_orig. The length of the target strview_t is 826 * capped to end immediately prior to this backref token. Since we 827 * enforce that backrefs must always refer to already processed 828 * portions of the string (i.e. 
must always refer backwards), and the 829 * length of the strview_t is set to end prior to the start of this 830 * backref token, we guarantee processing of a backref will always 831 * terminate before it can possibly encounter this backref token 832 * and cause a loop -- either the processing terminates normally or 833 * it reaches the end of the capped strview_t. 834 */ 835 sv_init_sv_range(&target, &st->rs_orig, len); 836 837 /* 838 * Consume all the input in the target strview_t up to the index 839 */ 840 sv_consume_n(&target, idx); 841 842 DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__, 843 idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : ""); 844 845 /* 846 * If we're skipping the output, there's no reason to bother reparsing 847 * the output -- we're not going to save it. We still setup everything 848 * so that the debug output is still emitted. 849 */ 850 if (st->rs_skip) 851 return (B_TRUE); 852 853 SAVE_LEN(st, save_len); 854 if (!fn(st, &target, bval)) 855 return (B_FALSE); 856 857 DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len)); 858 return (B_TRUE); 859 } 860 861 static boolean_t 862 rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime) 863 { 864 uint64_t bound_lt; 865 866 if (HAS_ERROR(st)) 867 return (B_FALSE); 868 869 if (!rust_appendc(st, '\'')) 870 return (B_FALSE); 871 872 if (lifetime == 0) 873 return (rust_appendc(st, '_')); 874 875 if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) { 876 DEMDEBUG("%s: ERROR: lifetime value %" PRIu64 877 " > current depth %" PRIu64, __func__, lifetime, 878 st->rs_lt_depth); 879 st->rs_lt_depth = ERANGE; 880 return (B_FALSE); 881 } 882 883 /* 884 * Use 'a, 'b, ... 885 */ 886 if (bound_lt < 26) { 887 char c = (char)bound_lt + 'a'; 888 return (rust_append_printf(st, "%c", c)); 889 } 890 891 /* 892 * Otherwise, use '_123, '_456, ... 
893 */ 894 return (rust_append_printf(st, "_%" PRIu64, bound_lt)); 895 } 896 897 static boolean_t 898 rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv) 899 { 900 uint64_t lifetime; 901 902 if (!sv_consume_if_c(sv, 'L')) 903 return (B_FALSE); 904 905 if (!rustv0_parse_base62(st, sv, &lifetime)) 906 return (B_FALSE); 907 908 return (rustv0_append_lifetime(st, lifetime)); 909 } 910 911 static boolean_t 912 rustv0_parse_const_data(rust_state_t *restrict st, 913 const_type_class_t type_class, strview_t *restrict sv) 914 { 915 uint64_t val = 0; 916 size_t save_len; 917 boolean_t neg = B_FALSE; 918 boolean_t ret = B_FALSE; 919 920 VERIFY3S(type_class, !=, CTC_INVALID); 921 922 if (HAS_ERROR(st)) 923 return (B_FALSE); 924 925 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 926 SAVE_LEN(st, save_len); 927 928 if (sv_remaining(sv) == 0) 929 return (B_FALSE); 930 931 if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n')) 932 neg = B_TRUE; 933 934 ret = OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) && 935 sv_consume_if_c(sv, '_'); 936 if (!ret) 937 goto done; 938 939 switch (type_class) { 940 case CTC_SIGNED: 941 case CTC_UNSIGNED: 942 ret = rust_append_printf(st, "%s%" PRIu64, neg ? "-" : "", val); 943 break; 944 case CTC_BOOL: 945 if (val > 1) { 946 DEMDEBUG("%s: invalid bool val %" PRIu64, __func__, 947 val); 948 ret = B_FALSE; 949 break; 950 } 951 ret = rust_append_printf(st, "%s", 952 (val == 0) ? "false" : "true"); 953 break; 954 case CTC_CHAR: 955 if (val > UINT32_MAX) { 956 DEMDEBUG("%s: char value %" PRIu64 " out of range", 957 __func__, val); 958 ret = B_FALSE; 959 break; 960 } 961 962 ret = rust_appendc(st, '\'') && rust_append_utf8_c(st, val) && 963 rust_appendc(st, '\''); 964 break; 965 default: 966 ret = B_FALSE; 967 } 968 969 done: 970 DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len), 971 ret ? 
"success" : "fail"); 972 973 return (ret); 974 } 975 976 static boolean_t 977 rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv, 978 boolean_t dummy __unused) 979 { 980 strview_t type; 981 size_t start_len; 982 const_type_class_t ctype_class; 983 char ctype; 984 boolean_t save_skip; 985 boolean_t ret; 986 987 if (HAS_ERROR(st)) 988 return (B_FALSE); 989 990 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 991 SAVE_LEN(st, start_len); 992 993 if (sv_remaining(sv) == 0) 994 return (B_FALSE); 995 996 if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE)) 997 return (B_TRUE); 998 999 if (sv_consume_if_c(sv, 'p')) { 1000 ret = rust_appendc(st, '_'); 1001 goto done; 1002 } 1003 1004 ctype = sv_peek(sv, 0); 1005 ctype_class = rustv0_classify_const_type(ctype); 1006 if (ctype_class == CTC_INVALID) { 1007 DEMDEBUG("%s: const type isn't a valid const generic type", 1008 __func__); 1009 return (B_FALSE); 1010 } 1011 1012 /* 1013 * This isn't spelled out clearly in Rust RFC 2603, but currently 1014 * only unsigned int types are allowed at this point. However, we 1015 * have a bit of a potential tricky situation. Unlike formatting 1016 * the other tokens, if we want to display the type, we do so 1017 * _after_ the value, even though the type appears first. 1018 * 1019 * This is bit of a hack, but we save off the input position from 1020 * sv before the parse the type. We then parse it without saving 1021 * the resulting value, then parse and output the constant. If 1022 * we wish to then display the type, we can go back and parse 1023 * the type again, this time saving the result. 
1024 */ 1025 sv_init_sv(&type, sv); 1026 1027 SKIP_BEGIN(st, save_skip); 1028 ret = rustv0_parse_type(st, sv, B_FALSE); 1029 SKIP_END(st, save_skip); 1030 1031 if (!ret) { 1032 DEMDEBUG("%s: const type isn't valid", __func__); 1033 return (B_FALSE); 1034 } 1035 1036 if (sv_consume_if_c(sv, 'p')) { 1037 ret = rust_appendc(st, '_'); 1038 } else { 1039 ret = rustv0_parse_const_data(st, ctype_class, sv); 1040 } 1041 if (!ret) 1042 goto done; 1043 1044 if (st->rs_show_const_type) { 1045 ret = rust_append(st, ": ") && 1046 rustv0_parse_uint_type(st, &type); 1047 } 1048 1049 done: 1050 DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len), 1051 ret ? "success" : "fail"); 1052 return (ret); 1053 } 1054 1055 static boolean_t 1056 rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv) 1057 { 1058 DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); 1059 1060 if (sv_consume_if_c(sv, 'C')) 1061 return (rust_appendc(st, 'C')); 1062 1063 return (rustv0_parse_undisambiguated_identifier(st, sv, B_TRUE)); 1064 } 1065 1066 static boolean_t 1067 rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv) 1068 { 1069 uint64_t n, i; 1070 1071 if (!sv_consume_if_c(sv, 'G')) 1072 return (B_FALSE); 1073 1074 if (!rustv0_parse_base62(st, sv, &n)) 1075 return (B_FALSE); 1076 n += 1; 1077 1078 if (!rust_append(st, "for<")) 1079 return (B_FALSE); 1080 1081 for (i = 0; i < n; i++) { 1082 if (i > 0 && !rust_append(st, ", ")) 1083 return (B_FALSE); 1084 1085 st->rs_lt_depth++; 1086 if (!rustv0_append_lifetime(st, 1)) 1087 return (B_FALSE); 1088 } 1089 1090 if (!rust_append(st, "> ")) 1091 return (B_FALSE); 1092 1093 return (B_TRUE); 1094 } 1095 1096 /* 1097 * <fn-sig> := [<binder>] ["U"] ["K" <abi>] {type} "E" <type> 1098 * 1099 * Note that while the Rust RFC states the binder is manditory, based on 1100 * actual examples, and comparing with the rust-based demangler, it is in 1101 * fact optional. 
1102 */ 1103 static boolean_t 1104 rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv) 1105 { 1106 uint64_t save_lt = st->rs_lt_depth; 1107 1108 DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv)); 1109 1110 if (!OPTIONAL(st, rustv0_parse_binder(st, sv))) 1111 return (B_FALSE); 1112 1113 if (sv_consume_if_c(sv, 'U') && !rust_append(st, "unsafe ")) 1114 return (B_FALSE); 1115 1116 if (sv_consume_if_c(sv, 'K') && 1117 (!rust_append(st, "extern \"") || !rustv0_parse_abi(st, sv) || 1118 !rust_append(st, "\" "))) 1119 return (B_FALSE); 1120 1121 if (!rust_append(st, "fn(")) 1122 return (B_FALSE); 1123 1124 if (!rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE, 1125 NULL)) { 1126 return (B_FALSE); 1127 } 1128 1129 if (!rust_appendc(st, ')')) 1130 return (B_FALSE); 1131 1132 /* If the return type is (), don't print it */ 1133 if (!sv_consume_if_c(sv, 'u')) { 1134 if (!rust_append(st, " -> ")) 1135 return (B_FALSE); 1136 1137 if (!rustv0_parse_type(st, sv, B_FALSE)) 1138 return (B_FALSE); 1139 } 1140 1141 st->rs_lt_depth = save_lt; 1142 1143 return (B_TRUE); 1144 } 1145 1146 /* 1147 * <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 1148 */ 1149 static boolean_t 1150 rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st, 1151 strview_t *restrict sv, boolean_t open) 1152 { 1153 size_t save_len; 1154 1155 if (HAS_ERROR(st)) 1156 return (B_FALSE); 1157 1158 if (sv_remaining(sv) == 0) 1159 return (B_FALSE); 1160 1161 if (!sv_consume_if_c(sv, 'p')) 1162 return (B_FALSE); 1163 1164 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 1165 SAVE_LEN(st, save_len); 1166 1167 if (!rust_append(st, open ? 
", " : "<")) 1168 return (B_FALSE); 1169 1170 if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) { 1171 st->rs_error = EINVAL; 1172 return (B_FALSE); 1173 } 1174 1175 if (!rust_append(st, " = ")) 1176 return (B_FALSE); 1177 1178 if (!rustv0_parse_type(st, sv, B_FALSE)) { 1179 st->rs_error = EINVAL; 1180 return (B_FALSE); 1181 } 1182 1183 DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len)); 1184 1185 return (B_TRUE); 1186 } 1187 1188 static boolean_t 1189 rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv, 1190 boolean_t dummy __unused) 1191 { 1192 boolean_t stay_save = st->rs_args_stay_open; 1193 boolean_t open_save = st->rs_args_is_open; 1194 boolean_t open = B_FALSE; 1195 1196 if (HAS_ERROR(st)) 1197 return (B_FALSE); 1198 1199 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 1200 1201 /* 1202 * This is a bit subtle, but when formatting a trait in trait, 1203 * we want something like this: 1204 * 1205 * dyn Trait<T, U, Assoc=X> 1206 * 1207 * instead of 1208 * 1209 * dyn Trait<T, U, <Assoc=X>> 1210 * 1211 * So when parsing the path, if we encounter generic arguments, we want 1212 * the arg list to remain open at the end of processing the path so 1213 * we can append the bindings to it. We set rs_args_stay_open to B_TRUE 1214 * to indidcate to rustv0_parse_path() that a generic argument list 1215 * should not be closed (i.e. don't append a '>' at the end of the 1216 * list). If rustv0_parse_path() encounters a list of generic arguments, 1217 * it will also set rs->args_is_open to indiciate it opened the list. 1218 * We save this in 'open' so that when we process the associated 1219 * bindings, we know if we need to open the list on the first binding 1220 * or not -- we don't want 'dyn Trait<>' if there are no bindings, 1221 * just 'dyn Trait'. 
1222 */ 1223 st->rs_args_stay_open = B_TRUE; 1224 st->rs_args_is_open = B_FALSE; 1225 1226 if (!rustv0_parse_path(st, sv, B_FALSE)) { 1227 st->rs_args_stay_open = stay_save; 1228 st->rs_args_is_open = open_save; 1229 return (B_FALSE); 1230 } 1231 1232 open = st->rs_args_is_open; 1233 1234 st->rs_args_stay_open = stay_save; 1235 st->rs_args_is_open = open_save; 1236 1237 while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) { 1238 open = B_TRUE; 1239 } 1240 1241 if (HAS_ERROR(st)) 1242 return (B_FALSE); 1243 1244 if (open && !rust_appendc(st, '>')) 1245 return (B_FALSE); 1246 1247 return (!HAS_ERROR(st)); 1248 } 1249 1250 static boolean_t 1251 rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv) 1252 { 1253 uint64_t save_lt = st->rs_lt_depth; 1254 1255 if (HAS_ERROR(st)) 1256 return (B_FALSE); 1257 1258 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 1259 1260 /* 1261 * This is another case where Rust RFC2603 seems to disagree with 1262 * the implementation. The RFC implies this is mandatory, while 1263 * the implementations treat it as optional. 1264 */ 1265 if (!OPTIONAL(st, rustv0_parse_binder(st, sv))) 1266 return (B_FALSE); 1267 1268 if (!rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ", 1269 B_FALSE, NULL)) 1270 return (B_FALSE); 1271 1272 st->rs_lt_depth = save_lt; 1273 1274 return (B_TRUE); 1275 } 1276 1277 static boolean_t 1278 rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv, 1279 boolean_t dummy __unused) 1280 { 1281 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 1282 1283 if (sv_consume_if_c(sv, 'K')) 1284 return (rustv0_parse_const(st, sv, B_FALSE)); 1285 1286 if (rustv0_parse_lifetime(st, sv)) 1287 return (B_TRUE); 1288 1289 return (rustv0_parse_type(st, sv, B_FALSE)); 1290 } 1291 1292 /* 1293 * Parse a hex value into *valp. Note that rust only uses lower case 1294 * hex values. 
1295 */ 1296 static boolean_t 1297 rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv, 1298 uint64_t *restrict valp) 1299 { 1300 uint64_t val = 0; 1301 size_t ndigits = 0; 1302 1303 if (HAS_ERROR(st)) 1304 return (B_FALSE); 1305 1306 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 1307 1308 if (sv_remaining(sv) == 0) 1309 return (B_FALSE); 1310 1311 /* 1312 * Unfortunately, Rust RFC 2603 also doesn't not explicty define 1313 * {hex-digits}. We follow what decimal digits does, and treat a 1314 * leading 0 as a terminator. 1315 */ 1316 while (sv_remaining(sv) > 0) { 1317 char c = sv_peek(sv, 0); 1318 1319 if (ISDIGIT(c)) { 1320 val *= 16; 1321 val += c - '0'; 1322 } else if (c >= 'a' && c <= 'f') { 1323 val *= 16; 1324 val += c - 'a' + 10; 1325 } else { 1326 break; 1327 } 1328 1329 sv_consume_n(sv, 1); 1330 1331 if (++ndigits == 1 && val == 0) 1332 break; 1333 } 1334 1335 if (ndigits > 0) 1336 *valp = val; 1337 1338 return ((ndigits > 0) ? B_TRUE : B_FALSE); 1339 } 1340 1341 /* 1342 * Parse a base62 number into *valp. The number is explicitly terminated 1343 * by a '_'. The values are also offset by 0 -- that is '_' == 0, 1344 * '0_' == 1, ... 
1345 */ 1346 static boolean_t 1347 rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv, 1348 uint64_t *restrict valp) 1349 { 1350 uint64_t val = 0; 1351 char c; 1352 1353 if (HAS_ERROR(st)) 1354 return (B_FALSE); 1355 1356 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv)); 1357 1358 if (sv_remaining(sv) == 0) 1359 return (B_FALSE); 1360 1361 /* A terminating '_' without any digits is 0 */ 1362 if (sv_consume_if_c(sv, '_')) { 1363 *valp = 0; 1364 return (B_TRUE); 1365 } 1366 1367 /* Need at least one valid digit if > 0 */ 1368 if (!ISALNUM(sv_peek(sv, 0))) 1369 return (B_FALSE); 1370 1371 while (sv_remaining(sv) > 0) { 1372 c = sv_consume_c(sv); 1373 1374 if (c == '_') { 1375 /* 1376 * Because a lone '_' was already handled earlier, 1377 * we know we've had at least one other digit and 1378 * can increment the value and return. 1379 */ 1380 *valp = val + 1; 1381 return (B_TRUE); 1382 } else if (ISDIGIT(c)) { 1383 val *= 62; 1384 val += c - '0'; 1385 } else if (ISLOWER(c)) { 1386 val *= 62; 1387 val += c - 'a' + 10; 1388 } else if (ISUPPER(c)) { 1389 val *= 62; 1390 val += c - 'A' + 36; 1391 } else { 1392 return (B_FALSE); 1393 } 1394 } 1395 1396 /* We reached the end of the string without a terminating _ */ 1397 return (B_FALSE); 1398 } 1399 1400 static const_type_class_t 1401 rustv0_classify_const_type(char type) 1402 { 1403 switch (type) { 1404 case 'h': case 't': case 'm': case 'y': case 'o': case 'j': 1405 return (CTC_UNSIGNED); 1406 case 'a': case 'i': case 'l': case 'n': case 's': case 'x': 1407 return (CTC_SIGNED); 1408 case 'b': 1409 return (CTC_BOOL); 1410 case 'c': 1411 return (CTC_CHAR); 1412 default: 1413 return (CTC_INVALID); 1414 } 1415 } 1416 1417 /* 1418 * Make sure the name is a plausible mangled rust symbol. 1419 * Non-ASCII are never allowed. Rust itself uses [_0-9A-Za-z], however 1420 * some things will add a suffix starting with a '.' (e.g. LLVM thin LTO). 1421 * As such we proceed in two phases. 
We first only allow [_0-9A-Za-z] until
 * we encounter a '.'. At that point, any ASCII character is allowed.
 */
static boolean_t
rustv0_valid_sym(const strview_t *sv)
{
	/* Becomes B_TRUE once the first '.' has been seen. */
	boolean_t in_suffix = B_FALSE;

	for (size_t idx = 0; idx < sv->sv_rem; idx++) {
		const char ch = sv->sv_first[idx];

		/* These are acceptable in either phase. */
		if (ch == '_' || ISALNUM(ch))
			continue;

		if (ch == '.') {
			in_suffix = B_TRUE;
			continue;
		}

		/* Within the suffix, anything with the high bit clear is ok */
		if (in_suffix && (ch & 0x80) == 0)
			continue;

		DEMDEBUG("%s: ERROR found invalid character '%c' "
		    "in '%.*s' at index %zu",
		    __func__, ch, SV_PRINT(sv), idx);
		return (B_FALSE);
	}

	return (B_TRUE);
}