/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2019 Joyent, Inc. * Copyright 2021 Jason King */ #include #include #include #include #include #include "rust.h" /* * Unfortunately, there is currently no official specification for the legacy * rust name mangling. This is an attempt to document the understanding of the * mangling used here. It is based off examination of * https://docs.rs/rustc-demangle/0.1.13/rustc_demangle/ * * A mangled rust name is: * * * ::= _Z * __Z * * ::= N + [] E * * ::= {len} * * ::= [1-9][0-9]+ * * ::= <[A-Za-z]> <[A-Za-z0-9]>* * * * * ::= '..' # '::' * * ::= $SP$ # '@' * $BP$ # '*' * $RF$ # '&' * $LT$ # '<' * $GT$ # '>' * $LP$ # '(' * $RP$ # ')' * $C$ # ',' * * := h + * * := <[0-9a-f]> */ static const struct rust_charmap { const char *ruc_seq; char ruc_ch; } rust_charmap[] = { { "$SP$", '@' }, { "$BP$", '*' }, { "$RF$", '&' }, { "$LT$", '<' }, { "$GT$", '>' }, { "$LP$", '(' }, { "$RP$", ')' }, { "$C$", ',' }, }; static const size_t rust_charmap_sz = ARRAY_SIZE(rust_charmap); static boolean_t rustleg_valid_sym(const strview_t *); static boolean_t rustleg_parse_name(rust_state_t *, strview_t *); static boolean_t rustleg_parse_hash(rust_state_t *, strview_t *); static boolean_t rustleg_parse_special(rust_state_t *, strview_t *); static boolean_t rustleg_add_sep(rust_state_t *); boolean_t rust_demangle_legacy(rust_state_t *restrict st, strview_t *restrict sv) { /* Make sure the whole thing contains valid characters */ if (!rustleg_valid_sym(sv)) { st->rs_error = EINVAL; return (B_FALSE); } if (sv_peek(sv, -1) != 'E') { DEMDEBUG("ERROR: string does not end with 'E'"); st->rs_error = EINVAL; return (B_FALSE); } if (!rustleg_parse_name(st, sv)) return (B_FALSE); if (sv_remaining(sv) != 0) { DEMDEBUG("ERROR: trailing characters in name"); st->rs_error = EINVAL; return (B_FALSE); } return (B_TRUE); } static boolean_t rustleg_parse_name_segment(rust_state_t *st, strview_t *svp, boolean_t first) { strview_t orig; strview_t name; uint64_t len; size_t rem; boolean_t last = B_FALSE; if (HAS_ERROR(st) || sv_remaining(svp) == 0) return (B_FALSE); sv_init_sv(&orig, svp); if (!rust_parse_base10(st, svp, &len)) { DEMDEBUG("ERROR: no leading length"); st->rs_error = EINVAL; return (B_FALSE); } rem = sv_remaining(svp); if (rem < len) { DEMDEBUG("ERROR: segment length (%" PRIu64 ") > remaining " "bytes in string (%zu)", len, rem); st->rs_error = EINVAL; return (B_FALSE); } /* Is this the last segment before the terminating E? */ if (rem == len + 1) { VERIFY3U(sv_peek(svp, -1), ==, 'E'); last = B_TRUE; } if (!first && !rustleg_add_sep(st)) return (B_FALSE); /* Reduce length of seg to the length we parsed */ (void) sv_init_sv_range(&name, svp, len); DEMDEBUG("%s: segment='%.*s'", __func__, SV_PRINT(&name)); /* * A rust hash starts with 'h', and is the last component of a name * before the terminating 'E'. It is however not always present * in every mangled symbol, and a last segment that starts with 'h' * could be confused for it, so failing to part it just means * we don't have a trailing hash. */ if (sv_peek(&name, 0) == 'h' && last) { if (rustleg_parse_hash(st, &name)) goto done; /* * However any error other than 'not a hash' (e.g. ENOMEM) * means we should fail. */ if (st->rs_error != 0) goto done; } /* A '_' followed by $ is ignored at the start of a name segment */ if (sv_peek(&name, 0) == '_' && sv_peek(&name, 1) == '$') (void) sv_consume_n(&name, 1); while (sv_remaining(&name) > 0) { switch (sv_peek(&name, 0)) { case '$': if (rustleg_parse_special(st, &name)) continue; break; case '.': /* Convert '..' to '::' */ if (sv_peek(&name, 1) != '.') break; if (!rustleg_add_sep(st)) return (B_FALSE); sv_consume_n(&name, 2); continue; default: break; } if (!rust_appendc(st, sv_consume_c(&name))) { SET_ERROR(st); return (B_FALSE); } } done: sv_consume_n(svp, len); VERIFY3P(orig.sv_first, <=, svp->sv_first); DEMDEBUG("%s: consumed '%.*s'", __func__, (int)(uintptr_t)(svp->sv_first - orig.sv_first), orig.sv_first); return (B_TRUE); } /* * Parse N ({num})+ [h 0 && sv_peek(svp, 0) != 'E') { if (!rustleg_parse_name_segment(st, svp, first)) return (B_FALSE); first = B_FALSE; } if (!sv_consume_if_c(svp, 'E')) { DEMDEBUG("%s: ERROR no terminating 'E'", __func__); return (B_FALSE); } VERIFY3P(name.sv_first, <=, svp->sv_first); DEMDEBUG("%s: consumed '%.*s'", __func__, (int)(uintptr_t)(svp->sv_first - name.sv_first), name.sv_first); return (B_TRUE); } static boolean_t rustleg_parse_hash(rust_state_t *st, strview_t *svp) { if (HAS_ERROR(st)) return (B_FALSE); VERIFY(sv_consume_if_c(svp, 'h')); if (!rust_appendc(st, 'h')) return (B_FALSE); while (sv_remaining(svp) > 0) { char c = sv_consume_c(svp); switch (c) { /* * The upper-case hex digits (A-F) are excluded as valid * hash values for several reasons: * * 1. It would result in two different possible names for * the same function, leading to ambiguity in linking (among * other things). * * 2. It would cause potential ambiguity in parsing -- is a * trailing 'E' part of the hash, or the terminating character * in the mangled name? * * 3. No examples were able to be found in the wild where * uppercase digits are used, and other rust demanglers all * seem to assume the hash must contain lower-case hex digits. */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': if (!rust_appendc(st, c)) return (B_FALSE); break; default: return (B_FALSE); } } return (B_TRUE); } static boolean_t rustleg_parse_special(rust_state_t *restrict st, strview_t *restrict svp) { if (HAS_ERROR(st)) return (B_FALSE); if (sv_peek(svp, 0) != '$') return (B_FALSE); for (size_t i = 0; i < rust_charmap_sz; i++) { if (sv_consume_if(svp, rust_charmap[i].ruc_seq)) { if (!rust_appendc(st, rust_charmap[i].ruc_ch)) return (B_FALSE); return (B_TRUE); } } /* Handle $uXXXX$ */ strview_t sv; uint32_t val = 0; uint_t ndigits = 0; sv_init_sv(&sv, svp); /* We peeked at this earlier, so it should still be there */ VERIFY(sv_consume_if_c(&sv, '$')); if (!sv_consume_if_c(&sv, 'u')) return (B_FALSE); while (sv_remaining(&sv) > 0) { uint32_t cval = 0; char c; if (ndigits == 4) return (B_FALSE); c = sv_consume_c(&sv); if (c >= '0' && c <= '9') cval = c - '0'; else if (c >= 'a' && c <= 'f') cval = c - 'a' + 10; else if (c == '$') break; else return (B_FALSE); val <<= 4; val |= cval; ndigits++; } if (!rust_append_utf8_c(st, val)) return (B_FALSE); sv_consume_n(svp, ndigits + 3); return (B_TRUE); } static boolean_t rustleg_add_sep(rust_state_t *st) { if (HAS_ERROR(st)) return (B_FALSE); return (rust_append(st, "::")); } static boolean_t rustleg_valid_sym(const strview_t *sv) { size_t i; for (i = 0; i < sv->sv_rem; i++) { char c = sv->sv_first[i]; if ((c & 0x80) == 0) continue; DEMDEBUG("%s: ERROR found 8-bit character '%c' in '%.*s' " "at index %zu", __func__, c, SV_PRINT(sv), i); return (B_FALSE); } return (B_TRUE); }