1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2021 Jason King 14 * Copyright 2019 Joyent, Inc. 15 */ 16 17 #include <errno.h> 18 #include <langinfo.h> 19 #include <libcustr.h> 20 #include <limits.h> 21 #include <stdarg.h> 22 #include <string.h> 23 24 #include "demangle_int.h" 25 #include "rust.h" 26 27 static void * 28 rust_cualloc(custr_alloc_t *cua, size_t len) 29 { 30 rust_state_t *st = cua->cua_arg; 31 return (zalloc(st->rs_ops, len)); 32 } 33 34 static void 35 rust_cufree(custr_alloc_t *cua, void *p, size_t len) 36 { 37 rust_state_t *st = cua->cua_arg; 38 xfree(st->rs_ops, p, len); 39 } 40 41 static const custr_alloc_ops_t rust_custr_ops = { 42 .custr_ao_alloc = rust_cualloc, 43 .custr_ao_free = rust_cufree 44 }; 45 46 boolean_t 47 rust_appendc(rust_state_t *st, char c) 48 { 49 custr_t *cus = st->rs_demangled; 50 51 if (HAS_ERROR(st)) 52 return (B_FALSE); 53 54 if (st->rs_skip) 55 return (B_TRUE); 56 57 switch (c) { 58 case '\a': 59 return (rust_append(st, "\\a")); 60 case '\b': 61 return (rust_append(st, "\\b")); 62 case '\f': 63 return (rust_append(st, "\\f")); 64 case '\n': 65 return (rust_append(st, "\\n")); 66 case '\r': 67 return (rust_append(st, "\\r")); 68 case '\t': 69 return (rust_append(st, "\\t")); 70 case '\v': 71 return (rust_append(st, "\\v")); 72 case '\\': 73 return (rust_append(st, "\\\\")); 74 } 75 76 if (c < ' ') 77 return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c)); 78 79 if (custr_appendc(cus, c) != 0) { 80 SET_ERROR(st); 81 return (B_FALSE); 82 } 83 84 return (B_TRUE); 85 } 86 87 /* 88 * Append a UTF-8 code point. If we're not in a UTF-8 locale, this gets 89 * appended as '\u<hex codepoint>' otherwise the character itself is 90 * added. 91 */ 92 boolean_t 93 rust_append_utf8_c(rust_state_t *st, uint32_t val) 94 { 95 custr_t *cus = st->rs_demangled; 96 uint_t n = 0; 97 uint8_t c[4] = { 0 }; 98 99 if (HAS_ERROR(st)) 100 return (B_FALSE); 101 102 if (!st->rs_isutf8) { 103 if (val < 0x80) 104 return (rust_appendc(st, (char)val)); 105 if (val < 0x10000) 106 return (rust_append_printf(st, "\\u%04" PRIx32, val)); 107 return (rust_append_printf(st, "\\U%08" PRIx32, val)); 108 } 109 110 if (val < 0x80) { 111 return (rust_appendc(st, (char)val)); 112 } else if (val < 0x800) { 113 c[0] = 0xc0 | ((val >> 6) & 0x1f); 114 c[1] = 0x80 | (val & 0x3f); 115 n = 2; 116 } else if (val < 0x10000) { 117 c[0] = 0xe0 | ((val >> 12) & 0x0f); 118 c[1] = 0x80 | ((val >> 6) & 0x3f); 119 c[2] = 0x80 | (val & 0x3f); 120 n = 3; 121 } else if (val < 0x110000) { 122 c[0] = 0xf0 | ((val >> 18) & 0x7); 123 c[1] = 0x80 | ((val >> 12) & 0x3f); 124 c[2] = 0x80 | ((val >> 6) & 0x3f); 125 c[3] = 0x80 | (val & 0x3f); 126 n = 4; 127 } else { 128 DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__, 129 val); 130 return (B_FALSE); 131 } 132 133 for (uint_t i = 0; i < n; i++) { 134 if (custr_appendc(cus, c[i]) != 0) { 135 SET_ERROR(st); 136 return (B_FALSE); 137 } 138 } 139 140 return (B_TRUE); 141 } 142 143 boolean_t 144 rust_append(rust_state_t *st, const char *s) 145 { 146 custr_t *cus = st->rs_demangled; 147 148 if (HAS_ERROR(st)) 149 return (B_FALSE); 150 151 if (st->rs_skip) 152 return (B_TRUE); 153 154 if (custr_append(cus, s) != 0) { 155 SET_ERROR(st); 156 return (B_FALSE); 157 } 158 159 return (B_TRUE); 160 } 161 162 boolean_t 163 rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv) 164 { 165 if (HAS_ERROR(st)) 166 return (B_FALSE); 167 168 if (st->rs_skip) { 169 sv_consume_n(sv, (size_t)n); 170 return (B_TRUE); 171 } 172 173 if (n > sv_remaining(sv)) { 174 DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > " 175 "remaining bytes (%zu)", __func__, n, sv_remaining(sv)); 176 st->rs_error = ERANGE; 177 return (B_FALSE); 178 } 179 180 if (n > INT_MAX) { 181 DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n); 182 st->rs_error = ERANGE; 183 return (B_FALSE); 184 } 185 186 if (custr_append_printf(st->rs_demangled, "%.*s", 187 (int)n, sv->sv_first) != 0) { 188 SET_ERROR(st); 189 return (B_FALSE); 190 } 191 sv_consume_n(sv, (size_t)n); 192 193 return (B_TRUE); 194 } 195 196 boolean_t 197 rust_append_printf(rust_state_t *st, const char *fmt, ...) 198 { 199 va_list ap; 200 int ret; 201 202 if (HAS_ERROR(st)) 203 return (B_FALSE); 204 205 if (st->rs_skip) 206 return (B_TRUE); 207 208 va_start(ap, fmt); 209 ret = custr_append_vprintf(st->rs_demangled, fmt, ap); 210 va_end(ap); 211 212 if (ret == 0) 213 return (B_TRUE); 214 SET_ERROR(st); 215 return (B_FALSE); 216 } 217 218 boolean_t 219 rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv, 220 uint64_t *restrict valp) 221 { 222 uint64_t v = 0; 223 char c; 224 225 if (HAS_ERROR(st) || sv_remaining(sv) == 0) 226 return (B_FALSE); 227 228 c = sv_peek(sv, 0); 229 230 /* 231 * Since the legacy rust encoding states that it follows the 232 * Itanium C++ mangling format, we match the behavior of the 233 * Itanium C++ ABI in disallowing leading 0s in decimal numbers. 234 * 235 * For Rust encoding v0, RFC2603 currently has omitted the 236 * actual definition of <decimal-number>. However examination of 237 * other implementations written in tandem with the mangling 238 * implementation suggest that <decimal-number> can be expressed 239 * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and 240 * terminates the token, while any other leading digit allows 241 * parsing to continue until a non-digit is encountered, the 242 * end of the string is encountered, or overflow is encountered. 243 */ 244 if (c == '0') { 245 if (st->rs_encver == RUSTENC_V0) { 246 sv_consume_n(sv, 1); 247 *valp = 0; 248 return (B_TRUE); 249 } 250 251 DEMDEBUG("%s: ERROR number starts with leading 0\n", 252 __func__); 253 st->rs_error = EINVAL; 254 return (B_FALSE); 255 } else if (!ISDIGIT(c)) { 256 return (B_FALSE); 257 } 258 259 while (sv_remaining(sv) > 0) { 260 uint64_t cval; 261 262 c = sv_peek(sv, 0); 263 if (!ISDIGIT(c)) 264 break; 265 sv_consume_n(sv, 1); 266 267 cval = c - '0'; 268 269 if (mul_overflow(v, 10, &v)) { 270 DEMDEBUG("%s: multiplication overflowed\n", __func__); 271 st->rs_error = EOVERFLOW; 272 return (B_FALSE); 273 } 274 275 if (add_overflow(v, cval, &v)) { 276 DEMDEBUG("%s: addition overflowed\n", __func__); 277 st->rs_error = EOVERFLOW; 278 return (B_FALSE); 279 } 280 } 281 282 *valp = v; 283 return (B_TRUE); 284 } 285 286 static boolean_t 287 rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv) 288 { 289 DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv)); 290 291 if (HAS_ERROR(st)) 292 return (B_FALSE); 293 294 if (!sv_consume_if_c(sv, '_')) 295 return (B_FALSE); 296 297 /* 298 * MacOS prepends an additional '_' -- allow that in case 299 * we're given symbols from a MacOS object. 300 */ 301 (void) sv_consume_if_c(sv, '_'); 302 303 if (sv_consume_if_c(sv, 'Z')) { 304 /* 305 * Legacy names must start with '[_]_Z' 306 */ 307 st->rs_encver = RUSTENC_LEGACY; 308 DEMDEBUG("name is encoded using the rust legacy mangling " 309 "scheme"); 310 } else if (sv_consume_if_c(sv, 'R')) { 311 uint64_t ver = 0; 312 313 /* 314 * The non-legacy encoding is versioned. After the initial 315 * 'R' is the version. This isn't spelled out clearly in the 316 * RFC, but many numeric values encoded take an approach of 317 * a value of 0 is omitted, and any digits represent the 318 * value - 1. In other words, in this case, no digits means 319 * version 0, '_R0...' would be version 1, 'R1...' would 320 * be version 2, etc. Currently only version 0 is defined, 321 * but we try to provide a (hopefully) useful message 322 * when debugging, even if we can't use the version value 323 * beyond that. 324 */ 325 if (rust_parse_base10(st, sv, &ver)) { 326 DEMDEBUG("%s: ERROR: an unsupported encoding version " 327 "(%" PRIu64 ") was encountered", ver + 1); 328 st->rs_error = ENOTSUP; 329 return (B_FALSE); 330 } 331 332 st->rs_encver = RUSTENC_V0; 333 DEMDEBUG("name is encoded using the v0 mangling scheme"); 334 } else { 335 DEMDEBUG("did not find a valid rust prefix"); 336 return (B_FALSE); 337 } 338 339 sv_init_sv(&st->rs_orig, sv); 340 return (B_TRUE); 341 } 342 343 static void 344 rust_fini_state(rust_state_t *st) 345 { 346 custr_free(st->rs_demangled); 347 custr_alloc_fini(&st->rs_cualloc); 348 } 349 350 static boolean_t 351 rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops) 352 { 353 const char *codeset; 354 355 (void) memset(st, 0, sizeof (*st)); 356 357 st->rs_str = s; 358 st->rs_ops = ops; 359 360 st->rs_cualloc.cua_version = CUSTR_VERSION; 361 if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0) 362 return (B_FALSE); 363 st->rs_cualloc.cua_arg = st; 364 365 if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) { 366 custr_alloc_fini(&st->rs_cualloc); 367 return (B_FALSE); 368 } 369 370 codeset = nl_langinfo(CODESET); 371 if (codeset != NULL && strcmp(codeset, "UTF-8") == 0) 372 st->rs_isutf8 = B_TRUE; 373 374 return (B_TRUE); 375 } 376 377 char * 378 rust_demangle(const char *s, size_t len, sysdem_ops_t *ops) 379 { 380 rust_state_t st; 381 strview_t sv = { 0 }; 382 boolean_t success = B_FALSE; 383 int e = 0; 384 char *out = NULL; 385 386 if (!rust_init_state(&st, s, ops)) 387 return (NULL); 388 389 sv_init_str(&sv, s, s + len); 390 391 if (!rust_parse_prefix(&st, &sv)) { 392 if (st.rs_error == 0) 393 st.rs_error = EINVAL; 394 goto done; 395 } 396 397 DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv)); 398 399 switch (st.rs_encver) { 400 case RUSTENC_LEGACY: 401 success = rust_demangle_legacy(&st, &sv); 402 break; 403 case RUSTENC_V0: 404 success = rust_demangle_v0(&st, &sv); 405 break; 406 } 407 408 done: 409 if (success) { 410 out = xstrdup(ops, custr_cstr(st.rs_demangled)); 411 if (out == NULL) 412 SET_ERROR(&st); 413 } else { 414 DEMDEBUG("%s: failed, str='%s'", __func__, 415 custr_cstr(st.rs_demangled)); 416 417 st.rs_error = EINVAL; 418 } 419 420 e = st.rs_error; 421 rust_fini_state(&st); 422 if (e > 0) 423 errno = e; 424 425 return (out); 426 } 427