1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2021 Jason King
14 * Copyright 2019 Joyent, Inc.
15 */
16
17 #include <errno.h>
18 #include <langinfo.h>
19 #include <libcustr.h>
20 #include <limits.h>
21 #include <stdarg.h>
22 #include <string.h>
23
24 #include "demangle_int.h"
25 #include "rust.h"
26
27 static void *
rust_cualloc(custr_alloc_t * cua,size_t len)28 rust_cualloc(custr_alloc_t *cua, size_t len)
29 {
30 rust_state_t *st = cua->cua_arg;
31 return (zalloc(st->rs_ops, len));
32 }
33
34 static void
rust_cufree(custr_alloc_t * cua,void * p,size_t len)35 rust_cufree(custr_alloc_t *cua, void *p, size_t len)
36 {
37 rust_state_t *st = cua->cua_arg;
38 xfree(st->rs_ops, p, len);
39 }
40
41 static const custr_alloc_ops_t rust_custr_ops = {
42 .custr_ao_alloc = rust_cualloc,
43 .custr_ao_free = rust_cufree
44 };
45
46 boolean_t
rust_appendc(rust_state_t * st,char c)47 rust_appendc(rust_state_t *st, char c)
48 {
49 custr_t *cus = st->rs_demangled;
50
51 if (HAS_ERROR(st))
52 return (B_FALSE);
53
54 if (st->rs_skip)
55 return (B_TRUE);
56
57 switch (c) {
58 case '\a':
59 return (rust_append(st, "\\a"));
60 case '\b':
61 return (rust_append(st, "\\b"));
62 case '\f':
63 return (rust_append(st, "\\f"));
64 case '\n':
65 return (rust_append(st, "\\n"));
66 case '\r':
67 return (rust_append(st, "\\r"));
68 case '\t':
69 return (rust_append(st, "\\t"));
70 case '\v':
71 return (rust_append(st, "\\v"));
72 case '\\':
73 return (rust_append(st, "\\\\"));
74 }
75
76 if (c < ' ')
77 return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c));
78
79 if (custr_appendc(cus, c) != 0) {
80 SET_ERROR(st);
81 return (B_FALSE);
82 }
83
84 return (B_TRUE);
85 }
86
87 /*
88 * Append a UTF-8 code point. If we're not in a UTF-8 locale, this gets
89 * appended as '\u<hex codepoint>' otherwise the character itself is
90 * added.
91 */
92 boolean_t
rust_append_utf8_c(rust_state_t * st,uint32_t val)93 rust_append_utf8_c(rust_state_t *st, uint32_t val)
94 {
95 custr_t *cus = st->rs_demangled;
96 uint_t n = 0;
97 uint8_t c[4] = { 0 };
98
99 if (HAS_ERROR(st))
100 return (B_FALSE);
101
102 if (!st->rs_isutf8) {
103 if (val < 0x80)
104 return (rust_appendc(st, (char)val));
105 if (val < 0x10000)
106 return (rust_append_printf(st, "\\u%04" PRIx32, val));
107 return (rust_append_printf(st, "\\U%08" PRIx32, val));
108 }
109
110 if (val < 0x80) {
111 return (rust_appendc(st, (char)val));
112 } else if (val < 0x800) {
113 c[0] = 0xc0 | ((val >> 6) & 0x1f);
114 c[1] = 0x80 | (val & 0x3f);
115 n = 2;
116 } else if (val < 0x10000) {
117 c[0] = 0xe0 | ((val >> 12) & 0x0f);
118 c[1] = 0x80 | ((val >> 6) & 0x3f);
119 c[2] = 0x80 | (val & 0x3f);
120 n = 3;
121 } else if (val < 0x110000) {
122 c[0] = 0xf0 | ((val >> 18) & 0x7);
123 c[1] = 0x80 | ((val >> 12) & 0x3f);
124 c[2] = 0x80 | ((val >> 6) & 0x3f);
125 c[3] = 0x80 | (val & 0x3f);
126 n = 4;
127 } else {
128 DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__,
129 val);
130 return (B_FALSE);
131 }
132
133 for (uint_t i = 0; i < n; i++) {
134 if (custr_appendc(cus, c[i]) != 0) {
135 SET_ERROR(st);
136 return (B_FALSE);
137 }
138 }
139
140 return (B_TRUE);
141 }
142
143 boolean_t
rust_append(rust_state_t * st,const char * s)144 rust_append(rust_state_t *st, const char *s)
145 {
146 custr_t *cus = st->rs_demangled;
147
148 if (HAS_ERROR(st))
149 return (B_FALSE);
150
151 if (st->rs_skip)
152 return (B_TRUE);
153
154 if (custr_append(cus, s) != 0) {
155 SET_ERROR(st);
156 return (B_FALSE);
157 }
158
159 return (B_TRUE);
160 }
161
162 boolean_t
rust_append_sv(rust_state_t * restrict st,uint64_t n,strview_t * restrict sv)163 rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv)
164 {
165 if (HAS_ERROR(st))
166 return (B_FALSE);
167
168 if (st->rs_skip) {
169 sv_consume_n(sv, (size_t)n);
170 return (B_TRUE);
171 }
172
173 if (n > sv_remaining(sv)) {
174 DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > "
175 "remaining bytes (%zu)", __func__, n, sv_remaining(sv));
176 st->rs_error = ERANGE;
177 return (B_FALSE);
178 }
179
180 if (n > INT_MAX) {
181 DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n);
182 st->rs_error = ERANGE;
183 return (B_FALSE);
184 }
185
186 if (custr_append_printf(st->rs_demangled, "%.*s",
187 (int)n, sv->sv_first) != 0) {
188 SET_ERROR(st);
189 return (B_FALSE);
190 }
191 sv_consume_n(sv, (size_t)n);
192
193 return (B_TRUE);
194 }
195
196 boolean_t
rust_append_printf(rust_state_t * st,const char * fmt,...)197 rust_append_printf(rust_state_t *st, const char *fmt, ...)
198 {
199 va_list ap;
200 int ret;
201
202 if (HAS_ERROR(st))
203 return (B_FALSE);
204
205 if (st->rs_skip)
206 return (B_TRUE);
207
208 va_start(ap, fmt);
209 ret = custr_append_vprintf(st->rs_demangled, fmt, ap);
210 va_end(ap);
211
212 if (ret == 0)
213 return (B_TRUE);
214 SET_ERROR(st);
215 return (B_FALSE);
216 }
217
218 boolean_t
rust_parse_base10(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)219 rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv,
220 uint64_t *restrict valp)
221 {
222 uint64_t v = 0;
223 char c;
224
225 if (HAS_ERROR(st) || sv_remaining(sv) == 0)
226 return (B_FALSE);
227
228 c = sv_peek(sv, 0);
229
230 /*
231 * Since the legacy rust encoding states that it follows the
232 * Itanium C++ mangling format, we match the behavior of the
233 * Itanium C++ ABI in disallowing leading 0s in decimal numbers.
234 *
235 * For Rust encoding v0, RFC2603 currently has omitted the
236 * actual definition of <decimal-number>. However examination of
237 * other implementations written in tandem with the mangling
238 * implementation suggest that <decimal-number> can be expressed
239 * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and
240 * terminates the token, while any other leading digit allows
241 * parsing to continue until a non-digit is encountered, the
242 * end of the string is encountered, or overflow is encountered.
243 */
244 if (c == '0') {
245 if (st->rs_encver == RUSTENC_V0) {
246 sv_consume_n(sv, 1);
247 *valp = 0;
248 return (B_TRUE);
249 }
250
251 DEMDEBUG("%s: ERROR number starts with leading 0\n",
252 __func__);
253 st->rs_error = EINVAL;
254 return (B_FALSE);
255 } else if (!ISDIGIT(c)) {
256 return (B_FALSE);
257 }
258
259 while (sv_remaining(sv) > 0) {
260 uint64_t cval;
261
262 c = sv_peek(sv, 0);
263 if (!ISDIGIT(c))
264 break;
265 sv_consume_n(sv, 1);
266
267 cval = c - '0';
268
269 if (mul_overflow(v, 10, &v)) {
270 DEMDEBUG("%s: multiplication overflowed\n", __func__);
271 st->rs_error = EOVERFLOW;
272 return (B_FALSE);
273 }
274
275 if (add_overflow(v, cval, &v)) {
276 DEMDEBUG("%s: addition overflowed\n", __func__);
277 st->rs_error = EOVERFLOW;
278 return (B_FALSE);
279 }
280 }
281
282 *valp = v;
283 return (B_TRUE);
284 }
285
286 static boolean_t
rust_parse_prefix(rust_state_t * restrict st,strview_t * restrict sv)287 rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv)
288 {
289 DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv));
290
291 if (HAS_ERROR(st))
292 return (B_FALSE);
293
294 if (!sv_consume_if_c(sv, '_'))
295 return (B_FALSE);
296
297 /*
298 * MacOS prepends an additional '_' -- allow that in case
299 * we're given symbols from a MacOS object.
300 */
301 (void) sv_consume_if_c(sv, '_');
302
303 if (sv_consume_if_c(sv, 'Z')) {
304 /*
305 * Legacy names must start with '[_]_Z'
306 */
307 st->rs_encver = RUSTENC_LEGACY;
308 DEMDEBUG("name is encoded using the rust legacy mangling "
309 "scheme");
310 } else if (sv_consume_if_c(sv, 'R')) {
311 uint64_t ver = 0;
312
313 /*
314 * The non-legacy encoding is versioned. After the initial
315 * 'R' is the version. This isn't spelled out clearly in the
316 * RFC, but many numeric values encoded take an approach of
317 * a value of 0 is omitted, and any digits represent the
318 * value - 1. In other words, in this case, no digits means
319 * version 0, '_R0...' would be version 1, 'R1...' would
320 * be version 2, etc. Currently only version 0 is defined,
321 * but we try to provide a (hopefully) useful message
322 * when debugging, even if we can't use the version value
323 * beyond that.
324 */
325 if (rust_parse_base10(st, sv, &ver)) {
326 DEMDEBUG("%s: ERROR: an unsupported encoding version "
327 "(%" PRIu64 ") was encountered", ver + 1);
328 st->rs_error = ENOTSUP;
329 return (B_FALSE);
330 }
331
332 st->rs_encver = RUSTENC_V0;
333 DEMDEBUG("name is encoded using the v0 mangling scheme");
334 } else {
335 DEMDEBUG("did not find a valid rust prefix");
336 return (B_FALSE);
337 }
338
339 sv_init_sv(&st->rs_orig, sv);
340 return (B_TRUE);
341 }
342
343 static void
rust_fini_state(rust_state_t * st)344 rust_fini_state(rust_state_t *st)
345 {
346 custr_free(st->rs_demangled);
347 custr_alloc_fini(&st->rs_cualloc);
348 }
349
350 static boolean_t
rust_init_state(rust_state_t * restrict st,const char * s,sysdem_ops_t * ops)351 rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops)
352 {
353 const char *codeset;
354
355 (void) memset(st, 0, sizeof (*st));
356
357 st->rs_str = s;
358 st->rs_ops = ops;
359
360 st->rs_cualloc.cua_version = CUSTR_VERSION;
361 if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0)
362 return (B_FALSE);
363 st->rs_cualloc.cua_arg = st;
364
365 if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) {
366 custr_alloc_fini(&st->rs_cualloc);
367 return (B_FALSE);
368 }
369
370 codeset = nl_langinfo(CODESET);
371 if (codeset != NULL && strcmp(codeset, "UTF-8") == 0)
372 st->rs_isutf8 = B_TRUE;
373
374 return (B_TRUE);
375 }
376
377 char *
rust_demangle(const char * s,size_t len,sysdem_ops_t * ops)378 rust_demangle(const char *s, size_t len, sysdem_ops_t *ops)
379 {
380 rust_state_t st;
381 strview_t sv = { 0 };
382 boolean_t success = B_FALSE;
383 int e = 0;
384 char *out = NULL;
385
386 if (!rust_init_state(&st, s, ops))
387 return (NULL);
388
389 sv_init_str(&sv, s, s + len);
390
391 if (!rust_parse_prefix(&st, &sv)) {
392 if (st.rs_error == 0)
393 st.rs_error = EINVAL;
394 goto done;
395 }
396
397 DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv));
398
399 switch (st.rs_encver) {
400 case RUSTENC_LEGACY:
401 success = rust_demangle_legacy(&st, &sv);
402 break;
403 case RUSTENC_V0:
404 success = rust_demangle_v0(&st, &sv);
405 break;
406 }
407
408 done:
409 if (success) {
410 out = xstrdup(ops, custr_cstr(st.rs_demangled));
411 if (out == NULL)
412 SET_ERROR(&st);
413 } else {
414 DEMDEBUG("%s: failed, str='%s'", __func__,
415 custr_cstr(st.rs_demangled));
416
417 st.rs_error = EINVAL;
418 }
419
420 e = st.rs_error;
421 rust_fini_state(&st);
422 if (e > 0)
423 errno = e;
424
425 return (out);
426 }
427