xref: /illumos-gate/usr/src/lib/libdemangle/common/rust.c (revision fec047081731fd77caf46ec0471c501b2cb33894)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021 Jason King
14  * Copyright 2019 Joyent, Inc.
15  */
16 
17 #include <errno.h>
18 #include <langinfo.h>
19 #include <libcustr.h>
20 #include <limits.h>
21 #include <stdarg.h>
22 #include <string.h>
23 
24 #include "demangle_int.h"
25 #include "rust.h"
26 
27 static void *
28 rust_cualloc(custr_alloc_t *cua, size_t len)
29 {
30 	rust_state_t *st = cua->cua_arg;
31 	return (zalloc(st->rs_ops, len));
32 }
33 
34 static void
35 rust_cufree(custr_alloc_t *cua, void *p, size_t len)
36 {
37 	rust_state_t *st = cua->cua_arg;
38 	xfree(st->rs_ops, p, len);
39 }
40 
41 static const custr_alloc_ops_t rust_custr_ops = {
42 	.custr_ao_alloc = rust_cualloc,
43 	.custr_ao_free = rust_cufree
44 };
45 
46 boolean_t
47 rust_appendc(rust_state_t *st, char c)
48 {
49 	custr_t *cus = st->rs_demangled;
50 
51 	if (HAS_ERROR(st))
52 		return (B_FALSE);
53 
54 	if (st->rs_skip)
55 		return (B_TRUE);
56 
57 	switch (c) {
58 	case '\a':
59 		return (rust_append(st, "\\a"));
60 	case '\b':
61 		return (rust_append(st, "\\b"));
62 	case '\f':
63 		return (rust_append(st, "\\f"));
64 	case '\n':
65 		return (rust_append(st, "\\n"));
66 	case '\r':
67 		return (rust_append(st, "\\r"));
68 	case '\t':
69 		return (rust_append(st, "\\t"));
70 	case '\v':
71 		return (rust_append(st, "\\v"));
72 	case '\\':
73 		return (rust_append(st, "\\\\"));
74 	}
75 
76 	if (c < ' ')
77 		return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c));
78 
79 	if (custr_appendc(cus, c) != 0) {
80 		SET_ERROR(st);
81 		return (B_FALSE);
82 	}
83 
84 	return (B_TRUE);
85 }
86 
87 /*
88  * Append a UTF-8 code point. If we're not in a UTF-8 locale, this gets
89  * appended as '\u<hex codepoint>' otherwise the character itself is
90  * added.
91  */
92 boolean_t
93 rust_append_utf8_c(rust_state_t *st, uint32_t val)
94 {
95 	custr_t *cus = st->rs_demangled;
96 	uint_t n = 0;
97 	uint8_t c[4] = { 0 };
98 
99 	if (HAS_ERROR(st))
100 		return (B_FALSE);
101 
102 	if (!st->rs_isutf8) {
103 		if (val < 0x80)
104 			return (rust_appendc(st, (char)val));
105 		if (val < 0x10000)
106 			return (rust_append_printf(st, "\\u%04" PRIx32, val));
107 		return (rust_append_printf(st, "\\U%08" PRIx32, val));
108 	}
109 
110 	if (val < 0x80) {
111 		return (rust_appendc(st, (char)val));
112 	} else if (val < 0x800) {
113 		c[0] = 0xc0 | ((val >> 6) & 0x1f);
114 		c[1] = 0x80 | (val & 0x3f);
115 		n = 2;
116 	} else if (val < 0x10000) {
117 		c[0] = 0xe0 | ((val >> 12) & 0x0f);
118 		c[1] = 0x80 | ((val >> 6) & 0x3f);
119 		c[2] = 0x80 | (val & 0x3f);
120 		n = 3;
121 	} else if (val < 0x110000) {
122 		c[0] = 0xf0 | ((val >> 18) & 0x7);
123 		c[1] = 0x80 | ((val >> 12) & 0x3f);
124 		c[2] = 0x80 | ((val >> 6) & 0x3f);
125 		c[3] = 0x80 | (val & 0x3f);
126 		n = 4;
127 	} else {
128 		DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__,
129 		    val);
130 		return (B_FALSE);
131 	}
132 
133 	for (uint_t i = 0; i < n; i++) {
134 		if (custr_appendc(cus, c[i]) != 0) {
135 			SET_ERROR(st);
136 			return (B_FALSE);
137 		}
138 	}
139 
140 	return (B_TRUE);
141 }
142 
143 boolean_t
144 rust_append(rust_state_t *st, const char *s)
145 {
146 	custr_t *cus = st->rs_demangled;
147 
148 	if (HAS_ERROR(st))
149 		return (B_FALSE);
150 
151 	if (st->rs_skip)
152 		return (B_TRUE);
153 
154 	if (custr_append(cus, s) != 0) {
155 		SET_ERROR(st);
156 		return (B_FALSE);
157 	}
158 
159 	return (B_TRUE);
160 }
161 
162 boolean_t
163 rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv)
164 {
165 	if (HAS_ERROR(st))
166 		return (B_FALSE);
167 
168 	if (st->rs_skip) {
169 		sv_consume_n(sv, (size_t)n);
170 		return (B_TRUE);
171 	}
172 
173 	if (n > sv_remaining(sv)) {
174 		DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > "
175 		    "remaining bytes (%zu)", __func__, n, sv_remaining(sv));
176 		st->rs_error = ERANGE;
177 		return (B_FALSE);
178 	}
179 
180 	if (n > INT_MAX) {
181 		DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n);
182 		st->rs_error = ERANGE;
183 		return (B_FALSE);
184 	}
185 
186 	if (custr_append_printf(st->rs_demangled, "%.*s",
187 	    (int)n, sv->sv_first) != 0) {
188 		SET_ERROR(st);
189 		return (B_FALSE);
190 	}
191 	sv_consume_n(sv, (size_t)n);
192 
193 	return (B_TRUE);
194 }
195 
196 boolean_t
197 rust_append_printf(rust_state_t *st, const char *fmt, ...)
198 {
199 	va_list ap;
200 	int ret;
201 
202 	if (HAS_ERROR(st))
203 		return (B_FALSE);
204 
205 	if (st->rs_skip)
206 		return (B_TRUE);
207 
208 	va_start(ap, fmt);
209 	ret = custr_append_vprintf(st->rs_demangled, fmt, ap);
210 	va_end(ap);
211 
212 	if (ret == 0)
213 		return (B_TRUE);
214 	SET_ERROR(st);
215 	return (B_FALSE);
216 }
217 
218 boolean_t
219 rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv,
220     uint64_t *restrict valp)
221 {
222 	uint64_t v = 0;
223 	char c;
224 
225 	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
226 		return (B_FALSE);
227 
228 	c = sv_peek(sv, 0);
229 
230 	/*
231 	 * Since the legacy rust encoding states that it follows the
232 	 * Itanium C++ mangling format, we match the behavior of the
233 	 * Itanium C++ ABI in disallowing leading 0s in decimal numbers.
234 	 *
235 	 * For Rust encoding v0, RFC2603 currently has omitted the
236 	 * actual definition of <decimal-number>. However examination of
237 	 * other implementations written in tandem with the mangling
238 	 * implementation suggest that <decimal-number> can be expressed
239 	 * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and
240 	 * terminates the token, while any other leading digit allows
241 	 * parsing to continue until a non-digit is encountered, the
242 	 * end of the string is encountered, or overflow is encountered.
243 	 */
244 	if (c == '0') {
245 		if (st->rs_encver == RUSTENC_V0) {
246 			sv_consume_n(sv, 1);
247 			*valp = 0;
248 			return (B_TRUE);
249 		}
250 
251 		DEMDEBUG("%s: ERROR number starts with leading 0\n",
252 		    __func__);
253 		st->rs_error = EINVAL;
254 		return (B_FALSE);
255 	} else if (!ISDIGIT(c)) {
256 		return (B_FALSE);
257 	}
258 
259 	while (sv_remaining(sv) > 0) {
260 		uint64_t cval;
261 
262 		c = sv_peek(sv, 0);
263 		if (!ISDIGIT(c))
264 			break;
265 		sv_consume_n(sv, 1);
266 
267 		cval = c - '0';
268 
269 		if (mul_overflow(v, 10, &v)) {
270 			DEMDEBUG("%s: multiplication overflowed\n", __func__);
271 			st->rs_error = EOVERFLOW;
272 			return (B_FALSE);
273 		}
274 
275 		if (add_overflow(v, cval, &v)) {
276 			DEMDEBUG("%s: addition overflowed\n", __func__);
277 			st->rs_error = EOVERFLOW;
278 			return (B_FALSE);
279 		}
280 	}
281 
282 	*valp = v;
283 	return (B_TRUE);
284 }
285 
286 static boolean_t
287 rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv)
288 {
289 	DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv));
290 
291 	if (HAS_ERROR(st))
292 		return (B_FALSE);
293 
294 	if (!sv_consume_if_c(sv, '_'))
295 		return (B_FALSE);
296 
297 	/*
298 	 * MacOS prepends an additional '_' -- allow that in case
299 	 * we're given symbols from a MacOS object.
300 	 */
301 	(void) sv_consume_if_c(sv, '_');
302 
303 	if (sv_consume_if_c(sv, 'Z')) {
304 		/*
305 		 * Legacy names must start with '[_]_Z'
306 		 */
307 		st->rs_encver = RUSTENC_LEGACY;
308 		DEMDEBUG("name is encoded using the rust legacy mangling "
309 		    "scheme");
310 	} else if (sv_consume_if_c(sv, 'R')) {
311 		uint64_t ver = 0;
312 
313 		/*
314 		 * The non-legacy encoding is versioned. After the initial
315 		 * 'R' is the version. This isn't spelled out clearly in the
316 		 * RFC, but many numeric values encoded take an approach of
317 		 * a value of 0 is omitted, and any digits represent the
318 		 * value - 1. In other words, in this case, no digits means
319 		 * version 0, '_R0...' would be version 1, 'R1...' would
320 		 * be version 2, etc. Currently only version 0 is defined,
321 		 * but we try to provide a (hopefully) useful message
322 		 * when debugging, even if we can't use the version value
323 		 * beyond that.
324 		 */
325 		if (rust_parse_base10(st, sv, &ver)) {
326 			DEMDEBUG("%s: ERROR: an unsupported encoding version "
327 			    "(%" PRIu64 ") was encountered", ver + 1);
328 			st->rs_error = ENOTSUP;
329 			return (B_FALSE);
330 		}
331 
332 		st->rs_encver = RUSTENC_V0;
333 		DEMDEBUG("name is encoded using the v0 mangling scheme");
334 	} else {
335 		DEMDEBUG("did not find a valid rust prefix");
336 		return (B_FALSE);
337 	}
338 
339 	sv_init_sv(&st->rs_orig, sv);
340 	return (B_TRUE);
341 }
342 
343 static void
344 rust_fini_state(rust_state_t *st)
345 {
346 	custr_free(st->rs_demangled);
347 	custr_alloc_fini(&st->rs_cualloc);
348 }
349 
350 static boolean_t
351 rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops)
352 {
353 	const char *codeset;
354 
355 	(void) memset(st, 0, sizeof (*st));
356 
357 	st->rs_str = s;
358 	st->rs_ops = ops;
359 
360 	st->rs_cualloc.cua_version = CUSTR_VERSION;
361 	if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0)
362 		return (B_FALSE);
363 	st->rs_cualloc.cua_arg = st;
364 
365 	if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) {
366 		custr_alloc_fini(&st->rs_cualloc);
367 		return (B_FALSE);
368 	}
369 
370 	codeset = nl_langinfo(CODESET);
371 	if (codeset != NULL && strcmp(codeset, "UTF-8") == 0)
372 		st->rs_isutf8 = B_TRUE;
373 
374 	return (B_TRUE);
375 }
376 
377 char *
378 rust_demangle(const char *s, size_t len, sysdem_ops_t *ops)
379 {
380 	rust_state_t st;
381 	strview_t sv = { 0 };
382 	boolean_t success = B_FALSE;
383 	int e = 0;
384 	char *out = NULL;
385 
386 	if (!rust_init_state(&st, s, ops))
387 		return (NULL);
388 
389 	sv_init_str(&sv, s, s + len);
390 
391 	if (!rust_parse_prefix(&st, &sv)) {
392 		if (st.rs_error == 0)
393 			st.rs_error = EINVAL;
394 		goto done;
395 	}
396 
397 	DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv));
398 
399 	switch (st.rs_encver) {
400 	case RUSTENC_LEGACY:
401 		success = rust_demangle_legacy(&st, &sv);
402 		break;
403 	case RUSTENC_V0:
404 		success = rust_demangle_v0(&st, &sv);
405 		break;
406 	}
407 
408 done:
409 	if (success) {
410 		out = xstrdup(ops, custr_cstr(st.rs_demangled));
411 		if (out == NULL)
412 			SET_ERROR(&st);
413 	} else {
414 		DEMDEBUG("%s: failed, str='%s'", __func__,
415 		    custr_cstr(st.rs_demangled));
416 
417 		st.rs_error = EINVAL;
418 	}
419 
420 	e = st.rs_error;
421 	rust_fini_state(&st);
422 	if (e > 0)
423 		errno = e;
424 
425 	return (out);
426 }
427