xref: /illumos-gate/usr/src/lib/libdemangle/common/rust.c (revision 1cd083931cfd3fb8617c1178f62bce417cfa6af2)
1  /*
2   * This file and its contents are supplied under the terms of the
3   * Common Development and Distribution License ("CDDL"), version 1.0.
4   * You may only use this file in accordance with the terms of version
5   * 1.0 of the CDDL.
6   *
7   * A full copy of the text of the CDDL should have accompanied this
8   * source.  A copy of the CDDL is also available via the Internet at
9   * http://www.illumos.org/license/CDDL.
10   */
11  
12  /*
13   * Copyright 2021 Jason King
14   * Copyright 2019 Joyent, Inc.
15   */
16  
17  #include <errno.h>
18  #include <langinfo.h>
19  #include <libcustr.h>
20  #include <limits.h>
21  #include <stdarg.h>
22  #include <string.h>
23  
24  #include "demangle_int.h"
25  #include "rust.h"
26  
27  static void *
rust_cualloc(custr_alloc_t * cua,size_t len)28  rust_cualloc(custr_alloc_t *cua, size_t len)
29  {
30  	rust_state_t *st = cua->cua_arg;
31  	return (zalloc(st->rs_ops, len));
32  }
33  
34  static void
rust_cufree(custr_alloc_t * cua,void * p,size_t len)35  rust_cufree(custr_alloc_t *cua, void *p, size_t len)
36  {
37  	rust_state_t *st = cua->cua_arg;
38  	xfree(st->rs_ops, p, len);
39  }
40  
41  static const custr_alloc_ops_t rust_custr_ops = {
42  	.custr_ao_alloc = rust_cualloc,
43  	.custr_ao_free = rust_cufree
44  };
45  
46  boolean_t
rust_appendc(rust_state_t * st,char c)47  rust_appendc(rust_state_t *st, char c)
48  {
49  	custr_t *cus = st->rs_demangled;
50  
51  	if (HAS_ERROR(st))
52  		return (B_FALSE);
53  
54  	if (st->rs_skip)
55  		return (B_TRUE);
56  
57  	switch (c) {
58  	case '\a':
59  		return (rust_append(st, "\\a"));
60  	case '\b':
61  		return (rust_append(st, "\\b"));
62  	case '\f':
63  		return (rust_append(st, "\\f"));
64  	case '\n':
65  		return (rust_append(st, "\\n"));
66  	case '\r':
67  		return (rust_append(st, "\\r"));
68  	case '\t':
69  		return (rust_append(st, "\\t"));
70  	case '\v':
71  		return (rust_append(st, "\\v"));
72  	case '\\':
73  		return (rust_append(st, "\\\\"));
74  	}
75  
76  	if (c < ' ')
77  		return (rust_append_printf(st, "\\x%02" PRIx8, (uint8_t)c));
78  
79  	if (custr_appendc(cus, c) != 0) {
80  		SET_ERROR(st);
81  		return (B_FALSE);
82  	}
83  
84  	return (B_TRUE);
85  }
86  
87  /*
88   * Append a UTF-8 code point. If we're not in a UTF-8 locale, this gets
89   * appended as '\u<hex codepoint>' otherwise the character itself is
90   * added.
91   */
92  boolean_t
rust_append_utf8_c(rust_state_t * st,uint32_t val)93  rust_append_utf8_c(rust_state_t *st, uint32_t val)
94  {
95  	custr_t *cus = st->rs_demangled;
96  	uint_t n = 0;
97  	uint8_t c[4] = { 0 };
98  
99  	if (HAS_ERROR(st))
100  		return (B_FALSE);
101  
102  	if (!st->rs_isutf8) {
103  		if (val < 0x80)
104  			return (rust_appendc(st, (char)val));
105  		if (val < 0x10000)
106  			return (rust_append_printf(st, "\\u%04" PRIx32, val));
107  		return (rust_append_printf(st, "\\U%08" PRIx32, val));
108  	}
109  
110  	if (val < 0x80) {
111  		return (rust_appendc(st, (char)val));
112  	} else if (val < 0x800) {
113  		c[0] = 0xc0 | ((val >> 6) & 0x1f);
114  		c[1] = 0x80 | (val & 0x3f);
115  		n = 2;
116  	} else if (val < 0x10000) {
117  		c[0] = 0xe0 | ((val >> 12) & 0x0f);
118  		c[1] = 0x80 | ((val >> 6) & 0x3f);
119  		c[2] = 0x80 | (val & 0x3f);
120  		n = 3;
121  	} else if (val < 0x110000) {
122  		c[0] = 0xf0 | ((val >> 18) & 0x7);
123  		c[1] = 0x80 | ((val >> 12) & 0x3f);
124  		c[2] = 0x80 | ((val >> 6) & 0x3f);
125  		c[3] = 0x80 | (val & 0x3f);
126  		n = 4;
127  	} else {
128  		DEMDEBUG("%s: invalid unicode character \\u%" PRIx32, __func__,
129  		    val);
130  		return (B_FALSE);
131  	}
132  
133  	for (uint_t i = 0; i < n; i++) {
134  		if (custr_appendc(cus, c[i]) != 0) {
135  			SET_ERROR(st);
136  			return (B_FALSE);
137  		}
138  	}
139  
140  	return (B_TRUE);
141  }
142  
143  boolean_t
rust_append(rust_state_t * st,const char * s)144  rust_append(rust_state_t *st, const char *s)
145  {
146  	custr_t *cus = st->rs_demangled;
147  
148  	if (HAS_ERROR(st))
149  		return (B_FALSE);
150  
151  	if (st->rs_skip)
152  		return (B_TRUE);
153  
154  	if (custr_append(cus, s) != 0) {
155  		SET_ERROR(st);
156  		return (B_FALSE);
157  	}
158  
159  	return (B_TRUE);
160  }
161  
162  boolean_t
rust_append_sv(rust_state_t * restrict st,uint64_t n,strview_t * restrict sv)163  rust_append_sv(rust_state_t *restrict st, uint64_t n, strview_t *restrict sv)
164  {
165  	if (HAS_ERROR(st))
166  		return (B_FALSE);
167  
168  	if (st->rs_skip) {
169  		sv_consume_n(sv, (size_t)n);
170  		return (B_TRUE);
171  	}
172  
173  	if (n > sv_remaining(sv)) {
174  		DEMDEBUG("%s: ERROR amount to append (%" PRIu64 ") > "
175  		    "remaining bytes (%zu)", __func__, n, sv_remaining(sv));
176  		st->rs_error = ERANGE;
177  		return (B_FALSE);
178  	}
179  
180  	if (n > INT_MAX) {
181  		DEMDEBUG("%s: amount (%" PRIu64 ") > INT_MAX", __func__, n);
182  		st->rs_error = ERANGE;
183  		return (B_FALSE);
184  	}
185  
186  	if (custr_append_printf(st->rs_demangled, "%.*s",
187  	    (int)n, sv->sv_first) != 0) {
188  		SET_ERROR(st);
189  		return (B_FALSE);
190  	}
191  	sv_consume_n(sv, (size_t)n);
192  
193  	return (B_TRUE);
194  }
195  
196  boolean_t
rust_append_printf(rust_state_t * st,const char * fmt,...)197  rust_append_printf(rust_state_t *st, const char *fmt, ...)
198  {
199  	va_list ap;
200  	int ret;
201  
202  	if (HAS_ERROR(st))
203  		return (B_FALSE);
204  
205  	if (st->rs_skip)
206  		return (B_TRUE);
207  
208  	va_start(ap, fmt);
209  	ret = custr_append_vprintf(st->rs_demangled, fmt, ap);
210  	va_end(ap);
211  
212  	if (ret == 0)
213  		return (B_TRUE);
214  	SET_ERROR(st);
215  	return (B_FALSE);
216  }
217  
218  boolean_t
rust_parse_base10(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)219  rust_parse_base10(rust_state_t *restrict st, strview_t *restrict sv,
220      uint64_t *restrict valp)
221  {
222  	uint64_t v = 0;
223  	char c;
224  
225  	if (HAS_ERROR(st) || sv_remaining(sv) == 0)
226  		return (B_FALSE);
227  
228  	c = sv_peek(sv, 0);
229  
230  	/*
231  	 * Since the legacy rust encoding states that it follows the
232  	 * Itanium C++ mangling format, we match the behavior of the
233  	 * Itanium C++ ABI in disallowing leading 0s in decimal numbers.
234  	 *
235  	 * For Rust encoding v0, RFC2603 currently has omitted the
236  	 * actual definition of <decimal-number>. However examination of
237  	 * other implementations written in tandem with the mangling
238  	 * implementation suggest that <decimal-number> can be expressed
239  	 * by the eregex: 0|[1-9][0-9]* -- that is a '0' is allowed and
240  	 * terminates the token, while any other leading digit allows
241  	 * parsing to continue until a non-digit is encountered, the
242  	 * end of the string is encountered, or overflow is encountered.
243  	 */
244  	if (c == '0') {
245  		if (st->rs_encver == RUSTENC_V0) {
246  			sv_consume_n(sv, 1);
247  			*valp = 0;
248  			return (B_TRUE);
249  		}
250  
251  		DEMDEBUG("%s: ERROR number starts with leading 0\n",
252  		    __func__);
253  		st->rs_error = EINVAL;
254  		return (B_FALSE);
255  	} else if (!ISDIGIT(c)) {
256  		return (B_FALSE);
257  	}
258  
259  	while (sv_remaining(sv) > 0) {
260  		uint64_t cval;
261  
262  		c = sv_peek(sv, 0);
263  		if (!ISDIGIT(c))
264  			break;
265  		sv_consume_n(sv, 1);
266  
267  		cval = c - '0';
268  
269  		if (mul_overflow(v, 10, &v)) {
270  			DEMDEBUG("%s: multiplication overflowed\n", __func__);
271  			st->rs_error = EOVERFLOW;
272  			return (B_FALSE);
273  		}
274  
275  		if (add_overflow(v, cval, &v)) {
276  			DEMDEBUG("%s: addition overflowed\n", __func__);
277  			st->rs_error = EOVERFLOW;
278  			return (B_FALSE);
279  		}
280  	}
281  
282  	*valp = v;
283  	return (B_TRUE);
284  }
285  
286  static boolean_t
rust_parse_prefix(rust_state_t * restrict st,strview_t * restrict sv)287  rust_parse_prefix(rust_state_t *restrict st, strview_t *restrict sv)
288  {
289  	DEMDEBUG("checking prefix in '%.*s'", SV_PRINT(sv));
290  
291  	if (HAS_ERROR(st))
292  		return (B_FALSE);
293  
294  	if (!sv_consume_if_c(sv, '_'))
295  		return (B_FALSE);
296  
297  	/*
298  	 * MacOS prepends an additional '_' -- allow that in case
299  	 * we're given symbols from a MacOS object.
300  	 */
301  	(void) sv_consume_if_c(sv, '_');
302  
303  	if (sv_consume_if_c(sv, 'Z')) {
304  		/*
305  		 * Legacy names must start with '[_]_Z'
306  		 */
307  		st->rs_encver = RUSTENC_LEGACY;
308  		DEMDEBUG("name is encoded using the rust legacy mangling "
309  		    "scheme");
310  	} else if (sv_consume_if_c(sv, 'R')) {
311  		uint64_t ver = 0;
312  
313  		/*
314  		 * The non-legacy encoding is versioned. After the initial
315  		 * 'R' is the version. This isn't spelled out clearly in the
316  		 * RFC, but many numeric values encoded take an approach of
317  		 * a value of 0 is omitted, and any digits represent the
318  		 * value - 1. In other words, in this case, no digits means
319  		 * version 0, '_R0...' would be version 1, 'R1...' would
320  		 * be version 2, etc. Currently only version 0 is defined,
321  		 * but we try to provide a (hopefully) useful message
322  		 * when debugging, even if we can't use the version value
323  		 * beyond that.
324  		 */
325  		if (rust_parse_base10(st, sv, &ver)) {
326  			DEMDEBUG("%s: ERROR: an unsupported encoding version "
327  			    "(%" PRIu64 ") was encountered", ver + 1);
328  			st->rs_error = ENOTSUP;
329  			return (B_FALSE);
330  		}
331  
332  		st->rs_encver = RUSTENC_V0;
333  		DEMDEBUG("name is encoded using the v0 mangling scheme");
334  	} else {
335  		DEMDEBUG("did not find a valid rust prefix");
336  		return (B_FALSE);
337  	}
338  
339  	sv_init_sv(&st->rs_orig, sv);
340  	return (B_TRUE);
341  }
342  
343  static void
rust_fini_state(rust_state_t * st)344  rust_fini_state(rust_state_t *st)
345  {
346  	custr_free(st->rs_demangled);
347  	custr_alloc_fini(&st->rs_cualloc);
348  }
349  
350  static boolean_t
rust_init_state(rust_state_t * restrict st,const char * s,sysdem_ops_t * ops)351  rust_init_state(rust_state_t *restrict st, const char *s, sysdem_ops_t *ops)
352  {
353  	const char *codeset;
354  
355  	(void) memset(st, 0, sizeof (*st));
356  
357  	st->rs_str = s;
358  	st->rs_ops = ops;
359  
360  	st->rs_cualloc.cua_version = CUSTR_VERSION;
361  	if (custr_alloc_init(&st->rs_cualloc, &rust_custr_ops) != 0)
362  		return (B_FALSE);
363  	st->rs_cualloc.cua_arg = st;
364  
365  	if (custr_xalloc(&st->rs_demangled, &st->rs_cualloc) != 0) {
366  		custr_alloc_fini(&st->rs_cualloc);
367  		return (B_FALSE);
368  	}
369  
370  	codeset = nl_langinfo(CODESET);
371  	if (codeset != NULL && strcmp(codeset, "UTF-8") == 0)
372  		st->rs_isutf8 = B_TRUE;
373  
374  	return (B_TRUE);
375  }
376  
377  char *
rust_demangle(const char * s,size_t len,sysdem_ops_t * ops)378  rust_demangle(const char *s, size_t len, sysdem_ops_t *ops)
379  {
380  	rust_state_t st;
381  	strview_t sv = { 0 };
382  	boolean_t success = B_FALSE;
383  	int e = 0;
384  	char *out = NULL;
385  
386  	if (!rust_init_state(&st, s, ops))
387  		return (NULL);
388  
389  	sv_init_str(&sv, s, s + len);
390  
391  	if (!rust_parse_prefix(&st, &sv)) {
392  		if (st.rs_error == 0)
393  			st.rs_error = EINVAL;
394  		goto done;
395  	}
396  
397  	DEMDEBUG("parsed prefix; remaining string='%.*s'", SV_PRINT(&sv));
398  
399  	switch (st.rs_encver) {
400  	case RUSTENC_LEGACY:
401  		success = rust_demangle_legacy(&st, &sv);
402  		break;
403  	case RUSTENC_V0:
404  		success = rust_demangle_v0(&st, &sv);
405  		break;
406  	}
407  
408  done:
409  	if (success) {
410  		out = xstrdup(ops, custr_cstr(st.rs_demangled));
411  		if (out == NULL)
412  			SET_ERROR(&st);
413  	} else {
414  		DEMDEBUG("%s: failed, str='%s'", __func__,
415  		    custr_cstr(st.rs_demangled));
416  
417  		st.rs_error = EINVAL;
418  	}
419  
420  	e = st.rs_error;
421  	rust_fini_state(&st);
422  	if (e > 0)
423  		errno = e;
424  
425  	return (out);
426  }
427