1 // SPDX-License-Identifier: Apache-2.0 OR MIT
2
3 // The contents of this file come from the Rust rustc-demangle library, hosted
4 // in the <https://github.com/rust-lang/rustc-demangle> repository, licensed
5 // under "Apache-2.0 OR MIT". For copyright details, see
6 // <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>.
7 // Please note that the file should be kept as close as possible to upstream.
8
9 // Code for demangling Rust symbols. This code is mostly
10 // a line-by-line translation of the Rust code in `rustc-demangle`.
11
// You can find the latest version of this code at <https://github.com/rust-lang/rustc-demangle>.
13
14 #include <stdint.h>
15 #include <stddef.h>
16 #include <string.h>
17 #include <stdbool.h>
18 #include <sys/param.h>
19 #include <stdio.h>
20
21 #include "demangle-rust-v0.h"
22
23 #if defined(__GNUC__) || defined(__clang__)
24 #define NODISCARD __attribute__((warn_unused_result))
25 #else
26 #define NODISCARD
27 #endif
28
29 #define MAX_DEPTH 500
30
// Result of a parsing step. DemangleOk is the first enumerator (value 0);
// the other values are turned into text by demangle_error_message().
typedef enum {
    DemangleOk,       // no error
    DemangleInvalid,  // reported as "{invalid syntax}"
    DemangleRecursed, // reported as "{recursion limit reached}"
    DemangleBug,      // reported as "{bug}"
} demangle_status;
37
// A v0 symbol accepted by rust_demangle_v0_demangle(): the mangled text with
// its `_R` / `R` / `__R` prefix removed.
struct demangle_v0 {
    const char *mangled; // first byte after the prefix (points into the caller's string)
    size_t mangled_len;  // length of the symbol minus the prefix
};
42
// NOTE(review): not referenced in the visible part of this file; presumably
// the counterpart of `struct demangle_v0` for legacy-mangled symbols - confirm
// against the code that fills it in.
struct demangle_legacy {
    const char *mangled;
    size_t mangled_len;
    size_t elements;
};
48
// Private version of memrchr to avoid requiring _GNU_SOURCE.
//
// Scans the first `n` bytes of `s` from the end towards the start and returns
// a pointer to the last byte equal to `c`, or NULL if there is none (always
// NULL for n == 0). As with the standard mem* functions, `c` is converted to
// `unsigned char` before comparing, so e.g. -1 matches a 0xff byte.
static void *demangle_memrchr(const void *s, int c, size_t n) {
    const unsigned char *bytes = s;
    const unsigned char needle = (unsigned char)c;
    while (n != 0) {
        n--;
        if (bytes[n] == needle) {
            return (void *)&bytes[n];
        }
    }
    return NULL;
}
59
60
// Reports whether `ch` is a control character: anything below 0x20, or in
// the range 0x7f..0x9f. (Technically a Unicode table, but this property
// really is that simple.)
static bool unicode_iscontrol(uint32_t ch) {
    if (ch < 0x20) {
        return true;
    }
    if (ch < 0x7f) {
        return false;
    }
    return ch < 0xa0;
}
66
// "Good enough" tables: the only consequence of their imprecision is that,
// when printing *constant strings*, some characters come out as `\u{abcd}`
// instead of as themselves.
//
// They are kept as separate functions so they can easily be replaced with
// real tables if desired.
//
// Only printable ASCII (0x20..0x7e) is reported as printable.
static bool unicode_isprint(uint32_t ch) {
    return ch >= 0x20 && ch < 0x7f;
}
81
// Stub table: always returns false, i.e. no code point is treated as a
// grapheme-extending character. `ch` is deliberately unused.
static bool unicode_isgraphemextend(uint32_t ch) {
    (void)ch;
    return false;
}
86
// Returns true when none of the first `s_len` bytes of `s` has its high bit
// set (vacuously true for s_len == 0).
static bool str_isascii(const char *s, size_t s_len) {
    const char *end = s + s_len;
    for (const char *p = s; p != end; p++) {
        if ((*p & 0x80) != 0) {
            return false;
        }
    }
    return true;
}
96
// Result of punycode_decode().
typedef enum {
    PunycodeOk,    // decoding succeeded; the output buffer and length are valid
    PunycodeError  // malformed input, arithmetic overflow, or output too long
} punycode_status;
101
// Cursor over a mangled symbol.
struct parser {
    // the parser assumes that `sym` has a safe "terminating byte". It might be NUL,
    // but it might also be something else if a symbol is "truncated".
    const char *sym;
    size_t sym_len; // number of bytes of `sym` that belong to the symbol
    size_t next;    // index of the next byte to read; the parser helpers never move it past sym_len
    uint32_t depth; // nesting counter, bumped by parser_push_depth() and capped at MAX_DEPTH
};
110
// Parser state plus the output buffer that demangled text is written to.
struct printer {
    demangle_status status; // if status == 0 parser is valid
    struct parser parser;
    char *out; // NULL for no output [in which case out_len is not decremented]
    size_t out_len; // bytes still available at `out`
    uint32_t bound_lifetime_depth; // number of lifetimes bound by the enclosing `for<...>` binders
    bool alternate; // copied from the caller of rust_demangle_v0_display_demangle(); NOTE(review): its effect is not visible in this part of the file
};
119
// Forward declarations: these printers are called before their definitions
// appear further down in the file.
static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value);
static NODISCARD overflow_status printer_print_type(struct printer *printer);
static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value);
123
try_parse_path(struct parser * parser)124 static NODISCARD demangle_status try_parse_path(struct parser *parser) {
125 struct printer printer = {
126 DemangleOk,
127 *parser,
128 NULL,
129 SIZE_MAX,
130 0,
131 false
132 };
133 overflow_status ignore = printer_print_path(&printer, false); // can't fail since no output
134 (void)ignore;
135 *parser = printer.parser;
136 return printer.status;
137 }
138
rust_demangle_v0_demangle(const char * s,size_t s_len,struct demangle_v0 * res,const char ** rest)139 NODISCARD static demangle_status rust_demangle_v0_demangle(const char *s, size_t s_len, struct demangle_v0 *res, const char **rest) {
140 if (s_len > strlen(s)) {
141 // s_len only exists to shorten the string, this is not a buffer API
142 return DemangleInvalid;
143 }
144
145 const char *inner;
146 size_t inner_len;
147 if (s_len >= 2 && !strncmp(s, "_R", strlen("_R"))) {
148 inner = s+2;
149 inner_len = s_len - 2;
150 } else if (s_len >= 1 && !strncmp(s, "R", strlen("R"))) {
151 // On Windows, dbghelp strips leading underscores, so we accept "R..."
152 // form too.
153 inner = s+1;
154 inner_len = s_len - 1;
155 } else if (s_len >= 3 && !strncmp(s, "__R", strlen("__R"))) {
156 // On OSX, symbols are prefixed with an extra _
157 inner = s+3;
158 inner_len = s_len - 3;
159 } else {
160 return DemangleInvalid;
161 }
162
163 // Paths always start with uppercase characters.
164 if (*inner < 'A' || *inner > 'Z') {
165 return DemangleInvalid;
166 }
167
168 if (!str_isascii(inner, inner_len)) {
169 return DemangleInvalid;
170 }
171
172 struct parser parser = { inner, inner_len, 0, 0 };
173
174 demangle_status status = try_parse_path(&parser);
175 if (status != DemangleOk) return status;
176 char next = parser.sym[parser.next];
177
178 // Instantiating crate (paths always start with uppercase characters).
179 if (parser.next < parser.sym_len && next >= 'A' && next <= 'Z') {
180 status = try_parse_path(&parser);
181 if (status != DemangleOk) return status;
182 }
183
184 res->mangled = inner;
185 res->mangled_len = inner_len;
186 if (rest) {
187 *rest = parser.sym + parser.next;
188 }
189
190 return DemangleOk;
191 }
192
193 // This might require `len` to be up to 3 characters bigger than the real output len in case of utf-8
rust_demangle_v0_display_demangle(struct demangle_v0 res,char * out,size_t len,bool alternate)194 NODISCARD static overflow_status rust_demangle_v0_display_demangle(struct demangle_v0 res, char *out, size_t len, bool alternate) {
195 struct printer printer = {
196 DemangleOk,
197 {
198 res.mangled,
199 res.mangled_len,
200 0,
201 0
202 },
203 out,
204 len,
205 0,
206 alternate
207 };
208 if (printer_print_path(&printer, true) == OverflowOverflow) {
209 return OverflowOverflow;
210 }
211 if (printer.out_len < OVERFLOW_MARGIN) {
212 return OverflowOverflow;
213 }
214 *printer.out = '\0';
215 return OverflowOk;
216 }
217
// Encodes `code` as UTF-8 into `buffer` and returns the number of bytes
// written (1 to 4). Values above 0x10FFFF are not encoded: nothing is written
// and 0 is returned.
static size_t code_to_utf8(unsigned char *buffer, uint32_t code)
{
    size_t width;
    if (code < 0x80) {
        width = 1;
    } else if (code < 0x800) {
        width = 2;
    } else if (code < 0x10000) {
        width = 3;
    } else if (code < 0x110000) {
        width = 4;
    } else {
        return 0;
    }

    if (width == 1) {
        buffer[0] = (unsigned char)code;
        return 1;
    }

    // Marker bits of the leading byte, indexed by sequence length:
    // 110xxxxx, 1110xxxx, 11110xxx.
    static const unsigned char lead_marker[5] = { 0, 0, 0xC0, 0xE0, 0xF0 };

    // Continuation bytes (10xxxxxx) take 6 bits each, filled from the end.
    uint32_t remaining = code;
    for (size_t i = width - 1; i > 0; i--) {
        buffer[i] = (unsigned char)(0x80 | (remaining & 0x3F));
        remaining >>= 6;
    }
    buffer[0] = (unsigned char)(lead_marker[width] | remaining);
    return width;
}
244
245
246 // return length of char at byte, or SIZE_MAX if invalid. buf should have 4 valid characters
utf8_next_char(uint8_t * s,uint32_t * ch)247 static NODISCARD size_t utf8_next_char(uint8_t *s, uint32_t *ch) {
248 uint8_t byte = *s;
249 // UTF8-1 = %x00-7F
250 // UTF8-2 = %xC2-DF UTF8-tail
251 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
252 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
253 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
254 // %xF4 %x80-8F 2( UTF8-tail )
255 if (byte < 0x80) {
256 *ch = byte;
257 return 1;
258 } else if (byte < 0xc2) {
259 return SIZE_MAX;
260 } else if (byte < 0xe0) {
261 if (s[1] >= 0x80 && s[1] < 0xc0) {
262 *ch = ((byte&0x1f)<<6) + (s[1] & 0x3f);
263 return 2;
264 }
265 return SIZE_MAX;
266 } if (byte < 0xf0) {
267 if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0)) {
268 return SIZE_MAX; // basic validation
269 }
270 if (byte == 0xe0 && s[1] < 0xa0) {
271 return SIZE_MAX; // overshort
272 }
273 if (byte == 0xed && s[1] >= 0xa0) {
274 return SIZE_MAX; // surrogate
275 }
276 *ch = ((byte&0x0f)<<12) + ((s[1] & 0x3f)<<6) + (s[2] & 0x3f);
277 return 3;
278 } else if (byte < 0xf5) {
279 if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0) || !(s[3] >= 0x80 && s[3] < 0xc0)) {
280 return SIZE_MAX; // basic validation
281 }
282 if (byte == 0xf0 && s[1] < 0x90) {
283 return SIZE_MAX; // overshort
284 }
285 if (byte == 0xf4 && s[1] >= 0x90) {
286 return SIZE_MAX; // over max
287 }
288 *ch = ((byte&0x07)<<18) + ((s[1] & 0x3f)<<12) + ((s[2] & 0x3f)<<6) + (s[3]&0x3f);
289 return 4;
290 } else {
291 return SIZE_MAX;
292 }
293 }
294
validate_char(uint32_t n)295 static NODISCARD bool validate_char(uint32_t n) {
296 return ((n ^ 0xd800) - 0x800) < 0x110000 - 0x800;
297 }
298
// Maximum number of code points a decoded identifier may contain.
#define SMALL_PUNYCODE_LEN 128

// Decodes a punycode-encoded identifier into an array of code points.
//
// `start`/`ascii_len` is the plain-ASCII part, copied to the output first;
// `punycode_start`/`punycode_len` is the encoded part. Every round of the
// outer loop reads one variable-length base-36 number (digits 'a'-'z' are
// 0..25, '0'-'9' are 26..35) and inserts one code point into the output.
//
// Returns PunycodeOk and sets `*out_len` to the number of code points written
// to `*out_`. Returns PunycodeError when the encoded part is empty, contains a
// character outside [a-z0-9] or ends in the middle of a number, when the
// arithmetic would overflow, when a decoded value is rejected by
// validate_char(), or when the result would exceed SMALL_PUNYCODE_LEN code
// points. On error `*out_len` is not written.
static NODISCARD punycode_status punycode_decode(const char *start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint32_t (*out_)[SMALL_PUNYCODE_LEN], size_t *out_len) {
    uint32_t *out = *out_;

    if (punycode_len == 0) {
        return PunycodeError;
    }

    if (ascii_len > SMALL_PUNYCODE_LEN) {
        return PunycodeError;
    }
    for (size_t i = 0; i < ascii_len; i++) {
        out[i] = start[i];
    }
    size_t len = ascii_len;

    // `i` is the insertion position and `n` the current code point (starting
    // just above ASCII); the other values are the constants and running state
    // of the bias adaptation performed at the end of each round.
    size_t base = 36, t_min = 1, t_max = 26, skew = 38, damp = 700, bias = 72, i = 0, n = 0x80;
    for (;;) {
        // Read one variable-length integer into `delta`; `w` is the weight of
        // the current digit.
        size_t delta = 0, w = 1, k = 0;
        for (;;) {
            k += base;
            // threshold for this digit: (k - bias) saturated at 0, clamped to [t_min, t_max]
            size_t biased = k < bias ? 0 : k - bias;
            size_t t = MIN(MAX(biased, t_min), t_max);
            size_t d;
            if (punycode_len == 0) {
                return PunycodeError;
            }
            char nx = *punycode_start++;
            punycode_len--;
            if ('a' <= nx && nx <= 'z') {
                d = nx - 'a';
            } else if ('0' <= nx && nx <= '9') {
                d = 26 + (nx - '0');
            } else {
                return PunycodeError;
            }
            // checked delta += d * w
            if (w == 0 || d > SIZE_MAX / w || d*w > SIZE_MAX - delta) {
                return PunycodeError;
            }
            delta += d * w;
            // a digit below the threshold terminates the number
            if (d < t) {
                break;
            }
            // checked w *= (base - t)
            if (base < t || w == 0 || (base - t) > SIZE_MAX / w) {
                return PunycodeError;
            }
            w *= (base - t);
        }

        // Compute the code point and its insertion position for the output,
        // which is now one element longer.
        len += 1;
        if (i > SIZE_MAX - delta) {
            return PunycodeError;
        }
        i += delta;
        if (n > SIZE_MAX - i / len) {
            return PunycodeError;
        }
        n += i / len;
        i %= len;

        // char validation
        if (n > UINT32_MAX || !validate_char((uint32_t)n)) {
            return PunycodeError;
        }

        // insert new character
        if (len > SMALL_PUNYCODE_LEN) {
            return PunycodeError;
        }
        // shift the tail up by one element to make room at index i
        memmove(out + i + 1, out + i, (len - i - 1) * sizeof(uint32_t));
        out[i] = (uint32_t)n;

        // start i index at incremented position
        i++;

        // If there are no more deltas, decoding is complete.
        if (punycode_len == 0) {
            *out_len = len;
            return PunycodeOk;
        }

        // Perform bias adaptation.
        delta /= damp;
        damp = 2; // only the very first delta is divided by the initial damp of 700

        delta += delta / len;
        k = 0;
        while (delta > ((base - t_min) * t_max) / 2) {
            delta /= base - t_min;
            k += base;
        }
        bias = k + ((base - t_min + 1) * delta) / (delta + skew);
    }
}
394
// An identifier as found in the mangled symbol, split into its two parts by
// parser_ident(). Both pointers refer into the symbol (or to "" when the
// corresponding part is absent).
struct ident {
    const char *ascii_start;    // plain ASCII part
    size_t ascii_len;
    const char *punycode_start; // punycode-encoded part
    size_t punycode_len;        // 0 when the identifier is not punycode-encoded
};
401
display_ident(const char * ascii_start,size_t ascii_len,const char * punycode_start,size_t punycode_len,uint8_t * out,size_t * out_len)402 static NODISCARD overflow_status display_ident(const char *ascii_start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint8_t *out, size_t *out_len) {
403 uint32_t outbuf[SMALL_PUNYCODE_LEN];
404
405 size_t wide_len;
406 size_t out_buflen = *out_len;
407
408 if (punycode_len == 0) {
409 if (ascii_len > out_buflen) {
410 return OverflowOverflow;
411 }
412 memcpy(out, ascii_start, ascii_len);
413 *out_len = ascii_len;
414 } else if (punycode_decode(ascii_start, ascii_len, punycode_start, punycode_len, &outbuf, &wide_len) == PunycodeOk) {
415 size_t narrow_len = 0;
416 for (size_t i = 0; i < wide_len; i++) {
417 if (out_buflen - narrow_len < 4) {
418 return OverflowOverflow;
419 }
420 unsigned char *pos = &out[narrow_len];
421 narrow_len += code_to_utf8(pos, outbuf[i]);
422 }
423 *out_len = narrow_len;
424 } else {
425 size_t narrow_len = 0;
426 if (out_buflen < strlen("punycode{")) {
427 return OverflowOverflow;
428 }
429 memcpy(out, "punycode{", strlen("punycode{"));
430 narrow_len = strlen("punycode{");
431 if (ascii_len > 0) {
432 if (out_buflen - narrow_len < ascii_len || out_buflen - narrow_len - ascii_len < 1) {
433 return OverflowOverflow;
434 }
435 memcpy(out + narrow_len, ascii_start, ascii_len);
436 narrow_len += ascii_len;
437 out[narrow_len] = '-';
438 narrow_len++;
439 }
440 if (out_buflen - narrow_len < punycode_len || out_buflen - narrow_len - punycode_len < 1) {
441 return OverflowOverflow;
442 }
443 memcpy(out + narrow_len, punycode_start, punycode_len);
444 narrow_len += punycode_len;
445 out[narrow_len] = '}';
446 narrow_len++;
447 *out_len = narrow_len;
448 }
449
450 return OverflowOk;
451 }
452
try_parse_uint(const char * buf,size_t len,uint64_t * result)453 static NODISCARD bool try_parse_uint(const char *buf, size_t len, uint64_t *result) {
454 size_t cur = 0;
455 for(;cur < len && buf[cur] == '0';cur++);
456 uint64_t result_val = 0;
457 if (len - cur > 16) return false;
458 for(;cur < len;cur++) {
459 char c = buf[cur];
460 result_val <<= 4;
461 if ('0' <= c && c <= '9') {
462 result_val += c - '0';
463 } else if ('a' <= c && c <= 'f') {
464 result_val += 10 + (c - 'a');
465 } else {
466 return false;
467 }
468 }
469 *result = result_val;
470 return true;
471 }
472
dinibble2int(const char * buf,uint8_t * result)473 static NODISCARD bool dinibble2int(const char *buf, uint8_t *result) {
474 uint8_t result_val = 0;
475 for (int i = 0; i < 2; i++) {
476 char c = buf[i];
477 result_val <<= 4;
478 if ('0' <= c && c <= '9') {
479 result_val += c - '0';
480 } else if ('a' <= c && c <= 'f') {
481 result_val += 10 + (c - 'a');
482 } else {
483 return false;
484 }
485 }
486 *result = result_val;
487 return true;
488 }
489
490
// Result of nibbles_to_string().
typedef enum {
    NtsOk = 0,       // the string was converted
    NtsOverflow = 1, // the output buffer is too small
    NtsInvalid = 2   // odd nibble count, non-hex character, or invalid UTF-8
} nibbles_to_string_status;
496
497 // '\u{10ffff}', +margin
498 #define ESCAPED_SIZE 12
499
char_to_string(uint32_t ch,uint8_t quote,bool first,char (* buf)[ESCAPED_SIZE])500 static NODISCARD size_t char_to_string(uint32_t ch, uint8_t quote, bool first, char (*buf)[ESCAPED_SIZE]) {
501 // encode the character
502 char *escaped_buf = *buf;
503 escaped_buf[0] = '\\';
504 size_t escaped_len = 2;
505 switch (ch) {
506 case '\0':
507 escaped_buf[1] = '0';
508 break;
509 case '\t':
510 escaped_buf[1] = 't';
511 break;
512 case '\r':
513 escaped_buf[1] = 'r';
514 break;
515 case '\n':
516 escaped_buf[1] = 'n';
517 break;
518 case '\\':
519 escaped_buf[1] = '\\';
520 break;
521 default:
522 if (ch == quote) {
523 escaped_buf[1] = ch;
524 } else if (!unicode_isprint(ch) || (first && unicode_isgraphemextend(ch))) {
525 int hexlen = snprintf(escaped_buf, ESCAPED_SIZE, "\\u{%x}", (unsigned int)ch);
526 if (hexlen < 0) {
527 return 0; // (snprintf shouldn't fail!)
528 }
529 escaped_len = hexlen;
530 } else {
531 // printable character
532 escaped_buf[0] = ch;
533 escaped_len = 1;
534 }
535 break;
536 }
537
538 return escaped_len;
539 }
540
541 // convert nibbles to a single/double-quoted string
nibbles_to_string(const char * buf,size_t len,uint8_t * out,size_t * out_len)542 static NODISCARD nibbles_to_string_status nibbles_to_string(const char *buf, size_t len, uint8_t *out, size_t *out_len) {
543 uint8_t quote = '"';
544 bool first = true;
545
546 if ((len % 2) != 0) {
547 return NtsInvalid; // odd number of nibbles
548 }
549
550 size_t cur_out_len = 0;
551
552 // write starting quote
553 if (out != NULL) {
554 cur_out_len = *out_len;
555 if (cur_out_len == 0) {
556 return NtsOverflow;
557 }
558 *out++ = quote;
559 cur_out_len--;
560 }
561
562 uint8_t conv_buf[4] = {0};
563 size_t conv_buf_len = 0;
564 while (len > 1 || conv_buf_len > 0) {
565 while (len > 1 && conv_buf_len < sizeof(conv_buf)) {
566 if (!dinibble2int(buf, &conv_buf[conv_buf_len])) {
567 return NtsInvalid;
568 }
569 conv_buf_len++;
570 buf += 2;
571 len -= 2;
572 }
573
574 // conv_buf is full here if possible, process 1 UTF-8 character
575 uint32_t ch = 0;
576 size_t consumed = utf8_next_char(conv_buf, &ch);
577 if (consumed > conv_buf_len) {
578 // either SIZE_MAX (invalid UTF-8) or finished input buffer and
579 // there are still bytes remaining, in both cases invalid
580 return NtsInvalid;
581 }
582
583 // "consume" the character
584 memmove(conv_buf, conv_buf+consumed, conv_buf_len-consumed);
585 conv_buf_len -= consumed;
586
587 char escaped_buf[ESCAPED_SIZE];
588 size_t escaped_len = char_to_string(ch, '"', first, &escaped_buf);
589 if (out != NULL) {
590 if (cur_out_len < escaped_len) {
591 return NtsOverflow;
592 }
593 memcpy(out, escaped_buf, escaped_len);
594 out += escaped_len;
595 cur_out_len -= escaped_len;
596 }
597 first = false;
598 }
599
600 // write ending quote
601 if (out != NULL) {
602 if (cur_out_len == 0) {
603 return NtsOverflow;
604 }
605 *out++ = quote;
606 cur_out_len--;
607 *out_len -= cur_out_len; // subtract remaining space to get used space
608 }
609
610 return NtsOk;
611 }
612
// Maps a basic-type tag character to the Rust type name it stands for, or
// returns NULL when `tag` is not a basic-type tag.
static const char* basic_type(uint8_t tag) {
    static const struct {
        uint8_t tag;
        const char *name;
    } names[] = {
        { 'b', "bool" },
        { 'c', "char" },
        { 'e', "str" },
        { 'u', "()" },
        { 'a', "i8" },
        { 's', "i16" },
        { 'l', "i32" },
        { 'x', "i64" },
        { 'n', "i128" },
        { 'i', "isize" },
        { 'h', "u8" },
        { 't', "u16" },
        { 'm', "u32" },
        { 'y', "u64" },
        { 'o', "u128" },
        { 'j', "usize" },
        { 'f', "f32" },
        { 'd', "f64" },
        { 'z', "!" },
        { 'p', "_" },
        { 'v', "..." },
    };

    for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
        if (names[i].tag == tag) {
            return names[i].name;
        }
    }
    return NULL;
}
661
parser_push_depth(struct parser * parser)662 static NODISCARD demangle_status parser_push_depth(struct parser *parser) {
663 parser->depth++;
664 if (parser->depth > MAX_DEPTH) {
665 return DemangleRecursed;
666 } else {
667 return DemangleOk;
668 }
669 }
670
parser_pop_depth(struct parser * parser)671 static demangle_status parser_pop_depth(struct parser *parser) {
672 parser->depth--;
673 return DemangleOk;
674 }
675
parser_peek(struct parser const * parser)676 static uint8_t parser_peek(struct parser const *parser) {
677 if (parser->next == parser->sym_len) {
678 return 0; // add a "pseudo nul terminator" to avoid peeking past the end of a symbol
679 } else {
680 return parser->sym[parser->next];
681 }
682 }
683
parser_eat(struct parser * parser,uint8_t ch)684 static bool parser_eat(struct parser *parser, uint8_t ch) {
685 if (parser_peek(parser) == ch) {
686 if (ch != 0) { // safety: make sure we don't skip past the NUL terminator
687 parser->next++;
688 }
689 return true;
690 } else {
691 return false;
692 }
693 }
694
parser_next(struct parser * parser)695 static uint8_t parser_next(struct parser *parser) {
696 // don't advance after end of input, and return an imaginary NUL terminator
697 if (parser->next == parser->sym_len) {
698 return 0;
699 } else {
700 return parser->sym[parser->next++];
701 }
702 }
703
parser_ch(struct parser * parser,uint8_t * next)704 static NODISCARD demangle_status parser_ch(struct parser *parser, uint8_t *next) {
705 // don't advance after end of input
706 if (parser->next == parser->sym_len) {
707 return DemangleInvalid;
708 } else {
709 *next = parser->sym[parser->next++];
710 return DemangleOk;
711 }
712 }
713
// A (pointer, length) view of a byte range; parser_hex_nibbles() fills it
// with a range inside the symbol.
struct buf {
    const char *start;
    size_t len;
};
718
parser_hex_nibbles(struct parser * parser,struct buf * buf)719 static NODISCARD demangle_status parser_hex_nibbles(struct parser *parser, struct buf *buf) {
720 size_t start = parser->next;
721 for (;;) {
722 uint8_t ch = parser_next(parser);
723 if (ch == '_') {
724 break;
725 }
726 if (!(('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f'))) {
727 return DemangleInvalid;
728 }
729 }
730 buf->start = parser->sym + start;
731 buf->len = parser->next - start - 1; // skip final _
732 return DemangleOk;
733 }
734
parser_digit_10(struct parser * parser,uint8_t * out)735 static NODISCARD demangle_status parser_digit_10(struct parser *parser, uint8_t *out) {
736 uint8_t ch = parser_peek(parser);
737 if ('0' <= ch && ch <= '9') {
738 *out = ch - '0';
739 parser->next++;
740 return DemangleOk;
741 } else {
742 return DemangleInvalid;
743 }
744 }
745
parser_digit_62(struct parser * parser,uint64_t * out)746 static NODISCARD demangle_status parser_digit_62(struct parser *parser, uint64_t *out) {
747 uint8_t ch = parser_peek(parser);
748 if ('0' <= ch && ch <= '9') {
749 *out = ch - '0';
750 parser->next++;
751 return DemangleOk;
752 } else if ('a' <= ch && ch <= 'z') {
753 *out = 10 + (ch - 'a');
754 parser->next++;
755 return DemangleOk;
756 } else if ('A' <= ch && ch <= 'Z') {
757 *out = 10 + 26 + (ch - 'A');
758 parser->next++;
759 return DemangleOk;
760 } else {
761 return DemangleInvalid;
762 }
763 }
764
parser_integer_62(struct parser * parser,uint64_t * out)765 static NODISCARD demangle_status parser_integer_62(struct parser *parser, uint64_t *out) {
766 if (parser_eat(parser, '_')) {
767 *out = 0;
768 return DemangleOk;
769 }
770
771 uint64_t x = 0;
772 demangle_status status;
773 while (!parser_eat(parser, '_')) {
774 uint64_t d;
775 if ((status = parser_digit_62(parser, &d)) != DemangleOk) {
776 return status;
777 }
778 if (x > UINT64_MAX / 62) {
779 return DemangleInvalid;
780 }
781 x *= 62;
782 if (x > UINT64_MAX - d) {
783 return DemangleInvalid;
784 }
785 x += d;
786 }
787 if (x == UINT64_MAX) {
788 return DemangleInvalid;
789 }
790 *out = x + 1;
791 return DemangleOk;
792 }
793
parser_opt_integer_62(struct parser * parser,uint8_t tag,uint64_t * out)794 static NODISCARD demangle_status parser_opt_integer_62(struct parser *parser, uint8_t tag, uint64_t *out) {
795 if (!parser_eat(parser, tag)) {
796 *out = 0;
797 return DemangleOk;
798 }
799
800 demangle_status status;
801 if ((status = parser_integer_62(parser, out)) != DemangleOk) {
802 return status;
803 }
804 if (*out == UINT64_MAX) {
805 return DemangleInvalid;
806 }
807 *out = *out + 1;
808 return DemangleOk;
809 }
810
// Parses an optional disambiguator: an optional integer introduced by the
// tag 's'. `*out` is 0 when the tag is absent (see parser_opt_integer_62()).
static NODISCARD demangle_status parser_disambiguator(struct parser *parser, uint64_t *out) {
    return parser_opt_integer_62(parser, 's', out);
}
814
815 typedef uint8_t parser_namespace_type;
816
parser_namespace(struct parser * parser,parser_namespace_type * out)817 static NODISCARD demangle_status parser_namespace(struct parser *parser, parser_namespace_type *out) {
818 uint8_t next = parser_next(parser);
819 if ('A' <= next && next <= 'Z') {
820 *out = next;
821 return DemangleOk;
822 } else if ('a' <= next && next <= 'z') {
823 *out = 0;
824 return DemangleOk;
825 } else {
826 return DemangleInvalid;
827 }
828 }
829
parser_backref(struct parser * parser,struct parser * out)830 static NODISCARD demangle_status parser_backref(struct parser *parser, struct parser *out) {
831 size_t start = parser->next;
832 if (start == 0) {
833 return DemangleBug;
834 }
835 size_t s_start = start - 1;
836 uint64_t i;
837 demangle_status status = parser_integer_62(parser, &i);
838 if (status != DemangleOk) {
839 return status;
840 }
841 if (i >= s_start) {
842 return DemangleInvalid;
843 }
844 struct parser res = {
845 .sym = parser->sym,
846 .sym_len = parser->sym_len,
847 .next = (size_t)i,
848 .depth = parser->depth
849 };
850 status = parser_push_depth(&res);
851 if (status != DemangleOk) {
852 return status;
853 }
854 *out = res;
855 return DemangleOk;
856 }
857
parser_ident(struct parser * parser,struct ident * out)858 static NODISCARD demangle_status parser_ident(struct parser *parser, struct ident *out) {
859 bool is_punycode = parser_eat(parser, 'u');
860 size_t len;
861 uint8_t d;
862 demangle_status status = parser_digit_10(parser, &d);
863 len = d;
864 if (status != DemangleOk) {
865 return status;
866 }
867 if (len) {
868 for (;;) {
869 status = parser_digit_10(parser, &d);
870 if (status != DemangleOk) {
871 break;
872 }
873 if (len > SIZE_MAX / 10) {
874 return DemangleInvalid;
875 }
876 len *= 10;
877 if (len > SIZE_MAX - d) {
878 return DemangleInvalid;
879 }
880 len += d;
881 }
882 }
883
884 // Skip past the optional `_` separator.
885 parser_eat(parser, '_');
886
887 size_t start = parser->next;
888 if (parser->sym_len - parser->next < len) {
889 return DemangleInvalid;
890 }
891 parser->next += len;
892
893 const char *ident = &parser->sym[start];
894
895 if (is_punycode) {
896 const char *underscore = demangle_memrchr(ident, '_', (size_t)len);
897 if (underscore == NULL) {
898 *out = (struct ident){
899 .ascii_start="",
900 .ascii_len=0,
901 .punycode_start=ident,
902 .punycode_len=len
903 };
904 } else {
905 size_t ascii_len = underscore - ident;
906 // ascii_len <= len - 1 since `_` is in the first len bytes
907 size_t punycode_len = len - 1 - ascii_len;
908 *out = (struct ident){
909 .ascii_start=ident,
910 .ascii_len=ascii_len,
911 .punycode_start=underscore + 1,
912 .punycode_len=punycode_len
913 };
914 }
915 if (out->punycode_len == 0) {
916 return DemangleInvalid;
917 }
918 return DemangleOk;
919 } else {
920 *out = (struct ident) {
921 .ascii_start=ident,
922 .ascii_len=(size_t)len,
923 .punycode_start="",
924 .punycode_len=0,
925 };
926 return DemangleOk;
927 }
928 }
929
930 #define INVALID_SYNTAX "{invalid syntax}"
931
demangle_error_message(demangle_status status)932 static const char *demangle_error_message(demangle_status status) {
933 switch (status) {
934 case DemangleInvalid:
935 return INVALID_SYNTAX;
936 case DemangleBug:
937 return "{bug}";
938 case DemangleRecursed:
939 return "{recursion limit reached}";
940 default:
941 return "{unknown error}";
942 }
943 }
944
// Evaluates a printing expression and makes the *enclosing function* return
// OverflowOverflow if it overflowed.
#define PRINT(print_fn) \
    do { \
        if ((print_fn) == OverflowOverflow) { \
            return OverflowOverflow; \
        } \
    } while(0)

// Shorthands for PRINT() around the individual printer_print_* helpers.
#define PRINT_CH(printer, s) PRINT(printer_print_ch((printer), (s)))
#define PRINT_STR(printer, s) PRINT(printer_print_str((printer), (s)))
#define PRINT_U64(printer, s) PRINT(printer_print_u64((printer), (s)))
#define PRINT_IDENT(printer, s) PRINT(printer_print_ident((printer), (s)))

// Prints "{invalid syntax}", marks the printer as DemangleInvalid and makes
// the enclosing function return OverflowOk (or OverflowOverflow if the
// message itself did not fit).
#define INVALID(printer) \
    do { \
        PRINT_STR((printer), INVALID_SYNTAX); \
        (printer)->status = DemangleInvalid; \
        return OverflowOk; \
    } while(0)

// Runs `method(&printer->parser, ...)`. If the printer is already in an error
// state, prints "?" instead and returns OverflowOk from the enclosing
// function. If the method fails, prints the matching error message, records
// the status in the printer and returns OverflowOk from the enclosing
// function. Execution continues past the macro only on a successful parse.
// (`## __VA_ARGS__` is the GNU extension that drops the preceding comma when
// no extra arguments are given.)
#define PARSE(printer, method, ...) \
    do { \
        if ((printer)->status != DemangleOk) { \
            PRINT_STR((printer), "?"); \
            return OverflowOk; \
        } else { \
            demangle_status _parse_status = method(&(printer)->parser, ## __VA_ARGS__); \
            if (_parse_status != DemangleOk) { \
                PRINT_STR((printer), demangle_error_message(_parse_status)); \
                (printer)->status = _parse_status; \
                return OverflowOk; \
            } \
        } \
    } while(0)

// Like PRINT_SEP_LIST_COUNT, for callers that do not need the item count.
#define PRINT_SEP_LIST(printer, body, sep) \
    do { \
        size_t _sep_list_i; \
        PRINT_SEP_LIST_COUNT(printer, _sep_list_i, body, sep); \
    } while(0)

// Runs `body` once per list item until an 'E' terminator is consumed or the
// printer enters an error state, printing `sep` before every item except the
// first. `count` (an lvalue) ends up holding the number of items processed.
#define PRINT_SEP_LIST_COUNT(printer, count, body, sep) \
    do { \
        count = 0; \
        while ((printer)->status == DemangleOk && !printer_eat((printer), 'E')) { \
            if (count > 0) { PRINT_STR(printer, sep); } \
            body; \
            count++; \
        } \
    } while(0)
994
printer_eat(struct printer * printer,uint8_t b)995 static bool printer_eat(struct printer *printer, uint8_t b) {
996 if (printer->status != DemangleOk) {
997 return false;
998 }
999
1000 return parser_eat(&printer->parser, b);
1001 }
1002
printer_pop_depth(struct printer * printer)1003 static void printer_pop_depth(struct printer *printer) {
1004 if (printer->status == DemangleOk) {
1005 parser_pop_depth(&printer->parser);
1006 }
1007 }
1008
printer_print_buf(struct printer * printer,const char * start,size_t len)1009 static NODISCARD overflow_status printer_print_buf(struct printer *printer, const char *start, size_t len) {
1010 if (printer->out == NULL) {
1011 return OverflowOk;
1012 }
1013 if (printer->out_len < len) {
1014 return OverflowOverflow;
1015 }
1016
1017 memcpy(printer->out, start, len);
1018 printer->out += len;
1019 printer->out_len -= len;
1020 return OverflowOk;
1021 }
1022
printer_print_str(struct printer * printer,const char * buf)1023 static NODISCARD overflow_status printer_print_str(struct printer *printer, const char *buf) {
1024 return printer_print_buf(printer, buf, strlen(buf));
1025 }
1026
printer_print_ch(struct printer * printer,char ch)1027 static NODISCARD overflow_status printer_print_ch(struct printer *printer, char ch) {
1028 return printer_print_buf(printer, &ch, 1);
1029 }
1030
printer_print_u64(struct printer * printer,uint64_t n)1031 static NODISCARD overflow_status printer_print_u64(struct printer *printer, uint64_t n) {
1032 char buf[32] = {0};
1033 sprintf(buf, "%llu", (unsigned long long)n); // printing uint64 uses 21 < 32 chars
1034 return printer_print_str(printer, buf);
1035 }
1036
printer_print_ident(struct printer * printer,struct ident * ident)1037 static NODISCARD overflow_status printer_print_ident(struct printer *printer, struct ident *ident) {
1038 if (printer->out == NULL) {
1039 return OverflowOk;
1040 }
1041
1042 size_t out_len = printer->out_len;
1043 overflow_status status;
1044 if ((status = display_ident(ident->ascii_start, ident->ascii_len, ident->punycode_start, ident->punycode_len, (uint8_t*)printer->out, &out_len)) != OverflowOk) {
1045 return status;
1046 }
1047 printer->out += out_len;
1048 printer->out_len -= out_len;
1049 return OverflowOk;
1050 }
1051
// Callback run by printer_in_binder() inside a binder.
typedef overflow_status (*printer_fn)(struct printer *printer);
// Callback run by printer_print_backref() at the back-reference target;
// `arg` is passed through unchanged from the caller.
typedef overflow_status (*backref_fn)(struct printer *printer, bool *arg);
1054
printer_print_backref(struct printer * printer,backref_fn func,bool * arg)1055 static NODISCARD overflow_status printer_print_backref(struct printer *printer, backref_fn func, bool *arg) {
1056 struct parser backref;
1057 PARSE(printer, parser_backref, &backref);
1058
1059 if (printer->out == NULL) {
1060 return OverflowOk;
1061 }
1062
1063 struct parser orig_parser = printer->parser;
1064 demangle_status orig_status = printer->status; // fixme not sure this is needed match for Ok on the Rust side
1065 printer->parser = backref;
1066 printer->status = DemangleOk;
1067 overflow_status status = func(printer, arg);
1068 printer->parser = orig_parser;
1069 printer->status = orig_status;
1070
1071 return status;
1072 }
1073
printer_print_lifetime_from_index(struct printer * printer,uint64_t lt)1074 static NODISCARD overflow_status printer_print_lifetime_from_index(struct printer *printer, uint64_t lt) {
1075 // Bound lifetimes aren't tracked when skipping printing.
1076 if (printer->out == NULL) {
1077 return OverflowOk;
1078 }
1079
1080 PRINT_STR(printer, "'");
1081 if (lt == 0) {
1082 PRINT_STR(printer, "_");
1083 return OverflowOk;
1084 }
1085
1086 if (printer->bound_lifetime_depth < lt) {
1087 INVALID(printer);
1088 } else {
1089 uint64_t depth = printer->bound_lifetime_depth - lt;
1090 if (depth < 26) {
1091 PRINT_CH(printer, 'a' + depth);
1092 } else {
1093 PRINT_STR(printer, "_");
1094 PRINT_U64(printer, depth);
1095 }
1096
1097 return OverflowOk;
1098 }
1099 }
1100
printer_in_binder(struct printer * printer,printer_fn func)1101 static NODISCARD overflow_status printer_in_binder(struct printer *printer, printer_fn func) {
1102 uint64_t bound_lifetimes;
1103 PARSE(printer, parser_opt_integer_62, 'G', &bound_lifetimes);
1104
1105 // Don't track bound lifetimes when skipping printing.
1106 if (printer->out == NULL) {
1107 return func(printer);
1108 }
1109
1110 if (bound_lifetimes > 0) {
1111 PRINT_STR(printer, "for<");
1112 for (uint64_t i = 0; i < bound_lifetimes; i++) {
1113 if (i > 0) {
1114 PRINT_STR(printer, ", ");
1115 }
1116 printer->bound_lifetime_depth++;
1117 PRINT(printer_print_lifetime_from_index(printer, 1));
1118 }
1119 PRINT_STR(printer, "> ");
1120 }
1121
1122 overflow_status r = func(printer);
1123 printer->bound_lifetime_depth -= bound_lifetimes;
1124
1125 return r;
1126 }
1127
printer_print_generic_arg(struct printer * printer)1128 static NODISCARD overflow_status printer_print_generic_arg(struct printer *printer) {
1129 if (printer_eat(printer, 'L')) {
1130 uint64_t lt;
1131 PARSE(printer, parser_integer_62, <);
1132 return printer_print_lifetime_from_index(printer, lt);
1133 } else if (printer_eat(printer, 'K')) {
1134 return printer_print_const(printer, false);
1135 } else {
1136 return printer_print_type(printer);
1137 }
1138 }
1139
printer_print_generic_args(struct printer * printer)1140 static NODISCARD overflow_status printer_print_generic_args(struct printer *printer) {
1141 PRINT_STR(printer, "<");
1142 PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", ");
1143 PRINT_STR(printer, ">");
1144 return OverflowOk;
1145 }
1146
printer_print_path_out_of_value(struct printer * printer,bool * _arg)1147 static NODISCARD overflow_status printer_print_path_out_of_value(struct printer *printer, bool *_arg) {
1148 (void)_arg;
1149 return printer_print_path(printer, false);
1150 }
1151
printer_print_path_in_value(struct printer * printer,bool * _arg)1152 static NODISCARD overflow_status printer_print_path_in_value(struct printer *printer, bool *_arg) {
1153 (void)_arg;
1154 return printer_print_path(printer, true);
1155 }
1156
printer_print_path(struct printer * printer,bool in_value)1157 static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value) {
1158 PARSE(printer, parser_push_depth);
1159 uint8_t tag;
1160 PARSE(printer, parser_ch, &tag);
1161
1162 overflow_status st;
1163 uint64_t dis;
1164 struct ident name;
1165 parser_namespace_type ns;
1166 char *orig_out;
1167
1168 switch(tag) {
1169 case 'C':
1170 PARSE(printer, parser_disambiguator, &dis);
1171 PARSE(printer, parser_ident, &name);
1172
1173 PRINT_IDENT(printer, &name);
1174
1175 if (printer->out != NULL && !printer->alternate && dis != 0) {
1176 PRINT_STR(printer, "[");
1177 char buf[24] = {0};
1178 sprintf(buf, "%llx", (unsigned long long)dis);
1179 PRINT_STR(printer, buf);
1180 PRINT_STR(printer, "]");
1181 }
1182 break;
1183 case 'N':
1184 PARSE(printer, parser_namespace, &ns);
1185 if ((st = printer_print_path(printer, in_value)) != OverflowOk) {
1186 return st;
1187 }
1188
1189 // HACK(eddyb) if the parser is already marked as having errored,
1190 // `parse!` below will print a `?` without its preceding `::`
1191 // (because printing the `::` is skipped in certain conditions,
1192 // i.e. a lowercase namespace with an empty identifier),
1193 // so in order to get `::?`, the `::` has to be printed here.
1194 if (printer->status != DemangleOk) {
1195 PRINT_STR(printer, "::");
1196 }
1197
1198 PARSE(printer, parser_disambiguator, &dis);
1199 PARSE(printer, parser_ident, &name);
1200 // Special namespace, like closures and shims
1201 if (ns) {
1202 PRINT_STR(printer, "::{");
1203 if (ns == 'C') {
1204 PRINT_STR(printer, "closure");
1205 } else if (ns == 'S') {
1206 PRINT_STR(printer, "shim");
1207 } else {
1208 PRINT_CH(printer, ns);
1209 }
1210 if (name.ascii_len != 0 || name.punycode_len != 0) {
1211 PRINT_STR(printer, ":");
1212 PRINT_IDENT(printer, &name);
1213 }
1214 PRINT_STR(printer, "#");
1215 PRINT_U64(printer, dis);
1216 PRINT_STR(printer, "}");
1217 } else {
1218 // Implementation-specific/unspecified namespaces
1219 if (name.ascii_len != 0 || name.punycode_len != 0) {
1220 PRINT_STR(printer, "::");
1221 PRINT_IDENT(printer, &name);
1222 }
1223 }
1224 break;
1225 case 'M':
1226 case 'X':
1227 // for impls, ignore the impls own path
1228 PARSE(printer, parser_disambiguator, &dis);
1229 orig_out = printer->out;
1230 printer->out = NULL;
1231 PRINT(printer_print_path(printer, false));
1232 printer->out = orig_out;
1233
1234 // fallthru
1235 case 'Y':
1236 PRINT_STR(printer, "<");
1237 PRINT(printer_print_type(printer));
1238 if (tag != 'M') {
1239 PRINT_STR(printer, " as ");
1240 PRINT(printer_print_path(printer, false));
1241 }
1242 PRINT_STR(printer, ">");
1243 break;
1244 case 'I':
1245 PRINT(printer_print_path(printer, in_value));
1246 if (in_value) {
1247 PRINT_STR(printer, "::");
1248 }
1249 PRINT(printer_print_generic_args(printer));
1250 break;
1251 case 'B':
1252 PRINT(printer_print_backref(printer, in_value ? printer_print_path_in_value : printer_print_path_out_of_value, NULL));
1253 break;
1254 default:
1255 INVALID(printer);
1256 break;
1257 }
1258
1259 printer_pop_depth(printer);
1260 return OverflowOk;
1261 }
1262
printer_print_const_uint(struct printer * printer,uint8_t tag)1263 static NODISCARD overflow_status printer_print_const_uint(struct printer *printer, uint8_t tag) {
1264 struct buf hex;
1265 PARSE(printer, parser_hex_nibbles, &hex);
1266
1267 uint64_t val;
1268 if (try_parse_uint(hex.start, hex.len, &val)) {
1269 PRINT_U64(printer, val);
1270 } else {
1271 PRINT_STR(printer, "0x");
1272 PRINT(printer_print_buf(printer, hex.start, hex.len));
1273 }
1274
1275 if (printer->out != NULL && !printer->alternate) {
1276 const char *ty = basic_type(tag);
1277 if (/* safety */ ty != NULL) {
1278 PRINT_STR(printer, ty);
1279 }
1280 }
1281
1282 return OverflowOk;
1283 }
1284
printer_print_const_str_literal(struct printer * printer)1285 static NODISCARD overflow_status printer_print_const_str_literal(struct printer *printer) {
1286 struct buf hex;
1287 PARSE(printer, parser_hex_nibbles, &hex);
1288
1289 size_t out_len = SIZE_MAX;
1290 nibbles_to_string_status nts_status = nibbles_to_string(hex.start, hex.len, NULL, &out_len);
1291 switch (nts_status) {
1292 case NtsOk:
1293 if (printer->out != NULL) {
1294 out_len = printer->out_len;
1295 nts_status = nibbles_to_string(hex.start, hex.len, (uint8_t*)printer->out, &out_len);
1296 if (nts_status != NtsOk) {
1297 return OverflowOverflow;
1298 }
1299 printer->out += out_len;
1300 printer->out_len -= out_len;
1301 }
1302 return OverflowOk;
1303 case NtsOverflow:
1304 // technically if there is a string of size `SIZE_MAX/6` whose escaped version overflows
1305 // SIZE_MAX but has an invalid char, this will be a "fake" overflow. In practice,
1306 // that is not going to happen and a fuzzer will not generate strings of this length.
1307 return OverflowOverflow;
1308 case NtsInvalid:
1309 default:
1310 INVALID(printer);
1311 }
1312 }
1313
printer_print_const_struct(struct printer * printer)1314 static NODISCARD overflow_status printer_print_const_struct(struct printer *printer) {
1315 uint64_t dis;
1316 struct ident name;
1317 PARSE(printer, parser_disambiguator, &dis);
1318 PARSE(printer, parser_ident, &name);
1319 PRINT_IDENT(printer, &name);
1320 PRINT_STR(printer, ": ");
1321 return printer_print_const(printer, true);
1322 }
1323
printer_print_const_out_of_value(struct printer * printer,bool * _arg)1324 static NODISCARD overflow_status printer_print_const_out_of_value(struct printer *printer, bool *_arg) {
1325 (void)_arg;
1326 return printer_print_const(printer, false);
1327 }
1328
printer_print_const_in_value(struct printer * printer,bool * _arg)1329 static NODISCARD overflow_status printer_print_const_in_value(struct printer *printer, bool *_arg) {
1330 (void)_arg;
1331 return printer_print_const(printer, true);
1332 }
1333
printer_print_const(struct printer * printer,bool in_value)1334 static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value) {
1335 uint8_t tag;
1336
1337 PARSE(printer, parser_ch, &tag);
1338 PARSE(printer, parser_push_depth);
1339
1340 struct buf hex;
1341 uint64_t val;
1342 size_t count;
1343
1344 bool opened_brace = false;
1345 #define OPEN_BRACE_IF_OUTSIDE_EXPR \
1346 do { if (!in_value) { \
1347 opened_brace = true; \
1348 PRINT_STR(printer, "{"); \
1349 } } while(0)
1350
1351 switch(tag) {
1352 case 'p':
1353 PRINT_STR(printer, "_");
1354 break;
1355 // Primitive leaves with hex-encoded values (see `basic_type`).
1356 case 'a':
1357 case 's':
1358 case 'l':
1359 case 'x':
1360 case 'n':
1361 case 'i':
1362 if (printer_eat(printer, 'n')) {
1363 PRINT_STR(printer, "-");
1364 }
1365 /* fallthrough */
1366 case 'h':
1367 case 't':
1368 case 'm':
1369 case 'y':
1370 case 'o':
1371 case 'j':
1372 PRINT(printer_print_const_uint(printer, tag));
1373 break;
1374 case 'b':
1375 PARSE(printer, parser_hex_nibbles, &hex);
1376 if (try_parse_uint(hex.start, hex.len, &val)) {
1377 if (val == 0) {
1378 PRINT_STR(printer, "false");
1379 } else if (val == 1) {
1380 PRINT_STR(printer, "true");
1381 } else {
1382 INVALID(printer);
1383 }
1384 } else {
1385 INVALID(printer);
1386 }
1387 break;
1388 case 'c':
1389 PARSE(printer, parser_hex_nibbles, &hex);
1390 if (try_parse_uint(hex.start, hex.len, &val)
1391 && val < UINT32_MAX
1392 && validate_char((uint32_t)val))
1393 {
1394 char escaped_buf[ESCAPED_SIZE];
1395 size_t escaped_size = char_to_string((uint32_t)val, '\'', true, &escaped_buf);
1396
1397 PRINT_STR(printer, "'");
1398 PRINT(printer_print_buf(printer, escaped_buf, escaped_size));
1399 PRINT_STR(printer, "'");
1400 } else {
1401 INVALID(printer);
1402 }
1403 break;
1404 case 'e':
1405 OPEN_BRACE_IF_OUTSIDE_EXPR;
1406 PRINT_STR(printer, "*");
1407 PRINT(printer_print_const_str_literal(printer));
1408 break;
1409 case 'R':
1410 case 'Q':
1411 if (tag == 'R' && printer_eat(printer, 'e')) {
1412 PRINT(printer_print_const_str_literal(printer));
1413 } else {
1414 OPEN_BRACE_IF_OUTSIDE_EXPR;
1415 PRINT_STR(printer, "&");
1416 if (tag != 'R') {
1417 PRINT_STR(printer, "mut ");
1418 }
1419 PRINT(printer_print_const(printer, true));
1420 }
1421 break;
1422 case 'A':
1423 OPEN_BRACE_IF_OUTSIDE_EXPR;
1424 PRINT_STR(printer, "[");
1425 PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", ");
1426 PRINT_STR(printer, "]");
1427 break;
1428 case 'T':
1429 OPEN_BRACE_IF_OUTSIDE_EXPR;
1430 PRINT_STR(printer, "(");
1431 PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_const(printer, true)), ", ");
1432 if (count == 1) {
1433 PRINT_STR(printer, ",");
1434 }
1435 PRINT_STR(printer, ")");
1436 break;
1437 case 'V':
1438 OPEN_BRACE_IF_OUTSIDE_EXPR;
1439 PRINT(printer_print_path(printer, true));
1440 PARSE(printer, parser_ch, &tag);
1441 switch(tag) {
1442 case 'U':
1443 break;
1444 case 'T':
1445 PRINT_STR(printer, "(");
1446 PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", ");
1447 PRINT_STR(printer, ")");
1448 break;
1449 case 'S':
1450 PRINT_STR(printer, " { ");
1451 PRINT_SEP_LIST(printer, PRINT(printer_print_const_struct(printer)), ", ");
1452 PRINT_STR(printer, " }");
1453 break;
1454 default:
1455 INVALID(printer);
1456 }
1457 break;
1458 case 'B':
1459 PRINT(printer_print_backref(printer, in_value ? printer_print_const_in_value : printer_print_const_out_of_value, NULL));
1460 break;
1461 default:
1462 INVALID(printer);
1463 }
1464 #undef OPEN_BRACE_IF_OUTSIDE_EXPR
1465
1466 if (opened_brace) {
1467 PRINT_STR(printer, "}");
1468 }
1469 printer_pop_depth(printer);
1470
1471 return OverflowOk;
1472 }
1473
1474 /// A trait in a trait object may have some "existential projections"
1475 /// (i.e. associated type bindings) after it, which should be printed
1476 /// in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1477 /// To this end, this method will keep the `<...>` of an 'I' path
1478 /// open, by omitting the `>`, and return `Ok(true)` in that case.
printer_print_maybe_open_generics(struct printer * printer,bool * open)1479 static NODISCARD overflow_status printer_print_maybe_open_generics(struct printer *printer, bool *open) {
1480 if (printer_eat(printer, 'B')) {
1481 // NOTE(eddyb) the closure may not run if printing is being skipped,
1482 // but in that case the returned boolean doesn't matter.
1483 *open = false;
1484 return printer_print_backref(printer, printer_print_maybe_open_generics, open);
1485 } else if(printer_eat(printer, 'I')) {
1486 PRINT(printer_print_path(printer, false));
1487 PRINT_STR(printer, "<");
1488 PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", ");
1489 *open = true;
1490 return OverflowOk;
1491 } else {
1492 PRINT(printer_print_path(printer, false));
1493 *open = false;
1494 return OverflowOk;
1495 }
1496 }
1497
printer_print_dyn_trait(struct printer * printer)1498 static NODISCARD overflow_status printer_print_dyn_trait(struct printer *printer) {
1499 bool open;
1500 PRINT(printer_print_maybe_open_generics(printer, &open));
1501
1502 while (printer_eat(printer, 'p')) {
1503 if (!open) {
1504 PRINT_STR(printer, "<");
1505 open = true;
1506 } else {
1507 PRINT_STR(printer, ", ");
1508 }
1509
1510 struct ident name;
1511 PARSE(printer, parser_ident, &name);
1512
1513 PRINT_IDENT(printer, &name);
1514 PRINT_STR(printer, " = ");
1515 PRINT(printer_print_type(printer));
1516 }
1517
1518 if (open) {
1519 PRINT_STR(printer, ">");
1520 }
1521
1522 return OverflowOk;
1523 }
1524
printer_print_object_bounds(struct printer * printer)1525 static NODISCARD overflow_status printer_print_object_bounds(struct printer *printer) {
1526 PRINT_SEP_LIST(printer, PRINT(printer_print_dyn_trait(printer)), " + ");
1527 return OverflowOk;
1528 }
1529
printer_print_function_type(struct printer * printer)1530 static NODISCARD overflow_status printer_print_function_type(struct printer *printer) {
1531 bool is_unsafe = printer_eat(printer, 'U');
1532 const char *abi;
1533 size_t abi_len;
1534 if (printer_eat(printer, 'K')) {
1535 if (printer_eat(printer, 'C')) {
1536 abi = "C";
1537 abi_len = 1;
1538 } else {
1539 struct ident abi_ident;
1540 PARSE(printer, parser_ident, &abi_ident);
1541 if (abi_ident.ascii_len == 0 || abi_ident.punycode_len != 0) {
1542 INVALID(printer);
1543 }
1544 abi = abi_ident.ascii_start;
1545 abi_len = abi_ident.ascii_len;
1546 }
1547 } else {
1548 abi = NULL;
1549 abi_len = 0;
1550 }
1551
1552 if (is_unsafe) {
1553 PRINT_STR(printer, "unsafe ");
1554 }
1555
1556 if (abi != NULL) {
1557 PRINT_STR(printer, "extern \"");
1558
1559 // replace _ with -
1560 while (abi_len > 0) {
1561 const char *minus = memchr(abi, '_', abi_len);
1562 if (minus == NULL) {
1563 PRINT(printer_print_buf(printer, (const char*)abi, abi_len));
1564 break;
1565 } else {
1566 size_t space_to_minus = minus - abi;
1567 PRINT(printer_print_buf(printer, (const char*)abi, space_to_minus));
1568 PRINT_STR(printer, "-");
1569 abi = minus + 1;
1570 abi_len -= (space_to_minus + 1);
1571 }
1572 }
1573
1574 PRINT_STR(printer, "\" ");
1575 }
1576
1577 PRINT_STR(printer, "fn(");
1578 PRINT_SEP_LIST(printer, PRINT(printer_print_type(printer)), ", ");
1579 PRINT_STR(printer, ")");
1580
1581 if (printer_eat(printer, 'u')) {
1582 // Skip printing the return type if it's 'u', i.e. `()`.
1583 } else {
1584 PRINT_STR(printer, " -> ");
1585 PRINT(printer_print_type(printer));
1586 }
1587
1588 return OverflowOk;
1589 }
1590
printer_print_type_backref(struct printer * printer,bool * _arg)1591 static NODISCARD overflow_status printer_print_type_backref(struct printer *printer, bool *_arg) {
1592 (void)_arg;
1593 return printer_print_type(printer);
1594 }
1595
printer_print_type(struct printer * printer)1596 static NODISCARD overflow_status printer_print_type(struct printer *printer) {
1597 uint8_t tag;
1598 PARSE(printer, parser_ch, &tag);
1599
1600 const char *basic_ty = basic_type(tag);
1601 if (basic_ty) {
1602 return printer_print_str(printer, basic_ty);
1603 }
1604
1605 uint64_t count;
1606 uint64_t lt;
1607
1608 PARSE(printer, parser_push_depth);
1609 switch (tag) {
1610 case 'R':
1611 case 'Q':
1612 PRINT_STR(printer, "&");
1613 if (printer_eat(printer, 'L')) {
1614 PARSE(printer, parser_integer_62, <);
1615 if (lt != 0) {
1616 PRINT(printer_print_lifetime_from_index(printer, lt));
1617 PRINT_STR(printer, " ");
1618 }
1619 }
1620 if (tag != 'R') {
1621 PRINT_STR(printer, "mut ");
1622 }
1623 PRINT(printer_print_type(printer));
1624 break;
1625 case 'P':
1626 case 'O':
1627 PRINT_STR(printer, "*");
1628 if (tag != 'P') {
1629 PRINT_STR(printer, "mut ");
1630 } else {
1631 PRINT_STR(printer, "const ");
1632 }
1633 PRINT(printer_print_type(printer));
1634 break;
1635 case 'A':
1636 case 'S':
1637 PRINT_STR(printer, "[");
1638 PRINT(printer_print_type(printer));
1639 if (tag == 'A') {
1640 PRINT_STR(printer, "; ");
1641 PRINT(printer_print_const(printer, true));
1642 }
1643 PRINT_STR(printer, "]");
1644 break;
1645 case 'T':
1646 PRINT_STR(printer, "(");
1647 PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_type(printer)), ", ");
1648 if (count == 1) {
1649 PRINT_STR(printer, ",");
1650 }
1651 PRINT_STR(printer, ")");
1652 break;
1653 case 'F':
1654 PRINT(printer_in_binder(printer, printer_print_function_type));
1655 break;
1656 case 'D':
1657 PRINT_STR(printer, "dyn ");
1658 PRINT(printer_in_binder(printer, printer_print_object_bounds));
1659
1660 if (!printer_eat(printer, 'L')) {
1661 INVALID(printer);
1662 }
1663 PARSE(printer, parser_integer_62, <);
1664
1665 if (lt != 0) {
1666 PRINT_STR(printer, " + ");
1667 PRINT(printer_print_lifetime_from_index(printer, lt));
1668 }
1669 break;
1670 case 'B':
1671 PRINT(printer_print_backref(printer, printer_print_type_backref, NULL));
1672 break;
1673 default:
1674 // Go back to the tag, so `print_path` also sees it.
1675 if (printer->status == DemangleOk && /* safety */ printer->parser.next > 0) {
1676 printer->parser.next--;
1677 }
1678 PRINT(printer_print_path(printer, false));
1679 }
1680
1681 printer_pop_depth(printer);
1682 return OverflowOk;
1683 }
1684
rust_demangle_legacy_demangle(const char * s,size_t s_len,struct demangle_legacy * res,const char ** rest)1685 NODISCARD static demangle_status rust_demangle_legacy_demangle(const char *s, size_t s_len, struct demangle_legacy *res, const char **rest)
1686 {
1687 if (s_len > strlen(s)) {
1688 // s_len only exists to shorten the string, this is not a buffer API
1689 return DemangleInvalid;
1690 }
1691
1692 const char *inner;
1693 size_t inner_len;
1694 if (s_len >= 3 && !strncmp(s, "_ZN", 3)) {
1695 inner = s + 3;
1696 inner_len = s_len - 3;
1697 } else if (s_len >= 2 && !strncmp(s, "ZN", 2)) {
1698 // On Windows, dbghelp strips leading underscores, so we accept "ZN...E"
1699 // form too.
1700 inner = s + 2;
1701 inner_len = s_len - 2;
1702 } else if (s_len >= 4 && !strncmp(s, "__ZN", 4)) {
1703 // On OSX, symbols are prefixed with an extra _
1704 inner = s + 4;
1705 inner_len = s_len - 4;
1706 } else {
1707 return DemangleInvalid;
1708 }
1709
1710 if (!str_isascii(inner, inner_len)) {
1711 return DemangleInvalid;
1712 }
1713
1714 size_t elements = 0;
1715 const char *chars = inner;
1716 size_t chars_len = inner_len;
1717 if (chars_len == 0) {
1718 return DemangleInvalid;
1719 }
1720 char c;
1721 while ((c = *chars) != 'E') {
1722 // Decode an identifier element's length
1723 if (c < '0' || c > '9') {
1724 return DemangleInvalid;
1725 }
1726 size_t len = 0;
1727 while (c >= '0' && c <= '9') {
1728 size_t d = c - '0';
1729 if (len > SIZE_MAX / 10) {
1730 return DemangleInvalid;
1731 }
1732 len *= 10;
1733 if (len > SIZE_MAX - d) {
1734 return DemangleInvalid;
1735 }
1736 len += d;
1737
1738 chars++;
1739 chars_len--;
1740 if (chars_len == 0) {
1741 return DemangleInvalid;
1742 }
1743 c = *chars;
1744 }
1745
1746 // Advance by the length
1747 if (chars_len <= len) {
1748 return DemangleInvalid;
1749 }
1750 chars += len;
1751 chars_len -= len;
1752 elements++;
1753 }
1754 *res = (struct demangle_legacy) { inner, inner_len, elements };
1755 *rest = chars + 1;
1756 return DemangleOk;
1757 }
1758
// Returns true when the first `len` bytes of `s` are an 'h' followed only by
// hexadecimal digits (either case). A lone "h" qualifies; an empty range
// does not.
static bool is_rust_hash(const char *s, size_t len) {
    if (len == 0 || s[0] != 'h') {
        return false;
    }

    const char *const end = s + len;
    for (const char *p = s + 1; p != end; p++) {
        const char c = *p;
        const bool is_digit = c >= '0' && c <= '9';
        const bool is_lower = c >= 'a' && c <= 'f';
        const bool is_upper = c >= 'A' && c <= 'F';
        if (!is_digit && !is_lower && !is_upper) {
            return false;
        }
    }

    return true;
}
1772
rust_demangle_legacy_display_demangle(struct demangle_legacy res,char * out,size_t len,bool alternate)1773 NODISCARD static overflow_status rust_demangle_legacy_display_demangle(struct demangle_legacy res, char *out, size_t len, bool alternate)
1774 {
1775 struct printer printer = {
1776 // not actually using the parser part of the printer, just keeping it to share the format functions
1777 DemangleOk,
1778 { NULL },
1779 out,
1780 len,
1781 0,
1782 alternate
1783 };
1784 const char *inner = res.mangled;
1785 for (size_t element = 0; element < res.elements; element++) {
1786 size_t i = 0;
1787 const char *rest;
1788 for (rest = inner; rest < res.mangled + res.mangled_len && *rest >= '0' && *rest <= '9'; rest++) {
1789 i *= 10;
1790 i += *rest - '0';
1791 }
1792 if ((size_t)(res.mangled + res.mangled_len - rest) < i) {
1793 // safety: shouldn't reach this place if the input string is validated. bail out.
1794 // safety: we knwo rest <= res.mangled + res.mangled_len from the for-loop above
1795 break;
1796 }
1797
1798 size_t len = i;
1799 inner = rest + len;
1800
1801 // From here on, inner contains a pointer to the next element, rest[:len] to the current one
1802 if (alternate && element + 1 == res.elements && is_rust_hash(rest, i)) {
1803 break;
1804 }
1805 if (element != 0) {
1806 PRINT_STR(&printer, "::");
1807 }
1808
1809 if (len >= 2 && !strncmp(rest, "_$", 2)) {
1810 rest++;
1811 len--;
1812 }
1813
1814 while (len > 0) {
1815 if (rest[0] == '.') {
1816 if (len >= 2 && rest[1] == '.') {
1817 PRINT_STR(&printer, "::");
1818 rest += 2;
1819 len -= 2;
1820 } else {
1821 PRINT_STR(&printer, ".");
1822 rest += 1;
1823 len -= 1;
1824 }
1825 } else if (rest[0] == '$') {
1826 const char *escape = memchr(rest + 1, '$', len - 1);
1827 if (escape == NULL) {
1828 break;
1829 }
1830 const char *escape_start = rest + 1;
1831 size_t escape_len = escape - (rest + 1);
1832
1833 size_t next_len = len - (escape + 1 - rest);
1834 const char *next_rest = escape + 1;
1835
1836 char ch;
1837 if ((escape_len == 2 && escape_start[0] == 'S' && escape_start[1] == 'P')) {
1838 ch = '@';
1839 } else if ((escape_len == 2 && escape_start[0] == 'B' && escape_start[1] == 'P')) {
1840 ch = '*';
1841 } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'F')) {
1842 ch = '&';
1843 } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'T')) {
1844 ch = '<';
1845 } else if ((escape_len == 2 && escape_start[0] == 'G' && escape_start[1] == 'T')) {
1846 ch = '>';
1847 } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'P')) {
1848 ch = '(';
1849 } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'P')) {
1850 ch = ')';
1851 } else if ((escape_len == 1 && escape_start[0] == 'C')) {
1852 ch = ',';
1853 } else {
1854 if (escape_len > 1 && escape_start[0] == 'u') {
1855 escape_start++;
1856 escape_len--;
1857 uint64_t val;
1858 if (try_parse_uint(escape_start, escape_len, &val)
1859 && val < UINT32_MAX
1860 && validate_char((uint32_t)val))
1861 {
1862 if (!unicode_iscontrol(val)) {
1863 uint8_t wchr[4];
1864 size_t wchr_len = code_to_utf8(wchr, (uint32_t)val);
1865 PRINT(printer_print_buf(&printer, (const char*)wchr, wchr_len));
1866 len = next_len;
1867 rest = next_rest;
1868 continue;
1869 }
1870 }
1871 }
1872 break; // print the rest of this element raw
1873 }
1874 PRINT_CH(&printer, ch);
1875 len = next_len;
1876 rest = next_rest;
1877 } else {
1878 size_t j = 0;
1879 for (;j < len && rest[j] != '$' && rest[j] != '.';j++);
1880 if (j == len) {
1881 break;
1882 }
1883 PRINT(printer_print_buf(&printer, rest, j));
1884 rest += j;
1885 len -= j;
1886 }
1887 }
1888 PRINT(printer_print_buf(&printer, rest, len));
1889 }
1890
1891 if (printer.out_len < OVERFLOW_MARGIN) {
1892 return OverflowOverflow;
1893 }
1894 *printer.out = '\0';
1895 return OverflowOk;
1896 }
1897
// Returns true when each of the first `len` bytes of `s` lies in the range
// 0x21..0x7e. Space, control characters, DEL and non-ASCII bytes therefore
// all make the result false. An empty range is symbol-like.
static bool is_symbol_like(const char *s, size_t len) {
    while (len-- > 0) {
        const char ch = *s++;
        if (ch < 0x21 || ch > 0x7e) {
            return false;
        }
    }
    return true;
}
1908
rust_demangle_demangle(const char * s,struct demangle * res)1909 void rust_demangle_demangle(const char *s, struct demangle *res)
1910 {
1911 // During ThinLTO LLVM may import and rename internal symbols, so strip out
1912 // those endings first as they're one of the last manglings applied to symbol
1913 // names.
1914 const char *llvm = ".llvm.";
1915 const char *found_llvm = strstr(s, llvm);
1916 size_t s_len = strlen(s);
1917 if (found_llvm) {
1918 const char *all_hex_ptr = found_llvm + strlen(".llvm.");
1919 bool all_hex = true;
1920 for (;*all_hex_ptr;all_hex_ptr++) {
1921 if (!(('0' <= *all_hex_ptr && *all_hex_ptr <= '9') ||
1922 ('A' <= *all_hex_ptr && *all_hex_ptr <= 'F') ||
1923 *all_hex_ptr == '@')) {
1924 all_hex = false;
1925 break;
1926 }
1927 }
1928
1929 if (all_hex) {
1930 s_len = found_llvm - s;
1931 }
1932 }
1933
1934 const char *suffix;
1935 struct demangle_legacy legacy;
1936 demangle_status st = rust_demangle_legacy_demangle(s, s_len, &legacy, &suffix);
1937 if (st == DemangleOk) {
1938 *res = (struct demangle) {
1939 .style=DemangleStyleLegacy,
1940 .mangled=legacy.mangled,
1941 .mangled_len=legacy.mangled_len,
1942 .elements=legacy.elements,
1943 .original=s,
1944 .original_len=s_len,
1945 .suffix=suffix,
1946 .suffix_len=s_len - (suffix - s),
1947 };
1948 } else {
1949 struct demangle_v0 v0;
1950 st = rust_demangle_v0_demangle(s, s_len, &v0, &suffix);
1951 if (st == DemangleOk) {
1952 *res = (struct demangle) {
1953 .style=DemangleStyleV0,
1954 .mangled=v0.mangled,
1955 .mangled_len=v0.mangled_len,
1956 .elements=0,
1957 .original=s,
1958 .original_len=s_len,
1959 .suffix=suffix,
1960 .suffix_len=s_len - (suffix - s),
1961 };
1962 } else {
1963 *res = (struct demangle) {
1964 .style=DemangleStyleUnknown,
1965 .mangled=NULL,
1966 .mangled_len=0,
1967 .elements=0,
1968 .original=s,
1969 .original_len=s_len,
1970 .suffix=s,
1971 .suffix_len=0,
1972 };
1973 }
1974 }
1975
1976 // Output like LLVM IR adds extra period-delimited words. See if
1977 // we are in that case and save the trailing words if so.
1978 if (res->suffix_len) {
1979 if (res->suffix[0] == '.' && is_symbol_like(res->suffix, res->suffix_len)) {
1980 // Keep the suffix
1981 } else {
1982 // Reset the suffix and invalidate the demangling
1983 res->style = DemangleStyleUnknown;
1984 res->suffix_len = 0;
1985 }
1986 }
1987 }
1988
rust_demangle_is_known(struct demangle * res)1989 bool rust_demangle_is_known(struct demangle *res) {
1990 return res->style != DemangleStyleUnknown;
1991 }
1992
rust_demangle_display_demangle(struct demangle const * res,char * out,size_t len,bool alternate)1993 overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate) {
1994 size_t original_len = res->original_len;
1995 size_t out_len;
1996 switch (res->style) {
1997 case DemangleStyleUnknown:
1998 if (len < original_len) {
1999 return OverflowOverflow;
2000 } else {
2001 memcpy(out, res->original, original_len);
2002 out += original_len;
2003 len -= original_len;
2004 break;
2005 }
2006 break;
2007 case DemangleStyleLegacy: {
2008 struct demangle_legacy legacy = {
2009 res->mangled,
2010 res->mangled_len,
2011 res->elements
2012 };
2013 if (rust_demangle_legacy_display_demangle(legacy, out, len, alternate) == OverflowOverflow) {
2014 return OverflowOverflow;
2015 }
2016 out_len = strlen(out);
2017 out += out_len;
2018 len -= out_len;
2019 break;
2020 }
2021 case DemangleStyleV0: {
2022 struct demangle_v0 v0 = {
2023 res->mangled,
2024 res->mangled_len
2025 };
2026 if (rust_demangle_v0_display_demangle(v0, out, len, alternate) == OverflowOverflow) {
2027 return OverflowOverflow;
2028 }
2029 out_len = strlen(out);
2030 out += out_len;
2031 len -= out_len;
2032 break;
2033 }
2034 }
2035 size_t suffix_len = res->suffix_len;
2036 if (len < suffix_len || len - suffix_len < OVERFLOW_MARGIN) {
2037 return OverflowOverflow;
2038 }
2039 memcpy(out, res->suffix, suffix_len);
2040 out[suffix_len] = 0;
2041 return OverflowOk;
2042 }
2043