1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2019 Joyent, Inc.
14 * Copyright 2021 Jason King
15 */
16
17 /* BEGIN CSTYLED */
18
19 /*
20 * This implements the 'symbol_name_mangling_v2' demangling for rust as
21 * described in Rust RFC 2603 as opposed to the original (now called
22 * legacy) mangling older versions of rust used (implemented in rust.c).
23 *
24 * The specification can be viewed at:
25 * https://github.com/rust-lang/rfcs/blob/master/text/2603-rust-symbol-name-mangling-v0.md
26 */
27
28 /* END CSTYLED */
29
30 #include <errno.h>
31 #include <libcustr.h>
32 #include <stdarg.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36
37 #include "rust.h"
38
39 /*
40 * Help track amount of additional output added to rs_demangled across
41 * a function call (to allow that portion to be output for debugging)
42 */
43 #define SAVE_LEN(_st, _len) _len = custr_len((_st)->rs_demangled)
44 #define CSTR_END(_st, _len) \
45 ((int)(custr_len((_st)->rs_demangled) - (_len))), \
46 custr_cstr((_st)->rs_demangled) + (_len)
47
/*
 * Broad category of the type attached to a const generic value.  The
 * category selects how the encoded value is rendered when the constant
 * is demangled.
 */
typedef enum const_type_class {
	CTC_INVALID = -1,	/* not usable as a const generic type */
	CTC_UNSIGNED = 0,
	CTC_SIGNED = 1,
	CTC_CHAR = 2,
	CTC_BOOL = 3
} const_type_class_t;
55
56 /*
57 * Sometimes, parsing something is optional. In this case a failure to
58 * parse is fine, however we still want to consider a fatal error as
59 * failure.
60 */
61 #define OPTIONAL(_st, _f) ((_f) || !HAS_ERROR(_st))
62
63 static boolean_t rustv0_valid_sym(const strview_t *);
64 static const_type_class_t rustv0_classify_const_type(char);
65 static boolean_t rustv0_parse_hex_num(rust_state_t *restrict,
66 strview_t *restrict, uint64_t *restrict);
67 static boolean_t rustv0_parse_base62(rust_state_t *restrict,
68 strview_t *restrict, uint64_t *restrict);
69
70 static boolean_t rustv0_parse_undisambiguated_identifier(
71 rust_state_t *restrict, strview_t *restrict, boolean_t);
72 static boolean_t rustv0_parse_disambiguator(rust_state_t *restrict,
73 strview_t *restrict, uint64_t *restrict);
74
75 static boolean_t rustv0_parse_path(rust_state_t *restrict, strview_t *restrict,
76 boolean_t);
77 static boolean_t rustv0_parse_impl_path(rust_state_t *restrict,
78 strview_t *restrict, boolean_t);
79 static boolean_t rustv0_parse_nested_path(rust_state_t *restrict,
80 strview_t *restrict, boolean_t);
81 static boolean_t rustv0_parse_basic_type(rust_state_t *restrict,
82 strview_t *restrict);
83 static boolean_t rustv0_parse_backref(rust_state_t *restrict,
84 strview_t *restrict,
85 boolean_t (*)(rust_state_t *restrict, strview_t *restrict, boolean_t),
86 boolean_t);
87 static boolean_t rustv0_parse_lifetime(rust_state_t *restrict,
88 strview_t *restrict);
89 static boolean_t rustv0_parse_const(rust_state_t *restrict,
90 strview_t *restrict, boolean_t);
91 static boolean_t rustv0_parse_fnsig(rust_state_t *restrict,
92 strview_t *restrict);
93 static boolean_t rustv0_parse_dynbounds(rust_state_t *restrict,
94 strview_t *restrict);
95 static boolean_t rustv0_parse_generic_arg(rust_state_t *restrict,
96 strview_t *restrict, boolean_t);
97
98 boolean_t
rust_demangle_v0(rust_state_t * restrict st,strview_t * restrict sv)99 rust_demangle_v0(rust_state_t *restrict st, strview_t *restrict sv)
100 {
101 boolean_t save_skip;
102 boolean_t ret;
103
104 /* Make sure all the characters are valid */
105 if (!rustv0_valid_sym(sv)) {
106 st->rs_error = EINVAL;
107 return (B_FALSE);
108 }
109
110 /*
111 * <symbol-name> = "_R" [<decimal-number>] <path>
112 * [<instantiating-crate>]
113 *
114 * We've already parsed the prefix in rust_demangle(), as well
115 * as made sure there's no [<decimal-number>] present, so
116 * start with <path>.
117 */
118 if (!rustv0_parse_path(st, sv, B_TRUE))
119 return (B_FALSE);
120
121 /* [<instantiating crate>] -- parse but don't save */
122 SKIP_BEGIN(st, save_skip);
123 ret = OPTIONAL(st, rustv0_parse_path(st, sv, B_FALSE));
124 SKIP_END(st, save_skip);
125 if (!ret)
126 return (B_FALSE);
127
128 /* If nothing's left, we know we're done */
129 if (sv_remaining(sv) == 0)
130 return (!HAS_ERROR(st));
131
132 /*
133 * LLVM sometimes will suffix symbols starting with a '.'
134 * followed by extra data. For things that start with
135 * ".llvm.", we discard the rest of the string. For
136 * other things that start with '.', we copy the
137 * results to the final string. This matches
138 * what the rust native demangler crate does, and
139 * we don't see a reason to deviate from their
140 * behavior.
141 */
142 if (sv_consume_if(sv, ".llvm."))
143 return (!HAS_ERROR(st));
144
145 if (sv_peek(sv, 0) != '.') {
146 DEMDEBUG("%s: Unexpected trailing data at the end of the "
147 "name: '%.*s'", __func__, SV_PRINT(sv));
148 st->rs_error = EINVAL;
149 return (B_FALSE);
150 }
151
152 return (rust_append_sv(st, sv_remaining(sv), sv));
153 }
154
155 /*
156 * Parse an optional list terminated by 'E'. Each result of 'fn' is
157 * separated by 'sep' in the output.
158 */
159 static boolean_t
rustv0_parse_opt_list(rust_state_t * restrict st,strview_t * restrict sv,boolean_t (* fn)(rust_state_t * restrict,strview_t * restrict,boolean_t),const char * restrict sep,boolean_t bval,size_t * restrict countp)160 rustv0_parse_opt_list(rust_state_t *restrict st, strview_t *restrict sv,
161 boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t),
162 const char *restrict sep, boolean_t bval, size_t *restrict countp)
163 {
164 size_t count = 0;
165
166 DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
167
168 while (sv_remaining(sv) > 0) {
169 if (sv_consume_if_c(sv, 'E')) {
170 if (countp != NULL)
171 *countp += count;
172 return (B_TRUE);
173 }
174
175 if (count > 0 && !rust_append(st, sep))
176 return (B_FALSE);
177
178 if (!fn(st, sv, bval))
179 return (B_FALSE);
180
181 count++;
182 }
183
184 /*
185 * An optional list should terminate with an 'E'. If we get here,
186 * we ran out of charaters and didn't terminate as we should.
187 */
188 return (B_FALSE);
189 }
190
191 static boolean_t
rustv0_parse_uint_type(rust_state_t * restrict st,strview_t * sv)192 rustv0_parse_uint_type(rust_state_t *restrict st, strview_t *sv)
193 {
194 const char *str = NULL;
195 strview_t save;
196 char c;
197
198 if (HAS_ERROR(st) || sv_remaining(sv) == 0)
199 return (B_FALSE);
200
201 sv_init_sv(&save, sv);
202
203 switch (c = sv_consume_c(sv)) {
204 case 'h':
205 str = "u8";
206 break;
207 case 't':
208 str = "u16";
209 break;
210 case 'm':
211 str = "u32";
212 break;
213 case 'y':
214 str = "u64";
215 break;
216 case 'o':
217 str = "u128";
218 break;
219 case 'j': /* usize */
220 str = "usize";
221 break;
222 default:
223 sv_init_sv(sv, &save);
224 return (B_FALSE);
225 }
226
227 DEMDEBUG("%s: %c -> %s", __func__, c, str);
228 return (rust_append(st, str));
229 }
230
231 static boolean_t
rustv0_parse_basic_type(rust_state_t * restrict st,strview_t * restrict sv)232 rustv0_parse_basic_type(rust_state_t *restrict st, strview_t *restrict sv)
233 {
234 const char *str = NULL;
235 strview_t save;
236 char c;
237
238 if (HAS_ERROR(st) || sv_remaining(sv) == 0)
239 return (B_FALSE);
240
241 if (rustv0_parse_uint_type(st, sv))
242 return (B_TRUE);
243
244 sv_init_sv(&save, sv);
245
246 switch (c = sv_consume_c(sv)) {
247 case 'a':
248 str = "i8";
249 break;
250 case 'b':
251 str = "bool";
252 break;
253 case 'c':
254 str = "char";
255 break;
256 case 'd':
257 str = "f64";
258 break;
259 case 'e':
260 str = "str";
261 break;
262 case 'f':
263 str = "f32";
264 break;
265 case 'i':
266 str = "isize";
267 break;
268 case 'l':
269 str = "i32";
270 break;
271 case 'n':
272 str = "i128";
273 break;
274 case 'p':
275 str = "_";
276 break;
277 case 's':
278 str = "i16";
279 break;
280 case 'u':
281 str = "()";
282 break;
283 case 'v':
284 str = "...";
285 break;
286 case 'x':
287 str = "i64";
288 break;
289 case 'z':
290 str = "!";
291 break;
292 default:
293 sv_init_sv(sv, &save);
294 return (B_FALSE);
295 }
296
297 DEMDEBUG("%s: %c -> %s", __func__, c, str);
298 return (rust_append(st, str));
299 }
300
301 static boolean_t
rustv0_parse_type(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)302 rustv0_parse_type(rust_state_t *restrict st, strview_t *restrict sv,
303 boolean_t dummy __unused)
304 {
305 strview_t save;
306 size_t len, tuple_elem_count;
307 boolean_t ret;
308 char c;
309
310 if (HAS_ERROR(st))
311 return (B_FALSE);
312
313 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
314
315 if (sv_remaining(sv) == 0)
316 return (B_FALSE);
317
318 SAVE_LEN(st, len);
319 sv_init_sv(&save, sv);
320
321 switch (c = sv_consume_c(sv)) {
322 case 'A':
323 ret = rust_appendc(st, '[') &&
324 rustv0_parse_type(st, sv, B_FALSE) &&
325 rust_append(st, "; ") &&
326 rustv0_parse_const(st, sv, B_FALSE) &&
327 rust_appendc(st, ']');
328 break;
329 case 'S':
330 ret = rust_appendc(st, '[') &&
331 rustv0_parse_type(st, sv, B_FALSE) &&
332 rust_appendc(st, ']');
333 break;
334 case 'T':
335 tuple_elem_count = 0;
336 ret = rust_appendc(st, '(') &&
337 rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ",
338 B_FALSE, &tuple_elem_count) &&
339 rust_append(st, (tuple_elem_count == 1) ? ",)" : ")");
340 break;
341 case 'R':
342 case 'Q':
343 /* `&mut T` or `&'... mut T` */
344 if (!(ret = rust_appendc(st, '&')))
345 break;
346
347 /*
348 * lifetime is optional, but we need to add a trailing
349 * space if present (so we cannot use the OPTIONAL macro).
350 */
351 if (rustv0_parse_lifetime(st, sv)) {
352 if (!(ret = rust_appendc(st, ' ')))
353 break;
354 } else if (HAS_ERROR(st)) {
355 break;
356 }
357
358 ret = rust_append(st, (c == 'Q') ? "mut " : "") &&
359 rustv0_parse_type(st, sv, B_FALSE);
360 break;
361 case 'P':
362 ret = rust_append(st, "*const ") &&
363 rustv0_parse_type(st, sv, B_FALSE);
364 break;
365 case 'O':
366 ret = rust_append(st, "*mut ") &&
367 rustv0_parse_type(st, sv, B_FALSE);
368 break;
369 case 'F':
370 ret = rustv0_parse_fnsig(st, sv);
371 break;
372 case 'D':
373 ret = rust_append(st, "dyn ") &&
374 rustv0_parse_dynbounds(st, sv);
375 if (!ret)
376 break;
377
378 /*
379 * Rust RFC2603 shows the lifetime as required, however
380 * it appears this is optional.
381 */
382 DEMDEBUG("%s: pre-lifetime: '%*s'", __func__, SV_PRINT(sv));
383
384 /*
385 * We only want to print a non-zero (non "'_")
386 * lifetime.
387 */
388 if (sv_consume_if(sv, "L_"))
389 break;
390
391 /*
392 * But if there is a lifetime we want to print,
393 * we want to prepend " + " before it.
394 */
395 if (sv_peek(sv, 0) == 'L' &&
396 !(ret = rust_append(st, " + ")))
397 break;
398
399 ret = rustv0_parse_lifetime(st, sv);
400 break;
401 default:
402 sv_init_sv(sv, &save);
403
404 ret = rustv0_parse_backref(st, sv, rustv0_parse_type,
405 B_FALSE) ||
406 rustv0_parse_basic_type(st, sv);
407 if (ret)
408 break;
409
410 ret = rustv0_parse_path(st, sv, B_FALSE);
411 break;
412 }
413
414 DEMDEBUG("%s: type='%.*s' (%s)", __func__, CSTR_END(st, len),
415 ret ? "success" : "fail");
416
417 return (ret);
418 }
419
420 /*
421 * <path> = "C" <identifier> crate root
422 * | "M" <impl-path> <type> <T>
423 * | "X" <impl-path> <type> <path> <T as Trait> (trait impl)
424 * | "Y" <type> <path> <T as Trait> (trait definition)
425 * | "N" <ns> <path> <identifier> ...::ident (nested path)
426 * | "I" <path> {<generic-arg>} "E" ...<T, U>
427 * | <backref>
428 */
429 static boolean_t
rustv0_parse_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)430 rustv0_parse_path(rust_state_t *restrict st, strview_t *restrict sv,
431 boolean_t in_value)
432 {
433 strview_t save;
434 uint64_t disamb = 0;
435 size_t len;
436 boolean_t ret = B_FALSE;
437 boolean_t save_skip;
438 boolean_t args_stay_save = st->rs_args_stay_open;
439 boolean_t args_open_save = st->rs_args_is_open;
440
441 if (HAS_ERROR(st))
442 return (B_FALSE);
443
444 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
445
446 if (sv_remaining(sv) == 0)
447 return (B_FALSE);
448
449 SAVE_LEN(st, len);
450 sv_init_sv(&save, sv);
451
452 switch (sv_consume_c(sv)) {
453 case 'C':
454 if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disamb)))
455 goto done;
456
457 if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
458 goto done;
459
460 if (st->rs_verbose &&
461 !rust_append_printf(st, "[%" PRIx64 "]", disamb))
462 goto done;
463 break;
464 case 'M':
465 SKIP_BEGIN(st, save_skip);
466 if (!rustv0_parse_impl_path(st, sv, in_value)) {
467 SKIP_END(st, save_skip);
468 goto done;
469 }
470 SKIP_END(st, save_skip);
471
472 if (!rust_appendc(st, '<') ||
473 !rustv0_parse_type(st, sv, B_FALSE) ||
474 !rust_appendc(st, '>'))
475 goto done;
476 break;
477 case 'X':
478 SKIP_BEGIN(st, save_skip);
479 if (!rustv0_parse_impl_path(st, sv, in_value)) {
480 SKIP_END(st, save_skip);
481 goto done;
482 }
483 SKIP_END(st, save_skip);
484 /*FALLTHRU*/
485 case 'Y':
486 if (!rust_appendc(st, '<') ||
487 !rustv0_parse_type(st, sv, B_FALSE) ||
488 !rust_append(st, " as ") ||
489 !rustv0_parse_path(st, sv, B_FALSE) ||
490 !rust_appendc(st, '>'))
491 goto done;
492 break;
493 case 'N':
494 if (!rustv0_parse_nested_path(st, sv, in_value))
495 goto done;
496 break;
497 case 'I':
498 st->rs_args_stay_open = B_FALSE;
499 st->rs_args_is_open = B_FALSE;
500
501 if (!rustv0_parse_path(st, sv, in_value))
502 goto done;
503
504 if (in_value && !rust_append(st, "::"))
505 goto done;
506
507 if (!rust_appendc(st, '<') ||
508 !rustv0_parse_opt_list(st, sv, rustv0_parse_generic_arg,
509 ", ", B_FALSE, NULL))
510 goto done;
511
512 st->rs_args_stay_open = args_stay_save;
513 st->rs_args_is_open = args_open_save;
514
515 /*
516 * If we were asked to not close our list, then don't and
517 * indicate that the list is open.
518 */
519 if (st->rs_args_stay_open) {
520 st->rs_args_stay_open = B_FALSE;
521 st->rs_args_is_open = B_TRUE;
522 } else if (!rust_appendc(st, '>')) {
523 goto done;
524 }
525 break;
526 default:
527 /*
528 * Didn't recognize the letter, so it has to be a path. Restore
529 * sv to state prior to switch and continue.
530 */
531 sv_init_sv(sv, &save);
532 if (!rustv0_parse_backref(st, sv, rustv0_parse_path, in_value))
533 goto done;
534 }
535
536 ret = B_TRUE;
537
538 done:
539 DEMDEBUG("%s: path='%.*s' (%s)", __func__, CSTR_END(st, len),
540 ret ? "success" : "fail");
541
542 return (ret);
543 }
544
545 static boolean_t
rustv0_parse_impl_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)546 rustv0_parse_impl_path(rust_state_t *restrict st, strview_t *restrict sv,
547 boolean_t in_value)
548 {
549 uint64_t val = 0;
550
551 return (OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &val)) &&
552 rustv0_parse_path(st, sv, in_value));
553 }
554
555 /*
556 * A bit of a hack -- when printing a nested path, we need to know
557 * if the identifier is there or not in order to correctly format
558 * the output preceeding it (when present). This peeks ahead and
559 * determines this.
560 */
561 static boolean_t
rustv0_has_name(rust_state_t * restrict st,strview_t * restrict sv,boolean_t * has_namep)562 rustv0_has_name(rust_state_t *restrict st, strview_t *restrict sv,
563 boolean_t *has_namep)
564 {
565 strview_t save;
566
567 if (HAS_ERROR(st))
568 return (B_FALSE);
569
570 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
571
572 if (sv_remaining(sv) == 0)
573 return (B_FALSE);
574
575 sv_init_sv(&save, sv);
576
577 /* For checking the length, we don't care if it's punycode or not */
578 (void) sv_consume_if_c(&save, 'u');
579
580 if (sv_remaining(sv) == 0) {
581 st->rs_error = EINVAL;
582 return (B_FALSE);
583 }
584
585 if (sv_consume_if_c(&save, '0')) {
586 *has_namep = B_FALSE;
587 return (B_TRUE);
588 }
589
590 *has_namep = B_TRUE;
591 return (B_TRUE);
592 }
593
594 static boolean_t
rustv0_parse_nested_path(rust_state_t * restrict st,strview_t * restrict sv,boolean_t in_value)595 rustv0_parse_nested_path(rust_state_t *restrict st, strview_t *restrict sv,
596 boolean_t in_value)
597 {
598 uint64_t disambiguator = 0;
599 size_t len = 0;
600 char ns;
601 boolean_t ret = B_FALSE;
602 boolean_t has_name;
603
604 if (HAS_ERROR(st))
605 return (B_FALSE);
606
607 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
608
609 if (sv_remaining(sv) == 0)
610 return (B_FALSE);
611
612 SAVE_LEN(st, len);
613
614 ns = sv_consume_c(sv);
615
616 if (!rustv0_parse_path(st, sv, in_value))
617 goto done;
618
619 if (!OPTIONAL(st, rustv0_parse_disambiguator(st, sv, &disambiguator)))
620 goto done;
621
622 if (!rustv0_has_name(st, sv, &has_name))
623 goto done;
624
625 if (ISUPPER(ns)) {
626 if (!rust_append(st, "::{"))
627 goto done;
628
629 switch (ns) {
630 case 'C':
631 if (!rust_append(st, "closure"))
632 goto done;
633 break;
634 case 'S':
635 if (!rust_append(st, "shim"))
636 goto done;
637 break;
638 default:
639 if (!rust_appendc(st, ns))
640 goto done;
641 break;
642 }
643
644 if (has_name && !rust_appendc(st, ':'))
645 goto done;
646
647 if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE))
648 goto done;
649
650 ret = rust_append_printf(st, "#%" PRIu64 "}", disambiguator);
651 } else {
652 if (has_name) {
653 if (!(ret = rust_append(st, "::")))
654 goto done;
655 }
656 ret = rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE);
657 }
658
659 done:
660 DEMDEBUG("%s: nested path = '%.*s' (%s)", __func__, CSTR_END(st, len),
661 ret ? "success" : "fail");
662
663 return (ret);
664 }
665
666 /*
667 * <disambiguator> = "s" <base-64-number>
668 *
669 */
670 static boolean_t
rustv0_parse_disambiguator(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * valp)671 rustv0_parse_disambiguator(rust_state_t *restrict st, strview_t *restrict sv,
672 uint64_t *valp)
673 {
674 if (HAS_ERROR(st) || sv_remaining(sv) < 2)
675 return (B_FALSE);
676
677 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
678
679 *valp = 0;
680
681 if (!sv_consume_if_c(sv, 's'))
682 return (B_FALSE);
683
684 if (!rustv0_parse_base62(st, sv, valp)) {
685 st->rs_error = EINVAL;
686 return (B_FALSE);
687 }
688
689 /*
690 * Rust RFC 2603 details this in Appendix A, but not the main
691 * portion of the RFC. If no disambiguator is present, the value
692 * is 0, if the decoded value is 0, the index is 1, ...
693 * rustv0_parse_base62() already adjusts _ -> 0, 0 -> 1, so we
694 * only need to add one here to complete the adjustment.
695 */
696 *valp = *valp + 1;
697
698 DEMDEBUG("%s: disambiguator=%" PRIu64, __func__, *valp);
699 return (B_TRUE);
700 }
701
702 /* <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> */
703 static boolean_t
rustv0_parse_undisambiguated_identifier(rust_state_t * restrict st,strview_t * restrict sv,boolean_t repl_underscore)704 rustv0_parse_undisambiguated_identifier(rust_state_t *restrict st,
705 strview_t *restrict sv, boolean_t repl_underscore)
706 {
707 uint64_t len = 0;
708 boolean_t puny = B_FALSE;
709
710 if (HAS_ERROR(st))
711 return (B_FALSE);
712
713 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
714
715 if (sv_remaining(sv) == 0)
716 return (B_FALSE);
717
718 if (sv_consume_if_c(sv, 'u'))
719 puny = B_TRUE;
720
721 if (!rust_parse_base10(st, sv, &len))
722 return (B_FALSE);
723
724 /* skip optional separator '_' */
725 (void) sv_consume_if_c(sv, '_');
726
727 if (sv_remaining(sv) < len) {
728 DEMDEBUG("%s: ERROR: identifier length (%" PRIu64 ") "
729 "> remaining bytes (%zu)", __func__, len,
730 sv_remaining(sv));
731 return (B_FALSE);
732 }
733
734 /* 0 length identifiers are acceptable */
735 if (len == 0)
736 return (B_TRUE);
737
738 if (puny) {
739 strview_t ident;
740
741 sv_init_sv_range(&ident, sv, len);
742 if (!rustv0_puny_decode(st, &ident, repl_underscore))
743 return (B_FALSE);
744
745 sv_consume_n(sv, len);
746 return (B_TRUE);
747 }
748
749 /*
750 * rust identifiers do not contain '-'. However ABI identifiers
751 * are allowed to contain them (e.g. extern "foo-bar" fn ...).
752 * They are substituted with '_' in the mangled output. If we
753 * do not need to reverse this, we can just append 'len' bytes
754 * of sv. Otherwise we need to go through and reverse this
755 * substitution.
756 */
757 if (!repl_underscore)
758 return (rust_append_sv(st, len, sv));
759
760 /*
761 * We checked earlier that len < sv_remaining(sv); so this loop
762 * cannot overrun.
763 */
764 for (size_t i = 0; i < len; i++) {
765 char c = sv_consume_c(sv);
766
767 if (c == '_')
768 c = '-';
769
770 if (!rust_appendc(st, c))
771 return (B_FALSE);
772 }
773
774 return (B_TRUE);
775 }
776
777 /* <backref> = "B" <base-62-number> */
778 static boolean_t
rustv0_parse_backref(rust_state_t * restrict st,strview_t * restrict sv,boolean_t (* fn)(rust_state_t * restrict,strview_t * restrict,boolean_t b),boolean_t bval)779 rustv0_parse_backref(rust_state_t *restrict st, strview_t *restrict sv,
780 boolean_t (*fn)(rust_state_t *restrict, strview_t *restrict, boolean_t b),
781 boolean_t bval)
782 {
783 strview_t backref;
784 strview_t target;
785 uint64_t idx = 0;
786 size_t save_len;
787 size_t len;
788
789 if (HAS_ERROR(st))
790 return (B_FALSE);
791
792 sv_init_sv(&backref, sv);
793
794 if (!sv_consume_if_c(sv, 'B'))
795 return (B_FALSE);
796
797 DEMDEBUG("%s: str='B%.*s'", __func__, SV_PRINT(sv));
798
799 if (!rustv0_parse_base62(st, sv, &idx)) {
800 st->rs_error = EINVAL;
801 return (B_FALSE);
802 }
803
804 /*
805 * Determine how many bytes we've consumed (up to the start of
806 * the current backref token).
807 */
808 VERIFY3P(backref.sv_first, >=, st->rs_orig.sv_first);
809 len = (size_t)(uintptr_t)(backref.sv_first - st->rs_orig.sv_first);
810
811 /*
812 * The backref can only refer to an index prior to the start of
813 * the current backref token -- that is must always refer back in
814 * the string, never to the current position or beyond.
815 */
816 if (idx >= len) {
817 DEMDEBUG("%s: ERROR: backref index (%" PRIu64 ") "
818 "is out of range [0, %zu)", __func__, idx, len);
819 st->rs_error = ERANGE;
820 return (B_FALSE);
821 }
822
823 /*
824 * Create a strview_t of the original string (sans prefix) by
825 * copying from st->rs_orig. The length of the target strview_t is
826 * capped to end immediately prior to this backref token. Since we
827 * enforce that backrefs must always refer to already processed
828 * portions of the string (i.e. must always refer backwards), and the
829 * length of the strview_t is set to end prior to the start of this
830 * backref token, we guarantee processing of a backref will always
831 * terminate before it can possibly encounter this backref token
832 * and cause a loop -- either the processing terminates normally or
833 * it reaches the end of the capped strview_t.
834 */
835 sv_init_sv_range(&target, &st->rs_orig, len);
836
837 /*
838 * Consume all the input in the target strview_t up to the index
839 */
840 sv_consume_n(&target, idx);
841
842 DEMDEBUG("%s: backref starting at %" PRIu64 " str='%.*s'%s", __func__,
843 idx, SV_PRINT(&target), st->rs_skip ? " (skipping)" : "");
844
845 /*
846 * If we're skipping the output, there's no reason to bother reparsing
847 * the output -- we're not going to save it. We still setup everything
848 * so that the debug output is still emitted.
849 */
850 if (st->rs_skip)
851 return (B_TRUE);
852
853 SAVE_LEN(st, save_len);
854 if (!fn(st, &target, bval))
855 return (B_FALSE);
856
857 DEMDEBUG("%s: backref is '%.*s'", __func__, CSTR_END(st, save_len));
858 return (B_TRUE);
859 }
860
861 static boolean_t
rustv0_append_lifetime(rust_state_t * restrict st,uint64_t lifetime)862 rustv0_append_lifetime(rust_state_t *restrict st, uint64_t lifetime)
863 {
864 uint64_t bound_lt;
865
866 if (HAS_ERROR(st))
867 return (B_FALSE);
868
869 if (!rust_appendc(st, '\''))
870 return (B_FALSE);
871
872 if (lifetime == 0)
873 return (rust_appendc(st, '_'));
874
875 if (sub_overflow(st->rs_lt_depth, lifetime, &bound_lt)) {
876 DEMDEBUG("%s: ERROR: lifetime value %" PRIu64
877 " > current depth %" PRIu64, __func__, lifetime,
878 st->rs_lt_depth);
879 st->rs_lt_depth = ERANGE;
880 return (B_FALSE);
881 }
882
883 /*
884 * Use 'a, 'b, ...
885 */
886 if (bound_lt < 26) {
887 char c = (char)bound_lt + 'a';
888 return (rust_append_printf(st, "%c", c));
889 }
890
891 /*
892 * Otherwise, use '_123, '_456, ...
893 */
894 return (rust_append_printf(st, "_%" PRIu64, bound_lt));
895 }
896
897 static boolean_t
rustv0_parse_lifetime(rust_state_t * restrict st,strview_t * restrict sv)898 rustv0_parse_lifetime(rust_state_t *restrict st, strview_t *restrict sv)
899 {
900 uint64_t lifetime;
901
902 if (!sv_consume_if_c(sv, 'L'))
903 return (B_FALSE);
904
905 if (!rustv0_parse_base62(st, sv, &lifetime))
906 return (B_FALSE);
907
908 return (rustv0_append_lifetime(st, lifetime));
909 }
910
911 static boolean_t
rustv0_parse_const_data(rust_state_t * restrict st,const_type_class_t type_class,strview_t * restrict sv)912 rustv0_parse_const_data(rust_state_t *restrict st,
913 const_type_class_t type_class, strview_t *restrict sv)
914 {
915 uint64_t val = 0;
916 size_t save_len;
917 boolean_t neg = B_FALSE;
918 boolean_t ret = B_FALSE;
919
920 VERIFY3S(type_class, !=, CTC_INVALID);
921
922 if (HAS_ERROR(st))
923 return (B_FALSE);
924
925 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
926 SAVE_LEN(st, save_len);
927
928 if (sv_remaining(sv) == 0)
929 return (B_FALSE);
930
931 if (type_class == CTC_SIGNED && sv_consume_if_c(sv, 'n'))
932 neg = B_TRUE;
933
934 ret = OPTIONAL(st, rustv0_parse_hex_num(st, sv, &val)) &&
935 sv_consume_if_c(sv, '_');
936 if (!ret)
937 goto done;
938
939 switch (type_class) {
940 case CTC_SIGNED:
941 case CTC_UNSIGNED:
942 ret = rust_append_printf(st, "%s%" PRIu64, neg ? "-" : "", val);
943 break;
944 case CTC_BOOL:
945 if (val > 1) {
946 DEMDEBUG("%s: invalid bool val %" PRIu64, __func__,
947 val);
948 ret = B_FALSE;
949 break;
950 }
951 ret = rust_append_printf(st, "%s",
952 (val == 0) ? "false" : "true");
953 break;
954 case CTC_CHAR:
955 if (val > UINT32_MAX) {
956 DEMDEBUG("%s: char value %" PRIu64 " out of range",
957 __func__, val);
958 ret = B_FALSE;
959 break;
960 }
961
962 ret = rust_appendc(st, '\'') && rust_append_utf8_c(st, val) &&
963 rust_appendc(st, '\'');
964 break;
965 default:
966 ret = B_FALSE;
967 }
968
969 done:
970 DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, save_len),
971 ret ? "success" : "fail");
972
973 return (ret);
974 }
975
976 static boolean_t
rustv0_parse_const(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)977 rustv0_parse_const(rust_state_t *restrict st, strview_t *restrict sv,
978 boolean_t dummy __unused)
979 {
980 strview_t type;
981 size_t start_len;
982 const_type_class_t ctype_class;
983 char ctype;
984 boolean_t save_skip;
985 boolean_t ret;
986
987 if (HAS_ERROR(st))
988 return (B_FALSE);
989
990 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
991 SAVE_LEN(st, start_len);
992
993 if (sv_remaining(sv) == 0)
994 return (B_FALSE);
995
996 if (rustv0_parse_backref(st, sv, rustv0_parse_const, B_FALSE))
997 return (B_TRUE);
998
999 if (sv_consume_if_c(sv, 'p')) {
1000 ret = rust_appendc(st, '_');
1001 goto done;
1002 }
1003
1004 ctype = sv_peek(sv, 0);
1005 ctype_class = rustv0_classify_const_type(ctype);
1006 if (ctype_class == CTC_INVALID) {
1007 DEMDEBUG("%s: const type isn't a valid const generic type",
1008 __func__);
1009 return (B_FALSE);
1010 }
1011
1012 /*
1013 * This isn't spelled out clearly in Rust RFC 2603, but currently
1014 * only unsigned int types are allowed at this point. However, we
1015 * have a bit of a potential tricky situation. Unlike formatting
1016 * the other tokens, if we want to display the type, we do so
1017 * _after_ the value, even though the type appears first.
1018 *
1019 * This is bit of a hack, but we save off the input position from
1020 * sv before the parse the type. We then parse it without saving
1021 * the resulting value, then parse and output the constant. If
1022 * we wish to then display the type, we can go back and parse
1023 * the type again, this time saving the result.
1024 */
1025 sv_init_sv(&type, sv);
1026
1027 SKIP_BEGIN(st, save_skip);
1028 ret = rustv0_parse_type(st, sv, B_FALSE);
1029 SKIP_END(st, save_skip);
1030
1031 if (!ret) {
1032 DEMDEBUG("%s: const type isn't valid", __func__);
1033 return (B_FALSE);
1034 }
1035
1036 if (sv_consume_if_c(sv, 'p')) {
1037 ret = rust_appendc(st, '_');
1038 } else {
1039 ret = rustv0_parse_const_data(st, ctype_class, sv);
1040 }
1041 if (!ret)
1042 goto done;
1043
1044 if (st->rs_show_const_type) {
1045 ret = rust_append(st, ": ") &&
1046 rustv0_parse_uint_type(st, &type);
1047 }
1048
1049 done:
1050 DEMDEBUG("%s: const='%.*s' (%s)", __func__, CSTR_END(st, start_len),
1051 ret ? "success" : "fail");
1052 return (ret);
1053 }
1054
1055 static boolean_t
rustv0_parse_abi(rust_state_t * restrict st,strview_t * restrict sv)1056 rustv0_parse_abi(rust_state_t *restrict st, strview_t *restrict sv)
1057 {
1058 DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
1059
1060 if (sv_consume_if_c(sv, 'C'))
1061 return (rust_appendc(st, 'C'));
1062
1063 return (rustv0_parse_undisambiguated_identifier(st, sv, B_TRUE));
1064 }
1065
1066 static boolean_t
rustv0_parse_binder(rust_state_t * restrict st,strview_t * restrict sv)1067 rustv0_parse_binder(rust_state_t *restrict st, strview_t *restrict sv)
1068 {
1069 uint64_t n, i;
1070
1071 if (!sv_consume_if_c(sv, 'G'))
1072 return (B_FALSE);
1073
1074 if (!rustv0_parse_base62(st, sv, &n))
1075 return (B_FALSE);
1076 n += 1;
1077
1078 if (!rust_append(st, "for<"))
1079 return (B_FALSE);
1080
1081 for (i = 0; i < n; i++) {
1082 if (i > 0 && !rust_append(st, ", "))
1083 return (B_FALSE);
1084
1085 st->rs_lt_depth++;
1086 if (!rustv0_append_lifetime(st, 1))
1087 return (B_FALSE);
1088 }
1089
1090 if (!rust_append(st, "> "))
1091 return (B_FALSE);
1092
1093 return (B_TRUE);
1094 }
1095
1096 /*
1097 * <fn-sig> := [<binder>] ["U"] ["K" <abi>] {type} "E" <type>
1098 *
1099 * Note that while the Rust RFC states the binder is manditory, based on
1100 * actual examples, and comparing with the rust-based demangler, it is in
1101 * fact optional.
1102 */
1103 static boolean_t
rustv0_parse_fnsig(rust_state_t * restrict st,strview_t * restrict sv)1104 rustv0_parse_fnsig(rust_state_t *restrict st, strview_t *restrict sv)
1105 {
1106 uint64_t save_lt = st->rs_lt_depth;
1107
1108 DEMDEBUG("%s: str = '%.*s'", __func__, SV_PRINT(sv));
1109
1110 if (!OPTIONAL(st, rustv0_parse_binder(st, sv)))
1111 return (B_FALSE);
1112
1113 if (sv_consume_if_c(sv, 'U') && !rust_append(st, "unsafe "))
1114 return (B_FALSE);
1115
1116 if (sv_consume_if_c(sv, 'K') &&
1117 (!rust_append(st, "extern \"") || !rustv0_parse_abi(st, sv) ||
1118 !rust_append(st, "\" ")))
1119 return (B_FALSE);
1120
1121 if (!rust_append(st, "fn("))
1122 return (B_FALSE);
1123
1124 if (!rustv0_parse_opt_list(st, sv, rustv0_parse_type, ", ", B_FALSE,
1125 NULL)) {
1126 return (B_FALSE);
1127 }
1128
1129 if (!rust_appendc(st, ')'))
1130 return (B_FALSE);
1131
1132 /* If the return type is (), don't print it */
1133 if (!sv_consume_if_c(sv, 'u')) {
1134 if (!rust_append(st, " -> "))
1135 return (B_FALSE);
1136
1137 if (!rustv0_parse_type(st, sv, B_FALSE))
1138 return (B_FALSE);
1139 }
1140
1141 st->rs_lt_depth = save_lt;
1142
1143 return (B_TRUE);
1144 }
1145
1146 /*
1147 * <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
1148 */
1149 static boolean_t
rustv0_parse_dyn_trait_assoc_binding(rust_state_t * restrict st,strview_t * restrict sv,boolean_t open)1150 rustv0_parse_dyn_trait_assoc_binding(rust_state_t *restrict st,
1151 strview_t *restrict sv, boolean_t open)
1152 {
1153 size_t save_len;
1154
1155 if (HAS_ERROR(st))
1156 return (B_FALSE);
1157
1158 if (sv_remaining(sv) == 0)
1159 return (B_FALSE);
1160
1161 if (!sv_consume_if_c(sv, 'p'))
1162 return (B_FALSE);
1163
1164 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1165 SAVE_LEN(st, save_len);
1166
1167 if (!rust_append(st, open ? ", " : "<"))
1168 return (B_FALSE);
1169
1170 if (!rustv0_parse_undisambiguated_identifier(st, sv, B_FALSE)) {
1171 st->rs_error = EINVAL;
1172 return (B_FALSE);
1173 }
1174
1175 if (!rust_append(st, " = "))
1176 return (B_FALSE);
1177
1178 if (!rustv0_parse_type(st, sv, B_FALSE)) {
1179 st->rs_error = EINVAL;
1180 return (B_FALSE);
1181 }
1182
1183 DEMDEBUG("%s: binding='%.*s'", __func__, CSTR_END(st, save_len));
1184
1185 return (B_TRUE);
1186 }
1187
1188 static boolean_t
rustv0_parse_dyn_trait(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)1189 rustv0_parse_dyn_trait(rust_state_t *restrict st, strview_t *restrict sv,
1190 boolean_t dummy __unused)
1191 {
1192 boolean_t stay_save = st->rs_args_stay_open;
1193 boolean_t open_save = st->rs_args_is_open;
1194 boolean_t open = B_FALSE;
1195
1196 if (HAS_ERROR(st))
1197 return (B_FALSE);
1198
1199 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1200
1201 /*
1202 * This is a bit subtle, but when formatting a trait in trait,
1203 * we want something like this:
1204 *
1205 * dyn Trait<T, U, Assoc=X>
1206 *
1207 * instead of
1208 *
1209 * dyn Trait<T, U, <Assoc=X>>
1210 *
1211 * So when parsing the path, if we encounter generic arguments, we want
1212 * the arg list to remain open at the end of processing the path so
1213 * we can append the bindings to it. We set rs_args_stay_open to B_TRUE
1214 * to indidcate to rustv0_parse_path() that a generic argument list
1215 * should not be closed (i.e. don't append a '>' at the end of the
1216 * list). If rustv0_parse_path() encounters a list of generic arguments,
1217 * it will also set rs->args_is_open to indiciate it opened the list.
1218 * We save this in 'open' so that when we process the associated
1219 * bindings, we know if we need to open the list on the first binding
1220 * or not -- we don't want 'dyn Trait<>' if there are no bindings,
1221 * just 'dyn Trait'.
1222 */
1223 st->rs_args_stay_open = B_TRUE;
1224 st->rs_args_is_open = B_FALSE;
1225
1226 if (!rustv0_parse_path(st, sv, B_FALSE)) {
1227 st->rs_args_stay_open = stay_save;
1228 st->rs_args_is_open = open_save;
1229 return (B_FALSE);
1230 }
1231
1232 open = st->rs_args_is_open;
1233
1234 st->rs_args_stay_open = stay_save;
1235 st->rs_args_is_open = open_save;
1236
1237 while (rustv0_parse_dyn_trait_assoc_binding(st, sv, open)) {
1238 open = B_TRUE;
1239 }
1240
1241 if (HAS_ERROR(st))
1242 return (B_FALSE);
1243
1244 if (open && !rust_appendc(st, '>'))
1245 return (B_FALSE);
1246
1247 return (!HAS_ERROR(st));
1248 }
1249
1250 static boolean_t
rustv0_parse_dynbounds(rust_state_t * restrict st,strview_t * restrict sv)1251 rustv0_parse_dynbounds(rust_state_t *restrict st, strview_t *restrict sv)
1252 {
1253 uint64_t save_lt = st->rs_lt_depth;
1254
1255 if (HAS_ERROR(st))
1256 return (B_FALSE);
1257
1258 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1259
1260 /*
1261 * This is another case where Rust RFC2603 seems to disagree with
1262 * the implementation. The RFC implies this is mandatory, while
1263 * the implementations treat it as optional.
1264 */
1265 if (!OPTIONAL(st, rustv0_parse_binder(st, sv)))
1266 return (B_FALSE);
1267
1268 if (!rustv0_parse_opt_list(st, sv, rustv0_parse_dyn_trait, " + ",
1269 B_FALSE, NULL))
1270 return (B_FALSE);
1271
1272 st->rs_lt_depth = save_lt;
1273
1274 return (B_TRUE);
1275 }
1276
1277 static boolean_t
rustv0_parse_generic_arg(rust_state_t * restrict st,strview_t * restrict sv,boolean_t dummy __unused)1278 rustv0_parse_generic_arg(rust_state_t *restrict st, strview_t *restrict sv,
1279 boolean_t dummy __unused)
1280 {
1281 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1282
1283 if (sv_consume_if_c(sv, 'K'))
1284 return (rustv0_parse_const(st, sv, B_FALSE));
1285
1286 if (rustv0_parse_lifetime(st, sv))
1287 return (B_TRUE);
1288
1289 return (rustv0_parse_type(st, sv, B_FALSE));
1290 }
1291
1292 /*
1293 * Parse a hex value into *valp. Note that rust only uses lower case
1294 * hex values.
1295 */
1296 static boolean_t
rustv0_parse_hex_num(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)1297 rustv0_parse_hex_num(rust_state_t *restrict st, strview_t *restrict sv,
1298 uint64_t *restrict valp)
1299 {
1300 uint64_t val = 0;
1301 size_t ndigits = 0;
1302
1303 if (HAS_ERROR(st))
1304 return (B_FALSE);
1305
1306 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1307
1308 if (sv_remaining(sv) == 0)
1309 return (B_FALSE);
1310
1311 /*
1312 * Unfortunately, Rust RFC 2603 also doesn't not explicty define
1313 * {hex-digits}. We follow what decimal digits does, and treat a
1314 * leading 0 as a terminator.
1315 */
1316 while (sv_remaining(sv) > 0) {
1317 char c = sv_peek(sv, 0);
1318
1319 if (ISDIGIT(c)) {
1320 val *= 16;
1321 val += c - '0';
1322 } else if (c >= 'a' && c <= 'f') {
1323 val *= 16;
1324 val += c - 'a' + 10;
1325 } else {
1326 break;
1327 }
1328
1329 sv_consume_n(sv, 1);
1330
1331 if (++ndigits == 1 && val == 0)
1332 break;
1333 }
1334
1335 if (ndigits > 0)
1336 *valp = val;
1337
1338 return ((ndigits > 0) ? B_TRUE : B_FALSE);
1339 }
1340
1341 /*
1342 * Parse a base62 number into *valp. The number is explicitly terminated
1343 * by a '_'. The values are also offset by 0 -- that is '_' == 0,
1344 * '0_' == 1, ...
1345 */
1346 static boolean_t
rustv0_parse_base62(rust_state_t * restrict st,strview_t * restrict sv,uint64_t * restrict valp)1347 rustv0_parse_base62(rust_state_t *restrict st, strview_t *restrict sv,
1348 uint64_t *restrict valp)
1349 {
1350 uint64_t val = 0;
1351 char c;
1352
1353 if (HAS_ERROR(st))
1354 return (B_FALSE);
1355
1356 DEMDEBUG("%s: str='%.*s'", __func__, SV_PRINT(sv));
1357
1358 if (sv_remaining(sv) == 0)
1359 return (B_FALSE);
1360
1361 /* A terminating '_' without any digits is 0 */
1362 if (sv_consume_if_c(sv, '_')) {
1363 *valp = 0;
1364 return (B_TRUE);
1365 }
1366
1367 /* Need at least one valid digit if > 0 */
1368 if (!ISALNUM(sv_peek(sv, 0)))
1369 return (B_FALSE);
1370
1371 while (sv_remaining(sv) > 0) {
1372 c = sv_consume_c(sv);
1373
1374 if (c == '_') {
1375 /*
1376 * Because a lone '_' was already handled earlier,
1377 * we know we've had at least one other digit and
1378 * can increment the value and return.
1379 */
1380 *valp = val + 1;
1381 return (B_TRUE);
1382 } else if (ISDIGIT(c)) {
1383 val *= 62;
1384 val += c - '0';
1385 } else if (ISLOWER(c)) {
1386 val *= 62;
1387 val += c - 'a' + 10;
1388 } else if (ISUPPER(c)) {
1389 val *= 62;
1390 val += c - 'A' + 36;
1391 } else {
1392 return (B_FALSE);
1393 }
1394 }
1395
1396 /* We reached the end of the string without a terminating _ */
1397 return (B_FALSE);
1398 }
1399
1400 static const_type_class_t
rustv0_classify_const_type(char type)1401 rustv0_classify_const_type(char type)
1402 {
1403 switch (type) {
1404 case 'h': case 't': case 'm': case 'y': case 'o': case 'j':
1405 return (CTC_UNSIGNED);
1406 case 'a': case 'i': case 'l': case 'n': case 's': case 'x':
1407 return (CTC_SIGNED);
1408 case 'b':
1409 return (CTC_BOOL);
1410 case 'c':
1411 return (CTC_CHAR);
1412 default:
1413 return (CTC_INVALID);
1414 }
1415 }
1416
1417 /*
1418 * Make sure the name is a plausible mangled rust symbol.
1419 * Non-ASCII are never allowed. Rust itself uses [_0-9A-Za-z], however
1420 * some things will add a suffix starting with a '.' (e.g. LLVM thin LTO).
1421 * As such we proceed in two phases. We first only allow [_0-9A-Z-az] until
1422 * we encounter a '.'. At that point, any ASCII character is allowed.
1423 */
1424 static boolean_t
rustv0_valid_sym(const strview_t * sv)1425 rustv0_valid_sym(const strview_t *sv)
1426 {
1427 size_t i;
1428 boolean_t check_rust = B_TRUE;
1429
1430 for (i = 0; i < sv->sv_rem; i++) {
1431 char c = sv->sv_first[i];
1432
1433 if (ISALNUM(c) || c == '_')
1434 continue;
1435
1436 if (c == '.') {
1437 check_rust = B_FALSE;
1438 continue;
1439 }
1440
1441 if (check_rust || (c & 0x80) != 0) {
1442 DEMDEBUG("%s: ERROR found invalid character '%c' "
1443 "in '%.*s' at index %zu",
1444 __func__, c, SV_PRINT(sv), i);
1445 return (B_FALSE);
1446 }
1447 }
1448 return (B_TRUE);
1449 }
1450