xref: /illumos-gate/usr/src/lib/libdemangle/common/demangle.c (revision dd72704bd9e794056c558153663c739e2012d721)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021 Jason King
14  * Copyright 2019 Joyent, Inc.
15  */
16 
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <pthread.h>
23 #include <sys/ctype.h>
24 #include <sys/debug.h>
25 #include <sys/sysmacros.h>
26 #include <stdarg.h>
27 #include "demangle-sys.h"
28 #include "demangle_int.h"
29 #include "strview.h"
30 
31 #define	DEMANGLE_DEBUG	"DEMANGLE_DEBUG"
32 
33 static pthread_once_t debug_once = PTHREAD_ONCE_INIT;
34 volatile boolean_t demangle_debug;
35 FILE *debugf = stderr;
36 
37 static struct {
38 	const char	*str;
39 	sysdem_lang_t	lang;
40 } lang_tbl[] = {
41 	{ "auto", SYSDEM_LANG_AUTO },
42 	{ "c++", SYSDEM_LANG_CPP },
43 	{ "rust", SYSDEM_LANG_RUST },
44 };
45 
46 static const char *
47 langstr(sysdem_lang_t lang)
48 {
49 	size_t i;
50 
51 	for (i = 0; i < ARRAY_SIZE(lang_tbl); i++) {
52 		if (lang == lang_tbl[i].lang)
53 			return (lang_tbl[i].str);
54 	}
55 	return ("invalid");
56 }
57 
58 boolean_t
59 sysdem_parse_lang(const char *str, sysdem_lang_t *langp)
60 {
61 	size_t i;
62 
63 	for (i = 0; i < ARRAY_SIZE(lang_tbl); i++) {
64 		if (strcmp(str, lang_tbl[i].str) == 0) {
65 			*langp = lang_tbl[i].lang;
66 			return (B_TRUE);
67 		}
68 	}
69 
70 	return (B_FALSE);
71 }
72 
73 /*
74  * A quick check if str can possibly be a mangled string. Currently, that
75  * means it must start with _Z or __Z.
76  */
77 static boolean_t
78 is_mangled(const char *str, size_t n)
79 {
80 	strview_t sv;
81 
82 	sv_init_str(&sv, str, str + n);
83 
84 	if (!sv_consume_if_c(&sv, '_'))
85 		return (B_FALSE);
86 	(void) sv_consume_if_c(&sv, '_');
87 	if (sv_consume_if_c(&sv, 'Z'))
88 		return (B_TRUE);
89 	if (sv_consume_if_c(&sv, 'R'))
90 		return (B_TRUE);
91 
92 	return (B_FALSE);
93 }
94 
95 static void
96 check_debug(void)
97 {
98 	if (getenv(DEMANGLE_DEBUG))
99 		demangle_debug = B_TRUE;
100 }
101 
102 char *
103 sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops)
104 {
105 	char *res = NULL;
106 
107 	/*
108 	 * While the language specific demangler code can handle non-NUL
109 	 * terminated strings, we currently don't expose this to consumers.
110 	 * Consumers should still pass in a NUL-terminated string.
111 	 */
112 	size_t slen;
113 
114 	VERIFY0(pthread_once(&debug_once, check_debug));
115 
116 	DEMDEBUG("name = '%s'", (str == NULL) ? "(NULL)" : str);
117 	DEMDEBUG("lang = %s (%d)", langstr(lang), lang);
118 
119 	if (str == NULL) {
120 		errno = EINVAL;
121 		return (NULL);
122 	}
123 
124 	slen = strlen(str);
125 
126 	switch (lang) {
127 		case SYSDEM_LANG_AUTO:
128 		case SYSDEM_LANG_CPP:
129 		case SYSDEM_LANG_RUST:
130 			break;
131 		default:
132 			errno = EINVAL;
133 			return (NULL);
134 	}
135 
136 	if (ops == NULL)
137 		ops = sysdem_ops_default;
138 
139 	/*
140 	 * If we were given an explicit language to demangle, we always
141 	 * use that. If not, we try to demangle as rust, then c++. Any
142 	 * mangled C++ symbol that manages to successfully demangle as a
143 	 * legacy rust symbol _should_ look the same as it can really
144 	 * only be a very simple C++ symbol. Otherwise, the rust demangling
145 	 * should fail and we can try C++.
146 	 */
147 	switch (lang) {
148 	case SYSDEM_LANG_CPP:
149 		return (cpp_demangle(str, slen, ops));
150 	case SYSDEM_LANG_RUST:
151 		return (rust_demangle(str, slen, ops));
152 	case SYSDEM_LANG_AUTO:
153 		break;
154 	}
155 
156 	/*
157 	 * To save us some potential work, if the symbol cannot
158 	 * possibly be a rust or C++ mangled name, we don't
159 	 * even attempt to demangle either.
160 	 */
161 	if (!is_mangled(str, slen)) {
162 		/*
163 		 * This does mean if we somehow get a string > 2GB
164 		 * the debugging output will be truncated, but that
165 		 * seems an acceptable tradeoff.
166 		 */
167 		int len = slen > INT_MAX ? INT_MAX : slen;
168 
169 		DEMDEBUG("ERROR: '%.*s' cannot be a mangled string", len, str);
170 		errno = EINVAL;
171 		return (NULL);
172 	}
173 
174 	DEMDEBUG("trying rust");
175 	res = rust_demangle(str, slen, ops);
176 
177 	IMPLY(ret != NULL, errno == 0);
178 	if (res != NULL)
179 		return (res);
180 
181 	DEMDEBUG("trying C++");
182 	return (cpp_demangle(str, slen, ops));
183 }
184 
185 int
186 demdebug(const char *fmt, ...)
187 {
188 	va_list ap;
189 
190 	flockfile(debugf);
191 	(void) fprintf(debugf, "LIBDEMANGLE: ");
192 	va_start(ap, fmt);
193 	(void) vfprintf(debugf, fmt, ap);
194 	(void) fputc('\n', debugf);
195 	(void) fflush(debugf);
196 	va_end(ap);
197 	funlockfile(debugf);
198 
199 	return (0);
200 }
201