xref: /illumos-gate/usr/src/cmd/demangle/demangle.c (revision 8fa80b6906806016ca3449e9e20ce5f6b49653dc)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2020 Joyent, Inc.
14  */
15 
16 #include <ctype.h>
17 #include <demangle-sys.h>
18 #include <err.h>
19 #include <errno.h>
20 #include <libcustr.h>
21 #include <locale.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 
25 #define	_(x) gettext(x)
26 
27 locale_t c_locale;
28 
29 static int do_symbols(sysdem_lang_t, int, char * const *);
30 static int do_input(sysdem_lang_t, FILE *restrict, FILE *restrict);
31 static int do_demangle(const char *, sysdem_lang_t, FILE *);
32 static void appendc(custr_t *, char);
33 static void xputc(int, FILE *);
34 
/*
 * Print a one-line synopsis to stderr and terminate the process with
 * exit status 2. Does not return.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void) fprintf(stderr, _("Usage: %s [-l lang] [sym...]\n"), prog);
	exit(2);
}
42 
43 int
44 main(int argc, char * const *argv)
45 {
46 	sysdem_lang_t lang = SYSDEM_LANG_AUTO;
47 	int c;
48 	int ret;
49 
50 	(void) setlocale(LC_ALL, "");
51 
52 #if !defined(TEXT_DOMAIN)
53 #define	TEXT_DOMAIN "SYS_TEST"
54 #endif
55 	(void) textdomain(TEXT_DOMAIN);
56 
57 	/*
58 	 * For detecting symbol boundaries, we want to use the C locale
59 	 * definitions for use in isalnum_l().
60 	 */
61 	if ((c_locale = newlocale(LC_CTYPE_MASK, "C", NULL)) == NULL)
62 		err(EXIT_FAILURE, _("failed to construct C locale"));
63 
64 	while ((c = getopt(argc, argv, "hl:")) != -1) {
65 		switch (c) {
66 		case 'l':
67 			if (sysdem_parse_lang(optarg, &lang))
68 				break;
69 
70 			errx(EXIT_FAILURE, _("Unsupported language '%s'\n"),
71 			    optarg);
72 		case 'h':
73 		case '?':
74 			usage();
75 		}
76 	}
77 
78 	argc -= optind;
79 	argv += optind;
80 
81 	if (argc > 0)
82 		ret = do_symbols(lang, argc, argv);
83 	else
84 		ret = do_input(lang, stdin, stdout);
85 
86 	return ((ret < 0) ? EXIT_FAILURE : EXIT_SUCCESS);
87 }
88 
89 static int
90 do_symbols(sysdem_lang_t lang, int argc, char * const *argv)
91 {
92 	int ret = 0;
93 
94 	for (int i = 0; i < argc; i++) {
95 		if (do_demangle(argv[i], lang, stdout) < 0)
96 			ret = -1;
97 		else
98 			xputc('\n', stdout);
99 	}
100 
101 	return (ret);
102 }
103 
104 static int
105 do_input(sysdem_lang_t lang, FILE *restrict in, FILE *restrict out)
106 {
107 	custr_t *word = NULL;
108 	int c;
109 	int ret = 0;
110 	boolean_t in_symbol = B_FALSE;
111 
112 	if (custr_alloc(&word) != 0)
113 		err(EXIT_FAILURE, _("failed to allocate memory"));
114 
115 	while ((c = fgetc(in)) != EOF) {
116 		if (in_symbol) {
117 			/*
118 			 * All currently supported mangling formats only use
119 			 * alphanumeric characters, '.', '_', or '$' in
120 			 * mangled names. Once we've seen the potential start
121 			 * of a symbol ('_'), we accumulate subsequent
122 			 * charaters into 'word'. If we encounter a character
123 			 * that is not a part of that set ([A-Za-z0-9._$]), we
124 			 * treat it as a delimiter, we stop accumulating
125 			 * characters into word, and we attempt to demangle the
126 			 * accumulated string in 'word' by calling
127 			 * demangle_custr().
128 			 *
129 			 * Similar utilities like c++filt behave in a similar
130 			 * fashion when reading from stdin to allow for
131 			 * demangling of symbols embedded in surrounding text.
132 			 */
133 			if (isalnum_l(c, c_locale) || c == '.' || c == '_' ||
134 			    c == '$') {
135 				appendc(word, c);
136 				continue;
137 			}
138 
139 			/*
140 			 * Hit a symbol boundary, attempt to demangle what
141 			 * we've accumulated in word and reset word.
142 			 */
143 			if (do_demangle(custr_cstr(word), lang, out) < 0)
144 				ret = -1;
145 
146 			custr_reset(word);
147 			in_symbol = B_FALSE;
148 		}
149 
150 		if (c != '_') {
151 			xputc(c, out);
152 		} else {
153 			in_symbol = B_TRUE;
154 			appendc(word, c);
155 		}
156 	}
157 
158 	if (ferror(in))
159 		err(EXIT_FAILURE, _("error reading input"));
160 
161 	/*
162 	 * If we were accumulating characters for a symbol and hit EOF,
163 	 * attempt to demangle what we accumulated.
164 	 */
165 	if (custr_len(word) > 0 && do_demangle(custr_cstr(word), lang, out) < 0)
166 		ret = -1;
167 
168 	custr_free(word);
169 	return (ret);
170 }
171 
172 /*
173  * Attempt to demangle 'sym' as a symbol for 'lang' and write the result
174  * to 'out'. If 'sym' could not be demangled as 'lang' symbol, the original
175  * string is output instead.
176  *
177  * If an error other than 'not a mangled symbol' is encountered (e.g. ENOMEM),
178  * a warning is sent to stderr and -1 is returned. Otherwise, 0 is returned
179  * (including when 'sym' is merely not a mangled symbol of 'lang').
180  */
181 static int
182 do_demangle(const char *sym, sysdem_lang_t lang, FILE *out)
183 {
184 	char *demangled = sysdemangle(sym, lang, NULL);
185 
186 	if (demangled == NULL && errno != EINVAL && errno != ENOTSUP) {
187 		warn(_("error while demangling '%s'"), sym);
188 		return (-1);
189 	}
190 
191 	if (fprintf(out, "%s", (demangled != NULL) ? demangled : sym) < 0)
192 		err(EXIT_FAILURE, _("failed to write to output"));
193 
194 	free(demangled);
195 	return (0);
196 }
197 
198 static void
199 appendc(custr_t *cus, char c)
200 {
201 	if (custr_appendc(cus, c) == 0)
202 		return;
203 	err(EXIT_FAILURE, _("failed to save character from input"));
204 }
205 
/*
 * Write the character 'c' to 'out'. If fputc() reports a failure, the
 * process is terminated via err(); otherwise the function returns normally.
 */
static void
xputc(int c, FILE *out)
{
	if (fputc(c, out) >= 0)
		return;

	err(EXIT_FAILURE, _("failed to write output"));
}
212