xref: /freebsd/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.c (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <strings.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <ctype.h>
30 
31 #include <dt_string.h>
32 
/*
 * Transform string s in place, replacing each embedded C escape sequence with
 * the single character it denotes.  For example, the two-character substring
 * "\n" is replaced by one '\n' (newline) character.  The result is never
 * longer than the input, so no additional storage is required.
 *
 * Recognized sequences:
 *	\a \b \f \n \r \t \v	the corresponding control character
 *	\" \\			the quote or backslash itself
 *	\o, \oo, \ooo		one to three octal digits
 *	\xh...			any number of hexadecimal digits
 *
 * Any other escaped character is copied through unchanged together with its
 * backslash.  A backslash that is the last character of the string is dropped.
 * Numeric escapes keep only the low-order eight bits of their value, and a
 * "\x" followed by no hexadecimal digits produces a zero byte.
 *
 * Returns the number of characters written to s, not counting the terminating
 * '\0' that is always appended.
 */
size_t
stresc2chr(char *s)
{
	char *p, *q, c;
	int esc = 0;
	unsigned int x;	/* unsigned so that long numeric escapes wrap safely */

	for (p = q = s; (c = *p) != '\0'; p++) {
		if (!esc) {
			if (c == '\\')
				esc = 1;	/* decode the next character */
			else
				*q++ = c;
			continue;
		}

		esc = 0;

		switch (c) {
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			/*
			 * Consume up to two further octal digits.  p is left
			 * on the last digit used; the loop increment then
			 * steps past it.
			 */
			x = (unsigned int)(c - '0');

			if (p[1] >= '0' && p[1] <= '7') {
				x = x * 8 + (unsigned int)(*++p - '0');

				if (p[1] >= '0' && p[1] <= '7')
					x = x * 8 + (unsigned int)(*++p - '0');
			}

			*q++ = (char)(unsigned char)x;
			break;

		case 'a':
			*q++ = '\a';
			break;
		case 'b':
			*q++ = '\b';
			break;
		case 'f':
			*q++ = '\f';
			break;
		case 'n':
			*q++ = '\n';
			break;
		case 'r':
			*q++ = '\r';
			break;
		case 't':
			*q++ = '\t';
			break;
		case 'v':
			*q++ = '\v';
			break;

		case 'x':
			/*
			 * Peek at the following character and advance only
			 * while it is a hex digit, so p ends on the last
			 * character that belongs to this escape.
			 */
			for (x = 0; (c = p[1]) != '\0'; p++) {
				if (c >= '0' && c <= '9')
					x = x * 16 + (unsigned int)(c - '0');
				else if (c >= 'a' && c <= 'f')
					x = x * 16 +
					    (unsigned int)(c - 'a') + 10;
				else if (c >= 'A' && c <= 'F')
					x = x * 16 +
					    (unsigned int)(c - 'A') + 10;
				else
					break;
			}
			*q++ = (char)(unsigned char)x;
			break;

		case '"':
		case '\\':
			*q++ = c;
			break;

		default:
			/* Unknown escape: keep it verbatim. */
			*q++ = '\\';
			*q++ = c;
			break;
		}
	}

	*q = '\0';
	return ((size_t)(q - s));
}
129 
/*
 * Create a copy of string s in which certain unprintable or special characters
 * have been converted to the string representation of their C escape sequence.
 * For example, the newline character is expanded to the string "\n".
 *
 * At most n bytes of s are examined.  Processing stops after the first '\0'
 * byte found within those n bytes; that byte is itself emitted as "\0".
 * Bytes outside the range '!' through '~', other than the space character and
 * those that have a named escape, are written as a backslash followed by three
 * octal digits.
 *
 * Returns a newly allocated, '\0'-terminated string obtained from malloc(),
 * or NULL if the allocation fails.
 */
char *
strchr2esc(const char *s, size_t n)
{
	size_t i;
	size_t len = 0;		/* number of input bytes that will be copied */
	size_t addl = 0;	/* extra output bytes required by escapes */
	unsigned char uc;
	char *q, *s2, c;

	/*
	 * Pass 1: work out how much room the escaped copy needs.  Like the
	 * copy pass below, stop at the first '\0' so that nothing beyond the
	 * terminator is ever read when n exceeds the length of the string.
	 */
	for (i = 0; i < n; i++) {
		c = s[i];
		len++;

		switch (c) {
		case '\0':
		case '\a':
		case '\b':
		case '\f':
		case '\n':
		case '\r':
		case '\t':
		case '\v':
		case '"':
		case '\\':
			addl++;		/* 1 add'l char needed to follow \ */
			break;
		case ' ':
			break;		/* space is copied as-is */
		default:
			uc = (unsigned char)c;
			if (uc < '!' || uc > '~')
				addl += 3; /* 3 add'l chars following \ */
			break;
		}

		if (c == '\0')
			break;
	}

	if ((s2 = malloc(len + addl + 1)) == NULL)
		return (NULL);

	/*
	 * Pass 2: emit the escaped copy.  len already ends at the first '\0'
	 * (if any), so no separate termination check is required here.
	 */
	for (i = 0, q = s2; i < len; i++) {
		c = s[i];

		switch (c) {
		case '\0':
			*q++ = '\\';
			*q++ = '0';
			break;
		case '\a':
			*q++ = '\\';
			*q++ = 'a';
			break;
		case '\b':
			*q++ = '\\';
			*q++ = 'b';
			break;
		case '\f':
			*q++ = '\\';
			*q++ = 'f';
			break;
		case '\n':
			*q++ = '\\';
			*q++ = 'n';
			break;
		case '\r':
			*q++ = '\\';
			*q++ = 'r';
			break;
		case '\t':
			*q++ = '\\';
			*q++ = 't';
			break;
		case '\v':
			*q++ = '\\';
			*q++ = 'v';
			break;
		case '"':
			*q++ = '\\';
			*q++ = '"';
			break;
		case '\\':
			*q++ = '\\';
			*q++ = '\\';
			break;
		case ' ':
			*q++ = c;
			break;
		default:
			/*
			 * Take the octal digits from the unsigned value of the
			 * byte so that the result does not depend on whether
			 * plain char is signed.
			 */
			uc = (unsigned char)c;
			if (uc < '!' || uc > '~') {
				*q++ = '\\';
				*q++ = (char)(((uc >> 6) & 3) + '0');
				*q++ = (char)(((uc >> 3) & 7) + '0');
				*q++ = (char)((uc & 7) + '0');
			} else {
				*q++ = c;
			}
			break;
		}
	}

	*q = '\0';
	return (s2);
}
229 
/*
 * Return a pointer to the final path component of s, i.e. the text that
 * follows the last '/' character.  If s contains no '/', s itself is returned;
 * if s ends with '/', the result points at the terminating '\0'.  The name
 * strbasename is used instead of basename so as not to clash with the
 * non-const prototype declared in libgen.h.
 */
const char *
strbasename(const char *s)
{
	const char *base = s;
	const char *cp;

	/* Remember the position just past every slash; the last one wins. */
	for (cp = s; *cp != '\0'; cp++) {
		if (*cp == '/')
			base = cp + 1;
	}

	return (base);
}
245 
/*
 * This function tests a string against the regular expression used for idents
 * and integers in the D lexer, and should match the superset of RGX_IDENT and
 * RGX_INT in dt_lex.l.  If an invalid character is found, the function returns
 * a pointer to it.  Otherwise NULL is returned for a valid string.  An empty
 * string is invalid, and s itself is returned for it.
 *
 * The integer check is delegated to strtoull() with base 0, so anything that
 * strtoull() consumes in full without setting errno is accepted as an integer.
 * As a side effect, errno is cleared and may be set by strtoull().
 */
const char *
strbadidnum(const char *s)
{
	char *p;
	int c;

	if (*s == '\0')
		return (s);

	errno = 0;
	(void) strtoull(s, &p, 0);

	if (errno == 0 && *p == '\0')
		return (NULL); /* matches RGX_INT */

	/*
	 * Read each byte as an unsigned char: passing a negative value other
	 * than EOF to isalnum() is undefined, which a plain (signed) char
	 * holding a byte >= 0x80 would otherwise do.
	 */
	while ((c = (unsigned char)*s++) != '\0') {
		if (isalnum(c) == 0 && c != '_' && c != '`')
			return (s - 1);
	}

	return (NULL); /* matches RGX_IDENT */
}
274 
/*
 * Report whether s contains any glob metacharacter.  Returns 1 if s contains
 * at least one of '[', '?', '*' or '\', and 0 if it is a plain string (which
 * includes the empty string).  See gmatch(3GEN) and sh(1) for the glob syntax
 * definition.
 */
int
strisglob(const char *s)
{
	const char *cp;

	for (cp = s; *cp != '\0'; cp++) {
		switch (*cp) {
		case '[':
		case '?':
		case '*':
		case '\\':
			return (1);
		default:
			break;
		}
	}

	return (0);
}
291 
/*
 * Hyphenate a string in-place by converting any instances of "__" to "-",
 * which we use for probe names to improve readability, and return the string.
 *
 * Pairs are matched greedily from left to right and do not overlap, so "___"
 * becomes "-_" and "____" becomes "--".  The string is compacted in a single
 * pass.  If s contains no "__" the buffer is not written to at all; otherwise
 * the bytes between the new and the old terminator are set to '\0'.
 */
char *
strhyphenate(char *s)
{
	char *src, *dst;

	/* Find the first "__"; everything before it is already in place. */
	for (src = s; *src != '\0'; src++) {
		if (src[0] == '_' && src[1] == '_')
			break;
	}

	if (*src == '\0')
		return (s);	/* nothing to replace */

	/* Compact the remainder, writing at dst while reading at src. */
	dst = src;
	while (*src != '\0') {
		if (src[0] == '_' && src[1] == '_') {
			*dst++ = '-';
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}

	/* src is on the old terminator; clear from the new end up to it. */
	while (dst <= src)
		*dst++ = '\0';

	return (s);
}
310