xref: /illumos-gate/usr/src/lib/libdtrace/common/dt_string.c (revision dbed73cbda2229fd1aa6dc5743993cae7f0a7ee9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <strings.h>
30 #include <stdlib.h>
31 #include <errno.h>
32 #include <ctype.h>
33 
34 #include <dt_string.h>
35 
/*
 * Create a copy of string s, but only duplicate the first n bytes.  The copy
 * is always NUL-terminated and is allocated with malloc(), so the caller is
 * responsible for releasing it with free().  NULL is returned if the buffer
 * cannot be allocated or if n + 1 is not representable as a size_t.
 */
char *
strndup(const char *s, size_t n)
{
	size_t size = n + 1;
	char *s2;

	if (size == 0)
		return (NULL); /* n + 1 wrapped around to zero */

	if ((s2 = malloc(size)) == NULL)
		return (NULL);

	/*
	 * strncpy() stops reading s at its terminator and zero-fills the
	 * remainder; the explicit store below covers the case where s has
	 * n or more bytes and strncpy() therefore wrote no terminator.
	 */
	(void) strncpy(s2, s, n);
	s2[n] = '\0';
	return (s2);
}
48 
/*
 * Transform string s inline, converting each embedded C escape sequence string
 * to the corresponding character.  For example, the substring "\n" is replaced
 * by an inline '\n' character.  The length of the resulting string is returned.
 *
 * Recognized sequences are one to three octal digits, \a \b \f \n \r \t \v,
 * \x followed by any number of hex digits, \" and \\.  A backslash followed by
 * any other character is copied through unchanged (both characters are kept).
 * A lone backslash at the very end of the string is dropped.  Because octal
 * and hex escapes can produce a NUL byte, the returned length may exceed
 * strlen() of the result.
 */
size_t
stresc2chr(char *s)
{
	char *p, *q, c;
	int esc = 0;		/* non-zero if previous char was a backslash */
	unsigned int x;		/* unsigned so long hex runs wrap, not overflow */

	/* p reads ahead; q writes behind it, and the output never outgrows s */
	for (p = q = s; (c = *p) != '\0'; p++) {
		if (esc) {
			switch (c) {
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				c -= '0';
				p++;

				/*
				 * Consume up to two more octal digits.  On
				 * exit p must rest on the last digit consumed
				 * because the loop increment steps past it.
				 */
				if (*p >= '0' && *p <= '7') {
					c = c * 8 + *p++ - '0';

					if (*p >= '0' && *p <= '7')
						c = c * 8 + *p - '0';
					else
						p--;
				} else
					p--;

				*q++ = c;
				break;

			case 'a':
				*q++ = '\a';
				break;
			case 'b':
				*q++ = '\b';
				break;
			case 'f':
				*q++ = '\f';
				break;
			case 'n':
				*q++ = '\n';
				break;
			case 'r':
				*q++ = '\r';
				break;
			case 't':
				*q++ = '\t';
				break;
			case 'v':
				*q++ = '\v';
				break;

			case 'x':
				/*
				 * Accumulate every hex digit that follows; the
				 * total is converted to char when it is stored.
				 */
				for (x = 0; (c = *++p) != '\0'; ) {
					if (c >= '0' && c <= '9')
						x = x * 16 + c - '0';
					else if (c >= 'a' && c <= 'f')
						x = x * 16 + c - 'a' + 10;
					else if (c >= 'A' && c <= 'F')
						x = x * 16 + c - 'A' + 10;
					else
						break;
				}
				*q++ = (char)x;
				p--;	/* back up to the last hex digit */
				break;

			case '"':
			case '\\':
				*q++ = c;
				break;
			default:
				/* unknown escape: keep the backslash and c */
				*q++ = '\\';
				*q++ = c;
			}

			esc = 0;

		} else {
			if ((esc = c == '\\') == 0)
				*q++ = c;
		}
	}

	*q = '\0';
	return ((size_t)(q - s));
}
145 
/*
 * Return the character that follows the backslash in the two-character C
 * escape sequence for c, or '\0' if c has no such short escape.
 */
static char
chr2esc_letter(unsigned char c)
{
	switch (c) {
	case '\0':
		return ('0');
	case '\a':
		return ('a');
	case '\b':
		return ('b');
	case '\f':
		return ('f');
	case '\n':
		return ('n');
	case '\r':
		return ('r');
	case '\t':
		return ('t');
	case '\v':
		return ('v');
	case '"':
		return ('"');
	case '\\':
		return ('\\');
	default:
		return ('\0');
	}
}

/*
 * Create a copy of string s in which certain unprintable or special characters
 * have been converted to the string representation of their C escape sequence.
 * For example, the newline character is expanded to the string "\n".
 *
 * At most n bytes of s are examined, and conversion stops after the first NUL
 * byte, which is itself emitted as "\0".  Bytes outside the range ' ' to '~'
 * that have no short escape are written as a backslash and three octal digits.
 * The result is allocated with malloc() and must be released by the caller;
 * NULL is returned if the allocation fails.
 */
char *
strchr2esc(const char *s, size_t n)
{
	unsigned char c;
	char *q, *s2, e;
	size_t i, len = 0;

	/*
	 * Sizing pass.  This must stop at the first NUL exactly as the copy
	 * pass does, so that no byte beyond the terminator is ever read.
	 */
	for (i = 0; i < n; i++) {
		c = (unsigned char)s[i];

		if (chr2esc_letter(c) != '\0')
			len += 2;	/* backslash + letter */
		else if (c < ' ' || c > '~')
			len += 4;	/* backslash + 3 octal digits */
		else
			len++;

		if (c == '\0')
			break;
	}

	if ((s2 = malloc(len + 1)) == NULL)
		return (NULL);

	for (i = 0, q = s2; i < n; i++) {
		c = (unsigned char)s[i];

		if ((e = chr2esc_letter(c)) != '\0') {
			*q++ = '\\';
			*q++ = e;
		} else if (c < ' ' || c > '~') {
			*q++ = '\\';
			*q++ = ((c >> 6) & 3) + '0';
			*q++ = ((c >> 3) & 7) + '0';
			*q++ = (c & 7) + '0';
		} else {
			*q++ = (char)c;
		}

		if (c == '\0')
			break; /* don't continue past \0 even if i < n */
	}

	*q = '\0';
	return (s2);
}
245 
/*
 * Return a pointer to the final path component of s, i.e. the text following
 * the last '/' character, or s itself when it contains no '/'.  The result
 * points into s; nothing is allocated.  The function is named strbasename
 * rather than basename so that it does not clash with the non-const prototype
 * declared in libgen.h.
 */
const char *
strbasename(const char *s)
{
	const char *slash = strrchr(s, '/');

	return (slash != NULL ? slash + 1 : s);
}
261 
/*
 * This function tests a string against the regular expression used for idents
 * and integers in the D lexer, and should match the superset of RGX_IDENT and
 * RGX_INT in dt_lex.l.  If an invalid character is found, the function returns
 * a pointer to it.  Otherwise NULL is returned for a valid string.  An empty
 * string is invalid: s itself (pointing at the terminator) is returned.
 *
 * NOTE(review): the integer test accepts anything strtoull() can consume in
 * full, which per the C library includes leading white space and a sign;
 * confirm whether callers depend on that before tightening it.
 */
const char *
strbadidnum(const char *s)
{
	char *p;
	int c;

	if (*s == '\0')
		return (s);

	errno = 0;
	(void) strtoull(s, &p, 0);

	if (errno == 0 && *p == '\0')
		return (NULL); /* matches RGX_INT */

	/*
	 * Convert each byte through unsigned char before handing it to
	 * isalnum(): passing a negative char value to the <ctype.h> functions
	 * is undefined behavior.
	 */
	while ((c = (unsigned char)*s++) != '\0') {
		if (isalnum(c) == 0 && c != '_' && c != '`')
			return (s - 1);
	}

	return (NULL); /* matches RGX_IDENT */
}
290 
/*
 * Report whether s contains any glob metacharacter.  Returns 1 if at least one
 * of '[', '?', '*' or '\' appears anywhere in the string, and 0 if the string
 * is a plain literal.  See gmatch(3GEN) and sh(1) for the glob syntax itself.
 */
int
strisglob(const char *s)
{
	const char *p;

	for (p = s; *p != '\0'; p++) {
		switch (*p) {
		case '[':
		case '?':
		case '*':
		case '\\':
			return (1);
		default:
			break;
		}
	}

	return (0);
}
307 
/*
 * Hyphenate a string in-place by converting any instances of "__" to "-",
 * which we use for probe names to improve readability, and return the string.
 * Pairs are matched left to right without overlap, so "___" becomes "-_" and
 * "____" becomes "--".
 */
char *
strhyphenate(char *s)
{
	char *src = s;	/* next character to read */
	char *dst = s;	/* next position to write; never ahead of src */

	/*
	 * Compact the string in a single pass rather than shifting the whole
	 * tail left each time a pair is found.
	 */
	while (*src != '\0') {
		if (src[0] == '_' && src[1] == '_') {
			*dst++ = '-';
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}

	*dst = '\0';
	return (s);
}
326