/* xref: /illumos-gate/usr/src/lib/libdtrace/common/dt_string.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <errno.h>
#include <ctype.h>

#include <dt_string.h>

/*
 * Transform string s in place, converting each embedded C escape sequence to
 * the character it denotes.  For example, the two-character substring "\n" is
 * replaced by a single newline character.  The recognized escapes are:
 *
 *	\o \oo \ooo	one to three octal digits
 *	\a \b \f \n \r \t \v
 *	\xh...		any number of hex digits; only the low-order bits that
 *			fit in a char are kept
 *	\" \\
 *
 * Any other escape is copied through unchanged, backslash included.  A lone
 * backslash at the very end of the string is dropped.  The output is never
 * longer than the input, so the conversion needs no additional storage.
 *
 * The length of the resulting string is returned.  Because "\0" and "\x0"
 * produce an embedded NUL, the returned length may exceed strlen(s).
 */
size_t
stresc2chr(char *s)
{
	char *p, *q, c;
	unsigned int x;	/* unsigned so a long \x run wraps, not overflows */
	int esc = 0;

	for (p = q = s; (c = *p) != '\0'; p++) {
		if (!esc) {
			if (c == '\\')
				esc = 1;	/* decode on next iteration */
			else
				*q++ = c;
			continue;
		}

		esc = 0;

		switch (c) {
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			/*
			 * Consume up to two more octal digits.  p[0] is a
			 * digit here, so looking at p[1] can at worst see
			 * the terminating NUL.
			 */
			c -= '0';

			if (p[1] >= '0' && p[1] <= '7') {
				c = c * 8 + *++p - '0';

				if (p[1] >= '0' && p[1] <= '7')
					c = c * 8 + *++p - '0';
			}

			*q++ = c;
			break;

		case 'a':
			*q++ = '\a';
			break;
		case 'b':
			*q++ = '\b';
			break;
		case 'f':
			*q++ = '\f';
			break;
		case 'n':
			*q++ = '\n';
			break;
		case 'r':
			*q++ = '\r';
			break;
		case 't':
			*q++ = '\t';
			break;
		case 'v':
			*q++ = '\v';
			break;

		case 'x':
			for (x = 0; (c = *++p) != '\0'; ) {
				if (c >= '0' && c <= '9')
					x = x * 16 + (unsigned int)(c - '0');
				else if (c >= 'a' && c <= 'f')
					x = x * 16 +
					    (unsigned int)(c - 'a' + 10);
				else if (c >= 'A' && c <= 'F')
					x = x * 16 +
					    (unsigned int)(c - 'A' + 10);
				else
					break;
			}

			*q++ = (char)x;
			p--;	/* loop increment re-reads the stop character */
			break;

		case '"':
		case '\\':
			*q++ = c;
			break;

		default:
			/* not an escape we know: keep it verbatim */
			*q++ = '\\';
			*q++ = c;
		}
	}

	*q = '\0';
	return ((size_t)(q - s));
}

/*
 * Create a copy of string s in which certain unprintable or special characters
 * have been converted to the string representation of their C escape sequence.
 * For example, the newline character is expanded to the string "\n".
 *
 * At most n bytes of s are examined, and conversion stops after the first NUL
 * byte, which is itself emitted as "\0".  Named escapes are used for NUL, \a,
 * \b, \f, \n, \r, \t, \v, '"' and '\'; a space is copied as-is; any other byte
 * outside the range '!' through '~' is written as a backslash followed by
 * three octal digits.
 *
 * Both the sizing pass and the copy pass stop at the first NUL, so s is never
 * read beyond its terminator even when n is larger than the string.
 *
 * The result is allocated with malloc() and must be freed by the caller.
 * NULL is returned if the allocation fails.
 */
char *
strchr2esc(const char *s, size_t n)
{
	const char *p;
	char *q, *s2, c;
	unsigned char uc;
	size_t len = 0;		/* source bytes that will be converted */
	size_t addl = 0;	/* extra output bytes needed for escapes */

	while (len < n) {
		c = s[len++];

		switch (c) {
		case '\0':
		case '\a':
		case '\b':
		case '\f':
		case '\n':
		case '\r':
		case '\t':
		case '\v':
		case '"':
		case '\\':
			addl++;		/* 1 add'l char needed to follow \ */
			break;
		case ' ':
			break;
		default:
			if (c < '!' || c > '~')
				addl += 3; /* 3 add'l chars following \ */
		}

		if (c == '\0')
			break;	/* nothing past the terminator is ours to read */
	}

	if ((s2 = malloc(len + addl + 1)) == NULL)
		return (NULL);

	/* A NUL, if any, is the last of the len bytes, so no early exit here. */
	for (p = s, q = s2; p < s + len; p++) {
		switch (c = *p) {
		case '\0':
			*q++ = '\\';
			*q++ = '0';
			break;
		case '\a':
			*q++ = '\\';
			*q++ = 'a';
			break;
		case '\b':
			*q++ = '\\';
			*q++ = 'b';
			break;
		case '\f':
			*q++ = '\\';
			*q++ = 'f';
			break;
		case '\n':
			*q++ = '\\';
			*q++ = 'n';
			break;
		case '\r':
			*q++ = '\\';
			*q++ = 'r';
			break;
		case '\t':
			*q++ = '\\';
			*q++ = 't';
			break;
		case '\v':
			*q++ = '\\';
			*q++ = 'v';
			break;
		case '"':
			*q++ = '\\';
			*q++ = '"';
			break;
		case '\\':
			*q++ = '\\';
			*q++ = '\\';
			break;
		case ' ':
			*q++ = c;
			break;
		default:
			if (c < '!' || c > '~') {
				/*
				 * Take the octal digits from the unsigned
				 * byte value so that bytes >= 0x80 do not
				 * depend on how a negative char is shifted.
				 */
				uc = (unsigned char)c;
				*q++ = '\\';
				*q++ = (char)(((uc >> 6) & 3) + '0');
				*q++ = (char)(((uc >> 3) & 7) + '0');
				*q++ = (char)((uc & 7) + '0');
			} else {
				*q++ = c;
			}
		}
	}

	*q = '\0';
	return (s2);
}

/*
 * Return the basename (name after final /) of the given string.  We use
 * strbasename rather than basename to avoid conflicting with libgen.h's
 * non-const function prototype.
 *
 * The result points into s itself; nothing is allocated or modified.  If s
 * contains no '/', s is returned unchanged; if s ends in '/', the result is
 * the empty string at the end of s.  s must not be NULL.
 */
const char *
strbasename(const char *s)
{
	const char *slash = strrchr(s, '/');

	return (slash != NULL ? slash + 1 : s);
}

/*
 * This function tests a string against the regular expression used for idents
 * and integers in the D lexer, and should match the superset of RGX_IDENT and
 * RGX_INT in dt_lex.l.  If an invalid character is found, the function returns
 * a pointer to it.  Otherwise NULL is returned for a valid string.
 *
 * An empty string is invalid; s itself (pointing at the terminating NUL) is
 * returned in that case.  A string is accepted as an integer if strtoull()
 * with base 0 consumes all of it without setting errno; otherwise every
 * character must be alphanumeric, '_' or '`'.
 */
const char *
strbadidnum(const char *s)
{
	char *end;
	unsigned char c;

	if (*s == '\0')
		return (s);

	errno = 0;
	(void) strtoull(s, &end, 0);

	if (errno == 0 && *end == '\0')
		return (NULL); /* matches RGX_INT */

	for (; *s != '\0'; s++) {
		/*
		 * The <ctype.h> functions are only defined for EOF and values
		 * representable as unsigned char, so never hand them a plain
		 * (possibly negative) char.
		 */
		c = (unsigned char)*s;

		if (isalnum(c) == 0 && c != '_' && c != '`')
			return (s);
	}

	return (NULL); /* matches RGX_IDENT */
}

/*
 * Determine whether the string contains a glob matching pattern or is just a
 * simple string.  See gmatch(3GEN) and sh(1) for the glob syntax definition.
 *
 * Returns 1 if s contains any of the characters '[', '?', '*' or '\', and 0
 * otherwise (including for the empty string).  s must not be NULL.
 */
int
strisglob(const char *s)
{
	return (strpbrk(s, "[?*\\") != NULL);
}

/*
 * Hyphenate a string in-place by converting any instances of "__" to "-",
 * which we use for probe names to improve readability, and return the string.
 *
 * Matching is left to right and non-overlapping, so "a___b" becomes "a-_b"
 * and "a____b" becomes "a--b".  The string is compacted in a single pass; a
 * string that contains no "__" is returned without being written to at all.
 */
char *
strhyphenate(char *s)
{
	char *src, *dst;

	/* Nothing before the first "__" moves, so start the rewrite there. */
	if ((src = strstr(s, "__")) == NULL)
		return (s);

	for (dst = src; *src != '\0'; ) {
		if (src[0] == '_' && src[1] == '_') {
			*dst++ = '-';
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}

	*dst = '\0';
	return (s);
}