xref: /illumos-gate/usr/src/common/util/string.c (revision e688a0bc223983e7c7fedad44c92b8d0292a10f7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Implementations of the functions described in vsnprintf(3C) and string(3C),
30  * for use by the kernel, the standalone, and kmdb.  Unless otherwise specified,
31  * these functions match the section 3C manpages.
32  */
33 
34 #include <sys/types.h>
35 #include <sys/varargs.h>
36 
37 #if defined(_KERNEL)
38 #include <sys/systm.h>
39 #include <sys/debug.h>
40 #elif !defined(_BOOT)
41 #include <string.h>
42 #endif
43 
44 #ifndef	NULL
45 #define	NULL	0l
46 #endif
47 
48 #include "memcpy.h"
49 #include "string.h"
50 
51 /*
52  * We don't need these for x86 boot or kmdb.
53  */
54 #if !defined(_KMDB) && (!defined(_BOOT) || defined(__sparc))
55 
/*
 * Account for one output character.  The running length 'len' advances
 * unconditionally, so the final count describes the complete rendering,
 * while the byte itself is stored only if it still fits inside 'buf'.
 * The argument is evaluated at most once, and not at all once the buffer
 * is full, so it must not have side effects.
 */
#define	ADDCHAR(c)	do {			\
	if (len < buflen)			\
		buf[len] = (c);			\
	len++;					\
} while (0)

/*
 * Given a buffer 'buf' of size 'buflen', render as much of the string
 * described by <fmt, args> as possible.  The string will always be
 * null-terminated, so the maximum string length is 'buflen - 1'.
 * Returns the number of bytes that would be necessary to render the
 * entire string, not including null terminator (just like vsnprintf(3S)).
 * To determine buffer size in advance, use vsnprintf(NULL, 0, fmt, args) + 1;
 * 'buf' is never touched when 'buflen' is 0.
 *
 * Recognized syntax:
 *	flags		'-' (left-align), '0' (zero-pad numbers)
 *	width, prec	decimal digits, or '*' to take an int argument
 *	size		h, hh, l, ll
 *	conversions	d u x o p b c s %
 *
 * An upper-case conversion letter is folded to lower case and selects
 * upper-case digits.  Numeric precision is parsed but not applied.  For
 * %s a non-zero precision limits the bytes taken from the string and also
 * serves as a minimum field width.  %b takes an unsigned int followed by a
 * descriptor string: one byte giving the numeric base, then any number of
 * <bit-number byte (1-based)><name made of bytes >= 32> groups; the names
 * of the bits that are set are appended as "<name,name>".  Conversion
 * characters that are not recognized are dropped without consuming an
 * argument.
 *
 * There is no support for floating point, and the C locale is assumed.
 */
size_t
vsnprintf(char *buf, size_t buflen, const char *fmt, va_list aargs)
{
	uint64_t ul = 0, tmp;
	size_t len = 0;		/* length of the complete rendering so far */
	int pad, width, base, sign, c, num;
	int prec, h_count, l_count, dot_count;
	int pad_count, transfer_count, left_align;
	const char *digits;
	char *sp, *bs = NULL;
	char numbuf[65];	/* sufficient for a 64-bit binary value */
	va_list args;

	/*
	 * Make a copy so that all our callers don't have to make a copy
	 */
	va_copy(args, aargs);

	/* A size that is negative when viewed as ssize_t means "no room". */
	if ((ssize_t)buflen < 0)
		buflen = 0;

	while ((c = *fmt++) != '\0') {
		if (c != '%') {
			ADDCHAR(c);
			continue;
		}

		width = prec = 0;
		left_align = base = sign = 0;
		h_count = l_count = dot_count = 0;
		pad = ' ';
		digits = "0123456789abcdef";
next_fmt:
		if ((c = *fmt++) == '\0')
			break;

		if (c >= 'A' && c <= 'Z') {
			c += 'a' - 'A';
			digits = "0123456789ABCDEF";
		}

		switch (c) {
		case '-':
			left_align++;
			goto next_fmt;
		case '0':
			/* a leading zero before any '.' is the pad flag */
			if (dot_count == 0)
				pad = '0';
			/*FALLTHROUGH*/
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			num = 0;
			for (;;) {
				num = 10 * num + c - '0';
				c = *fmt;
				if (c < '0' || c > '9')
					break;
				else
					fmt++;
			}
			if (dot_count > 0)
				prec = num;
			else
				width = num;

			goto next_fmt;
		case '.':
			dot_count++;
			goto next_fmt;
		case '*':
			if (dot_count > 0)
				prec = (int)va_arg(args, int);
			else
				width = (int)va_arg(args, int);
			goto next_fmt;
		case 'l':
			l_count++;
			goto next_fmt;
		case 'h':
			h_count++;
			goto next_fmt;
		case 'd':
			sign = 1;
			/*FALLTHROUGH*/
		case 'u':
			base = 10;
			break;
		case 'p':
			/* pointers are fetched as unsigned long */
			l_count = 1;
			/*FALLTHROUGH*/
		case 'x':
			base = 16;
			break;
		case 'o':
			base = 8;
			break;
		case 'b':
			/* placeholder; the real base comes from the descriptor */
			l_count = 0;
			base = 1;
			break;
		case 'c':
			c = (char)va_arg(args, int);
			ADDCHAR(c);
			break;
		case 's':
			sp = va_arg(args, char *);
			if (sp == NULL) {
				sp = "<null string>";
				/* avoid truncation */
				prec = strlen(sp);
			}
			/*
			 * Handle simple case specially to avoid
			 * performance hit of strlen()
			 */
			if (prec == 0 && width == 0) {
				while ((c = *sp++) != 0)
					ADDCHAR(c);
				break;
			}
			if (prec > 0) {
				transfer_count = strnlen(sp, prec);
				/* widen field if too narrow */
				if (prec > width)
					width = prec;
			} else
				transfer_count = strlen(sp);
			if (width > transfer_count)
				pad_count = width - transfer_count;
			else
				pad_count = 0;
			while ((!left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			while (transfer_count-- > 0) {
				c = *sp++;
				ADDCHAR(c);
			}
			while ((left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			break;
		case '%':
			ADDCHAR('%');
			break;
		}

		/* Everything except the numeric conversions is finished. */
		if (base == 0)
			continue;

		/*
		 * Fetch the argument according to the size modifiers and
		 * widen it to 64 bits, sign-extending signed conversions.
		 */
		if (h_count == 0 && l_count == 0) {
			if (sign)
				ul = (int64_t)va_arg(args, int);
			else
				ul = (int64_t)va_arg(args, unsigned int);
		} else if (l_count > 1) {
			if (sign)
				ul = (int64_t)va_arg(args, int64_t);
			else
				ul = (int64_t)va_arg(args, uint64_t);
		} else if (l_count > 0) {
			if (sign)
				ul = (int64_t)va_arg(args, long);
			else
				ul = (int64_t)va_arg(args, unsigned long);
		} else if (h_count > 1) {
			/* signed char, so %hhd does not depend on plain char */
			if (sign)
				ul = (int64_t)((signed char)va_arg(args, int));
			else
				ul = (int64_t)((unsigned char)va_arg(args,
				    int));
		} else {
			if (sign)
				ul = (int64_t)((short)va_arg(args, int));
			else
				ul = (int64_t)((unsigned short)va_arg(args,
				    int));
		}

		/* From here on 'sign' means "a minus sign must be printed". */
		if (sign && (int64_t)ul < 0)
			ul = -ul;
		else
			sign = 0;

		if (c == 'b') {
			bs = va_arg(args, char *);
			base = *bs++;
			/*
			 * A base below 2 would never terminate the digit
			 * loops and one above 16 would index past 'digits';
			 * render such descriptors in hex instead.
			 */
			if (base < 2 || base > 16)
				base = 16;
		}

		/* avoid repeated division if width is 0 */
		if (width > 0) {
			tmp = ul;
			do {
				width--;
			} while ((tmp /= base) != 0);
		}

		/*
		 * 'width' now holds the padding still owed (less one if a
		 * minus sign is printed).  It goes in front of the digits
		 * unless '-' asked for left alignment, in which case it is
		 * emitted as trailing blanks once the digits are out.
		 */
		if (sign && pad == '0')
			ADDCHAR('-');
		if (!left_align) {
			while (width-- > sign)
				ADDCHAR(pad);
		}
		if (sign && pad == ' ')
			ADDCHAR('-');

		/* Generate the digits least-significant first, then reverse. */
		sp = numbuf;
		tmp = ul;
		do {
			*sp++ = digits[tmp % base];
		} while ((tmp /= base) != 0);

		while (sp > numbuf) {
			sp--;
			ADDCHAR(*sp);
		}

		if (left_align) {
			while (width-- > sign)
				ADDCHAR(' ');
		}

		if (c == 'b' && ul != 0) {
			int any = 0;
			c = *bs++;
			while (c != 0) {
				/*
				 * The mask is built in 64 bits and only for
				 * bit numbers that fit, so the shift is
				 * always well defined.
				 */
				if (c >= 1 && c <= 64 &&
				    (ul & ((uint64_t)1 << (c - 1))) != 0) {
					if (any++ == 0)
						ADDCHAR('<');
					while ((c = *bs++) >= 32)
						ADDCHAR(c);
					ADDCHAR(',');
				} else {
					while ((c = *bs++) >= 32)
						continue;
				}
			}
			if (any) {
				/* replace the trailing ',' with '>' */
				len--;
				ADDCHAR('>');
			}
		}
	}

	/* Terminate in place, or at the last byte if output was truncated. */
	if (len < buflen)
		buf[len] = '\0';
	else if (buflen != 0)
		buf[buflen - 1] = '\0';

	va_end(args);

	return (len);
}
315 
/*
 * Variadic front end to vsnprintf(): renders 'fmt' and its arguments into
 * 'buf' (at most 'buflen' bytes, terminator included) and returns whatever
 * vsnprintf() returns for the same arguments.
 *
 * The format string is the third parameter, which is what the lint
 * directive below has to name for the arguments to be checked against it.
 */
/*PRINTFLIKE3*/
size_t
snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;
	size_t needed;

	va_start(args, fmt);
	needed = vsnprintf(buf, buflen, fmt, args);
	va_end(args);

	return (needed);
}
328 
329 #if defined(_BOOT) && defined(__sparc)
330 /*
331  * The sprintf() and vsprintf() routines aren't shared with the kernel because
332  * the DDI mandates that they return the buffer rather than its length.
333  */
/*
 * Format into 'buf' with no practical size limit (INT_MAX is handed to
 * vsnprintf() as the buffer size) and report the length of the string
 * that ended up in 'buf'.
 */
/*PRINTFLIKE2*/
int
sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	int written;

	va_start(ap, fmt);
	(void) vsnprintf(buf, INT_MAX, fmt, ap);
	va_end(ap);

	/* The result is measured from the buffer, not taken from above. */
	written = strlen(buf);
	return (written);
}
346 
/*
 * va_list flavour of sprintf(): format into 'buf' with INT_MAX as the
 * size passed to vsnprintf(), then return the length of the resulting
 * string.
 */
int
vsprintf(char *buf, const char *fmt, va_list args)
{
	int written;

	(void) vsnprintf(buf, INT_MAX, fmt, args);

	written = strlen(buf);
	return (written);
}
353 #endif /* _BOOT && __sparc */
354 
355 #endif /* !_KMDB && (!_BOOT || __sparc) */
356 
/*
 * Append the string s2, terminator included, to the end of the string
 * already held in s1.  Returns s1.
 */
char *
strcat(char *s1, const char *s2)
{
	char *tail;

	/* Find the NUL that currently ends the destination. */
	for (tail = s1; *tail != '\0'; tail++)
		continue;

	/* Copy across until the source terminator has been stored. */
	do {
		*tail = *s2++;
	} while (*tail++ != '\0');

	return (s1);
}
369 
/*
 * Return a pointer to the first byte in sp equal to (char)c, or NULL if
 * the string holds none.  The terminator counts as part of the string,
 * so looking for '\0' yields a pointer to the end.
 */
char *
strchr(const char *sp, int c)
{
	const char want = (char)c;

	for (;; sp++) {
		/* Test for a match first so that the NUL itself can match. */
		if (*sp == want)
			return ((char *)sp);
		if (*sp == '\0')
			return (NULL);
	}
}
379 
/*
 * Compare two strings byte by byte.  Returns 0 when they are identical;
 * otherwise the difference between the first pair of differing bytes,
 * each taken as an unsigned char.
 */
int
strcmp(const char *s1, const char *s2)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	for (; *a == *b; a++, b++) {
		/* Equal and at the terminator: the strings match. */
		if (*a == '\0')
			return (0);
	}
	return (*a - *b);
}
388 
/*
 * Compare at most n bytes of two strings.  Returns 0 if the compared
 * prefixes are identical (or s1 and s2 are the same pointer); otherwise
 * the difference between the first pair of differing bytes, each taken as
 * an unsigned char.
 */
int
strncmp(const char *s1, const char *s2, size_t n)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;
	size_t i;

	if (s1 == s2)
		return (0);

	for (i = 0; i < n; i++) {
		if (a[i] != b[i])
			return (a[i] - b[i]);
		/* Both strings ended together inside the limit. */
		if (a[i] == '\0')
			break;
	}
	return (0);
}
400 
/*
 * Case-folding table indexed by byte value: 'A' through 'Z' map to 'a'
 * through 'z' and every other byte maps to itself.  Entries are spelled
 * with ASCII character literals where one exists and with hex escapes
 * elsewhere; strcasecmp() and strncasecmp() read the table through an
 * unsigned char pointer.
 */
static const char charmap[] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	' ', '!', '"', '#', '$', '%', '&', '\'',
	'(', ')', '*', '+', ',', '-', '.', '/',
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', ':', ';', '<', '=', '>', '?',
	/* upper-case letters fold onto their lower-case counterparts */
	'@', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', '[', '\\', ']', '^', '_',
	'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', '{', '|', '}', '~', '\x7f',
	'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
	'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
	'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
	'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
	'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
	'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
	'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
	'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
	'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
	'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
	'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
	'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
	'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
	'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
	'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
	'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
435 
436 int
437 strcasecmp(const char *s1, const char *s2)
438 {
439 	const unsigned char *cm = (const unsigned char *)charmap;
440 	const unsigned char *us1 = (const unsigned char *)s1;
441 	const unsigned char *us2 = (const unsigned char *)s2;
442 
443 	while (cm[*us1] == cm[*us2++])
444 		if (*us1++ == '\0')
445 			return (0);
446 	return (cm[*us1] - cm[*(us2 - 1)]);
447 }
448 
449 int
450 strncasecmp(const char *s1, const char *s2, size_t n)
451 {
452 	const unsigned char *cm = (const unsigned char *)charmap;
453 	const unsigned char *us1 = (const unsigned char *)s1;
454 	const unsigned char *us2 = (const unsigned char *)s2;
455 
456 	while (n != 0 && cm[*us1] == cm[*us2++]) {
457 		if (*us1++ == '\0')
458 			return (0);
459 		n--;
460 	}
461 	return (n == 0 ? 0 : cm[*us1] - cm[*(us2 - 1)]);
462 }
463 
/*
 * Copy the string s2, terminator included, into s1.  Returns s1.
 */
char *
strcpy(char *s1, const char *s2)
{
	size_t i = 0;

	/* Store each byte, stopping once the byte just stored was the NUL. */
	do {
		s1[i] = s2[i];
	} while (s1[i++] != '\0');

	return (s1);
}
473 
/*
 * Fill exactly n bytes of s1: bytes are copied from s2 up to and including
 * its terminator, and whatever is left of the n bytes is set to NUL.  If
 * s2 holds n or more bytes before its terminator, s1 is left without a
 * terminator.  Returns s1.
 */
char *
strncpy(char *s1, const char *s2, size_t n)
{
	size_t i;

	/* Copy until the limit is hit or the source NUL has been stored. */
	for (i = 0; i < n; i++) {
		if ((s1[i] = s2[i]) == '\0')
			break;
	}

	/* Zero-fill the remainder of the n-byte field. */
	for (; i < n; i++)
		s1[i] = '\0';

	return (s1);
}
487 
/*
 * Return a pointer to the last byte in sp equal to (char)c, or NULL if
 * the string holds none.  The terminator counts as part of the string.
 */
char *
strrchr(const char *sp, int c)
{
	const char want = (char)c;
	const char *last = NULL;

	for (;; sp++) {
		/* Remember every hit; the final one remembered wins. */
		if (*sp == want)
			last = sp;
		if (*sp == '\0')
			break;
	}
	return ((char *)last);
}
500 
/*
 * Locate the first occurrence of the string as2 inside as1.  Returns a
 * pointer to where the match begins, as1 itself when as2 is NULL or empty,
 * or NULL when as2 does not occur.
 */
char *
strstr(const char *as1, const char *as2)
{
	const char *start, *h, *n;

	if (as2 == NULL || *as2 == '\0')
		return ((char *)as1);

	/* Try every starting position in turn. */
	for (start = as1; *start != '\0'; start++) {
		h = start;
		n = as2;
		while (*n != '\0' && *h == *n) {
			h++;
			n++;
		}
		/* The whole of as2 was consumed: match found. */
		if (*n == '\0')
			return ((char *)start);
	}
	return (NULL);
}
529 
/*
 * Return a pointer to the first byte of 'string' that also appears in
 * 'brkset', or NULL if the two share no byte.  The terminators never
 * count as a match.
 */
char *
strpbrk(const char *string, const char *brkset)
{
	const char *b;

	for (; *string != '\0'; string++) {
		for (b = brkset; *b != '\0'; b++) {
			if (*b == *string)
				return ((char *)string);
		}
	}
	return (NULL);
}
544 
/*
 * Append at most n bytes of s2 to the end of the string in s1 and then
 * terminate the result, so up to n + 1 bytes are written.  Returns s1.
 */
char *
strncat(char *s1, const char *s2, size_t n)
{
	char *tail = s1;

	/* Find the NUL that currently ends the destination. */
	while (*tail != '\0')
		tail++;

	/* Take bytes until the source or the allowance runs out. */
	while (n != 0 && *s2 != '\0') {
		*tail++ = *s2++;
		n--;
	}
	*tail = '\0';

	return (s1);
}
562 
563 #if defined(_BOOT) || defined(_KMDB)
564 #define	bcopy(src, dst, n)	(void) memcpy((dst), (src), (n))
565 #endif
566 
/*
 * Append src to the string in dst, where dst is a buffer of dstsize
 * bytes.  As much of src as fits is copied and the result is always
 * terminated, unless dst holds no terminator within its first dstsize
 * bytes, in which case dst is left alone.  Returns the length the
 * combined string would have had with unlimited room, where the length
 * of dst is counted no further than dstsize.
 */
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t room, ncopy;

	/* Measure dst without looking beyond the bytes it owns. */
	while (dstlen < dstsize && dst[dstlen] != '\0')
		dstlen++;

	/* No terminator inside the buffer: nothing can be appended. */
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* One byte of the remaining space is reserved for the NUL. */
	room = dstsize - dstlen - 1;
	ncopy = (srclen > room) ? room : srclen;
	bcopy(src, dst + dstlen, ncopy);
	dst[dstlen + ncopy] = '\0';

	return (dstlen + srclen);
}
588 
/*
 * Copy src into dst, a buffer of len bytes.  At most len - 1 bytes are
 * copied and the result is always terminated; when len is 0 nothing is
 * written.  Returns the length of src, so a return value of len or more
 * tells the caller that the copy was truncated.
 */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	size_t srclen = strlen(src);
	size_t ncopy;

	if (len != 0) {
		/* Leave room for the terminator when src is too long. */
		ncopy = (srclen < len) ? srclen : len - 1;
		bcopy(src, dst, ncopy);
		dst[ncopy] = '\0';
	}
	return (srclen);
}
606 
/*
 * Return the length of the leading part of 'string' that is made up
 * solely of bytes found in 'charset'.
 */
size_t
strspn(const char *string, const char *charset)
{
	size_t span = 0;

	while (string[span] != '\0') {
		const char *m = charset;

		while (*m != '\0' && *m != string[span])
			m++;
		/* Ran off the end of the set: this byte ends the span. */
		if (*m == '\0')
			break;
		span++;
	}
	return (span);
}
622 
/*
 * Return the length of the leading part of 'string' that contains no
 * byte found in 'charset'.
 */
size_t
strcspn(const char *string, const char *charset)
{
	size_t span = 0;

	while (string[span] != '\0') {
		const char *m = charset;

		while (*m != '\0' && *m != string[span])
			m++;
		/* Stopped inside the set: this byte ends the span. */
		if (*m != '\0')
			break;
		span++;
	}
	return (span);
}
638 
/*
 * strsep
 *
 * Split the next token off the string referenced by *stringp.  The token
 * runs up to the first byte that appears in 'delim'; that byte is
 * overwritten with a NUL and *stringp is advanced to the byte after it.
 * If no delimiter remains, the rest of the string is the token and
 * *stringp is set to NULL.  Adjacent delimiters yield empty tokens.
 *
 * Returns the original value of *stringp, which is NULL (and nothing is
 * modified) once *stringp is already NULL.
 */
char *
strsep(char **stringp, const char *delim)
{
	char *tok = *stringp;
	char *cur;
	const char *d;

	if (tok == NULL)
		return (NULL);

	for (cur = tok; *cur != '\0'; cur++) {
		for (d = delim; *d != '\0'; d++) {
			if (*d == *cur) {
				/* Cut the token here; resume just past it. */
				*cur = '\0';
				*stringp = cur + 1;
				return (tok);
			}
		}
	}

	/* Reached the end of the string: this is the final token. */
	*stringp = NULL;
	return (tok);
}
678 
679 /*
680  * Unless mentioned otherwise, all of the routines below should be added to
681  * the Solaris DDI as necessary.  For now, only provide them to standalone.
682  */
683 #if defined(_BOOT) || defined(_KMDB)
/*
 * Split 'string' into tokens separated by bytes from 'sepset'.  The first
 * call passes the string to tokenize; later calls pass NULL to continue
 * from where the previous call stopped.  Each token is terminated in place
 * by overwriting the separator that follows it.  Returns the next token,
 * or NULL when none remain.
 *
 * The position is kept in a single static variable shared by all callers.
 * It is cleared whenever the scan runs out of tokens, so that a later
 * continuation call cannot resume inside some previously tokenized,
 * unrelated string.
 */
char *
strtok(char *string, const char *sepset)
{
	static char	*savept;
	char		*tok, *end;

	/*
	 * Start from the caller's string, or from the saved position.
	 */
	tok = (string != NULL) ? string : savept;
	if (tok == NULL)
		return (NULL);

	/*
	 * Skip leading separators; bail if no tokens remain.
	 */
	tok += strspn(tok, sepset);
	if (*tok == '\0') {
		savept = NULL;
		return (NULL);
	}

	/*
	 * Mark the end of the token and set `savept' for the next iteration.
	 */
	end = strpbrk(tok, sepset);
	if (end == NULL) {
		savept = NULL;
	} else {
		*end = '\0';
		savept = end + 1;
	}

	return (tok);
}
716 
/*
 * The strlen() routine isn't shared with the kernel because it has its own
 * hand-tuned assembly version.
 *
 * Returns the number of bytes in s that precede the terminating NUL.
 */
size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0')
		end++;
	return ((size_t)(end - s));
}
730 
731 #endif /* _BOOT || _KMDB */
732 
/*
 * Returns the number of non-NUL bytes at the start of the string
 * argument, but not more than maxlen.  Does not look past s + maxlen.
 */
size_t
strnlen(const char *s, size_t maxlen)
{
	size_t n;

	/* The bound is tested before each byte is examined. */
	for (n = 0; n < maxlen && s[n] != '\0'; n++)
		continue;

	return (n);
}
750 
751 
752 #ifdef _KERNEL
/*
 * Check for a valid C identifier:
 *	a letter or underscore, followed by
 *	zero or more letters, digits and underscores.
 *
 * Only the ASCII ranges tested by the macros below count as letters and
 * digits.  Returns 1 if 'id' is valid and 0 otherwise; the empty string
 * is not valid.
 */

#define	IS_DIGIT(c)	((c) >= '0' && (c) <= '9')

#define	IS_ALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

int
strident_valid(const char *id)
{
	const char *p = id;

	/* The leading character may not be a digit. */
	if (!IS_ALPHA(*p) && *p != '_')
		return (0);

	for (p++; *p != '\0'; p++) {
		if (!(IS_ALPHA(*p) || IS_DIGIT(*p) || *p == '_'))
			return (0);
	}
	return (1);
}
777 
/*
 * Rewrite the string s in place so that it forms a valid C identifier:
 * a first character that is neither a letter nor '_' becomes '_', and so
 * does any later character that is not a letter, digit or '_'.  The
 * result is always nul-terminated and takes up at most n bytes,
 * terminator included, so a longer input is cut off at s[n - 1].  An
 * empty string is left untouched.  n must be greater than zero.
 */
void
strident_canon(char *s, size_t n)
{
	char *last = s + n - 1;		/* final byte this routine may use */

	ASSERT(n > 0);

	if (*s == '\0')
		return;

	/* The leading character may not be a digit. */
	if (!IS_ALPHA(*s) && *s != '_')
		*s = '_';

	while (s < last) {
		s++;
		if (*s == '\0')
			break;
		if (!(IS_ALPHA(*s) || IS_DIGIT(*s) || *s == '_'))
			*s = '_';
	}

	/* Either already the terminator, or the forced cut-off point. */
	*s = '\0';
}
803 
804 #endif	/* _KERNEL */
805