xref: /illumos-gate/usr/src/common/util/string.c (revision 628e3cbed6489fa1db545d8524a06cd6535af456)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Implementations of the functions described in vsnprintf(3C) and string(3C),
28  * for use by the kernel, the standalone, and kmdb.  Unless otherwise specified,
29  * these functions match the section 3C manpages.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/varargs.h>
34 
35 #if defined(_KERNEL)
36 #include <sys/systm.h>
37 #include <sys/debug.h>
38 #elif !defined(_BOOT)
39 #include <string.h>
40 #endif
41 
42 #ifndef	NULL
43 #define	NULL	0l
44 #endif
45 
46 #include "memcpy.h"
47 #include "string.h"
48 
49 /*
50  * We don't need these for x86 boot or kmdb.
51  */
52 #if !defined(_KMDB) && (!defined(_BOOT) || defined(__sparc))
53 
/*
 * Append one character to the output.  The cursor 'bufp' always advances so
 * that the final length reflects everything that would have been rendered,
 * but the byte is stored only while it still fits inside 'buf'.  The
 * argument is evaluated at most once.  Wrapped in do/while (0) so that the
 * macro is a single statement in every context (no dangling-else hazard).
 */
#define	ADDCHAR(c)	\
	do { \
		if (bufp++ - buf < buflen) \
			bufp[-1] = (c); \
	} while (0)

/*
 * Given a buffer 'buf' of size 'buflen', render as much of the string
 * described by <fmt, args> as possible.  The string will always be
 * null-terminated, so the maximum string length is 'buflen - 1'.
 * Returns the number of bytes that would be necessary to render the
 * entire string, not including null terminator (just like vsnprintf(3C)).
 * To determine buffer size in advance, use vsnprintf(NULL, 0, fmt, args) + 1.
 *
 * Recognized directives:
 *	flags		'-' (left-align, honored by %s only) and '0' (zero pad)
 *	width/prec	decimal digits or '*' (taken from an int argument);
 *			the precision is only used by %s
 *	modifiers	'h', 'hh', 'l', 'll'
 *	conversions	d u x p o b c s %
 * An upper-case directive letter is folded to lower case and selects
 * upper-case digits for numeric output.  Unrecognized conversion characters
 * are dropped without consuming an argument.
 *
 * %b takes an unsigned int followed by a char * bit description: the first
 * byte is the output base, then a sequence of <bit number (1-based, below
 * 32)><name> entries.  Names of set bits are appended as "<name,name>".
 *
 * There is no support for floating point, and the C locale is assumed.
 */
size_t
vsnprintf(char *buf, size_t buflen, const char *fmt, va_list aargs)
{
	uint64_t ul = 0, tmp;
	char *bufp = buf;	/* current buffer pointer */
	int pad, width, base, sign, c, num;
	int prec, h_count, l_count, dot_count;
	int pad_count, transfer_count, left_align;
	char *digits, *sp, *bs = NULL;
	char numbuf[65];	/* sufficient for a 64-bit binary value */
	va_list args;

	/*
	 * Make a copy so that all our callers don't have to make a copy
	 */
	va_copy(args, aargs);

	/* A size that is negative when viewed as signed means "no room". */
	if ((ssize_t)buflen < 0)
		buflen = 0;

	while ((c = *fmt++) != '\0') {
		if (c != '%') {
			ADDCHAR(c);
			continue;
		}

		/* Reset the per-directive state. */
		width = prec = 0;
		left_align = base = sign = 0;
		h_count = l_count = dot_count = 0;
		pad = ' ';
		digits = "0123456789abcdef";
next_fmt:
		/* A format that ends in the middle of a directive stops here. */
		if ((c = *fmt++) == '\0')
			break;

		if (c >= 'A' && c <= 'Z') {
			c += 'a' - 'A';
			digits = "0123456789ABCDEF";
		}

		switch (c) {
		case '-':
			left_align++;
			goto next_fmt;
		case '0':
			/* A leading zero before any '.' selects zero padding. */
			if (dot_count == 0)
				pad = '0';
			/*FALLTHROUGH*/
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			num = 0;
			for (;;) {
				num = 10 * num + c - '0';
				c = *fmt;
				if (c < '0' || c > '9')
					break;
				else
					fmt++;
			}
			if (dot_count > 0)
				prec = num;
			else
				width = num;

			goto next_fmt;
		case '.':
			dot_count++;
			goto next_fmt;
		case '*':
			if (dot_count > 0)
				prec = (int)va_arg(args, int);
			else
				width = (int)va_arg(args, int);
			goto next_fmt;
		case 'l':
			l_count++;
			goto next_fmt;
		case 'h':
			h_count++;
			goto next_fmt;
		case 'd':
			sign = 1;
			/*FALLTHROUGH*/
		case 'u':
			base = 10;
			break;
		case 'p':
			/* pointers are fetched as (unsigned) long */
			l_count = 1;
			/*FALLTHROUGH*/
		case 'x':
			base = 16;
			break;
		case 'o':
			base = 8;
			break;
		case 'b':
			/*
			 * The value is always fetched as an unsigned int; the
			 * placeholder base is replaced by the first byte of
			 * the bit description string further down.
			 */
			l_count = 0;
			base = 1;
			break;
		case 'c':
			c = (char)va_arg(args, int);
			ADDCHAR(c);
			break;
		case 's':
			sp = va_arg(args, char *);
			if (sp == NULL) {
				sp = "<null string>";
				/* avoid truncation */
				prec = strlen(sp);
			}
			/*
			 * Handle simple case specially to avoid
			 * performance hit of strlen()
			 */
			if (prec == 0 && width == 0) {
				while ((c = *sp++) != 0)
					ADDCHAR(c);
				break;
			}
			if (prec > 0) {
				transfer_count = strnlen(sp, prec);
				/* widen field if too narrow */
				if (prec > width)
					width = prec;
			} else {
				transfer_count = strlen(sp);
			}
			if (width > transfer_count)
				pad_count = width - transfer_count;
			else
				pad_count = 0;
			while ((!left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			/* ADDCHAR() evaluates arg at most once */
			while (transfer_count-- > 0)
				ADDCHAR(*sp++);
			while ((left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			break;
		case '%':
			ADDCHAR('%');
			break;
		default:
			/* unrecognized conversion: silently dropped */
			break;
		}

		/* Only the numeric conversions set a base. */
		if (base == 0)
			continue;

		/*
		 * Fetch the argument according to the size modifiers and
		 * widen it to 64 bits (sign-extending for %d).
		 */
		if (h_count == 0 && l_count == 0) {
			if (sign)
				ul = (int64_t)va_arg(args, int);
			else
				ul = (int64_t)va_arg(args, unsigned int);
		} else if (l_count > 1) {
			if (sign)
				ul = (int64_t)va_arg(args, int64_t);
			else
				ul = (int64_t)va_arg(args, uint64_t);
		} else if (l_count > 0) {
			if (sign)
				ul = (int64_t)va_arg(args, long);
			else
				ul = (int64_t)va_arg(args, unsigned long);
		} else if (h_count > 1) {
			if (sign)
				ul = (int64_t)((char)va_arg(args, int));
			else
				ul = (int64_t)((unsigned char)va_arg(args,
				    int));
		} else if (h_count > 0) {
			if (sign)
				ul = (int64_t)((short)va_arg(args, int));
			else
				ul = (int64_t)((unsigned short)va_arg(args,
				    int));
		}

		/* From here on 'sign' means "a '-' must be emitted". */
		if (sign && (int64_t)ul < 0)
			ul = -ul;
		else
			sign = 0;

		if (c == 'b') {
			/*
			 * NOTE(review): the base byte is used unchecked; a
			 * description whose first byte is 0 would divide by
			 * zero below, and one above 16 would index past
			 * 'digits'.  Callers are assumed to pass a sane base.
			 */
			bs = va_arg(args, char *);
			base = *bs++;
		}

		/* avoid repeated division if width is 0 */
		if (width > 0) {
			/* consume one column of width per digit of the value */
			tmp = ul;
			do {
				width--;
			} while ((tmp /= base) != 0);
		}

		/* '-' goes before zero padding but after blank padding. */
		if (sign && pad == '0')
			ADDCHAR('-');
		while (width-- > sign)
			ADDCHAR(pad);
		if (sign && pad == ' ')
			ADDCHAR('-');

		/* Generate the digits least-significant first ... */
		sp = numbuf;
		tmp = ul;
		do {
			*sp++ = digits[tmp % base];
		} while ((tmp /= base) != 0);

		/* ... and emit them in reverse. */
		while (sp > numbuf) {
			sp--;
			ADDCHAR(*sp);
		}

		if (c == 'b' && ul != 0) {
			int any = 0;
			c = *bs++;
			while (c != 0) {
				/*
				 * 'c' is a 1-based bit number.  Do the shift
				 * in 64 bits and only for an in-range bit
				 * number so that a malformed description can
				 * never trigger an undefined shift; out-of-
				 * range entries are treated as "bit not set".
				 */
				if (c > 0 && c <= 64 &&
				    (ul & ((uint64_t)1 << (c - 1))) != 0) {
					if (any++ == 0)
						ADDCHAR('<');
					while ((c = *bs++) >= 32)
						ADDCHAR(c);
					ADDCHAR(',');
				} else {
					/* skip the name of a clear bit */
					while ((c = *bs++) >= 32)
						continue;
				}
			}
			if (any) {
				/* replace the trailing ',' with '>' */
				bufp--;
				ADDCHAR('>');
			}
		}
	}

	/*
	 * 'c' is '\0' on every path that leaves the loop.  Terminate at the
	 * cursor when it is still inside the buffer, otherwise in the last
	 * byte of the buffer (if there is a buffer at all).
	 */
	if (bufp - buf < buflen)
		bufp[0] = c;
	else if (buflen != 0)
		buf[buflen - 1] = c;

	va_end(args);

	return (bufp - buf);
}
313 
/*
 * Variadic front end for vsnprintf(): formats into 'buf' (at most 'buflen'
 * bytes including the terminator) and returns the length the complete
 * rendering would need, not counting the terminator.
 *
 * The format string is the third argument, so the lint directive must be
 * PRINTFLIKE3 for the format/argument checking to look at the right place.
 */
/*PRINTFLIKE3*/
size_t
snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;
	size_t needed;

	va_start(args, fmt);
	needed = vsnprintf(buf, buflen, fmt, args);
	va_end(args);

	return (needed);
}
326 
327 #if defined(_BOOT) && defined(__sparc)
328 /*
329  * The sprintf() and vsprintf() routines aren't shared with the kernel because
330  * the DDI mandates that they return the buffer rather than its length.
331  */
/*
 * Unbounded formatted print into 'buf'.  The work is done by vsnprintf()
 * with INT_MAX as the buffer size; the result is the length of the string
 * left in 'buf' as measured by strlen().
 */
/*PRINTFLIKE2*/
int
sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	size_t len;

	va_start(ap, fmt);
	(void) vsnprintf(buf, INT_MAX, fmt, ap);
	va_end(ap);

	len = strlen(buf);
	return ((int)len);
}
344 
/*
 * va_list flavor of sprintf(): formats into 'buf' through vsnprintf() with
 * INT_MAX as the buffer size and returns strlen() of the result.
 */
int
vsprintf(char *buf, const char *fmt, va_list args)
{
	size_t len;

	(void) vsnprintf(buf, INT_MAX, fmt, args);
	len = strlen(buf);

	return ((int)len);
}
351 #endif /* _BOOT && __sparc */
352 
353 #endif /* !_KMDB && (!_BOOT || __sparc) */
354 
/*
 * Append s2, including its terminating NUL, to the end of the string in s1
 * and return s1.  No bounds checking is done on the destination.
 */
char *
strcat(char *s1, const char *s2)
{
	char *tail = s1;

	/* Find the terminator of the destination. */
	while (*tail != '\0')
		tail++;

	/* Copy the source, terminator included. */
	for (;;) {
		char ch = *s2++;

		*tail++ = ch;
		if (ch == '\0')
			break;
	}

	return (s1);
}
367 
/*
 * Return a pointer to the first occurrence of (char)c in sp, or NULL if
 * there is none.  The terminating NUL is part of the string, so searching
 * for '\0' yields a pointer to the terminator.
 */
char *
strchr(const char *sp, int c)
{
	const char want = (char)c;
	const char *cur;

	for (cur = sp; ; cur++) {
		if (*cur == want)
			return ((char *)cur);
		if (*cur == '\0')
			return (NULL);
	}
}
377 
/*
 * Compare two strings.  Returns 0 when they are equal; otherwise the
 * difference between the first pair of differing bytes, taken as
 * unsigned char values.
 */
int
strcmp(const char *s1, const char *s2)
{
	for (; *s1 == *s2; s1++, s2++) {
		if (*s1 == '\0')
			return (0);
	}

	return (*(const unsigned char *)s1 - *(const unsigned char *)s2);
}
386 
/*
 * Compare at most n bytes of two strings.  Returns 0 when the compared
 * prefixes are equal (or the pointers are identical); otherwise the
 * difference between the first pair of differing bytes, taken as
 * unsigned char values.
 */
int
strncmp(const char *s1, const char *s2, size_t n)
{
	if (s1 == s2)
		return (0);

	for (; n != 0; n--, s1++, s2++) {
		if (*s1 != *s2) {
			return (*(const unsigned char *)s1 -
			    *(const unsigned char *)s2);
		}
		if (*s1 == '\0')
			return (0);
	}

	return (0);
}
398 
/*
 * Case-folding table indexed by byte value: the ASCII upper-case letters
 * (0x41-0x5a) map to their lower-case counterparts (0x61-0x7a); every other
 * byte maps to itself.
 */
static const char charmap[] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	/* '@', then 'A'-'Z' folded to 'a'-'z' */
	'\x40', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
	'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
	'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
	'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
	'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
	'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
	'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
	'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
	'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
	'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
	'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
	'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
	'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
	'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
	'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
	'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
	'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
433 
434 int
435 strcasecmp(const char *s1, const char *s2)
436 {
437 	const unsigned char *cm = (const unsigned char *)charmap;
438 	const unsigned char *us1 = (const unsigned char *)s1;
439 	const unsigned char *us2 = (const unsigned char *)s2;
440 
441 	while (cm[*us1] == cm[*us2++])
442 		if (*us1++ == '\0')
443 			return (0);
444 	return (cm[*us1] - cm[*(us2 - 1)]);
445 }
446 
447 int
448 strncasecmp(const char *s1, const char *s2, size_t n)
449 {
450 	const unsigned char *cm = (const unsigned char *)charmap;
451 	const unsigned char *us1 = (const unsigned char *)s1;
452 	const unsigned char *us2 = (const unsigned char *)s2;
453 
454 	while (n != 0 && cm[*us1] == cm[*us2++]) {
455 		if (*us1++ == '\0')
456 			return (0);
457 		n--;
458 	}
459 	return (n == 0 ? 0 : cm[*us1] - cm[*(us2 - 1)]);
460 }
461 
/*
 * Copy the string s2, including its terminating NUL, into s1 and return
 * s1.  No bounds checking is done on the destination.
 */
char *
strcpy(char *s1, const char *s2)
{
	char *dst = s1;

	do {
		*dst = *s2++;
	} while (*dst++ != '\0');

	return (s1);
}
471 
/*
 * Copy at most n bytes of s2 into s1 and return s1.  If s2 is shorter than
 * n, the remainder of the n bytes is filled with NULs; if it is not, the
 * result is NOT NUL-terminated.
 */
char *
strncpy(char *s1, const char *s2, size_t n)
{
	size_t i = 0;

	/* Copy the string body, stopping at its terminator or at n. */
	while (i < n && s2[i] != '\0') {
		s1[i] = s2[i];
		i++;
	}

	/* Zero-fill whatever is left of the n bytes. */
	while (i < n)
		s1[i++] = '\0';

	return (s1);
}
485 
/*
 * Return a pointer to the last occurrence of (char)c in sp, or NULL if
 * there is none.  The terminating NUL is part of the string, so searching
 * for '\0' yields a pointer to the terminator.
 */
char *
strrchr(const char *sp, int c)
{
	const char want = (char)c;
	const char *last = NULL;
	const char *cur;

	for (cur = sp; ; cur++) {
		if (*cur == want)
			last = cur;
		if (*cur == '\0')
			break;
	}

	return ((char *)last);
}
498 
/*
 * Return a pointer to the first occurrence of the string as2 within as1,
 * or NULL if it does not occur.  A NULL or empty as2 matches at the very
 * start, so as1 itself is returned.
 */
char *
strstr(const char *as1, const char *as2)
{
	const char *start;

	if (as2 == NULL || *as2 == '\0')
		return ((char *)as1);

	/* Try every starting position in the haystack in turn. */
	for (start = as1; *start != '\0'; start++) {
		const char *h = start;
		const char *n = as2;

		while (*n != '\0' && *h == *n) {
			h++;
			n++;
		}
		/* Reaching the end of the needle means a full match. */
		if (*n == '\0')
			return ((char *)start);
	}

	return (NULL);
}
527 
/*
 * Return a pointer to the first byte in 'string' that also appears in
 * 'brkset', or NULL if no byte of 'string' is in the set.
 */
char *
strpbrk(const char *string, const char *brkset)
{
	const char *cur = string;

	for (;;) {
		const char *b = brkset;

		/* Walk the set until it ends or a member matches. */
		while (*b != '\0' && *b != *cur)
			b++;
		if (*b != '\0')
			return ((char *)cur);
		if (*cur == '\0')
			return (NULL);
		cur++;
	}
}
542 
/*
 * Append at most n bytes of s2 to the end of the string in s1, then
 * NUL-terminate the result, and return s1.  Up to n + 1 bytes may be
 * written past the old end of s1.
 */
char *
strncat(char *s1, const char *s2, size_t n)
{
	char *tail = s1;

	/* Find the terminator of the destination. */
	while (*tail != '\0')
		tail++;

	/* Copy until the source ends or the budget is used up. */
	while (n != 0 && *s2 != '\0') {
		*tail++ = *s2++;
		n--;
	}
	*tail = '\0';

	return (s1);
}
560 
561 #if defined(_BOOT) || defined(_KMDB)
562 #define	bcopy(src, dst, n)	(void) memcpy((dst), (src), (n))
563 #endif
564 
/*
 * Append src to the string in dst, where dstsize is the full size of the
 * dst buffer.  The result is truncated if necessary and is NUL-terminated
 * whenever a terminator was found within the first dstsize bytes of dst.
 * Returns the length the combined string would have without truncation
 * (the scanned length of dst plus strlen(src)).
 */
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t room, ncopy;

	/* Measure dst, but never look beyond dstsize bytes. */
	while (dstlen < dstsize && dst[dstlen] != '\0')
		dstlen++;

	/* No terminator inside the buffer: nothing can be appended. */
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* One byte is reserved for the terminator. */
	room = dstsize - dstlen - 1;
	ncopy = (srclen > room) ? room : srclen;

	bcopy(src, dst + dstlen, ncopy);
	dst[dstlen + ncopy] = '\0';

	return (dstlen + srclen);
}
586 
/*
 * Copy src into dst, where len is the full size of the dst buffer.  At
 * most len - 1 bytes are copied and the result is always NUL-terminated
 * unless len is 0, in which case dst is not touched.  Returns strlen(src),
 * so a return value >= len means the copy was truncated.
 */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	const size_t srclen = strlen(src);

	if (len != 0) {
		size_t ncopy = (srclen < len) ? srclen : len - 1;

		bcopy(src, dst, ncopy);
		dst[ncopy] = '\0';
	}

	return (srclen);
}
604 
/*
 * Return the length of the leading run of bytes in 'string' that all
 * appear in 'charset'.
 */
size_t
strspn(const char *string, const char *charset)
{
	size_t span = 0;

	while (string[span] != '\0') {
		const char *member = charset;

		while (*member != '\0' && *member != string[span])
			member++;
		if (*member == '\0')
			break;		/* byte is not in the set */
		span++;
	}

	return (span);
}
620 
/*
 * Return the length of the leading run of bytes in 'string' none of which
 * appear in 'charset'.
 */
size_t
strcspn(const char *string, const char *charset)
{
	size_t span = 0;

	while (string[span] != '\0') {
		const char *member = charset;

		while (*member != '\0' && *member != string[span])
			member++;
		if (*member != '\0')
			break;		/* byte is in the set */
		span++;
	}

	return (span);
}
636 
/*
 * strsep
 *
 * Split the next token off the string referenced by *stringp.  The first
 * byte that appears in 'delim' (or the terminating `\0') ends the token;
 * a delimiter byte is overwritten with `\0'.  *stringp is then advanced to
 * the byte following the delimiter, or set to NULL when the end of the
 * string was reached.  The original value of *stringp (the token) is
 * returned; adjacent delimiters therefore produce empty tokens.
 *
 * If *stringp is initially NULL, strsep() returns NULL.
 *
 * NOTE: This instance is left for in-kernel use. Libraries and programs
 *       should use strsep from libc.
 */
char *
strsep(char **stringp, const char *delim)
{
	char *tok = *stringp;
	char *cur;

	if (tok == NULL)
		return (NULL);

	for (cur = tok; ; cur++) {
		const char *d = delim;
		const int ch = *cur;

		/*
		 * Compare against every delimiter, including delim's own
		 * terminator, so that the end of the string also matches.
		 */
		for (;;) {
			const int dc = *d++;

			if (dc == ch) {
				if (ch == 0) {
					*stringp = NULL;
				} else {
					*cur = 0;
					*stringp = cur + 1;
				}
				return (tok);
			}
			if (dc == 0)
				break;
		}
	}
	/* NOTREACHED */
}
679 
680 /*
681  * Unless mentioned otherwise, all of the routines below should be added to
682  * the Solaris DDI as necessary.  For now, only provide them to standalone.
683  */
684 #if defined(_BOOT) || defined(_KMDB)
/*
 * Return the next token of a string, where tokens are separated by runs of
 * bytes from 'sepset'.  Pass the string on the first call and NULL on
 * subsequent calls to continue with the same string.  The string is
 * modified (separators are overwritten with NULs) and the scan position is
 * kept in a static variable.  Returns NULL when no tokens remain.
 */
char *
strtok(char *string, const char *sepset)
{
	static char	*savept;	/* resume point for the next call */
	char		*start, *tok, *end;

	/* Continue from the saved position unless a new string is given. */
	start = (string != NULL) ? string : savept;
	if (start == NULL)
		return (NULL);

	/* Step over leading separators; give up if nothing is left. */
	tok = start + strspn(start, sepset);
	if (*tok == '\0')
		return (NULL);

	/* Terminate the token and remember where to resume. */
	end = strpbrk(tok, sepset);
	if (end != NULL) {
		*end = '\0';
		savept = end + 1;
	} else {
		savept = NULL;
	}

	return (tok);
}
717 
/*
 * Return the number of bytes in s before its terminating NUL.
 *
 * The strlen() routine isn't shared with the kernel because it has its own
 * hand-tuned assembly version.
 */
size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0')
		end++;

	return ((size_t)(end - s));
}
731 
732 #endif /* _BOOT || _KMDB */
733 
/*
 * Returns the number of non-NUL bytes at the start of the string argument,
 * but not more than maxlen.  Does not look past str + maxlen.
 */
size_t
strnlen(const char *s, size_t maxlen)
{
	size_t len;

	for (len = 0; len < maxlen; len++) {
		if (s[len] == '\0')
			break;
	}

	return (len);
}
751 
752 
753 #ifdef _KERNEL
754 /*
755  * Check for a valid C identifier:
756  *	a letter or underscore, followed by
757  *	zero or more letters, digits and underscores.
758  */
759 
760 #define	IS_DIGIT(c)	((c) >= '0' && (c) <= '9')
761 
762 #define	IS_ALPHA(c)	\
763 	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
764 
/*
 * Return 1 if 'id' is a valid C identifier (a letter or underscore followed
 * by any number of letters, digits and underscores), 0 otherwise.  The
 * empty string is not valid.
 */
int
strident_valid(const char *id)
{
	const char *p = id;
	int c = *p;

	/* The first character may not be a digit. */
	if (!(IS_ALPHA(c) || c == '_'))
		return (0);

	for (p++; (c = *p) != 0; p++) {
		if (!(IS_ALPHA(c) || IS_DIGIT(c) || c == '_'))
			return (0);
	}

	return (1);
}
778 
/*
 * Rewrite the string 's' in place into a valid C identifier by replacing
 * every character that is not allowed at its position with '_'.  At most
 * n bytes of 's' are used: the result is cut off and nul-terminated within
 * the first n bytes.  An empty string is left untouched.  n must be
 * greater than zero.
 */
void
strident_canon(char *s, size_t n)
{
	char *last = s + n - 1;	/* last byte that may be written */
	char *p = s;
	char c;

	ASSERT(n > 0);

	c = *p;
	if (c == 0)
		return;

	/* The leading character may not be a digit. */
	if (!(IS_ALPHA(c) || c == '_'))
		*p = '_';

	while (p < last) {
		c = *(++p);
		if (c == 0)
			break;
		if (!(IS_ALPHA(c) || IS_DIGIT(c) || c == '_'))
			*p = '_';
	}

	/* Either the existing terminator or the truncation point. */
	*p = 0;
}
804 
805 #endif	/* _KERNEL */
806