xref: /freebsd/usr.bin/printf/printf.c (revision 5b31cc94b10d4bb7109c6b27940a0fc76a44a331)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
5  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
6  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
7  * Copyright (c) 1989, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * Important: This file is used both as a standalone program /usr/bin/printf
36  * and as a builtin for /bin/sh (#define SHELL).
37  */
38 
39 #ifndef SHELL
40 #ifndef lint
41 static char const copyright[] =
42 "@(#) Copyright (c) 1989, 1993\n\
43 	The Regents of the University of California.  All rights reserved.\n";
44 #endif /* not lint */
45 #endif
46 
47 #ifndef lint
48 #endif /* not lint */
49 
50 #include <sys/types.h>
51 
52 #include <ctype.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <inttypes.h>
56 #include <limits.h>
57 #include <locale.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62 #include <wchar.h>
63 
64 #ifdef SHELL
65 #define	main printfcmd
66 #include "bltin/bltin.h"
67 #include "options.h"
68 #endif
69 
/*
 * Print one converted argument with printf(3).  The expansion reads the
 * caller's locals: havewidth/haveprec tell whether a '*' field width
 * and/or precision was supplied, in which case fieldwidth/precision are
 * passed ahead of the value itself.
 */
#define	PF(f, func) do {						\
	if (havewidth && haveprec)					\
		(void)printf(f, fieldwidth, precision, func);		\
	else if (havewidth)						\
		(void)printf(f, fieldwidth, func);			\
	else if (haveprec)						\
		(void)printf(f, precision, func);			\
	else								\
		(void)printf(f, func);					\
} while (0)
81 
/* Conversion driver and format helpers. */
static char	*printf_doformat(char *, int *);
static char	*mknum(char *, char);
static int	 escape(char *, int, size_t *);

/* Argument fetchers; each one consumes the argument gargv points at. */
static int	 asciicode(void);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char *getstr(void);

static void	 usage(void);

/* Accept set for strspn() when scanning "n$" argument indexes. */
static const char digits[] = "0123456789";

/*
 * Sentinel returned by printf_doformat() when a %b argument contained
 * "\c"; only its address is ever compared.
 */
static char end_fmt[1];

static int  myargc;	/* arguments available to the current format pass */
static char **myargv;	/* first argument of the current format pass */
static char **gargv;	/* next argument to be consumed */
static char **maxargv;	/* furthest argument position reached in this pass */
102 
103 int
104 main(int argc, char *argv[])
105 {
106 	size_t len;
107 	int end, rval;
108 	char *format, *fmt, *start;
109 #ifndef SHELL
110 	int ch;
111 
112 	(void) setlocale(LC_ALL, "");
113 #endif
114 
115 #ifdef SHELL
116 	nextopt("");
117 	argc -= argptr - argv;
118 	argv = argptr;
119 #else
120 	while ((ch = getopt(argc, argv, "")) != -1)
121 		switch (ch) {
122 		case '?':
123 		default:
124 			usage();
125 			return (1);
126 		}
127 	argc -= optind;
128 	argv += optind;
129 #endif
130 
131 	if (argc < 1) {
132 		usage();
133 		return (1);
134 	}
135 
136 #ifdef SHELL
137 	INTOFF;
138 #endif
139 	/*
140 	 * Basic algorithm is to scan the format string for conversion
141 	 * specifications -- once one is found, find out if the field
142 	 * width or precision is a '*'; if it is, gather up value.  Note,
143 	 * format strings are reused as necessary to use up the provided
144 	 * arguments, arguments of zero/null string are provided to use
145 	 * up the format string.
146 	 */
147 	fmt = format = *argv;
148 	escape(fmt, 1, &len);		/* backslash interpretation */
149 	rval = end = 0;
150 	gargv = ++argv;
151 
152 	for (;;) {
153 		maxargv = gargv;
154 
155 		myargv = gargv;
156 		for (myargc = 0; gargv[myargc]; myargc++)
157 			/* nop */;
158 		start = fmt;
159 		while (fmt < format + len) {
160 			if (fmt[0] == '%') {
161 				fwrite(start, 1, fmt - start, stdout);
162 				if (fmt[1] == '%') {
163 					/* %% prints a % */
164 					putchar('%');
165 					fmt += 2;
166 				} else {
167 					fmt = printf_doformat(fmt, &rval);
168 					if (fmt == NULL || fmt == end_fmt) {
169 #ifdef SHELL
170 						INTON;
171 #endif
172 						return (fmt == NULL ? 1 : rval);
173 					}
174 					end = 0;
175 				}
176 				start = fmt;
177 			} else
178 				fmt++;
179 			if (gargv > maxargv)
180 				maxargv = gargv;
181 		}
182 		gargv = maxargv;
183 
184 		if (end == 1) {
185 			warnx("missing format character");
186 #ifdef SHELL
187 			INTON;
188 #endif
189 			return (1);
190 		}
191 		fwrite(start, 1, fmt - start, stdout);
192 		if (!*gargv) {
193 #ifdef SHELL
194 			INTON;
195 #endif
196 			return (rval);
197 		}
198 		/* Restart at the beginning of the format string. */
199 		fmt = format;
200 		end = 1;
201 	}
202 	/* NOTREACHED */
203 }
204 
205 
206 static char *
207 printf_doformat(char *fmt, int *rval)
208 {
209 	static const char skip1[] = "#'-+ 0";
210 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
211 	char convch, nextch;
212 	char start[strlen(fmt) + 1];
213 	char **fargv;
214 	char *dptr;
215 	int l;
216 
217 	dptr = start;
218 	*dptr++ = '%';
219 	*dptr = 0;
220 
221 	fmt++;
222 
223 	/* look for "n$" field index specifier */
224 	l = strspn(fmt, digits);
225 	if ((l > 0) && (fmt[l] == '$')) {
226 		int idx = atoi(fmt);
227 		if (idx <= myargc) {
228 			gargv = &myargv[idx - 1];
229 		} else {
230 			gargv = &myargv[myargc];
231 		}
232 		if (gargv > maxargv)
233 			maxargv = gargv;
234 		fmt += l + 1;
235 
236 		/* save format argument */
237 		fargv = gargv;
238 	} else {
239 		fargv = NULL;
240 	}
241 
242 	/* skip to field width */
243 	while (*fmt && strchr(skip1, *fmt) != NULL) {
244 		*dptr++ = *fmt++;
245 		*dptr = 0;
246 	}
247 
248 	if (*fmt == '*') {
249 
250 		fmt++;
251 		l = strspn(fmt, digits);
252 		if ((l > 0) && (fmt[l] == '$')) {
253 			int idx = atoi(fmt);
254 			if (fargv == NULL) {
255 				warnx("incomplete use of n$");
256 				return (NULL);
257 			}
258 			if (idx <= myargc) {
259 				gargv = &myargv[idx - 1];
260 			} else {
261 				gargv = &myargv[myargc];
262 			}
263 			fmt += l + 1;
264 		} else if (fargv != NULL) {
265 			warnx("incomplete use of n$");
266 			return (NULL);
267 		}
268 
269 		if (getint(&fieldwidth))
270 			return (NULL);
271 		if (gargv > maxargv)
272 			maxargv = gargv;
273 		havewidth = 1;
274 
275 		*dptr++ = '*';
276 		*dptr = 0;
277 	} else {
278 		havewidth = 0;
279 
280 		/* skip to possible '.', get following precision */
281 		while (isdigit(*fmt)) {
282 			*dptr++ = *fmt++;
283 			*dptr = 0;
284 		}
285 	}
286 
287 	if (*fmt == '.') {
288 		/* precision present? */
289 		fmt++;
290 		*dptr++ = '.';
291 
292 		if (*fmt == '*') {
293 
294 			fmt++;
295 			l = strspn(fmt, digits);
296 			if ((l > 0) && (fmt[l] == '$')) {
297 				int idx = atoi(fmt);
298 				if (fargv == NULL) {
299 					warnx("incomplete use of n$");
300 					return (NULL);
301 				}
302 				if (idx <= myargc) {
303 					gargv = &myargv[idx - 1];
304 				} else {
305 					gargv = &myargv[myargc];
306 				}
307 				fmt += l + 1;
308 			} else if (fargv != NULL) {
309 				warnx("incomplete use of n$");
310 				return (NULL);
311 			}
312 
313 			if (getint(&precision))
314 				return (NULL);
315 			if (gargv > maxargv)
316 				maxargv = gargv;
317 			haveprec = 1;
318 			*dptr++ = '*';
319 			*dptr = 0;
320 		} else {
321 			haveprec = 0;
322 
323 			/* skip to conversion char */
324 			while (isdigit(*fmt)) {
325 				*dptr++ = *fmt++;
326 				*dptr = 0;
327 			}
328 		}
329 	} else
330 		haveprec = 0;
331 	if (!*fmt) {
332 		warnx("missing format character");
333 		return (NULL);
334 	}
335 	*dptr++ = *fmt;
336 	*dptr = 0;
337 
338 	/*
339 	 * Look for a length modifier.  POSIX doesn't have these, so
340 	 * we only support them for floating-point conversions, which
341 	 * are extensions.  This is useful because the L modifier can
342 	 * be used to gain extra range and precision, while omitting
343 	 * it is more likely to produce consistent results on different
344 	 * architectures.  This is not so important for integers
345 	 * because overflow is the only bad thing that can happen to
346 	 * them, but consider the command  printf %a 1.1
347 	 */
348 	if (*fmt == 'L') {
349 		mod_ldbl = 1;
350 		fmt++;
351 		if (!strchr("aAeEfFgG", *fmt)) {
352 			warnx("bad modifier L for %%%c", *fmt);
353 			return (NULL);
354 		}
355 	} else {
356 		mod_ldbl = 0;
357 	}
358 
359 	/* save the current arg offset, and set to the format arg */
360 	if (fargv != NULL) {
361 		gargv = fargv;
362 	}
363 
364 	convch = *fmt;
365 	nextch = *++fmt;
366 
367 	*fmt = '\0';
368 	switch (convch) {
369 	case 'b': {
370 		size_t len;
371 		char *p;
372 		int getout;
373 
374 		/* Convert "b" to "s" for output. */
375 		start[strlen(start) - 1] = 's';
376 		if ((p = strdup(getstr())) == NULL) {
377 			warnx("%s", strerror(ENOMEM));
378 			return (NULL);
379 		}
380 		getout = escape(p, 0, &len);
381 		PF(start, p);
382 		/* Restore format for next loop. */
383 
384 		free(p);
385 		if (getout)
386 			return (end_fmt);
387 		break;
388 	}
389 	case 'c': {
390 		char p;
391 
392 		p = getchr();
393 		if (p != '\0')
394 			PF(start, p);
395 		break;
396 	}
397 	case 's': {
398 		const char *p;
399 
400 		p = getstr();
401 		PF(start, p);
402 		break;
403 	}
404 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
405 		char *f;
406 		intmax_t val;
407 		uintmax_t uval;
408 		int signedconv;
409 
410 		signedconv = (convch == 'd' || convch == 'i');
411 		if ((f = mknum(start, convch)) == NULL)
412 			return (NULL);
413 		if (getnum(&val, &uval, signedconv))
414 			*rval = 1;
415 		if (signedconv)
416 			PF(f, val);
417 		else
418 			PF(f, uval);
419 		break;
420 	}
421 	case 'e': case 'E':
422 	case 'f': case 'F':
423 	case 'g': case 'G':
424 	case 'a': case 'A': {
425 		long double p;
426 
427 		if (getfloating(&p, mod_ldbl))
428 			*rval = 1;
429 		if (mod_ldbl)
430 			PF(start, p);
431 		else
432 			PF(start, (double)p);
433 		break;
434 	}
435 	default:
436 		warnx("illegal format character %c", convch);
437 		return (NULL);
438 	}
439 	*fmt = nextch;
440 	/* return the gargv to the next element */
441 	return (fmt);
442 }
443 
/*
 * Produce a copy of the conversion specification str in which the final
 * character is replaced by 'j' followed by ch, i.e. the same conversion
 * with the intmax_t/uintmax_t length modifier inserted.
 *
 * The result lives in a buffer that is kept between calls and grown in
 * 1 KiB steps, so it is overwritten by the next call.  Returns NULL, after
 * a warning, when the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	size_t need, speclen;

	speclen = strlen(str);
	need = speclen + 2;		/* extra 'j' plus the terminator */
	if (need > copy_size) {
		size_t rounded;
		char *grown;

		rounded = (need + 1023) / 1024 * 1024;
		grown = realloc(copy, rounded);
		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	/* Everything except the old conversion character is kept. */
	memmove(copy, str, speclen - 1);
	copy[speclen - 1] = 'j';
	copy[speclen] = ch;
	copy[speclen + 1] = '\0';
	return (copy);
}
469 
/*
 * Expand backslash escapes in fmt in place and store the resulting length
 * (not counting the terminator) in *len.
 *
 * percent is non-zero for the format string itself: there "\c" is just a
 * 'c', octal escapes take at most three digits, and an octal escape that
 * yields '%' is stored as "%%" so that it is later printed literally.
 * With percent zero (a %b argument) a leading '0' allows four octal digits
 * and "\c" terminates the string.
 *
 * Returns 1 when "\c" ended the string, otherwise 0.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *dst, *src;
	int ndigits, octal;

	src = dst = fmt;
	while (*src != '\0') {
		if (*src != '\\') {
			*dst++ = *src++;
			continue;
		}

		src++;			/* examine what follows the backslash */
		if (*src == '\0') {
			/* EOS, user error: keep the lone backslash */
			*dst++ = '\\';
			*dst = '\0';
			*len = dst - fmt;
			return (0);
		}
		if (*src == 'c' && !percent) {
			*dst = '\0';
			*len = dst - fmt;
			return (1);
		}
		if (*src >= '0' && *src <= '7') {
			/* octal constant */
			ndigits = (!percent && *src == '0') ? 4 : 3;
			octal = 0;
			while (ndigits-- > 0 && *src >= '0' && *src <= '7') {
				octal = (octal << 3) + (*src - '0');
				src++;
			}
			if (percent && octal == '%') {
				*dst++ = '%';
				*dst++ = '%';
			} else {
				*dst++ = (char)octal;
			}
			continue;
		}

		switch (*src) {
		case 'a':		/* bell/alert */
			*dst = '\a';
			break;
		case 'b':		/* backspace */
			*dst = '\b';
			break;
		case 'f':		/* form-feed */
			*dst = '\f';
			break;
		case 'n':		/* newline */
			*dst = '\n';
			break;
		case 'r':		/* carriage-return */
			*dst = '\r';
			break;
		case 't':		/* horizontal tab */
			*dst = '\t';
			break;
		case 'v':		/* vertical tab */
			*dst = '\v';
			break;
		default:		/* backslash, quote, anything else */
			*dst = *src;
			break;
		}
		dst++;
		src++;
	}
	*dst = '\0';
	*len = dst - fmt;
	return (0);
}
545 
546 static int
547 getchr(void)
548 {
549 	if (!*gargv)
550 		return ('\0');
551 	return ((int)**gargv++);
552 }
553 
554 static const char *
555 getstr(void)
556 {
557 	if (!*gargv)
558 		return ("");
559 	return (*gargv++);
560 }
561 
562 static int
563 getint(int *ip)
564 {
565 	intmax_t val;
566 	uintmax_t uval;
567 	int rval;
568 
569 	if (getnum(&val, &uval, 1))
570 		return (1);
571 	rval = 0;
572 	if (val < INT_MIN || val > INT_MAX) {
573 		warnx("%s: %s", *gargv, strerror(ERANGE));
574 		rval = 1;
575 	}
576 	*ip = (int)val;
577 	return (rval);
578 }
579 
580 static int
581 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
582 {
583 	char *ep;
584 	int rval;
585 
586 	if (!*gargv) {
587 		*ip = *uip = 0;
588 		return (0);
589 	}
590 	if (**gargv == '"' || **gargv == '\'') {
591 		if (signedconv)
592 			*ip = asciicode();
593 		else
594 			*uip = asciicode();
595 		return (0);
596 	}
597 	rval = 0;
598 	errno = 0;
599 	if (signedconv)
600 		*ip = strtoimax(*gargv, &ep, 0);
601 	else
602 		*uip = strtoumax(*gargv, &ep, 0);
603 	if (ep == *gargv) {
604 		warnx("%s: expected numeric value", *gargv);
605 		rval = 1;
606 	}
607 	else if (*ep != '\0') {
608 		warnx("%s: not completely converted", *gargv);
609 		rval = 1;
610 	}
611 	if (errno == ERANGE) {
612 		warnx("%s: %s", *gargv, strerror(ERANGE));
613 		rval = 1;
614 	}
615 	++gargv;
616 	return (rval);
617 }
618 
619 static int
620 getfloating(long double *dp, int mod_ldbl)
621 {
622 	char *ep;
623 	int rval;
624 
625 	if (!*gargv) {
626 		*dp = 0.0;
627 		return (0);
628 	}
629 	if (**gargv == '"' || **gargv == '\'') {
630 		*dp = asciicode();
631 		return (0);
632 	}
633 	rval = 0;
634 	errno = 0;
635 	if (mod_ldbl)
636 		*dp = strtold(*gargv, &ep);
637 	else
638 		*dp = strtod(*gargv, &ep);
639 	if (ep == *gargv) {
640 		warnx("%s: expected numeric value", *gargv);
641 		rval = 1;
642 	} else if (*ep != '\0') {
643 		warnx("%s: not completely converted", *gargv);
644 		rval = 1;
645 	}
646 	if (errno == ERANGE) {
647 		warnx("%s: %s", *gargv, strerror(ERANGE));
648 		rval = 1;
649 	}
650 	++gargv;
651 	return (rval);
652 }
653 
654 static int
655 asciicode(void)
656 {
657 	int ch;
658 	wchar_t wch;
659 	mbstate_t mbs;
660 
661 	ch = (unsigned char)**gargv;
662 	if (ch == '\'' || ch == '"') {
663 		memset(&mbs, 0, sizeof(mbs));
664 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
665 		case (size_t)-2:
666 		case (size_t)-1:
667 			wch = (unsigned char)gargv[0][1];
668 			break;
669 		case 0:
670 			wch = 0;
671 			break;
672 		}
673 		ch = wch;
674 	}
675 	++gargv;
676 	return (ch);
677 }
678 
/*
 * Write the one-line usage synopsis to standard error.  The caller is
 * responsible for the exit status.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: printf format [arguments ...]\n";

	(void)fprintf(stderr, "%s", synopsis);
}
684