xref: /freebsd/usr.bin/printf/printf.c (revision b0d29bc47dba79f6f38e67eabadfb4b32ffd9390)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
5  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
6  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
7  * Copyright (c) 1989, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * Important: This file is used both as a standalone program /usr/bin/printf
36  * and as a builtin for /bin/sh (#define SHELL).
37  */
38 
39 #ifndef SHELL
40 #ifndef lint
41 static char const copyright[] =
42 "@(#) Copyright (c) 1989, 1993\n\
43 	The Regents of the University of California.  All rights reserved.\n";
44 #endif /* not lint */
45 #endif
46 
47 #ifndef lint
48 #if 0
49 static char const sccsid[] = "@(#)printf.c	8.1 (Berkeley) 7/20/93";
50 #endif
51 static const char rcsid[] =
52   "$FreeBSD$";
53 #endif /* not lint */
54 
55 #include <sys/types.h>
56 
57 #include <ctype.h>
58 #include <err.h>
59 #include <errno.h>
60 #include <inttypes.h>
61 #include <limits.h>
62 #include <locale.h>
63 #include <stdio.h>
64 #include <stdlib.h>
65 #include <string.h>
66 #include <unistd.h>
67 #include <wchar.h>
68 
69 #ifdef SHELL
70 #define	main printfcmd
71 #include "bltin/bltin.h"
72 #include "options.h"
73 #endif
74 
/*
 * Emit one converted value through printf(3).  The caller's locals
 * havewidth/haveprec say whether a '*' width and/or '*' precision was
 * gathered; when set, fieldwidth/precision are passed ahead of the value
 * so that they line up with the '*' markers in the format `f'.
 */
#define	PF(f, func) do {						\
	if (havewidth && haveprec)					\
		(void)printf(f, fieldwidth, precision, func);		\
	else if (havewidth)						\
		(void)printf(f, fieldwidth, func);			\
	else if (haveprec)						\
		(void)printf(f, precision, func);			\
	else								\
		(void)printf(f, func);					\
} while (0)
86 
/* Conversion driver and format-string helpers. */
static char	*printf_doformat(char *, int *);
static char	*mknum(char *, char);
static int	 escape(char *, int, size_t *);

/* Operand fetchers; each one consumes the argument at gargv. */
static int	 asciicode(void);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char *getstr(void);

static void	 usage(void);

/* Accepted digit set when scanning for an "n$" argument index. */
static const char digits[] = "0123456789";

/*
 * Sentinel: printf_doformat() returns this address when a %b operand
 * contained \c, telling main() to stop producing output.
 */
static char end_fmt[1];

static int  myargc;	/* number of operands available to this format pass */
static char **myargv;	/* first operand of this format pass */
static char **gargv;	/* next operand to be consumed */
static char **maxargv;	/* furthest operand position reached in this pass */
107 
108 int
109 main(int argc, char *argv[])
110 {
111 	size_t len;
112 	int end, rval;
113 	char *format, *fmt, *start;
114 #ifndef SHELL
115 	int ch;
116 
117 	(void) setlocale(LC_ALL, "");
118 #endif
119 
120 #ifdef SHELL
121 	nextopt("");
122 	argc -= argptr - argv;
123 	argv = argptr;
124 #else
125 	while ((ch = getopt(argc, argv, "")) != -1)
126 		switch (ch) {
127 		case '?':
128 		default:
129 			usage();
130 			return (1);
131 		}
132 	argc -= optind;
133 	argv += optind;
134 #endif
135 
136 	if (argc < 1) {
137 		usage();
138 		return (1);
139 	}
140 
141 #ifdef SHELL
142 	INTOFF;
143 #endif
144 	/*
145 	 * Basic algorithm is to scan the format string for conversion
146 	 * specifications -- once one is found, find out if the field
147 	 * width or precision is a '*'; if it is, gather up value.  Note,
148 	 * format strings are reused as necessary to use up the provided
149 	 * arguments, arguments of zero/null string are provided to use
150 	 * up the format string.
151 	 */
152 	fmt = format = *argv;
153 	escape(fmt, 1, &len);		/* backslash interpretation */
154 	rval = end = 0;
155 	gargv = ++argv;
156 
157 	for (;;) {
158 		maxargv = gargv;
159 
160 		myargv = gargv;
161 		for (myargc = 0; gargv[myargc]; myargc++)
162 			/* nop */;
163 		start = fmt;
164 		while (fmt < format + len) {
165 			if (fmt[0] == '%') {
166 				fwrite(start, 1, fmt - start, stdout);
167 				if (fmt[1] == '%') {
168 					/* %% prints a % */
169 					putchar('%');
170 					fmt += 2;
171 				} else {
172 					fmt = printf_doformat(fmt, &rval);
173 					if (fmt == NULL || fmt == end_fmt) {
174 #ifdef SHELL
175 						INTON;
176 #endif
177 						return (fmt == NULL ? 1 : rval);
178 					}
179 					end = 0;
180 				}
181 				start = fmt;
182 			} else
183 				fmt++;
184 			if (gargv > maxargv)
185 				maxargv = gargv;
186 		}
187 		gargv = maxargv;
188 
189 		if (end == 1) {
190 			warnx("missing format character");
191 #ifdef SHELL
192 			INTON;
193 #endif
194 			return (1);
195 		}
196 		fwrite(start, 1, fmt - start, stdout);
197 		if (!*gargv) {
198 #ifdef SHELL
199 			INTON;
200 #endif
201 			return (rval);
202 		}
203 		/* Restart at the beginning of the format string. */
204 		fmt = format;
205 		end = 1;
206 	}
207 	/* NOTREACHED */
208 }
209 
210 
211 static char *
212 printf_doformat(char *fmt, int *rval)
213 {
214 	static const char skip1[] = "#'-+ 0";
215 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
216 	char convch, nextch;
217 	char start[strlen(fmt) + 1];
218 	char **fargv;
219 	char *dptr;
220 	int l;
221 
222 	dptr = start;
223 	*dptr++ = '%';
224 	*dptr = 0;
225 
226 	fmt++;
227 
228 	/* look for "n$" field index specifier */
229 	l = strspn(fmt, digits);
230 	if ((l > 0) && (fmt[l] == '$')) {
231 		int idx = atoi(fmt);
232 		if (idx <= myargc) {
233 			gargv = &myargv[idx - 1];
234 		} else {
235 			gargv = &myargv[myargc];
236 		}
237 		if (gargv > maxargv)
238 			maxargv = gargv;
239 		fmt += l + 1;
240 
241 		/* save format argument */
242 		fargv = gargv;
243 	} else {
244 		fargv = NULL;
245 	}
246 
247 	/* skip to field width */
248 	while (*fmt && strchr(skip1, *fmt) != NULL) {
249 		*dptr++ = *fmt++;
250 		*dptr = 0;
251 	}
252 
253 	if (*fmt == '*') {
254 
255 		fmt++;
256 		l = strspn(fmt, digits);
257 		if ((l > 0) && (fmt[l] == '$')) {
258 			int idx = atoi(fmt);
259 			if (fargv == NULL) {
260 				warnx("incomplete use of n$");
261 				return (NULL);
262 			}
263 			if (idx <= myargc) {
264 				gargv = &myargv[idx - 1];
265 			} else {
266 				gargv = &myargv[myargc];
267 			}
268 			fmt += l + 1;
269 		} else if (fargv != NULL) {
270 			warnx("incomplete use of n$");
271 			return (NULL);
272 		}
273 
274 		if (getint(&fieldwidth))
275 			return (NULL);
276 		if (gargv > maxargv)
277 			maxargv = gargv;
278 		havewidth = 1;
279 
280 		*dptr++ = '*';
281 		*dptr = 0;
282 	} else {
283 		havewidth = 0;
284 
285 		/* skip to possible '.', get following precision */
286 		while (isdigit(*fmt)) {
287 			*dptr++ = *fmt++;
288 			*dptr = 0;
289 		}
290 	}
291 
292 	if (*fmt == '.') {
293 		/* precision present? */
294 		fmt++;
295 		*dptr++ = '.';
296 
297 		if (*fmt == '*') {
298 
299 			fmt++;
300 			l = strspn(fmt, digits);
301 			if ((l > 0) && (fmt[l] == '$')) {
302 				int idx = atoi(fmt);
303 				if (fargv == NULL) {
304 					warnx("incomplete use of n$");
305 					return (NULL);
306 				}
307 				if (idx <= myargc) {
308 					gargv = &myargv[idx - 1];
309 				} else {
310 					gargv = &myargv[myargc];
311 				}
312 				fmt += l + 1;
313 			} else if (fargv != NULL) {
314 				warnx("incomplete use of n$");
315 				return (NULL);
316 			}
317 
318 			if (getint(&precision))
319 				return (NULL);
320 			if (gargv > maxargv)
321 				maxargv = gargv;
322 			haveprec = 1;
323 			*dptr++ = '*';
324 			*dptr = 0;
325 		} else {
326 			haveprec = 0;
327 
328 			/* skip to conversion char */
329 			while (isdigit(*fmt)) {
330 				*dptr++ = *fmt++;
331 				*dptr = 0;
332 			}
333 		}
334 	} else
335 		haveprec = 0;
336 	if (!*fmt) {
337 		warnx("missing format character");
338 		return (NULL);
339 	}
340 	*dptr++ = *fmt;
341 	*dptr = 0;
342 
343 	/*
344 	 * Look for a length modifier.  POSIX doesn't have these, so
345 	 * we only support them for floating-point conversions, which
346 	 * are extensions.  This is useful because the L modifier can
347 	 * be used to gain extra range and precision, while omitting
348 	 * it is more likely to produce consistent results on different
349 	 * architectures.  This is not so important for integers
350 	 * because overflow is the only bad thing that can happen to
351 	 * them, but consider the command  printf %a 1.1
352 	 */
353 	if (*fmt == 'L') {
354 		mod_ldbl = 1;
355 		fmt++;
356 		if (!strchr("aAeEfFgG", *fmt)) {
357 			warnx("bad modifier L for %%%c", *fmt);
358 			return (NULL);
359 		}
360 	} else {
361 		mod_ldbl = 0;
362 	}
363 
364 	/* save the current arg offset, and set to the format arg */
365 	if (fargv != NULL) {
366 		gargv = fargv;
367 	}
368 
369 	convch = *fmt;
370 	nextch = *++fmt;
371 
372 	*fmt = '\0';
373 	switch (convch) {
374 	case 'b': {
375 		size_t len;
376 		char *p;
377 		int getout;
378 
379 		/* Convert "b" to "s" for output. */
380 		start[strlen(start) - 1] = 's';
381 		if ((p = strdup(getstr())) == NULL) {
382 			warnx("%s", strerror(ENOMEM));
383 			return (NULL);
384 		}
385 		getout = escape(p, 0, &len);
386 		PF(start, p);
387 		/* Restore format for next loop. */
388 
389 		free(p);
390 		if (getout)
391 			return (end_fmt);
392 		break;
393 	}
394 	case 'c': {
395 		char p;
396 
397 		p = getchr();
398 		if (p != '\0')
399 			PF(start, p);
400 		break;
401 	}
402 	case 's': {
403 		const char *p;
404 
405 		p = getstr();
406 		PF(start, p);
407 		break;
408 	}
409 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
410 		char *f;
411 		intmax_t val;
412 		uintmax_t uval;
413 		int signedconv;
414 
415 		signedconv = (convch == 'd' || convch == 'i');
416 		if ((f = mknum(start, convch)) == NULL)
417 			return (NULL);
418 		if (getnum(&val, &uval, signedconv))
419 			*rval = 1;
420 		if (signedconv)
421 			PF(f, val);
422 		else
423 			PF(f, uval);
424 		break;
425 	}
426 	case 'e': case 'E':
427 	case 'f': case 'F':
428 	case 'g': case 'G':
429 	case 'a': case 'A': {
430 		long double p;
431 
432 		if (getfloating(&p, mod_ldbl))
433 			*rval = 1;
434 		if (mod_ldbl)
435 			PF(start, p);
436 		else
437 			PF(start, (double)p);
438 		break;
439 	}
440 	default:
441 		warnx("illegal format character %c", convch);
442 		return (NULL);
443 	}
444 	*fmt = nextch;
445 	/* return the gargv to the next element */
446 	return (fmt);
447 }
448 
/*
 * Build the intmax_t flavour of an integer conversion specification:
 * everything in `str' except its final character is kept, then 'j' and
 * the conversion character `ch' are appended.
 *
 * The result lives in a buffer that is reused (and grown in 1 KiB steps)
 * across calls, so it is only valid until the next call.  Returns NULL
 * after a warning if the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *buf;
	static size_t bufsize;
	size_t keep, need;

	keep = strlen(str) - 1;		/* drop the old conversion char */
	need = keep + 3;		/* 'j' + ch + terminating NUL */

	if (need > bufsize) {
		size_t rounded = (need + 1023) & ~(size_t)1023;
		char *grown = realloc(buf, rounded);

		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		buf = grown;
		bufsize = rounded;
	}

	memmove(buf, str, keep);
	buf[keep] = 'j';
	buf[keep + 1] = ch;
	buf[keep + 2] = '\0';
	return (buf);
}
474 
/*
 * Expand backslash escapes in `fmt', in place.
 *
 * `percent' is nonzero when fmt is the format string itself: there "\c"
 * is not special, octal escapes take at most three digits, and an octal
 * escape that yields '%' is stored as "%%" so it is not later mistaken
 * for a conversion.  With percent == 0 (a %b operand) an octal escape
 * that starts with '0' may take four digits and "\c" truncates the
 * string.
 *
 * The resulting length (which may include embedded NULs) is stored in
 * *len.  Returns 1 if "\c" ended the string, otherwise 0.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	char *out = fmt;
	const char *in = fmt;
	char sel;
	int limit, ndigits, value;

	while (*in != '\0') {
		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		sel = *++in;	/* character that follows the backslash */

		if (sel == '\0') {
			/* EOS, user error: keep the lone backslash. */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		}

		if (sel == 'c' && !percent) {
			/* \c in a %b operand: discard the remainder. */
			*out = '\0';
			*len = out - base;
			return (1);
		}

		if (sel >= '0' && sel <= '7') {
			/* octal constant */
			limit = (!percent && sel == '0') ? 4 : 3;
			value = 0;
			for (ndigits = 0; ndigits < limit &&
			    *in >= '0' && *in <= '7'; ndigits++, in++)
				value = (value << 3) + (*in - '0');
			if (percent && value == '%') {
				*out++ = '%';
				*out++ = '%';
			} else
				*out++ = (char)value;
			continue;
		}

		switch (sel) {
		case 'a':		/* bell/alert */
			sel = '\a';
			break;
		case 'b':		/* backspace */
			sel = '\b';
			break;
		case 'f':		/* form-feed */
			sel = '\f';
			break;
		case 'n':		/* newline */
			sel = '\n';
			break;
		case 'r':		/* carriage-return */
			sel = '\r';
			break;
		case 't':		/* horizontal tab */
			sel = '\t';
			break;
		case 'v':		/* vertical tab */
			sel = '\v';
			break;
		default:
			/*
			 * Backslash, single quote, 'c' inside the format
			 * and every unrecognised escape stand for
			 * themselves.
			 */
			break;
		}
		*out++ = sel;
		in++;
	}

	*out = '\0';
	*len = out - base;
	return (0);
}
550 
551 static int
552 getchr(void)
553 {
554 	if (!*gargv)
555 		return ('\0');
556 	return ((int)**gargv++);
557 }
558 
559 static const char *
560 getstr(void)
561 {
562 	if (!*gargv)
563 		return ("");
564 	return (*gargv++);
565 }
566 
567 static int
568 getint(int *ip)
569 {
570 	intmax_t val;
571 	uintmax_t uval;
572 	int rval;
573 
574 	if (getnum(&val, &uval, 1))
575 		return (1);
576 	rval = 0;
577 	if (val < INT_MIN || val > INT_MAX) {
578 		warnx("%s: %s", *gargv, strerror(ERANGE));
579 		rval = 1;
580 	}
581 	*ip = (int)val;
582 	return (rval);
583 }
584 
585 static int
586 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
587 {
588 	char *ep;
589 	int rval;
590 
591 	if (!*gargv) {
592 		*ip = *uip = 0;
593 		return (0);
594 	}
595 	if (**gargv == '"' || **gargv == '\'') {
596 		if (signedconv)
597 			*ip = asciicode();
598 		else
599 			*uip = asciicode();
600 		return (0);
601 	}
602 	rval = 0;
603 	errno = 0;
604 	if (signedconv)
605 		*ip = strtoimax(*gargv, &ep, 0);
606 	else
607 		*uip = strtoumax(*gargv, &ep, 0);
608 	if (ep == *gargv) {
609 		warnx("%s: expected numeric value", *gargv);
610 		rval = 1;
611 	}
612 	else if (*ep != '\0') {
613 		warnx("%s: not completely converted", *gargv);
614 		rval = 1;
615 	}
616 	if (errno == ERANGE) {
617 		warnx("%s: %s", *gargv, strerror(ERANGE));
618 		rval = 1;
619 	}
620 	++gargv;
621 	return (rval);
622 }
623 
624 static int
625 getfloating(long double *dp, int mod_ldbl)
626 {
627 	char *ep;
628 	int rval;
629 
630 	if (!*gargv) {
631 		*dp = 0.0;
632 		return (0);
633 	}
634 	if (**gargv == '"' || **gargv == '\'') {
635 		*dp = asciicode();
636 		return (0);
637 	}
638 	rval = 0;
639 	errno = 0;
640 	if (mod_ldbl)
641 		*dp = strtold(*gargv, &ep);
642 	else
643 		*dp = strtod(*gargv, &ep);
644 	if (ep == *gargv) {
645 		warnx("%s: expected numeric value", *gargv);
646 		rval = 1;
647 	} else if (*ep != '\0') {
648 		warnx("%s: not completely converted", *gargv);
649 		rval = 1;
650 	}
651 	if (errno == ERANGE) {
652 		warnx("%s: %s", *gargv, strerror(ERANGE));
653 		rval = 1;
654 	}
655 	++gargv;
656 	return (rval);
657 }
658 
659 static int
660 asciicode(void)
661 {
662 	int ch;
663 	wchar_t wch;
664 	mbstate_t mbs;
665 
666 	ch = (unsigned char)**gargv;
667 	if (ch == '\'' || ch == '"') {
668 		memset(&mbs, 0, sizeof(mbs));
669 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
670 		case (size_t)-2:
671 		case (size_t)-1:
672 			wch = (unsigned char)gargv[0][1];
673 			break;
674 		case 0:
675 			wch = 0;
676 			break;
677 		}
678 		ch = wch;
679 	}
680 	++gargv;
681 	return (ch);
682 }
683 
/* Write the one-line synopsis to standard error. */
static void
usage(void)
{
	(void)fputs("usage: printf format [arguments ...]\n", stderr);
}
689