xref: /freebsd/usr.bin/printf/printf.c (revision 640235e2c2ba32947f7c59d168437ffa1280f1e6)
1 /*-
2  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 4. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Important: This file is used both as a standalone program /usr/bin/printf
33  * and as a builtin for /bin/sh (#define SHELL).
34  */
35 
36 #ifndef SHELL
37 #ifndef lint
38 static char const copyright[] =
39 "@(#) Copyright (c) 1989, 1993\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 #endif /* not lint */
42 #endif
43 
44 #ifndef lint
45 #if 0
46 static char const sccsid[] = "@(#)printf.c	8.1 (Berkeley) 7/20/93";
47 #endif
48 static const char rcsid[] =
49   "$FreeBSD$";
50 #endif /* not lint */
51 
52 #include <sys/types.h>
53 
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <inttypes.h>
58 #include <limits.h>
59 #include <locale.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #include <unistd.h>
64 #include <wchar.h>
65 
66 #ifdef SHELL
67 #define	main printfcmd
68 #include "bltin/bltin.h"
69 #include "options.h"
70 #endif
71 
/*
 * PF(f, func): format the single value `func' with format string `f' and
 * write the result to stdout.
 *
 * The macro expands inside printf_doformat() and reads that function's
 * locals: when `havewidth' and/or `haveprec' are set, `fieldwidth' and/or
 * `precision' are passed ahead of the value to satisfy a '*' in `f'.
 *
 * The text is written with fwrite() using the length reported by
 * asprintf() rather than with fputs(), so that a NUL byte in the formatted
 * result (e.g. %c of an empty argument) is emitted and does not cut the
 * field short.  Nothing is written when asprintf() fails.
 */
#define	PF(f, func) do {						\
	char *pf_buf = NULL;						\
	int pf_len;							\
	if (havewidth) {						\
		if (haveprec)						\
			pf_len = asprintf(&pf_buf, f, fieldwidth,	\
			    precision, func);				\
		else							\
			pf_len = asprintf(&pf_buf, f, fieldwidth, func); \
	} else if (haveprec)						\
		pf_len = asprintf(&pf_buf, f, precision, func);		\
	else								\
		pf_len = asprintf(&pf_buf, f, func);			\
	if (pf_len >= 0 && pf_buf != NULL) {				\
		(void)fwrite(pf_buf, 1, (size_t)pf_len, stdout);	\
		free(pf_buf);						\
	}								\
} while (0)
88 
/* Forward declarations of the file-local helpers defined below. */
static int	 asciicode(void);
static char	*printf_doformat(char *, int *);
static int	 escape(char *, int, size_t *);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char
		*getstr(void);
static char	*mknum(char *, char);
static void	 usage(void);

/* Digit set handed to strspn() when looking for an "n$" argument index. */
static const char digits[] = "0123456789";

/*
 * Sentinel address: printf_doformat() returns it when a %b argument
 * contained \c, and main() compares against it to stop all output.
 */
static char end_fmt[1];

/*
 * Argument cursor state shared by main(), printf_doformat() and the get*()
 * helpers:
 *   myargc/myargv - count and first element of the arguments available to
 *                   the current pass over the format (base for "n$").
 *   gargv         - next argument to be consumed.
 *   maxargv       - furthest position gargv has reached in this pass; the
 *                   next pass over the format starts from here.
 */
static int  myargc;
static char **myargv;
static char **gargv;
static char **maxargv;
109 
110 int
111 main(int argc, char *argv[])
112 {
113 	size_t len;
114 	int end, rval;
115 	char *format, *fmt, *start;
116 #ifndef SHELL
117 	int ch;
118 
119 	(void) setlocale(LC_ALL, "");
120 #endif
121 
122 #ifdef SHELL
123 	nextopt("");
124 	argc -= argptr - argv;
125 	argv = argptr;
126 #else
127 	while ((ch = getopt(argc, argv, "")) != -1)
128 		switch (ch) {
129 		case '?':
130 		default:
131 			usage();
132 			return (1);
133 		}
134 	argc -= optind;
135 	argv += optind;
136 #endif
137 
138 	if (argc < 1) {
139 		usage();
140 		return (1);
141 	}
142 
143 #ifdef SHELL
144 	INTOFF;
145 #endif
146 	/*
147 	 * Basic algorithm is to scan the format string for conversion
148 	 * specifications -- once one is found, find out if the field
149 	 * width or precision is a '*'; if it is, gather up value.  Note,
150 	 * format strings are reused as necessary to use up the provided
151 	 * arguments, arguments of zero/null string are provided to use
152 	 * up the format string.
153 	 */
154 	fmt = format = *argv;
155 	escape(fmt, 1, &len);		/* backslash interpretation */
156 	rval = end = 0;
157 	gargv = ++argv;
158 
159 	for (;;) {
160 		maxargv = gargv;
161 
162 		myargv = gargv;
163 		for (myargc = 0; gargv[myargc]; myargc++)
164 			/* nop */;
165 		start = fmt;
166 		while (fmt < format + len) {
167 			if (fmt[0] == '%') {
168 				fwrite(start, 1, fmt - start, stdout);
169 				if (fmt[1] == '%') {
170 					/* %% prints a % */
171 					putchar('%');
172 					fmt += 2;
173 				} else {
174 					fmt = printf_doformat(fmt, &rval);
175 					if (fmt == NULL || fmt == end_fmt) {
176 #ifdef SHELL
177 						INTON;
178 #endif
179 						return (fmt == NULL ? 1 : rval);
180 					}
181 					end = 0;
182 				}
183 				start = fmt;
184 			} else
185 				fmt++;
186 			if (gargv > maxargv)
187 				maxargv = gargv;
188 		}
189 		gargv = maxargv;
190 
191 		if (end == 1) {
192 			warnx("missing format character");
193 #ifdef SHELL
194 			INTON;
195 #endif
196 			return (1);
197 		}
198 		fwrite(start, 1, fmt - start, stdout);
199 		if (!*gargv) {
200 #ifdef SHELL
201 			INTON;
202 #endif
203 			return (rval);
204 		}
205 		/* Restart at the beginning of the format string. */
206 		fmt = format;
207 		end = 1;
208 	}
209 	/* NOTREACHED */
210 }
211 
212 
213 static char *
214 printf_doformat(char *fmt, int *rval)
215 {
216 	static const char skip1[] = "#'-+ 0";
217 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
218 	char convch, nextch;
219 	char start[strlen(fmt) + 1];
220 	char **fargv;
221 	char *dptr;
222 	int l;
223 
224 	dptr = start;
225 	*dptr++ = '%';
226 	*dptr = 0;
227 
228 	fmt++;
229 
230 	/* look for "n$" field index specifier */
231 	l = strspn(fmt, digits);
232 	if ((l > 0) && (fmt[l] == '$')) {
233 		int idx = atoi(fmt);
234 		if (idx <= myargc) {
235 			gargv = &myargv[idx - 1];
236 		} else {
237 			gargv = &myargv[myargc];
238 		}
239 		if (gargv > maxargv)
240 			maxargv = gargv;
241 		fmt += l + 1;
242 
243 		/* save format argument */
244 		fargv = gargv;
245 	} else {
246 		fargv = NULL;
247 	}
248 
249 	/* skip to field width */
250 	while (*fmt && strchr(skip1, *fmt) != NULL) {
251 		*dptr++ = *fmt++;
252 		*dptr = 0;
253 	}
254 
255 	if (*fmt == '*') {
256 
257 		fmt++;
258 		l = strspn(fmt, digits);
259 		if ((l > 0) && (fmt[l] == '$')) {
260 			int idx = atoi(fmt);
261 			if (fargv == NULL) {
262 				warnx("incomplete use of n$");
263 				return (NULL);
264 			}
265 			if (idx <= myargc) {
266 				gargv = &myargv[idx - 1];
267 			} else {
268 				gargv = &myargv[myargc];
269 			}
270 			fmt += l + 1;
271 		} else if (fargv != NULL) {
272 			warnx("incomplete use of n$");
273 			return (NULL);
274 		}
275 
276 		if (getint(&fieldwidth))
277 			return (NULL);
278 		if (gargv > maxargv)
279 			maxargv = gargv;
280 		havewidth = 1;
281 
282 		*dptr++ = '*';
283 		*dptr = 0;
284 	} else {
285 		havewidth = 0;
286 
287 		/* skip to possible '.', get following precision */
288 		while (isdigit(*fmt)) {
289 			*dptr++ = *fmt++;
290 			*dptr = 0;
291 		}
292 	}
293 
294 	if (*fmt == '.') {
295 		/* precision present? */
296 		fmt++;
297 		*dptr++ = '.';
298 
299 		if (*fmt == '*') {
300 
301 			fmt++;
302 			l = strspn(fmt, digits);
303 			if ((l > 0) && (fmt[l] == '$')) {
304 				int idx = atoi(fmt);
305 				if (fargv == NULL) {
306 					warnx("incomplete use of n$");
307 					return (NULL);
308 				}
309 				if (idx <= myargc) {
310 					gargv = &myargv[idx - 1];
311 				} else {
312 					gargv = &myargv[myargc];
313 				}
314 				fmt += l + 1;
315 			} else if (fargv != NULL) {
316 				warnx("incomplete use of n$");
317 				return (NULL);
318 			}
319 
320 			if (getint(&precision))
321 				return (NULL);
322 			if (gargv > maxargv)
323 				maxargv = gargv;
324 			haveprec = 1;
325 			*dptr++ = '*';
326 			*dptr = 0;
327 		} else {
328 			haveprec = 0;
329 
330 			/* skip to conversion char */
331 			while (isdigit(*fmt)) {
332 				*dptr++ = *fmt++;
333 				*dptr = 0;
334 			}
335 		}
336 	} else
337 		haveprec = 0;
338 	if (!*fmt) {
339 		warnx("missing format character");
340 		return (NULL);
341 	}
342 	*dptr++ = *fmt;
343 	*dptr = 0;
344 
345 	/*
346 	 * Look for a length modifier.  POSIX doesn't have these, so
347 	 * we only support them for floating-point conversions, which
348 	 * are extensions.  This is useful because the L modifier can
349 	 * be used to gain extra range and precision, while omitting
350 	 * it is more likely to produce consistent results on different
351 	 * architectures.  This is not so important for integers
352 	 * because overflow is the only bad thing that can happen to
353 	 * them, but consider the command  printf %a 1.1
354 	 */
355 	if (*fmt == 'L') {
356 		mod_ldbl = 1;
357 		fmt++;
358 		if (!strchr("aAeEfFgG", *fmt)) {
359 			warnx("bad modifier L for %%%c", *fmt);
360 			return (NULL);
361 		}
362 	} else {
363 		mod_ldbl = 0;
364 	}
365 
366 	/* save the current arg offset, and set to the format arg */
367 	if (fargv != NULL) {
368 		gargv = fargv;
369 	}
370 
371 	convch = *fmt;
372 	nextch = *++fmt;
373 
374 	*fmt = '\0';
375 	switch (convch) {
376 	case 'b': {
377 		size_t len;
378 		char *p;
379 		int getout;
380 
381 		p = strdup(getstr());
382 		if (p == NULL) {
383 			warnx("%s", strerror(ENOMEM));
384 			return (NULL);
385 		}
386 		getout = escape(p, 0, &len);
387 		fputs(p, stdout);
388 		free(p);
389 		if (getout)
390 			return (end_fmt);
391 		break;
392 	}
393 	case 'c': {
394 		char p;
395 
396 		p = getchr();
397 		PF(start, p);
398 		break;
399 	}
400 	case 's': {
401 		const char *p;
402 
403 		p = getstr();
404 		PF(start, p);
405 		break;
406 	}
407 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
408 		char *f;
409 		intmax_t val;
410 		uintmax_t uval;
411 		int signedconv;
412 
413 		signedconv = (convch == 'd' || convch == 'i');
414 		if ((f = mknum(start, convch)) == NULL)
415 			return (NULL);
416 		if (getnum(&val, &uval, signedconv))
417 			*rval = 1;
418 		if (signedconv)
419 			PF(f, val);
420 		else
421 			PF(f, uval);
422 		break;
423 	}
424 	case 'e': case 'E':
425 	case 'f': case 'F':
426 	case 'g': case 'G':
427 	case 'a': case 'A': {
428 		long double p;
429 
430 		if (getfloating(&p, mod_ldbl))
431 			*rval = 1;
432 		if (mod_ldbl)
433 			PF(start, p);
434 		else
435 			PF(start, (double)p);
436 		break;
437 	}
438 	default:
439 		warnx("illegal format character %c", convch);
440 		return (NULL);
441 	}
442 	*fmt = nextch;
443 	/* return the gargv to the next element */
444 	return (fmt);
445 }
446 
/*
 * Build an integer conversion specification that carries the 'j' length
 * modifier: the last character of `str' is replaced by 'j' followed by
 * `ch'.  The result lives in a static buffer that is grown in 1 KiB steps
 * and reused by later calls.  Returns NULL (after a warning) when the
 * buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	size_t srclen, need;

	srclen = strlen(str);
	need = srclen + 2;		/* extra 'j' plus the terminator */
	if (need > copy_size) {
		size_t rounded;
		char *grown;

		/* round the request up to a multiple of 1024 bytes */
		rounded = (need + 1023) & ~(size_t)1023;
		grown = realloc(copy, rounded);
		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	/* everything but the conversion character, then "j<ch>" */
	memmove(copy, str, srclen - 1);
	copy[srclen - 1] = 'j';
	copy[srclen] = ch;
	copy[srclen + 1] = '\0';
	return (copy);
}
472 
/*
 * Expand backslash escape sequences in `fmt' in place and store the
 * resulting length (terminator excluded) in *len.
 *
 * `percent' is non-zero for a format string and zero for a %b argument:
 *   - in a format, an octal escape producing '%' is stored as "%%", "\c"
 *     yields a plain 'c', and octal escapes take at most three digits;
 *   - in a %b argument, "\c" ends the string and 1 is returned, and an
 *     octal escape beginning with '0' may take four digits.
 * A lone trailing backslash is kept.  Unknown escapes yield the escaped
 * character itself.
 *
 * Returns 1 only when "\c" terminated a %b argument, otherwise 0.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	static const char names[] = "abfnrtv";
	static const char codes[] = "\a\b\f\n\r\t\v";
	const char *hit;
	char *base, *out;
	int digits_left, octal;

	base = out = fmt;
	while (*fmt != '\0') {
		if (*fmt != '\\') {
			*out++ = *fmt++;
			continue;
		}

		fmt++;			/* now at the escaped character */
		if (*fmt == '\0') {
			/* EOS, user error: keep the backslash */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		}

		if (*fmt == 'c') {
			if (!percent) {
				*out = '\0';
				*len = out - base;
				return (1);
			}
			*out++ = 'c';
			fmt++;
			continue;
		}

		if (*fmt >= '0' && *fmt <= '7') {
			/* octal constant */
			digits_left = (!percent && *fmt == '0') ? 4 : 3;
			octal = 0;
			while (digits_left-- > 0 &&
			    *fmt >= '0' && *fmt <= '7') {
				octal = (octal << 3) + (*fmt - '0');
				fmt++;
			}
			if (percent && octal == '%') {
				/* keep it literal for the format scanner */
				*out++ = '%';
				*out++ = '%';
			} else {
				*out++ = (char)octal;
			}
			continue;
		}

		/* named control escapes; anything else stands for itself */
		hit = strchr(names, *fmt);
		if (hit != NULL)
			*out++ = codes[hit - names];
		else
			*out++ = *fmt;
		fmt++;
	}
	*out = '\0';
	*len = out - base;
	return (0);
}
548 
549 static int
550 getchr(void)
551 {
552 	if (!*gargv)
553 		return ('\0');
554 	return ((int)**gargv++);
555 }
556 
557 static const char *
558 getstr(void)
559 {
560 	if (!*gargv)
561 		return ("");
562 	return (*gargv++);
563 }
564 
565 static int
566 getint(int *ip)
567 {
568 	intmax_t val;
569 	uintmax_t uval;
570 	int rval;
571 
572 	if (getnum(&val, &uval, 1))
573 		return (1);
574 	rval = 0;
575 	if (val < INT_MIN || val > INT_MAX) {
576 		warnx("%s: %s", *gargv, strerror(ERANGE));
577 		rval = 1;
578 	}
579 	*ip = (int)val;
580 	return (rval);
581 }
582 
583 static int
584 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
585 {
586 	char *ep;
587 	int rval;
588 
589 	if (!*gargv) {
590 		*ip = *uip = 0;
591 		return (0);
592 	}
593 	if (**gargv == '"' || **gargv == '\'') {
594 		if (signedconv)
595 			*ip = asciicode();
596 		else
597 			*uip = asciicode();
598 		return (0);
599 	}
600 	rval = 0;
601 	errno = 0;
602 	if (signedconv)
603 		*ip = strtoimax(*gargv, &ep, 0);
604 	else
605 		*uip = strtoumax(*gargv, &ep, 0);
606 	if (ep == *gargv) {
607 		warnx("%s: expected numeric value", *gargv);
608 		rval = 1;
609 	}
610 	else if (*ep != '\0') {
611 		warnx("%s: not completely converted", *gargv);
612 		rval = 1;
613 	}
614 	if (errno == ERANGE) {
615 		warnx("%s: %s", *gargv, strerror(ERANGE));
616 		rval = 1;
617 	}
618 	++gargv;
619 	return (rval);
620 }
621 
622 static int
623 getfloating(long double *dp, int mod_ldbl)
624 {
625 	char *ep;
626 	int rval;
627 
628 	if (!*gargv) {
629 		*dp = 0.0;
630 		return (0);
631 	}
632 	if (**gargv == '"' || **gargv == '\'') {
633 		*dp = asciicode();
634 		return (0);
635 	}
636 	rval = 0;
637 	errno = 0;
638 	if (mod_ldbl)
639 		*dp = strtold(*gargv, &ep);
640 	else
641 		*dp = strtod(*gargv, &ep);
642 	if (ep == *gargv) {
643 		warnx("%s: expected numeric value", *gargv);
644 		rval = 1;
645 	} else if (*ep != '\0') {
646 		warnx("%s: not completely converted", *gargv);
647 		rval = 1;
648 	}
649 	if (errno == ERANGE) {
650 		warnx("%s: %s", *gargv, strerror(ERANGE));
651 		rval = 1;
652 	}
653 	++gargv;
654 	return (rval);
655 }
656 
657 static int
658 asciicode(void)
659 {
660 	int ch;
661 	wchar_t wch;
662 	mbstate_t mbs;
663 
664 	ch = (unsigned char)**gargv;
665 	if (ch == '\'' || ch == '"') {
666 		memset(&mbs, 0, sizeof(mbs));
667 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
668 		case (size_t)-2:
669 		case (size_t)-1:
670 			wch = (unsigned char)gargv[0][1];
671 			break;
672 		case 0:
673 			wch = 0;
674 			break;
675 		}
676 		ch = wch;
677 	}
678 	++gargv;
679 	return (ch);
680 }
681 
/* Print the one-line synopsis to standard error. */
static void
usage(void)
{
	(void)fputs("usage: printf format [arguments ...]\n", stderr);
}
687