xref: /freebsd/usr.bin/printf/printf.c (revision fcb560670601b2a4d87bb31d7531c8dcc37ee71b)
1 /*-
2  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 4. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Important: This file is used both as a standalone program /usr/bin/printf
33  * and as a builtin for /bin/sh (#define SHELL).
34  */
35 
36 #ifndef SHELL
37 #ifndef lint
38 static char const copyright[] =
39 "@(#) Copyright (c) 1989, 1993\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 #endif /* not lint */
42 #endif
43 
44 #ifndef lint
45 #if 0
46 static char const sccsid[] = "@(#)printf.c	8.1 (Berkeley) 7/20/93";
47 #endif
48 static const char rcsid[] =
49   "$FreeBSD$";
50 #endif /* not lint */
51 
52 #include <sys/types.h>
53 
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <inttypes.h>
58 #include <limits.h>
59 #include <locale.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #include <unistd.h>
64 #include <wchar.h>
65 
66 #ifdef SHELL
67 #define	main printfcmd
68 #include "bltin/bltin.h"
69 #include "error.h"
70 #include "options.h"
71 #endif
72 
/*
 * Format a single value and write the result to stdout.
 *
 * f is the printf-style format and func the value to convert.  The macro
 * must expand in a scope that provides havewidth, haveprec, fieldwidth
 * and precision: a width and/or precision supplied through '*' is handed
 * to asprintf() ahead of the value, width first.  If the buffer pointer
 * is still NULL after the asprintf() call, nothing is written.
 */
#define	PF(f, func) do {						\
	char *pf_out = NULL;						\
	if (havewidth && haveprec) {					\
		(void)asprintf(&pf_out, f, fieldwidth, precision, func); \
	} else if (havewidth) {						\
		(void)asprintf(&pf_out, f, fieldwidth, func);		\
	} else if (haveprec) {						\
		(void)asprintf(&pf_out, f, precision, func);		\
	} else {							\
		(void)asprintf(&pf_out, f, func);			\
	}								\
	if (pf_out != NULL) {						\
		(void)fputs(pf_out, stdout);				\
		free(pf_out);						\
	}								\
} while (0)
89 
/* Format processing. */
static char	*printf_doformat(char *fmt, int *rval);
static int	 escape(char *fmt, int percent, size_t *len);
static char	*mknum(char *str, char ch);

/* Argument fetchers; each one reads the argument gargv points at. */
static int	 asciicode(void);
static int	 getchr(void);
static const char
		*getstr(void);
static int	 getint(int *ip);
static int	 getnum(intmax_t *ip, uintmax_t *uip, int signedconv);
static int	 getfloating(long double *dp, int mod_ldbl);

static void	 usage(void);

/* Accepted by strspn() when measuring the digits of an "n$" index. */
static const char digits[] = "0123456789";

/* Sentinel address returned by printf_doformat() when %b meets "\c". */
static char end_fmt[1];

/* Number of arguments available to the current pass over the format. */
static int  myargc;
/* First argument of the current pass; "n$" indexes are relative to it. */
static char **myargv;
/* Next argument to be consumed. */
static char **gargv;
/* Furthest position gargv has reached during the current pass. */
static char **maxargv;
110 
111 int
112 main(int argc, char *argv[])
113 {
114 	size_t len;
115 	int end, rval;
116 	char *format, *fmt, *start;
117 #ifndef SHELL
118 	int ch;
119 
120 	(void) setlocale(LC_ALL, "");
121 #endif
122 
123 #ifdef SHELL
124 	nextopt("");
125 	argc -= argptr - argv;
126 	argv = argptr;
127 #else
128 	while ((ch = getopt(argc, argv, "")) != -1)
129 		switch (ch) {
130 		case '?':
131 		default:
132 			usage();
133 			return (1);
134 		}
135 	argc -= optind;
136 	argv += optind;
137 #endif
138 
139 	if (argc < 1) {
140 		usage();
141 		return (1);
142 	}
143 
144 #ifdef SHELL
145 	INTOFF;
146 #endif
147 	/*
148 	 * Basic algorithm is to scan the format string for conversion
149 	 * specifications -- once one is found, find out if the field
150 	 * width or precision is a '*'; if it is, gather up value.  Note,
151 	 * format strings are reused as necessary to use up the provided
152 	 * arguments, arguments of zero/null string are provided to use
153 	 * up the format string.
154 	 */
155 	fmt = format = *argv;
156 	escape(fmt, 1, &len);		/* backslash interpretation */
157 	rval = end = 0;
158 	gargv = ++argv;
159 
160 	for (;;) {
161 		maxargv = gargv;
162 
163 		myargv = gargv;
164 		for (myargc = 0; gargv[myargc]; myargc++)
165 			/* nop */;
166 		start = fmt;
167 		while (fmt < format + len) {
168 			if (fmt[0] == '%') {
169 				fwrite(start, 1, fmt - start, stdout);
170 				if (fmt[1] == '%') {
171 					/* %% prints a % */
172 					putchar('%');
173 					fmt += 2;
174 				} else {
175 					fmt = printf_doformat(fmt, &rval);
176 					if (fmt == NULL || fmt == end_fmt) {
177 #ifdef SHELL
178 						INTON;
179 #endif
180 						return (fmt == NULL ? 1 : rval);
181 					}
182 					end = 0;
183 				}
184 				start = fmt;
185 			} else
186 				fmt++;
187 			if (gargv > maxargv)
188 				maxargv = gargv;
189 		}
190 		gargv = maxargv;
191 
192 		if (end == 1) {
193 			warnx("missing format character");
194 #ifdef SHELL
195 			INTON;
196 #endif
197 			return (1);
198 		}
199 		fwrite(start, 1, fmt - start, stdout);
200 		if (!*gargv) {
201 #ifdef SHELL
202 			INTON;
203 #endif
204 			return (rval);
205 		}
206 		/* Restart at the beginning of the format string. */
207 		fmt = format;
208 		end = 1;
209 	}
210 	/* NOTREACHED */
211 }
212 
213 
214 static char *
215 printf_doformat(char *fmt, int *rval)
216 {
217 	static const char skip1[] = "#'-+ 0";
218 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
219 	char convch, nextch;
220 	char start[strlen(fmt) + 1];
221 	char **fargv;
222 	char *dptr;
223 	int l;
224 
225 	dptr = start;
226 	*dptr++ = '%';
227 	*dptr = 0;
228 
229 	fmt++;
230 
231 	/* look for "n$" field index specifier */
232 	l = strspn(fmt, digits);
233 	if ((l > 0) && (fmt[l] == '$')) {
234 		int idx = atoi(fmt);
235 		if (idx <= myargc) {
236 			gargv = &myargv[idx - 1];
237 		} else {
238 			gargv = &myargv[myargc];
239 		}
240 		if (gargv > maxargv)
241 			maxargv = gargv;
242 		fmt += l + 1;
243 
244 		/* save format argument */
245 		fargv = gargv;
246 	} else {
247 		fargv = NULL;
248 	}
249 
250 	/* skip to field width */
251 	while (*fmt && strchr(skip1, *fmt) != NULL) {
252 		*dptr++ = *fmt++;
253 		*dptr = 0;
254 	}
255 
256 	if (*fmt == '*') {
257 
258 		fmt++;
259 		l = strspn(fmt, digits);
260 		if ((l > 0) && (fmt[l] == '$')) {
261 			int idx = atoi(fmt);
262 			if (fargv == NULL) {
263 				warnx("incomplete use of n$");
264 				return (NULL);
265 			}
266 			if (idx <= myargc) {
267 				gargv = &myargv[idx - 1];
268 			} else {
269 				gargv = &myargv[myargc];
270 			}
271 			fmt += l + 1;
272 		} else if (fargv != NULL) {
273 			warnx("incomplete use of n$");
274 			return (NULL);
275 		}
276 
277 		if (getint(&fieldwidth))
278 			return (NULL);
279 		if (gargv > maxargv)
280 			maxargv = gargv;
281 		havewidth = 1;
282 
283 		*dptr++ = '*';
284 		*dptr = 0;
285 	} else {
286 		havewidth = 0;
287 
288 		/* skip to possible '.', get following precision */
289 		while (isdigit(*fmt)) {
290 			*dptr++ = *fmt++;
291 			*dptr = 0;
292 		}
293 	}
294 
295 	if (*fmt == '.') {
296 		/* precision present? */
297 		fmt++;
298 		*dptr++ = '.';
299 
300 		if (*fmt == '*') {
301 
302 			fmt++;
303 			l = strspn(fmt, digits);
304 			if ((l > 0) && (fmt[l] == '$')) {
305 				int idx = atoi(fmt);
306 				if (fargv == NULL) {
307 					warnx("incomplete use of n$");
308 					return (NULL);
309 				}
310 				if (idx <= myargc) {
311 					gargv = &myargv[idx - 1];
312 				} else {
313 					gargv = &myargv[myargc];
314 				}
315 				fmt += l + 1;
316 			} else if (fargv != NULL) {
317 				warnx("incomplete use of n$");
318 				return (NULL);
319 			}
320 
321 			if (getint(&precision))
322 				return (NULL);
323 			if (gargv > maxargv)
324 				maxargv = gargv;
325 			haveprec = 1;
326 			*dptr++ = '*';
327 			*dptr = 0;
328 		} else {
329 			haveprec = 0;
330 
331 			/* skip to conversion char */
332 			while (isdigit(*fmt)) {
333 				*dptr++ = *fmt++;
334 				*dptr = 0;
335 			}
336 		}
337 	} else
338 		haveprec = 0;
339 	if (!*fmt) {
340 		warnx("missing format character");
341 		return (NULL);
342 	}
343 	*dptr++ = *fmt;
344 	*dptr = 0;
345 
346 	/*
347 	 * Look for a length modifier.  POSIX doesn't have these, so
348 	 * we only support them for floating-point conversions, which
349 	 * are extensions.  This is useful because the L modifier can
350 	 * be used to gain extra range and precision, while omitting
351 	 * it is more likely to produce consistent results on different
352 	 * architectures.  This is not so important for integers
353 	 * because overflow is the only bad thing that can happen to
354 	 * them, but consider the command  printf %a 1.1
355 	 */
356 	if (*fmt == 'L') {
357 		mod_ldbl = 1;
358 		fmt++;
359 		if (!strchr("aAeEfFgG", *fmt)) {
360 			warnx("bad modifier L for %%%c", *fmt);
361 			return (NULL);
362 		}
363 	} else {
364 		mod_ldbl = 0;
365 	}
366 
367 	/* save the current arg offset, and set to the format arg */
368 	if (fargv != NULL) {
369 		gargv = fargv;
370 	}
371 
372 	convch = *fmt;
373 	nextch = *++fmt;
374 
375 	*fmt = '\0';
376 	switch (convch) {
377 	case 'b': {
378 		size_t len;
379 		char *p;
380 		int getout;
381 
382 		p = strdup(getstr());
383 		if (p == NULL) {
384 			warnx("%s", strerror(ENOMEM));
385 			return (NULL);
386 		}
387 		getout = escape(p, 0, &len);
388 		fputs(p, stdout);
389 		free(p);
390 		if (getout)
391 			return (end_fmt);
392 		break;
393 	}
394 	case 'c': {
395 		char p;
396 
397 		p = getchr();
398 		PF(start, p);
399 		break;
400 	}
401 	case 's': {
402 		const char *p;
403 
404 		p = getstr();
405 		PF(start, p);
406 		break;
407 	}
408 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
409 		char *f;
410 		intmax_t val;
411 		uintmax_t uval;
412 		int signedconv;
413 
414 		signedconv = (convch == 'd' || convch == 'i');
415 		if ((f = mknum(start, convch)) == NULL)
416 			return (NULL);
417 		if (getnum(&val, &uval, signedconv))
418 			*rval = 1;
419 		if (signedconv)
420 			PF(f, val);
421 		else
422 			PF(f, uval);
423 		break;
424 	}
425 	case 'e': case 'E':
426 	case 'f': case 'F':
427 	case 'g': case 'G':
428 	case 'a': case 'A': {
429 		long double p;
430 
431 		if (getfloating(&p, mod_ldbl))
432 			*rval = 1;
433 		if (mod_ldbl)
434 			PF(start, p);
435 		else
436 			PF(start, (double)p);
437 		break;
438 	}
439 	default:
440 		warnx("illegal format character %c", convch);
441 		return (NULL);
442 	}
443 	*fmt = nextch;
444 	/* return the gargv to the next element */
445 	return (fmt);
446 }
447 
/*
 * Return a copy of the conversion specification str with a 'j' length
 * modifier inserted in front of its final character, which is replaced
 * by ch ("%5d" with 'd' becomes "%5jd").
 *
 * The copy lives in a static buffer that grows in 1024-byte steps and is
 * reused by every call, so the result is only valid until the next call.
 * Returns NULL after a diagnostic if the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	size_t slen, need;

	slen = strlen(str);
	need = slen + 2;		/* room for the 'j' and the NUL */
	if (copy_size < need) {
		size_t rounded = (need + 1023) & ~(size_t)1023;
		char *grown = realloc(copy, rounded);

		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	/* Everything but the original conversion character is kept. */
	memmove(copy, str, slen - 1);
	copy[slen - 1] = 'j';
	copy[slen] = ch;
	copy[slen + 1] = '\0';
	return (copy);
}
473 
/*
 * Expand backslash escapes in fmt in place and store the resulting
 * length in *len (the result may contain NUL bytes).
 *
 * percent is non-zero for a format string and zero for a %b argument:
 *  - in a format string "\c" is just 'c', octal escapes take up to three
 *    digits, and an escape whose value is '%' is written as "%%";
 *  - in a %b argument "\c" ends the string and makes the function return
 *    1, and an octal escape starting with '0' may take up to four digits.
 * A backslash before an unrecognised character is dropped; a trailing
 * backslash is kept.  Returns 0 unless "\c" terminated the string.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	static const char letters[] = "abfnrtv";
	static const char codes[] = "\a\b\f\n\r\t\v";
	char *const base = fmt;
	char *out = fmt;
	const char *in = fmt;
	const char *hit;
	int left, value;

	while (*in != '\0') {
		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		in++;			/* step over the backslash */
		switch (*in) {
		case '\0':		/* EOS, user error */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		case 'c':
			if (!percent) {
				*out = '\0';
				*len = out - base;
				return (1);
			}
			*out++ = 'c';
			in++;
			break;
					/* octal constant */
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
			left = (!percent && *in == '0') ? 4 : 3;
			value = 0;
			while (left-- > 0 && *in >= '0' && *in <= '7')
				value = (value << 3) + (*in++ - '0');
			if (percent && value == '%') {
				/* keep it literal for the later format scan */
				*out++ = '%';
				*out++ = '%';
			} else
				*out++ = (char)value;
			break;
		default:
			/* named control character, else the character itself */
			hit = strchr(letters, *in);
			*out++ = (hit != NULL) ? codes[hit - letters] : *in;
			in++;
			break;
		}
	}
	*out = '\0';
	*len = out - base;
	return (0);
}
549 
550 static int
551 getchr(void)
552 {
553 	if (!*gargv)
554 		return ('\0');
555 	return ((int)**gargv++);
556 }
557 
558 static const char *
559 getstr(void)
560 {
561 	if (!*gargv)
562 		return ("");
563 	return (*gargv++);
564 }
565 
566 static int
567 getint(int *ip)
568 {
569 	intmax_t val;
570 	uintmax_t uval;
571 	int rval;
572 
573 	if (getnum(&val, &uval, 1))
574 		return (1);
575 	rval = 0;
576 	if (val < INT_MIN || val > INT_MAX) {
577 		warnx("%s: %s", *gargv, strerror(ERANGE));
578 		rval = 1;
579 	}
580 	*ip = (int)val;
581 	return (rval);
582 }
583 
584 static int
585 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
586 {
587 	char *ep;
588 	int rval;
589 
590 	if (!*gargv) {
591 		*ip = *uip = 0;
592 		return (0);
593 	}
594 	if (**gargv == '"' || **gargv == '\'') {
595 		if (signedconv)
596 			*ip = asciicode();
597 		else
598 			*uip = asciicode();
599 		return (0);
600 	}
601 	rval = 0;
602 	errno = 0;
603 	if (signedconv)
604 		*ip = strtoimax(*gargv, &ep, 0);
605 	else
606 		*uip = strtoumax(*gargv, &ep, 0);
607 	if (ep == *gargv) {
608 		warnx("%s: expected numeric value", *gargv);
609 		rval = 1;
610 	}
611 	else if (*ep != '\0') {
612 		warnx("%s: not completely converted", *gargv);
613 		rval = 1;
614 	}
615 	if (errno == ERANGE) {
616 		warnx("%s: %s", *gargv, strerror(ERANGE));
617 		rval = 1;
618 	}
619 	++gargv;
620 	return (rval);
621 }
622 
623 static int
624 getfloating(long double *dp, int mod_ldbl)
625 {
626 	char *ep;
627 	int rval;
628 
629 	if (!*gargv) {
630 		*dp = 0.0;
631 		return (0);
632 	}
633 	if (**gargv == '"' || **gargv == '\'') {
634 		*dp = asciicode();
635 		return (0);
636 	}
637 	rval = 0;
638 	errno = 0;
639 	if (mod_ldbl)
640 		*dp = strtold(*gargv, &ep);
641 	else
642 		*dp = strtod(*gargv, &ep);
643 	if (ep == *gargv) {
644 		warnx("%s: expected numeric value", *gargv);
645 		rval = 1;
646 	} else if (*ep != '\0') {
647 		warnx("%s: not completely converted", *gargv);
648 		rval = 1;
649 	}
650 	if (errno == ERANGE) {
651 		warnx("%s: %s", *gargv, strerror(ERANGE));
652 		rval = 1;
653 	}
654 	++gargv;
655 	return (rval);
656 }
657 
658 static int
659 asciicode(void)
660 {
661 	int ch;
662 	wchar_t wch;
663 	mbstate_t mbs;
664 
665 	ch = (unsigned char)**gargv;
666 	if (ch == '\'' || ch == '"') {
667 		memset(&mbs, 0, sizeof(mbs));
668 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
669 		case (size_t)-2:
670 		case (size_t)-1:
671 			wch = (unsigned char)gargv[0][1];
672 			break;
673 		case 0:
674 			wch = 0;
675 			break;
676 		}
677 		ch = wch;
678 	}
679 	++gargv;
680 	return (ch);
681 }
682 
/* Write the one-line synopsis to stderr; the caller picks the exit status. */
static void
usage(void)
{
	static const char synopsis[] = "usage: printf format [arguments ...]\n";

	(void)fputs(synopsis, stderr);
}
688