xref: /freebsd/usr.bin/printf/printf.c (revision d93a896ef95946b0bf1219866fcb324b78543444)
1 /*-
2  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Important: This file is used both as a standalone program /usr/bin/printf
33  * and as a builtin for /bin/sh (#define SHELL).
34  */
35 
36 #ifndef SHELL
37 #ifndef lint
38 static char const copyright[] =
39 "@(#) Copyright (c) 1989, 1993\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 #endif /* not lint */
42 #endif
43 
44 #ifndef lint
45 #if 0
46 static char const sccsid[] = "@(#)printf.c	8.1 (Berkeley) 7/20/93";
47 #endif
48 static const char rcsid[] =
49   "$FreeBSD$";
50 #endif /* not lint */
51 
52 #include <sys/types.h>
53 
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <inttypes.h>
58 #include <limits.h>
59 #include <locale.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #include <unistd.h>
64 #include <wchar.h>
65 
66 #ifdef SHELL
67 #define	main printfcmd
68 #include "bltin/bltin.h"
69 #include "options.h"
70 #endif
71 
/*
 * Hand one converted argument to the host printf(3).
 *
 * `f' is the rebuilt conversion specification and `func' the value to
 * print.  The expanding function must have havewidth, haveprec,
 * fieldwidth and precision in scope: havewidth/haveprec say whether a
 * '*' width and/or precision was gathered from the arguments, in which
 * case the matching int is passed ahead of the value.
 */
#define	PF(f, func) do {						\
	if (havewidth && haveprec) {					\
		(void)printf(f, fieldwidth, precision, func);		\
	} else if (havewidth) {						\
		(void)printf(f, fieldwidth, func);			\
	} else if (haveprec) {						\
		(void)printf(f, precision, func);			\
	} else {							\
		(void)printf(f, func);					\
	}								\
} while (0)
83 
/* Conversion driver and format-string helpers. */
static char *printf_doformat(char *, int *);
static char *mknum(char *, char);
static int escape(char *, int, size_t *);
static void usage(void);

/* Argument fetchers; each reads the argument at gargv. */
static int asciicode(void);
static int getchr(void);
static const char *getstr(void);
static int getint(int *);
static int getnum(intmax_t *, uintmax_t *, int);
static int getfloating(long double *, int);

/* Digits accepted in an "n$" argument index. */
static const char digits[] = "0123456789";

/* Sentinel: printf_doformat() returns this address when %b saw "\c". */
static char end_fmt[1];

static int myargc;	/* number of arguments counted from myargv */
static char **myargv;	/* first argument of the current format pass */
static char **gargv;	/* argument the next fetch will read */
static char **maxargv;	/* furthest gargv position seen in this pass */
104 
105 int
106 main(int argc, char *argv[])
107 {
108 	size_t len;
109 	int end, rval;
110 	char *format, *fmt, *start;
111 #ifndef SHELL
112 	int ch;
113 
114 	(void) setlocale(LC_ALL, "");
115 #endif
116 
117 #ifdef SHELL
118 	nextopt("");
119 	argc -= argptr - argv;
120 	argv = argptr;
121 #else
122 	while ((ch = getopt(argc, argv, "")) != -1)
123 		switch (ch) {
124 		case '?':
125 		default:
126 			usage();
127 			return (1);
128 		}
129 	argc -= optind;
130 	argv += optind;
131 #endif
132 
133 	if (argc < 1) {
134 		usage();
135 		return (1);
136 	}
137 
138 #ifdef SHELL
139 	INTOFF;
140 #endif
141 	/*
142 	 * Basic algorithm is to scan the format string for conversion
143 	 * specifications -- once one is found, find out if the field
144 	 * width or precision is a '*'; if it is, gather up value.  Note,
145 	 * format strings are reused as necessary to use up the provided
146 	 * arguments, arguments of zero/null string are provided to use
147 	 * up the format string.
148 	 */
149 	fmt = format = *argv;
150 	escape(fmt, 1, &len);		/* backslash interpretation */
151 	rval = end = 0;
152 	gargv = ++argv;
153 
154 	for (;;) {
155 		maxargv = gargv;
156 
157 		myargv = gargv;
158 		for (myargc = 0; gargv[myargc]; myargc++)
159 			/* nop */;
160 		start = fmt;
161 		while (fmt < format + len) {
162 			if (fmt[0] == '%') {
163 				fwrite(start, 1, fmt - start, stdout);
164 				if (fmt[1] == '%') {
165 					/* %% prints a % */
166 					putchar('%');
167 					fmt += 2;
168 				} else {
169 					fmt = printf_doformat(fmt, &rval);
170 					if (fmt == NULL || fmt == end_fmt) {
171 #ifdef SHELL
172 						INTON;
173 #endif
174 						return (fmt == NULL ? 1 : rval);
175 					}
176 					end = 0;
177 				}
178 				start = fmt;
179 			} else
180 				fmt++;
181 			if (gargv > maxargv)
182 				maxargv = gargv;
183 		}
184 		gargv = maxargv;
185 
186 		if (end == 1) {
187 			warnx("missing format character");
188 #ifdef SHELL
189 			INTON;
190 #endif
191 			return (1);
192 		}
193 		fwrite(start, 1, fmt - start, stdout);
194 		if (!*gargv) {
195 #ifdef SHELL
196 			INTON;
197 #endif
198 			return (rval);
199 		}
200 		/* Restart at the beginning of the format string. */
201 		fmt = format;
202 		end = 1;
203 	}
204 	/* NOTREACHED */
205 }
206 
207 
208 static char *
209 printf_doformat(char *fmt, int *rval)
210 {
211 	static const char skip1[] = "#'-+ 0";
212 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
213 	char convch, nextch;
214 	char start[strlen(fmt) + 1];
215 	char **fargv;
216 	char *dptr;
217 	int l;
218 
219 	dptr = start;
220 	*dptr++ = '%';
221 	*dptr = 0;
222 
223 	fmt++;
224 
225 	/* look for "n$" field index specifier */
226 	l = strspn(fmt, digits);
227 	if ((l > 0) && (fmt[l] == '$')) {
228 		int idx = atoi(fmt);
229 		if (idx <= myargc) {
230 			gargv = &myargv[idx - 1];
231 		} else {
232 			gargv = &myargv[myargc];
233 		}
234 		if (gargv > maxargv)
235 			maxargv = gargv;
236 		fmt += l + 1;
237 
238 		/* save format argument */
239 		fargv = gargv;
240 	} else {
241 		fargv = NULL;
242 	}
243 
244 	/* skip to field width */
245 	while (*fmt && strchr(skip1, *fmt) != NULL) {
246 		*dptr++ = *fmt++;
247 		*dptr = 0;
248 	}
249 
250 	if (*fmt == '*') {
251 
252 		fmt++;
253 		l = strspn(fmt, digits);
254 		if ((l > 0) && (fmt[l] == '$')) {
255 			int idx = atoi(fmt);
256 			if (fargv == NULL) {
257 				warnx("incomplete use of n$");
258 				return (NULL);
259 			}
260 			if (idx <= myargc) {
261 				gargv = &myargv[idx - 1];
262 			} else {
263 				gargv = &myargv[myargc];
264 			}
265 			fmt += l + 1;
266 		} else if (fargv != NULL) {
267 			warnx("incomplete use of n$");
268 			return (NULL);
269 		}
270 
271 		if (getint(&fieldwidth))
272 			return (NULL);
273 		if (gargv > maxargv)
274 			maxargv = gargv;
275 		havewidth = 1;
276 
277 		*dptr++ = '*';
278 		*dptr = 0;
279 	} else {
280 		havewidth = 0;
281 
282 		/* skip to possible '.', get following precision */
283 		while (isdigit(*fmt)) {
284 			*dptr++ = *fmt++;
285 			*dptr = 0;
286 		}
287 	}
288 
289 	if (*fmt == '.') {
290 		/* precision present? */
291 		fmt++;
292 		*dptr++ = '.';
293 
294 		if (*fmt == '*') {
295 
296 			fmt++;
297 			l = strspn(fmt, digits);
298 			if ((l > 0) && (fmt[l] == '$')) {
299 				int idx = atoi(fmt);
300 				if (fargv == NULL) {
301 					warnx("incomplete use of n$");
302 					return (NULL);
303 				}
304 				if (idx <= myargc) {
305 					gargv = &myargv[idx - 1];
306 				} else {
307 					gargv = &myargv[myargc];
308 				}
309 				fmt += l + 1;
310 			} else if (fargv != NULL) {
311 				warnx("incomplete use of n$");
312 				return (NULL);
313 			}
314 
315 			if (getint(&precision))
316 				return (NULL);
317 			if (gargv > maxargv)
318 				maxargv = gargv;
319 			haveprec = 1;
320 			*dptr++ = '*';
321 			*dptr = 0;
322 		} else {
323 			haveprec = 0;
324 
325 			/* skip to conversion char */
326 			while (isdigit(*fmt)) {
327 				*dptr++ = *fmt++;
328 				*dptr = 0;
329 			}
330 		}
331 	} else
332 		haveprec = 0;
333 	if (!*fmt) {
334 		warnx("missing format character");
335 		return (NULL);
336 	}
337 	*dptr++ = *fmt;
338 	*dptr = 0;
339 
340 	/*
341 	 * Look for a length modifier.  POSIX doesn't have these, so
342 	 * we only support them for floating-point conversions, which
343 	 * are extensions.  This is useful because the L modifier can
344 	 * be used to gain extra range and precision, while omitting
345 	 * it is more likely to produce consistent results on different
346 	 * architectures.  This is not so important for integers
347 	 * because overflow is the only bad thing that can happen to
348 	 * them, but consider the command  printf %a 1.1
349 	 */
350 	if (*fmt == 'L') {
351 		mod_ldbl = 1;
352 		fmt++;
353 		if (!strchr("aAeEfFgG", *fmt)) {
354 			warnx("bad modifier L for %%%c", *fmt);
355 			return (NULL);
356 		}
357 	} else {
358 		mod_ldbl = 0;
359 	}
360 
361 	/* save the current arg offset, and set to the format arg */
362 	if (fargv != NULL) {
363 		gargv = fargv;
364 	}
365 
366 	convch = *fmt;
367 	nextch = *++fmt;
368 
369 	*fmt = '\0';
370 	switch (convch) {
371 	case 'b': {
372 		size_t len;
373 		char *p;
374 		int getout;
375 
376 		p = strdup(getstr());
377 		if (p == NULL) {
378 			warnx("%s", strerror(ENOMEM));
379 			return (NULL);
380 		}
381 		getout = escape(p, 0, &len);
382 		fputs(p, stdout);
383 		free(p);
384 		if (getout)
385 			return (end_fmt);
386 		break;
387 	}
388 	case 'c': {
389 		char p;
390 
391 		p = getchr();
392 		if (p != '\0')
393 			PF(start, p);
394 		break;
395 	}
396 	case 's': {
397 		const char *p;
398 
399 		p = getstr();
400 		PF(start, p);
401 		break;
402 	}
403 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
404 		char *f;
405 		intmax_t val;
406 		uintmax_t uval;
407 		int signedconv;
408 
409 		signedconv = (convch == 'd' || convch == 'i');
410 		if ((f = mknum(start, convch)) == NULL)
411 			return (NULL);
412 		if (getnum(&val, &uval, signedconv))
413 			*rval = 1;
414 		if (signedconv)
415 			PF(f, val);
416 		else
417 			PF(f, uval);
418 		break;
419 	}
420 	case 'e': case 'E':
421 	case 'f': case 'F':
422 	case 'g': case 'G':
423 	case 'a': case 'A': {
424 		long double p;
425 
426 		if (getfloating(&p, mod_ldbl))
427 			*rval = 1;
428 		if (mod_ldbl)
429 			PF(start, p);
430 		else
431 			PF(start, (double)p);
432 		break;
433 	}
434 	default:
435 		warnx("illegal format character %c", convch);
436 		return (NULL);
437 	}
438 	*fmt = nextch;
439 	/* return the gargv to the next element */
440 	return (fmt);
441 }
442 
/*
 * Build an integer conversion specification that carries the 'j'
 * (intmax_t/uintmax_t) length modifier.
 *
 * str: specification ending in its conversion character, e.g. "%5d";
 *      it must hold at least that one character.
 * ch:  conversion character to place after the 'j'.
 *
 * Returns a pointer to a static buffer, reused and grown in 1 KiB steps
 * across calls, holding e.g. "%5jd"; NULL (after a warning) when the
 * buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	size_t need, prefix;

	/* Room for str plus the inserted 'j' and the terminating NUL. */
	need = strlen(str) + 2;
	if (need > copy_size) {
		size_t rounded = (need + 1023) / 1024 * 1024;
		char *grown = realloc(copy, rounded);

		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	/* Everything except the original conversion character is kept. */
	prefix = need - 3;
	memmove(copy, str, prefix);
	copy[prefix] = 'j';
	copy[prefix + 1] = ch;
	copy[prefix + 2] = '\0';
	return (copy);
}
468 
/*
 * Expand backslash escapes in `fmt' in place.
 *
 * fmt:     NUL-terminated string, rewritten with the expansion (which
 *          is never longer than the input).
 * percent: non-zero when `fmt' is the format operand.  There "\c" is
 *          the letter 'c', octal escapes take at most three digits and
 *          an octal escape that yields '%' is stored as "%%".  When
 *          zero (a %b argument), "\c" ends processing and an octal
 *          escape that begins with 0 may take four digits.
 * len:     receives the length of the result, which may contain NULs.
 *
 * Returns 1 when "\c" ended processing, 0 otherwise.  A backslash at
 * the very end of the string is kept literally; a backslash followed
 * by an unrecognised character yields that character.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	const char *in = fmt;
	char *out = fmt;

	while (*in != '\0') {
		char ch = *in++;

		if (ch != '\\') {
			*out++ = ch;
			continue;
		}

		ch = *in;		/* character after the backslash */
		if (ch == '\0') {
			/* EOS, user error: keep the lone backslash */
			*out++ = '\\';
			break;
		}

		if (ch >= '0' && ch <= '7') {
			/* octal constant */
			int digits_left = (!percent && ch == '0') ? 4 : 3;
			int value = 0;

			while (digits_left-- > 0 && *in >= '0' && *in <= '7')
				value = (value << 3) + (*in++ - '0');
			if (percent && value == '%') {
				*out++ = '%';
				*out++ = '%';
			} else {
				*out++ = (char)value;
			}
			continue;
		}

		in++;
		switch (ch) {
		case 'a':		/* bell/alert */
			*out++ = '\a';
			break;
		case 'b':		/* backspace */
			*out++ = '\b';
			break;
		case 'c':
			if (!percent) {
				*out = '\0';
				*len = out - base;
				return (1);
			}
			*out++ = 'c';
			break;
		case 'f':		/* form-feed */
			*out++ = '\f';
			break;
		case 'n':		/* newline */
			*out++ = '\n';
			break;
		case 'r':		/* carriage-return */
			*out++ = '\r';
			break;
		case 't':		/* horizontal tab */
			*out++ = '\t';
			break;
		case 'v':		/* vertical tab */
			*out++ = '\v';
			break;
		default:		/* backslash, single quote, others */
			*out++ = ch;
			break;
		}
	}

	*out = '\0';
	*len = out - base;
	return (0);
}
544 
545 static int
546 getchr(void)
547 {
548 	if (!*gargv)
549 		return ('\0');
550 	return ((int)**gargv++);
551 }
552 
553 static const char *
554 getstr(void)
555 {
556 	if (!*gargv)
557 		return ("");
558 	return (*gargv++);
559 }
560 
561 static int
562 getint(int *ip)
563 {
564 	intmax_t val;
565 	uintmax_t uval;
566 	int rval;
567 
568 	if (getnum(&val, &uval, 1))
569 		return (1);
570 	rval = 0;
571 	if (val < INT_MIN || val > INT_MAX) {
572 		warnx("%s: %s", *gargv, strerror(ERANGE));
573 		rval = 1;
574 	}
575 	*ip = (int)val;
576 	return (rval);
577 }
578 
579 static int
580 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
581 {
582 	char *ep;
583 	int rval;
584 
585 	if (!*gargv) {
586 		*ip = *uip = 0;
587 		return (0);
588 	}
589 	if (**gargv == '"' || **gargv == '\'') {
590 		if (signedconv)
591 			*ip = asciicode();
592 		else
593 			*uip = asciicode();
594 		return (0);
595 	}
596 	rval = 0;
597 	errno = 0;
598 	if (signedconv)
599 		*ip = strtoimax(*gargv, &ep, 0);
600 	else
601 		*uip = strtoumax(*gargv, &ep, 0);
602 	if (ep == *gargv) {
603 		warnx("%s: expected numeric value", *gargv);
604 		rval = 1;
605 	}
606 	else if (*ep != '\0') {
607 		warnx("%s: not completely converted", *gargv);
608 		rval = 1;
609 	}
610 	if (errno == ERANGE) {
611 		warnx("%s: %s", *gargv, strerror(ERANGE));
612 		rval = 1;
613 	}
614 	++gargv;
615 	return (rval);
616 }
617 
618 static int
619 getfloating(long double *dp, int mod_ldbl)
620 {
621 	char *ep;
622 	int rval;
623 
624 	if (!*gargv) {
625 		*dp = 0.0;
626 		return (0);
627 	}
628 	if (**gargv == '"' || **gargv == '\'') {
629 		*dp = asciicode();
630 		return (0);
631 	}
632 	rval = 0;
633 	errno = 0;
634 	if (mod_ldbl)
635 		*dp = strtold(*gargv, &ep);
636 	else
637 		*dp = strtod(*gargv, &ep);
638 	if (ep == *gargv) {
639 		warnx("%s: expected numeric value", *gargv);
640 		rval = 1;
641 	} else if (*ep != '\0') {
642 		warnx("%s: not completely converted", *gargv);
643 		rval = 1;
644 	}
645 	if (errno == ERANGE) {
646 		warnx("%s: %s", *gargv, strerror(ERANGE));
647 		rval = 1;
648 	}
649 	++gargv;
650 	return (rval);
651 }
652 
653 static int
654 asciicode(void)
655 {
656 	int ch;
657 	wchar_t wch;
658 	mbstate_t mbs;
659 
660 	ch = (unsigned char)**gargv;
661 	if (ch == '\'' || ch == '"') {
662 		memset(&mbs, 0, sizeof(mbs));
663 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
664 		case (size_t)-2:
665 		case (size_t)-1:
666 			wch = (unsigned char)gargv[0][1];
667 			break;
668 		case 0:
669 			wch = 0;
670 			break;
671 		}
672 		ch = wch;
673 	}
674 	++gargv;
675 	return (ch);
676 }
677 
/* Print the command synopsis on standard error. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: printf format [arguments ...]\n";

	(void)fputs(synopsis, stderr);
}
683