xref: /freebsd/usr.bin/printf/printf.c (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
5  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
6  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
7  * Copyright (c) 1989, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * Important: This file is used both as a standalone program /usr/bin/printf
36  * and as a builtin for /bin/sh (#define SHELL).
37  */
38 
39 #include <sys/types.h>
40 
41 #include <ctype.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <inttypes.h>
45 #include <limits.h>
46 #include <locale.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 
53 #ifdef SHELL
54 #define	main printfcmd
55 #include "bltin/bltin.h"
56 #include "options.h"
57 #endif
58 
/*
 * PF(f, func): print the single value `func' on stdout through printf(3)
 * using format `f'.  The enclosing scope must provide havewidth, haveprec,
 * fieldwidth and precision; when havewidth and/or haveprec are set, the
 * corresponding int is passed ahead of `func' (width first, then
 * precision).  Only one branch runs, so `func' is evaluated once.
 */
#define	PF(f, func) do {						\
	if (havewidth && haveprec) {					\
		(void)printf(f, fieldwidth, precision, func);		\
	} else if (havewidth) {						\
		(void)printf(f, fieldwidth, func);			\
	} else if (haveprec) {						\
		(void)printf(f, precision, func);			\
	} else {							\
		(void)printf(f, func);					\
	}								\
} while (0)
70 
/* Forward declarations for the file-local helpers defined below. */
static int	 asciicode(void);
static char	*printf_doformat(char *, int *);
static int	 escape(char *, int, size_t *);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char
		*getstr(void);
static char	*mknum(char *, char);
static void	 usage(void);

/* Accept set handed to strspn() when scanning the digits of "n$". */
static const char digits[] = "0123456789";

/*
 * Sentinel: printf_doformat() returns this address (never dereferenced)
 * when a %b argument contained \c, telling main() to stop all output.
 */
static char end_fmt[1];

/*
 * Argument cursor state shared by main(), printf_doformat() and the get*()
 * helpers:
 *   myargv  - first argument of the current pass over the format
 *   myargc  - number of arguments from myargv up to the NULL terminator
 *   gargv   - next argument to be consumed
 *   maxargv - furthest position gargv has reached during the current pass
 */
static int  myargc;
static char **myargv;
static char **gargv;
static char **maxargv;
91 
/*
 * Entry point of printf(1); compiled as printfcmd() when SHELL is defined.
 *
 * argv[0] after option processing is the format string, the remaining
 * entries are its arguments.  The format is escape-processed once in place
 * and then scanned repeatedly until every argument has been consumed.
 *
 * Returns 1 on a usage error, when printf_doformat() reports a fatal
 * error (NULL), or when a reuse pass over the format processes no
 * conversion; otherwise returns rval, which printf_doformat() sets to 1
 * when a numeric argument fails to convert.
 */
int
main(int argc, char *argv[])
{
	size_t len;
	int end, rval;
	char *format, *fmt, *start;
#ifndef SHELL
	int ch;

	(void) setlocale(LC_ALL, "");
#endif

	/* No options are accepted; this only skips "--" and rejects others. */
#ifdef SHELL
	nextopt("");
	argc -= argptr - argv;
	argv = argptr;
#else
	while ((ch = getopt(argc, argv, "")) != -1)
		switch (ch) {
		case '?':
		default:
			usage();
			return (1);
		}
	argc -= optind;
	argv += optind;
#endif

	/* The format operand is mandatory. */
	if (argc < 1) {
		usage();
		return (1);
	}

#ifdef SHELL
	/* NOTE(review): INTOFF/INTON come from the shell headers; presumably
	 * they bracket a region with interrupts held off -- confirm there. */
	INTOFF;
#endif
	/*
	 * Basic algorithm is to scan the format string for conversion
	 * specifications -- once one is found, find out if the field
	 * width or precision is a '*'; if it is, gather up value.  Note,
	 * format strings are reused as necessary to use up the provided
	 * arguments, arguments of zero/null string are provided to use
	 * up the format string.
	 */
	fmt = format = *argv;
	/*
	 * len is the processed length; the loop below is bounded by it rather
	 * than by a NUL, so a \0 escape may embed a NUL byte in the format.
	 */
	escape(fmt, 1, &len);		/* backslash interpretation */
	rval = end = 0;
	gargv = ++argv;

	/* Each iteration is one complete pass over the format string. */
	for (;;) {
		maxargv = gargv;

		/* "n$" indices are relative to the start of this pass. */
		myargv = gargv;
		for (myargc = 0; gargv[myargc]; myargc++)
			/* nop */;
		/* start marks the beginning of the pending literal text. */
		start = fmt;
		while (fmt < format + len) {
			if (fmt[0] == '%') {
				/* Flush the literal text preceding the '%'. */
				fwrite(start, 1, fmt - start, stdout);
				if (fmt[1] == '%') {
					/* %% prints a % */
					putchar('%');
					fmt += 2;
				} else {
					fmt = printf_doformat(fmt, &rval);
					/*
					 * NULL: fatal error.  end_fmt: a %b
					 * argument contained \c, stop here.
					 */
					if (fmt == NULL || fmt == end_fmt) {
#ifdef SHELL
						INTON;
#endif
						return (fmt == NULL ? 1 : rval);
					}
					/* A conversion ran during this pass. */
					end = 0;
				}
				start = fmt;
			} else
				fmt++;
			/* Remember the furthest argument touched so far. */
			if (gargv > maxargv)
				maxargv = gargv;
		}
		/* The next pass resumes after the furthest argument used. */
		gargv = maxargv;

		/*
		 * end is still 1 only if this was a reuse pass in which no
		 * conversion was processed; the remaining arguments could
		 * never be consumed, so give up instead of looping forever.
		 */
		if (end == 1) {
			warnx("missing format character");
#ifdef SHELL
			INTON;
#endif
			return (1);
		}
		/* Flush the literal text after the last conversion. */
		fwrite(start, 1, fmt - start, stdout);
		if (!*gargv) {
#ifdef SHELL
			INTON;
#endif
			return (rval);
		}
		/* Restart at the beginning of the format string. */
		fmt = format;
		end = 1;
	}
	/* NOTREACHED */
}
193 
194 
195 static char *
196 printf_doformat(char *fmt, int *rval)
197 {
198 	static const char skip1[] = "#'-+ 0";
199 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
200 	char convch, nextch;
201 	char start[strlen(fmt) + 1];
202 	char **fargv;
203 	char *dptr;
204 	int l;
205 
206 	dptr = start;
207 	*dptr++ = '%';
208 	*dptr = 0;
209 
210 	fmt++;
211 
212 	/* look for "n$" field index specifier */
213 	l = strspn(fmt, digits);
214 	if ((l > 0) && (fmt[l] == '$')) {
215 		int idx = atoi(fmt);
216 		if (idx <= myargc) {
217 			gargv = &myargv[idx - 1];
218 		} else {
219 			gargv = &myargv[myargc];
220 		}
221 		if (gargv > maxargv)
222 			maxargv = gargv;
223 		fmt += l + 1;
224 
225 		/* save format argument */
226 		fargv = gargv;
227 	} else {
228 		fargv = NULL;
229 	}
230 
231 	/* skip to field width */
232 	while (*fmt && strchr(skip1, *fmt) != NULL) {
233 		*dptr++ = *fmt++;
234 		*dptr = 0;
235 	}
236 
237 	if (*fmt == '*') {
238 
239 		fmt++;
240 		l = strspn(fmt, digits);
241 		if ((l > 0) && (fmt[l] == '$')) {
242 			int idx = atoi(fmt);
243 			if (fargv == NULL) {
244 				warnx("incomplete use of n$");
245 				return (NULL);
246 			}
247 			if (idx <= myargc) {
248 				gargv = &myargv[idx - 1];
249 			} else {
250 				gargv = &myargv[myargc];
251 			}
252 			fmt += l + 1;
253 		} else if (fargv != NULL) {
254 			warnx("incomplete use of n$");
255 			return (NULL);
256 		}
257 
258 		if (getint(&fieldwidth))
259 			return (NULL);
260 		if (gargv > maxargv)
261 			maxargv = gargv;
262 		havewidth = 1;
263 
264 		*dptr++ = '*';
265 		*dptr = 0;
266 	} else {
267 		havewidth = 0;
268 
269 		/* skip to possible '.', get following precision */
270 		while (isdigit(*fmt)) {
271 			*dptr++ = *fmt++;
272 			*dptr = 0;
273 		}
274 	}
275 
276 	if (*fmt == '.') {
277 		/* precision present? */
278 		fmt++;
279 		*dptr++ = '.';
280 
281 		if (*fmt == '*') {
282 
283 			fmt++;
284 			l = strspn(fmt, digits);
285 			if ((l > 0) && (fmt[l] == '$')) {
286 				int idx = atoi(fmt);
287 				if (fargv == NULL) {
288 					warnx("incomplete use of n$");
289 					return (NULL);
290 				}
291 				if (idx <= myargc) {
292 					gargv = &myargv[idx - 1];
293 				} else {
294 					gargv = &myargv[myargc];
295 				}
296 				fmt += l + 1;
297 			} else if (fargv != NULL) {
298 				warnx("incomplete use of n$");
299 				return (NULL);
300 			}
301 
302 			if (getint(&precision))
303 				return (NULL);
304 			if (gargv > maxargv)
305 				maxargv = gargv;
306 			haveprec = 1;
307 			*dptr++ = '*';
308 			*dptr = 0;
309 		} else {
310 			haveprec = 0;
311 
312 			/* skip to conversion char */
313 			while (isdigit(*fmt)) {
314 				*dptr++ = *fmt++;
315 				*dptr = 0;
316 			}
317 		}
318 	} else
319 		haveprec = 0;
320 	if (!*fmt) {
321 		warnx("missing format character");
322 		return (NULL);
323 	}
324 	*dptr++ = *fmt;
325 	*dptr = 0;
326 
327 	/*
328 	 * Look for a length modifier.  POSIX doesn't have these, so
329 	 * we only support them for floating-point conversions, which
330 	 * are extensions.  This is useful because the L modifier can
331 	 * be used to gain extra range and precision, while omitting
332 	 * it is more likely to produce consistent results on different
333 	 * architectures.  This is not so important for integers
334 	 * because overflow is the only bad thing that can happen to
335 	 * them, but consider the command  printf %a 1.1
336 	 */
337 	if (*fmt == 'L') {
338 		mod_ldbl = 1;
339 		fmt++;
340 		if (!strchr("aAeEfFgG", *fmt)) {
341 			warnx("bad modifier L for %%%c", *fmt);
342 			return (NULL);
343 		}
344 	} else {
345 		mod_ldbl = 0;
346 	}
347 
348 	/* save the current arg offset, and set to the format arg */
349 	if (fargv != NULL) {
350 		gargv = fargv;
351 	}
352 
353 	convch = *fmt;
354 	nextch = *++fmt;
355 
356 	*fmt = '\0';
357 	switch (convch) {
358 	case 'b': {
359 		size_t len;
360 		char *p;
361 		int getout;
362 
363 		/* Convert "b" to "s" for output. */
364 		start[strlen(start) - 1] = 's';
365 		if ((p = strdup(getstr())) == NULL) {
366 			warnx("%s", strerror(ENOMEM));
367 			return (NULL);
368 		}
369 		getout = escape(p, 0, &len);
370 		PF(start, p);
371 		/* Restore format for next loop. */
372 
373 		free(p);
374 		if (getout)
375 			return (end_fmt);
376 		break;
377 	}
378 	case 'c': {
379 		char p;
380 
381 		p = getchr();
382 		if (p != '\0')
383 			PF(start, p);
384 		break;
385 	}
386 	case 's': {
387 		const char *p;
388 
389 		p = getstr();
390 		PF(start, p);
391 		break;
392 	}
393 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
394 		char *f;
395 		intmax_t val;
396 		uintmax_t uval;
397 		int signedconv;
398 
399 		signedconv = (convch == 'd' || convch == 'i');
400 		if ((f = mknum(start, convch)) == NULL)
401 			return (NULL);
402 		if (getnum(&val, &uval, signedconv))
403 			*rval = 1;
404 		if (signedconv)
405 			PF(f, val);
406 		else
407 			PF(f, uval);
408 		break;
409 	}
410 	case 'e': case 'E':
411 	case 'f': case 'F':
412 	case 'g': case 'G':
413 	case 'a': case 'A': {
414 		long double p;
415 
416 		if (getfloating(&p, mod_ldbl))
417 			*rval = 1;
418 		if (mod_ldbl)
419 			PF(start, p);
420 		else
421 			PF(start, (double)p);
422 		break;
423 	}
424 	default:
425 		warnx("illegal format character %c", convch);
426 		return (NULL);
427 	}
428 	*fmt = nextch;
429 	/* return the gargv to the next element */
430 	return (fmt);
431 }
432 
/*
 * Build an intmax_t-width variant of the conversion specification `str':
 * its final character is replaced by 'j' followed by `ch'.
 *
 * The result lives in a function-static buffer that is grown in 1 KiB
 * steps and overwritten by the next call; callers must not free it.
 * Returns NULL, after a warning, if the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	size_t speclen, need;

	speclen = strlen(str);
	need = speclen + 2;		/* extra 'j' plus the terminator */

	if (copy_size < need) {
		size_t grown = (need + 1023) / 1024 * 1024;
		char *bigger = realloc(copy, grown);

		if (bigger == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = bigger;
		copy_size = grown;
	}

	/* Everything except the old conversion character, then "j<ch>". */
	memmove(copy, str, speclen - 1);
	copy[speclen - 1] = 'j';
	copy[speclen] = ch;
	copy[speclen + 1] = '\0';
	return (copy);
}
458 
/*
 * Interpret backslash escapes in `fmt', rewriting the string in place (the
 * result is never longer than the input).
 *
 * percent != 0: `fmt' is a format string.  "\c" is kept as a plain 'c',
 *               octal escapes use at most 3 digits, and an octal escape
 *               that yields '%' is stored as "%%".
 * percent == 0: `fmt' is a %b argument.  "\c" cuts the string off there
 *               and makes the function return 1; an octal escape that
 *               begins with '0' may use up to 4 digits.
 *
 * A backslash before any other character yields that character; a lone
 * trailing backslash is kept.  *len receives the number of bytes produced,
 * excluding the terminator, which matters because "\0" can embed a NUL.
 * Returns 1 only for the "\c" case above, otherwise 0.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	/* names[i] after a backslash produces codes[i]. */
	static const char names[] = "\\'abfnrtv";
	static const char codes[] = "\\'\a\b\f\n\r\t\v";
	char *const base = fmt;
	char *out = fmt;
	const char *in = fmt;
	const char *hit;

	while (*in != '\0') {
		char ch = *in++;

		if (ch != '\\') {
			*out++ = ch;
			continue;
		}

		/* ch becomes the character that follows the backslash. */
		ch = *in;

		if (ch == '\0') {		/* EOS, user error */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		}

		if (ch == 'c' && !percent) {	/* stop all output here */
			*out = '\0';
			*len = out - base;
			return (1);
		}

		if (ch >= '0' && ch <= '7') {	/* octal constant */
			int limit = (!percent && ch == '0') ? 4 : 3;
			int value = 0;

			while (limit > 0 && *in >= '0' && *in <= '7') {
				value = (value << 3) + (*in - '0');
				in++;
				limit--;
			}
			if (percent && value == '%') {
				/* keep it literal for the later format scan */
				*out++ = '%';
				*out++ = '%';
			} else {
				*out++ = (char)value;
			}
			continue;
		}

		/* Named escape, or the character itself if it is not one. */
		in++;
		hit = strchr(names, ch);
		*out++ = (hit != NULL) ? codes[hit - names] : ch;
	}

	*out = '\0';
	*len = out - base;
	return (0);
}
534 
535 static int
536 getchr(void)
537 {
538 	if (!*gargv)
539 		return ('\0');
540 	return ((int)**gargv++);
541 }
542 
543 static const char *
544 getstr(void)
545 {
546 	if (!*gargv)
547 		return ("");
548 	return (*gargv++);
549 }
550 
551 static int
552 getint(int *ip)
553 {
554 	intmax_t val;
555 	uintmax_t uval;
556 	int rval;
557 
558 	if (getnum(&val, &uval, 1))
559 		return (1);
560 	rval = 0;
561 	if (val < INT_MIN || val > INT_MAX) {
562 		warnx("%s: %s", *gargv, strerror(ERANGE));
563 		rval = 1;
564 	}
565 	*ip = (int)val;
566 	return (rval);
567 }
568 
569 static int
570 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
571 {
572 	char *ep;
573 	int rval;
574 
575 	if (!*gargv) {
576 		*ip = *uip = 0;
577 		return (0);
578 	}
579 	if (**gargv == '"' || **gargv == '\'') {
580 		if (signedconv)
581 			*ip = asciicode();
582 		else
583 			*uip = asciicode();
584 		return (0);
585 	}
586 	rval = 0;
587 	errno = 0;
588 	if (signedconv)
589 		*ip = strtoimax(*gargv, &ep, 0);
590 	else
591 		*uip = strtoumax(*gargv, &ep, 0);
592 	if (ep == *gargv) {
593 		warnx("%s: expected numeric value", *gargv);
594 		rval = 1;
595 	}
596 	else if (*ep != '\0') {
597 		warnx("%s: not completely converted", *gargv);
598 		rval = 1;
599 	}
600 	if (errno == ERANGE) {
601 		warnx("%s: %s", *gargv, strerror(ERANGE));
602 		rval = 1;
603 	}
604 	++gargv;
605 	return (rval);
606 }
607 
608 static int
609 getfloating(long double *dp, int mod_ldbl)
610 {
611 	char *ep;
612 	int rval;
613 
614 	if (!*gargv) {
615 		*dp = 0.0;
616 		return (0);
617 	}
618 	if (**gargv == '"' || **gargv == '\'') {
619 		*dp = asciicode();
620 		return (0);
621 	}
622 	rval = 0;
623 	errno = 0;
624 	if (mod_ldbl)
625 		*dp = strtold(*gargv, &ep);
626 	else
627 		*dp = strtod(*gargv, &ep);
628 	if (ep == *gargv) {
629 		warnx("%s: expected numeric value", *gargv);
630 		rval = 1;
631 	} else if (*ep != '\0') {
632 		warnx("%s: not completely converted", *gargv);
633 		rval = 1;
634 	}
635 	if (errno == ERANGE) {
636 		warnx("%s: %s", *gargv, strerror(ERANGE));
637 		rval = 1;
638 	}
639 	++gargv;
640 	return (rval);
641 }
642 
643 static int
644 asciicode(void)
645 {
646 	int ch;
647 	wchar_t wch;
648 	mbstate_t mbs;
649 
650 	ch = (unsigned char)**gargv;
651 	if (ch == '\'' || ch == '"') {
652 		memset(&mbs, 0, sizeof(mbs));
653 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
654 		case (size_t)-2:
655 		case (size_t)-1:
656 			wch = (unsigned char)gargv[0][1];
657 			break;
658 		case 0:
659 			wch = 0;
660 			break;
661 		}
662 		ch = wch;
663 	}
664 	++gargv;
665 	return (ch);
666 }
667 
/* Write the one-line synopsis to stderr; the caller decides the exit status. */
static void
usage(void)
{
	static const char synopsis[] = "usage: printf format [arguments ...]\n";

	(void)fputs(synopsis, stderr);
}
673