xref: /freebsd/usr.bin/printf/printf.c (revision 4f8f43b06ed07e96a250855488cc531799d5b78f)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
5  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
6  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
7  * Copyright (c) 1989, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * Important: This file is used both as a standalone program /usr/bin/printf
36  * and as a builtin for /bin/sh (#define SHELL).
37  */
38 
39 #ifndef SHELL
40 #ifndef lint
41 static char const copyright[] =
42 "@(#) Copyright (c) 1989, 1993\n\
43 	The Regents of the University of California.  All rights reserved.\n";
44 #endif /* not lint */
45 #endif
46 
47 #ifndef lint
48 #if 0
49 static char const sccsid[] = "@(#)printf.c	8.1 (Berkeley) 7/20/93";
50 #endif
51 #endif /* not lint */
52 
53 #include <sys/types.h>
54 
55 #include <ctype.h>
56 #include <err.h>
57 #include <errno.h>
58 #include <inttypes.h>
59 #include <limits.h>
60 #include <locale.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <wchar.h>
66 
67 #ifdef SHELL
68 #define	main printfcmd
69 #include "bltin/bltin.h"
70 #include "options.h"
71 #endif
72 
/*
 * Emit one converted operand through libc printf(3).  "f" is the rebuilt
 * conversion specification and "func" the value to print.  The macro reads
 * the caller's havewidth/haveprec flags to decide whether fieldwidth and/or
 * precision must be passed ahead of the value to satisfy '*' in "f".
 */
#define	PF(f, func) do {						\
	if (havewidth && haveprec)					\
		(void)printf(f, fieldwidth, precision, func);		\
	else if (havewidth)						\
		(void)printf(f, fieldwidth, func);			\
	else if (haveprec)						\
		(void)printf(f, precision, func);			\
	else								\
		(void)printf(f, func);					\
} while (0)
84 
/* Conversion driver and helpers. */
static char	*printf_doformat(char *, int *);
static char	*mknum(char *, char);
static int	 escape(char *, int, size_t *);
static void	 usage(void);

/* Operand fetchers; each one consumes the operand at gargv, if any. */
static int	 asciicode(void);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char *getstr(void);

/* Accept set used with strspn() to recognize the "n" of "n$". */
static const char digits[] = "0123456789";

/*
 * Sentinel whose address printf_doformat() returns when a %b operand
 * contained \c; main() compares against it to stop all output.
 */
static char end_fmt[1];

/* Operands available to the current pass over the format (set in main). */
static int  myargc;
static char **myargv;
/* Next operand to be consumed. */
static char **gargv;
/* Furthest operand position reached during the current pass. */
static char **maxargv;
105 
106 int
107 main(int argc, char *argv[])
108 {
109 	size_t len;
110 	int end, rval;
111 	char *format, *fmt, *start;
112 #ifndef SHELL
113 	int ch;
114 
115 	(void) setlocale(LC_ALL, "");
116 #endif
117 
118 #ifdef SHELL
119 	nextopt("");
120 	argc -= argptr - argv;
121 	argv = argptr;
122 #else
123 	while ((ch = getopt(argc, argv, "")) != -1)
124 		switch (ch) {
125 		case '?':
126 		default:
127 			usage();
128 			return (1);
129 		}
130 	argc -= optind;
131 	argv += optind;
132 #endif
133 
134 	if (argc < 1) {
135 		usage();
136 		return (1);
137 	}
138 
139 #ifdef SHELL
140 	INTOFF;
141 #endif
142 	/*
143 	 * Basic algorithm is to scan the format string for conversion
144 	 * specifications -- once one is found, find out if the field
145 	 * width or precision is a '*'; if it is, gather up value.  Note,
146 	 * format strings are reused as necessary to use up the provided
147 	 * arguments, arguments of zero/null string are provided to use
148 	 * up the format string.
149 	 */
150 	fmt = format = *argv;
151 	escape(fmt, 1, &len);		/* backslash interpretation */
152 	rval = end = 0;
153 	gargv = ++argv;
154 
155 	for (;;) {
156 		maxargv = gargv;
157 
158 		myargv = gargv;
159 		for (myargc = 0; gargv[myargc]; myargc++)
160 			/* nop */;
161 		start = fmt;
162 		while (fmt < format + len) {
163 			if (fmt[0] == '%') {
164 				fwrite(start, 1, fmt - start, stdout);
165 				if (fmt[1] == '%') {
166 					/* %% prints a % */
167 					putchar('%');
168 					fmt += 2;
169 				} else {
170 					fmt = printf_doformat(fmt, &rval);
171 					if (fmt == NULL || fmt == end_fmt) {
172 #ifdef SHELL
173 						INTON;
174 #endif
175 						return (fmt == NULL ? 1 : rval);
176 					}
177 					end = 0;
178 				}
179 				start = fmt;
180 			} else
181 				fmt++;
182 			if (gargv > maxargv)
183 				maxargv = gargv;
184 		}
185 		gargv = maxargv;
186 
187 		if (end == 1) {
188 			warnx("missing format character");
189 #ifdef SHELL
190 			INTON;
191 #endif
192 			return (1);
193 		}
194 		fwrite(start, 1, fmt - start, stdout);
195 		if (!*gargv) {
196 #ifdef SHELL
197 			INTON;
198 #endif
199 			return (rval);
200 		}
201 		/* Restart at the beginning of the format string. */
202 		fmt = format;
203 		end = 1;
204 	}
205 	/* NOTREACHED */
206 }
207 
208 
209 static char *
210 printf_doformat(char *fmt, int *rval)
211 {
212 	static const char skip1[] = "#'-+ 0";
213 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
214 	char convch, nextch;
215 	char start[strlen(fmt) + 1];
216 	char **fargv;
217 	char *dptr;
218 	int l;
219 
220 	dptr = start;
221 	*dptr++ = '%';
222 	*dptr = 0;
223 
224 	fmt++;
225 
226 	/* look for "n$" field index specifier */
227 	l = strspn(fmt, digits);
228 	if ((l > 0) && (fmt[l] == '$')) {
229 		int idx = atoi(fmt);
230 		if (idx <= myargc) {
231 			gargv = &myargv[idx - 1];
232 		} else {
233 			gargv = &myargv[myargc];
234 		}
235 		if (gargv > maxargv)
236 			maxargv = gargv;
237 		fmt += l + 1;
238 
239 		/* save format argument */
240 		fargv = gargv;
241 	} else {
242 		fargv = NULL;
243 	}
244 
245 	/* skip to field width */
246 	while (*fmt && strchr(skip1, *fmt) != NULL) {
247 		*dptr++ = *fmt++;
248 		*dptr = 0;
249 	}
250 
251 	if (*fmt == '*') {
252 
253 		fmt++;
254 		l = strspn(fmt, digits);
255 		if ((l > 0) && (fmt[l] == '$')) {
256 			int idx = atoi(fmt);
257 			if (fargv == NULL) {
258 				warnx("incomplete use of n$");
259 				return (NULL);
260 			}
261 			if (idx <= myargc) {
262 				gargv = &myargv[idx - 1];
263 			} else {
264 				gargv = &myargv[myargc];
265 			}
266 			fmt += l + 1;
267 		} else if (fargv != NULL) {
268 			warnx("incomplete use of n$");
269 			return (NULL);
270 		}
271 
272 		if (getint(&fieldwidth))
273 			return (NULL);
274 		if (gargv > maxargv)
275 			maxargv = gargv;
276 		havewidth = 1;
277 
278 		*dptr++ = '*';
279 		*dptr = 0;
280 	} else {
281 		havewidth = 0;
282 
283 		/* skip to possible '.', get following precision */
284 		while (isdigit(*fmt)) {
285 			*dptr++ = *fmt++;
286 			*dptr = 0;
287 		}
288 	}
289 
290 	if (*fmt == '.') {
291 		/* precision present? */
292 		fmt++;
293 		*dptr++ = '.';
294 
295 		if (*fmt == '*') {
296 
297 			fmt++;
298 			l = strspn(fmt, digits);
299 			if ((l > 0) && (fmt[l] == '$')) {
300 				int idx = atoi(fmt);
301 				if (fargv == NULL) {
302 					warnx("incomplete use of n$");
303 					return (NULL);
304 				}
305 				if (idx <= myargc) {
306 					gargv = &myargv[idx - 1];
307 				} else {
308 					gargv = &myargv[myargc];
309 				}
310 				fmt += l + 1;
311 			} else if (fargv != NULL) {
312 				warnx("incomplete use of n$");
313 				return (NULL);
314 			}
315 
316 			if (getint(&precision))
317 				return (NULL);
318 			if (gargv > maxargv)
319 				maxargv = gargv;
320 			haveprec = 1;
321 			*dptr++ = '*';
322 			*dptr = 0;
323 		} else {
324 			haveprec = 0;
325 
326 			/* skip to conversion char */
327 			while (isdigit(*fmt)) {
328 				*dptr++ = *fmt++;
329 				*dptr = 0;
330 			}
331 		}
332 	} else
333 		haveprec = 0;
334 	if (!*fmt) {
335 		warnx("missing format character");
336 		return (NULL);
337 	}
338 	*dptr++ = *fmt;
339 	*dptr = 0;
340 
341 	/*
342 	 * Look for a length modifier.  POSIX doesn't have these, so
343 	 * we only support them for floating-point conversions, which
344 	 * are extensions.  This is useful because the L modifier can
345 	 * be used to gain extra range and precision, while omitting
346 	 * it is more likely to produce consistent results on different
347 	 * architectures.  This is not so important for integers
348 	 * because overflow is the only bad thing that can happen to
349 	 * them, but consider the command  printf %a 1.1
350 	 */
351 	if (*fmt == 'L') {
352 		mod_ldbl = 1;
353 		fmt++;
354 		if (!strchr("aAeEfFgG", *fmt)) {
355 			warnx("bad modifier L for %%%c", *fmt);
356 			return (NULL);
357 		}
358 	} else {
359 		mod_ldbl = 0;
360 	}
361 
362 	/* save the current arg offset, and set to the format arg */
363 	if (fargv != NULL) {
364 		gargv = fargv;
365 	}
366 
367 	convch = *fmt;
368 	nextch = *++fmt;
369 
370 	*fmt = '\0';
371 	switch (convch) {
372 	case 'b': {
373 		size_t len;
374 		char *p;
375 		int getout;
376 
377 		/* Convert "b" to "s" for output. */
378 		start[strlen(start) - 1] = 's';
379 		if ((p = strdup(getstr())) == NULL) {
380 			warnx("%s", strerror(ENOMEM));
381 			return (NULL);
382 		}
383 		getout = escape(p, 0, &len);
384 		PF(start, p);
385 		/* Restore format for next loop. */
386 
387 		free(p);
388 		if (getout)
389 			return (end_fmt);
390 		break;
391 	}
392 	case 'c': {
393 		char p;
394 
395 		p = getchr();
396 		if (p != '\0')
397 			PF(start, p);
398 		break;
399 	}
400 	case 's': {
401 		const char *p;
402 
403 		p = getstr();
404 		PF(start, p);
405 		break;
406 	}
407 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
408 		char *f;
409 		intmax_t val;
410 		uintmax_t uval;
411 		int signedconv;
412 
413 		signedconv = (convch == 'd' || convch == 'i');
414 		if ((f = mknum(start, convch)) == NULL)
415 			return (NULL);
416 		if (getnum(&val, &uval, signedconv))
417 			*rval = 1;
418 		if (signedconv)
419 			PF(f, val);
420 		else
421 			PF(f, uval);
422 		break;
423 	}
424 	case 'e': case 'E':
425 	case 'f': case 'F':
426 	case 'g': case 'G':
427 	case 'a': case 'A': {
428 		long double p;
429 
430 		if (getfloating(&p, mod_ldbl))
431 			*rval = 1;
432 		if (mod_ldbl)
433 			PF(start, p);
434 		else
435 			PF(start, (double)p);
436 		break;
437 	}
438 	default:
439 		warnx("illegal format character %c", convch);
440 		return (NULL);
441 	}
442 	*fmt = nextch;
443 	/* return the gargv to the next element */
444 	return (fmt);
445 }
446 
/*
 * Build the libc format for an integer conversion: copy str without its
 * final character, then append the 'j' length modifier followed by ch.
 *
 * The result lives in a function-static buffer that is grown in 1 KiB
 * steps and overwritten by the next call.  Returns NULL, after a warning,
 * if the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *buf;
	static size_t bufsize;
	size_t keep, need;

	keep = strlen(str) - 1;		/* all but the conversion char */
	need = keep + 3;		/* plus 'j', ch and the NUL */

	if (need > bufsize) {
		size_t rounded = (need + 1023) / 1024 * 1024;
		char *grown = realloc(buf, rounded);

		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		buf = grown;
		bufsize = rounded;
	}

	memmove(buf, str, keep);
	buf[keep] = 'j';
	buf[keep + 1] = ch;
	buf[keep + 2] = '\0';
	return (buf);
}
472 
/*
 * Expand backslash escapes in fmt in place and store the resulting length
 * (excluding the terminating NUL) in *len.
 *
 * percent != 0 selects format-string rules: "\c" is kept as a plain 'c',
 * octal escapes take at most three digits, and an octal escape that
 * produces '%' is stored as "%%".  percent == 0 selects %b-operand rules:
 * "\c" truncates the string there and makes the function return 1, and an
 * octal escape starting with '0' may take four digits.
 *
 * A trailing lone backslash is kept as is; an unrecognized escape yields
 * the escaped character itself.  Returns 1 only for the "\c" case above,
 * otherwise 0.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	const char *in = fmt;
	char *out = fmt;
	char ch;
	int left, octal;

	while (*in != '\0') {
		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		in++;			/* step over the backslash */
		switch (*in) {
		case '\0':		/* EOS, user error */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		case 'a':		/* bell/alert */
			ch = '\a';
			break;
		case 'b':		/* backspace */
			ch = '\b';
			break;
		case 'c':
			if (!percent) {
				*out = '\0';
				*len = out - base;
				return (1);
			}
			ch = 'c';
			break;
		case 'f':		/* form-feed */
			ch = '\f';
			break;
		case 'n':		/* newline */
			ch = '\n';
			break;
		case 'r':		/* carriage-return */
			ch = '\r';
			break;
		case 't':		/* horizontal tab */
			ch = '\t';
			break;
		case 'v':		/* vertical tab */
			ch = '\v';
			break;
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
					/* octal constant */
			left = (!percent && *in == '0') ? 4 : 3;
			octal = 0;
			while (left-- > 0 && *in >= '0' && *in <= '7')
				octal = (octal << 3) + (*in++ - '0');
			/* Back up onto the last digit for the common advance. */
			in--;
			if (percent && octal == '%') {
				/* Keep a literal percent from starting a conversion. */
				*out++ = '%';
				ch = '%';
			} else
				ch = (char)octal;
			break;
		default:		/* backslash, single quote, anything else */
			ch = *in;
			break;
		}
		*out++ = ch;
		in++;
	}
	*out = '\0';
	*len = out - base;
	return (0);
}
548 
549 static int
550 getchr(void)
551 {
552 	if (!*gargv)
553 		return ('\0');
554 	return ((int)**gargv++);
555 }
556 
557 static const char *
558 getstr(void)
559 {
560 	if (!*gargv)
561 		return ("");
562 	return (*gargv++);
563 }
564 
565 static int
566 getint(int *ip)
567 {
568 	intmax_t val;
569 	uintmax_t uval;
570 	int rval;
571 
572 	if (getnum(&val, &uval, 1))
573 		return (1);
574 	rval = 0;
575 	if (val < INT_MIN || val > INT_MAX) {
576 		warnx("%s: %s", *gargv, strerror(ERANGE));
577 		rval = 1;
578 	}
579 	*ip = (int)val;
580 	return (rval);
581 }
582 
583 static int
584 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
585 {
586 	char *ep;
587 	int rval;
588 
589 	if (!*gargv) {
590 		*ip = *uip = 0;
591 		return (0);
592 	}
593 	if (**gargv == '"' || **gargv == '\'') {
594 		if (signedconv)
595 			*ip = asciicode();
596 		else
597 			*uip = asciicode();
598 		return (0);
599 	}
600 	rval = 0;
601 	errno = 0;
602 	if (signedconv)
603 		*ip = strtoimax(*gargv, &ep, 0);
604 	else
605 		*uip = strtoumax(*gargv, &ep, 0);
606 	if (ep == *gargv) {
607 		warnx("%s: expected numeric value", *gargv);
608 		rval = 1;
609 	}
610 	else if (*ep != '\0') {
611 		warnx("%s: not completely converted", *gargv);
612 		rval = 1;
613 	}
614 	if (errno == ERANGE) {
615 		warnx("%s: %s", *gargv, strerror(ERANGE));
616 		rval = 1;
617 	}
618 	++gargv;
619 	return (rval);
620 }
621 
622 static int
623 getfloating(long double *dp, int mod_ldbl)
624 {
625 	char *ep;
626 	int rval;
627 
628 	if (!*gargv) {
629 		*dp = 0.0;
630 		return (0);
631 	}
632 	if (**gargv == '"' || **gargv == '\'') {
633 		*dp = asciicode();
634 		return (0);
635 	}
636 	rval = 0;
637 	errno = 0;
638 	if (mod_ldbl)
639 		*dp = strtold(*gargv, &ep);
640 	else
641 		*dp = strtod(*gargv, &ep);
642 	if (ep == *gargv) {
643 		warnx("%s: expected numeric value", *gargv);
644 		rval = 1;
645 	} else if (*ep != '\0') {
646 		warnx("%s: not completely converted", *gargv);
647 		rval = 1;
648 	}
649 	if (errno == ERANGE) {
650 		warnx("%s: %s", *gargv, strerror(ERANGE));
651 		rval = 1;
652 	}
653 	++gargv;
654 	return (rval);
655 }
656 
657 static int
658 asciicode(void)
659 {
660 	int ch;
661 	wchar_t wch;
662 	mbstate_t mbs;
663 
664 	ch = (unsigned char)**gargv;
665 	if (ch == '\'' || ch == '"') {
666 		memset(&mbs, 0, sizeof(mbs));
667 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
668 		case (size_t)-2:
669 		case (size_t)-1:
670 			wch = (unsigned char)gargv[0][1];
671 			break;
672 		case 0:
673 			wch = 0;
674 			break;
675 		}
676 		ch = wch;
677 	}
678 	++gargv;
679 	return (ch);
680 }
681 
/* Write the one-line synopsis to standard error. */
static void
usage(void)
{
	(void)fputs("usage: printf format [arguments ...]\n", stderr);
}
687