xref: /freebsd/usr.bin/printf/printf.c (revision 3b8f08459569bf0faa21473e5cec2491e95c9349)
1 /*-
2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3  * Copyright (c) 1989, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 4. Neither the name of the University nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*
31  * Important: This file is used both as a standalone program /usr/bin/printf
32  * and as a builtin for /bin/sh (#define SHELL).
33  */
34 
35 #ifndef SHELL
36 #ifndef lint
37 static char const copyright[] =
38 "@(#) Copyright (c) 1989, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n";
40 #endif /* not lint */
41 #endif
42 
43 #ifndef lint
44 #if 0
45 static char const sccsid[] = "@(#)printf.c	8.1 (Berkeley) 7/20/93";
46 #endif
47 static const char rcsid[] =
48   "$FreeBSD$";
49 #endif /* not lint */
50 
51 #include <sys/types.h>
52 
53 #include <err.h>
54 #include <errno.h>
55 #include <inttypes.h>
56 #include <limits.h>
57 #include <locale.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62 #include <wchar.h>
63 
64 #ifdef SHELL
65 #define	main printfcmd
66 #include "bltin/bltin.h"
67 #include "error.h"
68 #include "options.h"
69 #endif
70 
/*
 * PF(f, func): format a single operand and write the result to stdout.
 *
 *   f    - writable, NUL-terminated conversion specification.  Its first
 *          character may be '$' instead of '%' (the caller leaves it that
 *          way after consuming an "n$" argument index); it is swapped to
 *          '%' for the duration of the call and restored afterwards.
 *   func - the value to be formatted (an expression, despite the name).
 *
 * The expansion reads the caller's locals havewidth, haveprec, fieldwidth
 * and precision to decide which '*' arguments must be passed along.
 *
 * The asprintf() result is checked explicitly: not every C library
 * promises that the output pointer is left NULL on failure, so the
 * buffer is only used (and freed) when the call reported success.
 */
#define	PF(f, func) do {						\
	char *b = NULL;							\
	int dollar = 0;							\
	int pf_ret;							\
	if (*(f) == '$') {						\
		dollar++;						\
		*(f) = '%';						\
	}								\
	if (havewidth) {						\
		if (haveprec)						\
			pf_ret = asprintf(&b, (f), fieldwidth,		\
			    precision, (func));				\
		else							\
			pf_ret = asprintf(&b, (f), fieldwidth, (func));	\
	} else if (haveprec)						\
		pf_ret = asprintf(&b, (f), precision, (func));		\
	else								\
		pf_ret = asprintf(&b, (f), (func));			\
	if (pf_ret >= 0 && b != NULL) {					\
		(void)fputs(b, stdout);					\
		free(b);						\
	}								\
	if (dollar)							\
		*(f) = '$';						\
} while (0)
94 
/* Forward declarations of the file-local helpers. */
static int	 asciicode(void);
static char	*printf_doformat(char *start, int *rval);
static int	 escape(char *fmt, int percent, size_t *len);
static int	 getchr(void);
static int	 getfloating(long double *dp, int mod_ldbl);
static int	 getint(int *ip);
static int	 getnum(intmax_t *ip, uintmax_t *uip, int signedconv);
static const char
		*getstr(void);
static char	*mknum(char *str, char ch);
static void	 usage(void);

/*
 * Operand cursor shared by main(), printf_doformat() and the get*()
 * helpers.
 */
static int	  myargc;	/* operands counted at the start of a pass */
static char	**myargv;	/* first operand of the pass; base for "n$" */
static char	**gargv;	/* next operand to be consumed */
110 
111 int
112 main(int argc, char *argv[])
113 {
114 	size_t len;
115 	int chopped, end, rval;
116 	char *format, *fmt, *start;
117 #ifndef SHELL
118 	int ch;
119 
120 	(void) setlocale(LC_ALL, "");
121 #endif
122 
123 #ifdef SHELL
124 	nextopt("");
125 	argc -= argptr - argv;
126 	argv = argptr;
127 #else
128 	while ((ch = getopt(argc, argv, "")) != -1)
129 		switch (ch) {
130 		case '?':
131 		default:
132 			usage();
133 			return (1);
134 		}
135 	argc -= optind;
136 	argv += optind;
137 #endif
138 
139 	if (argc < 1) {
140 		usage();
141 		return (1);
142 	}
143 
144 #ifdef SHELL
145 	INTOFF;
146 #endif
147 	/*
148 	 * Basic algorithm is to scan the format string for conversion
149 	 * specifications -- once one is found, find out if the field
150 	 * width or precision is a '*'; if it is, gather up value.  Note,
151 	 * format strings are reused as necessary to use up the provided
152 	 * arguments, arguments of zero/null string are provided to use
153 	 * up the format string.
154 	 */
155 	fmt = format = *argv;
156 	chopped = escape(fmt, 1, &len);		/* backslash interpretation */
157 	rval = end = 0;
158 	gargv = ++argv;
159 
160 	for (;;) {
161 		char **maxargv = gargv;
162 
163 		myargv = gargv;
164 		for (myargc = 0; gargv[myargc]; myargc++)
165 			/* nop */;
166 		start = fmt;
167 		while (fmt < format + len) {
168 			if (fmt[0] == '%') {
169 				fwrite(start, 1, fmt - start, stdout);
170 				if (fmt[1] == '%') {
171 					/* %% prints a % */
172 					putchar('%');
173 					fmt += 2;
174 				} else {
175 					fmt = printf_doformat(fmt, &rval);
176 					if (fmt == NULL) {
177 #ifdef SHELL
178 						INTON;
179 #endif
180 						return (1);
181 					}
182 					end = 0;
183 				}
184 				start = fmt;
185 			} else
186 				fmt++;
187 			if (gargv > maxargv)
188 				maxargv = gargv;
189 		}
190 		gargv = maxargv;
191 
192 		if (end == 1) {
193 			warnx("missing format character");
194 #ifdef SHELL
195 			INTON;
196 #endif
197 			return (1);
198 		}
199 		fwrite(start, 1, fmt - start, stdout);
200 		if (chopped || !*gargv) {
201 #ifdef SHELL
202 			INTON;
203 #endif
204 			return (rval);
205 		}
206 		/* Restart at the beginning of the format string. */
207 		fmt = format;
208 		end = 1;
209 	}
210 	/* NOTREACHED */
211 }
212 
213 
214 static char *
215 printf_doformat(char *start, int *rval)
216 {
217 	static const char skip1[] = "#'-+ 0";
218 	static const char skip2[] = "0123456789";
219 	char *fmt;
220 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
221 	char convch, nextch;
222 
223 	fmt = start + 1;
224 
225 	/* look for "n$" field index specifier */
226 	fmt += strspn(fmt, skip2);
227 	if ((*fmt == '$') && (fmt != (start + 1))) {
228 		int idx = atoi(start + 1);
229 		if (idx <= myargc) {
230 			gargv = &myargv[idx - 1];
231 		} else {
232 			gargv = &myargv[myargc];
233 		}
234 		start = fmt;
235 		fmt++;
236 	} else {
237 		fmt = start + 1;
238 	}
239 
240 	/* skip to field width */
241 	fmt += strspn(fmt, skip1);
242 	if (*fmt == '*') {
243 		if (getint(&fieldwidth))
244 			return (NULL);
245 		havewidth = 1;
246 		++fmt;
247 	} else {
248 		havewidth = 0;
249 
250 		/* skip to possible '.', get following precision */
251 		fmt += strspn(fmt, skip2);
252 	}
253 	if (*fmt == '.') {
254 		/* precision present? */
255 		++fmt;
256 		if (*fmt == '*') {
257 			if (getint(&precision))
258 				return (NULL);
259 			haveprec = 1;
260 			++fmt;
261 		} else {
262 			haveprec = 0;
263 
264 			/* skip to conversion char */
265 			fmt += strspn(fmt, skip2);
266 		}
267 	} else
268 		haveprec = 0;
269 	if (!*fmt) {
270 		warnx("missing format character");
271 		return (NULL);
272 	}
273 
274 	/*
275 	 * Look for a length modifier.  POSIX doesn't have these, so
276 	 * we only support them for floating-point conversions, which
277 	 * are extensions.  This is useful because the L modifier can
278 	 * be used to gain extra range and precision, while omitting
279 	 * it is more likely to produce consistent results on different
280 	 * architectures.  This is not so important for integers
281 	 * because overflow is the only bad thing that can happen to
282 	 * them, but consider the command  printf %a 1.1
283 	 */
284 	if (*fmt == 'L') {
285 		mod_ldbl = 1;
286 		fmt++;
287 		if (!strchr("aAeEfFgG", *fmt)) {
288 			warnx("bad modifier L for %%%c", *fmt);
289 			return (NULL);
290 		}
291 	} else {
292 		mod_ldbl = 0;
293 	}
294 
295 	convch = *fmt;
296 	nextch = *++fmt;
297 	*fmt = '\0';
298 	switch (convch) {
299 	case 'b': {
300 		size_t len;
301 		char *p;
302 		int getout;
303 
304 		p = strdup(getstr());
305 		if (p == NULL) {
306 			warnx("%s", strerror(ENOMEM));
307 			return (NULL);
308 		}
309 		getout = escape(p, 0, &len);
310 		*(fmt - 1) = 's';
311 		PF(start, p);
312 		*(fmt - 1) = 'b';
313 		free(p);
314 		if (getout)
315 			return (fmt);
316 		break;
317 	}
318 	case 'c': {
319 		char p;
320 
321 		p = getchr();
322 		PF(start, p);
323 		break;
324 	}
325 	case 's': {
326 		const char *p;
327 
328 		p = getstr();
329 		PF(start, p);
330 		break;
331 	}
332 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
333 		char *f;
334 		intmax_t val;
335 		uintmax_t uval;
336 		int signedconv;
337 
338 		signedconv = (convch == 'd' || convch == 'i');
339 		if ((f = mknum(start, convch)) == NULL)
340 			return (NULL);
341 		if (getnum(&val, &uval, signedconv))
342 			*rval = 1;
343 		if (signedconv)
344 			PF(f, val);
345 		else
346 			PF(f, uval);
347 		break;
348 	}
349 	case 'e': case 'E':
350 	case 'f': case 'F':
351 	case 'g': case 'G':
352 	case 'a': case 'A': {
353 		long double p;
354 
355 		if (getfloating(&p, mod_ldbl))
356 			*rval = 1;
357 		if (mod_ldbl)
358 			PF(start, p);
359 		else
360 			PF(start, (double)p);
361 		break;
362 	}
363 	default:
364 		warnx("illegal format character %c", convch);
365 		return (NULL);
366 	}
367 	*fmt = nextch;
368 	return (fmt);
369 }
370 
/*
 * Build an integer conversion specification from str with the 'j'
 * (intmax_t/uintmax_t) length modifier inserted before the conversion
 * character: everything in str except its last character is copied,
 * then 'j' and ch are appended.
 *
 * The result lives in a static buffer that is grown in 1 KiB steps and
 * reused by later calls; the caller must not free it.  Returns NULL
 * (after printing a diagnostic) when the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	size_t need, prefix;

	need = strlen(str) + 2;		/* str + 'j' + NUL */
	prefix = need - 3;		/* str without its conversion char */

	if (copy_size < need) {
		/* Round the request up to the next multiple of 1024. */
		size_t grown = (need + 1023) & ~(size_t)1023;
		char *bigger = realloc(copy, grown);

		if (bigger == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = bigger;
		copy_size = grown;
	}

	memcpy(copy, str, prefix);
	copy[prefix] = 'j';
	copy[prefix + 1] = ch;
	copy[prefix + 2] = '\0';
	return (copy);
}
397 
/*
 * Decode backslash escape sequences in fmt, in place.
 *
 *   percent - non-zero when fmt is the format string itself: octal
 *             escapes are then limited to three digits and an escape
 *             that decodes to '%' is stored as "%%".  When zero, an
 *             escape starting with '0' may have up to four digits.
 *   len     - receives the length of the decoded text (which may
 *             contain NUL bytes produced by octal escapes).
 *
 * Returns 1 if a \c escape was met (the text is cut at that point),
 * 0 otherwise.  The result is always NUL-terminated.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	char *in = fmt;
	char *out = fmt;

	while (*in != '\0') {
		char decoded;
		int maxdigits, value;

		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		in++;				/* look at the escape letter */
		switch (*in) {
		case '\0':		/* EOS, user error */
			/* A trailing backslash is kept literally. */
			out[0] = '\\';
			out[1] = '\0';
			*len = (out + 1) - base;
			return (0);
		case 'c':
			/* Discard everything from here on. */
			*out = '\0';
			*len = out - base;
			return (1);
		case 'a':		/* bell/alert */
			decoded = '\a';
			break;
		case 'b':		/* backspace */
			decoded = '\b';
			break;
		case 'f':		/* form-feed */
			decoded = '\f';
			break;
		case 'n':		/* newline */
			decoded = '\n';
			break;
		case 'r':		/* carriage-return */
			decoded = '\r';
			break;
		case 't':		/* horizontal tab */
			decoded = '\t';
			break;
		case 'v':		/* vertical tab */
			decoded = '\v';
			break;
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
			/* octal constant */
			maxdigits = (!percent && *in == '0') ? 4 : 3;
			value = 0;
			while (maxdigits-- > 0 && *in >= '0' && *in <= '7') {
				value = (value << 3) + (*in - '0');
				in++;
			}
			in--;		/* leave in on the last digit */
			if (percent && value == '%') {
				/* Keep a decoded '%' from starting a conversion. */
				*out++ = '%';
				decoded = '%';
			} else
				decoded = (char)value;
			break;
		default:
			/* Backslash, quote and unknown escapes: keep the char. */
			decoded = *in;
			break;
		}
		*out++ = decoded;
		in++;
	}
	*out = '\0';
	*len = out - base;
	return (0);
}
469 
470 static int
471 getchr(void)
472 {
473 	if (!*gargv)
474 		return ('\0');
475 	return ((int)**gargv++);
476 }
477 
478 static const char *
479 getstr(void)
480 {
481 	if (!*gargv)
482 		return ("");
483 	return (*gargv++);
484 }
485 
486 static int
487 getint(int *ip)
488 {
489 	intmax_t val;
490 	uintmax_t uval;
491 	int rval;
492 
493 	if (getnum(&val, &uval, 1))
494 		return (1);
495 	rval = 0;
496 	if (val < INT_MIN || val > INT_MAX) {
497 		warnx("%s: %s", *gargv, strerror(ERANGE));
498 		rval = 1;
499 	}
500 	*ip = (int)val;
501 	return (rval);
502 }
503 
504 static int
505 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
506 {
507 	char *ep;
508 	int rval;
509 
510 	if (!*gargv) {
511 		*ip = *uip = 0;
512 		return (0);
513 	}
514 	if (**gargv == '"' || **gargv == '\'') {
515 		if (signedconv)
516 			*ip = asciicode();
517 		else
518 			*uip = asciicode();
519 		return (0);
520 	}
521 	rval = 0;
522 	errno = 0;
523 	if (signedconv)
524 		*ip = strtoimax(*gargv, &ep, 0);
525 	else
526 		*uip = strtoumax(*gargv, &ep, 0);
527 	if (ep == *gargv) {
528 		warnx("%s: expected numeric value", *gargv);
529 		rval = 1;
530 	}
531 	else if (*ep != '\0') {
532 		warnx("%s: not completely converted", *gargv);
533 		rval = 1;
534 	}
535 	if (errno == ERANGE) {
536 		warnx("%s: %s", *gargv, strerror(ERANGE));
537 		rval = 1;
538 	}
539 	++gargv;
540 	return (rval);
541 }
542 
543 static int
544 getfloating(long double *dp, int mod_ldbl)
545 {
546 	char *ep;
547 	int rval;
548 
549 	if (!*gargv) {
550 		*dp = 0.0;
551 		return (0);
552 	}
553 	if (**gargv == '"' || **gargv == '\'') {
554 		*dp = asciicode();
555 		return (0);
556 	}
557 	rval = 0;
558 	errno = 0;
559 	if (mod_ldbl)
560 		*dp = strtold(*gargv, &ep);
561 	else
562 		*dp = strtod(*gargv, &ep);
563 	if (ep == *gargv) {
564 		warnx("%s: expected numeric value", *gargv);
565 		rval = 1;
566 	} else if (*ep != '\0') {
567 		warnx("%s: not completely converted", *gargv);
568 		rval = 1;
569 	}
570 	if (errno == ERANGE) {
571 		warnx("%s: %s", *gargv, strerror(ERANGE));
572 		rval = 1;
573 	}
574 	++gargv;
575 	return (rval);
576 }
577 
578 static int
579 asciicode(void)
580 {
581 	int ch;
582 	wchar_t wch;
583 	mbstate_t mbs;
584 
585 	ch = (unsigned char)**gargv;
586 	if (ch == '\'' || ch == '"') {
587 		memset(&mbs, 0, sizeof(mbs));
588 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
589 		case (size_t)-2:
590 		case (size_t)-1:
591 			wch = (unsigned char)gargv[0][1];
592 			break;
593 		case 0:
594 			wch = 0;
595 			break;
596 		}
597 		ch = wch;
598 	}
599 	++gargv;
600 	return (ch);
601 }
602 
/* Print the command synopsis on standard error. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: printf format [arguments ...]\n";

	(void)fputs(synopsis, stderr);
}
608