xref: /illumos-gate/usr/src/cmd/printf/printf.c (revision 3bb8546d907194e91bea367d863eb10ce3fe6621)
1 /*
2  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 4. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/types.h>
33 
34 #include <err.h>
35 #include <errno.h>
36 #include <inttypes.h>
37 #include <limits.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <alloca.h>
43 #include <ctype.h>
44 #include <locale.h>
45 #include <note.h>
46 
/*
 * Fixed-arity front ends for warnx().  Each one is always invoked with
 * three arguments and forwards only as many as its name indicates; the
 * remaining ones are dropped during macro expansion.
 */
#define	warnx1(fmt, unused1, unused2)	warnx(fmt)
#define	warnx2(fmt, arg1, unused2)	warnx(fmt, arg1)
#define	warnx3(fmt, arg1, arg2)		warnx(fmt, arg1, arg2)

/* Distance in bytes from address "lo" up to address "hi". */
#define	PTRDIFF(hi, lo)	((uintptr_t)(hi) - (uintptr_t)(lo))

/* Shorthand for looking a message up through gettext(). */
#define	_(msgid)	gettext(msgid)
54 
/*
 * Format one value and write it to stdout.
 *
 * "f" is the conversion specification to hand to asprintf() and "func"
 * is the value to format.  The macro reads the locals havewidth,
 * haveprec, fieldwidth and precision of the function it is expanded in:
 * fieldwidth and/or precision are passed ahead of the value only when
 * the matching have* flag is set.  The formatted text is written with
 * fputs() and released; if asprintf() leaves the buffer NULL nothing is
 * written.
 */
#define	PF(f, func) do {						\
	char *pf_out = NULL;						\
	if (havewidth && haveprec) {					\
		(void) asprintf(&pf_out, f, fieldwidth, precision, func); \
	} else if (havewidth) {						\
		(void) asprintf(&pf_out, f, fieldwidth, func);		\
	} else if (haveprec) {						\
		(void) asprintf(&pf_out, f, precision, func);		\
	} else {							\
		(void) asprintf(&pf_out, f, func);			\
	}								\
	if (pf_out != NULL) {						\
		(void) fputs(pf_out, stdout);				\
		free(pf_out);						\
	}								\
_NOTE(CONSTCOND) } while (0)
71 
/* Forward declarations of the file-local helpers defined further down. */
static int		asciicode(void);
static char		*doformat(char *fmt, int *rval);
static int		escape(char *fmt, int percent, size_t *len);
static int		getchr(void);
static int		getfloating(long double *dp, int mod_ldbl);
static int		getint(int *ip);
static int		getnum(intmax_t *ip, uintmax_t *uip, int signedconv);
static const char	*getstr(void);
static char		*mknum(char *str, char ch);
static void		usage(void);
83 
/* Characters that may form the index of an "n$" argument reference. */
static const char digits[] = "0123456789";

/*
 * Argument cursor state shared by main() and the get*() helpers.
 *
 * gargv   - next argument to be consumed
 * maxargv - furthest position gargv has reached during the current
 *           pass over the format string
 * myargv  - first argument available to the current pass
 * myargc  - number of arguments from myargv up to the NULL terminator
 */
static char **gargv;
static char **maxargv;
static char **myargv;
static int  myargc;
90 
91 int
92 main(int argc, char *argv[])
93 {
94 	size_t len;
95 	int end, rval;
96 	char *format, *fmt, *start;
97 
98 	(void) setlocale(LC_ALL, "");
99 
100 	argv++;
101 	argc--;
102 
103 	/*
104 	 * POSIX says: Standard utilities that do not accept options,
105 	 * but that do accept operands, shall recognize "--" as a
106 	 * first argument to be discarded.
107 	 */
108 	if (argc && strcmp(argv[0], "--") == 0) {
109 		argc--;
110 		argv++;
111 	}
112 
113 	if (argc < 1) {
114 		usage();
115 		return (1);
116 	}
117 
118 	/*
119 	 * Basic algorithm is to scan the format string for conversion
120 	 * specifications -- once one is found, find out if the field
121 	 * width or precision is a '*'; if it is, gather up value.  Note,
122 	 * format strings are reused as necessary to use up the provided
123 	 * arguments, arguments of zero/null string are provided to use
124 	 * up the format string.
125 	 */
126 	fmt = format = *argv;
127 	(void) escape(fmt, 1, &len);	/* backslash interpretation */
128 	rval = end = 0;
129 	gargv = ++argv;
130 
131 	for (;;) {
132 		maxargv = gargv;
133 
134 		myargv = gargv;
135 		for (myargc = 0; gargv[myargc]; myargc++)
136 			/* nop */;
137 		start = fmt;
138 		while (fmt < format + len) {
139 			if (fmt[0] == '%') {
140 				(void) fwrite(start, 1, PTRDIFF(fmt, start),
141 				    stdout);
142 				if (fmt[1] == '%') {
143 					/* %% prints a % */
144 					(void) putchar('%');
145 					fmt += 2;
146 				} else {
147 					fmt = doformat(fmt, &rval);
148 					if (fmt == NULL)
149 						return (1);
150 					end = 0;
151 				}
152 				start = fmt;
153 			} else
154 				fmt++;
155 			if (gargv > maxargv)
156 				maxargv = gargv;
157 		}
158 		gargv = maxargv;
159 
160 		if (end == 1) {
161 			warnx1(_("missing format character"), NULL, NULL);
162 			return (1);
163 		}
164 		(void) fwrite(start, 1, PTRDIFF(fmt, start), stdout);
165 		if (!*gargv)
166 			return (rval);
167 		/* Restart at the beginning of the format string. */
168 		fmt = format;
169 		end = 1;
170 	}
171 	/* NOTREACHED */
172 }
173 
174 
175 static char *
176 doformat(char *fmt, int *rval)
177 {
178 	static const char skip1[] = "#'-+ 0";
179 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
180 	char convch, nextch;
181 	char *start;
182 	char **fargv;
183 	char *dptr;
184 	int l;
185 
186 	start = alloca(strlen(fmt) + 1);
187 
188 	dptr = start;
189 	*dptr++ = '%';
190 	*dptr = 0;
191 
192 	fmt++;
193 
194 	/* look for "n$" field index specifier */
195 	l = strspn(fmt, digits);
196 	if ((l > 0) && (fmt[l] == '$')) {
197 		int idx = atoi(fmt);
198 		if (idx <= myargc) {
199 			gargv = &myargv[idx - 1];
200 		} else {
201 			gargv = &myargv[myargc];
202 		}
203 		if (gargv > maxargv) {
204 			maxargv = gargv;
205 		}
206 		fmt += l + 1;
207 
208 		/* save format argument */
209 		fargv = gargv;
210 	} else {
211 		fargv = NULL;
212 	}
213 
214 	/* skip to field width */
215 	while (*fmt && strchr(skip1, *fmt) != NULL) {
216 		*dptr++ = *fmt++;
217 		*dptr = 0;
218 	}
219 
220 
221 	if (*fmt == '*') {
222 
223 		fmt++;
224 		l = strspn(fmt, digits);
225 		if ((l > 0) && (fmt[l] == '$')) {
226 			int idx = atoi(fmt);
227 			if (fargv == NULL) {
228 				warnx1(_("incomplete use of n$"), NULL, NULL);
229 				return (NULL);
230 			}
231 			if (idx <= myargc) {
232 				gargv = &myargv[idx - 1];
233 			} else {
234 				gargv = &myargv[myargc];
235 			}
236 			fmt += l + 1;
237 		} else if (fargv != NULL) {
238 			warnx1(_("incomplete use of n$"), NULL, NULL);
239 			return (NULL);
240 		}
241 
242 		if (getint(&fieldwidth))
243 			return (NULL);
244 		if (gargv > maxargv) {
245 			maxargv = gargv;
246 		}
247 		havewidth = 1;
248 
249 		*dptr++ = '*';
250 		*dptr = 0;
251 	} else {
252 		havewidth = 0;
253 
254 		/* skip to possible '.', get following precision */
255 		while (isdigit(*fmt)) {
256 			*dptr++ = *fmt++;
257 			*dptr = 0;
258 		}
259 	}
260 
261 	if (*fmt == '.') {
262 		/* precision present? */
263 		fmt++;
264 		*dptr++ = '.';
265 
266 		if (*fmt == '*') {
267 
268 			fmt++;
269 			l = strspn(fmt, digits);
270 			if ((l > 0) && (fmt[l] == '$')) {
271 				int idx = atoi(fmt);
272 				if (fargv == NULL) {
273 					warnx1(_("incomplete use of n$"),
274 					    NULL, NULL);
275 					return (NULL);
276 				}
277 				if (idx <= myargc) {
278 					gargv = &myargv[idx - 1];
279 				} else {
280 					gargv = &myargv[myargc];
281 				}
282 				fmt += l + 1;
283 			} else if (fargv != NULL) {
284 				warnx1(_("incomplete use of n$"), NULL, NULL);
285 				return (NULL);
286 			}
287 
288 			if (getint(&precision))
289 				return (NULL);
290 			if (gargv > maxargv) {
291 				maxargv = gargv;
292 			}
293 			haveprec = 1;
294 			*dptr++ = '*';
295 			*dptr = 0;
296 		} else {
297 			haveprec = 0;
298 
299 			/* skip to conversion char */
300 			while (isdigit(*fmt)) {
301 				*dptr++ = *fmt++;
302 				*dptr = 0;
303 			}
304 		}
305 	} else
306 		haveprec = 0;
307 	if (!*fmt) {
308 		warnx1(_("missing format character"), NULL, NULL);
309 		return (NULL);
310 	}
311 	*dptr++ = *fmt;
312 	*dptr = 0;
313 
314 	/*
315 	 * Look for a length modifier.  POSIX doesn't have these, so
316 	 * we only support them for floating-point conversions, which
317 	 * are extensions.  This is useful because the L modifier can
318 	 * be used to gain extra range and precision, while omitting
319 	 * it is more likely to produce consistent results on different
320 	 * architectures.  This is not so important for integers
321 	 * because overflow is the only bad thing that can happen to
322 	 * them, but consider the command  printf %a 1.1
323 	 */
324 	if (*fmt == 'L') {
325 		mod_ldbl = 1;
326 		fmt++;
327 		if (!strchr("aAeEfFgG", *fmt)) {
328 			warnx2(_("bad modifier L for %%%c"), *fmt, NULL);
329 			return (NULL);
330 		}
331 	} else {
332 		mod_ldbl = 0;
333 	}
334 
335 	/* save the current arg offset, and set to the format arg */
336 	if (fargv != NULL) {
337 		gargv = fargv;
338 	}
339 
340 	convch = *fmt;
341 	nextch = *++fmt;
342 
343 	*fmt = '\0';
344 	switch (convch) {
345 	case 'b': {
346 		size_t len;
347 		char *p;
348 		int getout;
349 
350 		p = strdup(getstr());
351 		if (p == NULL) {
352 			warnx2("%s", strerror(ENOMEM), NULL);
353 			return (NULL);
354 		}
355 		getout = escape(p, 0, &len);
356 		(void) fputs(p, stdout);
357 		free(p);
358 
359 		if (getout)
360 			exit(*rval);
361 		break;
362 	}
363 	case 'c': {
364 		char p;
365 
366 		p = getchr();
367 		PF(start, p);
368 		break;
369 	}
370 	case 's': {
371 		const char *p;
372 
373 		p = getstr();
374 		PF(start, p);
375 		break;
376 	}
377 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
378 		char *f;
379 		intmax_t val;
380 		uintmax_t uval;
381 		int signedconv;
382 
383 		signedconv = (convch == 'd' || convch == 'i');
384 		if ((f = mknum(start, convch)) == NULL)
385 			return (NULL);
386 		if (getnum(&val, &uval, signedconv))
387 			*rval = 1;
388 		if (signedconv)
389 			PF(f, val);
390 		else
391 			PF(f, uval);
392 		break;
393 	}
394 	case 'e': case 'E':
395 	case 'f': case 'F':
396 	case 'g': case 'G':
397 	case 'a': case 'A': {
398 		long double p;
399 
400 		if (getfloating(&p, mod_ldbl))
401 			*rval = 1;
402 		if (mod_ldbl)
403 			PF(start, p);
404 		else
405 			PF(start, (double)p);
406 		break;
407 	}
408 	default:
409 		warnx2(_("illegal format character %c"), convch, NULL);
410 		return (NULL);
411 	}
412 	*fmt = nextch;
413 
414 	/* return the gargv to the next element */
415 	return (fmt);
416 }
417 
/*
 * Return a copy of the conversion specification str with its final
 * character replaced by "j" followed by ch, i.e. with the intmax_t
 * length modifier inserted in front of the conversion character.
 *
 * The copy lives in a buffer that is kept between calls and enlarged in
 * 1 KB steps, so the result is only valid until the next call.
 * Returns NULL, after a warning, if the buffer cannot be enlarged.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	const size_t slen = strlen(str);
	const size_t need = slen + 2;	/* room for 'j' and the NUL */

	if (need > copy_size) {
		/* round the request up to a multiple of 1024 */
		const size_t grown = (need + 1023) & ~(size_t)1023;
		char *bigger = realloc(copy, grown);

		if (bigger == NULL) {
			warnx2("%s", strerror(ENOMEM), NULL);
			return (NULL);
		}
		copy_size = grown;
		copy = bigger;
	}

	/* everything except the trailing conversion character */
	(void) memmove(copy, str, slen - 1);
	copy[slen - 1] = 'j';
	copy[slen] = ch;
	copy[slen + 1] = '\0';
	return (copy);
}
443 
/*
 * Expand backslash escape sequences in fmt, in place.
 *
 * percent is non-zero when fmt is the format operand and zero when it
 * is a %b argument.  It changes three things:
 *   - "\c" stops the expansion and returns 1 for an argument, but is
 *     stored as a plain 'c' in a format;
 *   - an octal escape in an argument may have four digits when it
 *     starts with '0', otherwise (and always in a format) three;
 *   - an octal escape in a format that yields '%' is stored as "%%".
 * A backslash followed by any character not listed below stores that
 * character; a lone backslash at the end of the string is kept.
 *
 * The result is NUL-terminated and its length is stored in *len, which
 * matters because an octal escape can produce an embedded NUL byte.
 * Returns 1 if "\c" ended the expansion, 0 otherwise.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	char *in = fmt;		/* next byte to read */
	char *out = fmt;	/* next byte to write; never ahead of in */

	while (*in != '\0') {
		char repl;

		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		in++;			/* step over the backslash */

		if (*in == '\0') {
			/* EOS, user error */
			*out++ = '\\';
			*out = '\0';
			*len = PTRDIFF(out, base);
			return (0);
		}

		if (*in == 'c' && !percent) {
			*out = '\0';
			*len = PTRDIFF(out, base);
			return (1);
		}

		if (*in >= '0' && *in <= '7') {
			/* octal constant */
			int remaining = (!percent && *in == '0') ? 4 : 3;
			int octal = 0;

			while (remaining-- > 0 && *in >= '0' && *in <= '7')
				octal = (octal << 3) + (*in++ - '0');

			if (percent && octal == '%') {
				*out++ = '%';
				*out++ = '%';
			} else {
				*out++ = (char)octal;
			}
			continue;
		}

		switch (*in) {
		case 'a':		/* bell/alert */
			repl = '\a';
			break;
		case 'b':		/* backspace */
			repl = '\b';
			break;
		case 'f':		/* form-feed */
			repl = '\f';
			break;
		case 'n':		/* newline */
			repl = '\n';
			break;
		case 'r':		/* carriage-return */
			repl = '\r';
			break;
		case 't':		/* horizontal tab */
			repl = '\t';
			break;
		case 'v':		/* vertical tab */
			repl = '\v';
			break;
		default:
			/*
			 * Backslash, single quote, 'c' within a format and
			 * anything unrecognized stand for themselves.
			 */
			repl = *in;
			break;
		}
		*out++ = repl;
		in++;
	}

	*out = '\0';
	*len = PTRDIFF(out, base);
	return (0);
}
519 
520 static int
521 getchr(void)
522 {
523 	if (!*gargv)
524 		return ('\0');
525 	return ((int)**gargv++);
526 }
527 
528 static const char *
529 getstr(void)
530 {
531 	if (!*gargv)
532 		return ("");
533 	return (*gargv++);
534 }
535 
536 static int
537 getint(int *ip)
538 {
539 	intmax_t val;
540 	uintmax_t uval;
541 	int rval;
542 
543 	if (getnum(&val, &uval, 1))
544 		return (1);
545 	rval = 0;
546 	if (val < INT_MIN || val > INT_MAX) {
547 		warnx3("%s: %s", *gargv, strerror(ERANGE));
548 		rval = 1;
549 	}
550 	*ip = (int)val;
551 	return (rval);
552 }
553 
554 static int
555 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
556 {
557 	char *ep;
558 	int rval;
559 
560 	if (!*gargv) {
561 		*ip = *uip = 0;
562 		return (0);
563 	}
564 	if (**gargv == '"' || **gargv == '\'') {
565 		if (signedconv)
566 			*ip = asciicode();
567 		else
568 			*uip = asciicode();
569 		return (0);
570 	}
571 	rval = 0;
572 	errno = 0;
573 	if (signedconv)
574 		*ip = strtoimax(*gargv, &ep, 0);
575 	else
576 		*uip = strtoumax(*gargv, &ep, 0);
577 	if (ep == *gargv) {
578 		warnx2(_("%s: expected numeric value"), *gargv, NULL);
579 		rval = 1;
580 	} else if (*ep != '\0') {
581 		warnx2(_("%s: not completely converted"), *gargv, NULL);
582 		rval = 1;
583 	}
584 	if (errno == ERANGE) {
585 		warnx3("%s: %s", *gargv, strerror(ERANGE));
586 		rval = 1;
587 	}
588 	++gargv;
589 	return (rval);
590 }
591 
592 static int
593 getfloating(long double *dp, int mod_ldbl)
594 {
595 	char *ep;
596 	int rval;
597 
598 	if (!*gargv) {
599 		*dp = 0.0;
600 		return (0);
601 	}
602 	if (**gargv == '"' || **gargv == '\'') {
603 		*dp = asciicode();
604 		return (0);
605 	}
606 	rval = 0;
607 	errno = 0;
608 	if (mod_ldbl)
609 		*dp = strtold(*gargv, &ep);
610 	else
611 		*dp = strtod(*gargv, &ep);
612 	if (ep == *gargv) {
613 		warnx2(_("%s: expected numeric value"), *gargv, NULL);
614 		rval = 1;
615 	} else if (*ep != '\0') {
616 		warnx2(_("%s: not completely converted"), *gargv, NULL);
617 		rval = 1;
618 	}
619 	if (errno == ERANGE) {
620 		warnx3("%s: %s", *gargv, strerror(ERANGE));
621 		rval = 1;
622 	}
623 	++gargv;
624 	return (rval);
625 }
626 
627 static int
628 asciicode(void)
629 {
630 	int ch;
631 
632 	ch = **gargv;
633 	if (ch == '\'' || ch == '"')
634 		ch = (*gargv)[1];
635 	++gargv;
636 	return (ch);
637 }
638 
/*
 * Print the usage message on stderr.
 *
 * The translated text is written with fputs() rather than being used as
 * a printf format, so a '%' appearing in a message catalog entry cannot
 * be interpreted as a conversion specification.
 */
static void
usage(void)
{
	(void) fputs(_("usage: printf format [arguments ...]\n"), stderr);
}
644