1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
5 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
6 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
7 * Copyright (c) 1989, 1993
8 * The Regents of the University of California. All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34 /*
35 * Important: This file is used both as a standalone program /usr/bin/printf
36 * and as a builtin for /bin/sh (#define SHELL).
37 */
38
39 #include <sys/types.h>
40
41 #include <ctype.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <inttypes.h>
45 #include <limits.h>
46 #include <locale.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52
53 #ifdef SHELL
54 #define main printfcmd
55 #include "bltin/bltin.h"
56 #include "options.h"
57 #endif
58
/*
 * Emit one converted value through printf(3).  The macro reads the locals
 * havewidth/haveprec/fieldwidth/precision of the enclosing function and
 * passes the width and/or precision ahead of the value only when the
 * corresponding '*' was present in the conversion specification.
 */
#define PF(f, func) do {						\
	if (havewidth && haveprec)					\
		(void)printf(f, fieldwidth, precision, func);		\
	else if (havewidth)						\
		(void)printf(f, fieldwidth, func);			\
	else if (haveprec)						\
		(void)printf(f, precision, func);			\
	else								\
		(void)printf(f, func);					\
} while (0)
70
/* Format processing. */
static char	*printf_doformat(char *fmt, int *rval);
static int	 escape(char *fmt, int percent, size_t *len);
static char	*mknum(char *str, char ch);

/* Argument fetchers; each one reads the argument gargv points at. */
static int	 asciicode(void);
static int	 getchr(void);
static int	 getfloating(long double *dp, int mod_ldbl);
static int	 getint(int *ip);
static int	 getnum(intmax_t *ip, uintmax_t *uip, int signedconv);
static const char
		*getstr(void);

static void	 usage(void);

/* Characters accepted by strspn() when looking for an "n$" index. */
static const char digits[] = "0123456789";

/* Sentinel returned by printf_doformat() when a %b argument contained \c. */
static char end_fmt[1];

/* Number of arguments available to the current pass over the format. */
static int myargc;
/* First argument of the current pass; "n$" indexes are relative to it. */
static char **myargv;
/* Next argument to be consumed. */
static char **gargv;
/* Furthest argument position reached during the current pass. */
static char **maxargv;
91
92 int
main(int argc,char * argv[])93 main(int argc, char *argv[])
94 {
95 size_t len;
96 int end, rval;
97 char *format, *fmt, *start;
98 #ifndef SHELL
99 int ch;
100
101 (void) setlocale(LC_ALL, "");
102 #endif
103
104 #ifdef SHELL
105 nextopt("");
106 argc -= argptr - argv;
107 argv = argptr;
108 #else
109 while ((ch = getopt(argc, argv, "")) != -1)
110 switch (ch) {
111 case '?':
112 default:
113 usage();
114 return (1);
115 }
116 argc -= optind;
117 argv += optind;
118 #endif
119
120 if (argc < 1) {
121 usage();
122 return (1);
123 }
124
125 #ifdef SHELL
126 INTOFF;
127 #endif
128 /*
129 * Basic algorithm is to scan the format string for conversion
130 * specifications -- once one is found, find out if the field
131 * width or precision is a '*'; if it is, gather up value. Note,
132 * format strings are reused as necessary to use up the provided
133 * arguments, arguments of zero/null string are provided to use
134 * up the format string.
135 */
136 fmt = format = *argv;
137 escape(fmt, 1, &len); /* backslash interpretation */
138 rval = end = 0;
139 gargv = ++argv;
140
141 for (;;) {
142 maxargv = gargv;
143
144 myargv = gargv;
145 for (myargc = 0; gargv[myargc]; myargc++)
146 /* nop */;
147 start = fmt;
148 while (fmt < format + len) {
149 if (fmt[0] == '%') {
150 fwrite(start, 1, fmt - start, stdout);
151 if (fmt[1] == '%') {
152 /* %% prints a % */
153 putchar('%');
154 fmt += 2;
155 } else {
156 fmt = printf_doformat(fmt, &rval);
157 if (fmt == NULL || fmt == end_fmt) {
158 #ifdef SHELL
159 INTON;
160 #endif
161 return (fmt == NULL ? 1 : rval);
162 }
163 end = 0;
164 }
165 start = fmt;
166 } else
167 fmt++;
168 if (gargv > maxargv)
169 maxargv = gargv;
170 }
171 gargv = maxargv;
172
173 if (end == 1) {
174 warnx("missing format character");
175 #ifdef SHELL
176 INTON;
177 #endif
178 return (1);
179 }
180 fwrite(start, 1, fmt - start, stdout);
181 if (!*gargv) {
182 #ifdef SHELL
183 INTON;
184 #endif
185 return (rval);
186 }
187 /* Restart at the beginning of the format string. */
188 fmt = format;
189 end = 1;
190 }
191 /* NOTREACHED */
192 }
193
194
195 static char *
printf_doformat(char * fmt,int * rval)196 printf_doformat(char *fmt, int *rval)
197 {
198 static const char skip1[] = "#'-+ 0";
199 int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
200 char convch, nextch;
201 char start[strlen(fmt) + 1];
202 char **fargv;
203 char *dptr;
204 int l;
205
206 dptr = start;
207 *dptr++ = '%';
208 *dptr = 0;
209
210 fmt++;
211
212 /* look for "n$" field index specifier */
213 l = strspn(fmt, digits);
214 if ((l > 0) && (fmt[l] == '$')) {
215 int idx = atoi(fmt);
216 if (idx <= myargc) {
217 gargv = &myargv[idx - 1];
218 } else {
219 gargv = &myargv[myargc];
220 }
221 if (gargv > maxargv)
222 maxargv = gargv;
223 fmt += l + 1;
224
225 /* save format argument */
226 fargv = gargv;
227 } else {
228 fargv = NULL;
229 }
230
231 /* skip to field width */
232 while (*fmt && strchr(skip1, *fmt) != NULL) {
233 *dptr++ = *fmt++;
234 *dptr = 0;
235 }
236
237 if (*fmt == '*') {
238
239 fmt++;
240 l = strspn(fmt, digits);
241 if ((l > 0) && (fmt[l] == '$')) {
242 int idx = atoi(fmt);
243 if (fargv == NULL) {
244 warnx("incomplete use of n$");
245 return (NULL);
246 }
247 if (idx <= myargc) {
248 gargv = &myargv[idx - 1];
249 } else {
250 gargv = &myargv[myargc];
251 }
252 fmt += l + 1;
253 } else if (fargv != NULL) {
254 warnx("incomplete use of n$");
255 return (NULL);
256 }
257
258 if (getint(&fieldwidth))
259 return (NULL);
260 if (gargv > maxargv)
261 maxargv = gargv;
262 havewidth = 1;
263
264 *dptr++ = '*';
265 *dptr = 0;
266 } else {
267 havewidth = 0;
268
269 /* skip to possible '.', get following precision */
270 while (isdigit(*fmt)) {
271 *dptr++ = *fmt++;
272 *dptr = 0;
273 }
274 }
275
276 if (*fmt == '.') {
277 /* precision present? */
278 fmt++;
279 *dptr++ = '.';
280
281 if (*fmt == '*') {
282
283 fmt++;
284 l = strspn(fmt, digits);
285 if ((l > 0) && (fmt[l] == '$')) {
286 int idx = atoi(fmt);
287 if (fargv == NULL) {
288 warnx("incomplete use of n$");
289 return (NULL);
290 }
291 if (idx <= myargc) {
292 gargv = &myargv[idx - 1];
293 } else {
294 gargv = &myargv[myargc];
295 }
296 fmt += l + 1;
297 } else if (fargv != NULL) {
298 warnx("incomplete use of n$");
299 return (NULL);
300 }
301
302 if (getint(&precision))
303 return (NULL);
304 if (gargv > maxargv)
305 maxargv = gargv;
306 haveprec = 1;
307 *dptr++ = '*';
308 *dptr = 0;
309 } else {
310 haveprec = 0;
311
312 /* skip to conversion char */
313 while (isdigit(*fmt)) {
314 *dptr++ = *fmt++;
315 *dptr = 0;
316 }
317 }
318 } else
319 haveprec = 0;
320 if (!*fmt) {
321 warnx("missing format character");
322 return (NULL);
323 }
324 *dptr++ = *fmt;
325 *dptr = 0;
326
327 /*
328 * Look for a length modifier. POSIX doesn't have these, so
329 * we only support them for floating-point conversions, which
330 * are extensions. This is useful because the L modifier can
331 * be used to gain extra range and precision, while omitting
332 * it is more likely to produce consistent results on different
333 * architectures. This is not so important for integers
334 * because overflow is the only bad thing that can happen to
335 * them, but consider the command printf %a 1.1
336 */
337 if (*fmt == 'L') {
338 mod_ldbl = 1;
339 fmt++;
340 if (!strchr("aAeEfFgG", *fmt)) {
341 warnx("bad modifier L for %%%c", *fmt);
342 return (NULL);
343 }
344 } else {
345 mod_ldbl = 0;
346 }
347
348 /* save the current arg offset, and set to the format arg */
349 if (fargv != NULL) {
350 gargv = fargv;
351 }
352
353 convch = *fmt;
354 nextch = *++fmt;
355
356 *fmt = '\0';
357 switch (convch) {
358 case 'b': {
359 size_t len;
360 char *p;
361 int getout;
362
363 /* Convert "b" to "s" for output. */
364 start[strlen(start) - 1] = 's';
365 if ((p = strdup(getstr())) == NULL) {
366 warnx("%s", strerror(ENOMEM));
367 return (NULL);
368 }
369 getout = escape(p, 0, &len);
370 PF(start, p);
371 /* Restore format for next loop. */
372
373 free(p);
374 if (getout)
375 return (end_fmt);
376 break;
377 }
378 case 'c': {
379 char p;
380
381 p = getchr();
382 if (p != '\0')
383 PF(start, p);
384 break;
385 }
386 case 's': {
387 const char *p;
388
389 p = getstr();
390 PF(start, p);
391 break;
392 }
393 case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
394 char *f;
395 intmax_t val;
396 uintmax_t uval;
397 int signedconv;
398
399 signedconv = (convch == 'd' || convch == 'i');
400 if ((f = mknum(start, convch)) == NULL)
401 return (NULL);
402 if (getnum(&val, &uval, signedconv))
403 *rval = 1;
404 if (signedconv)
405 PF(f, val);
406 else
407 PF(f, uval);
408 break;
409 }
410 case 'e': case 'E':
411 case 'f': case 'F':
412 case 'g': case 'G':
413 case 'a': case 'A': {
414 long double p;
415
416 if (getfloating(&p, mod_ldbl))
417 *rval = 1;
418 if (mod_ldbl)
419 PF(start, p);
420 else
421 PF(start, (double)p);
422 break;
423 }
424 default:
425 warnx("illegal format character %c", convch);
426 return (NULL);
427 }
428 *fmt = nextch;
429 /* return the gargv to the next element */
430 return (fmt);
431 }
432
433 static char *
mknum(char * str,char ch)434 mknum(char *str, char ch)
435 {
436 static char *copy;
437 static size_t copy_size;
438 char *newcopy;
439 size_t len, newlen;
440
441 len = strlen(str) + 2;
442 if (len > copy_size) {
443 newlen = ((len + 1023) >> 10) << 10;
444 if ((newcopy = realloc(copy, newlen)) == NULL) {
445 warnx("%s", strerror(ENOMEM));
446 return (NULL);
447 }
448 copy = newcopy;
449 copy_size = newlen;
450 }
451
452 memmove(copy, str, len - 3);
453 copy[len - 3] = 'j';
454 copy[len - 2] = ch;
455 copy[len - 1] = '\0';
456 return (copy);
457 }
458
/*
 * Expand backslash escape sequences in fmt, in place.
 *
 * percent is non-zero when fmt is the format operand and zero when it is
 * a %b operand.  The two differ as follows:
 *   - \c ends processing for a %b operand (the string is cut there and 1
 *     is returned); in the format it just yields the letter 'c'.
 *   - an octal escape may have a leading 0 followed by up to three more
 *     digits in a %b operand; in the format at most three digits are read.
 *   - an octal escape producing '%' is stored as "%%" in the format.
 * An unknown escape yields the character after the backslash; a lone
 * trailing backslash is kept.
 *
 * The resulting length, which may include NUL bytes produced by octal
 * escapes, is stored in *len.  Returns 1 only for the \c case described
 * above, otherwise 0.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	const char *in = fmt;
	char *out = fmt;
	int code, maxdigits;

	while (*in != '\0') {
		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		in++;			/* step over the backslash */
		switch (*in) {
		case '\0':		/* EOS, user error */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		case 'a':		/* bell/alert */
			*out++ = '\a';
			break;
		case 'b':		/* backspace */
			*out++ = '\b';
			break;
		case 'c':
			if (!percent) {
				*out = '\0';
				*len = out - base;
				return (1);
			}
			*out++ = 'c';
			break;
		case 'f':		/* form-feed */
			*out++ = '\f';
			break;
		case 'n':		/* newline */
			*out++ = '\n';
			break;
		case 'r':		/* carriage-return */
			*out++ = '\r';
			break;
		case 't':		/* horizontal tab */
			*out++ = '\t';
			break;
		case 'v':		/* vertical tab */
			*out++ = '\v';
			break;
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
			/* octal constant */
			maxdigits = (!percent && *in == '0') ? 4 : 3;
			code = 0;
			while (maxdigits-- > 0 && *in >= '0' && *in <= '7')
				code = (code << 3) + (*in++ - '0');
			if (percent && code == '%') {
				*out++ = '%';
				*out++ = '%';
			} else
				*out++ = (char)code;
			/* The digits have already been consumed. */
			continue;
		default:		/* backslash, quote, anything else */
			*out++ = *in;
			break;
		}
		in++;			/* step over the escape letter */
	}
	*out = '\0';
	*len = out - base;
	return (0);
}
534
535 static int
getchr(void)536 getchr(void)
537 {
538 if (!*gargv)
539 return ('\0');
540 return ((int)**gargv++);
541 }
542
543 static const char *
getstr(void)544 getstr(void)
545 {
546 if (!*gargv)
547 return ("");
548 return (*gargv++);
549 }
550
551 static int
getint(int * ip)552 getint(int *ip)
553 {
554 intmax_t val;
555 uintmax_t uval;
556 int rval;
557
558 if (getnum(&val, &uval, 1))
559 return (1);
560 rval = 0;
561 if (val < INT_MIN || val > INT_MAX) {
562 warnx("%s: %s", *gargv, strerror(ERANGE));
563 rval = 1;
564 }
565 *ip = (int)val;
566 return (rval);
567 }
568
569 static int
getnum(intmax_t * ip,uintmax_t * uip,int signedconv)570 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
571 {
572 char *ep;
573 int rval;
574
575 if (!*gargv) {
576 *ip = *uip = 0;
577 return (0);
578 }
579 if (**gargv == '"' || **gargv == '\'') {
580 if (signedconv)
581 *ip = asciicode();
582 else
583 *uip = asciicode();
584 return (0);
585 }
586 rval = 0;
587 errno = 0;
588 if (signedconv)
589 *ip = strtoimax(*gargv, &ep, 0);
590 else
591 *uip = strtoumax(*gargv, &ep, 0);
592 if (ep == *gargv) {
593 warnx("%s: expected numeric value", *gargv);
594 rval = 1;
595 }
596 else if (*ep != '\0') {
597 warnx("%s: not completely converted", *gargv);
598 rval = 1;
599 }
600 if (errno == ERANGE) {
601 warnx("%s: %s", *gargv, strerror(ERANGE));
602 rval = 1;
603 }
604 ++gargv;
605 return (rval);
606 }
607
608 static int
getfloating(long double * dp,int mod_ldbl)609 getfloating(long double *dp, int mod_ldbl)
610 {
611 char *ep;
612 int rval;
613
614 if (!*gargv) {
615 *dp = 0.0;
616 return (0);
617 }
618 if (**gargv == '"' || **gargv == '\'') {
619 *dp = asciicode();
620 return (0);
621 }
622 rval = 0;
623 errno = 0;
624 if (mod_ldbl)
625 *dp = strtold(*gargv, &ep);
626 else
627 *dp = strtod(*gargv, &ep);
628 if (ep == *gargv) {
629 warnx("%s: expected numeric value", *gargv);
630 rval = 1;
631 } else if (*ep != '\0') {
632 warnx("%s: not completely converted", *gargv);
633 rval = 1;
634 }
635 if (errno == ERANGE) {
636 warnx("%s: %s", *gargv, strerror(ERANGE));
637 rval = 1;
638 }
639 ++gargv;
640 return (rval);
641 }
642
643 static int
asciicode(void)644 asciicode(void)
645 {
646 int ch;
647 wchar_t wch;
648 mbstate_t mbs;
649
650 ch = (unsigned char)**gargv;
651 if (ch == '\'' || ch == '"') {
652 memset(&mbs, 0, sizeof(mbs));
653 switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
654 case (size_t)-2:
655 case (size_t)-1:
656 wch = (unsigned char)gargv[0][1];
657 break;
658 case 0:
659 wch = 0;
660 break;
661 }
662 ch = wch;
663 }
664 ++gargv;
665 return (ch);
666 }
667
/* Print the one-line usage synopsis on standard error. */
static void
usage(void)
{
	static const char synopsis[] = "usage: printf format [arguments ...]\n";

	(void)fputs(synopsis, stderr);
}
673