1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech> 5 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 6 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 7 * Copyright (c) 1989, 1993 8 * The Regents of the University of California. All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * Important: This file is used both as a standalone program /usr/bin/printf 36 * and as a builtin for /bin/sh (#define SHELL). 37 */ 38 39 #include <sys/types.h> 40 41 #include <ctype.h> 42 #include <err.h> 43 #include <errno.h> 44 #include <inttypes.h> 45 #include <limits.h> 46 #include <locale.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <wchar.h> 52 53 #ifdef SHELL 54 #define main printfcmd 55 #include "bltin/bltin.h" 56 #include "options.h" 57 #endif 58 59 #define PF(f, func) do { \ 60 if (havewidth) \ 61 if (haveprec) \ 62 (void)printf(f, fieldwidth, precision, func); \ 63 else \ 64 (void)printf(f, fieldwidth, func); \ 65 else if (haveprec) \ 66 (void)printf(f, precision, func); \ 67 else \ 68 (void)printf(f, func); \ 69 } while (0) 70 71 static int asciicode(void); 72 static char *printf_doformat(char *, int *); 73 static int escape(char *, int, size_t *); 74 static int getchr(void); 75 static int getfloating(long double *, int); 76 static int getint(int *); 77 static int getnum(intmax_t *, uintmax_t *, int); 78 static const char 79 *getstr(void); 80 static char *mknum(char *, char); 81 static void usage(void); 82 83 static const char digits[] = "0123456789"; 84 85 static char end_fmt[1]; 86 87 static int myargc; 88 static char **myargv; 89 static char **gargv; 90 static char **maxargv; 91 92 int 93 main(int argc, char *argv[]) 94 { 95 size_t len; 96 int end, rval; 97 char *format, *fmt, *start; 98 #ifndef SHELL 99 int ch; 100 101 (void) setlocale(LC_ALL, ""); 102 #endif 103 104 #ifdef SHELL 105 nextopt(""); 106 argc -= argptr - argv; 107 argv = argptr; 108 #else 109 while ((ch = getopt(argc, argv, "")) != -1) 110 switch (ch) { 111 case '?': 112 default: 113 usage(); 114 return (1); 115 } 116 argc -= optind; 117 argv += optind; 118 #endif 119 120 if (argc < 1) { 121 usage(); 122 return (1); 123 } 124 125 #ifdef SHELL 126 INTOFF; 127 #endif 128 /* 129 * Basic algorithm is to scan the format string for conversion 130 * specifications -- once one is found, find out if the field 131 * width or precision is a '*'; if it is, gather up value. Note, 132 * format strings are reused as necessary to use up the provided 133 * arguments, arguments of zero/null string are provided to use 134 * up the format string. 135 */ 136 fmt = format = *argv; 137 escape(fmt, 1, &len); /* backslash interpretation */ 138 rval = end = 0; 139 gargv = ++argv; 140 141 for (;;) { 142 maxargv = gargv; 143 144 myargv = gargv; 145 for (myargc = 0; gargv[myargc]; myargc++) 146 /* nop */; 147 start = fmt; 148 while (fmt < format + len) { 149 if (fmt[0] == '%') { 150 fwrite(start, 1, fmt - start, stdout); 151 if (fmt[1] == '%') { 152 /* %% prints a % */ 153 putchar('%'); 154 fmt += 2; 155 } else { 156 fmt = printf_doformat(fmt, &rval); 157 if (fmt == NULL || fmt == end_fmt) { 158 #ifdef SHELL 159 INTON; 160 #endif 161 return (fmt == NULL ? 1 : rval); 162 } 163 end = 0; 164 } 165 start = fmt; 166 } else 167 fmt++; 168 if (gargv > maxargv) 169 maxargv = gargv; 170 } 171 gargv = maxargv; 172 173 if (end == 1) { 174 warnx("missing format character"); 175 #ifdef SHELL 176 INTON; 177 #endif 178 return (1); 179 } 180 fwrite(start, 1, fmt - start, stdout); 181 if (!*gargv) { 182 #ifdef SHELL 183 INTON; 184 #endif 185 return (rval); 186 } 187 /* Restart at the beginning of the format string. */ 188 fmt = format; 189 end = 1; 190 } 191 /* NOTREACHED */ 192 } 193 194 195 static char * 196 printf_doformat(char *fmt, int *rval) 197 { 198 static const char skip1[] = "#'-+ 0"; 199 int fieldwidth, haveprec, havewidth, mod_ldbl, precision; 200 char convch, nextch; 201 char start[strlen(fmt) + 1]; 202 char **fargv; 203 char *dptr; 204 int l; 205 206 dptr = start; 207 *dptr++ = '%'; 208 *dptr = 0; 209 210 fmt++; 211 212 /* look for "n$" field index specifier */ 213 l = strspn(fmt, digits); 214 if ((l > 0) && (fmt[l] == '$')) { 215 int idx = atoi(fmt); 216 if (idx <= myargc) { 217 gargv = &myargv[idx - 1]; 218 } else { 219 gargv = &myargv[myargc]; 220 } 221 if (gargv > maxargv) 222 maxargv = gargv; 223 fmt += l + 1; 224 225 /* save format argument */ 226 fargv = gargv; 227 } else { 228 fargv = NULL; 229 } 230 231 /* skip to field width */ 232 while (*fmt && strchr(skip1, *fmt) != NULL) { 233 *dptr++ = *fmt++; 234 *dptr = 0; 235 } 236 237 if (*fmt == '*') { 238 239 fmt++; 240 l = strspn(fmt, digits); 241 if ((l > 0) && (fmt[l] == '$')) { 242 int idx = atoi(fmt); 243 if (fargv == NULL) { 244 warnx("incomplete use of n$"); 245 return (NULL); 246 } 247 if (idx <= myargc) { 248 gargv = &myargv[idx - 1]; 249 } else { 250 gargv = &myargv[myargc]; 251 } 252 fmt += l + 1; 253 } else if (fargv != NULL) { 254 warnx("incomplete use of n$"); 255 return (NULL); 256 } 257 258 if (getint(&fieldwidth)) 259 return (NULL); 260 if (gargv > maxargv) 261 maxargv = gargv; 262 havewidth = 1; 263 264 *dptr++ = '*'; 265 *dptr = 0; 266 } else { 267 havewidth = 0; 268 269 /* skip to possible '.', get following precision */ 270 while (isdigit(*fmt)) { 271 *dptr++ = *fmt++; 272 *dptr = 0; 273 } 274 } 275 276 if (*fmt == '.') { 277 /* precision present? */ 278 fmt++; 279 *dptr++ = '.'; 280 281 if (*fmt == '*') { 282 283 fmt++; 284 l = strspn(fmt, digits); 285 if ((l > 0) && (fmt[l] == '$')) { 286 int idx = atoi(fmt); 287 if (fargv == NULL) { 288 warnx("incomplete use of n$"); 289 return (NULL); 290 } 291 if (idx <= myargc) { 292 gargv = &myargv[idx - 1]; 293 } else { 294 gargv = &myargv[myargc]; 295 } 296 fmt += l + 1; 297 } else if (fargv != NULL) { 298 warnx("incomplete use of n$"); 299 return (NULL); 300 } 301 302 if (getint(&precision)) 303 return (NULL); 304 if (gargv > maxargv) 305 maxargv = gargv; 306 haveprec = 1; 307 *dptr++ = '*'; 308 *dptr = 0; 309 } else { 310 haveprec = 0; 311 312 /* skip to conversion char */ 313 while (isdigit(*fmt)) { 314 *dptr++ = *fmt++; 315 *dptr = 0; 316 } 317 } 318 } else 319 haveprec = 0; 320 if (!*fmt) { 321 warnx("missing format character"); 322 return (NULL); 323 } 324 *dptr++ = *fmt; 325 *dptr = 0; 326 327 /* 328 * Look for a length modifier. POSIX doesn't have these, so 329 * we only support them for floating-point conversions, which 330 * are extensions. This is useful because the L modifier can 331 * be used to gain extra range and precision, while omitting 332 * it is more likely to produce consistent results on different 333 * architectures. This is not so important for integers 334 * because overflow is the only bad thing that can happen to 335 * them, but consider the command printf %a 1.1 336 */ 337 if (*fmt == 'L') { 338 mod_ldbl = 1; 339 fmt++; 340 if (!strchr("aAeEfFgG", *fmt)) { 341 warnx("bad modifier L for %%%c", *fmt); 342 return (NULL); 343 } 344 } else { 345 mod_ldbl = 0; 346 } 347 348 /* save the current arg offset, and set to the format arg */ 349 if (fargv != NULL) { 350 gargv = fargv; 351 } 352 353 convch = *fmt; 354 nextch = *++fmt; 355 356 *fmt = '\0'; 357 switch (convch) { 358 case 'b': { 359 size_t len; 360 char *p; 361 int getout; 362 363 /* Convert "b" to "s" for output. */ 364 start[strlen(start) - 1] = 's'; 365 if ((p = strdup(getstr())) == NULL) { 366 warnx("%s", strerror(ENOMEM)); 367 return (NULL); 368 } 369 getout = escape(p, 0, &len); 370 PF(start, p); 371 /* Restore format for next loop. */ 372 373 free(p); 374 if (getout) 375 return (end_fmt); 376 break; 377 } 378 case 'c': { 379 char p; 380 381 p = getchr(); 382 if (p != '\0') 383 PF(start, p); 384 break; 385 } 386 case 's': { 387 const char *p; 388 389 p = getstr(); 390 PF(start, p); 391 break; 392 } 393 case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': { 394 char *f; 395 intmax_t val; 396 uintmax_t uval; 397 int signedconv; 398 399 signedconv = (convch == 'd' || convch == 'i'); 400 if ((f = mknum(start, convch)) == NULL) 401 return (NULL); 402 if (getnum(&val, &uval, signedconv)) 403 *rval = 1; 404 if (signedconv) 405 PF(f, val); 406 else 407 PF(f, uval); 408 break; 409 } 410 case 'e': case 'E': 411 case 'f': case 'F': 412 case 'g': case 'G': 413 case 'a': case 'A': { 414 long double p; 415 416 if (getfloating(&p, mod_ldbl)) 417 *rval = 1; 418 if (mod_ldbl) 419 PF(start, p); 420 else 421 PF(start, (double)p); 422 break; 423 } 424 default: 425 warnx("illegal format character %c", convch); 426 return (NULL); 427 } 428 *fmt = nextch; 429 /* return the gargv to the next element */ 430 return (fmt); 431 } 432 433 static char * 434 mknum(char *str, char ch) 435 { 436 static char *copy; 437 static size_t copy_size; 438 char *newcopy; 439 size_t len, newlen; 440 441 len = strlen(str) + 2; 442 if (len > copy_size) { 443 newlen = ((len + 1023) >> 10) << 10; 444 if ((newcopy = realloc(copy, newlen)) == NULL) { 445 warnx("%s", strerror(ENOMEM)); 446 return (NULL); 447 } 448 copy = newcopy; 449 copy_size = newlen; 450 } 451 452 memmove(copy, str, len - 3); 453 copy[len - 3] = 'j'; 454 copy[len - 2] = ch; 455 copy[len - 1] = '\0'; 456 return (copy); 457 } 458 459 static int 460 escape(char *fmt, int percent, size_t *len) 461 { 462 char *save, *store, c; 463 int value; 464 465 for (save = store = fmt; ((c = *fmt) != 0); ++fmt, ++store) { 466 if (c != '\\') { 467 *store = c; 468 continue; 469 } 470 switch (*++fmt) { 471 case '\0': /* EOS, user error */ 472 *store = '\\'; 473 *++store = '\0'; 474 *len = store - save; 475 return (0); 476 case '\\': /* backslash */ 477 case '\'': /* single quote */ 478 *store = *fmt; 479 break; 480 case 'a': /* bell/alert */ 481 *store = '\a'; 482 break; 483 case 'b': /* backspace */ 484 *store = '\b'; 485 break; 486 case 'c': 487 if (!percent) { 488 *store = '\0'; 489 *len = store - save; 490 return (1); 491 } 492 *store = 'c'; 493 break; 494 case 'f': /* form-feed */ 495 *store = '\f'; 496 break; 497 case 'n': /* newline */ 498 *store = '\n'; 499 break; 500 case 'r': /* carriage-return */ 501 *store = '\r'; 502 break; 503 case 't': /* horizontal tab */ 504 *store = '\t'; 505 break; 506 case 'v': /* vertical tab */ 507 *store = '\v'; 508 break; 509 /* octal constant */ 510 case '0': case '1': case '2': case '3': 511 case '4': case '5': case '6': case '7': 512 c = (!percent && *fmt == '0') ? 4 : 3; 513 for (value = 0; 514 c-- && *fmt >= '0' && *fmt <= '7'; ++fmt) { 515 value <<= 3; 516 value += *fmt - '0'; 517 } 518 --fmt; 519 if (percent && value == '%') { 520 *store++ = '%'; 521 *store = '%'; 522 } else 523 *store = (char)value; 524 break; 525 default: 526 *store = *fmt; 527 break; 528 } 529 } 530 *store = '\0'; 531 *len = store - save; 532 return (0); 533 } 534 535 static int 536 getchr(void) 537 { 538 if (!*gargv) 539 return ('\0'); 540 return ((int)**gargv++); 541 } 542 543 static const char * 544 getstr(void) 545 { 546 if (!*gargv) 547 return (""); 548 return (*gargv++); 549 } 550 551 static int 552 getint(int *ip) 553 { 554 intmax_t val; 555 uintmax_t uval; 556 int rval; 557 558 if (getnum(&val, &uval, 1)) 559 return (1); 560 rval = 0; 561 if (val < INT_MIN || val > INT_MAX) { 562 warnx("%s: %s", *gargv, strerror(ERANGE)); 563 rval = 1; 564 } 565 *ip = (int)val; 566 return (rval); 567 } 568 569 static int 570 getnum(intmax_t *ip, uintmax_t *uip, int signedconv) 571 { 572 char *ep; 573 int rval; 574 575 if (!*gargv) { 576 *ip = *uip = 0; 577 return (0); 578 } 579 if (**gargv == '"' || **gargv == '\'') { 580 if (signedconv) 581 *ip = asciicode(); 582 else 583 *uip = asciicode(); 584 return (0); 585 } 586 rval = 0; 587 errno = 0; 588 if (signedconv) 589 *ip = strtoimax(*gargv, &ep, 0); 590 else 591 *uip = strtoumax(*gargv, &ep, 0); 592 if (ep == *gargv) { 593 warnx("%s: expected numeric value", *gargv); 594 rval = 1; 595 } 596 else if (*ep != '\0') { 597 warnx("%s: not completely converted", *gargv); 598 rval = 1; 599 } 600 if (errno == ERANGE) { 601 warnx("%s: %s", *gargv, strerror(ERANGE)); 602 rval = 1; 603 } 604 ++gargv; 605 return (rval); 606 } 607 608 static int 609 getfloating(long double *dp, int mod_ldbl) 610 { 611 char *ep; 612 int rval; 613 614 if (!*gargv) { 615 *dp = 0.0; 616 return (0); 617 } 618 if (**gargv == '"' || **gargv == '\'') { 619 *dp = asciicode(); 620 return (0); 621 } 622 rval = 0; 623 errno = 0; 624 if (mod_ldbl) 625 *dp = strtold(*gargv, &ep); 626 else 627 *dp = strtod(*gargv, &ep); 628 if (ep == *gargv) { 629 warnx("%s: expected numeric value", *gargv); 630 rval = 1; 631 } else if (*ep != '\0') { 632 warnx("%s: not completely converted", *gargv); 633 rval = 1; 634 } 635 if (errno == ERANGE) { 636 warnx("%s: %s", *gargv, strerror(ERANGE)); 637 rval = 1; 638 } 639 ++gargv; 640 return (rval); 641 } 642 643 static int 644 asciicode(void) 645 { 646 int ch; 647 wchar_t wch; 648 mbstate_t mbs; 649 650 ch = (unsigned char)**gargv; 651 if (ch == '\'' || ch == '"') { 652 memset(&mbs, 0, sizeof(mbs)); 653 switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) { 654 case (size_t)-2: 655 case (size_t)-1: 656 wch = (unsigned char)gargv[0][1]; 657 break; 658 case 0: 659 wch = 0; 660 break; 661 } 662 ch = wch; 663 } 664 ++gargv; 665 return (ch); 666 } 667 668 static void 669 usage(void) 670 { 671 (void)fprintf(stderr, "usage: printf format [arguments ...]\n"); 672 } 673