1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Copyright (c) 2011 The FreeBSD Foundation 8 * 9 * Copyright (c) 2023 Dag-Erling Smørgrav 10 * 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Chris Torek. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 */ 41 42 #if defined(LIBC_SCCS) && !defined(lint) 43 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; 44 #endif /* LIBC_SCCS and not lint */ 45 #include <sys/cdefs.h> 46 #include "namespace.h" 47 #include <ctype.h> 48 #include <inttypes.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <stddef.h> 52 #include <stdarg.h> 53 #include <string.h> 54 #include <wchar.h> 55 #include <wctype.h> 56 #include "un-namespace.h" 57 58 #include "collate.h" 59 #include "libc_private.h" 60 #include "local.h" 61 #include "xlocale_private.h" 62 63 #ifndef NO_FLOATING_POINT 64 #include <locale.h> 65 #endif 66 67 #define BUF 513 /* Maximum length of numeric string. */ 68 69 /* 70 * Flags used during conversion. 71 */ 72 #define LONG 0x01 /* l: long or double */ 73 #define LONGDBL 0x02 /* L: long double */ 74 #define SHORT 0x04 /* h: short */ 75 #define SUPPRESS 0x08 /* *: suppress assignment */ 76 #define POINTER 0x10 /* p: void * (as hex) */ 77 #define NOSKIP 0x20 /* [ or c: do not skip blanks */ 78 #define FASTINT 0x200 /* wfN: int_fastN_t */ 79 #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 80 #define INTMAXT 0x800 /* j: intmax_t */ 81 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 82 #define SIZET 0x2000 /* z: size_t */ 83 #define SHORTSHORT 0x4000 /* hh: char */ 84 #define UNSIGNED 0x8000 /* %[oupxX] conversions */ 85 86 /* 87 * Conversion types. 88 */ 89 #define CT_CHAR 0 /* %c conversion */ 90 #define CT_CCL 1 /* %[...] conversion */ 91 #define CT_STRING 2 /* %s conversion */ 92 #define CT_INT 3 /* %[dioupxX] conversion */ 93 #define CT_FLOAT 4 /* %[efgEFG] conversion */ 94 95 static const u_char *__sccl(char *, const u_char *); 96 #ifndef NO_FLOATING_POINT 97 static int parsefloat(FILE *, char *, char *, locale_t); 98 #endif 99 100 __weak_reference(__vfscanf, vfscanf); 101 102 /* 103 * Conversion functions are passed a pointer to this object instead of 104 * a real parameter to indicate that the assignment-suppression (*) 105 * flag was specified. We could use a NULL pointer to indicate this, 106 * but that would mask bugs in applications that call scanf() with a 107 * NULL pointer. 108 */ 109 static const int suppress; 110 #define SUPPRESS_PTR ((void *)&suppress) 111 112 static const mbstate_t initial_mbs; 113 114 /* 115 * The following conversion functions return the number of characters consumed, 116 * or -1 on input failure. Character class conversion returns 0 on match 117 * failure. 118 */ 119 120 static __inline int 121 convert_char(FILE *fp, char * p, int width) 122 { 123 int n; 124 125 if (p == SUPPRESS_PTR) { 126 size_t sum = 0; 127 for (;;) { 128 if ((n = fp->_r) < width) { 129 sum += n; 130 width -= n; 131 fp->_p += n; 132 if (__srefill(fp)) { 133 if (sum == 0) 134 return (-1); 135 break; 136 } 137 } else { 138 sum += width; 139 fp->_r -= width; 140 fp->_p += width; 141 break; 142 } 143 } 144 return (sum); 145 } else { 146 size_t r = __fread(p, 1, width, fp); 147 148 if (r == 0) 149 return (-1); 150 return (r); 151 } 152 } 153 154 static __inline int 155 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) 156 { 157 mbstate_t mbs; 158 int n, nread; 159 wint_t wi; 160 161 mbs = initial_mbs; 162 n = 0; 163 while (width-- != 0 && 164 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) { 165 if (wcp != SUPPRESS_PTR) 166 *wcp++ = (wchar_t)wi; 167 n += nread; 168 } 169 if (n == 0) 170 return (-1); 171 return (n); 172 } 173 174 static __inline int 175 convert_ccl(FILE *fp, char * p, int width, const char *ccltab) 176 { 177 char *p0; 178 int n; 179 180 if (p == SUPPRESS_PTR) { 181 n = 0; 182 while (ccltab[*fp->_p]) { 183 n++, fp->_r--, fp->_p++; 184 if (--width == 0) 185 break; 186 if (fp->_r <= 0 && __srefill(fp)) { 187 if (n == 0) 188 return (-1); 189 break; 190 } 191 } 192 } else { 193 p0 = p; 194 while (ccltab[*fp->_p]) { 195 fp->_r--; 196 *p++ = *fp->_p++; 197 if (--width == 0) 198 break; 199 if (fp->_r <= 0 && __srefill(fp)) { 200 if (p == p0) 201 return (-1); 202 break; 203 } 204 } 205 n = p - p0; 206 if (n == 0) 207 return (0); 208 *p = 0; 209 } 210 return (n); 211 } 212 213 static __inline int 214 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab, 215 locale_t locale) 216 { 217 mbstate_t mbs; 218 wint_t wi; 219 int n, nread; 220 221 mbs = initial_mbs; 222 n = 0; 223 if (wcp == SUPPRESS_PTR) { 224 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 225 width-- != 0 && ccltab[wctob(wi)]) 226 n += nread; 227 if (wi != WEOF) 228 __ungetwc(wi, fp, __get_locale()); 229 } else { 230 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 231 width-- != 0 && ccltab[wctob(wi)]) { 232 *wcp++ = (wchar_t)wi; 233 n += nread; 234 } 235 if (wi != WEOF) 236 __ungetwc(wi, fp, __get_locale()); 237 if (n == 0) 238 return (0); 239 *wcp = 0; 240 } 241 return (n); 242 } 243 244 static __inline int 245 convert_string(FILE *fp, char * p, int width) 246 { 247 char *p0; 248 int n; 249 250 if (p == SUPPRESS_PTR) { 251 n = 0; 252 while (!isspace(*fp->_p)) { 253 n++, fp->_r--, fp->_p++; 254 if (--width == 0) 255 break; 256 if (fp->_r <= 0 && __srefill(fp)) 257 break; 258 } 259 } else { 260 p0 = p; 261 while (!isspace(*fp->_p)) { 262 fp->_r--; 263 *p++ = *fp->_p++; 264 if (--width == 0) 265 break; 266 if (fp->_r <= 0 && __srefill(fp)) 267 break; 268 } 269 *p = 0; 270 n = p - p0; 271 } 272 return (n); 273 } 274 275 static __inline int 276 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) 277 { 278 mbstate_t mbs; 279 wint_t wi; 280 int n, nread; 281 282 mbs = initial_mbs; 283 n = 0; 284 if (wcp == SUPPRESS_PTR) { 285 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 286 width-- != 0 && !iswspace(wi)) 287 n += nread; 288 if (wi != WEOF) 289 __ungetwc(wi, fp, __get_locale()); 290 } else { 291 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 292 width-- != 0 && !iswspace(wi)) { 293 *wcp++ = (wchar_t)wi; 294 n += nread; 295 } 296 if (wi != WEOF) 297 __ungetwc(wi, fp, __get_locale()); 298 *wcp = '\0'; 299 } 300 return (n); 301 } 302 303 enum parseint_state { 304 begin, 305 havesign, 306 havezero, 307 haveprefix, 308 any, 309 }; 310 311 static __inline int 312 parseint_fsm(int c, enum parseint_state *state, int *base) 313 { 314 switch (c) { 315 case '+': 316 case '-': 317 if (*state == begin) { 318 *state = havesign; 319 return 1; 320 } 321 break; 322 case '0': 323 if (*state == begin || *state == havesign) { 324 *state = havezero; 325 } else { 326 *state = any; 327 } 328 return 1; 329 case '1': 330 case '2': 331 case '3': 332 case '4': 333 case '5': 334 case '6': 335 case '7': 336 if (*state == havezero && *base == 0) { 337 *base = 8; 338 } 339 /* FALL THROUGH */ 340 case '8': 341 case '9': 342 if (*state == begin || 343 *state == havesign) { 344 if (*base == 0) { 345 *base = 10; 346 } 347 } 348 if (*state == begin || 349 *state == havesign || 350 *state == havezero || 351 *state == haveprefix || 352 *state == any) { 353 if (*base > c - '0') { 354 *state = any; 355 return 1; 356 } 357 } 358 break; 359 case 'b': 360 if (*state == havezero) { 361 if (*base == 0 || *base == 2) { 362 *state = haveprefix; 363 *base = 2; 364 return 1; 365 } 366 } 367 /* FALL THROUGH */ 368 case 'a': 369 case 'c': 370 case 'd': 371 case 'e': 372 case 'f': 373 if (*state == begin || 374 *state == havesign || 375 *state == havezero || 376 *state == haveprefix || 377 *state == any) { 378 if (*base > c - 'a' + 10) { 379 *state = any; 380 return 1; 381 } 382 } 383 break; 384 case 'B': 385 if (*state == havezero) { 386 if (*base == 0 || *base == 2) { 387 *state = haveprefix; 388 *base = 2; 389 return 1; 390 } 391 } 392 /* FALL THROUGH */ 393 case 'A': 394 case 'C': 395 case 'D': 396 case 'E': 397 case 'F': 398 if (*state == begin || 399 *state == havesign || 400 *state == havezero || 401 *state == haveprefix || 402 *state == any) { 403 if (*base > c - 'A' + 10) { 404 *state = any; 405 return 1; 406 } 407 } 408 break; 409 case 'x': 410 case 'X': 411 if (*state == havezero) { 412 if (*base == 0 || *base == 16) { 413 *state = haveprefix; 414 *base = 16; 415 return 1; 416 } 417 } 418 break; 419 } 420 return 0; 421 } 422 423 /* 424 * Read an integer, storing it in buf. 425 * 426 * Return 0 on a match failure, and the number of characters read 427 * otherwise. 428 */ 429 static __inline int 430 parseint(FILE *fp, char * __restrict buf, int width, int base) 431 { 432 enum parseint_state state = begin; 433 char *p; 434 int c; 435 436 for (p = buf; width; width--) { 437 c = __sgetc(fp); 438 if (c == EOF) 439 break; 440 if (!parseint_fsm(c, &state, &base)) 441 break; 442 *p++ = c; 443 } 444 /* 445 * If we only had a sign, push it back. If we only had a 0b or 0x 446 * prefix (possibly preceded by a sign), we view it as "0" and 447 * push back the letter. In all other cases, if we stopped 448 * because we read a non-number character, push it back. 449 */ 450 if (state == havesign) { 451 p--; 452 (void) __ungetc(*(u_char *)p, fp); 453 } else if (state == haveprefix) { 454 p--; 455 (void) __ungetc(c, fp); 456 } else if (width && c != EOF) { 457 (void) __ungetc(c, fp); 458 } 459 return (p - buf); 460 } 461 462 /* 463 * __vfscanf - MT-safe version 464 */ 465 int 466 __vfscanf(FILE *fp, char const *fmt0, va_list ap) 467 { 468 int ret; 469 470 FLOCKFILE_CANCELSAFE(fp); 471 ret = __svfscanf(fp, __get_locale(), fmt0, ap); 472 FUNLOCKFILE_CANCELSAFE(); 473 return (ret); 474 } 475 int 476 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap) 477 { 478 int ret; 479 FIX_LOCALE(locale); 480 481 FLOCKFILE_CANCELSAFE(fp); 482 ret = __svfscanf(fp, locale, fmt0, ap); 483 FUNLOCKFILE_CANCELSAFE(); 484 return (ret); 485 } 486 487 /* 488 * __svfscanf - non-MT-safe version of __vfscanf 489 */ 490 int 491 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap) 492 { 493 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) 494 const u_char *fmt = (const u_char *)fmt0; 495 int c; /* character from format, or conversion */ 496 size_t width; /* field width, or 0 */ 497 int flags; /* flags as defined above */ 498 int nassigned; /* number of fields assigned */ 499 int nconversions; /* number of conversions */ 500 int nr; /* characters read by the current conversion */ 501 int nread; /* number of characters consumed from fp */ 502 int base; /* base argument to conversion function */ 503 char ccltab[256]; /* character class table for %[...] */ 504 char buf[BUF]; /* buffer for numeric conversions */ 505 506 ORIENT(fp, -1); 507 508 nassigned = 0; 509 nconversions = 0; 510 nread = 0; 511 for (;;) { 512 c = *fmt++; 513 if (c == 0) 514 return (nassigned); 515 if (isspace(c)) { 516 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) 517 nread++, fp->_r--, fp->_p++; 518 continue; 519 } 520 if (c != '%') 521 goto literal; 522 width = 0; 523 flags = 0; 524 /* 525 * switch on the format. continue if done; 526 * break once format type is derived. 527 */ 528 again: c = *fmt++; 529 switch (c) { 530 case '%': 531 literal: 532 if (fp->_r <= 0 && __srefill(fp)) 533 goto input_failure; 534 if (*fp->_p != c) 535 goto match_failure; 536 fp->_r--, fp->_p++; 537 nread++; 538 continue; 539 540 case '*': 541 flags |= SUPPRESS; 542 goto again; 543 case 'j': 544 flags |= INTMAXT; 545 goto again; 546 case 'l': 547 if (flags & LONG) { 548 flags &= ~LONG; 549 flags |= LONGLONG; 550 } else 551 flags |= LONG; 552 goto again; 553 case 'q': 554 flags |= LONGLONG; /* not quite */ 555 goto again; 556 case 't': 557 flags |= PTRDIFFT; 558 goto again; 559 case 'w': 560 /* 561 * Fixed-width integer types. On all platforms we 562 * support, int8_t is equivalent to char, int16_t 563 * is equivalent to short, int32_t is equivalent 564 * to int, int64_t is equivalent to long long int. 565 * Furthermore, int_fast8_t, int_fast16_t and 566 * int_fast32_t are equivalent to int, and 567 * int_fast64_t is equivalent to long long int. 568 */ 569 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT); 570 if (fmt[0] == 'f') { 571 flags |= FASTINT; 572 fmt++; 573 } else { 574 flags &= ~FASTINT; 575 } 576 if (fmt[0] == '8') { 577 if (!(flags & FASTINT)) 578 flags |= SHORTSHORT; 579 else 580 /* no flag set = 32 */ ; 581 fmt += 1; 582 } else if (fmt[0] == '1' && fmt[1] == '6') { 583 if (!(flags & FASTINT)) 584 flags |= SHORT; 585 else 586 /* no flag set = 32 */ ; 587 fmt += 2; 588 } else if (fmt[0] == '3' && fmt[1] == '2') { 589 /* no flag set = 32 */ ; 590 fmt += 2; 591 } else if (fmt[0] == '6' && fmt[1] == '4') { 592 flags |= LONGLONG; 593 fmt += 2; 594 } else { 595 goto match_failure; 596 } 597 goto again; 598 case 'z': 599 flags |= SIZET; 600 goto again; 601 case 'L': 602 flags |= LONGDBL; 603 goto again; 604 case 'h': 605 if (flags & SHORT) { 606 flags &= ~SHORT; 607 flags |= SHORTSHORT; 608 } else 609 flags |= SHORT; 610 goto again; 611 612 case '0': case '1': case '2': case '3': case '4': 613 case '5': case '6': case '7': case '8': case '9': 614 width = width * 10 + c - '0'; 615 goto again; 616 617 /* 618 * Conversions. 619 */ 620 case 'B': 621 case 'b': 622 c = CT_INT; 623 flags |= UNSIGNED; 624 base = 2; 625 break; 626 627 case 'd': 628 c = CT_INT; 629 base = 10; 630 break; 631 632 case 'i': 633 c = CT_INT; 634 base = 0; 635 break; 636 637 case 'o': 638 c = CT_INT; 639 flags |= UNSIGNED; 640 base = 8; 641 break; 642 643 case 'u': 644 c = CT_INT; 645 flags |= UNSIGNED; 646 base = 10; 647 break; 648 649 case 'X': 650 case 'x': 651 c = CT_INT; 652 flags |= UNSIGNED; 653 base = 16; 654 break; 655 656 #ifndef NO_FLOATING_POINT 657 case 'A': case 'E': case 'F': case 'G': 658 case 'a': case 'e': case 'f': case 'g': 659 c = CT_FLOAT; 660 break; 661 #endif 662 663 case 'S': 664 flags |= LONG; 665 /* FALLTHROUGH */ 666 case 's': 667 c = CT_STRING; 668 break; 669 670 case '[': 671 fmt = __sccl(ccltab, fmt); 672 flags |= NOSKIP; 673 c = CT_CCL; 674 break; 675 676 case 'C': 677 flags |= LONG; 678 /* FALLTHROUGH */ 679 case 'c': 680 flags |= NOSKIP; 681 c = CT_CHAR; 682 break; 683 684 case 'p': /* pointer format is like hex */ 685 flags |= POINTER; 686 c = CT_INT; /* assumes sizeof(uintmax_t) */ 687 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 688 base = 16; 689 break; 690 691 case 'n': 692 if (flags & SUPPRESS) /* ??? */ 693 continue; 694 if (flags & SHORTSHORT) 695 *va_arg(ap, char *) = nread; 696 else if (flags & SHORT) 697 *va_arg(ap, short *) = nread; 698 else if (flags & LONG) 699 *va_arg(ap, long *) = nread; 700 else if (flags & LONGLONG) 701 *va_arg(ap, long long *) = nread; 702 else if (flags & INTMAXT) 703 *va_arg(ap, intmax_t *) = nread; 704 else if (flags & SIZET) 705 *va_arg(ap, size_t *) = nread; 706 else if (flags & PTRDIFFT) 707 *va_arg(ap, ptrdiff_t *) = nread; 708 else 709 *va_arg(ap, int *) = nread; 710 continue; 711 712 default: 713 goto match_failure; 714 715 /* 716 * Disgusting backwards compatibility hack. XXX 717 */ 718 case '\0': /* compat */ 719 return (EOF); 720 } 721 722 /* 723 * We have a conversion that requires input. 724 */ 725 if (fp->_r <= 0 && __srefill(fp)) 726 goto input_failure; 727 728 /* 729 * Consume leading white space, except for formats 730 * that suppress this. 731 */ 732 if ((flags & NOSKIP) == 0) { 733 while (isspace(*fp->_p)) { 734 nread++; 735 if (--fp->_r > 0) 736 fp->_p++; 737 else if (__srefill(fp)) 738 goto input_failure; 739 } 740 /* 741 * Note that there is at least one character in 742 * the buffer, so conversions that do not set NOSKIP 743 * ca no longer result in an input failure. 744 */ 745 } 746 747 /* 748 * Do the conversion. 749 */ 750 switch (c) { 751 752 case CT_CHAR: 753 /* scan arbitrary characters (sets NOSKIP) */ 754 if (width == 0) 755 width = 1; 756 if (flags & LONG) { 757 nr = convert_wchar(fp, GETARG(wchar_t *), 758 width, locale); 759 } else { 760 nr = convert_char(fp, GETARG(char *), width); 761 } 762 if (nr < 0) 763 goto input_failure; 764 break; 765 766 case CT_CCL: 767 /* scan a (nonempty) character class (sets NOSKIP) */ 768 if (width == 0) 769 width = (size_t)~0; /* `infinity' */ 770 if (flags & LONG) { 771 nr = convert_wccl(fp, GETARG(wchar_t *), width, 772 ccltab, locale); 773 } else { 774 nr = convert_ccl(fp, GETARG(char *), width, 775 ccltab); 776 } 777 if (nr <= 0) { 778 if (nr < 0) 779 goto input_failure; 780 else /* nr == 0 */ 781 goto match_failure; 782 } 783 break; 784 785 case CT_STRING: 786 /* like CCL, but zero-length string OK, & no NOSKIP */ 787 if (width == 0) 788 width = (size_t)~0; 789 if (flags & LONG) { 790 nr = convert_wstring(fp, GETARG(wchar_t *), 791 width, locale); 792 } else { 793 nr = convert_string(fp, GETARG(char *), width); 794 } 795 if (nr < 0) 796 goto input_failure; 797 break; 798 799 case CT_INT: 800 /* scan an integer as if by the conversion function */ 801 #ifdef hardway 802 if (width == 0 || width > sizeof(buf) - 1) 803 width = sizeof(buf) - 1; 804 #else 805 /* size_t is unsigned, hence this optimisation */ 806 if (--width > sizeof(buf) - 2) 807 width = sizeof(buf) - 2; 808 width++; 809 #endif 810 nr = parseint(fp, buf, width, base); 811 if (nr == 0) 812 goto match_failure; 813 if ((flags & SUPPRESS) == 0) { 814 uintmax_t res; 815 816 buf[nr] = '\0'; 817 if ((flags & UNSIGNED) == 0) 818 res = strtoimax_l(buf, (char **)NULL, base, locale); 819 else 820 res = strtoumax_l(buf, (char **)NULL, base, locale); 821 if (flags & POINTER) 822 *va_arg(ap, void **) = 823 (void *)(uintptr_t)res; 824 else if (flags & SHORTSHORT) 825 *va_arg(ap, char *) = res; 826 else if (flags & SHORT) 827 *va_arg(ap, short *) = res; 828 else if (flags & LONG) 829 *va_arg(ap, long *) = res; 830 else if (flags & LONGLONG) 831 *va_arg(ap, long long *) = res; 832 else if (flags & INTMAXT) 833 *va_arg(ap, intmax_t *) = res; 834 else if (flags & PTRDIFFT) 835 *va_arg(ap, ptrdiff_t *) = res; 836 else if (flags & SIZET) 837 *va_arg(ap, size_t *) = res; 838 else 839 *va_arg(ap, int *) = res; 840 } 841 break; 842 843 #ifndef NO_FLOATING_POINT 844 case CT_FLOAT: 845 /* scan a floating point number as if by strtod */ 846 if (width == 0 || width > sizeof(buf) - 1) 847 width = sizeof(buf) - 1; 848 nr = parsefloat(fp, buf, buf + width, locale); 849 if (nr == 0) 850 goto match_failure; 851 if ((flags & SUPPRESS) == 0) { 852 if (flags & LONGDBL) { 853 long double res = strtold_l(buf, NULL, 854 locale); 855 *va_arg(ap, long double *) = res; 856 } else if (flags & LONG) { 857 double res = strtod_l(buf, NULL, 858 locale); 859 *va_arg(ap, double *) = res; 860 } else { 861 float res = strtof_l(buf, NULL, locale); 862 *va_arg(ap, float *) = res; 863 } 864 } 865 break; 866 #endif /* !NO_FLOATING_POINT */ 867 } 868 if (!(flags & SUPPRESS)) 869 nassigned++; 870 nread += nr; 871 nconversions++; 872 } 873 input_failure: 874 return (nconversions != 0 ? nassigned : EOF); 875 match_failure: 876 return (nassigned); 877 } 878 879 /* 880 * Fill in the given table from the scanset at the given format 881 * (just after `['). Return a pointer to the character past the 882 * closing `]'. The table has a 1 wherever characters should be 883 * considered part of the scanset. 884 */ 885 static const u_char * 886 __sccl(char *tab, const u_char *fmt) 887 { 888 int c, n, v, i; 889 struct xlocale_collate *table = 890 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 891 892 /* first `clear' the whole table */ 893 c = *fmt++; /* first char hat => negated scanset */ 894 if (c == '^') { 895 v = 1; /* default => accept */ 896 c = *fmt++; /* get new first char */ 897 } else 898 v = 0; /* default => reject */ 899 900 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 901 (void) memset(tab, v, 256); 902 903 if (c == 0) 904 return (fmt - 1);/* format ended before closing ] */ 905 906 /* 907 * Now set the entries corresponding to the actual scanset 908 * to the opposite of the above. 909 * 910 * The first character may be ']' (or '-') without being special; 911 * the last character may be '-'. 912 */ 913 v = 1 - v; 914 for (;;) { 915 tab[c] = v; /* take character c */ 916 doswitch: 917 n = *fmt++; /* and examine the next */ 918 switch (n) { 919 920 case 0: /* format ended too soon */ 921 return (fmt - 1); 922 923 case '-': 924 /* 925 * A scanset of the form 926 * [01+-] 927 * is defined as `the digit 0, the digit 1, 928 * the character +, the character -', but 929 * the effect of a scanset such as 930 * [a-zA-Z0-9] 931 * is implementation defined. The V7 Unix 932 * scanf treats `a-z' as `the letters a through 933 * z', but treats `a-a' as `the letter a, the 934 * character -, and the letter a'. 935 * 936 * For compatibility, the `-' is not considered 937 * to define a range if the character following 938 * it is either a close bracket (required by ANSI) 939 * or is not numerically greater than the character 940 * we just stored in the table (c). 941 */ 942 n = *fmt; 943 if (n == ']' 944 || (table->__collate_load_error ? n < c : 945 __collate_range_cmp(n, c) < 0 946 ) 947 ) { 948 c = '-'; 949 break; /* resume the for(;;) */ 950 } 951 fmt++; 952 /* fill in the range */ 953 if (table->__collate_load_error) { 954 do { 955 tab[++c] = v; 956 } while (c < n); 957 } else { 958 for (i = 0; i < 256; i ++) 959 if (__collate_range_cmp(c, i) <= 0 && 960 __collate_range_cmp(i, n) <= 0 961 ) 962 tab[i] = v; 963 } 964 #if 1 /* XXX another disgusting compatibility hack */ 965 c = n; 966 /* 967 * Alas, the V7 Unix scanf also treats formats 968 * such as [a-c-e] as `the letters a through e'. 969 * This too is permitted by the standard.... 970 */ 971 goto doswitch; 972 #else 973 c = *fmt++; 974 if (c == 0) 975 return (fmt - 1); 976 if (c == ']') 977 return (fmt); 978 #endif 979 break; 980 981 case ']': /* end of scanset */ 982 return (fmt); 983 984 default: /* just another character */ 985 c = n; 986 break; 987 } 988 } 989 /* NOTREACHED */ 990 } 991 992 #ifndef NO_FLOATING_POINT 993 static int 994 parsefloat(FILE *fp, char *buf, char *end, locale_t locale) 995 { 996 char *commit, *p; 997 int infnanpos = 0, decptpos = 0; 998 enum { 999 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, 1000 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS 1001 } state = S_START; 1002 unsigned char c; 1003 const char *decpt = localeconv_l(locale)->decimal_point; 1004 _Bool gotmantdig = 0, ishex = 0; 1005 1006 /* 1007 * We set commit = p whenever the string we have read so far 1008 * constitutes a valid representation of a floating point 1009 * number by itself. At some point, the parse will complete 1010 * or fail, and we will ungetc() back to the last commit point. 1011 * To ensure that the file offset gets updated properly, it is 1012 * always necessary to read at least one character that doesn't 1013 * match; thus, we can't short-circuit "infinity" or "nan(...)". 1014 */ 1015 commit = buf - 1; 1016 for (p = buf; p < end; ) { 1017 c = *fp->_p; 1018 reswitch: 1019 switch (state) { 1020 case S_START: 1021 state = S_GOTSIGN; 1022 if (c == '-' || c == '+') 1023 break; 1024 else 1025 goto reswitch; 1026 case S_GOTSIGN: 1027 switch (c) { 1028 case '0': 1029 state = S_MAYBEHEX; 1030 commit = p; 1031 break; 1032 case 'I': 1033 case 'i': 1034 state = S_INF; 1035 break; 1036 case 'N': 1037 case 'n': 1038 state = S_NAN; 1039 break; 1040 default: 1041 state = S_DIGITS; 1042 goto reswitch; 1043 } 1044 break; 1045 case S_INF: 1046 if (infnanpos > 6 || 1047 (c != "nfinity"[infnanpos] && 1048 c != "NFINITY"[infnanpos])) 1049 goto parsedone; 1050 if (infnanpos == 1 || infnanpos == 6) 1051 commit = p; /* inf or infinity */ 1052 infnanpos++; 1053 break; 1054 case S_NAN: 1055 switch (infnanpos) { 1056 case 0: 1057 if (c != 'A' && c != 'a') 1058 goto parsedone; 1059 break; 1060 case 1: 1061 if (c != 'N' && c != 'n') 1062 goto parsedone; 1063 else 1064 commit = p; 1065 break; 1066 case 2: 1067 if (c != '(') 1068 goto parsedone; 1069 break; 1070 default: 1071 if (c == ')') { 1072 commit = p; 1073 state = S_DONE; 1074 } else if (!isalnum(c) && c != '_') 1075 goto parsedone; 1076 break; 1077 } 1078 infnanpos++; 1079 break; 1080 case S_DONE: 1081 goto parsedone; 1082 case S_MAYBEHEX: 1083 state = S_DIGITS; 1084 if (c == 'X' || c == 'x') { 1085 ishex = 1; 1086 break; 1087 } else { /* we saw a '0', but no 'x' */ 1088 gotmantdig = 1; 1089 goto reswitch; 1090 } 1091 case S_DIGITS: 1092 if ((ishex && isxdigit(c)) || isdigit(c)) { 1093 gotmantdig = 1; 1094 commit = p; 1095 break; 1096 } else { 1097 state = S_DECPT; 1098 goto reswitch; 1099 } 1100 case S_DECPT: 1101 if (c == decpt[decptpos]) { 1102 if (decpt[++decptpos] == '\0') { 1103 /* We read the complete decpt seq. */ 1104 state = S_FRAC; 1105 if (gotmantdig) 1106 commit = p; 1107 } 1108 break; 1109 } else if (!decptpos) { 1110 /* We didn't read any decpt characters. */ 1111 state = S_FRAC; 1112 goto reswitch; 1113 } else { 1114 /* 1115 * We read part of a multibyte decimal point, 1116 * but the rest is invalid, so bail. 1117 */ 1118 goto parsedone; 1119 } 1120 case S_FRAC: 1121 if (((c == 'E' || c == 'e') && !ishex) || 1122 ((c == 'P' || c == 'p') && ishex)) { 1123 if (!gotmantdig) 1124 goto parsedone; 1125 else 1126 state = S_EXP; 1127 } else if ((ishex && isxdigit(c)) || isdigit(c)) { 1128 commit = p; 1129 gotmantdig = 1; 1130 } else 1131 goto parsedone; 1132 break; 1133 case S_EXP: 1134 state = S_EXPDIGITS; 1135 if (c == '-' || c == '+') 1136 break; 1137 else 1138 goto reswitch; 1139 case S_EXPDIGITS: 1140 if (isdigit(c)) 1141 commit = p; 1142 else 1143 goto parsedone; 1144 break; 1145 default: 1146 abort(); 1147 } 1148 *p++ = c; 1149 if (--fp->_r > 0) 1150 fp->_p++; 1151 else if (__srefill(fp)) 1152 break; /* EOF */ 1153 } 1154 1155 parsedone: 1156 while (commit < --p) 1157 __ungetc(*(u_char *)p, fp); 1158 *++commit = '\0'; 1159 return (commit - buf); 1160 } 1161 #endif 1162