1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Copyright (c) 2011 The FreeBSD Foundation 8 * 9 * Copyright (c) 2023 Dag-Erling Smørgrav 10 * 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Chris Torek. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 */ 41 42 #if defined(LIBC_SCCS) && !defined(lint) 43 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; 44 #endif /* LIBC_SCCS and not lint */ 45 #include "namespace.h" 46 #include <ctype.h> 47 #include <inttypes.h> 48 #include <stdio.h> 49 #include <stdlib.h> 50 #include <stddef.h> 51 #include <stdarg.h> 52 #include <string.h> 53 #include <wchar.h> 54 #include <wctype.h> 55 #include "un-namespace.h" 56 57 #include "collate.h" 58 #include "libc_private.h" 59 #include "local.h" 60 #include "xlocale_private.h" 61 62 #ifndef NO_FLOATING_POINT 63 #include <locale.h> 64 #endif 65 66 #define BUF 513 /* Maximum length of numeric string. */ 67 68 /* 69 * Flags used during conversion. 70 */ 71 #define LONG 0x01 /* l: long or double */ 72 #define LONGDBL 0x02 /* L: long double */ 73 #define SHORT 0x04 /* h: short */ 74 #define SUPPRESS 0x08 /* *: suppress assignment */ 75 #define POINTER 0x10 /* p: void * (as hex) */ 76 #define NOSKIP 0x20 /* [ or c: do not skip blanks */ 77 #define FASTINT 0x200 /* wfN: int_fastN_t */ 78 #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 79 #define INTMAXT 0x800 /* j: intmax_t */ 80 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 81 #define SIZET 0x2000 /* z: size_t */ 82 #define SHORTSHORT 0x4000 /* hh: char */ 83 #define UNSIGNED 0x8000 /* %[oupxX] conversions */ 84 85 /* 86 * Conversion types. 87 */ 88 #define CT_CHAR 0 /* %c conversion */ 89 #define CT_CCL 1 /* %[...] conversion */ 90 #define CT_STRING 2 /* %s conversion */ 91 #define CT_INT 3 /* %[dioupxX] conversion */ 92 #define CT_FLOAT 4 /* %[efgEFG] conversion */ 93 94 static const u_char *__sccl(char *, const u_char *); 95 #ifndef NO_FLOATING_POINT 96 static int parsefloat(FILE *, char *, char *, locale_t); 97 #endif 98 99 __weak_reference(__vfscanf, vfscanf); 100 101 /* 102 * Conversion functions are passed a pointer to this object instead of 103 * a real parameter to indicate that the assignment-suppression (*) 104 * flag was specified. We could use a NULL pointer to indicate this, 105 * but that would mask bugs in applications that call scanf() with a 106 * NULL pointer. 107 */ 108 static const int suppress; 109 #define SUPPRESS_PTR ((void *)&suppress) 110 111 static const mbstate_t initial_mbs; 112 113 /* 114 * The following conversion functions return the number of characters consumed, 115 * or -1 on input failure. Character class conversion returns 0 on match 116 * failure. 117 */ 118 119 static __inline int 120 convert_char(FILE *fp, char * p, int width) 121 { 122 int n; 123 124 if (p == SUPPRESS_PTR) { 125 size_t sum = 0; 126 for (;;) { 127 if ((n = fp->_r) < width) { 128 sum += n; 129 width -= n; 130 fp->_p += n; 131 if (__srefill(fp)) { 132 if (sum == 0) 133 return (-1); 134 break; 135 } 136 } else { 137 sum += width; 138 fp->_r -= width; 139 fp->_p += width; 140 break; 141 } 142 } 143 return (sum); 144 } else { 145 size_t r = __fread(p, 1, width, fp); 146 147 if (r == 0) 148 return (-1); 149 return (r); 150 } 151 } 152 153 static __inline int 154 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) 155 { 156 mbstate_t mbs; 157 int n, nread; 158 wint_t wi; 159 160 mbs = initial_mbs; 161 n = 0; 162 while (width-- != 0 && 163 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) { 164 if (wcp != SUPPRESS_PTR) 165 *wcp++ = (wchar_t)wi; 166 n += nread; 167 } 168 if (n == 0) 169 return (-1); 170 return (n); 171 } 172 173 static __inline int 174 convert_ccl(FILE *fp, char * p, int width, const char *ccltab) 175 { 176 char *p0; 177 int n; 178 179 if (p == SUPPRESS_PTR) { 180 n = 0; 181 while (ccltab[*fp->_p]) { 182 n++, fp->_r--, fp->_p++; 183 if (--width == 0) 184 break; 185 if (fp->_r <= 0 && __srefill(fp)) { 186 if (n == 0) 187 return (-1); 188 break; 189 } 190 } 191 } else { 192 p0 = p; 193 while (ccltab[*fp->_p]) { 194 fp->_r--; 195 *p++ = *fp->_p++; 196 if (--width == 0) 197 break; 198 if (fp->_r <= 0 && __srefill(fp)) { 199 if (p == p0) 200 return (-1); 201 break; 202 } 203 } 204 n = p - p0; 205 if (n == 0) 206 return (0); 207 *p = 0; 208 } 209 return (n); 210 } 211 212 static __inline int 213 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab, 214 locale_t locale) 215 { 216 mbstate_t mbs; 217 wint_t wi; 218 int n, nread; 219 220 mbs = initial_mbs; 221 n = 0; 222 if (wcp == SUPPRESS_PTR) { 223 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 224 width-- != 0 && ccltab[wctob(wi)]) 225 n += nread; 226 if (wi != WEOF) 227 __ungetwc(wi, fp, __get_locale()); 228 } else { 229 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 230 width-- != 0 && ccltab[wctob(wi)]) { 231 *wcp++ = (wchar_t)wi; 232 n += nread; 233 } 234 if (wi != WEOF) 235 __ungetwc(wi, fp, __get_locale()); 236 if (n == 0) 237 return (0); 238 *wcp = 0; 239 } 240 return (n); 241 } 242 243 static __inline int 244 convert_string(FILE *fp, char * p, int width) 245 { 246 char *p0; 247 int n; 248 249 if (p == SUPPRESS_PTR) { 250 n = 0; 251 while (!isspace(*fp->_p)) { 252 n++, fp->_r--, fp->_p++; 253 if (--width == 0) 254 break; 255 if (fp->_r <= 0 && __srefill(fp)) 256 break; 257 } 258 } else { 259 p0 = p; 260 while (!isspace(*fp->_p)) { 261 fp->_r--; 262 *p++ = *fp->_p++; 263 if (--width == 0) 264 break; 265 if (fp->_r <= 0 && __srefill(fp)) 266 break; 267 } 268 *p = 0; 269 n = p - p0; 270 } 271 return (n); 272 } 273 274 static __inline int 275 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) 276 { 277 mbstate_t mbs; 278 wint_t wi; 279 int n, nread; 280 281 mbs = initial_mbs; 282 n = 0; 283 if (wcp == SUPPRESS_PTR) { 284 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 285 width-- != 0 && !iswspace(wi)) 286 n += nread; 287 if (wi != WEOF) 288 __ungetwc(wi, fp, __get_locale()); 289 } else { 290 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 291 width-- != 0 && !iswspace(wi)) { 292 *wcp++ = (wchar_t)wi; 293 n += nread; 294 } 295 if (wi != WEOF) 296 __ungetwc(wi, fp, __get_locale()); 297 *wcp = '\0'; 298 } 299 return (n); 300 } 301 302 enum parseint_state { 303 begin, 304 havesign, 305 havezero, 306 haveprefix, 307 any, 308 }; 309 310 static __inline int 311 parseint_fsm(int c, enum parseint_state *state, int *base) 312 { 313 switch (c) { 314 case '+': 315 case '-': 316 if (*state == begin) { 317 *state = havesign; 318 return 1; 319 } 320 break; 321 case '0': 322 if (*state == begin || *state == havesign) { 323 *state = havezero; 324 } else { 325 *state = any; 326 } 327 return 1; 328 case '1': 329 case '2': 330 case '3': 331 case '4': 332 case '5': 333 case '6': 334 case '7': 335 if (*state == havezero && *base == 0) { 336 *base = 8; 337 } 338 /* FALL THROUGH */ 339 case '8': 340 case '9': 341 if (*state == begin || 342 *state == havesign) { 343 if (*base == 0) { 344 *base = 10; 345 } 346 } 347 if (*state == begin || 348 *state == havesign || 349 *state == havezero || 350 *state == haveprefix || 351 *state == any) { 352 if (*base > c - '0') { 353 *state = any; 354 return 1; 355 } 356 } 357 break; 358 case 'b': 359 if (*state == havezero) { 360 if (*base == 0 || *base == 2) { 361 *state = haveprefix; 362 *base = 2; 363 return 1; 364 } 365 } 366 /* FALL THROUGH */ 367 case 'a': 368 case 'c': 369 case 'd': 370 case 'e': 371 case 'f': 372 if (*state == begin || 373 *state == havesign || 374 *state == havezero || 375 *state == haveprefix || 376 *state == any) { 377 if (*base > c - 'a' + 10) { 378 *state = any; 379 return 1; 380 } 381 } 382 break; 383 case 'B': 384 if (*state == havezero) { 385 if (*base == 0 || *base == 2) { 386 *state = haveprefix; 387 *base = 2; 388 return 1; 389 } 390 } 391 /* FALL THROUGH */ 392 case 'A': 393 case 'C': 394 case 'D': 395 case 'E': 396 case 'F': 397 if (*state == begin || 398 *state == havesign || 399 *state == havezero || 400 *state == haveprefix || 401 *state == any) { 402 if (*base > c - 'A' + 10) { 403 *state = any; 404 return 1; 405 } 406 } 407 break; 408 case 'x': 409 case 'X': 410 if (*state == havezero) { 411 if (*base == 0 || *base == 16) { 412 *state = haveprefix; 413 *base = 16; 414 return 1; 415 } 416 } 417 break; 418 } 419 return 0; 420 } 421 422 /* 423 * Read an integer, storing it in buf. 424 * 425 * Return 0 on a match failure, and the number of characters read 426 * otherwise. 427 */ 428 static __inline int 429 parseint(FILE *fp, char * __restrict buf, int width, int base) 430 { 431 enum parseint_state state = begin; 432 char *p; 433 int c; 434 435 for (p = buf; width; width--) { 436 c = __sgetc(fp); 437 if (c == EOF) 438 break; 439 if (!parseint_fsm(c, &state, &base)) 440 break; 441 *p++ = c; 442 } 443 /* 444 * If we only had a sign, push it back. If we only had a 0b or 0x 445 * prefix (possibly preceded by a sign), we view it as "0" and 446 * push back the letter. In all other cases, if we stopped 447 * because we read a non-number character, push it back. 448 */ 449 if (state == havesign) { 450 p--; 451 (void) __ungetc(*(u_char *)p, fp); 452 } else if (state == haveprefix) { 453 p--; 454 (void) __ungetc(c, fp); 455 } else if (width && c != EOF) { 456 (void) __ungetc(c, fp); 457 } 458 return (p - buf); 459 } 460 461 /* 462 * __vfscanf - MT-safe version 463 */ 464 int 465 __vfscanf(FILE *fp, char const *fmt0, va_list ap) 466 { 467 int ret; 468 469 FLOCKFILE_CANCELSAFE(fp); 470 ret = __svfscanf(fp, __get_locale(), fmt0, ap); 471 FUNLOCKFILE_CANCELSAFE(); 472 return (ret); 473 } 474 int 475 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap) 476 { 477 int ret; 478 FIX_LOCALE(locale); 479 480 FLOCKFILE_CANCELSAFE(fp); 481 ret = __svfscanf(fp, locale, fmt0, ap); 482 FUNLOCKFILE_CANCELSAFE(); 483 return (ret); 484 } 485 486 /* 487 * __svfscanf - non-MT-safe version of __vfscanf 488 */ 489 int 490 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap) 491 { 492 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) 493 const u_char *fmt = (const u_char *)fmt0; 494 int c; /* character from format, or conversion */ 495 size_t width; /* field width, or 0 */ 496 int flags; /* flags as defined above */ 497 int nassigned; /* number of fields assigned */ 498 int nconversions; /* number of conversions */ 499 int nr; /* characters read by the current conversion */ 500 int nread; /* number of characters consumed from fp */ 501 int base; /* base argument to conversion function */ 502 char ccltab[256]; /* character class table for %[...] */ 503 char buf[BUF]; /* buffer for numeric conversions */ 504 505 ORIENT(fp, -1); 506 507 nassigned = 0; 508 nconversions = 0; 509 nread = 0; 510 for (;;) { 511 c = *fmt++; 512 if (c == 0) 513 return (nassigned); 514 if (isspace(c)) { 515 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) 516 nread++, fp->_r--, fp->_p++; 517 continue; 518 } 519 if (c != '%') 520 goto literal; 521 width = 0; 522 flags = 0; 523 /* 524 * switch on the format. continue if done; 525 * break once format type is derived. 526 */ 527 again: c = *fmt++; 528 switch (c) { 529 case '%': 530 literal: 531 if (fp->_r <= 0 && __srefill(fp)) 532 goto input_failure; 533 if (*fp->_p != c) 534 goto match_failure; 535 fp->_r--, fp->_p++; 536 nread++; 537 continue; 538 539 case '*': 540 flags |= SUPPRESS; 541 goto again; 542 case 'j': 543 flags |= INTMAXT; 544 goto again; 545 case 'l': 546 if (flags & LONG) { 547 flags &= ~LONG; 548 flags |= LONGLONG; 549 } else 550 flags |= LONG; 551 goto again; 552 case 'q': 553 flags |= LONGLONG; /* not quite */ 554 goto again; 555 case 't': 556 flags |= PTRDIFFT; 557 goto again; 558 case 'w': 559 /* 560 * Fixed-width integer types. On all platforms we 561 * support, int8_t is equivalent to char, int16_t 562 * is equivalent to short, int32_t is equivalent 563 * to int, int64_t is equivalent to long long int. 564 * Furthermore, int_fast8_t, int_fast16_t and 565 * int_fast32_t are equivalent to int, and 566 * int_fast64_t is equivalent to long long int. 567 */ 568 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT); 569 if (fmt[0] == 'f') { 570 flags |= FASTINT; 571 fmt++; 572 } else { 573 flags &= ~FASTINT; 574 } 575 if (fmt[0] == '8') { 576 if (!(flags & FASTINT)) 577 flags |= SHORTSHORT; 578 else 579 /* no flag set = 32 */ ; 580 fmt += 1; 581 } else if (fmt[0] == '1' && fmt[1] == '6') { 582 if (!(flags & FASTINT)) 583 flags |= SHORT; 584 else 585 /* no flag set = 32 */ ; 586 fmt += 2; 587 } else if (fmt[0] == '3' && fmt[1] == '2') { 588 /* no flag set = 32 */ ; 589 fmt += 2; 590 } else if (fmt[0] == '6' && fmt[1] == '4') { 591 flags |= LONGLONG; 592 fmt += 2; 593 } else { 594 goto match_failure; 595 } 596 goto again; 597 case 'z': 598 flags |= SIZET; 599 goto again; 600 case 'L': 601 flags |= LONGDBL; 602 goto again; 603 case 'h': 604 if (flags & SHORT) { 605 flags &= ~SHORT; 606 flags |= SHORTSHORT; 607 } else 608 flags |= SHORT; 609 goto again; 610 611 case '0': case '1': case '2': case '3': case '4': 612 case '5': case '6': case '7': case '8': case '9': 613 width = width * 10 + c - '0'; 614 goto again; 615 616 /* 617 * Conversions. 618 */ 619 case 'B': 620 case 'b': 621 c = CT_INT; 622 flags |= UNSIGNED; 623 base = 2; 624 break; 625 626 case 'd': 627 c = CT_INT; 628 base = 10; 629 break; 630 631 case 'i': 632 c = CT_INT; 633 base = 0; 634 break; 635 636 case 'o': 637 c = CT_INT; 638 flags |= UNSIGNED; 639 base = 8; 640 break; 641 642 case 'u': 643 c = CT_INT; 644 flags |= UNSIGNED; 645 base = 10; 646 break; 647 648 case 'X': 649 case 'x': 650 c = CT_INT; 651 flags |= UNSIGNED; 652 base = 16; 653 break; 654 655 #ifndef NO_FLOATING_POINT 656 case 'A': case 'E': case 'F': case 'G': 657 case 'a': case 'e': case 'f': case 'g': 658 c = CT_FLOAT; 659 break; 660 #endif 661 662 case 'S': 663 flags |= LONG; 664 /* FALLTHROUGH */ 665 case 's': 666 c = CT_STRING; 667 break; 668 669 case '[': 670 fmt = __sccl(ccltab, fmt); 671 flags |= NOSKIP; 672 c = CT_CCL; 673 break; 674 675 case 'C': 676 flags |= LONG; 677 /* FALLTHROUGH */ 678 case 'c': 679 flags |= NOSKIP; 680 c = CT_CHAR; 681 break; 682 683 case 'p': /* pointer format is like hex */ 684 flags |= POINTER; 685 c = CT_INT; /* assumes sizeof(uintmax_t) */ 686 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 687 base = 16; 688 break; 689 690 case 'n': 691 if (flags & SUPPRESS) /* ??? */ 692 continue; 693 if (flags & SHORTSHORT) 694 *va_arg(ap, char *) = nread; 695 else if (flags & SHORT) 696 *va_arg(ap, short *) = nread; 697 else if (flags & LONG) 698 *va_arg(ap, long *) = nread; 699 else if (flags & LONGLONG) 700 *va_arg(ap, long long *) = nread; 701 else if (flags & INTMAXT) 702 *va_arg(ap, intmax_t *) = nread; 703 else if (flags & SIZET) 704 *va_arg(ap, size_t *) = nread; 705 else if (flags & PTRDIFFT) 706 *va_arg(ap, ptrdiff_t *) = nread; 707 else 708 *va_arg(ap, int *) = nread; 709 continue; 710 711 default: 712 goto match_failure; 713 714 /* 715 * Disgusting backwards compatibility hack. XXX 716 */ 717 case '\0': /* compat */ 718 return (EOF); 719 } 720 721 /* 722 * We have a conversion that requires input. 723 */ 724 if (fp->_r <= 0 && __srefill(fp)) 725 goto input_failure; 726 727 /* 728 * Consume leading white space, except for formats 729 * that suppress this. 730 */ 731 if ((flags & NOSKIP) == 0) { 732 while (isspace(*fp->_p)) { 733 nread++; 734 if (--fp->_r > 0) 735 fp->_p++; 736 else if (__srefill(fp)) 737 goto input_failure; 738 } 739 /* 740 * Note that there is at least one character in 741 * the buffer, so conversions that do not set NOSKIP 742 * ca no longer result in an input failure. 743 */ 744 } 745 746 /* 747 * Do the conversion. 748 */ 749 switch (c) { 750 751 case CT_CHAR: 752 /* scan arbitrary characters (sets NOSKIP) */ 753 if (width == 0) 754 width = 1; 755 if (flags & LONG) { 756 nr = convert_wchar(fp, GETARG(wchar_t *), 757 width, locale); 758 } else { 759 nr = convert_char(fp, GETARG(char *), width); 760 } 761 if (nr < 0) 762 goto input_failure; 763 break; 764 765 case CT_CCL: 766 /* scan a (nonempty) character class (sets NOSKIP) */ 767 if (width == 0) 768 width = (size_t)~0; /* `infinity' */ 769 if (flags & LONG) { 770 nr = convert_wccl(fp, GETARG(wchar_t *), width, 771 ccltab, locale); 772 } else { 773 nr = convert_ccl(fp, GETARG(char *), width, 774 ccltab); 775 } 776 if (nr <= 0) { 777 if (nr < 0) 778 goto input_failure; 779 else /* nr == 0 */ 780 goto match_failure; 781 } 782 break; 783 784 case CT_STRING: 785 /* like CCL, but zero-length string OK, & no NOSKIP */ 786 if (width == 0) 787 width = (size_t)~0; 788 if (flags & LONG) { 789 nr = convert_wstring(fp, GETARG(wchar_t *), 790 width, locale); 791 } else { 792 nr = convert_string(fp, GETARG(char *), width); 793 } 794 if (nr < 0) 795 goto input_failure; 796 break; 797 798 case CT_INT: 799 /* scan an integer as if by the conversion function */ 800 #ifdef hardway 801 if (width == 0 || width > sizeof(buf) - 1) 802 width = sizeof(buf) - 1; 803 #else 804 /* size_t is unsigned, hence this optimisation */ 805 if (--width > sizeof(buf) - 2) 806 width = sizeof(buf) - 2; 807 width++; 808 #endif 809 nr = parseint(fp, buf, width, base); 810 if (nr == 0) 811 goto match_failure; 812 if ((flags & SUPPRESS) == 0) { 813 uintmax_t res; 814 815 buf[nr] = '\0'; 816 if ((flags & UNSIGNED) == 0) 817 res = strtoimax_l(buf, (char **)NULL, base, locale); 818 else 819 res = strtoumax_l(buf, (char **)NULL, base, locale); 820 if (flags & POINTER) 821 *va_arg(ap, void **) = 822 (void *)(uintptr_t)res; 823 else if (flags & SHORTSHORT) 824 *va_arg(ap, char *) = res; 825 else if (flags & SHORT) 826 *va_arg(ap, short *) = res; 827 else if (flags & LONG) 828 *va_arg(ap, long *) = res; 829 else if (flags & LONGLONG) 830 *va_arg(ap, long long *) = res; 831 else if (flags & INTMAXT) 832 *va_arg(ap, intmax_t *) = res; 833 else if (flags & PTRDIFFT) 834 *va_arg(ap, ptrdiff_t *) = res; 835 else if (flags & SIZET) 836 *va_arg(ap, size_t *) = res; 837 else 838 *va_arg(ap, int *) = res; 839 } 840 break; 841 842 #ifndef NO_FLOATING_POINT 843 case CT_FLOAT: 844 /* scan a floating point number as if by strtod */ 845 if (width == 0 || width > sizeof(buf) - 1) 846 width = sizeof(buf) - 1; 847 nr = parsefloat(fp, buf, buf + width, locale); 848 if (nr == 0) 849 goto match_failure; 850 if ((flags & SUPPRESS) == 0) { 851 if (flags & LONGDBL) { 852 long double res = strtold_l(buf, NULL, 853 locale); 854 *va_arg(ap, long double *) = res; 855 } else if (flags & LONG) { 856 double res = strtod_l(buf, NULL, 857 locale); 858 *va_arg(ap, double *) = res; 859 } else { 860 float res = strtof_l(buf, NULL, locale); 861 *va_arg(ap, float *) = res; 862 } 863 } 864 break; 865 #endif /* !NO_FLOATING_POINT */ 866 } 867 if (!(flags & SUPPRESS)) 868 nassigned++; 869 nread += nr; 870 nconversions++; 871 } 872 input_failure: 873 return (nconversions != 0 ? nassigned : EOF); 874 match_failure: 875 return (nassigned); 876 } 877 878 /* 879 * Fill in the given table from the scanset at the given format 880 * (just after `['). Return a pointer to the character past the 881 * closing `]'. The table has a 1 wherever characters should be 882 * considered part of the scanset. 883 */ 884 static const u_char * 885 __sccl(char *tab, const u_char *fmt) 886 { 887 int c, n, v, i; 888 struct xlocale_collate *table = 889 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 890 891 /* first `clear' the whole table */ 892 c = *fmt++; /* first char hat => negated scanset */ 893 if (c == '^') { 894 v = 1; /* default => accept */ 895 c = *fmt++; /* get new first char */ 896 } else 897 v = 0; /* default => reject */ 898 899 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 900 (void) memset(tab, v, 256); 901 902 if (c == 0) 903 return (fmt - 1);/* format ended before closing ] */ 904 905 /* 906 * Now set the entries corresponding to the actual scanset 907 * to the opposite of the above. 908 * 909 * The first character may be ']' (or '-') without being special; 910 * the last character may be '-'. 911 */ 912 v = 1 - v; 913 for (;;) { 914 tab[c] = v; /* take character c */ 915 doswitch: 916 n = *fmt++; /* and examine the next */ 917 switch (n) { 918 919 case 0: /* format ended too soon */ 920 return (fmt - 1); 921 922 case '-': 923 /* 924 * A scanset of the form 925 * [01+-] 926 * is defined as `the digit 0, the digit 1, 927 * the character +, the character -', but 928 * the effect of a scanset such as 929 * [a-zA-Z0-9] 930 * is implementation defined. The V7 Unix 931 * scanf treats `a-z' as `the letters a through 932 * z', but treats `a-a' as `the letter a, the 933 * character -, and the letter a'. 934 * 935 * For compatibility, the `-' is not considered 936 * to define a range if the character following 937 * it is either a close bracket (required by ANSI) 938 * or is not numerically greater than the character 939 * we just stored in the table (c). 940 */ 941 n = *fmt; 942 if (n == ']' 943 || (table->__collate_load_error ? n < c : 944 __collate_range_cmp(n, c) < 0 945 ) 946 ) { 947 c = '-'; 948 break; /* resume the for(;;) */ 949 } 950 fmt++; 951 /* fill in the range */ 952 if (table->__collate_load_error) { 953 do { 954 tab[++c] = v; 955 } while (c < n); 956 } else { 957 for (i = 0; i < 256; i ++) 958 if (__collate_range_cmp(c, i) <= 0 && 959 __collate_range_cmp(i, n) <= 0 960 ) 961 tab[i] = v; 962 } 963 #if 1 /* XXX another disgusting compatibility hack */ 964 c = n; 965 /* 966 * Alas, the V7 Unix scanf also treats formats 967 * such as [a-c-e] as `the letters a through e'. 968 * This too is permitted by the standard.... 969 */ 970 goto doswitch; 971 #else 972 c = *fmt++; 973 if (c == 0) 974 return (fmt - 1); 975 if (c == ']') 976 return (fmt); 977 #endif 978 break; 979 980 case ']': /* end of scanset */ 981 return (fmt); 982 983 default: /* just another character */ 984 c = n; 985 break; 986 } 987 } 988 /* NOTREACHED */ 989 } 990 991 #ifndef NO_FLOATING_POINT 992 static int 993 parsefloat(FILE *fp, char *buf, char *end, locale_t locale) 994 { 995 char *commit, *p; 996 int infnanpos = 0, decptpos = 0; 997 enum { 998 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, 999 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS 1000 } state = S_START; 1001 unsigned char c; 1002 const char *decpt = localeconv_l(locale)->decimal_point; 1003 _Bool gotmantdig = 0, ishex = 0; 1004 1005 /* 1006 * We set commit = p whenever the string we have read so far 1007 * constitutes a valid representation of a floating point 1008 * number by itself. At some point, the parse will complete 1009 * or fail, and we will ungetc() back to the last commit point. 1010 * To ensure that the file offset gets updated properly, it is 1011 * always necessary to read at least one character that doesn't 1012 * match; thus, we can't short-circuit "infinity" or "nan(...)". 1013 */ 1014 commit = buf - 1; 1015 for (p = buf; p < end; ) { 1016 c = *fp->_p; 1017 reswitch: 1018 switch (state) { 1019 case S_START: 1020 state = S_GOTSIGN; 1021 if (c == '-' || c == '+') 1022 break; 1023 else 1024 goto reswitch; 1025 case S_GOTSIGN: 1026 switch (c) { 1027 case '0': 1028 state = S_MAYBEHEX; 1029 commit = p; 1030 break; 1031 case 'I': 1032 case 'i': 1033 state = S_INF; 1034 break; 1035 case 'N': 1036 case 'n': 1037 state = S_NAN; 1038 break; 1039 default: 1040 state = S_DIGITS; 1041 goto reswitch; 1042 } 1043 break; 1044 case S_INF: 1045 if (infnanpos > 6 || 1046 (c != "nfinity"[infnanpos] && 1047 c != "NFINITY"[infnanpos])) 1048 goto parsedone; 1049 if (infnanpos == 1 || infnanpos == 6) 1050 commit = p; /* inf or infinity */ 1051 infnanpos++; 1052 break; 1053 case S_NAN: 1054 switch (infnanpos) { 1055 case 0: 1056 if (c != 'A' && c != 'a') 1057 goto parsedone; 1058 break; 1059 case 1: 1060 if (c != 'N' && c != 'n') 1061 goto parsedone; 1062 else 1063 commit = p; 1064 break; 1065 case 2: 1066 if (c != '(') 1067 goto parsedone; 1068 break; 1069 default: 1070 if (c == ')') { 1071 commit = p; 1072 state = S_DONE; 1073 } else if (!isalnum(c) && c != '_') 1074 goto parsedone; 1075 break; 1076 } 1077 infnanpos++; 1078 break; 1079 case S_DONE: 1080 goto parsedone; 1081 case S_MAYBEHEX: 1082 state = S_DIGITS; 1083 if (c == 'X' || c == 'x') { 1084 ishex = 1; 1085 break; 1086 } else { /* we saw a '0', but no 'x' */ 1087 gotmantdig = 1; 1088 goto reswitch; 1089 } 1090 case S_DIGITS: 1091 if ((ishex && isxdigit(c)) || isdigit(c)) { 1092 gotmantdig = 1; 1093 commit = p; 1094 break; 1095 } else { 1096 state = S_DECPT; 1097 goto reswitch; 1098 } 1099 case S_DECPT: 1100 if (c == decpt[decptpos]) { 1101 if (decpt[++decptpos] == '\0') { 1102 /* We read the complete decpt seq. */ 1103 state = S_FRAC; 1104 if (gotmantdig) 1105 commit = p; 1106 } 1107 break; 1108 } else if (!decptpos) { 1109 /* We didn't read any decpt characters. */ 1110 state = S_FRAC; 1111 goto reswitch; 1112 } else { 1113 /* 1114 * We read part of a multibyte decimal point, 1115 * but the rest is invalid, so bail. 1116 */ 1117 goto parsedone; 1118 } 1119 case S_FRAC: 1120 if (((c == 'E' || c == 'e') && !ishex) || 1121 ((c == 'P' || c == 'p') && ishex)) { 1122 if (!gotmantdig) 1123 goto parsedone; 1124 else 1125 state = S_EXP; 1126 } else if ((ishex && isxdigit(c)) || isdigit(c)) { 1127 commit = p; 1128 gotmantdig = 1; 1129 } else 1130 goto parsedone; 1131 break; 1132 case S_EXP: 1133 state = S_EXPDIGITS; 1134 if (c == '-' || c == '+') 1135 break; 1136 else 1137 goto reswitch; 1138 case S_EXPDIGITS: 1139 if (isdigit(c)) 1140 commit = p; 1141 else 1142 goto parsedone; 1143 break; 1144 default: 1145 abort(); 1146 } 1147 *p++ = c; 1148 if (--fp->_r > 0) 1149 fp->_p++; 1150 else if (__srefill(fp)) 1151 break; /* EOF */ 1152 } 1153 1154 parsedone: 1155 while (commit < --p) 1156 __ungetc(*(u_char *)p, fp); 1157 *++commit = '\0'; 1158 return (commit - buf); 1159 } 1160 #endif 1161