1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Copyright (c) 2011 The FreeBSD Foundation 8 * 9 * Copyright (c) 2023 Dag-Erling Smørgrav 10 * 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Chris Torek. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 */ 41 42 #include "namespace.h" 43 #include <ctype.h> 44 #include <inttypes.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <stddef.h> 48 #include <stdarg.h> 49 #include <string.h> 50 #include <wchar.h> 51 #include <wctype.h> 52 #include "un-namespace.h" 53 54 #include "collate.h" 55 #include "libc_private.h" 56 #include "local.h" 57 #include "xlocale_private.h" 58 59 #ifndef NO_FLOATING_POINT 60 #include <locale.h> 61 #endif 62 63 #define BUF 513 /* Maximum length of numeric string. */ 64 65 /* 66 * Flags used during conversion. 67 */ 68 #define LONG 0x01 /* l: long or double */ 69 #define LONGDBL 0x02 /* L: long double */ 70 #define SHORT 0x04 /* h: short */ 71 #define SUPPRESS 0x08 /* *: suppress assignment */ 72 #define POINTER 0x10 /* p: void * (as hex) */ 73 #define NOSKIP 0x20 /* [ or c: do not skip blanks */ 74 #define FASTINT 0x200 /* wfN: int_fastN_t */ 75 #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 76 #define INTMAXT 0x800 /* j: intmax_t */ 77 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 78 #define SIZET 0x2000 /* z: size_t */ 79 #define SHORTSHORT 0x4000 /* hh: char */ 80 #define UNSIGNED 0x8000 /* %[oupxX] conversions */ 81 82 /* 83 * Conversion types. 84 */ 85 #define CT_CHAR 0 /* %c conversion */ 86 #define CT_CCL 1 /* %[...] conversion */ 87 #define CT_STRING 2 /* %s conversion */ 88 #define CT_INT 3 /* %[dioupxX] conversion */ 89 #define CT_FLOAT 4 /* %[efgEFG] conversion */ 90 91 static const u_char *__sccl(char *, const u_char *); 92 #ifndef NO_FLOATING_POINT 93 static int parsefloat(FILE *, char *, char *, locale_t); 94 #endif 95 96 __weak_reference(__vfscanf, vfscanf); 97 98 /* 99 * Conversion functions are passed a pointer to this object instead of 100 * a real parameter to indicate that the assignment-suppression (*) 101 * flag was specified. We could use a NULL pointer to indicate this, 102 * but that would mask bugs in applications that call scanf() with a 103 * NULL pointer. 104 */ 105 static const int suppress; 106 #define SUPPRESS_PTR ((void *)&suppress) 107 108 static const mbstate_t initial_mbs; 109 110 /* 111 * The following conversion functions return the number of characters consumed, 112 * or -1 on input failure. Character class conversion returns 0 on match 113 * failure. 114 */ 115 116 static __inline int 117 convert_char(FILE *fp, char * p, int width) 118 { 119 int n; 120 121 if (p == SUPPRESS_PTR) { 122 size_t sum = 0; 123 for (;;) { 124 if ((n = fp->_r) < width) { 125 sum += n; 126 width -= n; 127 fp->_p += n; 128 if (__srefill(fp)) { 129 if (sum == 0) 130 return (-1); 131 break; 132 } 133 } else { 134 sum += width; 135 fp->_r -= width; 136 fp->_p += width; 137 break; 138 } 139 } 140 return (sum); 141 } else { 142 size_t r = __fread(p, 1, width, fp); 143 144 if (r == 0) 145 return (-1); 146 return (r); 147 } 148 } 149 150 static __inline int 151 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) 152 { 153 mbstate_t mbs; 154 int n, nread; 155 wint_t wi; 156 157 mbs = initial_mbs; 158 n = 0; 159 while (width-- != 0 && 160 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) { 161 if (wcp != SUPPRESS_PTR) 162 *wcp++ = (wchar_t)wi; 163 n += nread; 164 } 165 if (n == 0) 166 return (-1); 167 return (n); 168 } 169 170 static __inline int 171 convert_ccl(FILE *fp, char * p, int width, const char *ccltab) 172 { 173 char *p0; 174 int n; 175 176 if (p == SUPPRESS_PTR) { 177 n = 0; 178 while (ccltab[*fp->_p]) { 179 n++, fp->_r--, fp->_p++; 180 if (--width == 0) 181 break; 182 if (fp->_r <= 0 && __srefill(fp)) { 183 if (n == 0) 184 return (-1); 185 break; 186 } 187 } 188 } else { 189 p0 = p; 190 while (ccltab[*fp->_p]) { 191 fp->_r--; 192 *p++ = *fp->_p++; 193 if (--width == 0) 194 break; 195 if (fp->_r <= 0 && __srefill(fp)) { 196 if (p == p0) 197 return (-1); 198 break; 199 } 200 } 201 n = p - p0; 202 if (n == 0) 203 return (0); 204 *p = 0; 205 } 206 return (n); 207 } 208 209 static __inline int 210 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab, 211 locale_t locale) 212 { 213 mbstate_t mbs; 214 wint_t wi; 215 int n, nread; 216 217 mbs = initial_mbs; 218 n = 0; 219 if (wcp == SUPPRESS_PTR) { 220 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 221 width-- != 0 && ccltab[wctob(wi)]) 222 n += nread; 223 if (wi != WEOF) 224 __ungetwc(wi, fp, __get_locale()); 225 } else { 226 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 227 width-- != 0 && ccltab[wctob(wi)]) { 228 *wcp++ = (wchar_t)wi; 229 n += nread; 230 } 231 if (wi != WEOF) 232 __ungetwc(wi, fp, __get_locale()); 233 if (n == 0) 234 return (0); 235 *wcp = 0; 236 } 237 return (n); 238 } 239 240 static __inline int 241 convert_string(FILE *fp, char * p, int width) 242 { 243 char *p0; 244 int n; 245 246 if (p == SUPPRESS_PTR) { 247 n = 0; 248 while (!isspace(*fp->_p)) { 249 n++, fp->_r--, fp->_p++; 250 if (--width == 0) 251 break; 252 if (fp->_r <= 0 && __srefill(fp)) 253 break; 254 } 255 } else { 256 p0 = p; 257 while (!isspace(*fp->_p)) { 258 fp->_r--; 259 *p++ = *fp->_p++; 260 if (--width == 0) 261 break; 262 if (fp->_r <= 0 && __srefill(fp)) 263 break; 264 } 265 *p = 0; 266 n = p - p0; 267 } 268 return (n); 269 } 270 271 static __inline int 272 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) 273 { 274 mbstate_t mbs; 275 wint_t wi; 276 int n, nread; 277 278 mbs = initial_mbs; 279 n = 0; 280 if (wcp == SUPPRESS_PTR) { 281 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 282 width-- != 0 && !iswspace(wi)) 283 n += nread; 284 if (wi != WEOF) 285 __ungetwc(wi, fp, __get_locale()); 286 } else { 287 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 288 width-- != 0 && !iswspace(wi)) { 289 *wcp++ = (wchar_t)wi; 290 n += nread; 291 } 292 if (wi != WEOF) 293 __ungetwc(wi, fp, __get_locale()); 294 *wcp = '\0'; 295 } 296 return (n); 297 } 298 299 enum parseint_state { 300 begin, 301 havesign, 302 havezero, 303 haveprefix, 304 any, 305 }; 306 307 static __inline int 308 parseint_fsm(int c, enum parseint_state *state, int *base) 309 { 310 switch (c) { 311 case '+': 312 case '-': 313 if (*state == begin) { 314 *state = havesign; 315 return 1; 316 } 317 break; 318 case '0': 319 if (*state == begin || *state == havesign) { 320 *state = havezero; 321 } else { 322 *state = any; 323 } 324 return 1; 325 case '1': 326 case '2': 327 case '3': 328 case '4': 329 case '5': 330 case '6': 331 case '7': 332 if (*state == havezero && *base == 0) { 333 *base = 8; 334 } 335 /* FALL THROUGH */ 336 case '8': 337 case '9': 338 if (*state == begin || 339 *state == havesign) { 340 if (*base == 0) { 341 *base = 10; 342 } 343 } 344 if (*state == begin || 345 *state == havesign || 346 *state == havezero || 347 *state == haveprefix || 348 *state == any) { 349 if (*base > c - '0') { 350 *state = any; 351 return 1; 352 } 353 } 354 break; 355 case 'b': 356 if (*state == havezero) { 357 if (*base == 0 || *base == 2) { 358 *state = haveprefix; 359 *base = 2; 360 return 1; 361 } 362 } 363 /* FALL THROUGH */ 364 case 'a': 365 case 'c': 366 case 'd': 367 case 'e': 368 case 'f': 369 if (*state == begin || 370 *state == havesign || 371 *state == havezero || 372 *state == haveprefix || 373 *state == any) { 374 if (*base > c - 'a' + 10) { 375 *state = any; 376 return 1; 377 } 378 } 379 break; 380 case 'B': 381 if (*state == havezero) { 382 if (*base == 0 || *base == 2) { 383 *state = haveprefix; 384 *base = 2; 385 return 1; 386 } 387 } 388 /* FALL THROUGH */ 389 case 'A': 390 case 'C': 391 case 'D': 392 case 'E': 393 case 'F': 394 if (*state == begin || 395 *state == havesign || 396 *state == havezero || 397 *state == haveprefix || 398 *state == any) { 399 if (*base > c - 'A' + 10) { 400 *state = any; 401 return 1; 402 } 403 } 404 break; 405 case 'x': 406 case 'X': 407 if (*state == havezero) { 408 if (*base == 0 || *base == 16) { 409 *state = haveprefix; 410 *base = 16; 411 return 1; 412 } 413 } 414 break; 415 } 416 return 0; 417 } 418 419 /* 420 * Read an integer, storing it in buf. 421 * 422 * Return 0 on a match failure, and the number of characters read 423 * otherwise. 424 */ 425 static __inline int 426 parseint(FILE *fp, char * __restrict buf, int width, int base) 427 { 428 enum parseint_state state = begin; 429 char *p; 430 int c; 431 432 for (p = buf; width; width--) { 433 c = __sgetc(fp); 434 if (c == EOF) 435 break; 436 if (!parseint_fsm(c, &state, &base)) 437 break; 438 *p++ = c; 439 } 440 /* 441 * If we only had a sign, push it back. If we only had a 0b or 0x 442 * prefix (possibly preceded by a sign), we view it as "0" and 443 * push back the letter. In all other cases, if we stopped 444 * because we read a non-number character, push it back. 445 */ 446 if (state == havesign) { 447 p--; 448 (void) __ungetc(*(u_char *)p, fp); 449 } else if (state == haveprefix) { 450 p--; 451 (void) __ungetc(c, fp); 452 } else if (width && c != EOF) { 453 (void) __ungetc(c, fp); 454 } 455 return (p - buf); 456 } 457 458 /* 459 * __vfscanf - MT-safe version 460 */ 461 int 462 __vfscanf(FILE *fp, char const *fmt0, va_list ap) 463 { 464 int ret; 465 466 FLOCKFILE_CANCELSAFE(fp); 467 ret = __svfscanf(fp, __get_locale(), fmt0, ap); 468 FUNLOCKFILE_CANCELSAFE(); 469 return (ret); 470 } 471 int 472 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap) 473 { 474 int ret; 475 FIX_LOCALE(locale); 476 477 FLOCKFILE_CANCELSAFE(fp); 478 ret = __svfscanf(fp, locale, fmt0, ap); 479 FUNLOCKFILE_CANCELSAFE(); 480 return (ret); 481 } 482 483 /* 484 * __svfscanf - non-MT-safe version of __vfscanf 485 */ 486 int 487 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap) 488 { 489 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) 490 const u_char *fmt = (const u_char *)fmt0; 491 int c; /* character from format, or conversion */ 492 size_t width; /* field width, or 0 */ 493 int flags; /* flags as defined above */ 494 int nassigned; /* number of fields assigned */ 495 int nconversions; /* number of conversions */ 496 int nr; /* characters read by the current conversion */ 497 int nread; /* number of characters consumed from fp */ 498 int base; /* base argument to conversion function */ 499 char ccltab[256]; /* character class table for %[...] */ 500 char buf[BUF]; /* buffer for numeric conversions */ 501 502 ORIENT(fp, -1); 503 504 nassigned = 0; 505 nconversions = 0; 506 nread = 0; 507 for (;;) { 508 c = *fmt++; 509 if (c == 0) 510 return (nassigned); 511 if (isspace(c)) { 512 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) 513 nread++, fp->_r--, fp->_p++; 514 continue; 515 } 516 if (c != '%') 517 goto literal; 518 width = 0; 519 flags = 0; 520 /* 521 * switch on the format. continue if done; 522 * break once format type is derived. 523 */ 524 again: c = *fmt++; 525 switch (c) { 526 case '%': 527 literal: 528 if (fp->_r <= 0 && __srefill(fp)) 529 goto input_failure; 530 if (*fp->_p != c) 531 goto match_failure; 532 fp->_r--, fp->_p++; 533 nread++; 534 continue; 535 536 case '*': 537 flags |= SUPPRESS; 538 goto again; 539 case 'j': 540 flags |= INTMAXT; 541 goto again; 542 case 'l': 543 if (flags & LONG) { 544 flags &= ~LONG; 545 flags |= LONGLONG; 546 } else 547 flags |= LONG; 548 goto again; 549 case 'q': 550 flags |= LONGLONG; /* not quite */ 551 goto again; 552 case 't': 553 flags |= PTRDIFFT; 554 goto again; 555 case 'w': 556 /* 557 * Fixed-width integer types. On all platforms we 558 * support, int8_t is equivalent to char, int16_t 559 * is equivalent to short, int32_t is equivalent 560 * to int, int64_t is equivalent to long long int. 561 * Furthermore, int_fast8_t, int_fast16_t and 562 * int_fast32_t are equivalent to int, and 563 * int_fast64_t is equivalent to long long int. 564 */ 565 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT); 566 if (fmt[0] == 'f') { 567 flags |= FASTINT; 568 fmt++; 569 } else { 570 flags &= ~FASTINT; 571 } 572 if (fmt[0] == '8') { 573 if (!(flags & FASTINT)) 574 flags |= SHORTSHORT; 575 else 576 /* no flag set = 32 */ ; 577 fmt += 1; 578 } else if (fmt[0] == '1' && fmt[1] == '6') { 579 if (!(flags & FASTINT)) 580 flags |= SHORT; 581 else 582 /* no flag set = 32 */ ; 583 fmt += 2; 584 } else if (fmt[0] == '3' && fmt[1] == '2') { 585 /* no flag set = 32 */ ; 586 fmt += 2; 587 } else if (fmt[0] == '6' && fmt[1] == '4') { 588 flags |= LONGLONG; 589 fmt += 2; 590 } else { 591 goto match_failure; 592 } 593 goto again; 594 case 'z': 595 flags |= SIZET; 596 goto again; 597 case 'L': 598 flags |= LONGDBL; 599 goto again; 600 case 'h': 601 if (flags & SHORT) { 602 flags &= ~SHORT; 603 flags |= SHORTSHORT; 604 } else 605 flags |= SHORT; 606 goto again; 607 608 case '0': case '1': case '2': case '3': case '4': 609 case '5': case '6': case '7': case '8': case '9': 610 width = width * 10 + c - '0'; 611 goto again; 612 613 /* 614 * Conversions. 615 */ 616 case 'B': 617 case 'b': 618 c = CT_INT; 619 flags |= UNSIGNED; 620 base = 2; 621 break; 622 623 case 'd': 624 c = CT_INT; 625 base = 10; 626 break; 627 628 case 'i': 629 c = CT_INT; 630 base = 0; 631 break; 632 633 case 'o': 634 c = CT_INT; 635 flags |= UNSIGNED; 636 base = 8; 637 break; 638 639 case 'u': 640 c = CT_INT; 641 flags |= UNSIGNED; 642 base = 10; 643 break; 644 645 case 'X': 646 case 'x': 647 c = CT_INT; 648 flags |= UNSIGNED; 649 base = 16; 650 break; 651 652 #ifndef NO_FLOATING_POINT 653 case 'A': case 'E': case 'F': case 'G': 654 case 'a': case 'e': case 'f': case 'g': 655 c = CT_FLOAT; 656 break; 657 #endif 658 659 case 'S': 660 flags |= LONG; 661 /* FALLTHROUGH */ 662 case 's': 663 c = CT_STRING; 664 break; 665 666 case '[': 667 fmt = __sccl(ccltab, fmt); 668 flags |= NOSKIP; 669 c = CT_CCL; 670 break; 671 672 case 'C': 673 flags |= LONG; 674 /* FALLTHROUGH */ 675 case 'c': 676 flags |= NOSKIP; 677 c = CT_CHAR; 678 break; 679 680 case 'p': /* pointer format is like hex */ 681 flags |= POINTER; 682 c = CT_INT; /* assumes sizeof(uintmax_t) */ 683 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 684 base = 16; 685 break; 686 687 case 'n': 688 if (flags & SUPPRESS) /* ??? */ 689 continue; 690 if (flags & SHORTSHORT) 691 *va_arg(ap, char *) = nread; 692 else if (flags & SHORT) 693 *va_arg(ap, short *) = nread; 694 else if (flags & LONG) 695 *va_arg(ap, long *) = nread; 696 else if (flags & LONGLONG) 697 *va_arg(ap, long long *) = nread; 698 else if (flags & INTMAXT) 699 *va_arg(ap, intmax_t *) = nread; 700 else if (flags & SIZET) 701 *va_arg(ap, size_t *) = nread; 702 else if (flags & PTRDIFFT) 703 *va_arg(ap, ptrdiff_t *) = nread; 704 else 705 *va_arg(ap, int *) = nread; 706 continue; 707 708 default: 709 goto match_failure; 710 711 /* 712 * Disgusting backwards compatibility hack. XXX 713 */ 714 case '\0': /* compat */ 715 return (EOF); 716 } 717 718 /* 719 * We have a conversion that requires input. 720 */ 721 if (fp->_r <= 0 && __srefill(fp)) 722 goto input_failure; 723 724 /* 725 * Consume leading white space, except for formats 726 * that suppress this. 727 */ 728 if ((flags & NOSKIP) == 0) { 729 while (isspace(*fp->_p)) { 730 nread++; 731 if (--fp->_r > 0) 732 fp->_p++; 733 else if (__srefill(fp)) 734 goto input_failure; 735 } 736 /* 737 * Note that there is at least one character in 738 * the buffer, so conversions that do not set NOSKIP 739 * ca no longer result in an input failure. 740 */ 741 } 742 743 /* 744 * Do the conversion. 745 */ 746 switch (c) { 747 748 case CT_CHAR: 749 /* scan arbitrary characters (sets NOSKIP) */ 750 if (width == 0) 751 width = 1; 752 if (flags & LONG) { 753 nr = convert_wchar(fp, GETARG(wchar_t *), 754 width, locale); 755 } else { 756 nr = convert_char(fp, GETARG(char *), width); 757 } 758 if (nr < 0) 759 goto input_failure; 760 break; 761 762 case CT_CCL: 763 /* scan a (nonempty) character class (sets NOSKIP) */ 764 if (width == 0) 765 width = (size_t)~0; /* `infinity' */ 766 if (flags & LONG) { 767 nr = convert_wccl(fp, GETARG(wchar_t *), width, 768 ccltab, locale); 769 } else { 770 nr = convert_ccl(fp, GETARG(char *), width, 771 ccltab); 772 } 773 if (nr <= 0) { 774 if (nr < 0) 775 goto input_failure; 776 else /* nr == 0 */ 777 goto match_failure; 778 } 779 break; 780 781 case CT_STRING: 782 /* like CCL, but zero-length string OK, & no NOSKIP */ 783 if (width == 0) 784 width = (size_t)~0; 785 if (flags & LONG) { 786 nr = convert_wstring(fp, GETARG(wchar_t *), 787 width, locale); 788 } else { 789 nr = convert_string(fp, GETARG(char *), width); 790 } 791 if (nr < 0) 792 goto input_failure; 793 break; 794 795 case CT_INT: 796 /* scan an integer as if by the conversion function */ 797 #ifdef hardway 798 if (width == 0 || width > sizeof(buf) - 1) 799 width = sizeof(buf) - 1; 800 #else 801 /* size_t is unsigned, hence this optimisation */ 802 if (--width > sizeof(buf) - 2) 803 width = sizeof(buf) - 2; 804 width++; 805 #endif 806 nr = parseint(fp, buf, width, base); 807 if (nr == 0) 808 goto match_failure; 809 if ((flags & SUPPRESS) == 0) { 810 uintmax_t res; 811 812 buf[nr] = '\0'; 813 if ((flags & UNSIGNED) == 0) 814 res = strtoimax_l(buf, (char **)NULL, base, locale); 815 else 816 res = strtoumax_l(buf, (char **)NULL, base, locale); 817 if (flags & POINTER) 818 *va_arg(ap, void **) = 819 (void *)(uintptr_t)res; 820 else if (flags & SHORTSHORT) 821 *va_arg(ap, char *) = res; 822 else if (flags & SHORT) 823 *va_arg(ap, short *) = res; 824 else if (flags & LONG) 825 *va_arg(ap, long *) = res; 826 else if (flags & LONGLONG) 827 *va_arg(ap, long long *) = res; 828 else if (flags & INTMAXT) 829 *va_arg(ap, intmax_t *) = res; 830 else if (flags & PTRDIFFT) 831 *va_arg(ap, ptrdiff_t *) = res; 832 else if (flags & SIZET) 833 *va_arg(ap, size_t *) = res; 834 else 835 *va_arg(ap, int *) = res; 836 } 837 break; 838 839 #ifndef NO_FLOATING_POINT 840 case CT_FLOAT: 841 /* scan a floating point number as if by strtod */ 842 if (width == 0 || width > sizeof(buf) - 1) 843 width = sizeof(buf) - 1; 844 nr = parsefloat(fp, buf, buf + width, locale); 845 if (nr == 0) 846 goto match_failure; 847 if ((flags & SUPPRESS) == 0) { 848 if (flags & LONGDBL) { 849 long double res = strtold_l(buf, NULL, 850 locale); 851 *va_arg(ap, long double *) = res; 852 } else if (flags & LONG) { 853 double res = strtod_l(buf, NULL, 854 locale); 855 *va_arg(ap, double *) = res; 856 } else { 857 float res = strtof_l(buf, NULL, locale); 858 *va_arg(ap, float *) = res; 859 } 860 } 861 break; 862 #endif /* !NO_FLOATING_POINT */ 863 } 864 if (!(flags & SUPPRESS)) 865 nassigned++; 866 nread += nr; 867 nconversions++; 868 } 869 input_failure: 870 return (nconversions != 0 ? nassigned : EOF); 871 match_failure: 872 return (nassigned); 873 } 874 875 /* 876 * Fill in the given table from the scanset at the given format 877 * (just after `['). Return a pointer to the character past the 878 * closing `]'. The table has a 1 wherever characters should be 879 * considered part of the scanset. 880 */ 881 static const u_char * 882 __sccl(char *tab, const u_char *fmt) 883 { 884 int c, n, v, i; 885 struct xlocale_collate *table = 886 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 887 888 /* first `clear' the whole table */ 889 c = *fmt++; /* first char hat => negated scanset */ 890 if (c == '^') { 891 v = 1; /* default => accept */ 892 c = *fmt++; /* get new first char */ 893 } else 894 v = 0; /* default => reject */ 895 896 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 897 (void) memset(tab, v, 256); 898 899 if (c == 0) 900 return (fmt - 1);/* format ended before closing ] */ 901 902 /* 903 * Now set the entries corresponding to the actual scanset 904 * to the opposite of the above. 905 * 906 * The first character may be ']' (or '-') without being special; 907 * the last character may be '-'. 908 */ 909 v = 1 - v; 910 for (;;) { 911 tab[c] = v; /* take character c */ 912 doswitch: 913 n = *fmt++; /* and examine the next */ 914 switch (n) { 915 916 case 0: /* format ended too soon */ 917 return (fmt - 1); 918 919 case '-': 920 /* 921 * A scanset of the form 922 * [01+-] 923 * is defined as `the digit 0, the digit 1, 924 * the character +, the character -', but 925 * the effect of a scanset such as 926 * [a-zA-Z0-9] 927 * is implementation defined. The V7 Unix 928 * scanf treats `a-z' as `the letters a through 929 * z', but treats `a-a' as `the letter a, the 930 * character -, and the letter a'. 931 * 932 * For compatibility, the `-' is not considered 933 * to define a range if the character following 934 * it is either a close bracket (required by ANSI) 935 * or is not numerically greater than the character 936 * we just stored in the table (c). 937 */ 938 n = *fmt; 939 if (n == ']' 940 || (table->__collate_load_error ? n < c : 941 __collate_range_cmp(n, c) < 0 942 ) 943 ) { 944 c = '-'; 945 break; /* resume the for(;;) */ 946 } 947 fmt++; 948 /* fill in the range */ 949 if (table->__collate_load_error) { 950 do { 951 tab[++c] = v; 952 } while (c < n); 953 } else { 954 for (i = 0; i < 256; i ++) 955 if (__collate_range_cmp(c, i) <= 0 && 956 __collate_range_cmp(i, n) <= 0 957 ) 958 tab[i] = v; 959 } 960 #if 1 /* XXX another disgusting compatibility hack */ 961 c = n; 962 /* 963 * Alas, the V7 Unix scanf also treats formats 964 * such as [a-c-e] as `the letters a through e'. 965 * This too is permitted by the standard.... 966 */ 967 goto doswitch; 968 #else 969 c = *fmt++; 970 if (c == 0) 971 return (fmt - 1); 972 if (c == ']') 973 return (fmt); 974 #endif 975 break; 976 977 case ']': /* end of scanset */ 978 return (fmt); 979 980 default: /* just another character */ 981 c = n; 982 break; 983 } 984 } 985 /* NOTREACHED */ 986 } 987 988 #ifndef NO_FLOATING_POINT 989 static int 990 parsefloat(FILE *fp, char *buf, char *end, locale_t locale) 991 { 992 char *commit, *p; 993 int infnanpos = 0, decptpos = 0; 994 enum { 995 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, 996 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS 997 } state = S_START; 998 unsigned char c; 999 const char *decpt = localeconv_l(locale)->decimal_point; 1000 _Bool gotmantdig = 0, ishex = 0; 1001 1002 /* 1003 * We set commit = p whenever the string we have read so far 1004 * constitutes a valid representation of a floating point 1005 * number by itself. At some point, the parse will complete 1006 * or fail, and we will ungetc() back to the last commit point. 1007 * To ensure that the file offset gets updated properly, it is 1008 * always necessary to read at least one character that doesn't 1009 * match; thus, we can't short-circuit "infinity" or "nan(...)". 1010 */ 1011 commit = buf - 1; 1012 for (p = buf; p < end; ) { 1013 c = *fp->_p; 1014 reswitch: 1015 switch (state) { 1016 case S_START: 1017 state = S_GOTSIGN; 1018 if (c == '-' || c == '+') 1019 break; 1020 else 1021 goto reswitch; 1022 case S_GOTSIGN: 1023 switch (c) { 1024 case '0': 1025 state = S_MAYBEHEX; 1026 commit = p; 1027 break; 1028 case 'I': 1029 case 'i': 1030 state = S_INF; 1031 break; 1032 case 'N': 1033 case 'n': 1034 state = S_NAN; 1035 break; 1036 default: 1037 state = S_DIGITS; 1038 goto reswitch; 1039 } 1040 break; 1041 case S_INF: 1042 if (infnanpos > 6 || 1043 (c != "nfinity"[infnanpos] && 1044 c != "NFINITY"[infnanpos])) 1045 goto parsedone; 1046 if (infnanpos == 1 || infnanpos == 6) 1047 commit = p; /* inf or infinity */ 1048 infnanpos++; 1049 break; 1050 case S_NAN: 1051 switch (infnanpos) { 1052 case 0: 1053 if (c != 'A' && c != 'a') 1054 goto parsedone; 1055 break; 1056 case 1: 1057 if (c != 'N' && c != 'n') 1058 goto parsedone; 1059 else 1060 commit = p; 1061 break; 1062 case 2: 1063 if (c != '(') 1064 goto parsedone; 1065 break; 1066 default: 1067 if (c == ')') { 1068 commit = p; 1069 state = S_DONE; 1070 } else if (!isalnum(c) && c != '_') 1071 goto parsedone; 1072 break; 1073 } 1074 infnanpos++; 1075 break; 1076 case S_DONE: 1077 goto parsedone; 1078 case S_MAYBEHEX: 1079 state = S_DIGITS; 1080 if (c == 'X' || c == 'x') { 1081 ishex = 1; 1082 break; 1083 } else { /* we saw a '0', but no 'x' */ 1084 gotmantdig = 1; 1085 goto reswitch; 1086 } 1087 case S_DIGITS: 1088 if ((ishex && isxdigit(c)) || isdigit(c)) { 1089 gotmantdig = 1; 1090 commit = p; 1091 break; 1092 } else { 1093 state = S_DECPT; 1094 goto reswitch; 1095 } 1096 case S_DECPT: 1097 if (c == decpt[decptpos]) { 1098 if (decpt[++decptpos] == '\0') { 1099 /* We read the complete decpt seq. */ 1100 state = S_FRAC; 1101 if (gotmantdig) 1102 commit = p; 1103 } 1104 break; 1105 } else if (!decptpos) { 1106 /* We didn't read any decpt characters. */ 1107 state = S_FRAC; 1108 goto reswitch; 1109 } else { 1110 /* 1111 * We read part of a multibyte decimal point, 1112 * but the rest is invalid, so bail. 1113 */ 1114 goto parsedone; 1115 } 1116 case S_FRAC: 1117 if (((c == 'E' || c == 'e') && !ishex) || 1118 ((c == 'P' || c == 'p') && ishex)) { 1119 if (!gotmantdig) 1120 goto parsedone; 1121 else 1122 state = S_EXP; 1123 } else if ((ishex && isxdigit(c)) || isdigit(c)) { 1124 commit = p; 1125 gotmantdig = 1; 1126 } else 1127 goto parsedone; 1128 break; 1129 case S_EXP: 1130 state = S_EXPDIGITS; 1131 if (c == '-' || c == '+') 1132 break; 1133 else 1134 goto reswitch; 1135 case S_EXPDIGITS: 1136 if (isdigit(c)) 1137 commit = p; 1138 else 1139 goto parsedone; 1140 break; 1141 default: 1142 abort(); 1143 } 1144 *p++ = c; 1145 if (--fp->_r > 0) 1146 fp->_p++; 1147 else if (__srefill(fp)) 1148 break; /* EOF */ 1149 } 1150 1151 parsedone: 1152 while (commit < --p) 1153 __ungetc(*(u_char *)p, fp); 1154 *++commit = '\0'; 1155 return (commit - buf); 1156 } 1157 #endif 1158