1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Copyright (c) 2011 The FreeBSD Foundation 8 * 9 * Copyright (c) 2023 Dag-Erling Smørgrav 10 * 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Chris Torek. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 */ 41 42 #include "namespace.h" 43 #include <ctype.h> 44 #include <inttypes.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <stddef.h> 48 #include <stdarg.h> 49 #include <string.h> 50 #include <wchar.h> 51 #include <wctype.h> 52 #include "un-namespace.h" 53 54 #include "collate.h" 55 #include "libc_private.h" 56 #include "local.h" 57 #include "xlocale_private.h" 58 59 #include <locale.h> 60 61 #define BUF 513 /* Maximum length of numeric string. */ 62 63 /* 64 * Flags used during conversion. 65 */ 66 #define LONG 0x01 /* l: long or double */ 67 #define LONGDBL 0x02 /* L: long double */ 68 #define SHORT 0x04 /* h: short */ 69 #define SUPPRESS 0x08 /* *: suppress assignment */ 70 #define POINTER 0x10 /* p: void * (as hex) */ 71 #define NOSKIP 0x20 /* [ or c: do not skip blanks */ 72 #define FASTINT 0x200 /* wfN: int_fastN_t */ 73 #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 74 #define INTMAXT 0x800 /* j: intmax_t */ 75 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 76 #define SIZET 0x2000 /* z: size_t */ 77 #define SHORTSHORT 0x4000 /* hh: char */ 78 #define UNSIGNED 0x8000 /* %[oupxX] conversions */ 79 80 /* 81 * Conversion types. 82 */ 83 #define CT_CHAR 0 /* %c conversion */ 84 #define CT_CCL 1 /* %[...] conversion */ 85 #define CT_STRING 2 /* %s conversion */ 86 #define CT_INT 3 /* %[dioupxX] conversion */ 87 #define CT_FLOAT 4 /* %[efgEFG] conversion */ 88 89 static const u_char *__sccl(char *, const u_char *); 90 static int parsefloat(FILE *, char *, char *, locale_t); 91 92 __weak_reference(__vfscanf, vfscanf); 93 94 /* 95 * Conversion functions are passed a pointer to this object instead of 96 * a real parameter to indicate that the assignment-suppression (*) 97 * flag was specified. We could use a NULL pointer to indicate this, 98 * but that would mask bugs in applications that call scanf() with a 99 * NULL pointer. 100 */ 101 static const int suppress; 102 #define SUPPRESS_PTR ((void *)&suppress) 103 104 static const mbstate_t initial_mbs; 105 106 /* 107 * The following conversion functions return the number of characters consumed, 108 * or -1 on input failure. Character class conversion returns 0 on match 109 * failure. 110 */ 111 112 static __inline int 113 convert_char(FILE *fp, char * p, int width) 114 { 115 int n; 116 117 if (p == SUPPRESS_PTR) { 118 size_t sum = 0; 119 for (;;) { 120 if ((n = fp->_r) < width) { 121 sum += n; 122 width -= n; 123 fp->_p += n; 124 if (__srefill(fp)) { 125 if (sum == 0) 126 return (-1); 127 break; 128 } 129 } else { 130 sum += width; 131 fp->_r -= width; 132 fp->_p += width; 133 break; 134 } 135 } 136 return (sum); 137 } else { 138 size_t r = __fread(p, 1, width, fp); 139 140 if (r == 0) 141 return (-1); 142 return (r); 143 } 144 } 145 146 static __inline int 147 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) 148 { 149 mbstate_t mbs; 150 int n, nread; 151 wint_t wi; 152 153 mbs = initial_mbs; 154 n = 0; 155 while (width-- != 0 && 156 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) { 157 if (wcp != SUPPRESS_PTR) 158 *wcp++ = (wchar_t)wi; 159 n += nread; 160 } 161 if (n == 0) 162 return (-1); 163 return (n); 164 } 165 166 static __inline int 167 convert_ccl(FILE *fp, char * p, int width, const char *ccltab) 168 { 169 char *p0; 170 int n; 171 172 if (p == SUPPRESS_PTR) { 173 n = 0; 174 while (ccltab[*fp->_p]) { 175 n++, fp->_r--, fp->_p++; 176 if (--width == 0) 177 break; 178 if (fp->_r <= 0 && __srefill(fp)) { 179 if (n == 0) 180 return (-1); 181 break; 182 } 183 } 184 } else { 185 p0 = p; 186 while (ccltab[*fp->_p]) { 187 fp->_r--; 188 *p++ = *fp->_p++; 189 if (--width == 0) 190 break; 191 if (fp->_r <= 0 && __srefill(fp)) { 192 if (p == p0) 193 return (-1); 194 break; 195 } 196 } 197 n = p - p0; 198 if (n == 0) 199 return (0); 200 *p = 0; 201 } 202 return (n); 203 } 204 205 static __inline int 206 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab, 207 locale_t locale) 208 { 209 mbstate_t mbs; 210 wint_t wi; 211 int n, nread; 212 213 mbs = initial_mbs; 214 n = 0; 215 if (wcp == SUPPRESS_PTR) { 216 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 217 width-- != 0 && ccltab[wctob(wi)]) 218 n += nread; 219 if (wi != WEOF) 220 __ungetwc(wi, fp, __get_locale()); 221 } else { 222 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 223 width-- != 0 && ccltab[wctob(wi)]) { 224 *wcp++ = (wchar_t)wi; 225 n += nread; 226 } 227 if (wi != WEOF) 228 __ungetwc(wi, fp, __get_locale()); 229 if (n == 0) 230 return (0); 231 *wcp = 0; 232 } 233 return (n); 234 } 235 236 static __inline int 237 convert_string(FILE *fp, char * p, int width) 238 { 239 char *p0; 240 int n; 241 242 if (p == SUPPRESS_PTR) { 243 n = 0; 244 while (!isspace(*fp->_p)) { 245 n++, fp->_r--, fp->_p++; 246 if (--width == 0) 247 break; 248 if (fp->_r <= 0 && __srefill(fp)) 249 break; 250 } 251 } else { 252 p0 = p; 253 while (!isspace(*fp->_p)) { 254 fp->_r--; 255 *p++ = *fp->_p++; 256 if (--width == 0) 257 break; 258 if (fp->_r <= 0 && __srefill(fp)) 259 break; 260 } 261 *p = 0; 262 n = p - p0; 263 } 264 return (n); 265 } 266 267 static __inline int 268 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) 269 { 270 mbstate_t mbs; 271 wint_t wi; 272 int n, nread; 273 274 mbs = initial_mbs; 275 n = 0; 276 if (wcp == SUPPRESS_PTR) { 277 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 278 width-- != 0 && !iswspace(wi)) 279 n += nread; 280 if (wi != WEOF) 281 __ungetwc(wi, fp, __get_locale()); 282 } else { 283 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 284 width-- != 0 && !iswspace(wi)) { 285 *wcp++ = (wchar_t)wi; 286 n += nread; 287 } 288 if (wi != WEOF) 289 __ungetwc(wi, fp, __get_locale()); 290 *wcp = '\0'; 291 } 292 return (n); 293 } 294 295 enum parseint_state { 296 begin, 297 havesign, 298 havezero, 299 haveprefix, 300 any, 301 }; 302 303 static __inline int 304 parseint_fsm(int c, enum parseint_state *state, int *base) 305 { 306 switch (c) { 307 case '+': 308 case '-': 309 if (*state == begin) { 310 *state = havesign; 311 return 1; 312 } 313 break; 314 case '0': 315 if (*state == begin || *state == havesign) { 316 *state = havezero; 317 return 1; 318 } 319 /* FALL THROUGH */ 320 case '1': 321 case '2': 322 case '3': 323 case '4': 324 case '5': 325 case '6': 326 case '7': 327 if (*state == havezero && *base == 0) { 328 *base = 8; 329 } 330 /* FALL THROUGH */ 331 case '8': 332 case '9': 333 if (*state == begin || 334 *state == havesign) { 335 if (*base == 0) { 336 *base = 10; 337 } 338 } 339 if (*state == begin || 340 *state == havesign || 341 *state == havezero || 342 *state == haveprefix || 343 *state == any) { 344 if (*base > c - '0') { 345 *state = any; 346 return 1; 347 } 348 } 349 break; 350 case 'b': 351 if (*state == havezero) { 352 if (*base == 0 || *base == 2) { 353 *state = haveprefix; 354 *base = 2; 355 return 1; 356 } 357 } 358 /* FALL THROUGH */ 359 case 'a': 360 case 'c': 361 case 'd': 362 case 'e': 363 case 'f': 364 if (*state == begin || 365 *state == havesign || 366 *state == havezero || 367 *state == haveprefix || 368 *state == any) { 369 if (*base > c - 'a' + 10) { 370 *state = any; 371 return 1; 372 } 373 } 374 break; 375 case 'B': 376 if (*state == havezero) { 377 if (*base == 0 || *base == 2) { 378 *state = haveprefix; 379 *base = 2; 380 return 1; 381 } 382 } 383 /* FALL THROUGH */ 384 case 'A': 385 case 'C': 386 case 'D': 387 case 'E': 388 case 'F': 389 if (*state == begin || 390 *state == havesign || 391 *state == havezero || 392 *state == haveprefix || 393 *state == any) { 394 if (*base > c - 'A' + 10) { 395 *state = any; 396 return 1; 397 } 398 } 399 break; 400 case 'x': 401 case 'X': 402 if (*state == havezero) { 403 if (*base == 0 || *base == 16) { 404 *state = haveprefix; 405 *base = 16; 406 return 1; 407 } 408 } 409 break; 410 } 411 return 0; 412 } 413 414 /* 415 * Read an integer, storing it in buf. 416 * 417 * Return 0 on a match failure, and the number of characters read 418 * otherwise. 419 */ 420 static __inline int 421 parseint(FILE *fp, char * __restrict buf, int width, int base) 422 { 423 enum parseint_state state = begin; 424 char *p; 425 int c; 426 427 for (p = buf; width; width--) { 428 c = __sgetc(fp); 429 if (c == EOF) 430 break; 431 if (!parseint_fsm(c, &state, &base)) 432 break; 433 *p++ = c; 434 } 435 /* 436 * If we only had a sign, push it back. If we only had a 0b or 0x 437 * prefix (possibly preceded by a sign), we view it as "0" and 438 * push back the letter. In all other cases, if we stopped 439 * because we read a non-number character, push it back. 440 */ 441 if (state == havesign) { 442 p--; 443 (void) __ungetc(*(u_char *)p, fp); 444 } else if (state == haveprefix) { 445 p--; 446 (void) __ungetc(c, fp); 447 } else if (width && c != EOF) { 448 (void) __ungetc(c, fp); 449 } 450 return (p - buf); 451 } 452 453 /* 454 * __vfscanf - MT-safe version 455 */ 456 int 457 __vfscanf(FILE *fp, char const *fmt0, va_list ap) 458 { 459 int ret; 460 461 FLOCKFILE_CANCELSAFE(fp); 462 ret = __svfscanf(fp, __get_locale(), fmt0, ap); 463 FUNLOCKFILE_CANCELSAFE(); 464 return (ret); 465 } 466 int 467 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap) 468 { 469 int ret; 470 FIX_LOCALE(locale); 471 472 FLOCKFILE_CANCELSAFE(fp); 473 ret = __svfscanf(fp, locale, fmt0, ap); 474 FUNLOCKFILE_CANCELSAFE(); 475 return (ret); 476 } 477 478 /* 479 * __svfscanf - non-MT-safe version of __vfscanf 480 */ 481 int 482 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap) 483 { 484 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) 485 const u_char *fmt = (const u_char *)fmt0; 486 int c; /* character from format, or conversion */ 487 size_t width; /* field width, or 0 */ 488 int flags; /* flags as defined above */ 489 int nassigned; /* number of fields assigned */ 490 int nconversions; /* number of conversions */ 491 int nr; /* characters read by the current conversion */ 492 int nread; /* number of characters consumed from fp */ 493 int base; /* base argument to conversion function */ 494 char ccltab[256]; /* character class table for %[...] */ 495 char buf[BUF]; /* buffer for numeric conversions */ 496 497 ORIENT(fp, -1); 498 499 nassigned = 0; 500 nconversions = 0; 501 nread = 0; 502 for (;;) { 503 c = *fmt++; 504 if (c == 0) 505 return (nassigned); 506 if (isspace(c)) { 507 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) 508 nread++, fp->_r--, fp->_p++; 509 continue; 510 } 511 if (c != '%') 512 goto literal; 513 width = 0; 514 flags = 0; 515 /* 516 * switch on the format. continue if done; 517 * break once format type is derived. 518 */ 519 again: c = *fmt++; 520 switch (c) { 521 case '%': 522 literal: 523 if (fp->_r <= 0 && __srefill(fp)) 524 goto input_failure; 525 if (*fp->_p != c) 526 goto match_failure; 527 fp->_r--, fp->_p++; 528 nread++; 529 continue; 530 531 case '*': 532 flags |= SUPPRESS; 533 goto again; 534 case 'j': 535 flags |= INTMAXT; 536 goto again; 537 case 'l': 538 if (flags & LONG) { 539 flags &= ~LONG; 540 flags |= LONGLONG; 541 } else 542 flags |= LONG; 543 goto again; 544 case 'q': 545 flags |= LONGLONG; /* not quite */ 546 goto again; 547 case 't': 548 flags |= PTRDIFFT; 549 goto again; 550 case 'w': 551 /* 552 * Fixed-width integer types. On all platforms we 553 * support, int8_t is equivalent to char, int16_t 554 * is equivalent to short, int32_t is equivalent 555 * to int, int64_t is equivalent to long long int. 556 * Furthermore, int_fast8_t, int_fast16_t and 557 * int_fast32_t are equivalent to int, and 558 * int_fast64_t is equivalent to long long int. 559 */ 560 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT); 561 if (fmt[0] == 'f') { 562 flags |= FASTINT; 563 fmt++; 564 } else { 565 flags &= ~FASTINT; 566 } 567 if (fmt[0] == '8') { 568 if (!(flags & FASTINT)) 569 flags |= SHORTSHORT; 570 else 571 /* no flag set = 32 */ ; 572 fmt += 1; 573 } else if (fmt[0] == '1' && fmt[1] == '6') { 574 if (!(flags & FASTINT)) 575 flags |= SHORT; 576 else 577 /* no flag set = 32 */ ; 578 fmt += 2; 579 } else if (fmt[0] == '3' && fmt[1] == '2') { 580 /* no flag set = 32 */ ; 581 fmt += 2; 582 } else if (fmt[0] == '6' && fmt[1] == '4') { 583 flags |= LONGLONG; 584 fmt += 2; 585 } else { 586 goto match_failure; 587 } 588 goto again; 589 case 'z': 590 flags |= SIZET; 591 goto again; 592 case 'L': 593 flags |= LONGDBL; 594 goto again; 595 case 'h': 596 if (flags & SHORT) { 597 flags &= ~SHORT; 598 flags |= SHORTSHORT; 599 } else 600 flags |= SHORT; 601 goto again; 602 603 case '0': case '1': case '2': case '3': case '4': 604 case '5': case '6': case '7': case '8': case '9': 605 width = width * 10 + c - '0'; 606 goto again; 607 608 /* 609 * Conversions. 610 */ 611 case 'B': 612 case 'b': 613 c = CT_INT; 614 flags |= UNSIGNED; 615 base = 2; 616 break; 617 618 case 'd': 619 c = CT_INT; 620 base = 10; 621 break; 622 623 case 'i': 624 c = CT_INT; 625 base = 0; 626 break; 627 628 case 'o': 629 c = CT_INT; 630 flags |= UNSIGNED; 631 base = 8; 632 break; 633 634 case 'u': 635 c = CT_INT; 636 flags |= UNSIGNED; 637 base = 10; 638 break; 639 640 case 'X': 641 case 'x': 642 c = CT_INT; 643 flags |= UNSIGNED; 644 base = 16; 645 break; 646 647 case 'A': case 'E': case 'F': case 'G': 648 case 'a': case 'e': case 'f': case 'g': 649 c = CT_FLOAT; 650 break; 651 652 case 'S': 653 flags |= LONG; 654 /* FALLTHROUGH */ 655 case 's': 656 c = CT_STRING; 657 break; 658 659 case '[': 660 fmt = __sccl(ccltab, fmt); 661 flags |= NOSKIP; 662 c = CT_CCL; 663 break; 664 665 case 'C': 666 flags |= LONG; 667 /* FALLTHROUGH */ 668 case 'c': 669 flags |= NOSKIP; 670 c = CT_CHAR; 671 break; 672 673 case 'p': /* pointer format is like hex */ 674 flags |= POINTER; 675 c = CT_INT; /* assumes sizeof(uintmax_t) */ 676 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 677 base = 16; 678 break; 679 680 case 'n': 681 if (flags & SUPPRESS) /* ??? */ 682 continue; 683 if (flags & SHORTSHORT) 684 *va_arg(ap, char *) = nread; 685 else if (flags & SHORT) 686 *va_arg(ap, short *) = nread; 687 else if (flags & LONG) 688 *va_arg(ap, long *) = nread; 689 else if (flags & LONGLONG) 690 *va_arg(ap, long long *) = nread; 691 else if (flags & INTMAXT) 692 *va_arg(ap, intmax_t *) = nread; 693 else if (flags & SIZET) 694 *va_arg(ap, size_t *) = nread; 695 else if (flags & PTRDIFFT) 696 *va_arg(ap, ptrdiff_t *) = nread; 697 else 698 *va_arg(ap, int *) = nread; 699 continue; 700 701 default: 702 goto match_failure; 703 704 /* 705 * Disgusting backwards compatibility hack. XXX 706 */ 707 case '\0': /* compat */ 708 return (EOF); 709 } 710 711 /* 712 * We have a conversion that requires input. 713 */ 714 if (fp->_r <= 0 && __srefill(fp)) 715 goto input_failure; 716 717 /* 718 * Consume leading white space, except for formats 719 * that suppress this. 720 */ 721 if ((flags & NOSKIP) == 0) { 722 while (isspace(*fp->_p)) { 723 nread++; 724 if (--fp->_r > 0) 725 fp->_p++; 726 else if (__srefill(fp)) 727 goto input_failure; 728 } 729 /* 730 * Note that there is at least one character in 731 * the buffer, so conversions that do not set NOSKIP 732 * ca no longer result in an input failure. 733 */ 734 } 735 736 /* 737 * Do the conversion. 738 */ 739 switch (c) { 740 741 case CT_CHAR: 742 /* scan arbitrary characters (sets NOSKIP) */ 743 if (width == 0) 744 width = 1; 745 if (flags & LONG) { 746 nr = convert_wchar(fp, GETARG(wchar_t *), 747 width, locale); 748 } else { 749 nr = convert_char(fp, GETARG(char *), width); 750 } 751 if (nr < 0) 752 goto input_failure; 753 break; 754 755 case CT_CCL: 756 /* scan a (nonempty) character class (sets NOSKIP) */ 757 if (width == 0) 758 width = (size_t)~0; /* `infinity' */ 759 if (flags & LONG) { 760 nr = convert_wccl(fp, GETARG(wchar_t *), width, 761 ccltab, locale); 762 } else { 763 nr = convert_ccl(fp, GETARG(char *), width, 764 ccltab); 765 } 766 if (nr <= 0) { 767 if (nr < 0) 768 goto input_failure; 769 else /* nr == 0 */ 770 goto match_failure; 771 } 772 break; 773 774 case CT_STRING: 775 /* like CCL, but zero-length string OK, & no NOSKIP */ 776 if (width == 0) 777 width = (size_t)~0; 778 if (flags & LONG) { 779 nr = convert_wstring(fp, GETARG(wchar_t *), 780 width, locale); 781 } else { 782 nr = convert_string(fp, GETARG(char *), width); 783 } 784 if (nr < 0) 785 goto input_failure; 786 break; 787 788 case CT_INT: 789 /* scan an integer as if by the conversion function */ 790 #ifdef hardway 791 if (width == 0 || width > sizeof(buf) - 1) 792 width = sizeof(buf) - 1; 793 #else 794 /* size_t is unsigned, hence this optimisation */ 795 if (--width > sizeof(buf) - 2) 796 width = sizeof(buf) - 2; 797 width++; 798 #endif 799 nr = parseint(fp, buf, width, base); 800 if (nr == 0) 801 goto match_failure; 802 if ((flags & SUPPRESS) == 0) { 803 uintmax_t res; 804 805 buf[nr] = '\0'; 806 if ((flags & UNSIGNED) == 0) 807 res = strtoimax_l(buf, (char **)NULL, base, locale); 808 else 809 res = strtoumax_l(buf, (char **)NULL, base, locale); 810 if (flags & POINTER) 811 *va_arg(ap, void **) = 812 (void *)(uintptr_t)res; 813 else if (flags & SHORTSHORT) 814 *va_arg(ap, char *) = res; 815 else if (flags & SHORT) 816 *va_arg(ap, short *) = res; 817 else if (flags & LONG) 818 *va_arg(ap, long *) = res; 819 else if (flags & LONGLONG) 820 *va_arg(ap, long long *) = res; 821 else if (flags & INTMAXT) 822 *va_arg(ap, intmax_t *) = res; 823 else if (flags & PTRDIFFT) 824 *va_arg(ap, ptrdiff_t *) = res; 825 else if (flags & SIZET) 826 *va_arg(ap, size_t *) = res; 827 else 828 *va_arg(ap, int *) = res; 829 } 830 break; 831 832 case CT_FLOAT: 833 /* scan a floating point number as if by strtod */ 834 if (width == 0 || width > sizeof(buf) - 1) 835 width = sizeof(buf) - 1; 836 nr = parsefloat(fp, buf, buf + width, locale); 837 if (nr == 0) 838 goto match_failure; 839 if ((flags & SUPPRESS) == 0) { 840 if (flags & LONGDBL) { 841 long double res = strtold_l(buf, NULL, 842 locale); 843 *va_arg(ap, long double *) = res; 844 } else if (flags & LONG) { 845 double res = strtod_l(buf, NULL, 846 locale); 847 *va_arg(ap, double *) = res; 848 } else { 849 float res = strtof_l(buf, NULL, locale); 850 *va_arg(ap, float *) = res; 851 } 852 } 853 break; 854 } 855 if (!(flags & SUPPRESS)) 856 nassigned++; 857 nread += nr; 858 nconversions++; 859 } 860 input_failure: 861 return (nconversions != 0 ? nassigned : EOF); 862 match_failure: 863 return (nassigned); 864 } 865 866 /* 867 * Fill in the given table from the scanset at the given format 868 * (just after `['). Return a pointer to the character past the 869 * closing `]'. The table has a 1 wherever characters should be 870 * considered part of the scanset. 871 */ 872 static const u_char * 873 __sccl(char *tab, const u_char *fmt) 874 { 875 int c, n, v, i; 876 struct xlocale_collate *table = 877 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 878 879 /* first `clear' the whole table */ 880 c = *fmt++; /* first char hat => negated scanset */ 881 if (c == '^') { 882 v = 1; /* default => accept */ 883 c = *fmt++; /* get new first char */ 884 } else 885 v = 0; /* default => reject */ 886 887 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 888 (void) memset(tab, v, 256); 889 890 if (c == 0) 891 return (fmt - 1);/* format ended before closing ] */ 892 893 /* 894 * Now set the entries corresponding to the actual scanset 895 * to the opposite of the above. 896 * 897 * The first character may be ']' (or '-') without being special; 898 * the last character may be '-'. 899 */ 900 v = 1 - v; 901 for (;;) { 902 tab[c] = v; /* take character c */ 903 doswitch: 904 n = *fmt++; /* and examine the next */ 905 switch (n) { 906 907 case 0: /* format ended too soon */ 908 return (fmt - 1); 909 910 case '-': 911 /* 912 * A scanset of the form 913 * [01+-] 914 * is defined as `the digit 0, the digit 1, 915 * the character +, the character -', but 916 * the effect of a scanset such as 917 * [a-zA-Z0-9] 918 * is implementation defined. The V7 Unix 919 * scanf treats `a-z' as `the letters a through 920 * z', but treats `a-a' as `the letter a, the 921 * character -, and the letter a'. 922 * 923 * For compatibility, the `-' is not considered 924 * to define a range if the character following 925 * it is either a close bracket (required by ANSI) 926 * or is not numerically greater than the character 927 * we just stored in the table (c). 928 */ 929 n = *fmt; 930 if (n == ']' 931 || (table->__collate_load_error ? n < c : 932 __collate_range_cmp(n, c) < 0 933 ) 934 ) { 935 c = '-'; 936 break; /* resume the for(;;) */ 937 } 938 fmt++; 939 /* fill in the range */ 940 if (table->__collate_load_error) { 941 do { 942 tab[++c] = v; 943 } while (c < n); 944 } else { 945 for (i = 0; i < 256; i ++) 946 if (__collate_range_cmp(c, i) <= 0 && 947 __collate_range_cmp(i, n) <= 0 948 ) 949 tab[i] = v; 950 } 951 #if 1 /* XXX another disgusting compatibility hack */ 952 c = n; 953 /* 954 * Alas, the V7 Unix scanf also treats formats 955 * such as [a-c-e] as `the letters a through e'. 956 * This too is permitted by the standard.... 957 */ 958 goto doswitch; 959 #else 960 c = *fmt++; 961 if (c == 0) 962 return (fmt - 1); 963 if (c == ']') 964 return (fmt); 965 #endif 966 break; 967 968 case ']': /* end of scanset */ 969 return (fmt); 970 971 default: /* just another character */ 972 c = n; 973 break; 974 } 975 } 976 /* NOTREACHED */ 977 } 978 979 static int 980 parsefloat(FILE *fp, char *buf, char *end, locale_t locale) 981 { 982 char *commit, *p; 983 int infnanpos = 0, decptpos = 0; 984 enum { 985 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, 986 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS 987 } state = S_START; 988 unsigned char c; 989 const char *decpt = localeconv_l(locale)->decimal_point; 990 _Bool gotmantdig = 0, ishex = 0; 991 992 /* 993 * We set commit = p whenever the string we have read so far 994 * constitutes a valid representation of a floating point 995 * number by itself. At some point, the parse will complete 996 * or fail, and we will ungetc() back to the last commit point. 997 * To ensure that the file offset gets updated properly, it is 998 * always necessary to read at least one character that doesn't 999 * match; thus, we can't short-circuit "infinity" or "nan(...)". 1000 */ 1001 commit = buf - 1; 1002 for (p = buf; p < end; ) { 1003 c = *fp->_p; 1004 reswitch: 1005 switch (state) { 1006 case S_START: 1007 state = S_GOTSIGN; 1008 if (c == '-' || c == '+') 1009 break; 1010 else 1011 goto reswitch; 1012 case S_GOTSIGN: 1013 switch (c) { 1014 case '0': 1015 state = S_MAYBEHEX; 1016 commit = p; 1017 break; 1018 case 'I': 1019 case 'i': 1020 state = S_INF; 1021 break; 1022 case 'N': 1023 case 'n': 1024 state = S_NAN; 1025 break; 1026 default: 1027 state = S_DIGITS; 1028 goto reswitch; 1029 } 1030 break; 1031 case S_INF: 1032 if (infnanpos > 6 || 1033 (c != "nfinity"[infnanpos] && 1034 c != "NFINITY"[infnanpos])) 1035 goto parsedone; 1036 if (infnanpos == 1 || infnanpos == 6) 1037 commit = p; /* inf or infinity */ 1038 infnanpos++; 1039 break; 1040 case S_NAN: 1041 switch (infnanpos) { 1042 case 0: 1043 if (c != 'A' && c != 'a') 1044 goto parsedone; 1045 break; 1046 case 1: 1047 if (c != 'N' && c != 'n') 1048 goto parsedone; 1049 else 1050 commit = p; 1051 break; 1052 case 2: 1053 if (c != '(') 1054 goto parsedone; 1055 break; 1056 default: 1057 if (c == ')') { 1058 commit = p; 1059 state = S_DONE; 1060 } else if (!isalnum(c) && c != '_') 1061 goto parsedone; 1062 break; 1063 } 1064 infnanpos++; 1065 break; 1066 case S_DONE: 1067 goto parsedone; 1068 case S_MAYBEHEX: 1069 state = S_DIGITS; 1070 if (c == 'X' || c == 'x') { 1071 ishex = 1; 1072 break; 1073 } else { /* we saw a '0', but no 'x' */ 1074 gotmantdig = 1; 1075 goto reswitch; 1076 } 1077 case S_DIGITS: 1078 if ((ishex && isxdigit(c)) || isdigit(c)) { 1079 gotmantdig = 1; 1080 commit = p; 1081 break; 1082 } else { 1083 state = S_DECPT; 1084 goto reswitch; 1085 } 1086 case S_DECPT: 1087 if (c == decpt[decptpos]) { 1088 if (decpt[++decptpos] == '\0') { 1089 /* We read the complete decpt seq. */ 1090 state = S_FRAC; 1091 if (gotmantdig) 1092 commit = p; 1093 } 1094 break; 1095 } else if (!decptpos) { 1096 /* We didn't read any decpt characters. */ 1097 state = S_FRAC; 1098 goto reswitch; 1099 } else { 1100 /* 1101 * We read part of a multibyte decimal point, 1102 * but the rest is invalid, so bail. 1103 */ 1104 goto parsedone; 1105 } 1106 case S_FRAC: 1107 if (((c == 'E' || c == 'e') && !ishex) || 1108 ((c == 'P' || c == 'p') && ishex)) { 1109 if (!gotmantdig) 1110 goto parsedone; 1111 else 1112 state = S_EXP; 1113 } else if ((ishex && isxdigit(c)) || isdigit(c)) { 1114 commit = p; 1115 gotmantdig = 1; 1116 } else 1117 goto parsedone; 1118 break; 1119 case S_EXP: 1120 state = S_EXPDIGITS; 1121 if (c == '-' || c == '+') 1122 break; 1123 else 1124 goto reswitch; 1125 case S_EXPDIGITS: 1126 if (isdigit(c)) 1127 commit = p; 1128 else 1129 goto parsedone; 1130 break; 1131 default: 1132 abort(); 1133 } 1134 *p++ = c; 1135 if (--fp->_r > 0) 1136 fp->_p++; 1137 else if (__srefill(fp)) 1138 break; /* EOF */ 1139 } 1140 1141 parsedone: 1142 while (commit < --p) 1143 __ungetc(*(u_char *)p, fp); 1144 *++commit = '\0'; 1145 return (commit - buf); 1146 } 1147