1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Copyright (c) 2011 The FreeBSD Foundation 8 * 9 * Copyright (c) 2023 Dag-Erling Smørgrav 10 * 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Chris Torek. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 */ 41 42 #if defined(LIBC_SCCS) && !defined(lint) 43 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; 44 #endif /* LIBC_SCCS and not lint */ 45 #include <sys/cdefs.h> 46 #include "namespace.h" 47 #include <ctype.h> 48 #include <inttypes.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <stddef.h> 52 #include <stdarg.h> 53 #include <string.h> 54 #include <wchar.h> 55 #include <wctype.h> 56 #include "un-namespace.h" 57 58 #include "collate.h" 59 #include "libc_private.h" 60 #include "local.h" 61 #include "xlocale_private.h" 62 63 #ifndef NO_FLOATING_POINT 64 #include <locale.h> 65 #endif 66 67 #define BUF 513 /* Maximum length of numeric string. */ 68 69 /* 70 * Flags used during conversion. 71 */ 72 #define LONG 0x01 /* l: long or double */ 73 #define LONGDBL 0x02 /* L: long double */ 74 #define SHORT 0x04 /* h: short */ 75 #define SUPPRESS 0x08 /* *: suppress assignment */ 76 #define POINTER 0x10 /* p: void * (as hex) */ 77 #define NOSKIP 0x20 /* [ or c: do not skip blanks */ 78 #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 79 #define INTMAXT 0x800 /* j: intmax_t */ 80 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 81 #define SIZET 0x2000 /* z: size_t */ 82 #define SHORTSHORT 0x4000 /* hh: char */ 83 #define UNSIGNED 0x8000 /* %[oupxX] conversions */ 84 85 /* 86 * Conversion types. 87 */ 88 #define CT_CHAR 0 /* %c conversion */ 89 #define CT_CCL 1 /* %[...] conversion */ 90 #define CT_STRING 2 /* %s conversion */ 91 #define CT_INT 3 /* %[dioupxX] conversion */ 92 #define CT_FLOAT 4 /* %[efgEFG] conversion */ 93 94 static const u_char *__sccl(char *, const u_char *); 95 #ifndef NO_FLOATING_POINT 96 static int parsefloat(FILE *, char *, char *, locale_t); 97 #endif 98 99 __weak_reference(__vfscanf, vfscanf); 100 101 /* 102 * Conversion functions are passed a pointer to this object instead of 103 * a real parameter to indicate that the assignment-suppression (*) 104 * flag was specified. We could use a NULL pointer to indicate this, 105 * but that would mask bugs in applications that call scanf() with a 106 * NULL pointer. 107 */ 108 static const int suppress; 109 #define SUPPRESS_PTR ((void *)&suppress) 110 111 static const mbstate_t initial_mbs; 112 113 /* 114 * The following conversion functions return the number of characters consumed, 115 * or -1 on input failure. Character class conversion returns 0 on match 116 * failure. 117 */ 118 119 static __inline int 120 convert_char(FILE *fp, char * p, int width) 121 { 122 int n; 123 124 if (p == SUPPRESS_PTR) { 125 size_t sum = 0; 126 for (;;) { 127 if ((n = fp->_r) < width) { 128 sum += n; 129 width -= n; 130 fp->_p += n; 131 if (__srefill(fp)) { 132 if (sum == 0) 133 return (-1); 134 break; 135 } 136 } else { 137 sum += width; 138 fp->_r -= width; 139 fp->_p += width; 140 break; 141 } 142 } 143 return (sum); 144 } else { 145 size_t r = __fread(p, 1, width, fp); 146 147 if (r == 0) 148 return (-1); 149 return (r); 150 } 151 } 152 153 static __inline int 154 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) 155 { 156 mbstate_t mbs; 157 int n, nread; 158 wint_t wi; 159 160 mbs = initial_mbs; 161 n = 0; 162 while (width-- != 0 && 163 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) { 164 if (wcp != SUPPRESS_PTR) 165 *wcp++ = (wchar_t)wi; 166 n += nread; 167 } 168 if (n == 0) 169 return (-1); 170 return (n); 171 } 172 173 static __inline int 174 convert_ccl(FILE *fp, char * p, int width, const char *ccltab) 175 { 176 char *p0; 177 int n; 178 179 if (p == SUPPRESS_PTR) { 180 n = 0; 181 while (ccltab[*fp->_p]) { 182 n++, fp->_r--, fp->_p++; 183 if (--width == 0) 184 break; 185 if (fp->_r <= 0 && __srefill(fp)) { 186 if (n == 0) 187 return (-1); 188 break; 189 } 190 } 191 } else { 192 p0 = p; 193 while (ccltab[*fp->_p]) { 194 fp->_r--; 195 *p++ = *fp->_p++; 196 if (--width == 0) 197 break; 198 if (fp->_r <= 0 && __srefill(fp)) { 199 if (p == p0) 200 return (-1); 201 break; 202 } 203 } 204 n = p - p0; 205 if (n == 0) 206 return (0); 207 *p = 0; 208 } 209 return (n); 210 } 211 212 static __inline int 213 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab, 214 locale_t locale) 215 { 216 mbstate_t mbs; 217 wint_t wi; 218 int n, nread; 219 220 mbs = initial_mbs; 221 n = 0; 222 if (wcp == SUPPRESS_PTR) { 223 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 224 width-- != 0 && ccltab[wctob(wi)]) 225 n += nread; 226 if (wi != WEOF) 227 __ungetwc(wi, fp, __get_locale()); 228 } else { 229 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 230 width-- != 0 && ccltab[wctob(wi)]) { 231 *wcp++ = (wchar_t)wi; 232 n += nread; 233 } 234 if (wi != WEOF) 235 __ungetwc(wi, fp, __get_locale()); 236 if (n == 0) 237 return (0); 238 *wcp = 0; 239 } 240 return (n); 241 } 242 243 static __inline int 244 convert_string(FILE *fp, char * p, int width) 245 { 246 char *p0; 247 int n; 248 249 if (p == SUPPRESS_PTR) { 250 n = 0; 251 while (!isspace(*fp->_p)) { 252 n++, fp->_r--, fp->_p++; 253 if (--width == 0) 254 break; 255 if (fp->_r <= 0 && __srefill(fp)) 256 break; 257 } 258 } else { 259 p0 = p; 260 while (!isspace(*fp->_p)) { 261 fp->_r--; 262 *p++ = *fp->_p++; 263 if (--width == 0) 264 break; 265 if (fp->_r <= 0 && __srefill(fp)) 266 break; 267 } 268 *p = 0; 269 n = p - p0; 270 } 271 return (n); 272 } 273 274 static __inline int 275 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) 276 { 277 mbstate_t mbs; 278 wint_t wi; 279 int n, nread; 280 281 mbs = initial_mbs; 282 n = 0; 283 if (wcp == SUPPRESS_PTR) { 284 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 285 width-- != 0 && !iswspace(wi)) 286 n += nread; 287 if (wi != WEOF) 288 __ungetwc(wi, fp, __get_locale()); 289 } else { 290 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 291 width-- != 0 && !iswspace(wi)) { 292 *wcp++ = (wchar_t)wi; 293 n += nread; 294 } 295 if (wi != WEOF) 296 __ungetwc(wi, fp, __get_locale()); 297 *wcp = '\0'; 298 } 299 return (n); 300 } 301 302 enum parseint_state { 303 begin, 304 havesign, 305 havezero, 306 haveprefix, 307 any, 308 }; 309 310 static __inline int 311 parseint_fsm(int c, enum parseint_state *state, int *base) 312 { 313 switch (c) { 314 case '+': 315 case '-': 316 if (*state == begin) { 317 *state = havesign; 318 return 1; 319 } 320 break; 321 case '0': 322 if (*state == begin || *state == havesign) { 323 *state = havezero; 324 } else { 325 *state = any; 326 } 327 return 1; 328 case '1': 329 case '2': 330 case '3': 331 case '4': 332 case '5': 333 case '6': 334 case '7': 335 if (*state == havezero && *base == 0) { 336 *base = 8; 337 } 338 /* FALL THROUGH */ 339 case '8': 340 case '9': 341 if (*state == begin || 342 *state == havesign) { 343 if (*base == 0) { 344 *base = 10; 345 } 346 } 347 if (*state == begin || 348 *state == havesign || 349 *state == havezero || 350 *state == haveprefix || 351 *state == any) { 352 if (*base > c - '0') { 353 *state = any; 354 return 1; 355 } 356 } 357 break; 358 case 'b': 359 if (*state == havezero) { 360 if (*base == 0 || *base == 2) { 361 *state = haveprefix; 362 *base = 2; 363 return 1; 364 } 365 } 366 /* FALL THROUGH */ 367 case 'a': 368 case 'c': 369 case 'd': 370 case 'e': 371 case 'f': 372 if (*state == begin || 373 *state == havesign || 374 *state == havezero || 375 *state == haveprefix || 376 *state == any) { 377 if (*base > c - 'a' + 10) { 378 *state = any; 379 return 1; 380 } 381 } 382 break; 383 case 'B': 384 if (*state == havezero) { 385 if (*base == 0 || *base == 2) { 386 *state = haveprefix; 387 *base = 2; 388 return 1; 389 } 390 } 391 /* FALL THROUGH */ 392 case 'A': 393 case 'C': 394 case 'D': 395 case 'E': 396 case 'F': 397 if (*state == begin || 398 *state == havesign || 399 *state == havezero || 400 *state == haveprefix || 401 *state == any) { 402 if (*base > c - 'A' + 10) { 403 *state = any; 404 return 1; 405 } 406 } 407 break; 408 case 'x': 409 case 'X': 410 if (*state == havezero) { 411 if (*base == 0 || *base == 16) { 412 *state = haveprefix; 413 *base = 16; 414 return 1; 415 } 416 } 417 break; 418 } 419 return 0; 420 } 421 422 /* 423 * Read an integer, storing it in buf. 424 * 425 * Return 0 on a match failure, and the number of characters read 426 * otherwise. 427 */ 428 static __inline int 429 parseint(FILE *fp, char * __restrict buf, int width, int base) 430 { 431 enum parseint_state state = begin; 432 char *p; 433 int c; 434 435 for (p = buf; width; width--) { 436 c = __sgetc(fp); 437 if (c == EOF) 438 break; 439 if (!parseint_fsm(c, &state, &base)) 440 break; 441 *p++ = c; 442 } 443 /* 444 * If we only had a sign, push it back. If we only had a 0b or 0x 445 * prefix (possibly preceded by a sign), we view it as "0" and 446 * push back the letter. In all other cases, if we stopped 447 * because we read a non-number character, push it back. 448 */ 449 if (state == havesign) { 450 p--; 451 (void) __ungetc(*(u_char *)p, fp); 452 } else if (state == haveprefix) { 453 p--; 454 (void) __ungetc(c, fp); 455 } else if (width && c != EOF) { 456 (void) __ungetc(c, fp); 457 } 458 return (p - buf); 459 } 460 461 /* 462 * __vfscanf - MT-safe version 463 */ 464 int 465 __vfscanf(FILE *fp, char const *fmt0, va_list ap) 466 { 467 int ret; 468 469 FLOCKFILE_CANCELSAFE(fp); 470 ret = __svfscanf(fp, __get_locale(), fmt0, ap); 471 FUNLOCKFILE_CANCELSAFE(); 472 return (ret); 473 } 474 int 475 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap) 476 { 477 int ret; 478 FIX_LOCALE(locale); 479 480 FLOCKFILE_CANCELSAFE(fp); 481 ret = __svfscanf(fp, locale, fmt0, ap); 482 FUNLOCKFILE_CANCELSAFE(); 483 return (ret); 484 } 485 486 /* 487 * __svfscanf - non-MT-safe version of __vfscanf 488 */ 489 int 490 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap) 491 { 492 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) 493 const u_char *fmt = (const u_char *)fmt0; 494 int c; /* character from format, or conversion */ 495 size_t width; /* field width, or 0 */ 496 int flags; /* flags as defined above */ 497 int nassigned; /* number of fields assigned */ 498 int nconversions; /* number of conversions */ 499 int nr; /* characters read by the current conversion */ 500 int nread; /* number of characters consumed from fp */ 501 int base; /* base argument to conversion function */ 502 char ccltab[256]; /* character class table for %[...] */ 503 char buf[BUF]; /* buffer for numeric conversions */ 504 505 ORIENT(fp, -1); 506 507 nassigned = 0; 508 nconversions = 0; 509 nread = 0; 510 for (;;) { 511 c = *fmt++; 512 if (c == 0) 513 return (nassigned); 514 if (isspace(c)) { 515 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) 516 nread++, fp->_r--, fp->_p++; 517 continue; 518 } 519 if (c != '%') 520 goto literal; 521 width = 0; 522 flags = 0; 523 /* 524 * switch on the format. continue if done; 525 * break once format type is derived. 526 */ 527 again: c = *fmt++; 528 switch (c) { 529 case '%': 530 literal: 531 if (fp->_r <= 0 && __srefill(fp)) 532 goto input_failure; 533 if (*fp->_p != c) 534 goto match_failure; 535 fp->_r--, fp->_p++; 536 nread++; 537 continue; 538 539 case '*': 540 flags |= SUPPRESS; 541 goto again; 542 case 'j': 543 flags |= INTMAXT; 544 goto again; 545 case 'l': 546 if (flags & LONG) { 547 flags &= ~LONG; 548 flags |= LONGLONG; 549 } else 550 flags |= LONG; 551 goto again; 552 case 'q': 553 flags |= LONGLONG; /* not quite */ 554 goto again; 555 case 't': 556 flags |= PTRDIFFT; 557 goto again; 558 case 'z': 559 flags |= SIZET; 560 goto again; 561 case 'L': 562 flags |= LONGDBL; 563 goto again; 564 case 'h': 565 if (flags & SHORT) { 566 flags &= ~SHORT; 567 flags |= SHORTSHORT; 568 } else 569 flags |= SHORT; 570 goto again; 571 572 case '0': case '1': case '2': case '3': case '4': 573 case '5': case '6': case '7': case '8': case '9': 574 width = width * 10 + c - '0'; 575 goto again; 576 577 /* 578 * Conversions. 579 */ 580 case 'B': 581 case 'b': 582 c = CT_INT; 583 flags |= UNSIGNED; 584 base = 2; 585 break; 586 587 case 'd': 588 c = CT_INT; 589 base = 10; 590 break; 591 592 case 'i': 593 c = CT_INT; 594 base = 0; 595 break; 596 597 case 'o': 598 c = CT_INT; 599 flags |= UNSIGNED; 600 base = 8; 601 break; 602 603 case 'u': 604 c = CT_INT; 605 flags |= UNSIGNED; 606 base = 10; 607 break; 608 609 case 'X': 610 case 'x': 611 c = CT_INT; 612 flags |= UNSIGNED; 613 base = 16; 614 break; 615 616 #ifndef NO_FLOATING_POINT 617 case 'A': case 'E': case 'F': case 'G': 618 case 'a': case 'e': case 'f': case 'g': 619 c = CT_FLOAT; 620 break; 621 #endif 622 623 case 'S': 624 flags |= LONG; 625 /* FALLTHROUGH */ 626 case 's': 627 c = CT_STRING; 628 break; 629 630 case '[': 631 fmt = __sccl(ccltab, fmt); 632 flags |= NOSKIP; 633 c = CT_CCL; 634 break; 635 636 case 'C': 637 flags |= LONG; 638 /* FALLTHROUGH */ 639 case 'c': 640 flags |= NOSKIP; 641 c = CT_CHAR; 642 break; 643 644 case 'p': /* pointer format is like hex */ 645 flags |= POINTER; 646 c = CT_INT; /* assumes sizeof(uintmax_t) */ 647 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 648 base = 16; 649 break; 650 651 case 'n': 652 if (flags & SUPPRESS) /* ??? */ 653 continue; 654 if (flags & SHORTSHORT) 655 *va_arg(ap, char *) = nread; 656 else if (flags & SHORT) 657 *va_arg(ap, short *) = nread; 658 else if (flags & LONG) 659 *va_arg(ap, long *) = nread; 660 else if (flags & LONGLONG) 661 *va_arg(ap, long long *) = nread; 662 else if (flags & INTMAXT) 663 *va_arg(ap, intmax_t *) = nread; 664 else if (flags & SIZET) 665 *va_arg(ap, size_t *) = nread; 666 else if (flags & PTRDIFFT) 667 *va_arg(ap, ptrdiff_t *) = nread; 668 else 669 *va_arg(ap, int *) = nread; 670 continue; 671 672 default: 673 goto match_failure; 674 675 /* 676 * Disgusting backwards compatibility hack. XXX 677 */ 678 case '\0': /* compat */ 679 return (EOF); 680 } 681 682 /* 683 * We have a conversion that requires input. 684 */ 685 if (fp->_r <= 0 && __srefill(fp)) 686 goto input_failure; 687 688 /* 689 * Consume leading white space, except for formats 690 * that suppress this. 691 */ 692 if ((flags & NOSKIP) == 0) { 693 while (isspace(*fp->_p)) { 694 nread++; 695 if (--fp->_r > 0) 696 fp->_p++; 697 else if (__srefill(fp)) 698 goto input_failure; 699 } 700 /* 701 * Note that there is at least one character in 702 * the buffer, so conversions that do not set NOSKIP 703 * ca no longer result in an input failure. 704 */ 705 } 706 707 /* 708 * Do the conversion. 709 */ 710 switch (c) { 711 712 case CT_CHAR: 713 /* scan arbitrary characters (sets NOSKIP) */ 714 if (width == 0) 715 width = 1; 716 if (flags & LONG) { 717 nr = convert_wchar(fp, GETARG(wchar_t *), 718 width, locale); 719 } else { 720 nr = convert_char(fp, GETARG(char *), width); 721 } 722 if (nr < 0) 723 goto input_failure; 724 break; 725 726 case CT_CCL: 727 /* scan a (nonempty) character class (sets NOSKIP) */ 728 if (width == 0) 729 width = (size_t)~0; /* `infinity' */ 730 if (flags & LONG) { 731 nr = convert_wccl(fp, GETARG(wchar_t *), width, 732 ccltab, locale); 733 } else { 734 nr = convert_ccl(fp, GETARG(char *), width, 735 ccltab); 736 } 737 if (nr <= 0) { 738 if (nr < 0) 739 goto input_failure; 740 else /* nr == 0 */ 741 goto match_failure; 742 } 743 break; 744 745 case CT_STRING: 746 /* like CCL, but zero-length string OK, & no NOSKIP */ 747 if (width == 0) 748 width = (size_t)~0; 749 if (flags & LONG) { 750 nr = convert_wstring(fp, GETARG(wchar_t *), 751 width, locale); 752 } else { 753 nr = convert_string(fp, GETARG(char *), width); 754 } 755 if (nr < 0) 756 goto input_failure; 757 break; 758 759 case CT_INT: 760 /* scan an integer as if by the conversion function */ 761 #ifdef hardway 762 if (width == 0 || width > sizeof(buf) - 1) 763 width = sizeof(buf) - 1; 764 #else 765 /* size_t is unsigned, hence this optimisation */ 766 if (--width > sizeof(buf) - 2) 767 width = sizeof(buf) - 2; 768 width++; 769 #endif 770 nr = parseint(fp, buf, width, base); 771 if (nr == 0) 772 goto match_failure; 773 if ((flags & SUPPRESS) == 0) { 774 uintmax_t res; 775 776 buf[nr] = '\0'; 777 if ((flags & UNSIGNED) == 0) 778 res = strtoimax_l(buf, (char **)NULL, base, locale); 779 else 780 res = strtoumax_l(buf, (char **)NULL, base, locale); 781 if (flags & POINTER) 782 *va_arg(ap, void **) = 783 (void *)(uintptr_t)res; 784 else if (flags & SHORTSHORT) 785 *va_arg(ap, char *) = res; 786 else if (flags & SHORT) 787 *va_arg(ap, short *) = res; 788 else if (flags & LONG) 789 *va_arg(ap, long *) = res; 790 else if (flags & LONGLONG) 791 *va_arg(ap, long long *) = res; 792 else if (flags & INTMAXT) 793 *va_arg(ap, intmax_t *) = res; 794 else if (flags & PTRDIFFT) 795 *va_arg(ap, ptrdiff_t *) = res; 796 else if (flags & SIZET) 797 *va_arg(ap, size_t *) = res; 798 else 799 *va_arg(ap, int *) = res; 800 } 801 break; 802 803 #ifndef NO_FLOATING_POINT 804 case CT_FLOAT: 805 /* scan a floating point number as if by strtod */ 806 if (width == 0 || width > sizeof(buf) - 1) 807 width = sizeof(buf) - 1; 808 nr = parsefloat(fp, buf, buf + width, locale); 809 if (nr == 0) 810 goto match_failure; 811 if ((flags & SUPPRESS) == 0) { 812 if (flags & LONGDBL) { 813 long double res = strtold_l(buf, NULL, 814 locale); 815 *va_arg(ap, long double *) = res; 816 } else if (flags & LONG) { 817 double res = strtod_l(buf, NULL, 818 locale); 819 *va_arg(ap, double *) = res; 820 } else { 821 float res = strtof_l(buf, NULL, locale); 822 *va_arg(ap, float *) = res; 823 } 824 } 825 break; 826 #endif /* !NO_FLOATING_POINT */ 827 } 828 if (!(flags & SUPPRESS)) 829 nassigned++; 830 nread += nr; 831 nconversions++; 832 } 833 input_failure: 834 return (nconversions != 0 ? nassigned : EOF); 835 match_failure: 836 return (nassigned); 837 } 838 839 /* 840 * Fill in the given table from the scanset at the given format 841 * (just after `['). Return a pointer to the character past the 842 * closing `]'. The table has a 1 wherever characters should be 843 * considered part of the scanset. 844 */ 845 static const u_char * 846 __sccl(char *tab, const u_char *fmt) 847 { 848 int c, n, v, i; 849 struct xlocale_collate *table = 850 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 851 852 /* first `clear' the whole table */ 853 c = *fmt++; /* first char hat => negated scanset */ 854 if (c == '^') { 855 v = 1; /* default => accept */ 856 c = *fmt++; /* get new first char */ 857 } else 858 v = 0; /* default => reject */ 859 860 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 861 (void) memset(tab, v, 256); 862 863 if (c == 0) 864 return (fmt - 1);/* format ended before closing ] */ 865 866 /* 867 * Now set the entries corresponding to the actual scanset 868 * to the opposite of the above. 869 * 870 * The first character may be ']' (or '-') without being special; 871 * the last character may be '-'. 872 */ 873 v = 1 - v; 874 for (;;) { 875 tab[c] = v; /* take character c */ 876 doswitch: 877 n = *fmt++; /* and examine the next */ 878 switch (n) { 879 880 case 0: /* format ended too soon */ 881 return (fmt - 1); 882 883 case '-': 884 /* 885 * A scanset of the form 886 * [01+-] 887 * is defined as `the digit 0, the digit 1, 888 * the character +, the character -', but 889 * the effect of a scanset such as 890 * [a-zA-Z0-9] 891 * is implementation defined. The V7 Unix 892 * scanf treats `a-z' as `the letters a through 893 * z', but treats `a-a' as `the letter a, the 894 * character -, and the letter a'. 895 * 896 * For compatibility, the `-' is not considered 897 * to define a range if the character following 898 * it is either a close bracket (required by ANSI) 899 * or is not numerically greater than the character 900 * we just stored in the table (c). 901 */ 902 n = *fmt; 903 if (n == ']' 904 || (table->__collate_load_error ? n < c : 905 __collate_range_cmp(n, c) < 0 906 ) 907 ) { 908 c = '-'; 909 break; /* resume the for(;;) */ 910 } 911 fmt++; 912 /* fill in the range */ 913 if (table->__collate_load_error) { 914 do { 915 tab[++c] = v; 916 } while (c < n); 917 } else { 918 for (i = 0; i < 256; i ++) 919 if (__collate_range_cmp(c, i) <= 0 && 920 __collate_range_cmp(i, n) <= 0 921 ) 922 tab[i] = v; 923 } 924 #if 1 /* XXX another disgusting compatibility hack */ 925 c = n; 926 /* 927 * Alas, the V7 Unix scanf also treats formats 928 * such as [a-c-e] as `the letters a through e'. 929 * This too is permitted by the standard.... 930 */ 931 goto doswitch; 932 #else 933 c = *fmt++; 934 if (c == 0) 935 return (fmt - 1); 936 if (c == ']') 937 return (fmt); 938 #endif 939 break; 940 941 case ']': /* end of scanset */ 942 return (fmt); 943 944 default: /* just another character */ 945 c = n; 946 break; 947 } 948 } 949 /* NOTREACHED */ 950 } 951 952 #ifndef NO_FLOATING_POINT 953 static int 954 parsefloat(FILE *fp, char *buf, char *end, locale_t locale) 955 { 956 char *commit, *p; 957 int infnanpos = 0, decptpos = 0; 958 enum { 959 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, 960 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS 961 } state = S_START; 962 unsigned char c; 963 const char *decpt = localeconv_l(locale)->decimal_point; 964 _Bool gotmantdig = 0, ishex = 0; 965 966 /* 967 * We set commit = p whenever the string we have read so far 968 * constitutes a valid representation of a floating point 969 * number by itself. At some point, the parse will complete 970 * or fail, and we will ungetc() back to the last commit point. 971 * To ensure that the file offset gets updated properly, it is 972 * always necessary to read at least one character that doesn't 973 * match; thus, we can't short-circuit "infinity" or "nan(...)". 974 */ 975 commit = buf - 1; 976 for (p = buf; p < end; ) { 977 c = *fp->_p; 978 reswitch: 979 switch (state) { 980 case S_START: 981 state = S_GOTSIGN; 982 if (c == '-' || c == '+') 983 break; 984 else 985 goto reswitch; 986 case S_GOTSIGN: 987 switch (c) { 988 case '0': 989 state = S_MAYBEHEX; 990 commit = p; 991 break; 992 case 'I': 993 case 'i': 994 state = S_INF; 995 break; 996 case 'N': 997 case 'n': 998 state = S_NAN; 999 break; 1000 default: 1001 state = S_DIGITS; 1002 goto reswitch; 1003 } 1004 break; 1005 case S_INF: 1006 if (infnanpos > 6 || 1007 (c != "nfinity"[infnanpos] && 1008 c != "NFINITY"[infnanpos])) 1009 goto parsedone; 1010 if (infnanpos == 1 || infnanpos == 6) 1011 commit = p; /* inf or infinity */ 1012 infnanpos++; 1013 break; 1014 case S_NAN: 1015 switch (infnanpos) { 1016 case 0: 1017 if (c != 'A' && c != 'a') 1018 goto parsedone; 1019 break; 1020 case 1: 1021 if (c != 'N' && c != 'n') 1022 goto parsedone; 1023 else 1024 commit = p; 1025 break; 1026 case 2: 1027 if (c != '(') 1028 goto parsedone; 1029 break; 1030 default: 1031 if (c == ')') { 1032 commit = p; 1033 state = S_DONE; 1034 } else if (!isalnum(c) && c != '_') 1035 goto parsedone; 1036 break; 1037 } 1038 infnanpos++; 1039 break; 1040 case S_DONE: 1041 goto parsedone; 1042 case S_MAYBEHEX: 1043 state = S_DIGITS; 1044 if (c == 'X' || c == 'x') { 1045 ishex = 1; 1046 break; 1047 } else { /* we saw a '0', but no 'x' */ 1048 gotmantdig = 1; 1049 goto reswitch; 1050 } 1051 case S_DIGITS: 1052 if ((ishex && isxdigit(c)) || isdigit(c)) { 1053 gotmantdig = 1; 1054 commit = p; 1055 break; 1056 } else { 1057 state = S_DECPT; 1058 goto reswitch; 1059 } 1060 case S_DECPT: 1061 if (c == decpt[decptpos]) { 1062 if (decpt[++decptpos] == '\0') { 1063 /* We read the complete decpt seq. */ 1064 state = S_FRAC; 1065 if (gotmantdig) 1066 commit = p; 1067 } 1068 break; 1069 } else if (!decptpos) { 1070 /* We didn't read any decpt characters. */ 1071 state = S_FRAC; 1072 goto reswitch; 1073 } else { 1074 /* 1075 * We read part of a multibyte decimal point, 1076 * but the rest is invalid, so bail. 1077 */ 1078 goto parsedone; 1079 } 1080 case S_FRAC: 1081 if (((c == 'E' || c == 'e') && !ishex) || 1082 ((c == 'P' || c == 'p') && ishex)) { 1083 if (!gotmantdig) 1084 goto parsedone; 1085 else 1086 state = S_EXP; 1087 } else if ((ishex && isxdigit(c)) || isdigit(c)) { 1088 commit = p; 1089 gotmantdig = 1; 1090 } else 1091 goto parsedone; 1092 break; 1093 case S_EXP: 1094 state = S_EXPDIGITS; 1095 if (c == '-' || c == '+') 1096 break; 1097 else 1098 goto reswitch; 1099 case S_EXPDIGITS: 1100 if (isdigit(c)) 1101 commit = p; 1102 else 1103 goto parsedone; 1104 break; 1105 default: 1106 abort(); 1107 } 1108 *p++ = c; 1109 if (--fp->_r > 0) 1110 fp->_p++; 1111 else if (__srefill(fp)) 1112 break; /* EOF */ 1113 } 1114 1115 parsedone: 1116 while (commit < --p) 1117 __ungetc(*(u_char *)p, fp); 1118 *++commit = '\0'; 1119 return (commit - buf); 1120 } 1121 #endif 1122