1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Chris Torek. 9 * 10 * Copyright (c) 2011 The FreeBSD Foundation 11 * 12 * Copyright (c) 2023 Dag-Erling Smørgrav 13 * 14 * Portions of this software were developed by David Chisnall 15 * under sponsorship from the FreeBSD Foundation. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 */ 41 42 #include "namespace.h" 43 #include <ctype.h> 44 #include <inttypes.h> 45 #include <limits.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <stddef.h> 49 #include <stdarg.h> 50 #include <string.h> 51 #include <wchar.h> 52 #include <wctype.h> 53 #include "un-namespace.h" 54 55 #include "libc_private.h" 56 #include "local.h" 57 #include "xlocale_private.h" 58 59 #define BUF 513 /* Maximum length of numeric string. */ 60 61 /* 62 * Flags used during conversion. 63 */ 64 #define LONG 0x01 /* l: long or double */ 65 #define LONGDBL 0x02 /* L: long double */ 66 #define SHORT 0x04 /* h: short */ 67 #define SUPPRESS 0x08 /* *: suppress assignment */ 68 #define POINTER 0x10 /* p: void * (as hex) */ 69 #define NOSKIP 0x20 /* [ or c: do not skip blanks */ 70 #define FASTINT 0x200 /* wfN: int_fastN_t */ 71 #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 72 #define INTMAXT 0x800 /* j: intmax_t */ 73 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 74 #define SIZET 0x2000 /* z: size_t */ 75 #define SHORTSHORT 0x4000 /* hh: char */ 76 #define UNSIGNED 0x8000 /* %[oupxX] conversions */ 77 78 /* 79 * Conversion types. 80 */ 81 #define CT_CHAR 0 /* %c conversion */ 82 #define CT_CCL 1 /* %[...] conversion */ 83 #define CT_STRING 2 /* %s conversion */ 84 #define CT_INT 3 /* %[dioupxX] conversion */ 85 #define CT_FLOAT 4 /* %[efgEFG] conversion */ 86 87 static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t); 88 89 struct ccl { 90 const wchar_t *start; /* character class start */ 91 const wchar_t *end; /* character class end */ 92 int compl; /* ccl is complemented? */ 93 }; 94 95 static __inline int 96 inccl(const struct ccl *ccl, wint_t wi) 97 { 98 99 if (ccl->compl) { 100 return (wmemchr(ccl->start, wi, ccl->end - ccl->start) 101 == NULL); 102 } else { 103 return (wmemchr(ccl->start, wi, ccl->end - ccl->start) != NULL); 104 } 105 } 106 107 /* 108 * Conversion functions are passed a pointer to this object instead of 109 * a real parameter to indicate that the assignment-suppression (*) 110 * flag was specified. We could use a NULL pointer to indicate this, 111 * but that would mask bugs in applications that call scanf() with a 112 * NULL pointer. 113 */ 114 static const int suppress; 115 #define SUPPRESS_PTR ((void *)&suppress) 116 117 static const mbstate_t initial_mbs; 118 119 /* 120 * The following conversion functions return the number of characters consumed, 121 * or -1 on input failure. Character class conversion returns 0 on match 122 * failure. 123 */ 124 125 static __inline int 126 convert_char(FILE *fp, char * mbp, int width, locale_t locale) 127 { 128 mbstate_t mbs; 129 size_t nconv; 130 wint_t wi; 131 int n; 132 133 n = 0; 134 mbs = initial_mbs; 135 while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) { 136 if (mbp != SUPPRESS_PTR) { 137 nconv = wcrtomb(mbp, wi, &mbs); 138 if (nconv == (size_t)-1) 139 return (-1); 140 mbp += nconv; 141 } 142 n++; 143 } 144 if (n == 0) 145 return (-1); 146 return (n); 147 } 148 149 static __inline int 150 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) 151 { 152 wint_t wi; 153 int n; 154 155 n = 0; 156 while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) { 157 if (wcp != SUPPRESS_PTR) 158 *wcp++ = (wchar_t)wi; 159 n++; 160 } 161 if (n == 0) 162 return (-1); 163 return (n); 164 } 165 166 static __inline int 167 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl, 168 locale_t locale) 169 { 170 mbstate_t mbs; 171 size_t nconv; 172 wint_t wi; 173 int n; 174 175 n = 0; 176 mbs = initial_mbs; 177 while ((wi = __fgetwc(fp, locale)) != WEOF && 178 width-- != 0 && inccl(ccl, wi)) { 179 if (mbp != SUPPRESS_PTR) { 180 nconv = wcrtomb(mbp, wi, &mbs); 181 if (nconv == (size_t)-1) 182 return (-1); 183 mbp += nconv; 184 } 185 n++; 186 } 187 if (wi != WEOF) 188 __ungetwc(wi, fp, locale); 189 if (mbp != SUPPRESS_PTR) 190 *mbp = 0; 191 return (n); 192 } 193 194 static __inline int 195 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl, 196 locale_t locale) 197 { 198 wchar_t *wcp0; 199 wint_t wi; 200 int n; 201 202 if (wcp == SUPPRESS_PTR) { 203 n = 0; 204 while ((wi = __fgetwc(fp, locale)) != WEOF && 205 width-- != 0 && inccl(ccl, wi)) 206 n++; 207 if (wi != WEOF) 208 __ungetwc(wi, fp, locale); 209 } else { 210 wcp0 = wcp; 211 while ((wi = __fgetwc(fp, locale)) != WEOF && 212 width-- != 0 && inccl(ccl, wi)) 213 *wcp++ = (wchar_t)wi; 214 if (wi != WEOF) 215 __ungetwc(wi, fp, locale); 216 n = wcp - wcp0; 217 if (n == 0) 218 return (0); 219 *wcp = 0; 220 } 221 return (n); 222 } 223 224 static __inline int 225 convert_string(FILE *fp, char * mbp, int width, locale_t locale) 226 { 227 mbstate_t mbs; 228 size_t nconv; 229 wint_t wi; 230 int nread; 231 232 mbs = initial_mbs; 233 nread = 0; 234 while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 && 235 !iswspace(wi)) { 236 if (mbp != SUPPRESS_PTR) { 237 nconv = wcrtomb(mbp, wi, &mbs); 238 if (nconv == (size_t)-1) 239 return (-1); 240 mbp += nconv; 241 } 242 nread++; 243 } 244 if (wi != WEOF) 245 __ungetwc(wi, fp, locale); 246 if (mbp != SUPPRESS_PTR) 247 *mbp = 0; 248 return (nread); 249 } 250 251 static __inline int 252 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) 253 { 254 wint_t wi; 255 int nread; 256 257 nread = 0; 258 if (wcp == SUPPRESS_PTR) { 259 while ((wi = __fgetwc(fp, locale)) != WEOF && 260 width-- != 0 && !iswspace(wi)) 261 nread++; 262 if (wi != WEOF) 263 __ungetwc(wi, fp, locale); 264 } else { 265 while ((wi = __fgetwc(fp, locale)) != WEOF && 266 width-- != 0 && !iswspace(wi)) { 267 *wcp++ = (wchar_t)wi; 268 nread++; 269 } 270 if (wi != WEOF) 271 __ungetwc(wi, fp, locale); 272 *wcp = '\0'; 273 } 274 return (nread); 275 } 276 277 enum parseint_state { 278 begin, 279 havesign, 280 havezero, 281 haveprefix, 282 any, 283 }; 284 285 static __inline int 286 parseint_fsm(wchar_t c, enum parseint_state *state, int *base) 287 { 288 switch (c) { 289 case '+': 290 case '-': 291 if (*state == begin) { 292 *state = havesign; 293 return 1; 294 } 295 break; 296 case '0': 297 if (*state == begin || *state == havesign) { 298 *state = havezero; 299 return 1; 300 } 301 /* FALL THROUGH */ 302 case '1': 303 case '2': 304 case '3': 305 case '4': 306 case '5': 307 case '6': 308 case '7': 309 if (*state == havezero && *base == 0) { 310 *base = 8; 311 } 312 /* FALL THROUGH */ 313 case '8': 314 case '9': 315 if (*state == begin || 316 *state == havesign) { 317 if (*base == 0) { 318 *base = 10; 319 } 320 } 321 if (*state == begin || 322 *state == havesign || 323 *state == havezero || 324 *state == haveprefix || 325 *state == any) { 326 if (*base > c - '0') { 327 *state = any; 328 return 1; 329 } 330 } 331 break; 332 case 'b': 333 if (*state == havezero) { 334 if (*base == 0 || *base == 2) { 335 *state = haveprefix; 336 *base = 2; 337 return 1; 338 } 339 } 340 /* FALL THROUGH */ 341 case 'a': 342 case 'c': 343 case 'd': 344 case 'e': 345 case 'f': 346 if (*state == begin || 347 *state == havesign || 348 *state == havezero || 349 *state == haveprefix || 350 *state == any) { 351 if (*base > c - 'a' + 10) { 352 *state = any; 353 return 1; 354 } 355 } 356 break; 357 case 'B': 358 if (*state == havezero) { 359 if (*base == 0 || *base == 2) { 360 *state = haveprefix; 361 *base = 2; 362 return 1; 363 } 364 } 365 /* FALL THROUGH */ 366 case 'A': 367 case 'C': 368 case 'D': 369 case 'E': 370 case 'F': 371 if (*state == begin || 372 *state == havesign || 373 *state == havezero || 374 *state == haveprefix || 375 *state == any) { 376 if (*base > c - 'A' + 10) { 377 *state = any; 378 return 1; 379 } 380 } 381 break; 382 case 'x': 383 case 'X': 384 if (*state == havezero) { 385 if (*base == 0 || *base == 16) { 386 *state = haveprefix; 387 *base = 16; 388 return 1; 389 } 390 } 391 break; 392 } 393 return 0; 394 } 395 396 /* 397 * Read an integer, storing it in buf. 398 * 399 * Return 0 on a match failure, and the number of characters read 400 * otherwise. 401 */ 402 static __inline int 403 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base, 404 locale_t locale) 405 { 406 enum parseint_state state = begin; 407 wchar_t *wcp; 408 int c; 409 410 for (wcp = buf; width; width--) { 411 c = __fgetwc(fp, locale); 412 if (c == WEOF) 413 break; 414 if (!parseint_fsm(c, &state, &base)) 415 break; 416 *wcp++ = (wchar_t)c; 417 } 418 /* 419 * If we only had a sign, push it back. If we only had a 0b or 0x 420 * prefix (possibly preceded by a sign), we view it as "0" and 421 * push back the letter. In all other cases, if we stopped 422 * because we read a non-number character, push it back. 423 */ 424 if (state == havesign) { 425 wcp--; 426 __ungetwc(*wcp, fp, locale); 427 } else if (state == haveprefix) { 428 wcp--; 429 __ungetwc(c, fp, locale); 430 } else if (width && c != WEOF) { 431 __ungetwc(c, fp, locale); 432 } 433 return (wcp - buf); 434 } 435 436 /* 437 * MT-safe version. 438 */ 439 int 440 vfwscanf_l(FILE * __restrict fp, locale_t locale, 441 const wchar_t * __restrict fmt, va_list ap) 442 { 443 int ret; 444 FIX_LOCALE(locale); 445 446 FLOCKFILE_CANCELSAFE(fp); 447 ORIENT(fp, 1); 448 ret = __vfwscanf(fp, locale, fmt, ap); 449 FUNLOCKFILE_CANCELSAFE(); 450 return (ret); 451 } 452 int 453 vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap) 454 { 455 return vfwscanf_l(fp, __get_locale(), fmt, ap); 456 } 457 458 /* 459 * Non-MT-safe version. 460 */ 461 int 462 __vfwscanf(FILE * __restrict fp, locale_t locale, 463 const wchar_t * __restrict fmt, va_list ap) 464 { 465 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) 466 wint_t c; /* character from format, or conversion */ 467 size_t width; /* field width, or 0 */ 468 int flags; /* flags as defined above */ 469 int nassigned; /* number of fields assigned */ 470 int nconversions; /* number of conversions */ 471 int nr; /* characters read by the current conversion */ 472 int nread; /* number of characters consumed from fp */ 473 int base; /* base argument to conversion function */ 474 struct ccl ccl; /* character class info */ 475 wchar_t buf[BUF]; /* buffer for numeric conversions */ 476 wint_t wi; /* handy wint_t */ 477 478 nassigned = 0; 479 nconversions = 0; 480 nread = 0; 481 ccl.start = ccl.end = NULL; 482 for (;;) { 483 c = *fmt++; 484 if (c == 0) 485 return (nassigned); 486 if (iswspace(c)) { 487 while ((c = __fgetwc(fp, locale)) != WEOF && 488 iswspace_l(c, locale)) 489 nread++; 490 if (c != WEOF) 491 __ungetwc(c, fp, locale); 492 continue; 493 } 494 if (c != '%') 495 goto literal; 496 width = 0; 497 flags = 0; 498 /* 499 * switch on the format. continue if done; 500 * break once format type is derived. 501 */ 502 again: c = *fmt++; 503 switch (c) { 504 case '%': 505 literal: 506 if ((wi = __fgetwc(fp, locale)) == WEOF) 507 goto input_failure; 508 if (wi != c) { 509 __ungetwc(wi, fp, locale); 510 goto match_failure; 511 } 512 nread++; 513 continue; 514 515 case '*': 516 flags |= SUPPRESS; 517 goto again; 518 case 'j': 519 flags |= INTMAXT; 520 goto again; 521 case 'l': 522 if (flags & LONG) { 523 flags &= ~LONG; 524 flags |= LONGLONG; 525 } else 526 flags |= LONG; 527 goto again; 528 case 'q': 529 flags |= LONGLONG; /* not quite */ 530 goto again; 531 case 't': 532 flags |= PTRDIFFT; 533 goto again; 534 case 'w': 535 /* 536 * Fixed-width integer types. On all platforms we 537 * support, int8_t is equivalent to char, int16_t 538 * is equivalent to short, int32_t is equivalent 539 * to int, int64_t is equivalent to long long int. 540 * Furthermore, int_fast8_t, int_fast16_t and 541 * int_fast32_t are equivalent to int, and 542 * int_fast64_t is equivalent to long long int. 543 */ 544 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT); 545 if (fmt[0] == 'f') { 546 flags |= FASTINT; 547 fmt++; 548 } else { 549 flags &= ~FASTINT; 550 } 551 if (fmt[0] == '8') { 552 if (!(flags & FASTINT)) 553 flags |= SHORTSHORT; 554 else 555 /* no flag set = 32 */ ; 556 fmt += 1; 557 } else if (fmt[0] == '1' && fmt[1] == '6') { 558 if (!(flags & FASTINT)) 559 flags |= SHORT; 560 else 561 /* no flag set = 32 */ ; 562 fmt += 2; 563 } else if (fmt[0] == '3' && fmt[1] == '2') { 564 /* no flag set = 32 */ ; 565 fmt += 2; 566 } else if (fmt[0] == '6' && fmt[1] == '4') { 567 flags |= LONGLONG; 568 fmt += 2; 569 } else { 570 goto match_failure; 571 } 572 goto again; 573 case 'z': 574 flags |= SIZET; 575 goto again; 576 case 'L': 577 flags |= LONGDBL; 578 goto again; 579 case 'h': 580 if (flags & SHORT) { 581 flags &= ~SHORT; 582 flags |= SHORTSHORT; 583 } else 584 flags |= SHORT; 585 goto again; 586 587 case '0': case '1': case '2': case '3': case '4': 588 case '5': case '6': case '7': case '8': case '9': 589 width = width * 10 + c - '0'; 590 goto again; 591 592 /* 593 * Conversions. 594 */ 595 case 'B': 596 case 'b': 597 c = CT_INT; 598 flags |= UNSIGNED; 599 base = 2; 600 break; 601 602 case 'd': 603 c = CT_INT; 604 base = 10; 605 break; 606 607 case 'i': 608 c = CT_INT; 609 base = 0; 610 break; 611 612 case 'o': 613 c = CT_INT; 614 flags |= UNSIGNED; 615 base = 8; 616 break; 617 618 case 'u': 619 c = CT_INT; 620 flags |= UNSIGNED; 621 base = 10; 622 break; 623 624 case 'X': 625 case 'x': 626 c = CT_INT; 627 flags |= UNSIGNED; 628 base = 16; 629 break; 630 631 case 'A': case 'E': case 'F': case 'G': 632 case 'a': case 'e': case 'f': case 'g': 633 c = CT_FLOAT; 634 break; 635 636 case 'S': 637 flags |= LONG; 638 /* FALLTHROUGH */ 639 case 's': 640 c = CT_STRING; 641 break; 642 643 case '[': 644 ccl.start = fmt; 645 if (*fmt == '^') { 646 ccl.compl = 1; 647 fmt++; 648 } else 649 ccl.compl = 0; 650 if (*fmt == ']') 651 fmt++; 652 while (*fmt != '\0' && *fmt != ']') 653 fmt++; 654 ccl.end = fmt; 655 fmt++; 656 flags |= NOSKIP; 657 c = CT_CCL; 658 break; 659 660 case 'C': 661 flags |= LONG; 662 /* FALLTHROUGH */ 663 case 'c': 664 flags |= NOSKIP; 665 c = CT_CHAR; 666 break; 667 668 case 'p': /* pointer format is like hex */ 669 flags |= POINTER; 670 c = CT_INT; /* assumes sizeof(uintmax_t) */ 671 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 672 base = 16; 673 break; 674 675 case 'n': 676 if (flags & SUPPRESS) /* ??? */ 677 continue; 678 if (flags & SHORTSHORT) 679 *va_arg(ap, char *) = nread; 680 else if (flags & SHORT) 681 *va_arg(ap, short *) = nread; 682 else if (flags & LONG) 683 *va_arg(ap, long *) = nread; 684 else if (flags & LONGLONG) 685 *va_arg(ap, long long *) = nread; 686 else if (flags & INTMAXT) 687 *va_arg(ap, intmax_t *) = nread; 688 else if (flags & SIZET) 689 *va_arg(ap, size_t *) = nread; 690 else if (flags & PTRDIFFT) 691 *va_arg(ap, ptrdiff_t *) = nread; 692 else 693 *va_arg(ap, int *) = nread; 694 continue; 695 696 default: 697 goto match_failure; 698 699 /* 700 * Disgusting backwards compatibility hack. XXX 701 */ 702 case '\0': /* compat */ 703 return (EOF); 704 } 705 706 /* 707 * Consume leading white space, except for formats 708 * that suppress this. 709 */ 710 if ((flags & NOSKIP) == 0) { 711 while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi)) 712 nread++; 713 if (wi == WEOF) 714 goto input_failure; 715 __ungetwc(wi, fp, locale); 716 } 717 718 /* 719 * Do the conversion. 720 */ 721 switch (c) { 722 723 case CT_CHAR: 724 /* scan arbitrary characters (sets NOSKIP) */ 725 if (width == 0) 726 width = 1; 727 if (flags & LONG) { 728 nr = convert_wchar(fp, GETARG(wchar_t *), width, 729 locale); 730 } else { 731 nr = convert_char(fp, GETARG(char *), width, 732 locale); 733 } 734 if (nr < 0) 735 goto input_failure; 736 break; 737 738 case CT_CCL: 739 /* scan a (nonempty) character class (sets NOSKIP) */ 740 if (width == 0) 741 width = (size_t)~0; /* `infinity' */ 742 /* take only those things in the class */ 743 if (flags & LONG) { 744 nr = convert_wccl(fp, GETARG(wchar_t *), width, 745 &ccl, locale); 746 } else { 747 nr = convert_ccl(fp, GETARG(char *), width, 748 &ccl, locale); 749 } 750 if (nr <= 0) { 751 if (nr < 0) 752 goto input_failure; 753 else /* nr == 0 */ 754 goto match_failure; 755 } 756 break; 757 758 case CT_STRING: 759 /* like CCL, but zero-length string OK, & no NOSKIP */ 760 if (width == 0) 761 width = (size_t)~0; 762 if (flags & LONG) { 763 nr = convert_wstring(fp, GETARG(wchar_t *), 764 width, locale); 765 } else { 766 nr = convert_string(fp, GETARG(char *), width, 767 locale); 768 } 769 if (nr < 0) 770 goto input_failure; 771 break; 772 773 case CT_INT: 774 /* scan an integer as if by the conversion function */ 775 if (width == 0 || width > sizeof(buf) / 776 sizeof(*buf) - 1) 777 width = sizeof(buf) / sizeof(*buf) - 1; 778 779 nr = parseint(fp, buf, width, base, locale); 780 if (nr == 0) 781 goto match_failure; 782 if ((flags & SUPPRESS) == 0) { 783 uintmax_t res; 784 785 buf[nr] = L'\0'; 786 if ((flags & UNSIGNED) == 0) 787 res = wcstoimax(buf, NULL, base); 788 else 789 res = wcstoumax(buf, NULL, base); 790 if (flags & POINTER) 791 *va_arg(ap, void **) = 792 (void *)(uintptr_t)res; 793 else if (flags & SHORTSHORT) 794 *va_arg(ap, char *) = res; 795 else if (flags & SHORT) 796 *va_arg(ap, short *) = res; 797 else if (flags & LONG) 798 *va_arg(ap, long *) = res; 799 else if (flags & LONGLONG) 800 *va_arg(ap, long long *) = res; 801 else if (flags & INTMAXT) 802 *va_arg(ap, intmax_t *) = res; 803 else if (flags & PTRDIFFT) 804 *va_arg(ap, ptrdiff_t *) = res; 805 else if (flags & SIZET) 806 *va_arg(ap, size_t *) = res; 807 else 808 *va_arg(ap, int *) = res; 809 } 810 break; 811 812 case CT_FLOAT: 813 /* scan a floating point number as if by strtod */ 814 if (width == 0 || width > sizeof(buf) / 815 sizeof(*buf) - 1) 816 width = sizeof(buf) / sizeof(*buf) - 1; 817 nr = parsefloat(fp, buf, buf + width, locale); 818 if (nr == 0) 819 goto match_failure; 820 if ((flags & SUPPRESS) == 0) { 821 if (flags & LONGDBL) { 822 long double res = wcstold(buf, NULL); 823 *va_arg(ap, long double *) = res; 824 } else if (flags & LONG) { 825 double res = wcstod(buf, NULL); 826 *va_arg(ap, double *) = res; 827 } else { 828 float res = wcstof(buf, NULL); 829 *va_arg(ap, float *) = res; 830 } 831 } 832 break; 833 } 834 if (!(flags & SUPPRESS)) 835 nassigned++; 836 nread += nr; 837 nconversions++; 838 } 839 input_failure: 840 return (nconversions != 0 ? nassigned : EOF); 841 match_failure: 842 return (nassigned); 843 } 844 845 static int 846 parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale) 847 { 848 mbstate_t mbs; 849 size_t nconv; 850 wchar_t *commit, *p; 851 int infnanpos = 0; 852 enum { 853 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, 854 S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS 855 } state = S_START; 856 wchar_t c; 857 wchar_t decpt; 858 _Bool gotmantdig = 0, ishex = 0; 859 860 mbs = initial_mbs; 861 nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs); 862 if (nconv == (size_t)-1 || nconv == (size_t)-2) 863 decpt = '.'; /* failsafe */ 864 865 /* 866 * We set commit = p whenever the string we have read so far 867 * constitutes a valid representation of a floating point 868 * number by itself. At some point, the parse will complete 869 * or fail, and we will ungetc() back to the last commit point. 870 * To ensure that the file offset gets updated properly, it is 871 * always necessary to read at least one character that doesn't 872 * match; thus, we can't short-circuit "infinity" or "nan(...)". 873 */ 874 commit = buf - 1; 875 c = WEOF; 876 for (p = buf; p < end; ) { 877 if ((c = __fgetwc(fp, locale)) == WEOF) 878 break; 879 reswitch: 880 switch (state) { 881 case S_START: 882 state = S_GOTSIGN; 883 if (c == '-' || c == '+') 884 break; 885 else 886 goto reswitch; 887 case S_GOTSIGN: 888 switch (c) { 889 case '0': 890 state = S_MAYBEHEX; 891 commit = p; 892 break; 893 case 'I': 894 case 'i': 895 state = S_INF; 896 break; 897 case 'N': 898 case 'n': 899 state = S_NAN; 900 break; 901 default: 902 state = S_DIGITS; 903 goto reswitch; 904 } 905 break; 906 case S_INF: 907 if (infnanpos > 6 || 908 (c != "nfinity"[infnanpos] && 909 c != "NFINITY"[infnanpos])) 910 goto parsedone; 911 if (infnanpos == 1 || infnanpos == 6) 912 commit = p; /* inf or infinity */ 913 infnanpos++; 914 break; 915 case S_NAN: 916 switch (infnanpos) { 917 case 0: 918 if (c != 'A' && c != 'a') 919 goto parsedone; 920 break; 921 case 1: 922 if (c != 'N' && c != 'n') 923 goto parsedone; 924 else 925 commit = p; 926 break; 927 case 2: 928 if (c != '(') 929 goto parsedone; 930 break; 931 default: 932 if (c == ')') { 933 commit = p; 934 state = S_DONE; 935 } else if (!iswalnum(c) && c != '_') 936 goto parsedone; 937 break; 938 } 939 infnanpos++; 940 break; 941 case S_DONE: 942 goto parsedone; 943 case S_MAYBEHEX: 944 state = S_DIGITS; 945 if (c == 'X' || c == 'x') { 946 ishex = 1; 947 break; 948 } else { /* we saw a '0', but no 'x' */ 949 gotmantdig = 1; 950 goto reswitch; 951 } 952 case S_DIGITS: 953 if ((ishex && iswxdigit(c)) || iswdigit(c)) 954 gotmantdig = 1; 955 else { 956 state = S_FRAC; 957 if (c != decpt) 958 goto reswitch; 959 } 960 if (gotmantdig) 961 commit = p; 962 break; 963 case S_FRAC: 964 if (((c == 'E' || c == 'e') && !ishex) || 965 ((c == 'P' || c == 'p') && ishex)) { 966 if (!gotmantdig) 967 goto parsedone; 968 else 969 state = S_EXP; 970 } else if ((ishex && iswxdigit(c)) || iswdigit(c)) { 971 commit = p; 972 gotmantdig = 1; 973 } else 974 goto parsedone; 975 break; 976 case S_EXP: 977 state = S_EXPDIGITS; 978 if (c == '-' || c == '+') 979 break; 980 else 981 goto reswitch; 982 case S_EXPDIGITS: 983 if (iswdigit(c)) 984 commit = p; 985 else 986 goto parsedone; 987 break; 988 default: 989 abort(); 990 } 991 *p++ = c; 992 c = WEOF; 993 } 994 995 parsedone: 996 if (c != WEOF) 997 __ungetwc(c, fp, locale); 998 while (commit < --p) 999 __ungetwc(*p, fp, locale); 1000 *++commit = '\0'; 1001 return (commit - buf); 1002 } 1003