1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Torek.
9 *
10 * Copyright (c) 2011 The FreeBSD Foundation
11 *
12 * Copyright (c) 2023 Dag-Erling Smørgrav
13 *
14 * Portions of this software were developed by David Chisnall
15 * under sponsorship from the FreeBSD Foundation.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 */
41
42 #include "namespace.h"
43 #include <ctype.h>
44 #include <inttypes.h>
45 #include <limits.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stddef.h>
49 #include <stdarg.h>
50 #include <string.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 #include "un-namespace.h"
54
55 #include "libc_private.h"
56 #include "local.h"
57 #include "xlocale_private.h"
58
/*
 * Size, in wide characters, of the scratch buffer used for numeric
 * conversions.  Field widths for integer and floating-point conversions
 * are capped at BUF - 1 so that a terminating NUL always fits.
 */
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x01	/* l: long or double; wide char for c, s, [ */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[bBoupxX] conversions */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[bBdioupxX] conversion */
#define	CT_FLOAT	4	/* %[aAeEfFgG] conversion */

#ifndef NO_FLOATING_POINT
/* Defined at the end of this file; only built with floating-point support. */
static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t);
#endif
90
/*
 * A scanset (%[...]) described as a half-open range [start, end) of
 * wide characters taken from the format string, plus a flag telling
 * whether the set is negated.
 */
struct ccl {
	const wchar_t *start;	/* character class start */
	const wchar_t *end;	/* character class end */
	int compl;		/* ccl is complemented? */
};

/*
 * Report whether wi is accepted by the scanset: for a plain set the
 * character must appear in [start, end); for a complemented set it
 * must not.  Returns 1 when accepted, 0 otherwise.
 */
static __inline int
inccl(const struct ccl *ccl, wint_t wi)
{
	const wchar_t *hit;

	hit = wmemchr(ccl->start, wi, ccl->end - ccl->start);
	if (ccl->compl)
		return (hit == NULL);
	return (hit != NULL);
}
108
/*
 * Conversion functions are passed a pointer to this object instead of
 * a real parameter to indicate that the assignment-suppression (*)
 * flag was specified.  We could use a NULL pointer to indicate this,
 * but that would mask bugs in applications that call scanf() with a
 * NULL pointer.
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/*
 * Zero-initialized conversion state.  The converters below copy it into
 * a local mbstate_t before their first wcrtomb()/mbrtowc() call so that
 * every conversion starts from a freshly reset state.
 */
static const mbstate_t initial_mbs;
120
121 /*
122 * The following conversion functions return the number of characters consumed,
123 * or -1 on input failure. Character class conversion returns 0 on match
124 * failure.
125 */
126
127 static __inline int
convert_char(FILE * fp,char * mbp,int width,locale_t locale)128 convert_char(FILE *fp, char * mbp, int width, locale_t locale)
129 {
130 mbstate_t mbs;
131 size_t nconv;
132 wint_t wi;
133 int n;
134
135 n = 0;
136 mbs = initial_mbs;
137 while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
138 if (mbp != SUPPRESS_PTR) {
139 nconv = wcrtomb(mbp, wi, &mbs);
140 if (nconv == (size_t)-1)
141 return (-1);
142 mbp += nconv;
143 }
144 n++;
145 }
146 if (n == 0)
147 return (-1);
148 return (n);
149 }
150
151 static __inline int
convert_wchar(FILE * fp,wchar_t * wcp,int width,locale_t locale)152 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
153 {
154 wint_t wi;
155 int n;
156
157 n = 0;
158 while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
159 if (wcp != SUPPRESS_PTR)
160 *wcp++ = (wchar_t)wi;
161 n++;
162 }
163 if (n == 0)
164 return (-1);
165 return (n);
166 }
167
168 static __inline int
convert_ccl(FILE * fp,char * mbp,int width,const struct ccl * ccl,locale_t locale)169 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
170 locale_t locale)
171 {
172 mbstate_t mbs;
173 size_t nconv;
174 wint_t wi;
175 int n;
176
177 n = 0;
178 mbs = initial_mbs;
179 while ((wi = __fgetwc(fp, locale)) != WEOF &&
180 width-- != 0 && inccl(ccl, wi)) {
181 if (mbp != SUPPRESS_PTR) {
182 nconv = wcrtomb(mbp, wi, &mbs);
183 if (nconv == (size_t)-1)
184 return (-1);
185 mbp += nconv;
186 }
187 n++;
188 }
189 if (wi != WEOF)
190 __ungetwc(wi, fp, locale);
191 if (mbp != SUPPRESS_PTR)
192 *mbp = 0;
193 return (n);
194 }
195
196 static __inline int
convert_wccl(FILE * fp,wchar_t * wcp,int width,const struct ccl * ccl,locale_t locale)197 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
198 locale_t locale)
199 {
200 wchar_t *wcp0;
201 wint_t wi;
202 int n;
203
204 if (wcp == SUPPRESS_PTR) {
205 n = 0;
206 while ((wi = __fgetwc(fp, locale)) != WEOF &&
207 width-- != 0 && inccl(ccl, wi))
208 n++;
209 if (wi != WEOF)
210 __ungetwc(wi, fp, locale);
211 } else {
212 wcp0 = wcp;
213 while ((wi = __fgetwc(fp, locale)) != WEOF &&
214 width-- != 0 && inccl(ccl, wi))
215 *wcp++ = (wchar_t)wi;
216 if (wi != WEOF)
217 __ungetwc(wi, fp, locale);
218 n = wcp - wcp0;
219 if (n == 0)
220 return (0);
221 *wcp = 0;
222 }
223 return (n);
224 }
225
226 static __inline int
convert_string(FILE * fp,char * mbp,int width,locale_t locale)227 convert_string(FILE *fp, char * mbp, int width, locale_t locale)
228 {
229 mbstate_t mbs;
230 size_t nconv;
231 wint_t wi;
232 int nread;
233
234 mbs = initial_mbs;
235 nread = 0;
236 while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
237 !iswspace(wi)) {
238 if (mbp != SUPPRESS_PTR) {
239 nconv = wcrtomb(mbp, wi, &mbs);
240 if (nconv == (size_t)-1)
241 return (-1);
242 mbp += nconv;
243 }
244 nread++;
245 }
246 if (wi != WEOF)
247 __ungetwc(wi, fp, locale);
248 if (mbp != SUPPRESS_PTR)
249 *mbp = 0;
250 return (nread);
251 }
252
253 static __inline int
convert_wstring(FILE * fp,wchar_t * wcp,int width,locale_t locale)254 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
255 {
256 wint_t wi;
257 int nread;
258
259 nread = 0;
260 if (wcp == SUPPRESS_PTR) {
261 while ((wi = __fgetwc(fp, locale)) != WEOF &&
262 width-- != 0 && !iswspace(wi))
263 nread++;
264 if (wi != WEOF)
265 __ungetwc(wi, fp, locale);
266 } else {
267 while ((wi = __fgetwc(fp, locale)) != WEOF &&
268 width-- != 0 && !iswspace(wi)) {
269 *wcp++ = (wchar_t)wi;
270 nread++;
271 }
272 if (wi != WEOF)
273 __ungetwc(wi, fp, locale);
274 *wcp = '\0';
275 }
276 return (nread);
277 }
278
/*
 * States of the integer recognizer driven by parseint_fsm().
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a leading '+' or '-' only */
	havezero,	/* accepted a single leading '0' (after optional sign) */
	haveprefix,	/* accepted "0b"/"0B" or "0x"/"0X", no digit after it */
	any,		/* accepted at least one further digit */
};

/*
 * Feed one character to the integer recognizer.
 *
 * c      - the next input character.
 * state  - recognizer state, updated in place.
 * base   - conversion base; 0 means "detect from the input" and is
 *          replaced by 2, 8, 10 or 16 as soon as the input decides it.
 *
 * Returns 1 if c extends the number read so far, 0 if it does not (in
 * which case *state and *base still describe the text accepted before c).
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state == begin) {
			*state = havesign;
			return (1);
		}
		return (0);
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/*
			 * A second leading zero settles base detection on
			 * octal, exactly like any other octal digit after
			 * the first zero.  Without this, "007" read with
			 * base 0 would be cut off after "00".
			 */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return (1);
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* "0" followed by an octal digit: the number is octal. */
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A leading nonzero digit: the number is decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
		/* "0b" is a binary prefix unless the base rules it out. */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		/* FALLTHROUGH */
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		digit = c - 'a' + 10;
		break;
	case 'B':
		/* "0B" is a binary prefix unless the base rules it out. */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		/* FALLTHROUGH */
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = c - 'A' + 10;
		break;
	case 'x':
	case 'X':
		/* "0x" is a hex prefix unless the base rules it out. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return (1);
		}
		return (0);
	default:
		return (0);
	}

	/* An ordinary digit is accepted only if it is valid in this base. */
	if (*base > digit) {
		*state = any;
		return (1);
	}
	return (0);
}
398
399 /*
400 * Read an integer, storing it in buf.
401 *
402 * Return 0 on a match failure, and the number of characters read
403 * otherwise.
404 */
405 static __inline int
parseint(FILE * fp,wchar_t * __restrict buf,int width,int base,locale_t locale)406 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
407 locale_t locale)
408 {
409 enum parseint_state state = begin;
410 wchar_t *wcp;
411 int c;
412
413 for (wcp = buf; width; width--) {
414 c = __fgetwc(fp, locale);
415 if (c == WEOF)
416 break;
417 if (!parseint_fsm(c, &state, &base))
418 break;
419 *wcp++ = (wchar_t)c;
420 }
421 /*
422 * If we only had a sign, push it back. If we only had a 0b or 0x
423 * prefix (possibly preceded by a sign), we view it as "0" and
424 * push back the letter. In all other cases, if we stopped
425 * because we read a non-number character, push it back.
426 */
427 if (state == havesign) {
428 wcp--;
429 __ungetwc(*wcp, fp, locale);
430 } else if (state == haveprefix) {
431 wcp--;
432 __ungetwc(c, fp, locale);
433 } else if (width && c != WEOF) {
434 __ungetwc(c, fp, locale);
435 }
436 return (wcp - buf);
437 }
438
/*
 * MT-safe version.
 *
 * Wraps __vfwscanf() between FLOCKFILE_CANCELSAFE() and
 * FUNLOCKFILE_CANCELSAFE() and returns its result unchanged.  The
 * helper macros are defined outside this file; presumably FIX_LOCALE()
 * normalizes the locale argument, the lock macros hold the stream lock
 * for the duration of the scan, and ORIENT(fp, 1) selects wide
 * orientation -- confirm against their definitions.
 */
int
vfwscanf_l(FILE * __restrict fp, locale_t locale,
    const wchar_t * __restrict fmt, va_list ap)
{
	int ret;
	FIX_LOCALE(locale);

	/* The lock/unlock macros must stay paired around the scan. */
	FLOCKFILE_CANCELSAFE(fp);
	ORIENT(fp, 1);
	ret = __vfwscanf(fp, locale, fmt, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (ret);
}
/*
 * Same as vfwscanf_l(), using the locale returned by __get_locale().
 */
int
vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
{

	return (vfwscanf_l(fp, __get_locale(), fmt, ap));
}
460
461 /*
462 * Non-MT-safe version.
463 */
464 int
__vfwscanf(FILE * __restrict fp,locale_t locale,const wchar_t * __restrict fmt,va_list ap)465 __vfwscanf(FILE * __restrict fp, locale_t locale,
466 const wchar_t * __restrict fmt, va_list ap)
467 {
468 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
469 wint_t c; /* character from format, or conversion */
470 size_t width; /* field width, or 0 */
471 int flags; /* flags as defined above */
472 int nassigned; /* number of fields assigned */
473 int nconversions; /* number of conversions */
474 int nr; /* characters read by the current conversion */
475 int nread; /* number of characters consumed from fp */
476 int base; /* base argument to conversion function */
477 struct ccl ccl; /* character class info */
478 wchar_t buf[BUF]; /* buffer for numeric conversions */
479 wint_t wi; /* handy wint_t */
480
481 nassigned = 0;
482 nconversions = 0;
483 nread = 0;
484 ccl.start = ccl.end = NULL;
485 for (;;) {
486 c = *fmt++;
487 if (c == 0)
488 return (nassigned);
489 if (iswspace(c)) {
490 while ((c = __fgetwc(fp, locale)) != WEOF &&
491 iswspace_l(c, locale))
492 nread++;
493 if (c != WEOF)
494 __ungetwc(c, fp, locale);
495 continue;
496 }
497 if (c != '%')
498 goto literal;
499 width = 0;
500 flags = 0;
501 /*
502 * switch on the format. continue if done;
503 * break once format type is derived.
504 */
505 again: c = *fmt++;
506 switch (c) {
507 case '%':
508 literal:
509 if ((wi = __fgetwc(fp, locale)) == WEOF)
510 goto input_failure;
511 if (wi != c) {
512 __ungetwc(wi, fp, locale);
513 goto match_failure;
514 }
515 nread++;
516 continue;
517
518 case '*':
519 flags |= SUPPRESS;
520 goto again;
521 case 'j':
522 flags |= INTMAXT;
523 goto again;
524 case 'l':
525 if (flags & LONG) {
526 flags &= ~LONG;
527 flags |= LONGLONG;
528 } else
529 flags |= LONG;
530 goto again;
531 case 'q':
532 flags |= LONGLONG; /* not quite */
533 goto again;
534 case 't':
535 flags |= PTRDIFFT;
536 goto again;
537 case 'w':
538 /*
539 * Fixed-width integer types. On all platforms we
540 * support, int8_t is equivalent to char, int16_t
541 * is equivalent to short, int32_t is equivalent
542 * to int, int64_t is equivalent to long long int.
543 * Furthermore, int_fast8_t, int_fast16_t and
544 * int_fast32_t are equivalent to int, and
545 * int_fast64_t is equivalent to long long int.
546 */
547 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
548 if (fmt[0] == 'f') {
549 flags |= FASTINT;
550 fmt++;
551 } else {
552 flags &= ~FASTINT;
553 }
554 if (fmt[0] == '8') {
555 if (!(flags & FASTINT))
556 flags |= SHORTSHORT;
557 else
558 /* no flag set = 32 */ ;
559 fmt += 1;
560 } else if (fmt[0] == '1' && fmt[1] == '6') {
561 if (!(flags & FASTINT))
562 flags |= SHORT;
563 else
564 /* no flag set = 32 */ ;
565 fmt += 2;
566 } else if (fmt[0] == '3' && fmt[1] == '2') {
567 /* no flag set = 32 */ ;
568 fmt += 2;
569 } else if (fmt[0] == '6' && fmt[1] == '4') {
570 flags |= LONGLONG;
571 fmt += 2;
572 } else {
573 goto match_failure;
574 }
575 goto again;
576 case 'z':
577 flags |= SIZET;
578 goto again;
579 case 'L':
580 flags |= LONGDBL;
581 goto again;
582 case 'h':
583 if (flags & SHORT) {
584 flags &= ~SHORT;
585 flags |= SHORTSHORT;
586 } else
587 flags |= SHORT;
588 goto again;
589
590 case '0': case '1': case '2': case '3': case '4':
591 case '5': case '6': case '7': case '8': case '9':
592 width = width * 10 + c - '0';
593 goto again;
594
595 /*
596 * Conversions.
597 */
598 case 'B':
599 case 'b':
600 c = CT_INT;
601 flags |= UNSIGNED;
602 base = 2;
603 break;
604
605 case 'd':
606 c = CT_INT;
607 base = 10;
608 break;
609
610 case 'i':
611 c = CT_INT;
612 base = 0;
613 break;
614
615 case 'o':
616 c = CT_INT;
617 flags |= UNSIGNED;
618 base = 8;
619 break;
620
621 case 'u':
622 c = CT_INT;
623 flags |= UNSIGNED;
624 base = 10;
625 break;
626
627 case 'X':
628 case 'x':
629 c = CT_INT;
630 flags |= UNSIGNED;
631 base = 16;
632 break;
633
634 #ifndef NO_FLOATING_POINT
635 case 'A': case 'E': case 'F': case 'G':
636 case 'a': case 'e': case 'f': case 'g':
637 c = CT_FLOAT;
638 break;
639 #endif
640
641 case 'S':
642 flags |= LONG;
643 /* FALLTHROUGH */
644 case 's':
645 c = CT_STRING;
646 break;
647
648 case '[':
649 ccl.start = fmt;
650 if (*fmt == '^') {
651 ccl.compl = 1;
652 fmt++;
653 } else
654 ccl.compl = 0;
655 if (*fmt == ']')
656 fmt++;
657 while (*fmt != '\0' && *fmt != ']')
658 fmt++;
659 ccl.end = fmt;
660 fmt++;
661 flags |= NOSKIP;
662 c = CT_CCL;
663 break;
664
665 case 'C':
666 flags |= LONG;
667 /* FALLTHROUGH */
668 case 'c':
669 flags |= NOSKIP;
670 c = CT_CHAR;
671 break;
672
673 case 'p': /* pointer format is like hex */
674 flags |= POINTER;
675 c = CT_INT; /* assumes sizeof(uintmax_t) */
676 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */
677 base = 16;
678 break;
679
680 case 'n':
681 if (flags & SUPPRESS) /* ??? */
682 continue;
683 if (flags & SHORTSHORT)
684 *va_arg(ap, char *) = nread;
685 else if (flags & SHORT)
686 *va_arg(ap, short *) = nread;
687 else if (flags & LONG)
688 *va_arg(ap, long *) = nread;
689 else if (flags & LONGLONG)
690 *va_arg(ap, long long *) = nread;
691 else if (flags & INTMAXT)
692 *va_arg(ap, intmax_t *) = nread;
693 else if (flags & SIZET)
694 *va_arg(ap, size_t *) = nread;
695 else if (flags & PTRDIFFT)
696 *va_arg(ap, ptrdiff_t *) = nread;
697 else
698 *va_arg(ap, int *) = nread;
699 continue;
700
701 default:
702 goto match_failure;
703
704 /*
705 * Disgusting backwards compatibility hack. XXX
706 */
707 case '\0': /* compat */
708 return (EOF);
709 }
710
711 /*
712 * Consume leading white space, except for formats
713 * that suppress this.
714 */
715 if ((flags & NOSKIP) == 0) {
716 while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi))
717 nread++;
718 if (wi == WEOF)
719 goto input_failure;
720 __ungetwc(wi, fp, locale);
721 }
722
723 /*
724 * Do the conversion.
725 */
726 switch (c) {
727
728 case CT_CHAR:
729 /* scan arbitrary characters (sets NOSKIP) */
730 if (width == 0)
731 width = 1;
732 if (flags & LONG) {
733 nr = convert_wchar(fp, GETARG(wchar_t *), width,
734 locale);
735 } else {
736 nr = convert_char(fp, GETARG(char *), width,
737 locale);
738 }
739 if (nr < 0)
740 goto input_failure;
741 break;
742
743 case CT_CCL:
744 /* scan a (nonempty) character class (sets NOSKIP) */
745 if (width == 0)
746 width = (size_t)~0; /* `infinity' */
747 /* take only those things in the class */
748 if (flags & LONG) {
749 nr = convert_wccl(fp, GETARG(wchar_t *), width,
750 &ccl, locale);
751 } else {
752 nr = convert_ccl(fp, GETARG(char *), width,
753 &ccl, locale);
754 }
755 if (nr <= 0) {
756 if (nr < 0)
757 goto input_failure;
758 else /* nr == 0 */
759 goto match_failure;
760 }
761 break;
762
763 case CT_STRING:
764 /* like CCL, but zero-length string OK, & no NOSKIP */
765 if (width == 0)
766 width = (size_t)~0;
767 if (flags & LONG) {
768 nr = convert_wstring(fp, GETARG(wchar_t *),
769 width, locale);
770 } else {
771 nr = convert_string(fp, GETARG(char *), width,
772 locale);
773 }
774 if (nr < 0)
775 goto input_failure;
776 break;
777
778 case CT_INT:
779 /* scan an integer as if by the conversion function */
780 if (width == 0 || width > sizeof(buf) /
781 sizeof(*buf) - 1)
782 width = sizeof(buf) / sizeof(*buf) - 1;
783
784 nr = parseint(fp, buf, width, base, locale);
785 if (nr == 0)
786 goto match_failure;
787 if ((flags & SUPPRESS) == 0) {
788 uintmax_t res;
789
790 buf[nr] = L'\0';
791 if ((flags & UNSIGNED) == 0)
792 res = wcstoimax(buf, NULL, base);
793 else
794 res = wcstoumax(buf, NULL, base);
795 if (flags & POINTER)
796 *va_arg(ap, void **) =
797 (void *)(uintptr_t)res;
798 else if (flags & SHORTSHORT)
799 *va_arg(ap, char *) = res;
800 else if (flags & SHORT)
801 *va_arg(ap, short *) = res;
802 else if (flags & LONG)
803 *va_arg(ap, long *) = res;
804 else if (flags & LONGLONG)
805 *va_arg(ap, long long *) = res;
806 else if (flags & INTMAXT)
807 *va_arg(ap, intmax_t *) = res;
808 else if (flags & PTRDIFFT)
809 *va_arg(ap, ptrdiff_t *) = res;
810 else if (flags & SIZET)
811 *va_arg(ap, size_t *) = res;
812 else
813 *va_arg(ap, int *) = res;
814 }
815 break;
816
817 #ifndef NO_FLOATING_POINT
818 case CT_FLOAT:
819 /* scan a floating point number as if by strtod */
820 if (width == 0 || width > sizeof(buf) /
821 sizeof(*buf) - 1)
822 width = sizeof(buf) / sizeof(*buf) - 1;
823 nr = parsefloat(fp, buf, buf + width, locale);
824 if (nr == 0)
825 goto match_failure;
826 if ((flags & SUPPRESS) == 0) {
827 if (flags & LONGDBL) {
828 long double res = wcstold(buf, NULL);
829 *va_arg(ap, long double *) = res;
830 } else if (flags & LONG) {
831 double res = wcstod(buf, NULL);
832 *va_arg(ap, double *) = res;
833 } else {
834 float res = wcstof(buf, NULL);
835 *va_arg(ap, float *) = res;
836 }
837 }
838 break;
839 #endif /* !NO_FLOATING_POINT */
840 }
841 if (!(flags & SUPPRESS))
842 nassigned++;
843 nread += nr;
844 nconversions++;
845 }
846 input_failure:
847 return (nconversions != 0 ? nassigned : EOF);
848 match_failure:
849 return (nassigned);
850 }
851
#ifndef NO_FLOATING_POINT
/*
 * Read the text of a floating-point number from fp into buf.
 *
 * Characters are read one at a time and run through a small state
 * machine that accepts an optional sign followed by "inf"/"infinity",
 * "nan" with an optional parenthesized sequence, or decimal or
 * hexadecimal digits with an optional fraction and exponent.  At most
 * end - buf characters are stored.  Everything read beyond the longest
 * valid number is pushed back onto fp and buf is NUL-terminated at that
 * point.
 *
 * Returns the length of the number left in buf; 0 means no valid
 * number was found.
 *
 * NOTE(review): the decimal point is taken from localeconv() rather
 * than from the `locale' argument -- confirm this is intended.
 */
static int
parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
{
	mbstate_t mbs;
	size_t nconv;
	wchar_t *commit, *p;
	int infnanpos = 0;	/* index into "nfinity" / position in "an(...)" */
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	wchar_t c;
	wchar_t decpt;		/* decimal point as a wide character */
	_Bool gotmantdig = 0, ishex = 0;

	/* Convert the multibyte decimal point to a wide character. */
	mbs = initial_mbs;
	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
	if (nconv == (size_t)-1 || nconv == (size_t)-2)
		decpt = '.';	/* failsafe */

	/*
	 * We set commit = p whenever the string we have read so far
	 * constitutes a valid representation of a floating point
	 * number by itself.  At some point, the parse will complete
	 * or fail, and we will ungetc() back to the last commit point.
	 * To ensure that the file offset gets updated properly, it is
	 * always necessary to read at least one character that doesn't
	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
	 */
	commit = buf - 1;
	c = WEOF;
	for (p = buf; p < end; ) {
		if ((c = __fgetwc(fp, locale)) == WEOF)
			break;
reswitch:
		/*
		 * `break' out of this switch stores c and reads the next
		 * character; `goto reswitch' re-examines c in the new
		 * state; `goto parsedone' rejects c and ends the scan.
		 */
		switch (state) {
		case S_START:
			/* Optional sign. */
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				/* "0" is a number; an 'x' may follow. */
				state = S_MAYBEHEX;
				commit = p;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* Match the rest of "infinity", ignoring case. */
			if (infnanpos > 6 ||
			    (c != "nfinity"[infnanpos] &&
			     c != "NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			/* Match "an", then an optional "(...)" sequence. */
			switch (infnanpos) {
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				/* Inside the parentheses. */
				if (c == ')') {
					commit = p;
					state = S_DONE;
				} else if (!iswalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_DONE:
			goto parsedone;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			/* Integer part of the mantissa, up to the decimal point. */
			if ((ishex && iswxdigit(c)) || iswdigit(c))
				gotmantdig = 1;
			else {
				state = S_FRAC;
				if (c != decpt)
					goto reswitch;
			}
			if (gotmantdig)
				commit = p;
			break;
		case S_FRAC:
			/* Fraction digits, or the start of an exponent. */
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
				commit = p;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			/* Optional exponent sign. */
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (iswdigit(c))
				commit = p;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		*p++ = c;
		c = WEOF;	/* c has been stored; nothing pending */
	}

parsedone:
	/* Push back the rejected character, if any ... */
	if (c != WEOF)
		__ungetwc(c, fp, locale);
	/* ... then everything stored after the last commit point. */
	while (commit < --p)
		__ungetwc(*p, fp, locale);
	*++commit = '\0';
	return (commit - buf);
}
#endif
1012