xref: /freebsd/lib/libc/stdio/vfwscanf.c (revision b1bebaaba9b9c0ddfe503c43ca8e9e3917ee2c57)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Torek.
9  *
10  * Copyright (c) 2011 The FreeBSD Foundation
11  *
12  * Copyright (c) 2023 Dag-Erling Smørgrav
13  *
14  * Portions of this software were developed by David Chisnall
15  * under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 #include "namespace.h"
43 #include <ctype.h>
44 #include <inttypes.h>
45 #include <limits.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stddef.h>
49 #include <stdarg.h>
50 #include <string.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 #include "un-namespace.h"
54 
55 #include "libc_private.h"
56 #include "local.h"
57 #include "xlocale_private.h"
58 
/*
 * Number of wide characters in the scratch buffer used for numeric
 * conversions.  Integer and floating point field widths are clamped to
 * BUF - 1 so that a terminating L'\0' always fits.
 */
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * These are bit values OR'ed together in the `flags' variable of
 * __vfwscanf() while a conversion specification is being decoded.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Conversion types.
 *
 * Once the conversion character has been decoded, __vfwscanf() replaces
 * it with one of these values and dispatches on it.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */

/* Forward declaration; the definition is at the end of this file. */
static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t);
88 
/*
 * Description of a %[...] character class.  The members of the class are
 * the wide characters in the half-open range [start, end) of the format
 * string; `compl' is non-zero when the class was written as %[^...].
 */
struct ccl {
	const wchar_t *start;	/* character class start */
	const wchar_t *end;	/* character class end */
	int compl;		/* ccl is complemented? */
};

/*
 * Return 1 if the wide character wi is accepted by the character class
 * ccl and 0 otherwise.  For a complemented class the result of the
 * membership search is inverted.
 */
static __inline int
inccl(const struct ccl *ccl, wint_t wi)
{
	int listed;

	listed = (wmemchr(ccl->start, wi, ccl->end - ccl->start) != NULL);
	return (ccl->compl ? !listed : listed);
}
106 
/*
 * Conversion functions are passed a pointer to this object instead of
 * a real parameter to indicate that the assignment-suppression (*)
 * flag was specified.  We could use a NULL pointer to indicate this,
 * but that would mask bugs in applications that call scanf() with a
 * NULL pointer.
 *
 * The object is never read or written; only its address is compared
 * against the destination pointer.
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/*
 * Zero-initialized conversion state.  It is copied into a local
 * mbstate_t before each sequence of wcrtomb()/mbrtowc() calls.
 */
static const mbstate_t initial_mbs;
118 
119 /*
120  * The following conversion functions return the number of characters consumed,
121  * or -1 on input failure.  Character class conversion returns 0 on match
122  * failure.
123  */
124 
125 static __inline int
126 convert_char(FILE *fp, char * mbp, int width, locale_t locale)
127 {
128 	mbstate_t mbs;
129 	size_t nconv;
130 	wint_t wi;
131 	int n;
132 
133 	n = 0;
134 	mbs = initial_mbs;
135 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
136 		if (mbp != SUPPRESS_PTR) {
137 			nconv = wcrtomb(mbp, wi, &mbs);
138 			if (nconv == (size_t)-1)
139 				return (-1);
140 			mbp += nconv;
141 		}
142 		n++;
143 	}
144 	if (n == 0)
145 		return (-1);
146 	return (n);
147 }
148 
149 static __inline int
150 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
151 {
152 	wint_t wi;
153 	int n;
154 
155 	n = 0;
156 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
157 		if (wcp != SUPPRESS_PTR)
158 			*wcp++ = (wchar_t)wi;
159 		n++;
160 	}
161 	if (n == 0)
162 		return (-1);
163 	return (n);
164 }
165 
166 static __inline int
167 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
168     locale_t locale)
169 {
170 	mbstate_t mbs;
171 	size_t nconv;
172 	wint_t wi;
173 	int n;
174 
175 	n = 0;
176 	mbs = initial_mbs;
177 	while ((wi = __fgetwc(fp, locale)) != WEOF &&
178 	    width-- != 0 && inccl(ccl, wi)) {
179 		if (mbp != SUPPRESS_PTR) {
180 			nconv = wcrtomb(mbp, wi, &mbs);
181 			if (nconv == (size_t)-1)
182 				return (-1);
183 			mbp += nconv;
184 		}
185 		n++;
186 	}
187 	if (wi != WEOF)
188 		__ungetwc(wi, fp, locale);
189 	if (mbp != SUPPRESS_PTR)
190 		*mbp = 0;
191 	return (n);
192 }
193 
194 static __inline int
195 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
196     locale_t locale)
197 {
198 	wchar_t *wcp0;
199 	wint_t wi;
200 	int n;
201 
202 	if (wcp == SUPPRESS_PTR) {
203 		n = 0;
204 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
205 		    width-- != 0 && inccl(ccl, wi))
206 			n++;
207 		if (wi != WEOF)
208 			__ungetwc(wi, fp, locale);
209 	} else {
210 		wcp0 = wcp;
211 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
212 		    width-- != 0 && inccl(ccl, wi))
213 			*wcp++ = (wchar_t)wi;
214 		if (wi != WEOF)
215 			__ungetwc(wi, fp, locale);
216 		n = wcp - wcp0;
217 		if (n == 0)
218 			return (0);
219 		*wcp = 0;
220 	}
221 	return (n);
222 }
223 
224 static __inline int
225 convert_string(FILE *fp, char * mbp, int width, locale_t locale)
226 {
227 	mbstate_t mbs;
228 	size_t nconv;
229 	wint_t wi;
230 	int nread;
231 
232 	mbs = initial_mbs;
233 	nread = 0;
234 	while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
235 	    !iswspace(wi)) {
236 		if (mbp != SUPPRESS_PTR) {
237 			nconv = wcrtomb(mbp, wi, &mbs);
238 			if (nconv == (size_t)-1)
239 				return (-1);
240 			mbp += nconv;
241 		}
242 		nread++;
243 	}
244 	if (wi != WEOF)
245 		__ungetwc(wi, fp, locale);
246 	if (mbp != SUPPRESS_PTR)
247 		*mbp = 0;
248 	return (nread);
249 }
250 
251 static __inline int
252 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
253 {
254 	wint_t wi;
255 	int nread;
256 
257 	nread = 0;
258 	if (wcp == SUPPRESS_PTR) {
259 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
260 		    width-- != 0 && !iswspace(wi))
261 			nread++;
262 		if (wi != WEOF)
263 			__ungetwc(wi, fp, locale);
264 	} else {
265 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
266 		    width-- != 0 && !iswspace(wi)) {
267 			*wcp++ = (wchar_t)wi;
268 			nread++;
269 		}
270 		if (wi != WEOF)
271 			__ungetwc(wi, fp, locale);
272 		*wcp = '\0';
273 	}
274 	return (nread);
275 }
276 
/*
 * States of the integer recognizer driven by parseint_fsm().
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a leading '+' or '-' only */
	havezero,	/* accepted a first '0' (after an optional sign) */
	haveprefix,	/* accepted "0x"/"0X" or "0b"/"0B" */
	any,		/* accepted at least one digit beyond the above */
};

/*
 * Feed one character to the integer recognizer.
 *
 * *state is the current state and *base the numeric base (0 meaning
 * "not yet determined"); both may be updated.  Returns 1 if c extends
 * the number read so far and 0 if it does not.
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	int digit;

	/* A sign is only acceptable as the very first character. */
	if (c == '+' || c == '-') {
		if (*state != begin)
			return 0;
		*state = havesign;
		return 1;
	}

	/* 'x' is only acceptable as part of a "0x" prefix. */
	if (c == 'x' || c == 'X') {
		if (*state != havezero || (*base != 0 && *base != 16))
			return 0;
		*state = haveprefix;
		*base = 16;
		return 1;
	}

	/* Everything else must be a digit in some base up to 16. */
	if (c >= '0' && c <= '9')
		digit = c - '0';
	else if (c >= 'a' && c <= 'f')
		digit = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		digit = c - 'A' + 10;
	else
		return 0;

	/* The first zero is special: it may introduce a prefix. */
	if (digit == 0 && (*state == begin || *state == havesign)) {
		*state = havezero;
		return 1;
	}

	/* 'b' directly after that zero is a binary prefix if the base allows. */
	if ((c == 'b' || c == 'B') && *state == havezero &&
	    (*base == 0 || *base == 2)) {
		*state = haveprefix;
		*base = 2;
		return 1;
	}

	/* With no base chosen, "0" + octal digit means octal ... */
	if (digit <= 7 && *state == havezero && *base == 0)
		*base = 8;
	/* ... and a leading decimal digit means decimal. */
	if (digit <= 9 && (*state == begin || *state == havesign) &&
	    *base == 0)
		*base = 10;

	if (*base > digit) {
		*state = any;
		return 1;
	}
	return 0;
}
395 
396 /*
397  * Read an integer, storing it in buf.
398  *
399  * Return 0 on a match failure, and the number of characters read
400  * otherwise.
401  */
402 static __inline int
403 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
404     locale_t locale)
405 {
406 	enum parseint_state state = begin;
407 	wchar_t *wcp;
408 	int c;
409 
410 	for (wcp = buf; width; width--) {
411 		c = __fgetwc(fp, locale);
412 		if (c == WEOF)
413 			break;
414 		if (!parseint_fsm(c, &state, &base))
415 			break;
416 		*wcp++ = (wchar_t)c;
417 	}
418 	/*
419 	 * If we only had a sign, push it back.  If we only had a 0b or 0x
420 	 * prefix (possibly preceded by a sign), we view it as "0" and
421 	 * push back the letter.  In all other cases, if we stopped
422 	 * because we read a non-number character, push it back.
423 	 */
424 	if (state == havesign) {
425 		wcp--;
426 		__ungetwc(*wcp, fp, locale);
427 	} else if (state == haveprefix) {
428 		wcp--;
429 		__ungetwc(c, fp, locale);
430 	} else if (width && c != WEOF) {
431 		__ungetwc(c, fp, locale);
432 	}
433 	return (wcp - buf);
434 }
435 
/*
 * MT-safe version.
 *
 * Runs __vfwscanf() on fp with the given locale, bracketed by
 * FLOCKFILE_CANCELSAFE()/FUNLOCKFILE_CANCELSAFE(), and returns its
 * result unchanged.  FIX_LOCALE() and ORIENT() are applied to the
 * arguments first (presumably substituting a default locale and
 * setting wide orientation -- see their definitions).
 */
int
vfwscanf_l(FILE * __restrict fp, locale_t locale,
		const wchar_t * __restrict fmt, va_list ap)
{
	int ret;
	FIX_LOCALE(locale);

	FLOCKFILE_CANCELSAFE(fp);
	ORIENT(fp, 1);
	ret = __vfwscanf(fp, locale, fmt, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (ret);
}
452 int
453 vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
454 {
455 	return vfwscanf_l(fp, __get_locale(), fmt, ap);
456 }
457 
458 /*
459  * Non-MT-safe version.
460  */
461 int
462 __vfwscanf(FILE * __restrict fp, locale_t locale,
463 		const wchar_t * __restrict fmt, va_list ap)
464 {
465 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
466 	wint_t c;		/* character from format, or conversion */
467 	size_t width;		/* field width, or 0 */
468 	int flags;		/* flags as defined above */
469 	int nassigned;		/* number of fields assigned */
470 	int nconversions;	/* number of conversions */
471 	int nr;			/* characters read by the current conversion */
472 	int nread;		/* number of characters consumed from fp */
473 	int base;		/* base argument to conversion function */
474 	struct ccl ccl;		/* character class info */
475 	wchar_t buf[BUF];	/* buffer for numeric conversions */
476 	wint_t wi;		/* handy wint_t */
477 
478 	nassigned = 0;
479 	nconversions = 0;
480 	nread = 0;
481 	ccl.start = ccl.end = NULL;
482 	for (;;) {
483 		c = *fmt++;
484 		if (c == 0)
485 			return (nassigned);
486 		if (iswspace(c)) {
487 			while ((c = __fgetwc(fp, locale)) != WEOF &&
488 			    iswspace_l(c, locale))
489 				nread++;
490 			if (c != WEOF)
491 				__ungetwc(c, fp, locale);
492 			continue;
493 		}
494 		if (c != '%')
495 			goto literal;
496 		width = 0;
497 		flags = 0;
498 		/*
499 		 * switch on the format.  continue if done;
500 		 * break once format type is derived.
501 		 */
502 again:		c = *fmt++;
503 		switch (c) {
504 		case '%':
505 literal:
506 			if ((wi = __fgetwc(fp, locale)) == WEOF)
507 				goto input_failure;
508 			if (wi != c) {
509 				__ungetwc(wi, fp, locale);
510 				goto match_failure;
511 			}
512 			nread++;
513 			continue;
514 
515 		case '*':
516 			flags |= SUPPRESS;
517 			goto again;
518 		case 'j':
519 			flags |= INTMAXT;
520 			goto again;
521 		case 'l':
522 			if (flags & LONG) {
523 				flags &= ~LONG;
524 				flags |= LONGLONG;
525 			} else
526 				flags |= LONG;
527 			goto again;
528 		case 'q':
529 			flags |= LONGLONG;	/* not quite */
530 			goto again;
531 		case 't':
532 			flags |= PTRDIFFT;
533 			goto again;
534 		case 'w':
535 			/*
536 			 * Fixed-width integer types.  On all platforms we
537 			 * support, int8_t is equivalent to char, int16_t
538 			 * is equivalent to short, int32_t is equivalent
539 			 * to int, int64_t is equivalent to long long int.
540 			 * Furthermore, int_fast8_t, int_fast16_t and
541 			 * int_fast32_t are equivalent to int, and
542 			 * int_fast64_t is equivalent to long long int.
543 			 */
544 			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
545 			if (fmt[0] == 'f') {
546 				flags |= FASTINT;
547 				fmt++;
548 			} else {
549 				flags &= ~FASTINT;
550 			}
551 			if (fmt[0] == '8') {
552 				if (!(flags & FASTINT))
553 					flags |= SHORTSHORT;
554 				else
555 					/* no flag set = 32 */ ;
556 				fmt += 1;
557 			} else if (fmt[0] == '1' && fmt[1] == '6') {
558 				if (!(flags & FASTINT))
559 					flags |= SHORT;
560 				else
561 					/* no flag set = 32 */ ;
562 				fmt += 2;
563 			} else if (fmt[0] == '3' && fmt[1] == '2') {
564 				/* no flag set = 32 */ ;
565 				fmt += 2;
566 			} else if (fmt[0] == '6' && fmt[1] == '4') {
567 				flags |= LONGLONG;
568 				fmt += 2;
569 			} else {
570 				goto match_failure;
571 			}
572 			goto again;
573 		case 'z':
574 			flags |= SIZET;
575 			goto again;
576 		case 'L':
577 			flags |= LONGDBL;
578 			goto again;
579 		case 'h':
580 			if (flags & SHORT) {
581 				flags &= ~SHORT;
582 				flags |= SHORTSHORT;
583 			} else
584 				flags |= SHORT;
585 			goto again;
586 
587 		case '0': case '1': case '2': case '3': case '4':
588 		case '5': case '6': case '7': case '8': case '9':
589 			width = width * 10 + c - '0';
590 			goto again;
591 
592 		/*
593 		 * Conversions.
594 		 */
595 		case 'B':
596 		case 'b':
597 			c = CT_INT;
598 			flags |= UNSIGNED;
599 			base = 2;
600 			break;
601 
602 		case 'd':
603 			c = CT_INT;
604 			base = 10;
605 			break;
606 
607 		case 'i':
608 			c = CT_INT;
609 			base = 0;
610 			break;
611 
612 		case 'o':
613 			c = CT_INT;
614 			flags |= UNSIGNED;
615 			base = 8;
616 			break;
617 
618 		case 'u':
619 			c = CT_INT;
620 			flags |= UNSIGNED;
621 			base = 10;
622 			break;
623 
624 		case 'X':
625 		case 'x':
626 			c = CT_INT;
627 			flags |= UNSIGNED;
628 			base = 16;
629 			break;
630 
631 		case 'A': case 'E': case 'F': case 'G':
632 		case 'a': case 'e': case 'f': case 'g':
633 			c = CT_FLOAT;
634 			break;
635 
636 		case 'S':
637 			flags |= LONG;
638 			/* FALLTHROUGH */
639 		case 's':
640 			c = CT_STRING;
641 			break;
642 
643 		case '[':
644 			ccl.start = fmt;
645 			if (*fmt == '^') {
646 				ccl.compl = 1;
647 				fmt++;
648 			} else
649 				ccl.compl = 0;
650 			if (*fmt == ']')
651 				fmt++;
652 			while (*fmt != '\0' && *fmt != ']')
653 				fmt++;
654 			ccl.end = fmt;
655 			fmt++;
656 			flags |= NOSKIP;
657 			c = CT_CCL;
658 			break;
659 
660 		case 'C':
661 			flags |= LONG;
662 			/* FALLTHROUGH */
663 		case 'c':
664 			flags |= NOSKIP;
665 			c = CT_CHAR;
666 			break;
667 
668 		case 'p':	/* pointer format is like hex */
669 			flags |= POINTER;
670 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
671 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
672 			base = 16;
673 			break;
674 
675 		case 'n':
676 			if (flags & SUPPRESS)	/* ??? */
677 				continue;
678 			if (flags & SHORTSHORT)
679 				*va_arg(ap, char *) = nread;
680 			else if (flags & SHORT)
681 				*va_arg(ap, short *) = nread;
682 			else if (flags & LONG)
683 				*va_arg(ap, long *) = nread;
684 			else if (flags & LONGLONG)
685 				*va_arg(ap, long long *) = nread;
686 			else if (flags & INTMAXT)
687 				*va_arg(ap, intmax_t *) = nread;
688 			else if (flags & SIZET)
689 				*va_arg(ap, size_t *) = nread;
690 			else if (flags & PTRDIFFT)
691 				*va_arg(ap, ptrdiff_t *) = nread;
692 			else
693 				*va_arg(ap, int *) = nread;
694 			continue;
695 
696 		default:
697 			goto match_failure;
698 
699 		/*
700 		 * Disgusting backwards compatibility hack.	XXX
701 		 */
702 		case '\0':	/* compat */
703 			return (EOF);
704 		}
705 
706 		/*
707 		 * Consume leading white space, except for formats
708 		 * that suppress this.
709 		 */
710 		if ((flags & NOSKIP) == 0) {
711 			while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi))
712 				nread++;
713 			if (wi == WEOF)
714 				goto input_failure;
715 			__ungetwc(wi, fp, locale);
716 		}
717 
718 		/*
719 		 * Do the conversion.
720 		 */
721 		switch (c) {
722 
723 		case CT_CHAR:
724 			/* scan arbitrary characters (sets NOSKIP) */
725 			if (width == 0)
726 				width = 1;
727 			if (flags & LONG) {
728 				nr = convert_wchar(fp, GETARG(wchar_t *), width,
729 				    locale);
730 			} else {
731 				nr = convert_char(fp, GETARG(char *), width,
732 				    locale);
733 			}
734 			if (nr < 0)
735 				goto input_failure;
736 			break;
737 
738 		case CT_CCL:
739 			/* scan a (nonempty) character class (sets NOSKIP) */
740 			if (width == 0)
741 				width = (size_t)~0;	/* `infinity' */
742 			/* take only those things in the class */
743 			if (flags & LONG) {
744 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
745 				    &ccl, locale);
746 			} else {
747 				nr = convert_ccl(fp, GETARG(char *), width,
748 				    &ccl, locale);
749 			}
750 			if (nr <= 0) {
751 				if (nr < 0)
752 					goto input_failure;
753 				else /* nr == 0 */
754 					goto match_failure;
755 			}
756 			break;
757 
758 		case CT_STRING:
759 			/* like CCL, but zero-length string OK, & no NOSKIP */
760 			if (width == 0)
761 				width = (size_t)~0;
762 			if (flags & LONG) {
763 				nr = convert_wstring(fp, GETARG(wchar_t *),
764 				    width, locale);
765 			} else {
766 				nr = convert_string(fp, GETARG(char *), width,
767 				    locale);
768 			}
769 			if (nr < 0)
770 				goto input_failure;
771 			break;
772 
773 		case CT_INT:
774 			/* scan an integer as if by the conversion function */
775 			if (width == 0 || width > sizeof(buf) /
776 			    sizeof(*buf) - 1)
777 				width = sizeof(buf) / sizeof(*buf) - 1;
778 
779 			nr = parseint(fp, buf, width, base, locale);
780 			if (nr == 0)
781 				goto match_failure;
782 			if ((flags & SUPPRESS) == 0) {
783 				uintmax_t res;
784 
785 				buf[nr] = L'\0';
786 				if ((flags & UNSIGNED) == 0)
787 				    res = wcstoimax(buf, NULL, base);
788 				else
789 				    res = wcstoumax(buf, NULL, base);
790 				if (flags & POINTER)
791 					*va_arg(ap, void **) =
792 							(void *)(uintptr_t)res;
793 				else if (flags & SHORTSHORT)
794 					*va_arg(ap, char *) = res;
795 				else if (flags & SHORT)
796 					*va_arg(ap, short *) = res;
797 				else if (flags & LONG)
798 					*va_arg(ap, long *) = res;
799 				else if (flags & LONGLONG)
800 					*va_arg(ap, long long *) = res;
801 				else if (flags & INTMAXT)
802 					*va_arg(ap, intmax_t *) = res;
803 				else if (flags & PTRDIFFT)
804 					*va_arg(ap, ptrdiff_t *) = res;
805 				else if (flags & SIZET)
806 					*va_arg(ap, size_t *) = res;
807 				else
808 					*va_arg(ap, int *) = res;
809 			}
810 			break;
811 
812 		case CT_FLOAT:
813 			/* scan a floating point number as if by strtod */
814 			if (width == 0 || width > sizeof(buf) /
815 			    sizeof(*buf) - 1)
816 				width = sizeof(buf) / sizeof(*buf) - 1;
817 			nr = parsefloat(fp, buf, buf + width, locale);
818 			if (nr == 0)
819 				goto match_failure;
820 			if ((flags & SUPPRESS) == 0) {
821 				if (flags & LONGDBL) {
822 					long double res = wcstold(buf, NULL);
823 					*va_arg(ap, long double *) = res;
824 				} else if (flags & LONG) {
825 					double res = wcstod(buf, NULL);
826 					*va_arg(ap, double *) = res;
827 				} else {
828 					float res = wcstof(buf, NULL);
829 					*va_arg(ap, float *) = res;
830 				}
831 			}
832 			break;
833 		}
834 		if (!(flags & SUPPRESS))
835 			nassigned++;
836 		nread += nr;
837 		nconversions++;
838 	}
839 input_failure:
840 	return (nconversions != 0 ? nassigned : EOF);
841 match_failure:
842 	return (nassigned);
843 }
844 
845 static int
846 parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
847 {
848 	mbstate_t mbs;
849 	size_t nconv;
850 	wchar_t *commit, *p;
851 	int infnanpos = 0;
852 	enum {
853 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
854 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
855 	} state = S_START;
856 	wchar_t c;
857 	wchar_t decpt;
858 	_Bool gotmantdig = 0, ishex = 0;
859 
860 	mbs = initial_mbs;
861 	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
862 	if (nconv == (size_t)-1 || nconv == (size_t)-2)
863 		decpt = '.';	/* failsafe */
864 
865 	/*
866 	 * We set commit = p whenever the string we have read so far
867 	 * constitutes a valid representation of a floating point
868 	 * number by itself.  At some point, the parse will complete
869 	 * or fail, and we will ungetc() back to the last commit point.
870 	 * To ensure that the file offset gets updated properly, it is
871 	 * always necessary to read at least one character that doesn't
872 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
873 	 */
874 	commit = buf - 1;
875 	c = WEOF;
876 	for (p = buf; p < end; ) {
877 		if ((c = __fgetwc(fp, locale)) == WEOF)
878 			break;
879 reswitch:
880 		switch (state) {
881 		case S_START:
882 			state = S_GOTSIGN;
883 			if (c == '-' || c == '+')
884 				break;
885 			else
886 				goto reswitch;
887 		case S_GOTSIGN:
888 			switch (c) {
889 			case '0':
890 				state = S_MAYBEHEX;
891 				commit = p;
892 				break;
893 			case 'I':
894 			case 'i':
895 				state = S_INF;
896 				break;
897 			case 'N':
898 			case 'n':
899 				state = S_NAN;
900 				break;
901 			default:
902 				state = S_DIGITS;
903 				goto reswitch;
904 			}
905 			break;
906 		case S_INF:
907 			if (infnanpos > 6 ||
908 			    (c != "nfinity"[infnanpos] &&
909 			     c != "NFINITY"[infnanpos]))
910 				goto parsedone;
911 			if (infnanpos == 1 || infnanpos == 6)
912 				commit = p;	/* inf or infinity */
913 			infnanpos++;
914 			break;
915 		case S_NAN:
916 			switch (infnanpos) {
917 			case 0:
918 				if (c != 'A' && c != 'a')
919 					goto parsedone;
920 				break;
921 			case 1:
922 				if (c != 'N' && c != 'n')
923 					goto parsedone;
924 				else
925 					commit = p;
926 				break;
927 			case 2:
928 				if (c != '(')
929 					goto parsedone;
930 				break;
931 			default:
932 				if (c == ')') {
933 					commit = p;
934 					state = S_DONE;
935 				} else if (!iswalnum(c) && c != '_')
936 					goto parsedone;
937 				break;
938 			}
939 			infnanpos++;
940 			break;
941 		case S_DONE:
942 			goto parsedone;
943 		case S_MAYBEHEX:
944 			state = S_DIGITS;
945 			if (c == 'X' || c == 'x') {
946 				ishex = 1;
947 				break;
948 			} else {	/* we saw a '0', but no 'x' */
949 				gotmantdig = 1;
950 				goto reswitch;
951 			}
952 		case S_DIGITS:
953 			if ((ishex && iswxdigit(c)) || iswdigit(c))
954 				gotmantdig = 1;
955 			else {
956 				state = S_FRAC;
957 				if (c != decpt)
958 					goto reswitch;
959 			}
960 			if (gotmantdig)
961 				commit = p;
962 			break;
963 		case S_FRAC:
964 			if (((c == 'E' || c == 'e') && !ishex) ||
965 			    ((c == 'P' || c == 'p') && ishex)) {
966 				if (!gotmantdig)
967 					goto parsedone;
968 				else
969 					state = S_EXP;
970 			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
971 				commit = p;
972 				gotmantdig = 1;
973 			} else
974 				goto parsedone;
975 			break;
976 		case S_EXP:
977 			state = S_EXPDIGITS;
978 			if (c == '-' || c == '+')
979 				break;
980 			else
981 				goto reswitch;
982 		case S_EXPDIGITS:
983 			if (iswdigit(c))
984 				commit = p;
985 			else
986 				goto parsedone;
987 			break;
988 		default:
989 			abort();
990 		}
991 		*p++ = c;
992 		c = WEOF;
993 	}
994 
995 parsedone:
996 	if (c != WEOF)
997 		__ungetwc(c, fp, locale);
998 	while (commit < --p)
999 		__ungetwc(*p, fp, locale);
1000 	*++commit = '\0';
1001 	return (commit - buf);
1002 }
1003