xref: /freebsd/lib/libc/stdio/vfwscanf.c (revision ac00d4d59b18a76c6148ca5d7439bb446d38da5c)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Torek.
9  *
10  * Copyright (c) 2011 The FreeBSD Foundation
11  *
12  * Copyright (c) 2023 Dag-Erling Smørgrav
13  *
14  * Portions of this software were developed by David Chisnall
15  * under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 #if 0
43 #endif
44 #include "namespace.h"
45 #include <ctype.h>
46 #include <inttypes.h>
47 #include <limits.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <stddef.h>
51 #include <stdarg.h>
52 #include <string.h>
53 #include <wchar.h>
54 #include <wctype.h>
55 #include "un-namespace.h"
56 
57 #include "libc_private.h"
58 #include "local.h"
59 #include "xlocale_private.h"
60 
#define	BUF		513	/* Size of the numeric scratch buffer, in wchar_t, incl. NUL. */

/*
 * Flags used during conversion.  They are OR-ed into the per-directive
 * `flags' word while the length modifiers of a conversion are parsed.
 */
#define	LONG		0x01	/* l: long or double; wide variant of c, s, [ */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short (also w16) */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad); also w64 */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char (also w8) */
#define	UNSIGNED	0x8000	/* %[bBoupxX] conversions */

/*
 * Conversion types.  The directive parser replaces the conversion
 * character with one of these before the conversion is carried out.
 */
#define	CT_CHAR		0	/* %c and %C conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s and %S conversion */
#define	CT_INT		3	/* %[bBdioupxX] conversion */
#define	CT_FLOAT	4	/* %[aefgAEFG] conversion */
88 
89 #ifndef NO_FLOATING_POINT
90 static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t);
91 #endif
92 
/*
 * A scan set taken from a %[...] directive.  The members are the wide
 * characters in the half-open range [start, end) of the format string.
 */
struct ccl {
	const wchar_t *start;	/* first member of the set */
	const wchar_t *end;	/* one past the last member */
	int compl;		/* nonzero when the set is negated */
};

/*
 * Tell whether the scan set accepts wi.
 *
 * Returns 1 if wi is listed and the set is not negated, or if wi is
 * not listed and the set is negated; returns 0 otherwise.  Members are
 * matched literally; no range notation is interpreted here.
 */
static __inline int
inccl(const struct ccl *ccl, wint_t wi)
{
	const size_t nmembers = ccl->end - ccl->start;
	const int listed = wmemchr(ccl->start, wi, nmembers) != NULL;

	return (ccl->compl ? !listed : listed);
}
110 
111 /*
112  * Conversion functions are passed a pointer to this object instead of
113  * a real parameter to indicate that the assignment-suppression (*)
114  * flag was specified.  We could use a NULL pointer to indicate this,
115  * but that would mask bugs in applications that call scanf() with a
116  * NULL pointer.
117  */
static const int suppress;	/* only the address of this object is used */
#define	SUPPRESS_PTR	((void *)&suppress)

/* Zero-initialized conversion state; copied to reset a local mbstate_t. */
static const mbstate_t initial_mbs;
122 
123 /*
124  * The following conversion functions return the number of characters consumed,
125  * or -1 on input failure.  Character class conversion returns 0 on match
126  * failure.
127  */
128 
129 static __inline int
130 convert_char(FILE *fp, char * mbp, int width, locale_t locale)
131 {
132 	mbstate_t mbs;
133 	size_t nconv;
134 	wint_t wi;
135 	int n;
136 
137 	n = 0;
138 	mbs = initial_mbs;
139 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
140 		if (mbp != SUPPRESS_PTR) {
141 			nconv = wcrtomb(mbp, wi, &mbs);
142 			if (nconv == (size_t)-1)
143 				return (-1);
144 			mbp += nconv;
145 		}
146 		n++;
147 	}
148 	if (n == 0)
149 		return (-1);
150 	return (n);
151 }
152 
153 static __inline int
154 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
155 {
156 	wint_t wi;
157 	int n;
158 
159 	n = 0;
160 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
161 		if (wcp != SUPPRESS_PTR)
162 			*wcp++ = (wchar_t)wi;
163 		n++;
164 	}
165 	if (n == 0)
166 		return (-1);
167 	return (n);
168 }
169 
170 static __inline int
171 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
172     locale_t locale)
173 {
174 	mbstate_t mbs;
175 	size_t nconv;
176 	wint_t wi;
177 	int n;
178 
179 	n = 0;
180 	mbs = initial_mbs;
181 	while ((wi = __fgetwc(fp, locale)) != WEOF &&
182 	    width-- != 0 && inccl(ccl, wi)) {
183 		if (mbp != SUPPRESS_PTR) {
184 			nconv = wcrtomb(mbp, wi, &mbs);
185 			if (nconv == (size_t)-1)
186 				return (-1);
187 			mbp += nconv;
188 		}
189 		n++;
190 	}
191 	if (wi != WEOF)
192 		__ungetwc(wi, fp, locale);
193 	if (mbp != SUPPRESS_PTR)
194 		*mbp = 0;
195 	return (n);
196 }
197 
198 static __inline int
199 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
200     locale_t locale)
201 {
202 	wchar_t *wcp0;
203 	wint_t wi;
204 	int n;
205 
206 	if (wcp == SUPPRESS_PTR) {
207 		n = 0;
208 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
209 		    width-- != 0 && inccl(ccl, wi))
210 			n++;
211 		if (wi != WEOF)
212 			__ungetwc(wi, fp, locale);
213 	} else {
214 		wcp0 = wcp;
215 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
216 		    width-- != 0 && inccl(ccl, wi))
217 			*wcp++ = (wchar_t)wi;
218 		if (wi != WEOF)
219 			__ungetwc(wi, fp, locale);
220 		n = wcp - wcp0;
221 		if (n == 0)
222 			return (0);
223 		*wcp = 0;
224 	}
225 	return (n);
226 }
227 
228 static __inline int
229 convert_string(FILE *fp, char * mbp, int width, locale_t locale)
230 {
231 	mbstate_t mbs;
232 	size_t nconv;
233 	wint_t wi;
234 	int nread;
235 
236 	mbs = initial_mbs;
237 	nread = 0;
238 	while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
239 	    !iswspace(wi)) {
240 		if (mbp != SUPPRESS_PTR) {
241 			nconv = wcrtomb(mbp, wi, &mbs);
242 			if (nconv == (size_t)-1)
243 				return (-1);
244 			mbp += nconv;
245 		}
246 		nread++;
247 	}
248 	if (wi != WEOF)
249 		__ungetwc(wi, fp, locale);
250 	if (mbp != SUPPRESS_PTR)
251 		*mbp = 0;
252 	return (nread);
253 }
254 
255 static __inline int
256 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
257 {
258 	wint_t wi;
259 	int nread;
260 
261 	nread = 0;
262 	if (wcp == SUPPRESS_PTR) {
263 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
264 		    width-- != 0 && !iswspace(wi))
265 			nread++;
266 		if (wi != WEOF)
267 			__ungetwc(wi, fp, locale);
268 	} else {
269 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
270 		    width-- != 0 && !iswspace(wi)) {
271 			*wcp++ = (wchar_t)wi;
272 			nread++;
273 		}
274 		if (wi != WEOF)
275 			__ungetwc(wi, fp, locale);
276 		*wcp = '\0';
277 	}
278 	return (nread);
279 }
280 
/*
 * States of the integer recognizer, describing what has been accepted
 * so far.
 */
enum parseint_state {
	begin,		/* nothing yet */
	havesign,	/* a sign only */
	havezero,	/* a single leading zero, possibly after a sign */
	haveprefix,	/* a 0b or 0x prefix with no digit after it yet */
	any,		/* at least one digit beyond the above */
};

/*
 * Feed one character to the integer recognizer.
 *
 * c is the next input character, *state the recognizer state and *base
 * the conversion base.  A base of 0 means "detect": it is replaced by
 * 2, 8, 10 or 16 as soon as the input decides it.  Both *state and
 * *base are updated when the character is accepted.
 *
 * Returns 1 if c extends the number and 0 if it does not, in which
 * case *state and *base describe what was accepted before c.
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state != begin)
			return (0);
		*state = havesign;
		return (1);
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/*
			 * A zero following the leading zero settles an
			 * auto-detected base as octal, exactly as any
			 * other octal digit would.  Without this the base
			 * stays 0 and every later digit is rejected, so
			 * "007" would be cut short after "00".
			 */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return (1);
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* Leading zero followed by an octal digit: octal. */
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A number that starts with 1-9 is decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
	case 'B':
		/* "0b" is a binary prefix unless the base rules it out. */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		/* Otherwise it can only be the hexadecimal digit 11. */
		digit = 11;
		break;
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		digit = c - 'a' + 10;
		break;
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = c - 'A' + 10;
		break;
	case 'x':
	case 'X':
		/* "0x" is a hexadecimal prefix unless the base rules it out. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return (1);
		}
		return (0);
	default:
		return (0);
	}

	/* A digit is accepted only if it is valid in the current base. */
	if (*base > digit) {
		*state = any;
		return (1);
	}
	return (0);
}
400 
401 /*
402  * Read an integer, storing it in buf.
403  *
404  * Return 0 on a match failure, and the number of characters read
405  * otherwise.
406  */
407 static __inline int
408 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
409     locale_t locale)
410 {
411 	enum parseint_state state = begin;
412 	wchar_t *wcp;
413 	int c;
414 
415 	for (wcp = buf; width; width--) {
416 		c = __fgetwc(fp, locale);
417 		if (c == WEOF)
418 			break;
419 		if (!parseint_fsm(c, &state, &base))
420 			break;
421 		*wcp++ = (wchar_t)c;
422 	}
423 	/*
424 	 * If we only had a sign, push it back.  If we only had a 0b or 0x
425 	 * prefix (possibly preceded by a sign), we view it as "0" and
426 	 * push back the letter.  In all other cases, if we stopped
427 	 * because we read a non-number character, push it back.
428 	 */
429 	if (state == havesign) {
430 		wcp--;
431 		__ungetwc(*wcp, fp, locale);
432 	} else if (state == haveprefix) {
433 		wcp--;
434 		__ungetwc(c, fp, locale);
435 	} else if (width && c != WEOF) {
436 		__ungetwc(c, fp, locale);
437 	}
438 	return (wcp - buf);
439 }
440 
441 /*
442  * MT-safe version.
443  */
444 int
445 vfwscanf_l(FILE * __restrict fp, locale_t locale,
446 		const wchar_t * __restrict fmt, va_list ap)
447 {
448 	int ret;
449 	FIX_LOCALE(locale);
450 
451 	FLOCKFILE_CANCELSAFE(fp);
452 	ORIENT(fp, 1);
453 	ret = __vfwscanf(fp, locale, fmt, ap);
454 	FUNLOCKFILE_CANCELSAFE();
455 	return (ret);
456 }
457 int
458 vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
459 {
460 	return vfwscanf_l(fp, __get_locale(), fmt, ap);
461 }
462 
463 /*
464  * Non-MT-safe version.
465  */
466 int
467 __vfwscanf(FILE * __restrict fp, locale_t locale,
468 		const wchar_t * __restrict fmt, va_list ap)
469 {
470 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
471 	wint_t c;		/* character from format, or conversion */
472 	size_t width;		/* field width, or 0 */
473 	int flags;		/* flags as defined above */
474 	int nassigned;		/* number of fields assigned */
475 	int nconversions;	/* number of conversions */
476 	int nr;			/* characters read by the current conversion */
477 	int nread;		/* number of characters consumed from fp */
478 	int base;		/* base argument to conversion function */
479 	struct ccl ccl;		/* character class info */
480 	wchar_t buf[BUF];	/* buffer for numeric conversions */
481 	wint_t wi;		/* handy wint_t */
482 
483 	nassigned = 0;
484 	nconversions = 0;
485 	nread = 0;
486 	ccl.start = ccl.end = NULL;
487 	for (;;) {
488 		c = *fmt++;
489 		if (c == 0)
490 			return (nassigned);
491 		if (iswspace(c)) {
492 			while ((c = __fgetwc(fp, locale)) != WEOF &&
493 			    iswspace_l(c, locale))
494 				nread++;
495 			if (c != WEOF)
496 				__ungetwc(c, fp, locale);
497 			continue;
498 		}
499 		if (c != '%')
500 			goto literal;
501 		width = 0;
502 		flags = 0;
503 		/*
504 		 * switch on the format.  continue if done;
505 		 * break once format type is derived.
506 		 */
507 again:		c = *fmt++;
508 		switch (c) {
509 		case '%':
510 literal:
511 			if ((wi = __fgetwc(fp, locale)) == WEOF)
512 				goto input_failure;
513 			if (wi != c) {
514 				__ungetwc(wi, fp, locale);
515 				goto match_failure;
516 			}
517 			nread++;
518 			continue;
519 
520 		case '*':
521 			flags |= SUPPRESS;
522 			goto again;
523 		case 'j':
524 			flags |= INTMAXT;
525 			goto again;
526 		case 'l':
527 			if (flags & LONG) {
528 				flags &= ~LONG;
529 				flags |= LONGLONG;
530 			} else
531 				flags |= LONG;
532 			goto again;
533 		case 'q':
534 			flags |= LONGLONG;	/* not quite */
535 			goto again;
536 		case 't':
537 			flags |= PTRDIFFT;
538 			goto again;
539 		case 'w':
540 			/*
541 			 * Fixed-width integer types.  On all platforms we
542 			 * support, int8_t is equivalent to char, int16_t
543 			 * is equivalent to short, int32_t is equivalent
544 			 * to int, int64_t is equivalent to long long int.
545 			 * Furthermore, int_fast8_t, int_fast16_t and
546 			 * int_fast32_t are equivalent to int, and
547 			 * int_fast64_t is equivalent to long long int.
548 			 */
549 			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
550 			if (fmt[0] == 'f') {
551 				flags |= FASTINT;
552 				fmt++;
553 			} else {
554 				flags &= ~FASTINT;
555 			}
556 			if (fmt[0] == '8') {
557 				if (!(flags & FASTINT))
558 					flags |= SHORTSHORT;
559 				else
560 					/* no flag set = 32 */ ;
561 				fmt += 1;
562 			} else if (fmt[0] == '1' && fmt[1] == '6') {
563 				if (!(flags & FASTINT))
564 					flags |= SHORT;
565 				else
566 					/* no flag set = 32 */ ;
567 				fmt += 2;
568 			} else if (fmt[0] == '3' && fmt[1] == '2') {
569 				/* no flag set = 32 */ ;
570 				fmt += 2;
571 			} else if (fmt[0] == '6' && fmt[1] == '4') {
572 				flags |= LONGLONG;
573 				fmt += 2;
574 			} else {
575 				goto match_failure;
576 			}
577 			goto again;
578 		case 'z':
579 			flags |= SIZET;
580 			goto again;
581 		case 'L':
582 			flags |= LONGDBL;
583 			goto again;
584 		case 'h':
585 			if (flags & SHORT) {
586 				flags &= ~SHORT;
587 				flags |= SHORTSHORT;
588 			} else
589 				flags |= SHORT;
590 			goto again;
591 
592 		case '0': case '1': case '2': case '3': case '4':
593 		case '5': case '6': case '7': case '8': case '9':
594 			width = width * 10 + c - '0';
595 			goto again;
596 
597 		/*
598 		 * Conversions.
599 		 */
600 		case 'B':
601 		case 'b':
602 			c = CT_INT;
603 			flags |= UNSIGNED;
604 			base = 2;
605 			break;
606 
607 		case 'd':
608 			c = CT_INT;
609 			base = 10;
610 			break;
611 
612 		case 'i':
613 			c = CT_INT;
614 			base = 0;
615 			break;
616 
617 		case 'o':
618 			c = CT_INT;
619 			flags |= UNSIGNED;
620 			base = 8;
621 			break;
622 
623 		case 'u':
624 			c = CT_INT;
625 			flags |= UNSIGNED;
626 			base = 10;
627 			break;
628 
629 		case 'X':
630 		case 'x':
631 			c = CT_INT;
632 			flags |= UNSIGNED;
633 			base = 16;
634 			break;
635 
636 #ifndef NO_FLOATING_POINT
637 		case 'A': case 'E': case 'F': case 'G':
638 		case 'a': case 'e': case 'f': case 'g':
639 			c = CT_FLOAT;
640 			break;
641 #endif
642 
643 		case 'S':
644 			flags |= LONG;
645 			/* FALLTHROUGH */
646 		case 's':
647 			c = CT_STRING;
648 			break;
649 
650 		case '[':
651 			ccl.start = fmt;
652 			if (*fmt == '^') {
653 				ccl.compl = 1;
654 				fmt++;
655 			} else
656 				ccl.compl = 0;
657 			if (*fmt == ']')
658 				fmt++;
659 			while (*fmt != '\0' && *fmt != ']')
660 				fmt++;
661 			ccl.end = fmt;
662 			fmt++;
663 			flags |= NOSKIP;
664 			c = CT_CCL;
665 			break;
666 
667 		case 'C':
668 			flags |= LONG;
669 			/* FALLTHROUGH */
670 		case 'c':
671 			flags |= NOSKIP;
672 			c = CT_CHAR;
673 			break;
674 
675 		case 'p':	/* pointer format is like hex */
676 			flags |= POINTER;
677 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
678 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
679 			base = 16;
680 			break;
681 
682 		case 'n':
683 			if (flags & SUPPRESS)	/* ??? */
684 				continue;
685 			if (flags & SHORTSHORT)
686 				*va_arg(ap, char *) = nread;
687 			else if (flags & SHORT)
688 				*va_arg(ap, short *) = nread;
689 			else if (flags & LONG)
690 				*va_arg(ap, long *) = nread;
691 			else if (flags & LONGLONG)
692 				*va_arg(ap, long long *) = nread;
693 			else if (flags & INTMAXT)
694 				*va_arg(ap, intmax_t *) = nread;
695 			else if (flags & SIZET)
696 				*va_arg(ap, size_t *) = nread;
697 			else if (flags & PTRDIFFT)
698 				*va_arg(ap, ptrdiff_t *) = nread;
699 			else
700 				*va_arg(ap, int *) = nread;
701 			continue;
702 
703 		default:
704 			goto match_failure;
705 
706 		/*
707 		 * Disgusting backwards compatibility hack.	XXX
708 		 */
709 		case '\0':	/* compat */
710 			return (EOF);
711 		}
712 
713 		/*
714 		 * Consume leading white space, except for formats
715 		 * that suppress this.
716 		 */
717 		if ((flags & NOSKIP) == 0) {
718 			while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi))
719 				nread++;
720 			if (wi == WEOF)
721 				goto input_failure;
722 			__ungetwc(wi, fp, locale);
723 		}
724 
725 		/*
726 		 * Do the conversion.
727 		 */
728 		switch (c) {
729 
730 		case CT_CHAR:
731 			/* scan arbitrary characters (sets NOSKIP) */
732 			if (width == 0)
733 				width = 1;
734 			if (flags & LONG) {
735 				nr = convert_wchar(fp, GETARG(wchar_t *), width,
736 				    locale);
737 			} else {
738 				nr = convert_char(fp, GETARG(char *), width,
739 				    locale);
740 			}
741 			if (nr < 0)
742 				goto input_failure;
743 			break;
744 
745 		case CT_CCL:
746 			/* scan a (nonempty) character class (sets NOSKIP) */
747 			if (width == 0)
748 				width = (size_t)~0;	/* `infinity' */
749 			/* take only those things in the class */
750 			if (flags & LONG) {
751 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
752 				    &ccl, locale);
753 			} else {
754 				nr = convert_ccl(fp, GETARG(char *), width,
755 				    &ccl, locale);
756 			}
757 			if (nr <= 0) {
758 				if (nr < 0)
759 					goto input_failure;
760 				else /* nr == 0 */
761 					goto match_failure;
762 			}
763 			break;
764 
765 		case CT_STRING:
766 			/* like CCL, but zero-length string OK, & no NOSKIP */
767 			if (width == 0)
768 				width = (size_t)~0;
769 			if (flags & LONG) {
770 				nr = convert_wstring(fp, GETARG(wchar_t *),
771 				    width, locale);
772 			} else {
773 				nr = convert_string(fp, GETARG(char *), width,
774 				    locale);
775 			}
776 			if (nr < 0)
777 				goto input_failure;
778 			break;
779 
780 		case CT_INT:
781 			/* scan an integer as if by the conversion function */
782 			if (width == 0 || width > sizeof(buf) /
783 			    sizeof(*buf) - 1)
784 				width = sizeof(buf) / sizeof(*buf) - 1;
785 
786 			nr = parseint(fp, buf, width, base, locale);
787 			if (nr == 0)
788 				goto match_failure;
789 			if ((flags & SUPPRESS) == 0) {
790 				uintmax_t res;
791 
792 				buf[nr] = L'\0';
793 				if ((flags & UNSIGNED) == 0)
794 				    res = wcstoimax(buf, NULL, base);
795 				else
796 				    res = wcstoumax(buf, NULL, base);
797 				if (flags & POINTER)
798 					*va_arg(ap, void **) =
799 							(void *)(uintptr_t)res;
800 				else if (flags & SHORTSHORT)
801 					*va_arg(ap, char *) = res;
802 				else if (flags & SHORT)
803 					*va_arg(ap, short *) = res;
804 				else if (flags & LONG)
805 					*va_arg(ap, long *) = res;
806 				else if (flags & LONGLONG)
807 					*va_arg(ap, long long *) = res;
808 				else if (flags & INTMAXT)
809 					*va_arg(ap, intmax_t *) = res;
810 				else if (flags & PTRDIFFT)
811 					*va_arg(ap, ptrdiff_t *) = res;
812 				else if (flags & SIZET)
813 					*va_arg(ap, size_t *) = res;
814 				else
815 					*va_arg(ap, int *) = res;
816 			}
817 			break;
818 
819 #ifndef NO_FLOATING_POINT
820 		case CT_FLOAT:
821 			/* scan a floating point number as if by strtod */
822 			if (width == 0 || width > sizeof(buf) /
823 			    sizeof(*buf) - 1)
824 				width = sizeof(buf) / sizeof(*buf) - 1;
825 			nr = parsefloat(fp, buf, buf + width, locale);
826 			if (nr == 0)
827 				goto match_failure;
828 			if ((flags & SUPPRESS) == 0) {
829 				if (flags & LONGDBL) {
830 					long double res = wcstold(buf, NULL);
831 					*va_arg(ap, long double *) = res;
832 				} else if (flags & LONG) {
833 					double res = wcstod(buf, NULL);
834 					*va_arg(ap, double *) = res;
835 				} else {
836 					float res = wcstof(buf, NULL);
837 					*va_arg(ap, float *) = res;
838 				}
839 			}
840 			break;
841 #endif /* !NO_FLOATING_POINT */
842 		}
843 		if (!(flags & SUPPRESS))
844 			nassigned++;
845 		nread += nr;
846 		nconversions++;
847 	}
848 input_failure:
849 	return (nconversions != 0 ? nassigned : EOF);
850 match_failure:
851 	return (nassigned);
852 }
853 
854 #ifndef NO_FLOATING_POINT
855 static int
856 parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
857 {
858 	mbstate_t mbs;
859 	size_t nconv;
860 	wchar_t *commit, *p;
861 	int infnanpos = 0;
862 	enum {
863 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
864 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
865 	} state = S_START;
866 	wchar_t c;
867 	wchar_t decpt;
868 	_Bool gotmantdig = 0, ishex = 0;
869 
870 	mbs = initial_mbs;
871 	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
872 	if (nconv == (size_t)-1 || nconv == (size_t)-2)
873 		decpt = '.';	/* failsafe */
874 
875 	/*
876 	 * We set commit = p whenever the string we have read so far
877 	 * constitutes a valid representation of a floating point
878 	 * number by itself.  At some point, the parse will complete
879 	 * or fail, and we will ungetc() back to the last commit point.
880 	 * To ensure that the file offset gets updated properly, it is
881 	 * always necessary to read at least one character that doesn't
882 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
883 	 */
884 	commit = buf - 1;
885 	c = WEOF;
886 	for (p = buf; p < end; ) {
887 		if ((c = __fgetwc(fp, locale)) == WEOF)
888 			break;
889 reswitch:
890 		switch (state) {
891 		case S_START:
892 			state = S_GOTSIGN;
893 			if (c == '-' || c == '+')
894 				break;
895 			else
896 				goto reswitch;
897 		case S_GOTSIGN:
898 			switch (c) {
899 			case '0':
900 				state = S_MAYBEHEX;
901 				commit = p;
902 				break;
903 			case 'I':
904 			case 'i':
905 				state = S_INF;
906 				break;
907 			case 'N':
908 			case 'n':
909 				state = S_NAN;
910 				break;
911 			default:
912 				state = S_DIGITS;
913 				goto reswitch;
914 			}
915 			break;
916 		case S_INF:
917 			if (infnanpos > 6 ||
918 			    (c != "nfinity"[infnanpos] &&
919 			     c != "NFINITY"[infnanpos]))
920 				goto parsedone;
921 			if (infnanpos == 1 || infnanpos == 6)
922 				commit = p;	/* inf or infinity */
923 			infnanpos++;
924 			break;
925 		case S_NAN:
926 			switch (infnanpos) {
927 			case 0:
928 				if (c != 'A' && c != 'a')
929 					goto parsedone;
930 				break;
931 			case 1:
932 				if (c != 'N' && c != 'n')
933 					goto parsedone;
934 				else
935 					commit = p;
936 				break;
937 			case 2:
938 				if (c != '(')
939 					goto parsedone;
940 				break;
941 			default:
942 				if (c == ')') {
943 					commit = p;
944 					state = S_DONE;
945 				} else if (!iswalnum(c) && c != '_')
946 					goto parsedone;
947 				break;
948 			}
949 			infnanpos++;
950 			break;
951 		case S_DONE:
952 			goto parsedone;
953 		case S_MAYBEHEX:
954 			state = S_DIGITS;
955 			if (c == 'X' || c == 'x') {
956 				ishex = 1;
957 				break;
958 			} else {	/* we saw a '0', but no 'x' */
959 				gotmantdig = 1;
960 				goto reswitch;
961 			}
962 		case S_DIGITS:
963 			if ((ishex && iswxdigit(c)) || iswdigit(c))
964 				gotmantdig = 1;
965 			else {
966 				state = S_FRAC;
967 				if (c != decpt)
968 					goto reswitch;
969 			}
970 			if (gotmantdig)
971 				commit = p;
972 			break;
973 		case S_FRAC:
974 			if (((c == 'E' || c == 'e') && !ishex) ||
975 			    ((c == 'P' || c == 'p') && ishex)) {
976 				if (!gotmantdig)
977 					goto parsedone;
978 				else
979 					state = S_EXP;
980 			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
981 				commit = p;
982 				gotmantdig = 1;
983 			} else
984 				goto parsedone;
985 			break;
986 		case S_EXP:
987 			state = S_EXPDIGITS;
988 			if (c == '-' || c == '+')
989 				break;
990 			else
991 				goto reswitch;
992 		case S_EXPDIGITS:
993 			if (iswdigit(c))
994 				commit = p;
995 			else
996 				goto parsedone;
997 			break;
998 		default:
999 			abort();
1000 		}
1001 		*p++ = c;
1002 		c = WEOF;
1003 	}
1004 
1005 parsedone:
1006 	if (c != WEOF)
1007 		__ungetwc(c, fp, locale);
1008 	while (commit < --p)
1009 		__ungetwc(*p, fp, locale);
1010 	*++commit = '\0';
1011 	return (commit - buf);
1012 }
1013 #endif
1014