xref: /freebsd/lib/libc/stdio/vfwscanf.c (revision dcc4d2939f789a6d1f272ffeab2068ba2b7525ea)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Torek.
9  *
10  * Copyright (c) 2011 The FreeBSD Foundation
11  *
12  * Copyright (c) 2023 Dag-Erling Smørgrav
13  *
14  * Portions of this software were developed by David Chisnall
15  * under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 #if 0
43 #if defined(LIBC_SCCS) && !defined(lint)
44 static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
45 #endif /* LIBC_SCCS and not lint */
46 #endif
47 #include "namespace.h"
48 #include <ctype.h>
49 #include <inttypes.h>
50 #include <limits.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <stddef.h>
54 #include <stdarg.h>
55 #include <string.h>
56 #include <wchar.h>
57 #include <wctype.h>
58 #include "un-namespace.h"
59 
60 #include "libc_private.h"
61 #include "local.h"
62 #include "xlocale_private.h"
63 
/*
 * Number of wide characters in the scratch buffer that receives the text
 * of a numeric field, terminating NUL included.
 */
#define	BUF		513
65 
/*
 * Bits kept in `flags' while a conversion specification is being decoded
 * and carried out.
 */
#define	LONG		(1 << 0)	/* l: long or double */
#define	LONGDBL		(1 << 1)	/* L: long double */
#define	SHORT		(1 << 2)	/* h: short */
#define	SUPPRESS	(1 << 3)	/* *: parse but do not assign */
#define	POINTER		(1 << 4)	/* p: void * (read as hex) */
#define	NOSKIP		(1 << 5)	/* [ or c: keep leading blanks */
#define	FASTINT		(1 << 9)	/* wfN: int_fastN_t */
#define	LONGLONG	(1 << 10)	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		(1 << 11)	/* j: intmax_t */
#define	PTRDIFFT	(1 << 12)	/* t: ptrdiff_t */
#define	SIZET		(1 << 13)	/* z: size_t */
#define	SHORTSHORT	(1 << 14)	/* hh: char */
#define	UNSIGNED	(1 << 15)	/* %[bBoupxX] conversions */
82 
/*
 * Kind of conversion selected by the conversion character; the decoder
 * stores one of these before the conversion itself is performed.
 */
enum {
	CT_CHAR = 0,	/* %c conversion */
	CT_CCL = 1,	/* %[...] conversion */
	CT_STRING = 2,	/* %s conversion */
	CT_INT = 3,	/* %[dioupxX] conversion */
	CT_FLOAT = 4	/* %[efgEFG] conversion */
};
91 
92 #ifndef NO_FLOATING_POINT
93 static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t);
94 #endif
95 
/*
 * A scanset, described as the half-open range [start, end) of wide
 * characters that were listed, plus a flag telling whether the set is
 * the complement of that list.
 */
struct ccl {
	const wchar_t *start;	/* first listed character */
	const wchar_t *end;	/* one past the last listed character */
	int compl;		/* non-zero: match what is NOT listed */
};

/*
 * Tell whether wi belongs to the scanset: returns 1 if it does and 0
 * otherwise.  The list is searched linearly with wmemchr().
 */
static __inline int
inccl(const struct ccl *ccl, wint_t wi)
{
	int listed;

	listed = wmemchr(ccl->start, wi, ccl->end - ccl->start) != NULL;
	return (ccl->compl ? !listed : listed);
}
113 
/*
 * When the assignment-suppression flag (*) is given, the conversion
 * helpers receive the address of this object in place of a caller
 * supplied destination.  A NULL pointer is deliberately not used as the
 * marker: doing so would hide bugs in applications that hand scanf() a
 * NULL destination.
 */
static const int suppress = 0;
#define	SUPPRESS_PTR	((void *)&suppress)
123 
/* Zero-initialized conversion state, copied to start a fresh conversion. */
static const mbstate_t initial_mbs;
125 
126 /*
127  * The following conversion functions return the number of characters consumed,
128  * or -1 on input failure.  Character class conversion returns 0 on match
129  * failure.
130  */
131 
132 static __inline int
133 convert_char(FILE *fp, char * mbp, int width, locale_t locale)
134 {
135 	mbstate_t mbs;
136 	size_t nconv;
137 	wint_t wi;
138 	int n;
139 
140 	n = 0;
141 	mbs = initial_mbs;
142 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
143 		if (mbp != SUPPRESS_PTR) {
144 			nconv = wcrtomb(mbp, wi, &mbs);
145 			if (nconv == (size_t)-1)
146 				return (-1);
147 			mbp += nconv;
148 		}
149 		n++;
150 	}
151 	if (n == 0)
152 		return (-1);
153 	return (n);
154 }
155 
156 static __inline int
157 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
158 {
159 	wint_t wi;
160 	int n;
161 
162 	n = 0;
163 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
164 		if (wcp != SUPPRESS_PTR)
165 			*wcp++ = (wchar_t)wi;
166 		n++;
167 	}
168 	if (n == 0)
169 		return (-1);
170 	return (n);
171 }
172 
173 static __inline int
174 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
175     locale_t locale)
176 {
177 	mbstate_t mbs;
178 	size_t nconv;
179 	wint_t wi;
180 	int n;
181 
182 	n = 0;
183 	mbs = initial_mbs;
184 	while ((wi = __fgetwc(fp, locale)) != WEOF &&
185 	    width-- != 0 && inccl(ccl, wi)) {
186 		if (mbp != SUPPRESS_PTR) {
187 			nconv = wcrtomb(mbp, wi, &mbs);
188 			if (nconv == (size_t)-1)
189 				return (-1);
190 			mbp += nconv;
191 		}
192 		n++;
193 	}
194 	if (wi != WEOF)
195 		__ungetwc(wi, fp, locale);
196 	if (mbp != SUPPRESS_PTR)
197 		*mbp = 0;
198 	return (n);
199 }
200 
201 static __inline int
202 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
203     locale_t locale)
204 {
205 	wchar_t *wcp0;
206 	wint_t wi;
207 	int n;
208 
209 	if (wcp == SUPPRESS_PTR) {
210 		n = 0;
211 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
212 		    width-- != 0 && inccl(ccl, wi))
213 			n++;
214 		if (wi != WEOF)
215 			__ungetwc(wi, fp, locale);
216 	} else {
217 		wcp0 = wcp;
218 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
219 		    width-- != 0 && inccl(ccl, wi))
220 			*wcp++ = (wchar_t)wi;
221 		if (wi != WEOF)
222 			__ungetwc(wi, fp, locale);
223 		n = wcp - wcp0;
224 		if (n == 0)
225 			return (0);
226 		*wcp = 0;
227 	}
228 	return (n);
229 }
230 
231 static __inline int
232 convert_string(FILE *fp, char * mbp, int width, locale_t locale)
233 {
234 	mbstate_t mbs;
235 	size_t nconv;
236 	wint_t wi;
237 	int nread;
238 
239 	mbs = initial_mbs;
240 	nread = 0;
241 	while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
242 	    !iswspace(wi)) {
243 		if (mbp != SUPPRESS_PTR) {
244 			nconv = wcrtomb(mbp, wi, &mbs);
245 			if (nconv == (size_t)-1)
246 				return (-1);
247 			mbp += nconv;
248 		}
249 		nread++;
250 	}
251 	if (wi != WEOF)
252 		__ungetwc(wi, fp, locale);
253 	if (mbp != SUPPRESS_PTR)
254 		*mbp = 0;
255 	return (nread);
256 }
257 
258 static __inline int
259 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
260 {
261 	wint_t wi;
262 	int nread;
263 
264 	nread = 0;
265 	if (wcp == SUPPRESS_PTR) {
266 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
267 		    width-- != 0 && !iswspace(wi))
268 			nread++;
269 		if (wi != WEOF)
270 			__ungetwc(wi, fp, locale);
271 	} else {
272 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
273 		    width-- != 0 && !iswspace(wi)) {
274 			*wcp++ = (wchar_t)wi;
275 			nread++;
276 		}
277 		if (wi != WEOF)
278 			__ungetwc(wi, fp, locale);
279 		*wcp = '\0';
280 	}
281 	return (nread);
282 }
283 
/*
 * States of the automaton that recognizes the text of an integer.
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* only a leading '+' or '-' accepted */
	havezero,	/* a first '0' accepted (possibly after a sign) */
	haveprefix,	/* "0b"/"0B" or "0x"/"0X" accepted, no digit yet */
	any,		/* at least one further digit accepted */
};

/*
 * Advance the integer automaton by one character.
 *
 * c is the candidate character, *state the current state and *base the
 * conversion base (0 while it is still to be deduced from the input).
 * When c may extend the number, *state (and *base, once it becomes
 * known) are updated and 1 is returned.  Otherwise 0 is returned and
 * both objects are left untouched.
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state == begin) {
			*state = havesign;
			return (1);
		}
		break;
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/*
			 * A zero directly after the leading zero rules out
			 * a 0b/0x prefix, so an undetermined base becomes
			 * octal here.  Without this, "007" read with base 0
			 * would stop after "00" because no later digit is
			 * smaller than a base of 0.
			 */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return (1);
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* Leading zero followed by an octal digit: octal. */
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A number starting with a non-zero digit is decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		if (*base > c - '0') {
			*state = any;
			return (1);
		}
		break;
	case 'b':
		/* "0b" is a binary prefix unless the base forbids it. */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		/* FALLTHROUGH */
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		if (*base > c - 'a' + 10) {
			*state = any;
			return (1);
		}
		break;
	case 'B':
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		/* FALLTHROUGH */
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		if (*base > c - 'A' + 10) {
			*state = any;
			return (1);
		}
		break;
	case 'x':
	case 'X':
		/* "0x" is a hexadecimal prefix unless the base forbids it. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return (1);
		}
		break;
	}
	return (0);
}
403 
404 /*
405  * Read an integer, storing it in buf.
406  *
407  * Return 0 on a match failure, and the number of characters read
408  * otherwise.
409  */
410 static __inline int
411 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
412     locale_t locale)
413 {
414 	enum parseint_state state = begin;
415 	wchar_t *wcp;
416 	int c;
417 
418 	for (wcp = buf; width; width--) {
419 		c = __fgetwc(fp, locale);
420 		if (c == WEOF)
421 			break;
422 		if (!parseint_fsm(c, &state, &base))
423 			break;
424 		*wcp++ = (wchar_t)c;
425 	}
426 	/*
427 	 * If we only had a sign, push it back.  If we only had a 0b or 0x
428 	 * prefix (possibly preceded by a sign), we view it as "0" and
429 	 * push back the letter.  In all other cases, if we stopped
430 	 * because we read a non-number character, push it back.
431 	 */
432 	if (state == havesign) {
433 		wcp--;
434 		__ungetwc(*wcp, fp, locale);
435 	} else if (state == haveprefix) {
436 		wcp--;
437 		__ungetwc(c, fp, locale);
438 	} else if (width && c != WEOF) {
439 		__ungetwc(c, fp, locale);
440 	}
441 	return (wcp - buf);
442 }
443 
444 /*
445  * MT-safe version.
446  */
447 int
448 vfwscanf_l(FILE * __restrict fp, locale_t locale,
449 		const wchar_t * __restrict fmt, va_list ap)
450 {
451 	int ret;
452 	FIX_LOCALE(locale);
453 
454 	FLOCKFILE_CANCELSAFE(fp);
455 	ORIENT(fp, 1);
456 	ret = __vfwscanf(fp, locale, fmt, ap);
457 	FUNLOCKFILE_CANCELSAFE();
458 	return (ret);
459 }
460 int
461 vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
462 {
463 	return vfwscanf_l(fp, __get_locale(), fmt, ap);
464 }
465 
466 /*
467  * Non-MT-safe version.
468  */
469 int
470 __vfwscanf(FILE * __restrict fp, locale_t locale,
471 		const wchar_t * __restrict fmt, va_list ap)
472 {
473 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
474 	wint_t c;		/* character from format, or conversion */
475 	size_t width;		/* field width, or 0 */
476 	int flags;		/* flags as defined above */
477 	int nassigned;		/* number of fields assigned */
478 	int nconversions;	/* number of conversions */
479 	int nr;			/* characters read by the current conversion */
480 	int nread;		/* number of characters consumed from fp */
481 	int base;		/* base argument to conversion function */
482 	struct ccl ccl;		/* character class info */
483 	wchar_t buf[BUF];	/* buffer for numeric conversions */
484 	wint_t wi;		/* handy wint_t */
485 
486 	nassigned = 0;
487 	nconversions = 0;
488 	nread = 0;
489 	ccl.start = ccl.end = NULL;
490 	for (;;) {
491 		c = *fmt++;
492 		if (c == 0)
493 			return (nassigned);
494 		if (iswspace(c)) {
495 			while ((c = __fgetwc(fp, locale)) != WEOF &&
496 			    iswspace_l(c, locale))
497 				nread++;
498 			if (c != WEOF)
499 				__ungetwc(c, fp, locale);
500 			continue;
501 		}
502 		if (c != '%')
503 			goto literal;
504 		width = 0;
505 		flags = 0;
506 		/*
507 		 * switch on the format.  continue if done;
508 		 * break once format type is derived.
509 		 */
510 again:		c = *fmt++;
511 		switch (c) {
512 		case '%':
513 literal:
514 			if ((wi = __fgetwc(fp, locale)) == WEOF)
515 				goto input_failure;
516 			if (wi != c) {
517 				__ungetwc(wi, fp, locale);
518 				goto match_failure;
519 			}
520 			nread++;
521 			continue;
522 
523 		case '*':
524 			flags |= SUPPRESS;
525 			goto again;
526 		case 'j':
527 			flags |= INTMAXT;
528 			goto again;
529 		case 'l':
530 			if (flags & LONG) {
531 				flags &= ~LONG;
532 				flags |= LONGLONG;
533 			} else
534 				flags |= LONG;
535 			goto again;
536 		case 'q':
537 			flags |= LONGLONG;	/* not quite */
538 			goto again;
539 		case 't':
540 			flags |= PTRDIFFT;
541 			goto again;
542 		case 'w':
543 			/*
544 			 * Fixed-width integer types.  On all platforms we
545 			 * support, int8_t is equivalent to char, int16_t
546 			 * is equivalent to short, int32_t is equivalent
547 			 * to int, int64_t is equivalent to long long int.
548 			 * Furthermore, int_fast8_t, int_fast16_t and
549 			 * int_fast32_t are equivalent to int, and
550 			 * int_fast64_t is equivalent to long long int.
551 			 */
552 			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
553 			if (fmt[0] == 'f') {
554 				flags |= FASTINT;
555 				fmt++;
556 			} else {
557 				flags &= ~FASTINT;
558 			}
559 			if (fmt[0] == '8') {
560 				if (!(flags & FASTINT))
561 					flags |= SHORTSHORT;
562 				else
563 					/* no flag set = 32 */ ;
564 				fmt += 1;
565 			} else if (fmt[0] == '1' && fmt[1] == '6') {
566 				if (!(flags & FASTINT))
567 					flags |= SHORT;
568 				else
569 					/* no flag set = 32 */ ;
570 				fmt += 2;
571 			} else if (fmt[0] == '3' && fmt[1] == '2') {
572 				/* no flag set = 32 */ ;
573 				fmt += 2;
574 			} else if (fmt[0] == '6' && fmt[1] == '4') {
575 				flags |= LONGLONG;
576 				fmt += 2;
577 			} else {
578 				goto match_failure;
579 			}
580 			goto again;
581 		case 'z':
582 			flags |= SIZET;
583 			goto again;
584 		case 'L':
585 			flags |= LONGDBL;
586 			goto again;
587 		case 'h':
588 			if (flags & SHORT) {
589 				flags &= ~SHORT;
590 				flags |= SHORTSHORT;
591 			} else
592 				flags |= SHORT;
593 			goto again;
594 
595 		case '0': case '1': case '2': case '3': case '4':
596 		case '5': case '6': case '7': case '8': case '9':
597 			width = width * 10 + c - '0';
598 			goto again;
599 
600 		/*
601 		 * Conversions.
602 		 */
603 		case 'B':
604 		case 'b':
605 			c = CT_INT;
606 			flags |= UNSIGNED;
607 			base = 2;
608 			break;
609 
610 		case 'd':
611 			c = CT_INT;
612 			base = 10;
613 			break;
614 
615 		case 'i':
616 			c = CT_INT;
617 			base = 0;
618 			break;
619 
620 		case 'o':
621 			c = CT_INT;
622 			flags |= UNSIGNED;
623 			base = 8;
624 			break;
625 
626 		case 'u':
627 			c = CT_INT;
628 			flags |= UNSIGNED;
629 			base = 10;
630 			break;
631 
632 		case 'X':
633 		case 'x':
634 			c = CT_INT;
635 			flags |= UNSIGNED;
636 			base = 16;
637 			break;
638 
639 #ifndef NO_FLOATING_POINT
640 		case 'A': case 'E': case 'F': case 'G':
641 		case 'a': case 'e': case 'f': case 'g':
642 			c = CT_FLOAT;
643 			break;
644 #endif
645 
646 		case 'S':
647 			flags |= LONG;
648 			/* FALLTHROUGH */
649 		case 's':
650 			c = CT_STRING;
651 			break;
652 
653 		case '[':
654 			ccl.start = fmt;
655 			if (*fmt == '^') {
656 				ccl.compl = 1;
657 				fmt++;
658 			} else
659 				ccl.compl = 0;
660 			if (*fmt == ']')
661 				fmt++;
662 			while (*fmt != '\0' && *fmt != ']')
663 				fmt++;
664 			ccl.end = fmt;
665 			fmt++;
666 			flags |= NOSKIP;
667 			c = CT_CCL;
668 			break;
669 
670 		case 'C':
671 			flags |= LONG;
672 			/* FALLTHROUGH */
673 		case 'c':
674 			flags |= NOSKIP;
675 			c = CT_CHAR;
676 			break;
677 
678 		case 'p':	/* pointer format is like hex */
679 			flags |= POINTER;
680 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
681 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
682 			base = 16;
683 			break;
684 
685 		case 'n':
686 			if (flags & SUPPRESS)	/* ??? */
687 				continue;
688 			if (flags & SHORTSHORT)
689 				*va_arg(ap, char *) = nread;
690 			else if (flags & SHORT)
691 				*va_arg(ap, short *) = nread;
692 			else if (flags & LONG)
693 				*va_arg(ap, long *) = nread;
694 			else if (flags & LONGLONG)
695 				*va_arg(ap, long long *) = nread;
696 			else if (flags & INTMAXT)
697 				*va_arg(ap, intmax_t *) = nread;
698 			else if (flags & SIZET)
699 				*va_arg(ap, size_t *) = nread;
700 			else if (flags & PTRDIFFT)
701 				*va_arg(ap, ptrdiff_t *) = nread;
702 			else
703 				*va_arg(ap, int *) = nread;
704 			continue;
705 
706 		default:
707 			goto match_failure;
708 
709 		/*
710 		 * Disgusting backwards compatibility hack.	XXX
711 		 */
712 		case '\0':	/* compat */
713 			return (EOF);
714 		}
715 
716 		/*
717 		 * Consume leading white space, except for formats
718 		 * that suppress this.
719 		 */
720 		if ((flags & NOSKIP) == 0) {
721 			while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi))
722 				nread++;
723 			if (wi == WEOF)
724 				goto input_failure;
725 			__ungetwc(wi, fp, locale);
726 		}
727 
728 		/*
729 		 * Do the conversion.
730 		 */
731 		switch (c) {
732 
733 		case CT_CHAR:
734 			/* scan arbitrary characters (sets NOSKIP) */
735 			if (width == 0)
736 				width = 1;
737 			if (flags & LONG) {
738 				nr = convert_wchar(fp, GETARG(wchar_t *), width,
739 				    locale);
740 			} else {
741 				nr = convert_char(fp, GETARG(char *), width,
742 				    locale);
743 			}
744 			if (nr < 0)
745 				goto input_failure;
746 			break;
747 
748 		case CT_CCL:
749 			/* scan a (nonempty) character class (sets NOSKIP) */
750 			if (width == 0)
751 				width = (size_t)~0;	/* `infinity' */
752 			/* take only those things in the class */
753 			if (flags & LONG) {
754 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
755 				    &ccl, locale);
756 			} else {
757 				nr = convert_ccl(fp, GETARG(char *), width,
758 				    &ccl, locale);
759 			}
760 			if (nr <= 0) {
761 				if (nr < 0)
762 					goto input_failure;
763 				else /* nr == 0 */
764 					goto match_failure;
765 			}
766 			break;
767 
768 		case CT_STRING:
769 			/* like CCL, but zero-length string OK, & no NOSKIP */
770 			if (width == 0)
771 				width = (size_t)~0;
772 			if (flags & LONG) {
773 				nr = convert_wstring(fp, GETARG(wchar_t *),
774 				    width, locale);
775 			} else {
776 				nr = convert_string(fp, GETARG(char *), width,
777 				    locale);
778 			}
779 			if (nr < 0)
780 				goto input_failure;
781 			break;
782 
783 		case CT_INT:
784 			/* scan an integer as if by the conversion function */
785 			if (width == 0 || width > sizeof(buf) /
786 			    sizeof(*buf) - 1)
787 				width = sizeof(buf) / sizeof(*buf) - 1;
788 
789 			nr = parseint(fp, buf, width, base, locale);
790 			if (nr == 0)
791 				goto match_failure;
792 			if ((flags & SUPPRESS) == 0) {
793 				uintmax_t res;
794 
795 				buf[nr] = L'\0';
796 				if ((flags & UNSIGNED) == 0)
797 				    res = wcstoimax(buf, NULL, base);
798 				else
799 				    res = wcstoumax(buf, NULL, base);
800 				if (flags & POINTER)
801 					*va_arg(ap, void **) =
802 							(void *)(uintptr_t)res;
803 				else if (flags & SHORTSHORT)
804 					*va_arg(ap, char *) = res;
805 				else if (flags & SHORT)
806 					*va_arg(ap, short *) = res;
807 				else if (flags & LONG)
808 					*va_arg(ap, long *) = res;
809 				else if (flags & LONGLONG)
810 					*va_arg(ap, long long *) = res;
811 				else if (flags & INTMAXT)
812 					*va_arg(ap, intmax_t *) = res;
813 				else if (flags & PTRDIFFT)
814 					*va_arg(ap, ptrdiff_t *) = res;
815 				else if (flags & SIZET)
816 					*va_arg(ap, size_t *) = res;
817 				else
818 					*va_arg(ap, int *) = res;
819 			}
820 			break;
821 
822 #ifndef NO_FLOATING_POINT
823 		case CT_FLOAT:
824 			/* scan a floating point number as if by strtod */
825 			if (width == 0 || width > sizeof(buf) /
826 			    sizeof(*buf) - 1)
827 				width = sizeof(buf) / sizeof(*buf) - 1;
828 			nr = parsefloat(fp, buf, buf + width, locale);
829 			if (nr == 0)
830 				goto match_failure;
831 			if ((flags & SUPPRESS) == 0) {
832 				if (flags & LONGDBL) {
833 					long double res = wcstold(buf, NULL);
834 					*va_arg(ap, long double *) = res;
835 				} else if (flags & LONG) {
836 					double res = wcstod(buf, NULL);
837 					*va_arg(ap, double *) = res;
838 				} else {
839 					float res = wcstof(buf, NULL);
840 					*va_arg(ap, float *) = res;
841 				}
842 			}
843 			break;
844 #endif /* !NO_FLOATING_POINT */
845 		}
846 		if (!(flags & SUPPRESS))
847 			nassigned++;
848 		nread += nr;
849 		nconversions++;
850 	}
851 input_failure:
852 	return (nconversions != 0 ? nassigned : EOF);
853 match_failure:
854 	return (nassigned);
855 }
856 
857 #ifndef NO_FLOATING_POINT
858 static int
859 parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
860 {
861 	mbstate_t mbs;
862 	size_t nconv;
863 	wchar_t *commit, *p;
864 	int infnanpos = 0;
865 	enum {
866 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
867 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
868 	} state = S_START;
869 	wchar_t c;
870 	wchar_t decpt;
871 	_Bool gotmantdig = 0, ishex = 0;
872 
873 	mbs = initial_mbs;
874 	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
875 	if (nconv == (size_t)-1 || nconv == (size_t)-2)
876 		decpt = '.';	/* failsafe */
877 
878 	/*
879 	 * We set commit = p whenever the string we have read so far
880 	 * constitutes a valid representation of a floating point
881 	 * number by itself.  At some point, the parse will complete
882 	 * or fail, and we will ungetc() back to the last commit point.
883 	 * To ensure that the file offset gets updated properly, it is
884 	 * always necessary to read at least one character that doesn't
885 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
886 	 */
887 	commit = buf - 1;
888 	c = WEOF;
889 	for (p = buf; p < end; ) {
890 		if ((c = __fgetwc(fp, locale)) == WEOF)
891 			break;
892 reswitch:
893 		switch (state) {
894 		case S_START:
895 			state = S_GOTSIGN;
896 			if (c == '-' || c == '+')
897 				break;
898 			else
899 				goto reswitch;
900 		case S_GOTSIGN:
901 			switch (c) {
902 			case '0':
903 				state = S_MAYBEHEX;
904 				commit = p;
905 				break;
906 			case 'I':
907 			case 'i':
908 				state = S_INF;
909 				break;
910 			case 'N':
911 			case 'n':
912 				state = S_NAN;
913 				break;
914 			default:
915 				state = S_DIGITS;
916 				goto reswitch;
917 			}
918 			break;
919 		case S_INF:
920 			if (infnanpos > 6 ||
921 			    (c != "nfinity"[infnanpos] &&
922 			     c != "NFINITY"[infnanpos]))
923 				goto parsedone;
924 			if (infnanpos == 1 || infnanpos == 6)
925 				commit = p;	/* inf or infinity */
926 			infnanpos++;
927 			break;
928 		case S_NAN:
929 			switch (infnanpos) {
930 			case 0:
931 				if (c != 'A' && c != 'a')
932 					goto parsedone;
933 				break;
934 			case 1:
935 				if (c != 'N' && c != 'n')
936 					goto parsedone;
937 				else
938 					commit = p;
939 				break;
940 			case 2:
941 				if (c != '(')
942 					goto parsedone;
943 				break;
944 			default:
945 				if (c == ')') {
946 					commit = p;
947 					state = S_DONE;
948 				} else if (!iswalnum(c) && c != '_')
949 					goto parsedone;
950 				break;
951 			}
952 			infnanpos++;
953 			break;
954 		case S_DONE:
955 			goto parsedone;
956 		case S_MAYBEHEX:
957 			state = S_DIGITS;
958 			if (c == 'X' || c == 'x') {
959 				ishex = 1;
960 				break;
961 			} else {	/* we saw a '0', but no 'x' */
962 				gotmantdig = 1;
963 				goto reswitch;
964 			}
965 		case S_DIGITS:
966 			if ((ishex && iswxdigit(c)) || iswdigit(c))
967 				gotmantdig = 1;
968 			else {
969 				state = S_FRAC;
970 				if (c != decpt)
971 					goto reswitch;
972 			}
973 			if (gotmantdig)
974 				commit = p;
975 			break;
976 		case S_FRAC:
977 			if (((c == 'E' || c == 'e') && !ishex) ||
978 			    ((c == 'P' || c == 'p') && ishex)) {
979 				if (!gotmantdig)
980 					goto parsedone;
981 				else
982 					state = S_EXP;
983 			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
984 				commit = p;
985 				gotmantdig = 1;
986 			} else
987 				goto parsedone;
988 			break;
989 		case S_EXP:
990 			state = S_EXPDIGITS;
991 			if (c == '-' || c == '+')
992 				break;
993 			else
994 				goto reswitch;
995 		case S_EXPDIGITS:
996 			if (iswdigit(c))
997 				commit = p;
998 			else
999 				goto parsedone;
1000 			break;
1001 		default:
1002 			abort();
1003 		}
1004 		*p++ = c;
1005 		c = WEOF;
1006 	}
1007 
1008 parsedone:
1009 	if (c != WEOF)
1010 		__ungetwc(c, fp, locale);
1011 	while (commit < --p)
1012 		__ungetwc(*p, fp, locale);
1013 	*++commit = '\0';
1014 	return (commit - buf);
1015 }
1016 #endif
1017