xref: /freebsd/lib/libc/stdio/vfwscanf.c (revision edf8578117e8844e02c0121147f45e4609b30680)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Torek.
9  *
10  * Copyright (c) 2011 The FreeBSD Foundation
11  *
12  * Copyright (c) 2023 Dag-Erling Smørgrav
13  *
14  * Portions of this software were developed by David Chisnall
15  * under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 #if 0
43 #if defined(LIBC_SCCS) && !defined(lint)
44 static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
45 #endif /* LIBC_SCCS and not lint */
46 #endif
47 #include <sys/cdefs.h>
48 #include "namespace.h"
49 #include <ctype.h>
50 #include <inttypes.h>
51 #include <limits.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <stddef.h>
55 #include <stdarg.h>
56 #include <string.h>
57 #include <wchar.h>
58 #include <wctype.h>
59 #include "un-namespace.h"
60 
61 #include "libc_private.h"
62 #include "local.h"
63 #include "xlocale_private.h"
64 
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.  Each flag is a distinct bit so that
 * several of them can be combined in a single int; bits 6 through 8
 * are not assigned.
 */
#define	LONG		(1 << 0)	/* l: long or double */
#define	LONGDBL		(1 << 1)	/* L: long double */
#define	SHORT		(1 << 2)	/* h: short */
#define	SUPPRESS	(1 << 3)	/* *: suppress assignment */
#define	POINTER		(1 << 4)	/* p: void * (as hex) */
#define	NOSKIP		(1 << 5)	/* [ or c: do not skip blanks */
#define	FASTINT		(1 << 9)	/* wfN: int_fastN_t */
#define	LONGLONG	(1 << 10)	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		(1 << 11)	/* j: intmax_t */
#define	PTRDIFFT	(1 << 12)	/* t: ptrdiff_t */
#define	SIZET		(1 << 13)	/* z: size_t */
#define	SHORTSHORT	(1 << 14)	/* hh: char */
#define	UNSIGNED	(1 << 15)	/* %[oupxX] conversions */

/*
 * Conversion types.
 */
enum {
	CT_CHAR = 0,	/* %c conversion */
	CT_CCL = 1,	/* %[...] conversion */
	CT_STRING = 2,	/* %s conversion */
	CT_INT = 3,	/* %[dioupxX] conversion */
	CT_FLOAT = 4	/* %[efgEFG] conversion */
};
92 
93 #ifndef NO_FLOATING_POINT
94 static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t);
95 #endif
96 
/*
 * A character class taken from a %[...] directive: the half-open range
 * [start, end) of set members, plus a flag saying whether the set is
 * to be complemented.
 */
struct ccl {
	const wchar_t *start;	/* character class start */
	const wchar_t *end;	/* character class end */
	int compl;		/* ccl is complemented? */
};

/*
 * Return non-zero if wi is accepted by the character class: listed in
 * [start, end) for a plain class, not listed for a complemented one.
 */
static __inline int
inccl(const struct ccl *ccl, wint_t wi)
{
	size_t nmembers = ccl->end - ccl->start;
	int listed = (wmemchr(ccl->start, wi, nmembers) != NULL);

	return (ccl->compl ? !listed : listed);
}
114 
115 /*
116  * Conversion functions are passed a pointer to this object instead of
117  * a real parameter to indicate that the assignment-suppression (*)
118  * flag was specified.  We could use a NULL pointer to indicate this,
119  * but that would mask bugs in applications that call scanf() with a
120  * NULL pointer.
121  */
/* Only the address of this object is used; its value is never read. */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/*
 * Zero-initialized conversion state.  The conversion functions copy it
 * into their local mbstate_t before calling the multibyte routines.
 */
static const mbstate_t initial_mbs;
126 
127 /*
128  * The following conversion functions return the number of characters consumed,
129  * or -1 on input failure.  Character class conversion returns 0 on match
130  * failure.
131  */
132 
133 static __inline int
134 convert_char(FILE *fp, char * mbp, int width, locale_t locale)
135 {
136 	mbstate_t mbs;
137 	size_t nconv;
138 	wint_t wi;
139 	int n;
140 
141 	n = 0;
142 	mbs = initial_mbs;
143 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
144 		if (mbp != SUPPRESS_PTR) {
145 			nconv = wcrtomb(mbp, wi, &mbs);
146 			if (nconv == (size_t)-1)
147 				return (-1);
148 			mbp += nconv;
149 		}
150 		n++;
151 	}
152 	if (n == 0)
153 		return (-1);
154 	return (n);
155 }
156 
157 static __inline int
158 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
159 {
160 	wint_t wi;
161 	int n;
162 
163 	n = 0;
164 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
165 		if (wcp != SUPPRESS_PTR)
166 			*wcp++ = (wchar_t)wi;
167 		n++;
168 	}
169 	if (n == 0)
170 		return (-1);
171 	return (n);
172 }
173 
174 static __inline int
175 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
176     locale_t locale)
177 {
178 	mbstate_t mbs;
179 	size_t nconv;
180 	wint_t wi;
181 	int n;
182 
183 	n = 0;
184 	mbs = initial_mbs;
185 	while ((wi = __fgetwc(fp, locale)) != WEOF &&
186 	    width-- != 0 && inccl(ccl, wi)) {
187 		if (mbp != SUPPRESS_PTR) {
188 			nconv = wcrtomb(mbp, wi, &mbs);
189 			if (nconv == (size_t)-1)
190 				return (-1);
191 			mbp += nconv;
192 		}
193 		n++;
194 	}
195 	if (wi != WEOF)
196 		__ungetwc(wi, fp, locale);
197 	if (mbp != SUPPRESS_PTR)
198 		*mbp = 0;
199 	return (n);
200 }
201 
202 static __inline int
203 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
204     locale_t locale)
205 {
206 	wchar_t *wcp0;
207 	wint_t wi;
208 	int n;
209 
210 	if (wcp == SUPPRESS_PTR) {
211 		n = 0;
212 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
213 		    width-- != 0 && inccl(ccl, wi))
214 			n++;
215 		if (wi != WEOF)
216 			__ungetwc(wi, fp, locale);
217 	} else {
218 		wcp0 = wcp;
219 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
220 		    width-- != 0 && inccl(ccl, wi))
221 			*wcp++ = (wchar_t)wi;
222 		if (wi != WEOF)
223 			__ungetwc(wi, fp, locale);
224 		n = wcp - wcp0;
225 		if (n == 0)
226 			return (0);
227 		*wcp = 0;
228 	}
229 	return (n);
230 }
231 
232 static __inline int
233 convert_string(FILE *fp, char * mbp, int width, locale_t locale)
234 {
235 	mbstate_t mbs;
236 	size_t nconv;
237 	wint_t wi;
238 	int nread;
239 
240 	mbs = initial_mbs;
241 	nread = 0;
242 	while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
243 	    !iswspace(wi)) {
244 		if (mbp != SUPPRESS_PTR) {
245 			nconv = wcrtomb(mbp, wi, &mbs);
246 			if (nconv == (size_t)-1)
247 				return (-1);
248 			mbp += nconv;
249 		}
250 		nread++;
251 	}
252 	if (wi != WEOF)
253 		__ungetwc(wi, fp, locale);
254 	if (mbp != SUPPRESS_PTR)
255 		*mbp = 0;
256 	return (nread);
257 }
258 
259 static __inline int
260 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
261 {
262 	wint_t wi;
263 	int nread;
264 
265 	nread = 0;
266 	if (wcp == SUPPRESS_PTR) {
267 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
268 		    width-- != 0 && !iswspace(wi))
269 			nread++;
270 		if (wi != WEOF)
271 			__ungetwc(wi, fp, locale);
272 	} else {
273 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
274 		    width-- != 0 && !iswspace(wi)) {
275 			*wcp++ = (wchar_t)wi;
276 			nread++;
277 		}
278 		if (wi != WEOF)
279 			__ungetwc(wi, fp, locale);
280 		*wcp = '\0';
281 	}
282 	return (nread);
283 }
284 
/*
 * States of the integer recognizer driven by parseint_fsm().
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a leading '+' or '-' */
	havezero,	/* accepted the first '0' (after an optional sign) */
	haveprefix,	/* accepted "0x" or "0b", no digit after it yet */
	any,		/* inside the digit sequence; no sign or prefix allowed */
};

/*
 * Try to accept the radix letter of a "0x" / "0b" prefix.  This is only
 * possible right after the first zero, and only if the base is still
 * undetermined (0) or already equal to radix.  Returns 1 and updates
 * *state and *base if the prefix was accepted, 0 otherwise.
 */
static __inline int
parseint_prefix(enum parseint_state *state, int *base, int radix)
{
	if (*state != havezero || (*base != 0 && *base != radix))
		return (0);
	*state = haveprefix;
	*base = radix;
	return (1);
}

/*
 * Advance the integer recognizer by one character.
 *
 * c is the next input character, *state the current state and *base
 * the conversion base, where 0 means "not determined yet" (%i).  When
 * the character can extend the number, *state (and *base, once the
 * input determines it) are updated and 1 is returned.  Otherwise 0 is
 * returned and the character must not be consumed.
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		if (*state != begin)
			return (0);
		*state = havesign;
		return (1);
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/*
			 * A second zero directly after the leading one
			 * rules out a 0x / 0b prefix, so an undetermined
			 * base is octal.  Without this the base would
			 * stay 0 and every later digit would be refused
			 * ("0010" would scan as "00").
			 */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return (1);
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* A digit after a leading zero selects octal. */
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A non-zero first digit selects decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
		if (parseint_prefix(state, base, 2))
			return (1);
		/* FALLTHROUGH */
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		digit = c - 'a' + 10;
		break;
	case 'B':
		if (parseint_prefix(state, base, 2))
			return (1);
		/* FALLTHROUGH */
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = c - 'A' + 10;
		break;
	case 'x':
	case 'X':
		return (parseint_prefix(state, base, 16));
	default:
		return (0);
	}

	/* A digit is acceptable only if it is valid in the current base. */
	if (*base > digit) {
		*state = any;
		return (1);
	}
	return (0);
}
404 
405 /*
406  * Read an integer, storing it in buf.
407  *
408  * Return 0 on a match failure, and the number of characters read
409  * otherwise.
410  */
411 static __inline int
412 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
413     locale_t locale)
414 {
415 	enum parseint_state state = begin;
416 	wchar_t *wcp;
417 	int c;
418 
419 	for (wcp = buf; width; width--) {
420 		c = __fgetwc(fp, locale);
421 		if (c == WEOF)
422 			break;
423 		if (!parseint_fsm(c, &state, &base))
424 			break;
425 		*wcp++ = (wchar_t)c;
426 	}
427 	/*
428 	 * If we only had a sign, push it back.  If we only had a 0b or 0x
429 	 * prefix (possibly preceded by a sign), we view it as "0" and
430 	 * push back the letter.  In all other cases, if we stopped
431 	 * because we read a non-number character, push it back.
432 	 */
433 	if (state == havesign) {
434 		wcp--;
435 		__ungetwc(*wcp, fp, locale);
436 	} else if (state == haveprefix) {
437 		wcp--;
438 		__ungetwc(c, fp, locale);
439 	} else if (width && c != WEOF) {
440 		__ungetwc(c, fp, locale);
441 	}
442 	return (wcp - buf);
443 }
444 
/*
 * MT-safe version.
 *
 * Runs __vfwscanf() on fp with the given locale, bracketed by
 * FLOCKFILE_CANCELSAFE(fp) and FUNLOCKFILE_CANCELSAFE(), and returns
 * its result unchanged.
 */
int
vfwscanf_l(FILE * __restrict fp, locale_t locale,
		const wchar_t * __restrict fmt, va_list ap)
{
	int ret;
	/*
	 * NOTE(review): FIX_LOCALE is defined outside this file;
	 * presumably it replaces special locale handles with a usable
	 * locale object -- confirm against xlocale_private.h.
	 */
	FIX_LOCALE(locale);

	FLOCKFILE_CANCELSAFE(fp);
	/* ORIENT is called with 1 before any wide character is read. */
	ORIENT(fp, 1);
	ret = __vfwscanf(fp, locale, fmt, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (ret);
}
461 int
462 vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
463 {
464 	return vfwscanf_l(fp, __get_locale(), fmt, ap);
465 }
466 
467 /*
468  * Non-MT-safe version.
469  */
470 int
471 __vfwscanf(FILE * __restrict fp, locale_t locale,
472 		const wchar_t * __restrict fmt, va_list ap)
473 {
474 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
475 	wint_t c;		/* character from format, or conversion */
476 	size_t width;		/* field width, or 0 */
477 	int flags;		/* flags as defined above */
478 	int nassigned;		/* number of fields assigned */
479 	int nconversions;	/* number of conversions */
480 	int nr;			/* characters read by the current conversion */
481 	int nread;		/* number of characters consumed from fp */
482 	int base;		/* base argument to conversion function */
483 	struct ccl ccl;		/* character class info */
484 	wchar_t buf[BUF];	/* buffer for numeric conversions */
485 	wint_t wi;		/* handy wint_t */
486 
487 	nassigned = 0;
488 	nconversions = 0;
489 	nread = 0;
490 	ccl.start = ccl.end = NULL;
491 	for (;;) {
492 		c = *fmt++;
493 		if (c == 0)
494 			return (nassigned);
495 		if (iswspace(c)) {
496 			while ((c = __fgetwc(fp, locale)) != WEOF &&
497 			    iswspace_l(c, locale))
498 				nread++;
499 			if (c != WEOF)
500 				__ungetwc(c, fp, locale);
501 			continue;
502 		}
503 		if (c != '%')
504 			goto literal;
505 		width = 0;
506 		flags = 0;
507 		/*
508 		 * switch on the format.  continue if done;
509 		 * break once format type is derived.
510 		 */
511 again:		c = *fmt++;
512 		switch (c) {
513 		case '%':
514 literal:
515 			if ((wi = __fgetwc(fp, locale)) == WEOF)
516 				goto input_failure;
517 			if (wi != c) {
518 				__ungetwc(wi, fp, locale);
519 				goto match_failure;
520 			}
521 			nread++;
522 			continue;
523 
524 		case '*':
525 			flags |= SUPPRESS;
526 			goto again;
527 		case 'j':
528 			flags |= INTMAXT;
529 			goto again;
530 		case 'l':
531 			if (flags & LONG) {
532 				flags &= ~LONG;
533 				flags |= LONGLONG;
534 			} else
535 				flags |= LONG;
536 			goto again;
537 		case 'q':
538 			flags |= LONGLONG;	/* not quite */
539 			goto again;
540 		case 't':
541 			flags |= PTRDIFFT;
542 			goto again;
543 		case 'w':
544 			/*
545 			 * Fixed-width integer types.  On all platforms we
546 			 * support, int8_t is equivalent to char, int16_t
547 			 * is equivalent to short, int32_t is equivalent
548 			 * to int, int64_t is equivalent to long long int.
549 			 * Furthermore, int_fast8_t, int_fast16_t and
550 			 * int_fast32_t are equivalent to int, and
551 			 * int_fast64_t is equivalent to long long int.
552 			 */
553 			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
554 			if (fmt[0] == 'f') {
555 				flags |= FASTINT;
556 				fmt++;
557 			} else {
558 				flags &= ~FASTINT;
559 			}
560 			if (fmt[0] == '8') {
561 				if (!(flags & FASTINT))
562 					flags |= SHORTSHORT;
563 				else
564 					/* no flag set = 32 */ ;
565 				fmt += 1;
566 			} else if (fmt[0] == '1' && fmt[1] == '6') {
567 				if (!(flags & FASTINT))
568 					flags |= SHORT;
569 				else
570 					/* no flag set = 32 */ ;
571 				fmt += 2;
572 			} else if (fmt[0] == '3' && fmt[1] == '2') {
573 				/* no flag set = 32 */ ;
574 				fmt += 2;
575 			} else if (fmt[0] == '6' && fmt[1] == '4') {
576 				flags |= LONGLONG;
577 				fmt += 2;
578 			} else {
579 				goto match_failure;
580 			}
581 			goto again;
582 		case 'z':
583 			flags |= SIZET;
584 			goto again;
585 		case 'L':
586 			flags |= LONGDBL;
587 			goto again;
588 		case 'h':
589 			if (flags & SHORT) {
590 				flags &= ~SHORT;
591 				flags |= SHORTSHORT;
592 			} else
593 				flags |= SHORT;
594 			goto again;
595 
596 		case '0': case '1': case '2': case '3': case '4':
597 		case '5': case '6': case '7': case '8': case '9':
598 			width = width * 10 + c - '0';
599 			goto again;
600 
601 		/*
602 		 * Conversions.
603 		 */
604 		case 'B':
605 		case 'b':
606 			c = CT_INT;
607 			flags |= UNSIGNED;
608 			base = 2;
609 			break;
610 
611 		case 'd':
612 			c = CT_INT;
613 			base = 10;
614 			break;
615 
616 		case 'i':
617 			c = CT_INT;
618 			base = 0;
619 			break;
620 
621 		case 'o':
622 			c = CT_INT;
623 			flags |= UNSIGNED;
624 			base = 8;
625 			break;
626 
627 		case 'u':
628 			c = CT_INT;
629 			flags |= UNSIGNED;
630 			base = 10;
631 			break;
632 
633 		case 'X':
634 		case 'x':
635 			c = CT_INT;
636 			flags |= UNSIGNED;
637 			base = 16;
638 			break;
639 
640 #ifndef NO_FLOATING_POINT
641 		case 'A': case 'E': case 'F': case 'G':
642 		case 'a': case 'e': case 'f': case 'g':
643 			c = CT_FLOAT;
644 			break;
645 #endif
646 
647 		case 'S':
648 			flags |= LONG;
649 			/* FALLTHROUGH */
650 		case 's':
651 			c = CT_STRING;
652 			break;
653 
654 		case '[':
655 			ccl.start = fmt;
656 			if (*fmt == '^') {
657 				ccl.compl = 1;
658 				fmt++;
659 			} else
660 				ccl.compl = 0;
661 			if (*fmt == ']')
662 				fmt++;
663 			while (*fmt != '\0' && *fmt != ']')
664 				fmt++;
665 			ccl.end = fmt;
666 			fmt++;
667 			flags |= NOSKIP;
668 			c = CT_CCL;
669 			break;
670 
671 		case 'C':
672 			flags |= LONG;
673 			/* FALLTHROUGH */
674 		case 'c':
675 			flags |= NOSKIP;
676 			c = CT_CHAR;
677 			break;
678 
679 		case 'p':	/* pointer format is like hex */
680 			flags |= POINTER;
681 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
682 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
683 			base = 16;
684 			break;
685 
686 		case 'n':
687 			if (flags & SUPPRESS)	/* ??? */
688 				continue;
689 			if (flags & SHORTSHORT)
690 				*va_arg(ap, char *) = nread;
691 			else if (flags & SHORT)
692 				*va_arg(ap, short *) = nread;
693 			else if (flags & LONG)
694 				*va_arg(ap, long *) = nread;
695 			else if (flags & LONGLONG)
696 				*va_arg(ap, long long *) = nread;
697 			else if (flags & INTMAXT)
698 				*va_arg(ap, intmax_t *) = nread;
699 			else if (flags & SIZET)
700 				*va_arg(ap, size_t *) = nread;
701 			else if (flags & PTRDIFFT)
702 				*va_arg(ap, ptrdiff_t *) = nread;
703 			else
704 				*va_arg(ap, int *) = nread;
705 			continue;
706 
707 		default:
708 			goto match_failure;
709 
710 		/*
711 		 * Disgusting backwards compatibility hack.	XXX
712 		 */
713 		case '\0':	/* compat */
714 			return (EOF);
715 		}
716 
717 		/*
718 		 * Consume leading white space, except for formats
719 		 * that suppress this.
720 		 */
721 		if ((flags & NOSKIP) == 0) {
722 			while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi))
723 				nread++;
724 			if (wi == WEOF)
725 				goto input_failure;
726 			__ungetwc(wi, fp, locale);
727 		}
728 
729 		/*
730 		 * Do the conversion.
731 		 */
732 		switch (c) {
733 
734 		case CT_CHAR:
735 			/* scan arbitrary characters (sets NOSKIP) */
736 			if (width == 0)
737 				width = 1;
738 			if (flags & LONG) {
739 				nr = convert_wchar(fp, GETARG(wchar_t *), width,
740 				    locale);
741 			} else {
742 				nr = convert_char(fp, GETARG(char *), width,
743 				    locale);
744 			}
745 			if (nr < 0)
746 				goto input_failure;
747 			break;
748 
749 		case CT_CCL:
750 			/* scan a (nonempty) character class (sets NOSKIP) */
751 			if (width == 0)
752 				width = (size_t)~0;	/* `infinity' */
753 			/* take only those things in the class */
754 			if (flags & LONG) {
755 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
756 				    &ccl, locale);
757 			} else {
758 				nr = convert_ccl(fp, GETARG(char *), width,
759 				    &ccl, locale);
760 			}
761 			if (nr <= 0) {
762 				if (nr < 0)
763 					goto input_failure;
764 				else /* nr == 0 */
765 					goto match_failure;
766 			}
767 			break;
768 
769 		case CT_STRING:
770 			/* like CCL, but zero-length string OK, & no NOSKIP */
771 			if (width == 0)
772 				width = (size_t)~0;
773 			if (flags & LONG) {
774 				nr = convert_wstring(fp, GETARG(wchar_t *),
775 				    width, locale);
776 			} else {
777 				nr = convert_string(fp, GETARG(char *), width,
778 				    locale);
779 			}
780 			if (nr < 0)
781 				goto input_failure;
782 			break;
783 
784 		case CT_INT:
785 			/* scan an integer as if by the conversion function */
786 			if (width == 0 || width > sizeof(buf) /
787 			    sizeof(*buf) - 1)
788 				width = sizeof(buf) / sizeof(*buf) - 1;
789 
790 			nr = parseint(fp, buf, width, base, locale);
791 			if (nr == 0)
792 				goto match_failure;
793 			if ((flags & SUPPRESS) == 0) {
794 				uintmax_t res;
795 
796 				buf[nr] = L'\0';
797 				if ((flags & UNSIGNED) == 0)
798 				    res = wcstoimax(buf, NULL, base);
799 				else
800 				    res = wcstoumax(buf, NULL, base);
801 				if (flags & POINTER)
802 					*va_arg(ap, void **) =
803 							(void *)(uintptr_t)res;
804 				else if (flags & SHORTSHORT)
805 					*va_arg(ap, char *) = res;
806 				else if (flags & SHORT)
807 					*va_arg(ap, short *) = res;
808 				else if (flags & LONG)
809 					*va_arg(ap, long *) = res;
810 				else if (flags & LONGLONG)
811 					*va_arg(ap, long long *) = res;
812 				else if (flags & INTMAXT)
813 					*va_arg(ap, intmax_t *) = res;
814 				else if (flags & PTRDIFFT)
815 					*va_arg(ap, ptrdiff_t *) = res;
816 				else if (flags & SIZET)
817 					*va_arg(ap, size_t *) = res;
818 				else
819 					*va_arg(ap, int *) = res;
820 			}
821 			break;
822 
823 #ifndef NO_FLOATING_POINT
824 		case CT_FLOAT:
825 			/* scan a floating point number as if by strtod */
826 			if (width == 0 || width > sizeof(buf) /
827 			    sizeof(*buf) - 1)
828 				width = sizeof(buf) / sizeof(*buf) - 1;
829 			nr = parsefloat(fp, buf, buf + width, locale);
830 			if (nr == 0)
831 				goto match_failure;
832 			if ((flags & SUPPRESS) == 0) {
833 				if (flags & LONGDBL) {
834 					long double res = wcstold(buf, NULL);
835 					*va_arg(ap, long double *) = res;
836 				} else if (flags & LONG) {
837 					double res = wcstod(buf, NULL);
838 					*va_arg(ap, double *) = res;
839 				} else {
840 					float res = wcstof(buf, NULL);
841 					*va_arg(ap, float *) = res;
842 				}
843 			}
844 			break;
845 #endif /* !NO_FLOATING_POINT */
846 		}
847 		if (!(flags & SUPPRESS))
848 			nassigned++;
849 		nread += nr;
850 		nconversions++;
851 	}
852 input_failure:
853 	return (nconversions != 0 ? nassigned : EOF);
854 match_failure:
855 	return (nassigned);
856 }
857 
858 #ifndef NO_FLOATING_POINT
859 static int
860 parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
861 {
862 	mbstate_t mbs;
863 	size_t nconv;
864 	wchar_t *commit, *p;
865 	int infnanpos = 0;
866 	enum {
867 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
868 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
869 	} state = S_START;
870 	wchar_t c;
871 	wchar_t decpt;
872 	_Bool gotmantdig = 0, ishex = 0;
873 
874 	mbs = initial_mbs;
875 	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
876 	if (nconv == (size_t)-1 || nconv == (size_t)-2)
877 		decpt = '.';	/* failsafe */
878 
879 	/*
880 	 * We set commit = p whenever the string we have read so far
881 	 * constitutes a valid representation of a floating point
882 	 * number by itself.  At some point, the parse will complete
883 	 * or fail, and we will ungetc() back to the last commit point.
884 	 * To ensure that the file offset gets updated properly, it is
885 	 * always necessary to read at least one character that doesn't
886 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
887 	 */
888 	commit = buf - 1;
889 	c = WEOF;
890 	for (p = buf; p < end; ) {
891 		if ((c = __fgetwc(fp, locale)) == WEOF)
892 			break;
893 reswitch:
894 		switch (state) {
895 		case S_START:
896 			state = S_GOTSIGN;
897 			if (c == '-' || c == '+')
898 				break;
899 			else
900 				goto reswitch;
901 		case S_GOTSIGN:
902 			switch (c) {
903 			case '0':
904 				state = S_MAYBEHEX;
905 				commit = p;
906 				break;
907 			case 'I':
908 			case 'i':
909 				state = S_INF;
910 				break;
911 			case 'N':
912 			case 'n':
913 				state = S_NAN;
914 				break;
915 			default:
916 				state = S_DIGITS;
917 				goto reswitch;
918 			}
919 			break;
920 		case S_INF:
921 			if (infnanpos > 6 ||
922 			    (c != "nfinity"[infnanpos] &&
923 			     c != "NFINITY"[infnanpos]))
924 				goto parsedone;
925 			if (infnanpos == 1 || infnanpos == 6)
926 				commit = p;	/* inf or infinity */
927 			infnanpos++;
928 			break;
929 		case S_NAN:
930 			switch (infnanpos) {
931 			case 0:
932 				if (c != 'A' && c != 'a')
933 					goto parsedone;
934 				break;
935 			case 1:
936 				if (c != 'N' && c != 'n')
937 					goto parsedone;
938 				else
939 					commit = p;
940 				break;
941 			case 2:
942 				if (c != '(')
943 					goto parsedone;
944 				break;
945 			default:
946 				if (c == ')') {
947 					commit = p;
948 					state = S_DONE;
949 				} else if (!iswalnum(c) && c != '_')
950 					goto parsedone;
951 				break;
952 			}
953 			infnanpos++;
954 			break;
955 		case S_DONE:
956 			goto parsedone;
957 		case S_MAYBEHEX:
958 			state = S_DIGITS;
959 			if (c == 'X' || c == 'x') {
960 				ishex = 1;
961 				break;
962 			} else {	/* we saw a '0', but no 'x' */
963 				gotmantdig = 1;
964 				goto reswitch;
965 			}
966 		case S_DIGITS:
967 			if ((ishex && iswxdigit(c)) || iswdigit(c))
968 				gotmantdig = 1;
969 			else {
970 				state = S_FRAC;
971 				if (c != decpt)
972 					goto reswitch;
973 			}
974 			if (gotmantdig)
975 				commit = p;
976 			break;
977 		case S_FRAC:
978 			if (((c == 'E' || c == 'e') && !ishex) ||
979 			    ((c == 'P' || c == 'p') && ishex)) {
980 				if (!gotmantdig)
981 					goto parsedone;
982 				else
983 					state = S_EXP;
984 			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
985 				commit = p;
986 				gotmantdig = 1;
987 			} else
988 				goto parsedone;
989 			break;
990 		case S_EXP:
991 			state = S_EXPDIGITS;
992 			if (c == '-' || c == '+')
993 				break;
994 			else
995 				goto reswitch;
996 		case S_EXPDIGITS:
997 			if (iswdigit(c))
998 				commit = p;
999 			else
1000 				goto parsedone;
1001 			break;
1002 		default:
1003 			abort();
1004 		}
1005 		*p++ = c;
1006 		c = WEOF;
1007 	}
1008 
1009 parsedone:
1010 	if (c != WEOF)
1011 		__ungetwc(c, fp, locale);
1012 	while (commit < --p)
1013 		__ungetwc(*p, fp, locale);
1014 	*++commit = '\0';
1015 	return (commit - buf);
1016 }
1017 #endif
1018