xref: /freebsd/lib/libc/stdio/vfwscanf.c (revision 02e9120893770924227138ba49df1edb3896112a)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Torek.
9  *
10  * Copyright (c) 2011 The FreeBSD Foundation
11  *
12  * Copyright (c) 2023 Dag-Erling Smørgrav
13  *
14  * Portions of this software were developed by David Chisnall
15  * under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 #include "namespace.h"
43 #include <ctype.h>
44 #include <inttypes.h>
45 #include <limits.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stddef.h>
49 #include <stdarg.h>
50 #include <string.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 #include "un-namespace.h"
54 
55 #include "libc_private.h"
56 #include "local.h"
57 #include "xlocale_private.h"
58 
/*
 * Size, in wide characters, of the scratch buffer used by the numeric
 * conversions.  Field widths are clamped to one less than this so that
 * there is always room for the terminating null.
 */
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.  Each one is a distinct bit that is
 * OR'ed into the per-directive `flags' word while the conversion
 * specification is being parsed.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Conversion types.  Once the conversion character has been recognized
 * it is replaced by one of these codes, which selects the code path
 * that performs the actual conversion.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */

#ifndef NO_FLOATING_POINT
/* Defined at the end of this file; only built with floating point support. */
static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t);
#endif
90 
/*
 * A character class (scanset): the member characters occupy the half-open
 * range [start, end), and compl selects the complement of that set.
 */
struct ccl {
	const wchar_t *start;	/* character class start */
	const wchar_t *end;	/* character class end */
	int compl;		/* ccl is complemented? */
};

/*
 * Return nonzero if wi belongs to the character class: for a plain class
 * that means wi is listed, for a complemented class that it is not.
 */
static __inline int
inccl(const struct ccl *ccl, wint_t wi)
{
	size_t len = ccl->end - ccl->start;
	int listed = (wmemchr(ccl->start, wi, len) != NULL);

	return (ccl->compl ? !listed : listed);
}
108 
109 /*
110  * Conversion functions are passed a pointer to this object instead of
111  * a real parameter to indicate that the assignment-suppression (*)
112  * flag was specified.  We could use a NULL pointer to indicate this,
113  * but that would mask bugs in applications that call scanf() with a
114  * NULL pointer.
115  */
/* Only the address of this object matters; its value is never read. */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/*
 * Zero-initialized conversion state (static storage); the narrow-output
 * conversion functions copy it to reset their mbstate_t before use.
 */
static const mbstate_t initial_mbs;
120 
121 /*
122  * The following conversion functions return the number of characters consumed,
123  * or -1 on input failure.  Character class conversion returns 0 on match
124  * failure.
125  */
126 
127 static __inline int
128 convert_char(FILE *fp, char * mbp, int width, locale_t locale)
129 {
130 	mbstate_t mbs;
131 	size_t nconv;
132 	wint_t wi;
133 	int n;
134 
135 	n = 0;
136 	mbs = initial_mbs;
137 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
138 		if (mbp != SUPPRESS_PTR) {
139 			nconv = wcrtomb(mbp, wi, &mbs);
140 			if (nconv == (size_t)-1)
141 				return (-1);
142 			mbp += nconv;
143 		}
144 		n++;
145 	}
146 	if (n == 0)
147 		return (-1);
148 	return (n);
149 }
150 
151 static __inline int
152 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
153 {
154 	wint_t wi;
155 	int n;
156 
157 	n = 0;
158 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
159 		if (wcp != SUPPRESS_PTR)
160 			*wcp++ = (wchar_t)wi;
161 		n++;
162 	}
163 	if (n == 0)
164 		return (-1);
165 	return (n);
166 }
167 
168 static __inline int
169 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
170     locale_t locale)
171 {
172 	mbstate_t mbs;
173 	size_t nconv;
174 	wint_t wi;
175 	int n;
176 
177 	n = 0;
178 	mbs = initial_mbs;
179 	while ((wi = __fgetwc(fp, locale)) != WEOF &&
180 	    width-- != 0 && inccl(ccl, wi)) {
181 		if (mbp != SUPPRESS_PTR) {
182 			nconv = wcrtomb(mbp, wi, &mbs);
183 			if (nconv == (size_t)-1)
184 				return (-1);
185 			mbp += nconv;
186 		}
187 		n++;
188 	}
189 	if (wi != WEOF)
190 		__ungetwc(wi, fp, locale);
191 	if (mbp != SUPPRESS_PTR)
192 		*mbp = 0;
193 	return (n);
194 }
195 
196 static __inline int
197 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
198     locale_t locale)
199 {
200 	wchar_t *wcp0;
201 	wint_t wi;
202 	int n;
203 
204 	if (wcp == SUPPRESS_PTR) {
205 		n = 0;
206 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
207 		    width-- != 0 && inccl(ccl, wi))
208 			n++;
209 		if (wi != WEOF)
210 			__ungetwc(wi, fp, locale);
211 	} else {
212 		wcp0 = wcp;
213 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
214 		    width-- != 0 && inccl(ccl, wi))
215 			*wcp++ = (wchar_t)wi;
216 		if (wi != WEOF)
217 			__ungetwc(wi, fp, locale);
218 		n = wcp - wcp0;
219 		if (n == 0)
220 			return (0);
221 		*wcp = 0;
222 	}
223 	return (n);
224 }
225 
226 static __inline int
227 convert_string(FILE *fp, char * mbp, int width, locale_t locale)
228 {
229 	mbstate_t mbs;
230 	size_t nconv;
231 	wint_t wi;
232 	int nread;
233 
234 	mbs = initial_mbs;
235 	nread = 0;
236 	while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
237 	    !iswspace(wi)) {
238 		if (mbp != SUPPRESS_PTR) {
239 			nconv = wcrtomb(mbp, wi, &mbs);
240 			if (nconv == (size_t)-1)
241 				return (-1);
242 			mbp += nconv;
243 		}
244 		nread++;
245 	}
246 	if (wi != WEOF)
247 		__ungetwc(wi, fp, locale);
248 	if (mbp != SUPPRESS_PTR)
249 		*mbp = 0;
250 	return (nread);
251 }
252 
253 static __inline int
254 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
255 {
256 	wint_t wi;
257 	int nread;
258 
259 	nread = 0;
260 	if (wcp == SUPPRESS_PTR) {
261 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
262 		    width-- != 0 && !iswspace(wi))
263 			nread++;
264 		if (wi != WEOF)
265 			__ungetwc(wi, fp, locale);
266 	} else {
267 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
268 		    width-- != 0 && !iswspace(wi)) {
269 			*wcp++ = (wchar_t)wi;
270 			nread++;
271 		}
272 		if (wi != WEOF)
273 			__ungetwc(wi, fp, locale);
274 		*wcp = '\0';
275 	}
276 	return (nread);
277 }
278 
/*
 * States of the integer recognizer, describing what has been accepted
 * so far.
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* only a sign */
	havezero,	/* an optional sign and a single leading zero */
	haveprefix,	/* an optional sign and a "0b" or "0x" prefix */
	any,		/* at least one digit beyond the above */
};

/*
 * Feed one character to the integer recognizer.
 *
 * state and base are updated in place.  A base of 0 means "not yet
 * known" and is resolved from the input: a nonzero first digit selects
 * base 10, a digit after a leading zero selects base 8, and the "0b" and
 * "0x" prefixes select base 2 and base 16.
 *
 * Returns 1 if c extends the number read so far and 0 if it does not, in
 * which case state and base are left unchanged.
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		if (*state == begin) {
			*state = havesign;
			return 1;
		}
		return 0;
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/*
			 * A second digit after a leading zero means the
			 * number is octal.  Without this, "007" would
			 * leave the base undetermined and the '7' would
			 * be rejected.
			 */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return 1;
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALL THROUGH */
	case '8':
	case '9':
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
	case 'B':
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return 1;
		}
		/* Not a binary prefix; treat it as a hexadecimal digit. */
		/* FALL THROUGH */
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = (c >= 'a' ? c - 'a' : c - 'A') + 10;
		break;
	case 'x':
	case 'X':
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return 1;
		}
		return 0;
	default:
		return 0;
	}

	/* A digit is only acceptable if it is valid in the current base. */
	if (*base > digit) {
		*state = any;
		return 1;
	}
	return 0;
}
398 
399 /*
400  * Read an integer, storing it in buf.
401  *
402  * Return 0 on a match failure, and the number of characters read
403  * otherwise.
404  */
405 static __inline int
406 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
407     locale_t locale)
408 {
409 	enum parseint_state state = begin;
410 	wchar_t *wcp;
411 	int c;
412 
413 	for (wcp = buf; width; width--) {
414 		c = __fgetwc(fp, locale);
415 		if (c == WEOF)
416 			break;
417 		if (!parseint_fsm(c, &state, &base))
418 			break;
419 		*wcp++ = (wchar_t)c;
420 	}
421 	/*
422 	 * If we only had a sign, push it back.  If we only had a 0b or 0x
423 	 * prefix (possibly preceded by a sign), we view it as "0" and
424 	 * push back the letter.  In all other cases, if we stopped
425 	 * because we read a non-number character, push it back.
426 	 */
427 	if (state == havesign) {
428 		wcp--;
429 		__ungetwc(*wcp, fp, locale);
430 	} else if (state == haveprefix) {
431 		wcp--;
432 		__ungetwc(c, fp, locale);
433 	} else if (width && c != WEOF) {
434 		__ungetwc(c, fp, locale);
435 	}
436 	return (wcp - buf);
437 }
438 
/*
 * MT-safe version.
 *
 * Wraps __vfwscanf() between FLOCKFILE_CANCELSAFE() and
 * FUNLOCKFILE_CANCELSAFE() and returns its result unchanged.
 */
int
vfwscanf_l(FILE * __restrict fp, locale_t locale,
		const wchar_t * __restrict fmt, va_list ap)
{
	int ret;
	/* Macro defined elsewhere; presumably resolves special locale handles. */
	FIX_LOCALE(locale);

	/* These two macros must stay paired around the guarded region. */
	FLOCKFILE_CANCELSAFE(fp);
	/* NOTE(review): presumably selects wide orientation -- confirm in local.h. */
	ORIENT(fp, 1);
	ret = __vfwscanf(fp, locale, fmt, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (ret);
}
455 int
456 vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
457 {
458 	return vfwscanf_l(fp, __get_locale(), fmt, ap);
459 }
460 
461 /*
462  * Non-MT-safe version.
463  */
464 int
465 __vfwscanf(FILE * __restrict fp, locale_t locale,
466 		const wchar_t * __restrict fmt, va_list ap)
467 {
468 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
469 	wint_t c;		/* character from format, or conversion */
470 	size_t width;		/* field width, or 0 */
471 	int flags;		/* flags as defined above */
472 	int nassigned;		/* number of fields assigned */
473 	int nconversions;	/* number of conversions */
474 	int nr;			/* characters read by the current conversion */
475 	int nread;		/* number of characters consumed from fp */
476 	int base;		/* base argument to conversion function */
477 	struct ccl ccl;		/* character class info */
478 	wchar_t buf[BUF];	/* buffer for numeric conversions */
479 	wint_t wi;		/* handy wint_t */
480 
481 	nassigned = 0;
482 	nconversions = 0;
483 	nread = 0;
484 	ccl.start = ccl.end = NULL;
485 	for (;;) {
486 		c = *fmt++;
487 		if (c == 0)
488 			return (nassigned);
489 		if (iswspace(c)) {
490 			while ((c = __fgetwc(fp, locale)) != WEOF &&
491 			    iswspace_l(c, locale))
492 				nread++;
493 			if (c != WEOF)
494 				__ungetwc(c, fp, locale);
495 			continue;
496 		}
497 		if (c != '%')
498 			goto literal;
499 		width = 0;
500 		flags = 0;
501 		/*
502 		 * switch on the format.  continue if done;
503 		 * break once format type is derived.
504 		 */
505 again:		c = *fmt++;
506 		switch (c) {
507 		case '%':
508 literal:
509 			if ((wi = __fgetwc(fp, locale)) == WEOF)
510 				goto input_failure;
511 			if (wi != c) {
512 				__ungetwc(wi, fp, locale);
513 				goto match_failure;
514 			}
515 			nread++;
516 			continue;
517 
518 		case '*':
519 			flags |= SUPPRESS;
520 			goto again;
521 		case 'j':
522 			flags |= INTMAXT;
523 			goto again;
524 		case 'l':
525 			if (flags & LONG) {
526 				flags &= ~LONG;
527 				flags |= LONGLONG;
528 			} else
529 				flags |= LONG;
530 			goto again;
531 		case 'q':
532 			flags |= LONGLONG;	/* not quite */
533 			goto again;
534 		case 't':
535 			flags |= PTRDIFFT;
536 			goto again;
537 		case 'w':
538 			/*
539 			 * Fixed-width integer types.  On all platforms we
540 			 * support, int8_t is equivalent to char, int16_t
541 			 * is equivalent to short, int32_t is equivalent
542 			 * to int, int64_t is equivalent to long long int.
543 			 * Furthermore, int_fast8_t, int_fast16_t and
544 			 * int_fast32_t are equivalent to int, and
545 			 * int_fast64_t is equivalent to long long int.
546 			 */
547 			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
548 			if (fmt[0] == 'f') {
549 				flags |= FASTINT;
550 				fmt++;
551 			} else {
552 				flags &= ~FASTINT;
553 			}
554 			if (fmt[0] == '8') {
555 				if (!(flags & FASTINT))
556 					flags |= SHORTSHORT;
557 				else
558 					/* no flag set = 32 */ ;
559 				fmt += 1;
560 			} else if (fmt[0] == '1' && fmt[1] == '6') {
561 				if (!(flags & FASTINT))
562 					flags |= SHORT;
563 				else
564 					/* no flag set = 32 */ ;
565 				fmt += 2;
566 			} else if (fmt[0] == '3' && fmt[1] == '2') {
567 				/* no flag set = 32 */ ;
568 				fmt += 2;
569 			} else if (fmt[0] == '6' && fmt[1] == '4') {
570 				flags |= LONGLONG;
571 				fmt += 2;
572 			} else {
573 				goto match_failure;
574 			}
575 			goto again;
576 		case 'z':
577 			flags |= SIZET;
578 			goto again;
579 		case 'L':
580 			flags |= LONGDBL;
581 			goto again;
582 		case 'h':
583 			if (flags & SHORT) {
584 				flags &= ~SHORT;
585 				flags |= SHORTSHORT;
586 			} else
587 				flags |= SHORT;
588 			goto again;
589 
590 		case '0': case '1': case '2': case '3': case '4':
591 		case '5': case '6': case '7': case '8': case '9':
592 			width = width * 10 + c - '0';
593 			goto again;
594 
595 		/*
596 		 * Conversions.
597 		 */
598 		case 'B':
599 		case 'b':
600 			c = CT_INT;
601 			flags |= UNSIGNED;
602 			base = 2;
603 			break;
604 
605 		case 'd':
606 			c = CT_INT;
607 			base = 10;
608 			break;
609 
610 		case 'i':
611 			c = CT_INT;
612 			base = 0;
613 			break;
614 
615 		case 'o':
616 			c = CT_INT;
617 			flags |= UNSIGNED;
618 			base = 8;
619 			break;
620 
621 		case 'u':
622 			c = CT_INT;
623 			flags |= UNSIGNED;
624 			base = 10;
625 			break;
626 
627 		case 'X':
628 		case 'x':
629 			c = CT_INT;
630 			flags |= UNSIGNED;
631 			base = 16;
632 			break;
633 
634 #ifndef NO_FLOATING_POINT
635 		case 'A': case 'E': case 'F': case 'G':
636 		case 'a': case 'e': case 'f': case 'g':
637 			c = CT_FLOAT;
638 			break;
639 #endif
640 
641 		case 'S':
642 			flags |= LONG;
643 			/* FALLTHROUGH */
644 		case 's':
645 			c = CT_STRING;
646 			break;
647 
648 		case '[':
649 			ccl.start = fmt;
650 			if (*fmt == '^') {
651 				ccl.compl = 1;
652 				fmt++;
653 			} else
654 				ccl.compl = 0;
655 			if (*fmt == ']')
656 				fmt++;
657 			while (*fmt != '\0' && *fmt != ']')
658 				fmt++;
659 			ccl.end = fmt;
660 			fmt++;
661 			flags |= NOSKIP;
662 			c = CT_CCL;
663 			break;
664 
665 		case 'C':
666 			flags |= LONG;
667 			/* FALLTHROUGH */
668 		case 'c':
669 			flags |= NOSKIP;
670 			c = CT_CHAR;
671 			break;
672 
673 		case 'p':	/* pointer format is like hex */
674 			flags |= POINTER;
675 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
676 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
677 			base = 16;
678 			break;
679 
680 		case 'n':
681 			if (flags & SUPPRESS)	/* ??? */
682 				continue;
683 			if (flags & SHORTSHORT)
684 				*va_arg(ap, char *) = nread;
685 			else if (flags & SHORT)
686 				*va_arg(ap, short *) = nread;
687 			else if (flags & LONG)
688 				*va_arg(ap, long *) = nread;
689 			else if (flags & LONGLONG)
690 				*va_arg(ap, long long *) = nread;
691 			else if (flags & INTMAXT)
692 				*va_arg(ap, intmax_t *) = nread;
693 			else if (flags & SIZET)
694 				*va_arg(ap, size_t *) = nread;
695 			else if (flags & PTRDIFFT)
696 				*va_arg(ap, ptrdiff_t *) = nread;
697 			else
698 				*va_arg(ap, int *) = nread;
699 			continue;
700 
701 		default:
702 			goto match_failure;
703 
704 		/*
705 		 * Disgusting backwards compatibility hack.	XXX
706 		 */
707 		case '\0':	/* compat */
708 			return (EOF);
709 		}
710 
711 		/*
712 		 * Consume leading white space, except for formats
713 		 * that suppress this.
714 		 */
715 		if ((flags & NOSKIP) == 0) {
716 			while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi))
717 				nread++;
718 			if (wi == WEOF)
719 				goto input_failure;
720 			__ungetwc(wi, fp, locale);
721 		}
722 
723 		/*
724 		 * Do the conversion.
725 		 */
726 		switch (c) {
727 
728 		case CT_CHAR:
729 			/* scan arbitrary characters (sets NOSKIP) */
730 			if (width == 0)
731 				width = 1;
732 			if (flags & LONG) {
733 				nr = convert_wchar(fp, GETARG(wchar_t *), width,
734 				    locale);
735 			} else {
736 				nr = convert_char(fp, GETARG(char *), width,
737 				    locale);
738 			}
739 			if (nr < 0)
740 				goto input_failure;
741 			break;
742 
743 		case CT_CCL:
744 			/* scan a (nonempty) character class (sets NOSKIP) */
745 			if (width == 0)
746 				width = (size_t)~0;	/* `infinity' */
747 			/* take only those things in the class */
748 			if (flags & LONG) {
749 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
750 				    &ccl, locale);
751 			} else {
752 				nr = convert_ccl(fp, GETARG(char *), width,
753 				    &ccl, locale);
754 			}
755 			if (nr <= 0) {
756 				if (nr < 0)
757 					goto input_failure;
758 				else /* nr == 0 */
759 					goto match_failure;
760 			}
761 			break;
762 
763 		case CT_STRING:
764 			/* like CCL, but zero-length string OK, & no NOSKIP */
765 			if (width == 0)
766 				width = (size_t)~0;
767 			if (flags & LONG) {
768 				nr = convert_wstring(fp, GETARG(wchar_t *),
769 				    width, locale);
770 			} else {
771 				nr = convert_string(fp, GETARG(char *), width,
772 				    locale);
773 			}
774 			if (nr < 0)
775 				goto input_failure;
776 			break;
777 
778 		case CT_INT:
779 			/* scan an integer as if by the conversion function */
780 			if (width == 0 || width > sizeof(buf) /
781 			    sizeof(*buf) - 1)
782 				width = sizeof(buf) / sizeof(*buf) - 1;
783 
784 			nr = parseint(fp, buf, width, base, locale);
785 			if (nr == 0)
786 				goto match_failure;
787 			if ((flags & SUPPRESS) == 0) {
788 				uintmax_t res;
789 
790 				buf[nr] = L'\0';
791 				if ((flags & UNSIGNED) == 0)
792 				    res = wcstoimax(buf, NULL, base);
793 				else
794 				    res = wcstoumax(buf, NULL, base);
795 				if (flags & POINTER)
796 					*va_arg(ap, void **) =
797 							(void *)(uintptr_t)res;
798 				else if (flags & SHORTSHORT)
799 					*va_arg(ap, char *) = res;
800 				else if (flags & SHORT)
801 					*va_arg(ap, short *) = res;
802 				else if (flags & LONG)
803 					*va_arg(ap, long *) = res;
804 				else if (flags & LONGLONG)
805 					*va_arg(ap, long long *) = res;
806 				else if (flags & INTMAXT)
807 					*va_arg(ap, intmax_t *) = res;
808 				else if (flags & PTRDIFFT)
809 					*va_arg(ap, ptrdiff_t *) = res;
810 				else if (flags & SIZET)
811 					*va_arg(ap, size_t *) = res;
812 				else
813 					*va_arg(ap, int *) = res;
814 			}
815 			break;
816 
817 #ifndef NO_FLOATING_POINT
818 		case CT_FLOAT:
819 			/* scan a floating point number as if by strtod */
820 			if (width == 0 || width > sizeof(buf) /
821 			    sizeof(*buf) - 1)
822 				width = sizeof(buf) / sizeof(*buf) - 1;
823 			nr = parsefloat(fp, buf, buf + width, locale);
824 			if (nr == 0)
825 				goto match_failure;
826 			if ((flags & SUPPRESS) == 0) {
827 				if (flags & LONGDBL) {
828 					long double res = wcstold(buf, NULL);
829 					*va_arg(ap, long double *) = res;
830 				} else if (flags & LONG) {
831 					double res = wcstod(buf, NULL);
832 					*va_arg(ap, double *) = res;
833 				} else {
834 					float res = wcstof(buf, NULL);
835 					*va_arg(ap, float *) = res;
836 				}
837 			}
838 			break;
839 #endif /* !NO_FLOATING_POINT */
840 		}
841 		if (!(flags & SUPPRESS))
842 			nassigned++;
843 		nread += nr;
844 		nconversions++;
845 	}
846 input_failure:
847 	return (nconversions != 0 ? nassigned : EOF);
848 match_failure:
849 	return (nassigned);
850 }
851 
852 #ifndef NO_FLOATING_POINT
853 static int
854 parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
855 {
856 	mbstate_t mbs;
857 	size_t nconv;
858 	wchar_t *commit, *p;
859 	int infnanpos = 0;
860 	enum {
861 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
862 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
863 	} state = S_START;
864 	wchar_t c;
865 	wchar_t decpt;
866 	_Bool gotmantdig = 0, ishex = 0;
867 
868 	mbs = initial_mbs;
869 	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
870 	if (nconv == (size_t)-1 || nconv == (size_t)-2)
871 		decpt = '.';	/* failsafe */
872 
873 	/*
874 	 * We set commit = p whenever the string we have read so far
875 	 * constitutes a valid representation of a floating point
876 	 * number by itself.  At some point, the parse will complete
877 	 * or fail, and we will ungetc() back to the last commit point.
878 	 * To ensure that the file offset gets updated properly, it is
879 	 * always necessary to read at least one character that doesn't
880 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
881 	 */
882 	commit = buf - 1;
883 	c = WEOF;
884 	for (p = buf; p < end; ) {
885 		if ((c = __fgetwc(fp, locale)) == WEOF)
886 			break;
887 reswitch:
888 		switch (state) {
889 		case S_START:
890 			state = S_GOTSIGN;
891 			if (c == '-' || c == '+')
892 				break;
893 			else
894 				goto reswitch;
895 		case S_GOTSIGN:
896 			switch (c) {
897 			case '0':
898 				state = S_MAYBEHEX;
899 				commit = p;
900 				break;
901 			case 'I':
902 			case 'i':
903 				state = S_INF;
904 				break;
905 			case 'N':
906 			case 'n':
907 				state = S_NAN;
908 				break;
909 			default:
910 				state = S_DIGITS;
911 				goto reswitch;
912 			}
913 			break;
914 		case S_INF:
915 			if (infnanpos > 6 ||
916 			    (c != "nfinity"[infnanpos] &&
917 			     c != "NFINITY"[infnanpos]))
918 				goto parsedone;
919 			if (infnanpos == 1 || infnanpos == 6)
920 				commit = p;	/* inf or infinity */
921 			infnanpos++;
922 			break;
923 		case S_NAN:
924 			switch (infnanpos) {
925 			case 0:
926 				if (c != 'A' && c != 'a')
927 					goto parsedone;
928 				break;
929 			case 1:
930 				if (c != 'N' && c != 'n')
931 					goto parsedone;
932 				else
933 					commit = p;
934 				break;
935 			case 2:
936 				if (c != '(')
937 					goto parsedone;
938 				break;
939 			default:
940 				if (c == ')') {
941 					commit = p;
942 					state = S_DONE;
943 				} else if (!iswalnum(c) && c != '_')
944 					goto parsedone;
945 				break;
946 			}
947 			infnanpos++;
948 			break;
949 		case S_DONE:
950 			goto parsedone;
951 		case S_MAYBEHEX:
952 			state = S_DIGITS;
953 			if (c == 'X' || c == 'x') {
954 				ishex = 1;
955 				break;
956 			} else {	/* we saw a '0', but no 'x' */
957 				gotmantdig = 1;
958 				goto reswitch;
959 			}
960 		case S_DIGITS:
961 			if ((ishex && iswxdigit(c)) || iswdigit(c))
962 				gotmantdig = 1;
963 			else {
964 				state = S_FRAC;
965 				if (c != decpt)
966 					goto reswitch;
967 			}
968 			if (gotmantdig)
969 				commit = p;
970 			break;
971 		case S_FRAC:
972 			if (((c == 'E' || c == 'e') && !ishex) ||
973 			    ((c == 'P' || c == 'p') && ishex)) {
974 				if (!gotmantdig)
975 					goto parsedone;
976 				else
977 					state = S_EXP;
978 			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
979 				commit = p;
980 				gotmantdig = 1;
981 			} else
982 				goto parsedone;
983 			break;
984 		case S_EXP:
985 			state = S_EXPDIGITS;
986 			if (c == '-' || c == '+')
987 				break;
988 			else
989 				goto reswitch;
990 		case S_EXPDIGITS:
991 			if (iswdigit(c))
992 				commit = p;
993 			else
994 				goto parsedone;
995 			break;
996 		default:
997 			abort();
998 		}
999 		*p++ = c;
1000 		c = WEOF;
1001 	}
1002 
1003 parsedone:
1004 	if (c != WEOF)
1005 		__ungetwc(c, fp, locale);
1006 	while (commit < --p)
1007 		__ungetwc(*p, fp, locale);
1008 	*++commit = '\0';
1009 	return (commit - buf);
1010 }
1011 #endif
1012