xref: /freebsd/lib/libc/stdio/vfwscanf.c (revision 602e98dd35ea5041b800fb56a2b1ac34f6649310)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Chris Torek.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 *
 * Copyright (c) 2023 Dag-Erling Smørgrav
 *
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
41 
42 #include "namespace.h"
43 #include <ctype.h>
44 #include <inttypes.h>
45 #include <limits.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stddef.h>
49 #include <stdarg.h>
50 #include <string.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 #include "un-namespace.h"
54 
55 #include "libc_private.h"
56 #include "local.h"
57 #include "xlocale_private.h"
58 
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.  Each flag is a distinct bit so that
 * several can be OR-ed together in a single `flags' word.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */
86 
87 #ifndef NO_FLOATING_POINT
88 static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t);
89 #endif
90 
/*
 * Description of a %[...] scanlist: the half-open range [start, end) of
 * wide characters listed in the format, plus whether the class is
 * complemented.
 */
struct ccl {
	const wchar_t *start;	/* character class start */
	const wchar_t *end;	/* character class end */
	int compl;		/* ccl is complemented? */
};

/*
 * Return non-zero if wi belongs to the character class described by ccl:
 * for a plain class, wi must appear in [start, end); for a complemented
 * class, it must not.
 */
static __inline int
inccl(const struct ccl *ccl, wint_t wi)
{
	int listed;

	listed = wmemchr(ccl->start, wi, ccl->end - ccl->start) != NULL;
	return (ccl->compl ? !listed : listed);
}
108 
/*
 * Conversion functions are passed a pointer to this object instead of
 * a real parameter to indicate that the assignment-suppression (*)
 * flag was specified.  We could use a NULL pointer to indicate this,
 * but that would mask bugs in applications that call scanf() with a
 * NULL pointer.
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/* Zero-initialized conversion state, copied to reset an mbstate_t. */
static const mbstate_t initial_mbs;

/*
 * The following conversion functions return the number of characters consumed,
 * or -1 on input failure.  Character class conversion returns 0 on match
 * failure.
 */
126 
127 static __inline int
convert_char(FILE * fp,char * mbp,int width,locale_t locale)128 convert_char(FILE *fp, char * mbp, int width, locale_t locale)
129 {
130 	mbstate_t mbs;
131 	size_t nconv;
132 	wint_t wi;
133 	int n;
134 
135 	n = 0;
136 	mbs = initial_mbs;
137 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
138 		if (mbp != SUPPRESS_PTR) {
139 			nconv = wcrtomb(mbp, wi, &mbs);
140 			if (nconv == (size_t)-1)
141 				return (-1);
142 			mbp += nconv;
143 		}
144 		n++;
145 	}
146 	if (n == 0)
147 		return (-1);
148 	return (n);
149 }
150 
151 static __inline int
convert_wchar(FILE * fp,wchar_t * wcp,int width,locale_t locale)152 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
153 {
154 	wint_t wi;
155 	int n;
156 
157 	n = 0;
158 	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
159 		if (wcp != SUPPRESS_PTR)
160 			*wcp++ = (wchar_t)wi;
161 		n++;
162 	}
163 	if (n == 0)
164 		return (-1);
165 	return (n);
166 }
167 
168 static __inline int
convert_ccl(FILE * fp,char * mbp,int width,const struct ccl * ccl,locale_t locale)169 convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
170     locale_t locale)
171 {
172 	mbstate_t mbs;
173 	size_t nconv;
174 	wint_t wi;
175 	int n;
176 
177 	n = 0;
178 	mbs = initial_mbs;
179 	while ((wi = __fgetwc(fp, locale)) != WEOF &&
180 	    width-- != 0 && inccl(ccl, wi)) {
181 		if (mbp != SUPPRESS_PTR) {
182 			nconv = wcrtomb(mbp, wi, &mbs);
183 			if (nconv == (size_t)-1)
184 				return (-1);
185 			mbp += nconv;
186 		}
187 		n++;
188 	}
189 	if (wi != WEOF)
190 		__ungetwc(wi, fp, locale);
191 	if (mbp != SUPPRESS_PTR)
192 		*mbp = 0;
193 	return (n);
194 }
195 
196 static __inline int
convert_wccl(FILE * fp,wchar_t * wcp,int width,const struct ccl * ccl,locale_t locale)197 convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
198     locale_t locale)
199 {
200 	wchar_t *wcp0;
201 	wint_t wi;
202 	int n;
203 
204 	if (wcp == SUPPRESS_PTR) {
205 		n = 0;
206 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
207 		    width-- != 0 && inccl(ccl, wi))
208 			n++;
209 		if (wi != WEOF)
210 			__ungetwc(wi, fp, locale);
211 	} else {
212 		wcp0 = wcp;
213 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
214 		    width-- != 0 && inccl(ccl, wi))
215 			*wcp++ = (wchar_t)wi;
216 		if (wi != WEOF)
217 			__ungetwc(wi, fp, locale);
218 		n = wcp - wcp0;
219 		if (n == 0)
220 			return (0);
221 		*wcp = 0;
222 	}
223 	return (n);
224 }
225 
226 static __inline int
convert_string(FILE * fp,char * mbp,int width,locale_t locale)227 convert_string(FILE *fp, char * mbp, int width, locale_t locale)
228 {
229 	mbstate_t mbs;
230 	size_t nconv;
231 	wint_t wi;
232 	int nread;
233 
234 	mbs = initial_mbs;
235 	nread = 0;
236 	while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
237 	    !iswspace(wi)) {
238 		if (mbp != SUPPRESS_PTR) {
239 			nconv = wcrtomb(mbp, wi, &mbs);
240 			if (nconv == (size_t)-1)
241 				return (-1);
242 			mbp += nconv;
243 		}
244 		nread++;
245 	}
246 	if (wi != WEOF)
247 		__ungetwc(wi, fp, locale);
248 	if (mbp != SUPPRESS_PTR)
249 		*mbp = 0;
250 	return (nread);
251 }
252 
253 static __inline int
convert_wstring(FILE * fp,wchar_t * wcp,int width,locale_t locale)254 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
255 {
256 	wint_t wi;
257 	int nread;
258 
259 	nread = 0;
260 	if (wcp == SUPPRESS_PTR) {
261 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
262 		    width-- != 0 && !iswspace(wi))
263 			nread++;
264 		if (wi != WEOF)
265 			__ungetwc(wi, fp, locale);
266 	} else {
267 		while ((wi = __fgetwc(fp, locale)) != WEOF &&
268 		    width-- != 0 && !iswspace(wi)) {
269 			*wcp++ = (wchar_t)wi;
270 			nread++;
271 		}
272 		if (wi != WEOF)
273 			__ungetwc(wi, fp, locale);
274 		*wcp = '\0';
275 	}
276 	return (nread);
277 }
278 
/*
 * States of the integer-recognition state machine, in the order in which
 * an input such as "-0x1f" passes through them.
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a leading '+' or '-' */
	havezero,	/* accepted a leading '0' (digit or start of prefix) */
	haveprefix,	/* accepted "0x"/"0X" or "0b"/"0B" */
	any,		/* accepted at least one digit after the above */
};

/*
 * Feed one character to the integer state machine.
 *
 * c is the candidate character, *state the current state and *base the
 * conversion base (0 means "not yet known": it becomes 8, 10, 16 or 2
 * depending on how the number starts).  Both *state and *base are updated
 * when the character is accepted.
 *
 * Returns 1 if c can extend the number read so far, 0 if it cannot.
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state != begin)
			return (0);
		*state = havesign;
		return (1);
	case 'x':
	case 'X':
		/* Hex prefix: only right after a leading zero. */
		if (*state != havezero || (*base != 0 && *base != 16))
			return (0);
		*state = haveprefix;
		*base = 16;
		return (1);
	case 'b':
	case 'B':
		/* Binary prefix right after a leading zero... */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		/* ...otherwise an ordinary digit with value 11. */
		digit = 11;
		break;
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
			return (1);
		}
		/* FALLTHROUGH */
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* A leading zero followed by an octal digit selects octal. */
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A number that starts with a non-zero digit is decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		digit = c - 'a' + 10;
		break;
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = c - 'A' + 10;
		break;
	default:
		return (0);
	}

	/* Accept the digit only if it is valid in the current base. */
	if (*base <= digit)
		return (0);
	*state = any;
	return (1);
}
397 
398 /*
399  * Read an integer, storing it in buf.
400  *
401  * Return 0 on a match failure, and the number of characters read
402  * otherwise.
403  */
404 static __inline int
parseint(FILE * fp,wchar_t * __restrict buf,int width,int base,locale_t locale)405 parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
406     locale_t locale)
407 {
408 	enum parseint_state state = begin;
409 	wchar_t *wcp;
410 	int c;
411 
412 	for (wcp = buf; width; width--) {
413 		c = __fgetwc(fp, locale);
414 		if (c == WEOF)
415 			break;
416 		if (!parseint_fsm(c, &state, &base))
417 			break;
418 		*wcp++ = (wchar_t)c;
419 	}
420 	/*
421 	 * If we only had a sign, push it back.  If we only had a 0b or 0x
422 	 * prefix (possibly preceded by a sign), we view it as "0" and
423 	 * push back the letter.  In all other cases, if we stopped
424 	 * because we read a non-number character, push it back.
425 	 */
426 	if (state == havesign) {
427 		wcp--;
428 		__ungetwc(*wcp, fp, locale);
429 	} else if (state == haveprefix) {
430 		wcp--;
431 		__ungetwc(c, fp, locale);
432 	} else if (width && c != WEOF) {
433 		__ungetwc(c, fp, locale);
434 	}
435 	return (wcp - buf);
436 }
437 
438 /*
439  * MT-safe version.
440  */
441 int
vfwscanf_l(FILE * __restrict fp,locale_t locale,const wchar_t * __restrict fmt,va_list ap)442 vfwscanf_l(FILE * __restrict fp, locale_t locale,
443 		const wchar_t * __restrict fmt, va_list ap)
444 {
445 	int ret;
446 	FIX_LOCALE(locale);
447 
448 	FLOCKFILE_CANCELSAFE(fp);
449 	ORIENT(fp, 1);
450 	ret = __vfwscanf(fp, locale, fmt, ap);
451 	FUNLOCKFILE_CANCELSAFE();
452 	return (ret);
453 }
/*
 * Same as vfwscanf_l(), using the locale returned by __get_locale().
 */
int
vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
{
	return (vfwscanf_l(fp, __get_locale(), fmt, ap));
}
459 
460 /*
461  * Non-MT-safe version.
462  */
463 int
__vfwscanf(FILE * __restrict fp,locale_t locale,const wchar_t * __restrict fmt,va_list ap)464 __vfwscanf(FILE * __restrict fp, locale_t locale,
465 		const wchar_t * __restrict fmt, va_list ap)
466 {
467 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
468 	wint_t c;		/* character from format, or conversion */
469 	size_t width;		/* field width, or 0 */
470 	int flags;		/* flags as defined above */
471 	int nassigned;		/* number of fields assigned */
472 	int nconversions;	/* number of conversions */
473 	int nr;			/* characters read by the current conversion */
474 	int nread;		/* number of characters consumed from fp */
475 	int base;		/* base argument to conversion function */
476 	struct ccl ccl;		/* character class info */
477 	wchar_t buf[BUF];	/* buffer for numeric conversions */
478 	wint_t wi;		/* handy wint_t */
479 
480 	nassigned = 0;
481 	nconversions = 0;
482 	nread = 0;
483 	ccl.start = ccl.end = NULL;
484 	for (;;) {
485 		c = *fmt++;
486 		if (c == 0)
487 			return (nassigned);
488 		if (iswspace(c)) {
489 			while ((c = __fgetwc(fp, locale)) != WEOF &&
490 			    iswspace_l(c, locale))
491 				nread++;
492 			if (c != WEOF)
493 				__ungetwc(c, fp, locale);
494 			continue;
495 		}
496 		if (c != '%')
497 			goto literal;
498 		width = 0;
499 		flags = 0;
500 		/*
501 		 * switch on the format.  continue if done;
502 		 * break once format type is derived.
503 		 */
504 again:		c = *fmt++;
505 		switch (c) {
506 		case '%':
507 literal:
508 			if ((wi = __fgetwc(fp, locale)) == WEOF)
509 				goto input_failure;
510 			if (wi != c) {
511 				__ungetwc(wi, fp, locale);
512 				goto match_failure;
513 			}
514 			nread++;
515 			continue;
516 
517 		case '*':
518 			flags |= SUPPRESS;
519 			goto again;
520 		case 'j':
521 			flags |= INTMAXT;
522 			goto again;
523 		case 'l':
524 			if (flags & LONG) {
525 				flags &= ~LONG;
526 				flags |= LONGLONG;
527 			} else
528 				flags |= LONG;
529 			goto again;
530 		case 'q':
531 			flags |= LONGLONG;	/* not quite */
532 			goto again;
533 		case 't':
534 			flags |= PTRDIFFT;
535 			goto again;
536 		case 'w':
537 			/*
538 			 * Fixed-width integer types.  On all platforms we
539 			 * support, int8_t is equivalent to char, int16_t
540 			 * is equivalent to short, int32_t is equivalent
541 			 * to int, int64_t is equivalent to long long int.
542 			 * Furthermore, int_fast8_t, int_fast16_t and
543 			 * int_fast32_t are equivalent to int, and
544 			 * int_fast64_t is equivalent to long long int.
545 			 */
546 			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
547 			if (fmt[0] == 'f') {
548 				flags |= FASTINT;
549 				fmt++;
550 			} else {
551 				flags &= ~FASTINT;
552 			}
553 			if (fmt[0] == '8') {
554 				if (!(flags & FASTINT))
555 					flags |= SHORTSHORT;
556 				else
557 					/* no flag set = 32 */ ;
558 				fmt += 1;
559 			} else if (fmt[0] == '1' && fmt[1] == '6') {
560 				if (!(flags & FASTINT))
561 					flags |= SHORT;
562 				else
563 					/* no flag set = 32 */ ;
564 				fmt += 2;
565 			} else if (fmt[0] == '3' && fmt[1] == '2') {
566 				/* no flag set = 32 */ ;
567 				fmt += 2;
568 			} else if (fmt[0] == '6' && fmt[1] == '4') {
569 				flags |= LONGLONG;
570 				fmt += 2;
571 			} else {
572 				goto match_failure;
573 			}
574 			goto again;
575 		case 'z':
576 			flags |= SIZET;
577 			goto again;
578 		case 'L':
579 			flags |= LONGDBL;
580 			goto again;
581 		case 'h':
582 			if (flags & SHORT) {
583 				flags &= ~SHORT;
584 				flags |= SHORTSHORT;
585 			} else
586 				flags |= SHORT;
587 			goto again;
588 
589 		case '0': case '1': case '2': case '3': case '4':
590 		case '5': case '6': case '7': case '8': case '9':
591 			width = width * 10 + c - '0';
592 			goto again;
593 
594 		/*
595 		 * Conversions.
596 		 */
597 		case 'B':
598 		case 'b':
599 			c = CT_INT;
600 			flags |= UNSIGNED;
601 			base = 2;
602 			break;
603 
604 		case 'd':
605 			c = CT_INT;
606 			base = 10;
607 			break;
608 
609 		case 'i':
610 			c = CT_INT;
611 			base = 0;
612 			break;
613 
614 		case 'o':
615 			c = CT_INT;
616 			flags |= UNSIGNED;
617 			base = 8;
618 			break;
619 
620 		case 'u':
621 			c = CT_INT;
622 			flags |= UNSIGNED;
623 			base = 10;
624 			break;
625 
626 		case 'X':
627 		case 'x':
628 			c = CT_INT;
629 			flags |= UNSIGNED;
630 			base = 16;
631 			break;
632 
633 #ifndef NO_FLOATING_POINT
634 		case 'A': case 'E': case 'F': case 'G':
635 		case 'a': case 'e': case 'f': case 'g':
636 			c = CT_FLOAT;
637 			break;
638 #endif
639 
640 		case 'S':
641 			flags |= LONG;
642 			/* FALLTHROUGH */
643 		case 's':
644 			c = CT_STRING;
645 			break;
646 
647 		case '[':
648 			ccl.start = fmt;
649 			if (*fmt == '^') {
650 				ccl.compl = 1;
651 				fmt++;
652 			} else
653 				ccl.compl = 0;
654 			if (*fmt == ']')
655 				fmt++;
656 			while (*fmt != '\0' && *fmt != ']')
657 				fmt++;
658 			ccl.end = fmt;
659 			fmt++;
660 			flags |= NOSKIP;
661 			c = CT_CCL;
662 			break;
663 
664 		case 'C':
665 			flags |= LONG;
666 			/* FALLTHROUGH */
667 		case 'c':
668 			flags |= NOSKIP;
669 			c = CT_CHAR;
670 			break;
671 
672 		case 'p':	/* pointer format is like hex */
673 			flags |= POINTER;
674 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
675 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
676 			base = 16;
677 			break;
678 
679 		case 'n':
680 			if (flags & SUPPRESS)	/* ??? */
681 				continue;
682 			if (flags & SHORTSHORT)
683 				*va_arg(ap, char *) = nread;
684 			else if (flags & SHORT)
685 				*va_arg(ap, short *) = nread;
686 			else if (flags & LONG)
687 				*va_arg(ap, long *) = nread;
688 			else if (flags & LONGLONG)
689 				*va_arg(ap, long long *) = nread;
690 			else if (flags & INTMAXT)
691 				*va_arg(ap, intmax_t *) = nread;
692 			else if (flags & SIZET)
693 				*va_arg(ap, size_t *) = nread;
694 			else if (flags & PTRDIFFT)
695 				*va_arg(ap, ptrdiff_t *) = nread;
696 			else
697 				*va_arg(ap, int *) = nread;
698 			continue;
699 
700 		default:
701 			goto match_failure;
702 
703 		/*
704 		 * Disgusting backwards compatibility hack.	XXX
705 		 */
706 		case '\0':	/* compat */
707 			return (EOF);
708 		}
709 
710 		/*
711 		 * Consume leading white space, except for formats
712 		 * that suppress this.
713 		 */
714 		if ((flags & NOSKIP) == 0) {
715 			while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi))
716 				nread++;
717 			if (wi == WEOF)
718 				goto input_failure;
719 			__ungetwc(wi, fp, locale);
720 		}
721 
722 		/*
723 		 * Do the conversion.
724 		 */
725 		switch (c) {
726 
727 		case CT_CHAR:
728 			/* scan arbitrary characters (sets NOSKIP) */
729 			if (width == 0)
730 				width = 1;
731 			if (flags & LONG) {
732 				nr = convert_wchar(fp, GETARG(wchar_t *), width,
733 				    locale);
734 			} else {
735 				nr = convert_char(fp, GETARG(char *), width,
736 				    locale);
737 			}
738 			if (nr < 0)
739 				goto input_failure;
740 			break;
741 
742 		case CT_CCL:
743 			/* scan a (nonempty) character class (sets NOSKIP) */
744 			if (width == 0)
745 				width = (size_t)~0;	/* `infinity' */
746 			/* take only those things in the class */
747 			if (flags & LONG) {
748 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
749 				    &ccl, locale);
750 			} else {
751 				nr = convert_ccl(fp, GETARG(char *), width,
752 				    &ccl, locale);
753 			}
754 			if (nr <= 0) {
755 				if (nr < 0)
756 					goto input_failure;
757 				else /* nr == 0 */
758 					goto match_failure;
759 			}
760 			break;
761 
762 		case CT_STRING:
763 			/* like CCL, but zero-length string OK, & no NOSKIP */
764 			if (width == 0)
765 				width = (size_t)~0;
766 			if (flags & LONG) {
767 				nr = convert_wstring(fp, GETARG(wchar_t *),
768 				    width, locale);
769 			} else {
770 				nr = convert_string(fp, GETARG(char *), width,
771 				    locale);
772 			}
773 			if (nr < 0)
774 				goto input_failure;
775 			break;
776 
777 		case CT_INT:
778 			/* scan an integer as if by the conversion function */
779 			if (width == 0 || width > sizeof(buf) /
780 			    sizeof(*buf) - 1)
781 				width = sizeof(buf) / sizeof(*buf) - 1;
782 
783 			nr = parseint(fp, buf, width, base, locale);
784 			if (nr == 0)
785 				goto match_failure;
786 			if ((flags & SUPPRESS) == 0) {
787 				uintmax_t res;
788 
789 				buf[nr] = L'\0';
790 				if ((flags & UNSIGNED) == 0)
791 				    res = wcstoimax(buf, NULL, base);
792 				else
793 				    res = wcstoumax(buf, NULL, base);
794 				if (flags & POINTER)
795 					*va_arg(ap, void **) =
796 							(void *)(uintptr_t)res;
797 				else if (flags & SHORTSHORT)
798 					*va_arg(ap, char *) = res;
799 				else if (flags & SHORT)
800 					*va_arg(ap, short *) = res;
801 				else if (flags & LONG)
802 					*va_arg(ap, long *) = res;
803 				else if (flags & LONGLONG)
804 					*va_arg(ap, long long *) = res;
805 				else if (flags & INTMAXT)
806 					*va_arg(ap, intmax_t *) = res;
807 				else if (flags & PTRDIFFT)
808 					*va_arg(ap, ptrdiff_t *) = res;
809 				else if (flags & SIZET)
810 					*va_arg(ap, size_t *) = res;
811 				else
812 					*va_arg(ap, int *) = res;
813 			}
814 			break;
815 
816 #ifndef NO_FLOATING_POINT
817 		case CT_FLOAT:
818 			/* scan a floating point number as if by strtod */
819 			if (width == 0 || width > sizeof(buf) /
820 			    sizeof(*buf) - 1)
821 				width = sizeof(buf) / sizeof(*buf) - 1;
822 			nr = parsefloat(fp, buf, buf + width, locale);
823 			if (nr == 0)
824 				goto match_failure;
825 			if ((flags & SUPPRESS) == 0) {
826 				if (flags & LONGDBL) {
827 					long double res = wcstold(buf, NULL);
828 					*va_arg(ap, long double *) = res;
829 				} else if (flags & LONG) {
830 					double res = wcstod(buf, NULL);
831 					*va_arg(ap, double *) = res;
832 				} else {
833 					float res = wcstof(buf, NULL);
834 					*va_arg(ap, float *) = res;
835 				}
836 			}
837 			break;
838 #endif /* !NO_FLOATING_POINT */
839 		}
840 		if (!(flags & SUPPRESS))
841 			nassigned++;
842 		nread += nr;
843 		nconversions++;
844 	}
845 input_failure:
846 	return (nconversions != 0 ? nassigned : EOF);
847 match_failure:
848 	return (nassigned);
849 }
850 
851 #ifndef NO_FLOATING_POINT
852 static int
parsefloat(FILE * fp,wchar_t * buf,wchar_t * end,locale_t locale)853 parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
854 {
855 	mbstate_t mbs;
856 	size_t nconv;
857 	wchar_t *commit, *p;
858 	int infnanpos = 0;
859 	enum {
860 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
861 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
862 	} state = S_START;
863 	wchar_t c;
864 	wchar_t decpt;
865 	_Bool gotmantdig = 0, ishex = 0;
866 
867 	mbs = initial_mbs;
868 	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
869 	if (nconv == (size_t)-1 || nconv == (size_t)-2)
870 		decpt = '.';	/* failsafe */
871 
872 	/*
873 	 * We set commit = p whenever the string we have read so far
874 	 * constitutes a valid representation of a floating point
875 	 * number by itself.  At some point, the parse will complete
876 	 * or fail, and we will ungetc() back to the last commit point.
877 	 * To ensure that the file offset gets updated properly, it is
878 	 * always necessary to read at least one character that doesn't
879 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
880 	 */
881 	commit = buf - 1;
882 	c = WEOF;
883 	for (p = buf; p < end; ) {
884 		if ((c = __fgetwc(fp, locale)) == WEOF)
885 			break;
886 reswitch:
887 		switch (state) {
888 		case S_START:
889 			state = S_GOTSIGN;
890 			if (c == '-' || c == '+')
891 				break;
892 			else
893 				goto reswitch;
894 		case S_GOTSIGN:
895 			switch (c) {
896 			case '0':
897 				state = S_MAYBEHEX;
898 				commit = p;
899 				break;
900 			case 'I':
901 			case 'i':
902 				state = S_INF;
903 				break;
904 			case 'N':
905 			case 'n':
906 				state = S_NAN;
907 				break;
908 			default:
909 				state = S_DIGITS;
910 				goto reswitch;
911 			}
912 			break;
913 		case S_INF:
914 			if (infnanpos > 6 ||
915 			    (c != "nfinity"[infnanpos] &&
916 			     c != "NFINITY"[infnanpos]))
917 				goto parsedone;
918 			if (infnanpos == 1 || infnanpos == 6)
919 				commit = p;	/* inf or infinity */
920 			infnanpos++;
921 			break;
922 		case S_NAN:
923 			switch (infnanpos) {
924 			case 0:
925 				if (c != 'A' && c != 'a')
926 					goto parsedone;
927 				break;
928 			case 1:
929 				if (c != 'N' && c != 'n')
930 					goto parsedone;
931 				else
932 					commit = p;
933 				break;
934 			case 2:
935 				if (c != '(')
936 					goto parsedone;
937 				break;
938 			default:
939 				if (c == ')') {
940 					commit = p;
941 					state = S_DONE;
942 				} else if (!iswalnum(c) && c != '_')
943 					goto parsedone;
944 				break;
945 			}
946 			infnanpos++;
947 			break;
948 		case S_DONE:
949 			goto parsedone;
950 		case S_MAYBEHEX:
951 			state = S_DIGITS;
952 			if (c == 'X' || c == 'x') {
953 				ishex = 1;
954 				break;
955 			} else {	/* we saw a '0', but no 'x' */
956 				gotmantdig = 1;
957 				goto reswitch;
958 			}
959 		case S_DIGITS:
960 			if ((ishex && iswxdigit(c)) || iswdigit(c))
961 				gotmantdig = 1;
962 			else {
963 				state = S_FRAC;
964 				if (c != decpt)
965 					goto reswitch;
966 			}
967 			if (gotmantdig)
968 				commit = p;
969 			break;
970 		case S_FRAC:
971 			if (((c == 'E' || c == 'e') && !ishex) ||
972 			    ((c == 'P' || c == 'p') && ishex)) {
973 				if (!gotmantdig)
974 					goto parsedone;
975 				else
976 					state = S_EXP;
977 			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
978 				commit = p;
979 				gotmantdig = 1;
980 			} else
981 				goto parsedone;
982 			break;
983 		case S_EXP:
984 			state = S_EXPDIGITS;
985 			if (c == '-' || c == '+')
986 				break;
987 			else
988 				goto reswitch;
989 		case S_EXPDIGITS:
990 			if (iswdigit(c))
991 				commit = p;
992 			else
993 				goto parsedone;
994 			break;
995 		default:
996 			abort();
997 		}
998 		*p++ = c;
999 		c = WEOF;
1000 	}
1001 
1002 parsedone:
1003 	if (c != WEOF)
1004 		__ungetwc(c, fp, locale);
1005 	while (commit < --p)
1006 		__ungetwc(*p, fp, locale);
1007 	*++commit = '\0';
1008 	return (commit - buf);
1009 }
1010 #endif
1011