xref: /freebsd/lib/libc/stdio/vfscanf.c (revision 944223076fbcb681a8eb3e118ddafab79b21ac3d)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Copyright (c) 2011 The FreeBSD Foundation
8  *
9  * Copyright (c) 2023 Dag-Erling Smørgrav
10  *
11  * Portions of this software were developed by David Chisnall
12  * under sponsorship from the FreeBSD Foundation.
13  *
14  * This code is derived from software contributed to Berkeley by
15  * Chris Torek.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 #if defined(LIBC_SCCS) && !defined(lint)
43 static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
44 #endif /* LIBC_SCCS and not lint */
45 #include <sys/cdefs.h>
46 #include "namespace.h"
47 #include <ctype.h>
48 #include <inttypes.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <stddef.h>
52 #include <stdarg.h>
53 #include <string.h>
54 #include <wchar.h>
55 #include <wctype.h>
56 #include "un-namespace.h"
57 
58 #include "collate.h"
59 #include "libc_private.h"
60 #include "local.h"
61 #include "xlocale_private.h"
62 
63 #ifndef NO_FLOATING_POINT
64 #include <locale.h>
65 #endif
66 
67 #define	BUF		513	/* Maximum length of numeric string. */
68 
69 /*
70  * Flags used during conversion.
71  */
72 #define	LONG		0x01	/* l: long, double, or wide char/string */
73 #define	LONGDBL		0x02	/* L: long double */
74 #define	SHORT		0x04	/* h: short */
75 #define	SUPPRESS	0x08	/* *: suppress assignment */
76 #define	POINTER		0x10	/* p: void * (as hex) */
77 #define	NOSKIP		0x20	/* [ or c: do not skip blanks */
78 #define FASTINT		0x200	/* wfN: int_fastN_t */
79 #define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
80 #define	INTMAXT		0x800	/* j: intmax_t */
81 #define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
82 #define	SIZET		0x2000	/* z: size_t */
83 #define	SHORTSHORT	0x4000	/* hh: char */
84 #define	UNSIGNED	0x8000	/* %[bBoupxX] conversions */
85 
86 /*
87  * Conversion types.
88  */
89 #define	CT_CHAR		0	/* %c and %C conversions */
90 #define	CT_CCL		1	/* %[...] conversion */
91 #define	CT_STRING	2	/* %s and %S conversions */
92 #define	CT_INT		3	/* %[bBdioupxX] conversion */
93 #define	CT_FLOAT	4	/* %[aefgAEFG] conversion */
94 
95 static const u_char *__sccl(char *, const u_char *);
96 #ifndef NO_FLOATING_POINT
97 static int parsefloat(FILE *, char *, char *, locale_t);
98 #endif
99 
100 __weak_reference(__vfscanf, vfscanf);
101 
102 /*
103  * Conversion functions are passed a pointer to this object instead of
104  * a real parameter to indicate that the assignment-suppression (*)
105  * flag was specified.  We could use a NULL pointer to indicate this,
106  * but that would mask bugs in applications that call scanf() with a
107  * NULL pointer.
108  */
109 static const int suppress;
110 #define	SUPPRESS_PTR	((void *)&suppress)
111 
112 static const mbstate_t initial_mbs;	/* zero-initialized state copied to reset an mbstate_t */
113 
114 /*
115  * The following conversion functions return the number of characters consumed,
116  * or -1 on input failure.  Character class conversion returns 0 on match
117  * failure.
118  */
119 
120 static __inline int
121 convert_char(FILE *fp, char * p, int width)
122 {
123 	int n;
124 
125 	if (p == SUPPRESS_PTR) {
126 		size_t sum = 0;
127 		for (;;) {
128 			if ((n = fp->_r) < width) {
129 				sum += n;
130 				width -= n;
131 				fp->_p += n;
132 				if (__srefill(fp)) {
133 					if (sum == 0)
134 						return (-1);
135 					break;
136 				}
137 			} else {
138 				sum += width;
139 				fp->_r -= width;
140 				fp->_p += width;
141 				break;
142 			}
143 		}
144 		return (sum);
145 	} else {
146 		size_t r = __fread(p, 1, width, fp);
147 
148 		if (r == 0)
149 			return (-1);
150 		return (r);
151 	}
152 }
153 
154 static __inline int
155 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
156 {
157 	mbstate_t mbs;
158 	int n, nread;
159 	wint_t wi;
160 
161 	mbs = initial_mbs;
162 	n = 0;
163 	while (width-- != 0 &&
164 	    (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
165 		if (wcp != SUPPRESS_PTR)
166 			*wcp++ = (wchar_t)wi;
167 		n += nread;
168 	}
169 	if (n == 0)
170 		return (-1);
171 	return (n);
172 }
173 
174 static __inline int
175 convert_ccl(FILE *fp, char * p, int width, const char *ccltab)
176 {
177 	char *p0;
178 	int n;
179 
180 	if (p == SUPPRESS_PTR) {
181 		n = 0;
182 		while (ccltab[*fp->_p]) {
183 			n++, fp->_r--, fp->_p++;
184 			if (--width == 0)
185 				break;
186 			if (fp->_r <= 0 && __srefill(fp)) {
187 				if (n == 0)
188 					return (-1);
189 				break;
190 			}
191 		}
192 	} else {
193 		p0 = p;
194 		while (ccltab[*fp->_p]) {
195 			fp->_r--;
196 			*p++ = *fp->_p++;
197 			if (--width == 0)
198 				break;
199 			if (fp->_r <= 0 && __srefill(fp)) {
200 				if (p == p0)
201 					return (-1);
202 				break;
203 			}
204 		}
205 		n = p - p0;
206 		if (n == 0)
207 			return (0);
208 		*p = 0;
209 	}
210 	return (n);
211 }
212 
213 static __inline int
214 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
215     locale_t locale)
216 {
217 	mbstate_t mbs;
218 	wint_t wi;
219 	int n, nread;
220 
221 	mbs = initial_mbs;
222 	n = 0;
223 	if (wcp == SUPPRESS_PTR) {
224 		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
225 		    width-- != 0 && ccltab[wctob(wi)])
226 			n += nread;
227 		if (wi != WEOF)
228 			__ungetwc(wi, fp, __get_locale());
229 	} else {
230 		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
231 		    width-- != 0 && ccltab[wctob(wi)]) {
232 			*wcp++ = (wchar_t)wi;
233 			n += nread;
234 		}
235 		if (wi != WEOF)
236 			__ungetwc(wi, fp, __get_locale());
237 		if (n == 0)
238 			return (0);
239 		*wcp = 0;
240 	}
241 	return (n);
242 }
243 
244 static __inline int
245 convert_string(FILE *fp, char * p, int width)
246 {
247 	char *p0;
248 	int n;
249 
250 	if (p == SUPPRESS_PTR) {
251 		n = 0;
252 		while (!isspace(*fp->_p)) {
253 			n++, fp->_r--, fp->_p++;
254 			if (--width == 0)
255 				break;
256 			if (fp->_r <= 0 && __srefill(fp))
257 				break;
258 		}
259 	} else {
260 		p0 = p;
261 		while (!isspace(*fp->_p)) {
262 			fp->_r--;
263 			*p++ = *fp->_p++;
264 			if (--width == 0)
265 				break;
266 			if (fp->_r <= 0 && __srefill(fp))
267 				break;
268 		}
269 		*p = 0;
270 		n = p - p0;
271 	}
272 	return (n);
273 }
274 
275 static __inline int
276 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
277 {
278 	mbstate_t mbs;
279 	wint_t wi;
280 	int n, nread;
281 
282 	mbs = initial_mbs;
283 	n = 0;
284 	if (wcp == SUPPRESS_PTR) {
285 		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
286 		    width-- != 0 && !iswspace(wi))
287 			n += nread;
288 		if (wi != WEOF)
289 			__ungetwc(wi, fp, __get_locale());
290 	} else {
291 		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
292 		    width-- != 0 && !iswspace(wi)) {
293 			*wcp++ = (wchar_t)wi;
294 			n += nread;
295 		}
296 		if (wi != WEOF)
297 			__ungetwc(wi, fp, __get_locale());
298 		*wcp = '\0';
299 	}
300 	return (n);
301 }
302 
/*
 * States of the integer-recognition state machine.
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a leading '+' or '-' only */
	havezero,	/* accepted a first '0' (after an optional sign) */
	haveprefix,	/* accepted "0x"/"0X" or "0b"/"0B", no digits yet */
	any,		/* accepted at least one further digit */
};

/*
 * Feed one input character to the integer state machine.
 *
 * c is the candidate character, *state the current state and *base the
 * conversion base (0 means "not yet determined": it becomes 2, 8, 10 or
 * 16 according to the prefix seen).  Both *state and *base are updated
 * when the character is accepted.
 *
 * Returns 1 if c extends the number and 0 if it does not.
 */
static __inline int
parseint_fsm(int c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state == begin) {
			*state = havesign;
			return 1;
		}
		return 0;
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/*
			 * A second leading zero with an undetermined base
			 * means the number is octal; without this "007"
			 * would stop after "00" because no digit is
			 * smaller than a base of 0.
			 */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return 1;
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* "0" followed by an octal digit selects base 8. */
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALL THROUGH */
	case '8':
	case '9':
		/* A number starting with 1-9 is decimal by default. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
	case 'B':
		/* "0b" is a binary prefix unless another base is forced. */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return 1;
		}
		/* Otherwise it is the digit eleven. */
		digit = 11;
		break;
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		digit = c - 'a' + 10;
		break;
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = c - 'A' + 10;
		break;
	case 'x':
	case 'X':
		/* "0x" is a hex prefix unless another base is forced. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return 1;
		}
		return 0;
	default:
		return 0;
	}

	/* A digit is accepted only if it is valid in the current base. */
	if (*base > digit) {
		*state = any;
		return 1;
	}
	return 0;
}
422 
423 /*
424  * Read an integer, storing it in buf.
425  *
426  * Return 0 on a match failure, and the number of characters read
427  * otherwise.
428  */
429 static __inline int
430 parseint(FILE *fp, char * __restrict buf, int width, int base)
431 {
432 	enum parseint_state state = begin;
433 	char *p;
434 	int c;
435 
436 	for (p = buf; width; width--) {
437 		c = __sgetc(fp);
438 		if (c == EOF)
439 			break;
440 		if (!parseint_fsm(c, &state, &base))
441 			break;
442 		*p++ = c;
443 	}
444 	/*
445 	 * If we only had a sign, push it back.  If we only had a 0b or 0x
446 	 * prefix (possibly preceded by a sign), we view it as "0" and
447 	 * push back the letter.  In all other cases, if we stopped
448 	 * because we read a non-number character, push it back.
449 	 */
450 	if (state == havesign) {
451 		p--;
452 		(void) __ungetc(*(u_char *)p, fp);
453 	} else if (state == haveprefix) {
454 		p--;
455 		(void) __ungetc(c, fp);
456 	} else if (width && c != EOF) {
457 		(void) __ungetc(c, fp);
458 	}
459 	return (p - buf);
460 }
461 
/*
 * __vfscanf - MT-safe version
 *
 * Runs __svfscanf() with the locale returned by __get_locale(), with
 * the call bracketed by FLOCKFILE_CANCELSAFE()/FUNLOCKFILE_CANCELSAFE()
 * on fp, and returns whatever __svfscanf() returned.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int result;

	FLOCKFILE_CANCELSAFE(fp);
	result = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (result);
}
475 int
476 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
477 {
478 	int ret;
479 	FIX_LOCALE(locale);
480 
481 	FLOCKFILE_CANCELSAFE(fp);
482 	ret = __svfscanf(fp, locale, fmt0, ap);
483 	FUNLOCKFILE_CANCELSAFE();
484 	return (ret);
485 }
486 
487 /*
488  * __svfscanf - non-MT-safe version of __vfscanf
489  */
490 int
491 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
492 {
493 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
494 	const u_char *fmt = (const u_char *)fmt0;
495 	int c;			/* character from format, or conversion */
496 	size_t width;		/* field width, or 0 */
497 	int flags;		/* flags as defined above */
498 	int nassigned;		/* number of fields assigned */
499 	int nconversions;	/* number of conversions */
500 	int nr;			/* characters read by the current conversion */
501 	int nread;		/* number of characters consumed from fp */
502 	int base;		/* base argument to conversion function */
503 	char ccltab[256];	/* character class table for %[...] */
504 	char buf[BUF];		/* buffer for numeric conversions */
505 
506 	ORIENT(fp, -1);
507 
508 	nassigned = 0;
509 	nconversions = 0;
510 	nread = 0;
511 	for (;;) {
512 		c = *fmt++;
513 		if (c == 0)
514 			return (nassigned);
515 		if (isspace(c)) {
516 			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
517 				nread++, fp->_r--, fp->_p++;
518 			continue;
519 		}
520 		if (c != '%')
521 			goto literal;
522 		width = 0;
523 		flags = 0;
524 		/*
525 		 * switch on the format.  continue if done;
526 		 * break once format type is derived.
527 		 */
528 again:		c = *fmt++;
529 		switch (c) {
530 		case '%':
531 literal:
532 			if (fp->_r <= 0 && __srefill(fp))
533 				goto input_failure;
534 			if (*fp->_p != c)
535 				goto match_failure;
536 			fp->_r--, fp->_p++;
537 			nread++;
538 			continue;
539 
540 		case '*':
541 			flags |= SUPPRESS;
542 			goto again;
543 		case 'j':
544 			flags |= INTMAXT;
545 			goto again;
546 		case 'l':
547 			if (flags & LONG) {
548 				flags &= ~LONG;
549 				flags |= LONGLONG;
550 			} else
551 				flags |= LONG;
552 			goto again;
553 		case 'q':
554 			flags |= LONGLONG;	/* not quite */
555 			goto again;
556 		case 't':
557 			flags |= PTRDIFFT;
558 			goto again;
559 		case 'w':
560 			/*
561 			 * Fixed-width integer types.  On all platforms we
562 			 * support, int8_t is equivalent to char, int16_t
563 			 * is equivalent to short, int32_t is equivalent
564 			 * to int, int64_t is equivalent to long long int.
565 			 * Furthermore, int_fast8_t, int_fast16_t and
566 			 * int_fast32_t are equivalent to int, and
567 			 * int_fast64_t is equivalent to long long int.
568 			 */
569 			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
570 			if (fmt[0] == 'f') {
571 				flags |= FASTINT;
572 				fmt++;
573 			} else {
574 				flags &= ~FASTINT;
575 			}
576 			if (fmt[0] == '8') {
577 				if (!(flags & FASTINT))
578 					flags |= SHORTSHORT;
579 				else
580 					/* no flag set = 32 */ ;
581 				fmt += 1;
582 			} else if (fmt[0] == '1' && fmt[1] == '6') {
583 				if (!(flags & FASTINT))
584 					flags |= SHORT;
585 				else
586 					/* no flag set = 32 */ ;
587 				fmt += 2;
588 			} else if (fmt[0] == '3' && fmt[1] == '2') {
589 				/* no flag set = 32 */ ;
590 				fmt += 2;
591 			} else if (fmt[0] == '6' && fmt[1] == '4') {
592 				flags |= LONGLONG;
593 				fmt += 2;
594 			} else {
595 				goto match_failure;
596 			}
597 			goto again;
598 		case 'z':
599 			flags |= SIZET;
600 			goto again;
601 		case 'L':
602 			flags |= LONGDBL;
603 			goto again;
604 		case 'h':
605 			if (flags & SHORT) {
606 				flags &= ~SHORT;
607 				flags |= SHORTSHORT;
608 			} else
609 				flags |= SHORT;
610 			goto again;
611 
612 		case '0': case '1': case '2': case '3': case '4':
613 		case '5': case '6': case '7': case '8': case '9':
614 			width = width * 10 + c - '0';
615 			goto again;
616 
617 		/*
618 		 * Conversions.
619 		 */
620 		case 'B':
621 		case 'b':
622 			c = CT_INT;
623 			flags |= UNSIGNED;
624 			base = 2;
625 			break;
626 
627 		case 'd':
628 			c = CT_INT;
629 			base = 10;
630 			break;
631 
632 		case 'i':
633 			c = CT_INT;
634 			base = 0;
635 			break;
636 
637 		case 'o':
638 			c = CT_INT;
639 			flags |= UNSIGNED;
640 			base = 8;
641 			break;
642 
643 		case 'u':
644 			c = CT_INT;
645 			flags |= UNSIGNED;
646 			base = 10;
647 			break;
648 
649 		case 'X':
650 		case 'x':
651 			c = CT_INT;
652 			flags |= UNSIGNED;
653 			base = 16;
654 			break;
655 
656 #ifndef NO_FLOATING_POINT
657 		case 'A': case 'E': case 'F': case 'G':
658 		case 'a': case 'e': case 'f': case 'g':
659 			c = CT_FLOAT;
660 			break;
661 #endif
662 
663 		case 'S':
664 			flags |= LONG;
665 			/* FALLTHROUGH */
666 		case 's':
667 			c = CT_STRING;
668 			break;
669 
670 		case '[':
671 			fmt = __sccl(ccltab, fmt);
672 			flags |= NOSKIP;
673 			c = CT_CCL;
674 			break;
675 
676 		case 'C':
677 			flags |= LONG;
678 			/* FALLTHROUGH */
679 		case 'c':
680 			flags |= NOSKIP;
681 			c = CT_CHAR;
682 			break;
683 
684 		case 'p':	/* pointer format is like hex */
685 			flags |= POINTER;
686 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
687 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
688 			base = 16;
689 			break;
690 
691 		case 'n':
692 			if (flags & SUPPRESS)	/* ??? */
693 				continue;
694 			if (flags & SHORTSHORT)
695 				*va_arg(ap, char *) = nread;
696 			else if (flags & SHORT)
697 				*va_arg(ap, short *) = nread;
698 			else if (flags & LONG)
699 				*va_arg(ap, long *) = nread;
700 			else if (flags & LONGLONG)
701 				*va_arg(ap, long long *) = nread;
702 			else if (flags & INTMAXT)
703 				*va_arg(ap, intmax_t *) = nread;
704 			else if (flags & SIZET)
705 				*va_arg(ap, size_t *) = nread;
706 			else if (flags & PTRDIFFT)
707 				*va_arg(ap, ptrdiff_t *) = nread;
708 			else
709 				*va_arg(ap, int *) = nread;
710 			continue;
711 
712 		default:
713 			goto match_failure;
714 
715 		/*
716 		 * Disgusting backwards compatibility hack.	XXX
717 		 */
718 		case '\0':	/* compat */
719 			return (EOF);
720 		}
721 
722 		/*
723 		 * We have a conversion that requires input.
724 		 */
725 		if (fp->_r <= 0 && __srefill(fp))
726 			goto input_failure;
727 
728 		/*
729 		 * Consume leading white space, except for formats
730 		 * that suppress this.
731 		 */
732 		if ((flags & NOSKIP) == 0) {
733 			while (isspace(*fp->_p)) {
734 				nread++;
735 				if (--fp->_r > 0)
736 					fp->_p++;
737 				else if (__srefill(fp))
738 					goto input_failure;
739 			}
740 			/*
741 			 * Note that there is at least one character in
742 			 * the buffer, so conversions that do not set NOSKIP
743 			 * ca no longer result in an input failure.
744 			 */
745 		}
746 
747 		/*
748 		 * Do the conversion.
749 		 */
750 		switch (c) {
751 
752 		case CT_CHAR:
753 			/* scan arbitrary characters (sets NOSKIP) */
754 			if (width == 0)
755 				width = 1;
756 			if (flags & LONG) {
757 				nr = convert_wchar(fp, GETARG(wchar_t *),
758 				    width, locale);
759 			} else {
760 				nr = convert_char(fp, GETARG(char *), width);
761 			}
762 			if (nr < 0)
763 				goto input_failure;
764 			break;
765 
766 		case CT_CCL:
767 			/* scan a (nonempty) character class (sets NOSKIP) */
768 			if (width == 0)
769 				width = (size_t)~0;	/* `infinity' */
770 			if (flags & LONG) {
771 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
772 				    ccltab, locale);
773 			} else {
774 				nr = convert_ccl(fp, GETARG(char *), width,
775 				    ccltab);
776 			}
777 			if (nr <= 0) {
778 				if (nr < 0)
779 					goto input_failure;
780 				else /* nr == 0 */
781 					goto match_failure;
782 			}
783 			break;
784 
785 		case CT_STRING:
786 			/* like CCL, but zero-length string OK, & no NOSKIP */
787 			if (width == 0)
788 				width = (size_t)~0;
789 			if (flags & LONG) {
790 				nr = convert_wstring(fp, GETARG(wchar_t *),
791 				    width, locale);
792 			} else {
793 				nr = convert_string(fp, GETARG(char *), width);
794 			}
795 			if (nr < 0)
796 				goto input_failure;
797 			break;
798 
799 		case CT_INT:
800 			/* scan an integer as if by the conversion function */
801 #ifdef hardway
802 			if (width == 0 || width > sizeof(buf) - 1)
803 				width = sizeof(buf) - 1;
804 #else
805 			/* size_t is unsigned, hence this optimisation */
806 			if (--width > sizeof(buf) - 2)
807 				width = sizeof(buf) - 2;
808 			width++;
809 #endif
810 			nr = parseint(fp, buf, width, base);
811 			if (nr == 0)
812 				goto match_failure;
813 			if ((flags & SUPPRESS) == 0) {
814 				uintmax_t res;
815 
816 				buf[nr] = '\0';
817 				if ((flags & UNSIGNED) == 0)
818 				    res = strtoimax_l(buf, (char **)NULL, base, locale);
819 				else
820 				    res = strtoumax_l(buf, (char **)NULL, base, locale);
821 				if (flags & POINTER)
822 					*va_arg(ap, void **) =
823 							(void *)(uintptr_t)res;
824 				else if (flags & SHORTSHORT)
825 					*va_arg(ap, char *) = res;
826 				else if (flags & SHORT)
827 					*va_arg(ap, short *) = res;
828 				else if (flags & LONG)
829 					*va_arg(ap, long *) = res;
830 				else if (flags & LONGLONG)
831 					*va_arg(ap, long long *) = res;
832 				else if (flags & INTMAXT)
833 					*va_arg(ap, intmax_t *) = res;
834 				else if (flags & PTRDIFFT)
835 					*va_arg(ap, ptrdiff_t *) = res;
836 				else if (flags & SIZET)
837 					*va_arg(ap, size_t *) = res;
838 				else
839 					*va_arg(ap, int *) = res;
840 			}
841 			break;
842 
843 #ifndef NO_FLOATING_POINT
844 		case CT_FLOAT:
845 			/* scan a floating point number as if by strtod */
846 			if (width == 0 || width > sizeof(buf) - 1)
847 				width = sizeof(buf) - 1;
848 			nr = parsefloat(fp, buf, buf + width, locale);
849 			if (nr == 0)
850 				goto match_failure;
851 			if ((flags & SUPPRESS) == 0) {
852 				if (flags & LONGDBL) {
853 					long double res = strtold_l(buf, NULL,
854 					    locale);
855 					*va_arg(ap, long double *) = res;
856 				} else if (flags & LONG) {
857 					double res = strtod_l(buf, NULL,
858 					    locale);
859 					*va_arg(ap, double *) = res;
860 				} else {
861 					float res = strtof_l(buf, NULL, locale);
862 					*va_arg(ap, float *) = res;
863 				}
864 			}
865 			break;
866 #endif /* !NO_FLOATING_POINT */
867 		}
868 		if (!(flags & SUPPRESS))
869 			nassigned++;
870 		nread += nr;
871 		nconversions++;
872 	}
873 input_failure:
874 	return (nconversions != 0 ? nassigned : EOF);
875 match_failure:
876 	return (nassigned);
877 }
878 
879 /*
880  * Fill in the given table from the scanset at the given format
881  * (just after `[').  Return a pointer to the character past the
882  * closing `]'.  The table has a 1 wherever characters should be
883  * considered part of the scanset.
884  */
885 static const u_char *
886 __sccl(char *tab, const u_char *fmt)
887 {
888 	int c, n, v, i;
889 	struct xlocale_collate *table =
890 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
891 
892 	/* first `clear' the whole table */
893 	c = *fmt++;		/* first char hat => negated scanset */
894 	if (c == '^') {
895 		v = 1;		/* default => accept */
896 		c = *fmt++;	/* get new first char */
897 	} else
898 		v = 0;		/* default => reject */
899 
900 	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
901 	(void) memset(tab, v, 256);
902 
903 	if (c == 0)
904 		return (fmt - 1);/* format ended before closing ] */
905 
906 	/*
907 	 * Now set the entries corresponding to the actual scanset
908 	 * to the opposite of the above.
909 	 *
910 	 * The first character may be ']' (or '-') without being special;
911 	 * the last character may be '-'.
912 	 */
913 	v = 1 - v;
914 	for (;;) {
915 		tab[c] = v;		/* take character c */
916 doswitch:
917 		n = *fmt++;		/* and examine the next */
918 		switch (n) {
919 
920 		case 0:			/* format ended too soon */
921 			return (fmt - 1);
922 
923 		case '-':
924 			/*
925 			 * A scanset of the form
926 			 *	[01+-]
927 			 * is defined as `the digit 0, the digit 1,
928 			 * the character +, the character -', but
929 			 * the effect of a scanset such as
930 			 *	[a-zA-Z0-9]
931 			 * is implementation defined.  The V7 Unix
932 			 * scanf treats `a-z' as `the letters a through
933 			 * z', but treats `a-a' as `the letter a, the
934 			 * character -, and the letter a'.
935 			 *
936 			 * For compatibility, the `-' is not considered
937 			 * to define a range if the character following
938 			 * it is either a close bracket (required by ANSI)
939 			 * or is not numerically greater than the character
940 			 * we just stored in the table (c).
941 			 */
942 			n = *fmt;
943 			if (n == ']'
944 			    || (table->__collate_load_error ? n < c :
945 				__collate_range_cmp(n, c) < 0
946 			       )
947 			   ) {
948 				c = '-';
949 				break;	/* resume the for(;;) */
950 			}
951 			fmt++;
952 			/* fill in the range */
953 			if (table->__collate_load_error) {
954 				do {
955 					tab[++c] = v;
956 				} while (c < n);
957 			} else {
958 				for (i = 0; i < 256; i ++)
959 					if (__collate_range_cmp(c, i) <= 0 &&
960 					    __collate_range_cmp(i, n) <= 0
961 					   )
962 						tab[i] = v;
963 			}
964 #if 1	/* XXX another disgusting compatibility hack */
965 			c = n;
966 			/*
967 			 * Alas, the V7 Unix scanf also treats formats
968 			 * such as [a-c-e] as `the letters a through e'.
969 			 * This too is permitted by the standard....
970 			 */
971 			goto doswitch;
972 #else
973 			c = *fmt++;
974 			if (c == 0)
975 				return (fmt - 1);
976 			if (c == ']')
977 				return (fmt);
978 #endif
979 			break;
980 
981 		case ']':		/* end of scanset */
982 			return (fmt);
983 
984 		default:		/* just another character */
985 			c = n;
986 			break;
987 		}
988 	}
989 	/* NOTREACHED */
990 }
991 
992 #ifndef NO_FLOATING_POINT
993 static int
994 parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
995 {
996 	char *commit, *p;
997 	int infnanpos = 0, decptpos = 0;
998 	enum {
999 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
1000 		S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
1001 	} state = S_START;
1002 	unsigned char c;
1003 	const char *decpt = localeconv_l(locale)->decimal_point;
1004 	_Bool gotmantdig = 0, ishex = 0;
1005 
1006 	/*
1007 	 * We set commit = p whenever the string we have read so far
1008 	 * constitutes a valid representation of a floating point
1009 	 * number by itself.  At some point, the parse will complete
1010 	 * or fail, and we will ungetc() back to the last commit point.
1011 	 * To ensure that the file offset gets updated properly, it is
1012 	 * always necessary to read at least one character that doesn't
1013 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
1014 	 */
1015 	commit = buf - 1;
1016 	for (p = buf; p < end; ) {
1017 		c = *fp->_p;
1018 reswitch:
1019 		switch (state) {
1020 		case S_START:
1021 			state = S_GOTSIGN;
1022 			if (c == '-' || c == '+')
1023 				break;
1024 			else
1025 				goto reswitch;
1026 		case S_GOTSIGN:
1027 			switch (c) {
1028 			case '0':
1029 				state = S_MAYBEHEX;
1030 				commit = p;
1031 				break;
1032 			case 'I':
1033 			case 'i':
1034 				state = S_INF;
1035 				break;
1036 			case 'N':
1037 			case 'n':
1038 				state = S_NAN;
1039 				break;
1040 			default:
1041 				state = S_DIGITS;
1042 				goto reswitch;
1043 			}
1044 			break;
1045 		case S_INF:
1046 			if (infnanpos > 6 ||
1047 			    (c != "nfinity"[infnanpos] &&
1048 			     c != "NFINITY"[infnanpos]))
1049 				goto parsedone;
1050 			if (infnanpos == 1 || infnanpos == 6)
1051 				commit = p;	/* inf or infinity */
1052 			infnanpos++;
1053 			break;
1054 		case S_NAN:
1055 			switch (infnanpos) {
1056 			case 0:
1057 				if (c != 'A' && c != 'a')
1058 					goto parsedone;
1059 				break;
1060 			case 1:
1061 				if (c != 'N' && c != 'n')
1062 					goto parsedone;
1063 				else
1064 					commit = p;
1065 				break;
1066 			case 2:
1067 				if (c != '(')
1068 					goto parsedone;
1069 				break;
1070 			default:
1071 				if (c == ')') {
1072 					commit = p;
1073 					state = S_DONE;
1074 				} else if (!isalnum(c) && c != '_')
1075 					goto parsedone;
1076 				break;
1077 			}
1078 			infnanpos++;
1079 			break;
1080 		case S_DONE:
1081 			goto parsedone;
1082 		case S_MAYBEHEX:
1083 			state = S_DIGITS;
1084 			if (c == 'X' || c == 'x') {
1085 				ishex = 1;
1086 				break;
1087 			} else {	/* we saw a '0', but no 'x' */
1088 				gotmantdig = 1;
1089 				goto reswitch;
1090 			}
1091 		case S_DIGITS:
1092 			if ((ishex && isxdigit(c)) || isdigit(c)) {
1093 				gotmantdig = 1;
1094 				commit = p;
1095 				break;
1096 			} else {
1097 				state = S_DECPT;
1098 				goto reswitch;
1099 			}
1100 		case S_DECPT:
1101 			if (c == decpt[decptpos]) {
1102 				if (decpt[++decptpos] == '\0') {
1103 					/* We read the complete decpt seq. */
1104 					state = S_FRAC;
1105 					if (gotmantdig)
1106 						commit = p;
1107 				}
1108 				break;
1109 			} else if (!decptpos) {
1110 				/* We didn't read any decpt characters. */
1111 				state = S_FRAC;
1112 				goto reswitch;
1113 			} else {
1114 				/*
1115 				 * We read part of a multibyte decimal point,
1116 				 * but the rest is invalid, so bail.
1117 				 */
1118 				goto parsedone;
1119 			}
1120 		case S_FRAC:
1121 			if (((c == 'E' || c == 'e') && !ishex) ||
1122 			    ((c == 'P' || c == 'p') && ishex)) {
1123 				if (!gotmantdig)
1124 					goto parsedone;
1125 				else
1126 					state = S_EXP;
1127 			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
1128 				commit = p;
1129 				gotmantdig = 1;
1130 			} else
1131 				goto parsedone;
1132 			break;
1133 		case S_EXP:
1134 			state = S_EXPDIGITS;
1135 			if (c == '-' || c == '+')
1136 				break;
1137 			else
1138 				goto reswitch;
1139 		case S_EXPDIGITS:
1140 			if (isdigit(c))
1141 				commit = p;
1142 			else
1143 				goto parsedone;
1144 			break;
1145 		default:
1146 			abort();
1147 		}
1148 		*p++ = c;
1149 		if (--fp->_r > 0)
1150 			fp->_p++;
1151 		else if (__srefill(fp))
1152 			break;	/* EOF */
1153 	}
1154 
1155 parsedone:
1156 	while (commit < --p)
1157 		__ungetc(*(u_char *)p, fp);
1158 	*++commit = '\0';
1159 	return (commit - buf);
1160 }
1161 #endif
1162