xref: /freebsd/lib/libc/stdio/vfscanf.c (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Copyright (c) 2011 The FreeBSD Foundation
8  *
9  * Portions of this software were developed by David Chisnall
10  * under sponsorship from the FreeBSD Foundation.
11  *
12  * This code is derived from software contributed to Berkeley by
13  * Chris Torek.
14  *
15  * Redistribution and use in source and binary forms, with or without
16  * modification, are permitted provided that the following conditions
17  * are met:
18  * 1. Redistributions of source code must retain the above copyright
19  *    notice, this list of conditions and the following disclaimer.
20  * 2. Redistributions in binary form must reproduce the above copyright
21  *    notice, this list of conditions and the following disclaimer in the
22  *    documentation and/or other materials provided with the distribution.
23  * 3. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #if defined(LIBC_SCCS) && !defined(lint)
41 static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
42 #endif /* LIBC_SCCS and not lint */
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45 
46 #include "namespace.h"
47 #include <ctype.h>
48 #include <inttypes.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <stddef.h>
52 #include <stdarg.h>
53 #include <string.h>
54 #include <wchar.h>
55 #include <wctype.h>
56 #include "un-namespace.h"
57 
58 #include "collate.h"
59 #include "libc_private.h"
60 #include "local.h"
61 #include "xlocale_private.h"
62 
63 #ifndef NO_FLOATING_POINT
64 #include <locale.h>
65 #endif
66 
67 #define	BUF		513	/* Size of the numeric conversion buffer, including the NUL. */
68 
69 /*
70  * Flags used during conversion.
71  */
72 #define	LONG		0x01	/* l: long or double; also wide char/string (%C, %S set it) */
73 #define	LONGDBL		0x02	/* L: long double */
74 #define	SHORT		0x04	/* h: short */
75 #define	SUPPRESS	0x08	/* *: suppress assignment */
76 #define	POINTER		0x10	/* p: void * (as hex) */
77 #define	NOSKIP		0x20	/* [ or c: do not skip blanks */
78 #define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
79 #define	INTMAXT		0x800	/* j: intmax_t */
80 #define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
81 #define	SIZET		0x2000	/* z: size_t */
82 #define	SHORTSHORT	0x4000	/* hh: char */
83 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
84 
85 /*
86  * The following are used in integral conversions only:
87  * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
88  */
89 #define	SIGNOK		0x40	/* +/- is (still) legal */
90 #define	NDIGITS		0x80	/* no digits detected */
91 #define	PFXOK		0x100	/* 0x prefix is (still) legal */
92 #define	NZDIGITS	0x200	/* no zero digits detected */
93 #define	HAVESIGN	0x10000	/* sign detected */
94 
95 /*
96  * Conversion types.
97  */
98 #define	CT_CHAR		0	/* %c conversion */
99 #define	CT_CCL		1	/* %[...] conversion */
100 #define	CT_STRING	2	/* %s conversion */
101 #define	CT_INT		3	/* %[dioupxX] conversion */
102 #define	CT_FLOAT	4	/* %[aefgAEFG] conversion */
103 
103 
/* Builds the membership table for a %[...] scanset; defined below. */
104 static const u_char *__sccl(char *, const u_char *);
105 #ifndef NO_FLOATING_POINT
/* Collects the text of a floating-point number into a buffer; defined below. */
106 static int parsefloat(FILE *, char *, char *, locale_t);
107 #endif
108 
109 __weak_reference(__vfscanf, vfscanf);
110 
111 /*
112  * Conversion functions are passed a pointer to this object instead of
113  * a real parameter to indicate that the assignment-suppression (*)
114  * flag was specified.  We could use a NULL pointer to indicate this,
115  * but that would mask bugs in applications that call scanf() with a
116  * NULL pointer.
117  */
118 static const int suppress;
119 #define	SUPPRESS_PTR	((void *)&suppress)
120 
/*
 * Zero-initialized conversion state; the wide-character conversion
 * functions copy it to start each conversion from a fresh state.
 */
121 static const mbstate_t initial_mbs;
122 
123 /*
124  * The following conversion functions return the number of characters consumed,
125  * or -1 on input failure.  Character class conversion returns 0 on match
126  * failure.
127  */
128 
129 static __inline int
130 convert_char(FILE *fp, char * p, int width)
131 {
132 	int n;
133 
134 	if (p == SUPPRESS_PTR) {
135 		size_t sum = 0;
136 		for (;;) {
137 			if ((n = fp->_r) < width) {
138 				sum += n;
139 				width -= n;
140 				fp->_p += n;
141 				if (__srefill(fp)) {
142 					if (sum == 0)
143 						return (-1);
144 					break;
145 				}
146 			} else {
147 				sum += width;
148 				fp->_r -= width;
149 				fp->_p += width;
150 				break;
151 			}
152 		}
153 		return (sum);
154 	} else {
155 		size_t r = __fread(p, 1, width, fp);
156 
157 		if (r == 0)
158 			return (-1);
159 		return (r);
160 	}
161 }
162 
163 static __inline int
164 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
165 {
166 	mbstate_t mbs;
167 	int n, nread;
168 	wint_t wi;
169 
170 	mbs = initial_mbs;
171 	n = 0;
172 	while (width-- != 0 &&
173 	    (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
174 		if (wcp != SUPPRESS_PTR)
175 			*wcp++ = (wchar_t)wi;
176 		n += nread;
177 	}
178 	if (n == 0)
179 		return (-1);
180 	return (n);
181 }
182 
183 static __inline int
184 convert_ccl(FILE *fp, char * p, int width, const char *ccltab)
185 {
186 	char *p0;
187 	int n;
188 
189 	if (p == SUPPRESS_PTR) {
190 		n = 0;
191 		while (ccltab[*fp->_p]) {
192 			n++, fp->_r--, fp->_p++;
193 			if (--width == 0)
194 				break;
195 			if (fp->_r <= 0 && __srefill(fp)) {
196 				if (n == 0)
197 					return (-1);
198 				break;
199 			}
200 		}
201 	} else {
202 		p0 = p;
203 		while (ccltab[*fp->_p]) {
204 			fp->_r--;
205 			*p++ = *fp->_p++;
206 			if (--width == 0)
207 				break;
208 			if (fp->_r <= 0 && __srefill(fp)) {
209 				if (p == p0)
210 					return (-1);
211 				break;
212 			}
213 		}
214 		n = p - p0;
215 		if (n == 0)
216 			return (0);
217 		*p = 0;
218 	}
219 	return (n);
220 }
221 
222 static __inline int
223 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
224     locale_t locale)
225 {
226 	mbstate_t mbs;
227 	wint_t wi;
228 	int n, nread;
229 
230 	mbs = initial_mbs;
231 	n = 0;
232 	if (wcp == SUPPRESS_PTR) {
233 		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
234 		    width-- != 0 && ccltab[wctob(wi)])
235 			n += nread;
236 		if (wi != WEOF)
237 			__ungetwc(wi, fp, __get_locale());
238 	} else {
239 		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
240 		    width-- != 0 && ccltab[wctob(wi)]) {
241 			*wcp++ = (wchar_t)wi;
242 			n += nread;
243 		}
244 		if (wi != WEOF)
245 			__ungetwc(wi, fp, __get_locale());
246 		if (n == 0)
247 			return (0);
248 		*wcp = 0;
249 	}
250 	return (n);
251 }
252 
253 static __inline int
254 convert_string(FILE *fp, char * p, int width)
255 {
256 	char *p0;
257 	int n;
258 
259 	if (p == SUPPRESS_PTR) {
260 		n = 0;
261 		while (!isspace(*fp->_p)) {
262 			n++, fp->_r--, fp->_p++;
263 			if (--width == 0)
264 				break;
265 			if (fp->_r <= 0 && __srefill(fp))
266 				break;
267 		}
268 	} else {
269 		p0 = p;
270 		while (!isspace(*fp->_p)) {
271 			fp->_r--;
272 			*p++ = *fp->_p++;
273 			if (--width == 0)
274 				break;
275 			if (fp->_r <= 0 && __srefill(fp))
276 				break;
277 		}
278 		*p = 0;
279 		n = p - p0;
280 	}
281 	return (n);
282 }
283 
284 static __inline int
285 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
286 {
287 	mbstate_t mbs;
288 	wint_t wi;
289 	int n, nread;
290 
291 	mbs = initial_mbs;
292 	n = 0;
293 	if (wcp == SUPPRESS_PTR) {
294 		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
295 		    width-- != 0 && !iswspace(wi))
296 			n += nread;
297 		if (wi != WEOF)
298 			__ungetwc(wi, fp, __get_locale());
299 	} else {
300 		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
301 		    width-- != 0 && !iswspace(wi)) {
302 			*wcp++ = (wchar_t)wi;
303 			n += nread;
304 		}
305 		if (wi != WEOF)
306 			__ungetwc(wi, fp, __get_locale());
307 		*wcp = '\0';
308 	}
309 	return (n);
310 }
311 
312 /*
313  * Read an integer, storing it in buf.  The only relevant bit in the
314  * flags argument is PFXOK.
315  *
316  * Return 0 on a match failure, and the number of characters read
317  * otherwise.
318  */
319 static __inline int
320 parseint(FILE *fp, char * __restrict buf, int width, int base, int flags)
321 {
322 	/* `basefix' is used to avoid `if' tests */
323 	static const short basefix[17] =
324 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
325 	char *p;
326 	int c;
327 
328 	flags |= SIGNOK | NDIGITS | NZDIGITS;
329 	for (p = buf; width; width--) {
330 		c = *fp->_p;
331 		/*
332 		 * Switch on the character; `goto ok' if we accept it
333 		 * as a part of number.
334 		 */
335 		switch (c) {
336 
337 		/*
338 		 * The digit 0 is always legal, but is special.  For
339 		 * %i conversions, if no digits (zero or nonzero) have
340 		 * been scanned (only signs), we will have base==0.
341 		 * In that case, we should set it to 8 and enable 0x
342 		 * prefixing.  Also, if we have not scanned zero
343 		 * digits before this, do not turn off prefixing
344 		 * (someone else will turn it off if we have scanned
345 		 * any nonzero digits).
346 		 */
347 		case '0':
348 			if (base == 0) {
349 				base = 8;
350 				flags |= PFXOK;
351 			}
352 			if (flags & NZDIGITS)
353 				flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
354 			else
355 				flags &= ~(SIGNOK|PFXOK|NDIGITS);
356 			goto ok;
357 
358 		/* 1 through 7 always legal */
359 		case '1': case '2': case '3':
360 		case '4': case '5': case '6': case '7':
361 			base = basefix[base];
362 			flags &= ~(SIGNOK | PFXOK | NDIGITS);
363 			goto ok;
364 
365 		/* digits 8 and 9 ok iff decimal or hex */
366 		case '8': case '9':
367 			base = basefix[base];
368 			if (base <= 8)
369 				break;	/* not legal here */
370 			flags &= ~(SIGNOK | PFXOK | NDIGITS);
371 			goto ok;
372 
373 		/* letters ok iff hex */
374 		case 'A': case 'B': case 'C':
375 		case 'D': case 'E': case 'F':
376 		case 'a': case 'b': case 'c':
377 		case 'd': case 'e': case 'f':
378 			/* no need to fix base here */
379 			if (base <= 10)
380 				break;	/* not legal here */
381 			flags &= ~(SIGNOK | PFXOK | NDIGITS);
382 			goto ok;
383 
384 		/* sign ok only as first character */
385 		case '+': case '-':
386 			if (flags & SIGNOK) {
387 				flags &= ~SIGNOK;
388 				flags |= HAVESIGN;
389 				goto ok;
390 			}
391 			break;
392 
393 		/*
394 		 * x ok iff flag still set & 2nd char (or 3rd char if
395 		 * we have a sign).
396 		 */
397 		case 'x': case 'X':
398 			if (flags & PFXOK && p ==
399 			    buf + 1 + !!(flags & HAVESIGN)) {
400 				base = 16;	/* if %i */
401 				flags &= ~PFXOK;
402 				goto ok;
403 			}
404 			break;
405 		}
406 
407 		/*
408 		 * If we got here, c is not a legal character for a
409 		 * number.  Stop accumulating digits.
410 		 */
411 		break;
412 	ok:
413 		/*
414 		 * c is legal: store it and look at the next.
415 		 */
416 		*p++ = c;
417 		if (--fp->_r > 0)
418 			fp->_p++;
419 		else if (__srefill(fp))
420 			break;		/* EOF */
421 	}
422 	/*
423 	 * If we had only a sign, it is no good; push back the sign.
424 	 * If the number ends in `x', it was [sign] '0' 'x', so push
425 	 * back the x and treat it as [sign] '0'.
426 	 */
427 	if (flags & NDIGITS) {
428 		if (p > buf)
429 			(void) __ungetc(*(u_char *)--p, fp);
430 		return (0);
431 	}
432 	c = ((u_char *)p)[-1];
433 	if (c == 'x' || c == 'X') {
434 		--p;
435 		(void) __ungetc(c, fp);
436 	}
437 	return (p - buf);
438 }
439 
/*
 * __vfscanf - locking front end to __svfscanf
 *
 * Holds the stream lock on fp for the duration of the scan and uses the
 * locale returned by __get_locale().  Returns whatever __svfscanf returns.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int nassigned;

	FLOCKFILE_CANCELSAFE(fp);
	nassigned = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (nassigned);
}
453 int
454 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
455 {
456 	int ret;
457 	FIX_LOCALE(locale);
458 
459 	FLOCKFILE_CANCELSAFE(fp);
460 	ret = __svfscanf(fp, locale, fmt0, ap);
461 	FUNLOCKFILE_CANCELSAFE();
462 	return (ret);
463 }
464 
465 /*
466  * __svfscanf - non-MT-safe version of __vfscanf
 *
 * Scans fp under control of the format fmt0, storing converted values
 * through the pointer arguments in ap.  The function takes no stream
 * lock itself.
 *
 * Returns the number of assignments performed.  EOF is returned instead
 * if an input failure occurs before any conversion has completed, or if
 * the format ends in the middle of a conversion specification.
467  */
468 int
469 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
470 {
/* Next pointer argument, or SUPPRESS_PTR when assignment is suppressed. */
471 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
472 	const u_char *fmt = (const u_char *)fmt0;
473 	int c;			/* character from format, or conversion */
474 	size_t width;		/* field width, or 0 */
475 	int flags;		/* flags as defined above */
476 	int nassigned;		/* number of fields assigned */
477 	int nconversions;	/* number of conversions */
478 	int nr;			/* characters read by the current conversion */
479 	int nread;		/* number of characters consumed from fp */
480 	int base;		/* base argument to conversion function */
481 	char ccltab[256];	/* character class table for %[...] */
482 	char buf[BUF];		/* buffer for numeric conversions */
483 
484 	ORIENT(fp, -1);
485 
486 	nassigned = 0;
487 	nconversions = 0;
488 	nread = 0;
489 	for (;;) {
490 		c = *fmt++;
491 		if (c == 0)
492 			return (nassigned);
		/* White space in the format skips any amount of input white space. */
493 		if (isspace(c)) {
494 			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
495 				nread++, fp->_r--, fp->_p++;
496 			continue;
497 		}
498 		if (c != '%')
499 			goto literal;
500 		width = 0;
501 		flags = 0;
502 		/*
503 		 * switch on the format.  continue if done;
504 		 * break once format type is derived.
505 		 */
506 again:		c = *fmt++;
507 		switch (c) {
508 		case '%':
509 literal:
			/* An ordinary format character (or %%) must match the input exactly. */
510 			if (fp->_r <= 0 && __srefill(fp))
511 				goto input_failure;
512 			if (*fp->_p != c)
513 				goto match_failure;
514 			fp->_r--, fp->_p++;
515 			nread++;
516 			continue;
517 
518 		case '*':
519 			flags |= SUPPRESS;
520 			goto again;
521 		case 'j':
522 			flags |= INTMAXT;
523 			goto again;
524 		case 'l':
			/* A second 'l' upgrades LONG to LONGLONG. */
525 			if (flags & LONG) {
526 				flags &= ~LONG;
527 				flags |= LONGLONG;
528 			} else
529 				flags |= LONG;
530 			goto again;
531 		case 'q':
532 			flags |= LONGLONG;	/* not quite */
533 			goto again;
534 		case 't':
535 			flags |= PTRDIFFT;
536 			goto again;
537 		case 'z':
538 			flags |= SIZET;
539 			goto again;
540 		case 'L':
541 			flags |= LONGDBL;
542 			goto again;
543 		case 'h':
			/* A second 'h' upgrades SHORT to SHORTSHORT. */
544 			if (flags & SHORT) {
545 				flags &= ~SHORT;
546 				flags |= SHORTSHORT;
547 			} else
548 				flags |= SHORT;
549 			goto again;
550 
551 		case '0': case '1': case '2': case '3': case '4':
552 		case '5': case '6': case '7': case '8': case '9':
553 			width = width * 10 + c - '0';
554 			goto again;
555 
556 		/*
557 		 * Conversions.
558 		 */
559 		case 'd':
560 			c = CT_INT;
561 			base = 10;
562 			break;
563 
564 		case 'i':
565 			c = CT_INT;
566 			base = 0;
567 			break;
568 
569 		case 'o':
570 			c = CT_INT;
571 			flags |= UNSIGNED;
572 			base = 8;
573 			break;
574 
575 		case 'u':
576 			c = CT_INT;
577 			flags |= UNSIGNED;
578 			base = 10;
579 			break;
580 
581 		case 'X':
582 		case 'x':
583 			flags |= PFXOK;	/* enable 0x prefixing */
584 			c = CT_INT;
585 			flags |= UNSIGNED;
586 			base = 16;
587 			break;
588 
589 #ifndef NO_FLOATING_POINT
590 		case 'A': case 'E': case 'F': case 'G':
591 		case 'a': case 'e': case 'f': case 'g':
592 			c = CT_FLOAT;
593 			break;
594 #endif
595 
596 		case 'S':
597 			flags |= LONG;
598 			/* FALLTHROUGH */
599 		case 's':
600 			c = CT_STRING;
601 			break;
602 
603 		case '[':
			/* __sccl() fills ccltab and returns the format position after the scanset. */
604 			fmt = __sccl(ccltab, fmt);
605 			flags |= NOSKIP;
606 			c = CT_CCL;
607 			break;
608 
609 		case 'C':
610 			flags |= LONG;
611 			/* FALLTHROUGH */
612 		case 'c':
613 			flags |= NOSKIP;
614 			c = CT_CHAR;
615 			break;
616 
617 		case 'p':	/* pointer format is like hex */
618 			flags |= POINTER | PFXOK;
619 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
620 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
621 			base = 16;
622 			break;
623 
624 		case 'n':
			/* Store the input count so far; this is not counted as an assignment. */
625 			if (flags & SUPPRESS)	/* ??? */
626 				continue;
627 			if (flags & SHORTSHORT)
628 				*va_arg(ap, char *) = nread;
629 			else if (flags & SHORT)
630 				*va_arg(ap, short *) = nread;
631 			else if (flags & LONG)
632 				*va_arg(ap, long *) = nread;
633 			else if (flags & LONGLONG)
634 				*va_arg(ap, long long *) = nread;
635 			else if (flags & INTMAXT)
636 				*va_arg(ap, intmax_t *) = nread;
637 			else if (flags & SIZET)
638 				*va_arg(ap, size_t *) = nread;
639 			else if (flags & PTRDIFFT)
640 				*va_arg(ap, ptrdiff_t *) = nread;
641 			else
642 				*va_arg(ap, int *) = nread;
643 			continue;
644 
645 		default:
646 			goto match_failure;
647 
648 		/*
649 		 * Disgusting backwards compatibility hack.	XXX
650 		 */
651 		case '\0':	/* compat */
652 			return (EOF);
653 		}
654 
655 		/*
656 		 * We have a conversion that requires input.
657 		 */
658 		if (fp->_r <= 0 && __srefill(fp))
659 			goto input_failure;
660 
661 		/*
662 		 * Consume leading white space, except for formats
663 		 * that suppress this.
664 		 */
665 		if ((flags & NOSKIP) == 0) {
666 			while (isspace(*fp->_p)) {
667 				nread++;
668 				if (--fp->_r > 0)
669 					fp->_p++;
670 				else if (__srefill(fp))
671 					goto input_failure;
672 			}
673 			/*
674 			 * Note that there is at least one character in
675 			 * the buffer, so conversions that do not set NOSKIP
676 			 * can no longer result in an input failure.
677 			 */
678 		}
679 
680 		/*
681 		 * Do the conversion.
682 		 */
683 		switch (c) {
684 
685 		case CT_CHAR:
686 			/* scan arbitrary characters (sets NOSKIP) */
687 			if (width == 0)
688 				width = 1;
689 			if (flags & LONG) {
690 				nr = convert_wchar(fp, GETARG(wchar_t *),
691 				    width, locale);
692 			} else {
693 				nr = convert_char(fp, GETARG(char *), width);
694 			}
695 			if (nr < 0)
696 				goto input_failure;
697 			break;
698 
699 		case CT_CCL:
700 			/* scan a (nonempty) character class (sets NOSKIP) */
701 			if (width == 0)
702 				width = (size_t)~0;	/* `infinity' */
703 			if (flags & LONG) {
704 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
705 				    ccltab, locale);
706 			} else {
707 				nr = convert_ccl(fp, GETARG(char *), width,
708 				    ccltab);
709 			}
710 			if (nr <= 0) {
711 				if (nr < 0)
712 					goto input_failure;
713 				else /* nr == 0 */
714 					goto match_failure;
715 			}
716 			break;
717 
718 		case CT_STRING:
719 			/* like CCL, but zero-length string OK, & no NOSKIP */
720 			if (width == 0)
721 				width = (size_t)~0;
722 			if (flags & LONG) {
723 				nr = convert_wstring(fp, GETARG(wchar_t *),
724 				    width, locale);
725 			} else {
726 				nr = convert_string(fp, GETARG(char *), width);
727 			}
728 			if (nr < 0)
729 				goto input_failure;
730 			break;
731 
732 		case CT_INT:
733 			/* scan an integer as if by the conversion function */
734 #ifdef hardway
735 			if (width == 0 || width > sizeof(buf) - 1)
736 				width = sizeof(buf) - 1;
737 #else
738 			/* size_t is unsigned, hence this optimisation */
			/*
			 * A width of 0 wraps to the largest size_t when
			 * decremented, so one comparison clamps both "no
			 * width given" and "width too large" to
			 * sizeof(buf) - 1 once width is incremented again.
			 */
739 			if (--width > sizeof(buf) - 2)
740 				width = sizeof(buf) - 2;
741 			width++;
742 #endif
743 			nr = parseint(fp, buf, width, base, flags);
744 			if (nr == 0)
745 				goto match_failure;
746 			if ((flags & SUPPRESS) == 0) {
747 				uintmax_t res;
748 
749 				buf[nr] = '\0';
750 				if ((flags & UNSIGNED) == 0)
751 				    res = strtoimax_l(buf, (char **)NULL, base, locale);
752 				else
753 				    res = strtoumax_l(buf, (char **)NULL, base, locale);
754 				if (flags & POINTER)
755 					*va_arg(ap, void **) =
756 							(void *)(uintptr_t)res;
757 				else if (flags & SHORTSHORT)
758 					*va_arg(ap, char *) = res;
759 				else if (flags & SHORT)
760 					*va_arg(ap, short *) = res;
761 				else if (flags & LONG)
762 					*va_arg(ap, long *) = res;
763 				else if (flags & LONGLONG)
764 					*va_arg(ap, long long *) = res;
765 				else if (flags & INTMAXT)
766 					*va_arg(ap, intmax_t *) = res;
767 				else if (flags & PTRDIFFT)
768 					*va_arg(ap, ptrdiff_t *) = res;
769 				else if (flags & SIZET)
770 					*va_arg(ap, size_t *) = res;
771 				else
772 					*va_arg(ap, int *) = res;
773 			}
774 			break;
775 
776 #ifndef NO_FLOATING_POINT
777 		case CT_FLOAT:
778 			/* scan a floating point number as if by strtod */
779 			if (width == 0 || width > sizeof(buf) - 1)
780 				width = sizeof(buf) - 1;
781 			nr = parsefloat(fp, buf, buf + width, locale);
782 			if (nr == 0)
783 				goto match_failure;
784 			if ((flags & SUPPRESS) == 0) {
785 				if (flags & LONGDBL) {
786 					long double res = strtold_l(buf, NULL,
787 					    locale);
788 					*va_arg(ap, long double *) = res;
789 				} else if (flags & LONG) {
790 					double res = strtod_l(buf, NULL,
791 					    locale);
792 					*va_arg(ap, double *) = res;
793 				} else {
794 					float res = strtof_l(buf, NULL, locale);
795 					*va_arg(ap, float *) = res;
796 				}
797 			}
798 			break;
799 #endif /* !NO_FLOATING_POINT */
800 		}
801 		if (!(flags & SUPPRESS))
802 			nassigned++;
803 		nread += nr;
804 		nconversions++;
805 	}
806 input_failure:
	/* EOF only if no conversion completed before the input ran out. */
807 	return (nconversions != 0 ? nassigned : EOF);
808 match_failure:
809 	return (nassigned);
810 }
811 
812 /*
813  * Fill in the given table from the scanset at the given format
814  * (just after `[').  Return a pointer to the character past the
815  * closing `]'.  The table has a 1 wherever characters should be
816  * considered part of the scanset.
817  */
818 static const u_char *
819 __sccl(char *tab, const u_char *fmt)
820 {
821 	int c, n, v, i;
822 	struct xlocale_collate *table =
823 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
824 
825 	/* first `clear' the whole table */
826 	c = *fmt++;		/* first char hat => negated scanset */
827 	if (c == '^') {
828 		v = 1;		/* default => accept */
829 		c = *fmt++;	/* get new first char */
830 	} else
831 		v = 0;		/* default => reject */
832 
833 	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
834 	(void) memset(tab, v, 256);
835 
836 	if (c == 0)
837 		return (fmt - 1);/* format ended before closing ] */
838 
839 	/*
840 	 * Now set the entries corresponding to the actual scanset
841 	 * to the opposite of the above.
842 	 *
843 	 * The first character may be ']' (or '-') without being special;
844 	 * the last character may be '-'.
845 	 */
846 	v = 1 - v;
847 	for (;;) {
848 		tab[c] = v;		/* take character c */
849 doswitch:
850 		n = *fmt++;		/* and examine the next */
851 		switch (n) {
852 
853 		case 0:			/* format ended too soon */
854 			return (fmt - 1);
855 
856 		case '-':
857 			/*
858 			 * A scanset of the form
859 			 *	[01+-]
860 			 * is defined as `the digit 0, the digit 1,
861 			 * the character +, the character -', but
862 			 * the effect of a scanset such as
863 			 *	[a-zA-Z0-9]
864 			 * is implementation defined.  The V7 Unix
865 			 * scanf treats `a-z' as `the letters a through
866 			 * z', but treats `a-a' as `the letter a, the
867 			 * character -, and the letter a'.
868 			 *
869 			 * For compatibility, the `-' is not considered
870 			 * to define a range if the character following
871 			 * it is either a close bracket (required by ANSI)
872 			 * or is not numerically greater than the character
873 			 * we just stored in the table (c).
874 			 */
875 			n = *fmt;
876 			if (n == ']'
877 			    || (table->__collate_load_error ? n < c :
878 				__collate_range_cmp(n, c) < 0
879 			       )
880 			   ) {
881 				c = '-';
882 				break;	/* resume the for(;;) */
883 			}
884 			fmt++;
885 			/* fill in the range */
886 			if (table->__collate_load_error) {
887 				do {
888 					tab[++c] = v;
889 				} while (c < n);
890 			} else {
891 				for (i = 0; i < 256; i ++)
892 					if (__collate_range_cmp(c, i) <= 0 &&
893 					    __collate_range_cmp(i, n) <= 0
894 					   )
895 						tab[i] = v;
896 			}
897 #if 1	/* XXX another disgusting compatibility hack */
898 			c = n;
899 			/*
900 			 * Alas, the V7 Unix scanf also treats formats
901 			 * such as [a-c-e] as `the letters a through e'.
902 			 * This too is permitted by the standard....
903 			 */
904 			goto doswitch;
905 #else
906 			c = *fmt++;
907 			if (c == 0)
908 				return (fmt - 1);
909 			if (c == ']')
910 				return (fmt);
911 #endif
912 			break;
913 
914 		case ']':		/* end of scanset */
915 			return (fmt);
916 
917 		default:		/* just another character */
918 			c = n;
919 			break;
920 		}
921 	}
922 	/* NOTREACHED */
923 }
924 
925 #ifndef NO_FLOATING_POINT
926 static int
927 parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
928 {
929 	char *commit, *p;
930 	int infnanpos = 0, decptpos = 0;
931 	enum {
932 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
933 		S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
934 	} state = S_START;
935 	unsigned char c;
936 	const char *decpt = localeconv_l(locale)->decimal_point;
937 	_Bool gotmantdig = 0, ishex = 0;
938 
939 	/*
940 	 * We set commit = p whenever the string we have read so far
941 	 * constitutes a valid representation of a floating point
942 	 * number by itself.  At some point, the parse will complete
943 	 * or fail, and we will ungetc() back to the last commit point.
944 	 * To ensure that the file offset gets updated properly, it is
945 	 * always necessary to read at least one character that doesn't
946 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
947 	 */
948 	commit = buf - 1;
949 	for (p = buf; p < end; ) {
950 		c = *fp->_p;
951 reswitch:
952 		switch (state) {
953 		case S_START:
954 			state = S_GOTSIGN;
955 			if (c == '-' || c == '+')
956 				break;
957 			else
958 				goto reswitch;
959 		case S_GOTSIGN:
960 			switch (c) {
961 			case '0':
962 				state = S_MAYBEHEX;
963 				commit = p;
964 				break;
965 			case 'I':
966 			case 'i':
967 				state = S_INF;
968 				break;
969 			case 'N':
970 			case 'n':
971 				state = S_NAN;
972 				break;
973 			default:
974 				state = S_DIGITS;
975 				goto reswitch;
976 			}
977 			break;
978 		case S_INF:
979 			if (infnanpos > 6 ||
980 			    (c != "nfinity"[infnanpos] &&
981 			     c != "NFINITY"[infnanpos]))
982 				goto parsedone;
983 			if (infnanpos == 1 || infnanpos == 6)
984 				commit = p;	/* inf or infinity */
985 			infnanpos++;
986 			break;
987 		case S_NAN:
988 			switch (infnanpos) {
989 			case 0:
990 				if (c != 'A' && c != 'a')
991 					goto parsedone;
992 				break;
993 			case 1:
994 				if (c != 'N' && c != 'n')
995 					goto parsedone;
996 				else
997 					commit = p;
998 				break;
999 			case 2:
1000 				if (c != '(')
1001 					goto parsedone;
1002 				break;
1003 			default:
1004 				if (c == ')') {
1005 					commit = p;
1006 					state = S_DONE;
1007 				} else if (!isalnum(c) && c != '_')
1008 					goto parsedone;
1009 				break;
1010 			}
1011 			infnanpos++;
1012 			break;
1013 		case S_DONE:
1014 			goto parsedone;
1015 		case S_MAYBEHEX:
1016 			state = S_DIGITS;
1017 			if (c == 'X' || c == 'x') {
1018 				ishex = 1;
1019 				break;
1020 			} else {	/* we saw a '0', but no 'x' */
1021 				gotmantdig = 1;
1022 				goto reswitch;
1023 			}
1024 		case S_DIGITS:
1025 			if ((ishex && isxdigit(c)) || isdigit(c)) {
1026 				gotmantdig = 1;
1027 				commit = p;
1028 				break;
1029 			} else {
1030 				state = S_DECPT;
1031 				goto reswitch;
1032 			}
1033 		case S_DECPT:
1034 			if (c == decpt[decptpos]) {
1035 				if (decpt[++decptpos] == '\0') {
1036 					/* We read the complete decpt seq. */
1037 					state = S_FRAC;
1038 					if (gotmantdig)
1039 						commit = p;
1040 				}
1041 				break;
1042 			} else if (!decptpos) {
1043 				/* We didn't read any decpt characters. */
1044 				state = S_FRAC;
1045 				goto reswitch;
1046 			} else {
1047 				/*
1048 				 * We read part of a multibyte decimal point,
1049 				 * but the rest is invalid, so bail.
1050 				 */
1051 				goto parsedone;
1052 			}
1053 		case S_FRAC:
1054 			if (((c == 'E' || c == 'e') && !ishex) ||
1055 			    ((c == 'P' || c == 'p') && ishex)) {
1056 				if (!gotmantdig)
1057 					goto parsedone;
1058 				else
1059 					state = S_EXP;
1060 			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
1061 				commit = p;
1062 				gotmantdig = 1;
1063 			} else
1064 				goto parsedone;
1065 			break;
1066 		case S_EXP:
1067 			state = S_EXPDIGITS;
1068 			if (c == '-' || c == '+')
1069 				break;
1070 			else
1071 				goto reswitch;
1072 		case S_EXPDIGITS:
1073 			if (isdigit(c))
1074 				commit = p;
1075 			else
1076 				goto parsedone;
1077 			break;
1078 		default:
1079 			abort();
1080 		}
1081 		*p++ = c;
1082 		if (--fp->_r > 0)
1083 			fp->_p++;
1084 		else if (__srefill(fp))
1085 			break;	/* EOF */
1086 	}
1087 
1088 parsedone:
1089 	while (commit < --p)
1090 		__ungetc(*(u_char *)p, fp);
1091 	*++commit = '\0';
1092 	return (commit - buf);
1093 }
1094 #endif
1095