xref: /freebsd/lib/libc/stdio/vfscanf.c (revision 6486b015fc84e96725fef22b0e3363351399ae83)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Copyright (c) 2011 The FreeBSD Foundation
6  * All rights reserved.
7  * Portions of this software were developed by David Chisnall
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Chris Torek.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
40 #endif /* LIBC_SCCS and not lint */
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include "namespace.h"
45 #include <ctype.h>
46 #include <inttypes.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <stddef.h>
50 #include <stdarg.h>
51 #include <string.h>
52 #include <wchar.h>
53 #include <wctype.h>
54 #include "un-namespace.h"
55 
56 #include "collate.h"
57 #include "libc_private.h"
58 #include "local.h"
59 #include "xlocale_private.h"
60 
61 #ifndef NO_FLOATING_POINT
62 #include <locale.h>
63 #endif
64 
/*
 * Size of the scratch buffer used for numeric (integer and floating point)
 * conversions.  Field widths for those conversions are clamped to BUF - 1
 * so that a terminating NUL always fits.
 */
#define	BUF		513	/* Maximum length of numeric string. */
66 
/*
 * Flags used during conversion.
 *
 * These record the length modifiers and options seen while parsing one
 * conversion specification.  They share a single `flags' word with the
 * integer-parsing state flags defined further below, which is why the
 * values 0x40 through 0x200 and 0x10000 are not used here.
 */
#define	LONG		0x01	/* l: long or double; wchar_t for c, s, [ */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */
82 
/*
 * The following are used in integral conversions only:
 * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
 *
 * They track the state of parseint() while it scans a number.  PFXOK may
 * also be set by the caller (for %x, %X and %p) to allow a 0x prefix.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */
#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */
#define	HAVESIGN	0x10000	/* sign detected */
92 
/*
 * Conversion types.
 *
 * __svfscanf() maps each conversion character in the format onto one of
 * these codes and then dispatches on the code to perform the conversion.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */
101 
/* Build the %[...] scanset table; returns the format position after it. */
static const u_char *__sccl(char *, const u_char *);
#ifndef NO_FLOATING_POINT
/* Collect the text of a floating point number into a buffer. */
static int parsefloat(FILE *, char *, char *, locale_t);
#endif

/* NOTE(review): presumably exports vfscanf as a weak alias of __vfscanf. */
__weak_reference(__vfscanf, vfscanf);
108 
/*
 * Conversion functions are passed a pointer to this object instead of
 * a real parameter to indicate that the assignment-suppression (*)
 * flag was specified.  We could use a NULL pointer to indicate this,
 * but that would mask bugs in applications that call scanf() with a
 * NULL pointer.
 *
 * The object is const: SUPPRESS_PTR is only ever meant to be compared
 * against, never written through.
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/*
 * Zero-initialized conversion state, copied into a local mbstate_t
 * before each mbrtowc() call made by the wide-character conversions.
 */
static const mbstate_t initial_mbs;
120 
121 /*
122  * The following conversion functions return the number of characters consumed,
123  * or -1 on input failure.  Character class conversion returns 0 on match
124  * failure.
125  */
126 
127 static __inline int
128 convert_char(FILE *fp, char * __restrict p, int width)
129 {
130 	int n, nread;
131 
132 	nread = 0;
133 	if (p == SUPPRESS_PTR) {
134 		size_t sum = 0;
135 		for (;;) {
136 			if ((n = fp->_r) < width) {
137 				sum += n;
138 				width -= n;
139 				fp->_p += n;
140 				if (__srefill(fp)) {
141 					if (sum == 0)
142 						return (-1);
143 					break;
144 				}
145 			} else {
146 				sum += width;
147 				fp->_r -= width;
148 				fp->_p += width;
149 				break;
150 			}
151 		}
152 		nread += sum;
153 	} else {
154 		size_t r = __fread(p, 1, width, fp);
155 
156 		if (r == 0)
157 			return (-1);
158 		nread += r;
159 	}
160 	return (nread);
161 }
162 
163 static __inline int
164 convert_wchar(FILE *fp, wchar_t *wcp, int width)
165 {
166 	mbstate_t mbs;
167 	size_t nconv;
168 	int n, nread;
169 	char buf[MB_CUR_MAX];
170 
171 	nread = 0;
172 	n = 0;
173 	while (width != 0) {
174 		if (n == MB_CUR_MAX) {
175 			fp->_flags |= __SERR;
176 			return (-1);
177 		}
178 		buf[n++] = *fp->_p;
179 		fp->_p++;
180 		fp->_r--;
181 		mbs = initial_mbs;
182 		nconv = mbrtowc(wcp, buf, n, &mbs);
183 		if (nconv == (size_t)-1) {
184 			fp->_flags |= __SERR;
185 			return (-1);
186 		}
187 		if (nconv == 0 && wcp != SUPPRESS_PTR)
188 			*wcp = L'\0';
189 		if (nconv != (size_t)-2) {
190 			nread += n;
191 			width--;
192 			if (wcp != SUPPRESS_PTR)
193 				wcp++;
194 			n = 0;
195 		}
196 		if (fp->_r <= 0 && __srefill(fp)) {
197 			if (n != 0) {
198 				fp->_flags |= __SERR;
199 				return (-1);
200 			}
201 			break;
202 		}
203 	}
204 	return (nread);
205 }
206 
207 static __inline int
208 convert_ccl(FILE *fp, char * __restrict p, int width, const char *ccltab)
209 {
210 	char *p0;
211 	int n;
212 
213 	if (p == SUPPRESS_PTR) {
214 		n = 0;
215 		while (ccltab[*fp->_p]) {
216 			n++, fp->_r--, fp->_p++;
217 			if (--width == 0)
218 				break;
219 			if (fp->_r <= 0 && __srefill(fp)) {
220 				if (n == 0)
221 					return (-1);
222 				break;
223 			}
224 		}
225 	} else {
226 		p0 = p;
227 		while (ccltab[*fp->_p]) {
228 			fp->_r--;
229 			*p++ = *fp->_p++;
230 			if (--width == 0)
231 				break;
232 			if (fp->_r <= 0 && __srefill(fp)) {
233 				if (p == p0)
234 					return (-1);
235 				break;
236 			}
237 		}
238 		n = p - p0;
239 		if (n == 0)
240 			return (0);
241 		*p = 0;
242 	}
243 	return (n);
244 }
245 
246 static __inline int
247 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab)
248 {
249 	mbstate_t mbs;
250 	wchar_t twc;
251 	int n, nchars, nconv;
252 	char buf[MB_CUR_MAX];
253 
254 	if (wcp == SUPPRESS_PTR)
255 		wcp = &twc;
256 	n = 0;
257 	nchars = 0;
258 	while (width != 0) {
259 		if (n == MB_CUR_MAX) {
260 			fp->_flags |= __SERR;
261 			return (-1);
262 		}
263 		buf[n++] = *fp->_p;
264 		fp->_p++;
265 		fp->_r--;
266 		mbs = initial_mbs;
267 		nconv = mbrtowc(wcp, buf, n, &mbs);
268 		if (nconv == (size_t)-1) {
269 			fp->_flags |= __SERR;
270 			return (-1);
271 		}
272 		if (nconv == 0)
273 			*wcp = L'\0';
274 		if (nconv != (size_t)-2) {
275 			if (wctob(*wcp) != EOF && !ccltab[wctob(*wcp)]) {
276 				while (n != 0) {
277 					n--;
278 					__ungetc(buf[n], fp);
279 				}
280 				break;
281 			}
282 			width--;
283 			if (wcp != &twc)
284 				wcp++;
285 			nchars++;
286 			n = 0;
287 		}
288 		if (fp->_r <= 0 && __srefill(fp)) {
289 			if (n != 0) {
290 				fp->_flags |= __SERR;
291 				return (-1);
292 			}
293 			break;
294 		}
295 	}
296 	if (n != 0) {
297 		fp->_flags |= __SERR;
298 		return (-1);
299 	}
300 	if (nchars == 0)
301 		return (0);
302 	*wcp = L'\0';
303 	return (nchars);
304 }
305 
306 static __inline int
307 convert_string(FILE *fp, char * __restrict p, int width)
308 {
309 	char *p0;
310 	int n;
311 
312 	if (p == SUPPRESS_PTR) {
313 		n = 0;
314 		while (!isspace(*fp->_p)) {
315 			n++, fp->_r--, fp->_p++;
316 			if (--width == 0)
317 				break;
318 			if (fp->_r <= 0 && __srefill(fp))
319 				break;
320 		}
321 	} else {
322 		p0 = p;
323 		while (!isspace(*fp->_p)) {
324 			fp->_r--;
325 			*p++ = *fp->_p++;
326 			if (--width == 0)
327 				break;
328 			if (fp->_r <= 0 && __srefill(fp))
329 				break;
330 		}
331 		*p = 0;
332 		n = p - p0;
333 	}
334 	return (n);
335 }
336 
337 static __inline int
338 convert_wstring(FILE *fp, wchar_t *wcp, int width)
339 {
340 	mbstate_t mbs;
341 	wchar_t twc;
342 	int n, nconv, nread;
343 	char buf[MB_CUR_MAX];
344 
345 	if (wcp == SUPPRESS_PTR)
346 		wcp = &twc;
347 	n = nread = 0;
348 	while (!isspace(*fp->_p) && width != 0) {
349 		if (n == MB_CUR_MAX) {
350 			fp->_flags |= __SERR;
351 			return (-1);
352 		}
353 		buf[n++] = *fp->_p;
354 		fp->_p++;
355 		fp->_r--;
356 		mbs = initial_mbs;
357 		nconv = mbrtowc(wcp, buf, n, &mbs);
358 		if (nconv == (size_t)-1) {
359 			fp->_flags |= __SERR;
360 			return (-1);
361 		}
362 		if (nconv == 0)
363 			*wcp = L'\0';
364 		if (nconv != (size_t)-2) {
365 			if (iswspace(*wcp)) {
366 				while (n != 0) {
367 					n--;
368 					__ungetc(buf[n], fp);
369 				}
370 				break;
371 			}
372 			nread += n;
373 			width--;
374 			if (wcp != &twc)
375 				wcp++;
376 			n = 0;
377 		}
378 		if (fp->_r <= 0 && __srefill(fp)) {
379 			if (n != 0) {
380 				fp->_flags |= __SERR;
381 				return (-1);
382 			}
383 			break;
384 		}
385 	}
386 	*wcp = L'\0';
387 	return (nread);
388 }
389 
390 /*
391  * Read an integer, storing it in buf.  The only relevant bit in the
392  * flags argument is PFXOK.
393  *
394  * Return 0 on a match failure, and the number of characters read
395  * otherwise.
396  */
397 static __inline int
398 parseint(FILE *fp, char * __restrict buf, int width, int base, int flags)
399 {
400 	/* `basefix' is used to avoid `if' tests */
401 	static const short basefix[17] =
402 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
403 	char *p;
404 	int c;
405 
406 	flags |= SIGNOK | NDIGITS | NZDIGITS;
407 	for (p = buf; width; width--) {
408 		c = *fp->_p;
409 		/*
410 		 * Switch on the character; `goto ok' if we accept it
411 		 * as a part of number.
412 		 */
413 		switch (c) {
414 
415 		/*
416 		 * The digit 0 is always legal, but is special.  For
417 		 * %i conversions, if no digits (zero or nonzero) have
418 		 * been scanned (only signs), we will have base==0.
419 		 * In that case, we should set it to 8 and enable 0x
420 		 * prefixing.  Also, if we have not scanned zero
421 		 * digits before this, do not turn off prefixing
422 		 * (someone else will turn it off if we have scanned
423 		 * any nonzero digits).
424 		 */
425 		case '0':
426 			if (base == 0) {
427 				base = 8;
428 				flags |= PFXOK;
429 			}
430 			if (flags & NZDIGITS)
431 				flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
432 			else
433 				flags &= ~(SIGNOK|PFXOK|NDIGITS);
434 			goto ok;
435 
436 		/* 1 through 7 always legal */
437 		case '1': case '2': case '3':
438 		case '4': case '5': case '6': case '7':
439 			base = basefix[base];
440 			flags &= ~(SIGNOK | PFXOK | NDIGITS);
441 			goto ok;
442 
443 		/* digits 8 and 9 ok iff decimal or hex */
444 		case '8': case '9':
445 			base = basefix[base];
446 			if (base <= 8)
447 				break;	/* not legal here */
448 			flags &= ~(SIGNOK | PFXOK | NDIGITS);
449 			goto ok;
450 
451 		/* letters ok iff hex */
452 		case 'A': case 'B': case 'C':
453 		case 'D': case 'E': case 'F':
454 		case 'a': case 'b': case 'c':
455 		case 'd': case 'e': case 'f':
456 			/* no need to fix base here */
457 			if (base <= 10)
458 				break;	/* not legal here */
459 			flags &= ~(SIGNOK | PFXOK | NDIGITS);
460 			goto ok;
461 
462 		/* sign ok only as first character */
463 		case '+': case '-':
464 			if (flags & SIGNOK) {
465 				flags &= ~SIGNOK;
466 				flags |= HAVESIGN;
467 				goto ok;
468 			}
469 			break;
470 
471 		/*
472 		 * x ok iff flag still set & 2nd char (or 3rd char if
473 		 * we have a sign).
474 		 */
475 		case 'x': case 'X':
476 			if (flags & PFXOK && p ==
477 			    buf + 1 + !!(flags & HAVESIGN)) {
478 				base = 16;	/* if %i */
479 				flags &= ~PFXOK;
480 				goto ok;
481 			}
482 			break;
483 		}
484 
485 		/*
486 		 * If we got here, c is not a legal character for a
487 		 * number.  Stop accumulating digits.
488 		 */
489 		break;
490 	ok:
491 		/*
492 		 * c is legal: store it and look at the next.
493 		 */
494 		*p++ = c;
495 		if (--fp->_r > 0)
496 			fp->_p++;
497 		else if (__srefill(fp))
498 			break;		/* EOF */
499 	}
500 	/*
501 	 * If we had only a sign, it is no good; push back the sign.
502 	 * If the number ends in `x', it was [sign] '0' 'x', so push
503 	 * back the x and treat it as [sign] '0'.
504 	 */
505 	if (flags & NDIGITS) {
506 		if (p > buf)
507 			(void) __ungetc(*(u_char *)--p, fp);
508 		return (0);
509 	}
510 	c = ((u_char *)p)[-1];
511 	if (c == 'x' || c == 'X') {
512 		--p;
513 		(void) __ungetc(c, fp);
514 	}
515 	return (p - buf);
516 }
517 
/*
 * __vfscanf - MT-safe version
 *
 * Brackets __svfscanf() with FLOCKFILE()/FUNLOCKFILE() on the stream and
 * passes the locale obtained from __get_locale().  Returns whatever
 * __svfscanf() returns.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int nassigned;

	FLOCKFILE(fp);
	nassigned = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE(fp);

	return (nassigned);
}
531 int
532 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
533 {
534 	int ret;
535 	FIX_LOCALE(locale);
536 
537 	FLOCKFILE(fp);
538 	ret = __svfscanf(fp, locale, fmt0, ap);
539 	FUNLOCKFILE(fp);
540 	return (ret);
541 }
542 
543 /*
544  * __svfscanf - non-MT-safe version of __vfscanf
545  */
546 int
547 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
548 {
549 #define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
550 	const u_char *fmt = (const u_char *)fmt0;
551 	int c;			/* character from format, or conversion */
552 	size_t width;		/* field width, or 0 */
553 	int flags;		/* flags as defined above */
554 	int nassigned;		/* number of fields assigned */
555 	int nconversions;	/* number of conversions */
556 	int nr;			/* characters read by the current conversion */
557 	int nread;		/* number of characters consumed from fp */
558 	int base;		/* base argument to conversion function */
559 	char ccltab[256];	/* character class table for %[...] */
560 	char buf[BUF];		/* buffer for numeric conversions */
561 
562 	ORIENT(fp, -1);
563 
564 	nassigned = 0;
565 	nconversions = 0;
566 	nread = 0;
567 	for (;;) {
568 		c = *fmt++;
569 		if (c == 0)
570 			return (nassigned);
571 		if (isspace(c)) {
572 			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
573 				nread++, fp->_r--, fp->_p++;
574 			continue;
575 		}
576 		if (c != '%')
577 			goto literal;
578 		width = 0;
579 		flags = 0;
580 		/*
581 		 * switch on the format.  continue if done;
582 		 * break once format type is derived.
583 		 */
584 again:		c = *fmt++;
585 		switch (c) {
586 		case '%':
587 literal:
588 			if (fp->_r <= 0 && __srefill(fp))
589 				goto input_failure;
590 			if (*fp->_p != c)
591 				goto match_failure;
592 			fp->_r--, fp->_p++;
593 			nread++;
594 			continue;
595 
596 		case '*':
597 			flags |= SUPPRESS;
598 			goto again;
599 		case 'j':
600 			flags |= INTMAXT;
601 			goto again;
602 		case 'l':
603 			if (flags & LONG) {
604 				flags &= ~LONG;
605 				flags |= LONGLONG;
606 			} else
607 				flags |= LONG;
608 			goto again;
609 		case 'q':
610 			flags |= LONGLONG;	/* not quite */
611 			goto again;
612 		case 't':
613 			flags |= PTRDIFFT;
614 			goto again;
615 		case 'z':
616 			flags |= SIZET;
617 			goto again;
618 		case 'L':
619 			flags |= LONGDBL;
620 			goto again;
621 		case 'h':
622 			if (flags & SHORT) {
623 				flags &= ~SHORT;
624 				flags |= SHORTSHORT;
625 			} else
626 				flags |= SHORT;
627 			goto again;
628 
629 		case '0': case '1': case '2': case '3': case '4':
630 		case '5': case '6': case '7': case '8': case '9':
631 			width = width * 10 + c - '0';
632 			goto again;
633 
634 		/*
635 		 * Conversions.
636 		 */
637 		case 'd':
638 			c = CT_INT;
639 			base = 10;
640 			break;
641 
642 		case 'i':
643 			c = CT_INT;
644 			base = 0;
645 			break;
646 
647 		case 'o':
648 			c = CT_INT;
649 			flags |= UNSIGNED;
650 			base = 8;
651 			break;
652 
653 		case 'u':
654 			c = CT_INT;
655 			flags |= UNSIGNED;
656 			base = 10;
657 			break;
658 
659 		case 'X':
660 		case 'x':
661 			flags |= PFXOK;	/* enable 0x prefixing */
662 			c = CT_INT;
663 			flags |= UNSIGNED;
664 			base = 16;
665 			break;
666 
667 #ifndef NO_FLOATING_POINT
668 		case 'A': case 'E': case 'F': case 'G':
669 		case 'a': case 'e': case 'f': case 'g':
670 			c = CT_FLOAT;
671 			break;
672 #endif
673 
674 		case 'S':
675 			flags |= LONG;
676 			/* FALLTHROUGH */
677 		case 's':
678 			c = CT_STRING;
679 			break;
680 
681 		case '[':
682 			fmt = __sccl(ccltab, fmt);
683 			flags |= NOSKIP;
684 			c = CT_CCL;
685 			break;
686 
687 		case 'C':
688 			flags |= LONG;
689 			/* FALLTHROUGH */
690 		case 'c':
691 			flags |= NOSKIP;
692 			c = CT_CHAR;
693 			break;
694 
695 		case 'p':	/* pointer format is like hex */
696 			flags |= POINTER | PFXOK;
697 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
698 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
699 			base = 16;
700 			break;
701 
702 		case 'n':
703 			if (flags & SUPPRESS)	/* ??? */
704 				continue;
705 			if (flags & SHORTSHORT)
706 				*va_arg(ap, char *) = nread;
707 			else if (flags & SHORT)
708 				*va_arg(ap, short *) = nread;
709 			else if (flags & LONG)
710 				*va_arg(ap, long *) = nread;
711 			else if (flags & LONGLONG)
712 				*va_arg(ap, long long *) = nread;
713 			else if (flags & INTMAXT)
714 				*va_arg(ap, intmax_t *) = nread;
715 			else if (flags & SIZET)
716 				*va_arg(ap, size_t *) = nread;
717 			else if (flags & PTRDIFFT)
718 				*va_arg(ap, ptrdiff_t *) = nread;
719 			else
720 				*va_arg(ap, int *) = nread;
721 			continue;
722 
723 		default:
724 			goto match_failure;
725 
726 		/*
727 		 * Disgusting backwards compatibility hack.	XXX
728 		 */
729 		case '\0':	/* compat */
730 			return (EOF);
731 		}
732 
733 		/*
734 		 * We have a conversion that requires input.
735 		 */
736 		if (fp->_r <= 0 && __srefill(fp))
737 			goto input_failure;
738 
739 		/*
740 		 * Consume leading white space, except for formats
741 		 * that suppress this.
742 		 */
743 		if ((flags & NOSKIP) == 0) {
744 			while (isspace(*fp->_p)) {
745 				nread++;
746 				if (--fp->_r > 0)
747 					fp->_p++;
748 				else if (__srefill(fp))
749 					goto input_failure;
750 			}
751 			/*
752 			 * Note that there is at least one character in
753 			 * the buffer, so conversions that do not set NOSKIP
754 			 * ca no longer result in an input failure.
755 			 */
756 		}
757 
758 		/*
759 		 * Do the conversion.
760 		 */
761 		switch (c) {
762 
763 		case CT_CHAR:
764 			/* scan arbitrary characters (sets NOSKIP) */
765 			if (width == 0)
766 				width = 1;
767 			if (flags & LONG) {
768 				nr = convert_wchar(fp, GETARG(wchar_t *),
769 				    width);
770 			} else {
771 				nr = convert_char(fp, GETARG(char *), width);
772 			}
773 			if (nr < 0)
774 				goto input_failure;
775 			break;
776 
777 		case CT_CCL:
778 			/* scan a (nonempty) character class (sets NOSKIP) */
779 			if (width == 0)
780 				width = (size_t)~0;	/* `infinity' */
781 			if (flags & LONG) {
782 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
783 				    ccltab);
784 			} else {
785 				nr = convert_ccl(fp, GETARG(char *), width,
786 				    ccltab);
787 			}
788 			if (nr <= 0) {
789 				if (nr < 0)
790 					goto input_failure;
791 				else /* nr == 0 */
792 					goto match_failure;
793 			}
794 			break;
795 
796 		case CT_STRING:
797 			/* like CCL, but zero-length string OK, & no NOSKIP */
798 			if (width == 0)
799 				width = (size_t)~0;
800 			if (flags & LONG) {
801 				nr = convert_wstring(fp, GETARG(wchar_t *),
802 				    width);
803 			} else {
804 				nr = convert_string(fp, GETARG(char *), width);
805 			}
806 			if (nr < 0)
807 				goto input_failure;
808 			break;
809 
810 		case CT_INT:
811 			/* scan an integer as if by the conversion function */
812 #ifdef hardway
813 			if (width == 0 || width > sizeof(buf) - 1)
814 				width = sizeof(buf) - 1;
815 #else
816 			/* size_t is unsigned, hence this optimisation */
817 			if (--width > sizeof(buf) - 2)
818 				width = sizeof(buf) - 2;
819 			width++;
820 #endif
821 			nr = parseint(fp, buf, width, base, flags);
822 			if (nr == 0)
823 				goto match_failure;
824 			if ((flags & SUPPRESS) == 0) {
825 				uintmax_t res;
826 
827 				buf[nr] = '\0';
828 				if ((flags & UNSIGNED) == 0)
829 				    res = strtoimax_l(buf, (char **)NULL, base, locale);
830 				else
831 				    res = strtoumax_l(buf, (char **)NULL, base, locale);
832 				if (flags & POINTER)
833 					*va_arg(ap, void **) =
834 							(void *)(uintptr_t)res;
835 				else if (flags & SHORTSHORT)
836 					*va_arg(ap, char *) = res;
837 				else if (flags & SHORT)
838 					*va_arg(ap, short *) = res;
839 				else if (flags & LONG)
840 					*va_arg(ap, long *) = res;
841 				else if (flags & LONGLONG)
842 					*va_arg(ap, long long *) = res;
843 				else if (flags & INTMAXT)
844 					*va_arg(ap, intmax_t *) = res;
845 				else if (flags & PTRDIFFT)
846 					*va_arg(ap, ptrdiff_t *) = res;
847 				else if (flags & SIZET)
848 					*va_arg(ap, size_t *) = res;
849 				else
850 					*va_arg(ap, int *) = res;
851 			}
852 			break;
853 
854 #ifndef NO_FLOATING_POINT
855 		case CT_FLOAT:
856 			/* scan a floating point number as if by strtod */
857 			if (width == 0 || width > sizeof(buf) - 1)
858 				width = sizeof(buf) - 1;
859 			nr = parsefloat(fp, buf, buf + width, locale);
860 			if (nr == 0)
861 				goto match_failure;
862 			if ((flags & SUPPRESS) == 0) {
863 				if (flags & LONGDBL) {
864 					long double res = strtold_l(buf, NULL,
865 					    locale);
866 					*va_arg(ap, long double *) = res;
867 				} else if (flags & LONG) {
868 					double res = strtod_l(buf, NULL,
869 					    locale);
870 					*va_arg(ap, double *) = res;
871 				} else {
872 					float res = strtof_l(buf, NULL, locale);
873 					*va_arg(ap, float *) = res;
874 				}
875 			}
876 			break;
877 #endif /* !NO_FLOATING_POINT */
878 		}
879 		if (!(flags & SUPPRESS))
880 			nassigned++;
881 		nread += nr;
882 		nconversions++;
883 	}
884 input_failure:
885 	return (nconversions != 0 ? nassigned : EOF);
886 match_failure:
887 	return (nassigned);
888 }
889 
890 /*
891  * Fill in the given table from the scanset at the given format
892  * (just after `[').  Return a pointer to the character past the
893  * closing `]'.  The table has a 1 wherever characters should be
894  * considered part of the scanset.
895  */
896 static const u_char *
897 __sccl(tab, fmt)
898 	char *tab;
899 	const u_char *fmt;
900 {
901 	int c, n, v, i;
902 	struct xlocale_collate *table =
903 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
904 
905 	/* first `clear' the whole table */
906 	c = *fmt++;		/* first char hat => negated scanset */
907 	if (c == '^') {
908 		v = 1;		/* default => accept */
909 		c = *fmt++;	/* get new first char */
910 	} else
911 		v = 0;		/* default => reject */
912 
913 	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
914 	(void) memset(tab, v, 256);
915 
916 	if (c == 0)
917 		return (fmt - 1);/* format ended before closing ] */
918 
919 	/*
920 	 * Now set the entries corresponding to the actual scanset
921 	 * to the opposite of the above.
922 	 *
923 	 * The first character may be ']' (or '-') without being special;
924 	 * the last character may be '-'.
925 	 */
926 	v = 1 - v;
927 	for (;;) {
928 		tab[c] = v;		/* take character c */
929 doswitch:
930 		n = *fmt++;		/* and examine the next */
931 		switch (n) {
932 
933 		case 0:			/* format ended too soon */
934 			return (fmt - 1);
935 
936 		case '-':
937 			/*
938 			 * A scanset of the form
939 			 *	[01+-]
940 			 * is defined as `the digit 0, the digit 1,
941 			 * the character +, the character -', but
942 			 * the effect of a scanset such as
943 			 *	[a-zA-Z0-9]
944 			 * is implementation defined.  The V7 Unix
945 			 * scanf treats `a-z' as `the letters a through
946 			 * z', but treats `a-a' as `the letter a, the
947 			 * character -, and the letter a'.
948 			 *
949 			 * For compatibility, the `-' is not considerd
950 			 * to define a range if the character following
951 			 * it is either a close bracket (required by ANSI)
952 			 * or is not numerically greater than the character
953 			 * we just stored in the table (c).
954 			 */
955 			n = *fmt;
956 			if (n == ']'
957 			    || (table->__collate_load_error ? n < c :
958 				__collate_range_cmp (table, n, c) < 0
959 			       )
960 			   ) {
961 				c = '-';
962 				break;	/* resume the for(;;) */
963 			}
964 			fmt++;
965 			/* fill in the range */
966 			if (table->__collate_load_error) {
967 				do {
968 					tab[++c] = v;
969 				} while (c < n);
970 			} else {
971 				for (i = 0; i < 256; i ++)
972 					if (   __collate_range_cmp (table, c, i) < 0
973 					    && __collate_range_cmp (table, i, n) <= 0
974 					   )
975 						tab[i] = v;
976 			}
977 #if 1	/* XXX another disgusting compatibility hack */
978 			c = n;
979 			/*
980 			 * Alas, the V7 Unix scanf also treats formats
981 			 * such as [a-c-e] as `the letters a through e'.
982 			 * This too is permitted by the standard....
983 			 */
984 			goto doswitch;
985 #else
986 			c = *fmt++;
987 			if (c == 0)
988 				return (fmt - 1);
989 			if (c == ']')
990 				return (fmt);
991 #endif
992 			break;
993 
994 		case ']':		/* end of scanset */
995 			return (fmt);
996 
997 		default:		/* just another character */
998 			c = n;
999 			break;
1000 		}
1001 	}
1002 	/* NOTREACHED */
1003 }
1004 
1005 #ifndef NO_FLOATING_POINT
1006 static int
1007 parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
1008 {
1009 	char *commit, *p;
1010 	int infnanpos = 0, decptpos = 0;
1011 	enum {
1012 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
1013 		S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
1014 	} state = S_START;
1015 	unsigned char c;
1016 	const char *decpt = localeconv_l(locale)->decimal_point;
1017 	_Bool gotmantdig = 0, ishex = 0;
1018 
1019 	/*
1020 	 * We set commit = p whenever the string we have read so far
1021 	 * constitutes a valid representation of a floating point
1022 	 * number by itself.  At some point, the parse will complete
1023 	 * or fail, and we will ungetc() back to the last commit point.
1024 	 * To ensure that the file offset gets updated properly, it is
1025 	 * always necessary to read at least one character that doesn't
1026 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
1027 	 */
1028 	commit = buf - 1;
1029 	for (p = buf; p < end; ) {
1030 		c = *fp->_p;
1031 reswitch:
1032 		switch (state) {
1033 		case S_START:
1034 			state = S_GOTSIGN;
1035 			if (c == '-' || c == '+')
1036 				break;
1037 			else
1038 				goto reswitch;
1039 		case S_GOTSIGN:
1040 			switch (c) {
1041 			case '0':
1042 				state = S_MAYBEHEX;
1043 				commit = p;
1044 				break;
1045 			case 'I':
1046 			case 'i':
1047 				state = S_INF;
1048 				break;
1049 			case 'N':
1050 			case 'n':
1051 				state = S_NAN;
1052 				break;
1053 			default:
1054 				state = S_DIGITS;
1055 				goto reswitch;
1056 			}
1057 			break;
1058 		case S_INF:
1059 			if (infnanpos > 6 ||
1060 			    (c != "nfinity"[infnanpos] &&
1061 			     c != "NFINITY"[infnanpos]))
1062 				goto parsedone;
1063 			if (infnanpos == 1 || infnanpos == 6)
1064 				commit = p;	/* inf or infinity */
1065 			infnanpos++;
1066 			break;
1067 		case S_NAN:
1068 			switch (infnanpos) {
1069 			case 0:
1070 				if (c != 'A' && c != 'a')
1071 					goto parsedone;
1072 				break;
1073 			case 1:
1074 				if (c != 'N' && c != 'n')
1075 					goto parsedone;
1076 				else
1077 					commit = p;
1078 				break;
1079 			case 2:
1080 				if (c != '(')
1081 					goto parsedone;
1082 				break;
1083 			default:
1084 				if (c == ')') {
1085 					commit = p;
1086 					state = S_DONE;
1087 				} else if (!isalnum(c) && c != '_')
1088 					goto parsedone;
1089 				break;
1090 			}
1091 			infnanpos++;
1092 			break;
1093 		case S_DONE:
1094 			goto parsedone;
1095 		case S_MAYBEHEX:
1096 			state = S_DIGITS;
1097 			if (c == 'X' || c == 'x') {
1098 				ishex = 1;
1099 				break;
1100 			} else {	/* we saw a '0', but no 'x' */
1101 				gotmantdig = 1;
1102 				goto reswitch;
1103 			}
1104 		case S_DIGITS:
1105 			if ((ishex && isxdigit(c)) || isdigit(c)) {
1106 				gotmantdig = 1;
1107 				commit = p;
1108 				break;
1109 			} else {
1110 				state = S_DECPT;
1111 				goto reswitch;
1112 			}
1113 		case S_DECPT:
1114 			if (c == decpt[decptpos]) {
1115 				if (decpt[++decptpos] == '\0') {
1116 					/* We read the complete decpt seq. */
1117 					state = S_FRAC;
1118 					if (gotmantdig)
1119 						commit = p;
1120 				}
1121 				break;
1122 			} else if (!decptpos) {
1123 				/* We didn't read any decpt characters. */
1124 				state = S_FRAC;
1125 				goto reswitch;
1126 			} else {
1127 				/*
1128 				 * We read part of a multibyte decimal point,
1129 				 * but the rest is invalid, so bail.
1130 				 */
1131 				goto parsedone;
1132 			}
1133 		case S_FRAC:
1134 			if (((c == 'E' || c == 'e') && !ishex) ||
1135 			    ((c == 'P' || c == 'p') && ishex)) {
1136 				if (!gotmantdig)
1137 					goto parsedone;
1138 				else
1139 					state = S_EXP;
1140 			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
1141 				commit = p;
1142 				gotmantdig = 1;
1143 			} else
1144 				goto parsedone;
1145 			break;
1146 		case S_EXP:
1147 			state = S_EXPDIGITS;
1148 			if (c == '-' || c == '+')
1149 				break;
1150 			else
1151 				goto reswitch;
1152 		case S_EXPDIGITS:
1153 			if (isdigit(c))
1154 				commit = p;
1155 			else
1156 				goto parsedone;
1157 			break;
1158 		default:
1159 			abort();
1160 		}
1161 		*p++ = c;
1162 		if (--fp->_r > 0)
1163 			fp->_p++;
1164 		else if (__srefill(fp))
1165 			break;	/* EOF */
1166 	}
1167 
1168 parsedone:
1169 	while (commit < --p)
1170 		__ungetc(*(u_char *)p, fp);
1171 	*++commit = '\0';
1172 	return (commit - buf);
1173 }
1174 #endif
1175