xref: /freebsd/lib/libc/stdio/vfscanf.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Chris Torek.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
#if defined(LIBC_SCCS) && !defined(lint)
/* SCCS identification string; only compiled in when LIBC_SCCS is defined and lint is not. */
static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
#endif /* LIBC_SCCS and not lint */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include "namespace.h"
44 #include <ctype.h>
45 #include <inttypes.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stddef.h>
49 #include <stdarg.h>
50 #include <string.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 #include "un-namespace.h"
54 
55 #include "collate.h"
56 #include "libc_private.h"
57 #include "local.h"
58 
59 #ifndef NO_FLOATING_POINT
60 #include <locale.h>
61 #endif
62 
/*
 * Size of the scratch buffer used for numeric conversions (and for
 * assembling multibyte sequences).  Numeric field widths are clipped so
 * that the digits plus a terminating NUL fit in this many bytes.
 */
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * These are OR-ed into the `flags' variable of __svfscanf while a single
 * conversion specification is being parsed and executed.
 */
#define	LONG		0x01	/* l: long, double, or wide char (%lc %ls %l[ %C %S) */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * The following are used in integral conversions only:
 * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */
#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */
#define	HAVESIGN	0x10000	/* sign detected */

/*
 * Conversion types.
 *
 * After the format switch, `c' in __svfscanf holds one of these values
 * and selects the branch of the conversion switch.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[aefgAEFG] conversion */
99 
/* Builds the 256-entry scanset table for a %[...] conversion. */
static const u_char *__sccl(char *, const u_char *);
#ifndef NO_FLOATING_POINT
/* Reads the longest valid floating point prefix from a stream into a buffer. */
static int parsefloat(FILE *, char *, char *);
#endif

/*
 * When nonzero, a floating point conversion calls abort() if the strto*()
 * routine consumed a different number of characters than parsefloat()
 * accepted.
 */
int __scanfdebug = 0;

/* NOTE(review): presumably exports vfscanf as a weak alias of __vfscanf. */
__weak_reference(__vfscanf, vfscanf);
108 
/*
 * __vfscanf - MT-safe version
 *
 * Brackets a call to __svfscanf with FLOCKFILE/FUNLOCKFILE on fp and
 * hands back whatever __svfscanf returned.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int nmatched;

	FLOCKFILE(fp);
	nmatched = __svfscanf(fp, fmt0, ap);
	FUNLOCKFILE(fp);

	return (nmatched);
}
122 
123 /*
124  * __svfscanf - non-MT-safe version of __vfscanf
125  */
126 int
127 __svfscanf(FILE *fp, const char *fmt0, va_list ap)
128 {
129 	const u_char *fmt = (const u_char *)fmt0;
130 	int c;			/* character from format, or conversion */
131 	size_t width;		/* field width, or 0 */
132 	char *p;		/* points into all kinds of strings */
133 	int n;			/* handy integer */
134 	int flags;		/* flags as defined above */
135 	char *p0;		/* saves original value of p when necessary */
136 	int nassigned;		/* number of fields assigned */
137 	int nconversions;	/* number of conversions */
138 	int nread;		/* number of characters consumed from fp */
139 	int base;		/* base argument to conversion function */
140 	char ccltab[256];	/* character class table for %[...] */
141 	char buf[BUF];		/* buffer for numeric and mb conversions */
142 	wchar_t *wcp;		/* handy wide character pointer */
143 	size_t nconv;		/* length of multibyte sequence converted */
144 	static const mbstate_t initial;
145 	mbstate_t mbs;
146 
147 	/* `basefix' is used to avoid `if' tests in the integer scanner */
148 	static short basefix[17] =
149 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
150 
151 	ORIENT(fp, -1);
152 
153 	nassigned = 0;
154 	nconversions = 0;
155 	nread = 0;
156 	for (;;) {
157 		c = *fmt++;
158 		if (c == 0)
159 			return (nassigned);
160 		if (isspace(c)) {
161 			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
162 				nread++, fp->_r--, fp->_p++;
163 			continue;
164 		}
165 		if (c != '%')
166 			goto literal;
167 		width = 0;
168 		flags = 0;
169 		/*
170 		 * switch on the format.  continue if done;
171 		 * break once format type is derived.
172 		 */
173 again:		c = *fmt++;
174 		switch (c) {
175 		case '%':
176 literal:
177 			if (fp->_r <= 0 && __srefill(fp))
178 				goto input_failure;
179 			if (*fp->_p != c)
180 				goto match_failure;
181 			fp->_r--, fp->_p++;
182 			nread++;
183 			continue;
184 
185 		case '*':
186 			flags |= SUPPRESS;
187 			goto again;
188 		case 'j':
189 			flags |= INTMAXT;
190 			goto again;
191 		case 'l':
192 			if (flags & LONG) {
193 				flags &= ~LONG;
194 				flags |= LONGLONG;
195 			} else
196 				flags |= LONG;
197 			goto again;
198 		case 'q':
199 			flags |= LONGLONG;	/* not quite */
200 			goto again;
201 		case 't':
202 			flags |= PTRDIFFT;
203 			goto again;
204 		case 'z':
205 			flags |= SIZET;
206 			goto again;
207 		case 'L':
208 			flags |= LONGDBL;
209 			goto again;
210 		case 'h':
211 			if (flags & SHORT) {
212 				flags &= ~SHORT;
213 				flags |= SHORTSHORT;
214 			} else
215 				flags |= SHORT;
216 			goto again;
217 
218 		case '0': case '1': case '2': case '3': case '4':
219 		case '5': case '6': case '7': case '8': case '9':
220 			width = width * 10 + c - '0';
221 			goto again;
222 
223 		/*
224 		 * Conversions.
225 		 */
226 		case 'd':
227 			c = CT_INT;
228 			base = 10;
229 			break;
230 
231 		case 'i':
232 			c = CT_INT;
233 			base = 0;
234 			break;
235 
236 		case 'o':
237 			c = CT_INT;
238 			flags |= UNSIGNED;
239 			base = 8;
240 			break;
241 
242 		case 'u':
243 			c = CT_INT;
244 			flags |= UNSIGNED;
245 			base = 10;
246 			break;
247 
248 		case 'X':
249 		case 'x':
250 			flags |= PFXOK;	/* enable 0x prefixing */
251 			c = CT_INT;
252 			flags |= UNSIGNED;
253 			base = 16;
254 			break;
255 
256 #ifndef NO_FLOATING_POINT
257 		case 'A': case 'E': case 'F': case 'G':
258 		case 'a': case 'e': case 'f': case 'g':
259 			c = CT_FLOAT;
260 			break;
261 #endif
262 
263 		case 'S':
264 			flags |= LONG;
265 			/* FALLTHROUGH */
266 		case 's':
267 			c = CT_STRING;
268 			break;
269 
270 		case '[':
271 			fmt = __sccl(ccltab, fmt);
272 			flags |= NOSKIP;
273 			c = CT_CCL;
274 			break;
275 
276 		case 'C':
277 			flags |= LONG;
278 			/* FALLTHROUGH */
279 		case 'c':
280 			flags |= NOSKIP;
281 			c = CT_CHAR;
282 			break;
283 
284 		case 'p':	/* pointer format is like hex */
285 			flags |= POINTER | PFXOK;
286 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
287 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
288 			base = 16;
289 			break;
290 
291 		case 'n':
292 			nconversions++;
293 			if (flags & SUPPRESS)	/* ??? */
294 				continue;
295 			if (flags & SHORTSHORT)
296 				*va_arg(ap, char *) = nread;
297 			else if (flags & SHORT)
298 				*va_arg(ap, short *) = nread;
299 			else if (flags & LONG)
300 				*va_arg(ap, long *) = nread;
301 			else if (flags & LONGLONG)
302 				*va_arg(ap, long long *) = nread;
303 			else if (flags & INTMAXT)
304 				*va_arg(ap, intmax_t *) = nread;
305 			else if (flags & SIZET)
306 				*va_arg(ap, size_t *) = nread;
307 			else if (flags & PTRDIFFT)
308 				*va_arg(ap, ptrdiff_t *) = nread;
309 			else
310 				*va_arg(ap, int *) = nread;
311 			continue;
312 
313 		default:
314 			goto match_failure;
315 
316 		/*
317 		 * Disgusting backwards compatibility hack.	XXX
318 		 */
319 		case '\0':	/* compat */
320 			return (EOF);
321 		}
322 
323 		/*
324 		 * We have a conversion that requires input.
325 		 */
326 		if (fp->_r <= 0 && __srefill(fp))
327 			goto input_failure;
328 
329 		/*
330 		 * Consume leading white space, except for formats
331 		 * that suppress this.
332 		 */
333 		if ((flags & NOSKIP) == 0) {
334 			while (isspace(*fp->_p)) {
335 				nread++;
336 				if (--fp->_r > 0)
337 					fp->_p++;
338 				else if (__srefill(fp))
339 					goto input_failure;
340 			}
341 			/*
342 			 * Note that there is at least one character in
343 			 * the buffer, so conversions that do not set NOSKIP
344 			 * ca no longer result in an input failure.
345 			 */
346 		}
347 
348 		/*
349 		 * Do the conversion.
350 		 */
351 		switch (c) {
352 
353 		case CT_CHAR:
354 			/* scan arbitrary characters (sets NOSKIP) */
355 			if (width == 0)
356 				width = 1;
357 			if (flags & LONG) {
358 				if ((flags & SUPPRESS) == 0)
359 					wcp = va_arg(ap, wchar_t *);
360 				else
361 					wcp = NULL;
362 				n = 0;
363 				while (width != 0) {
364 					if (n == MB_CUR_MAX) {
365 						fp->_flags |= __SERR;
366 						goto input_failure;
367 					}
368 					buf[n++] = *fp->_p;
369 					fp->_p++;
370 					fp->_r--;
371 					mbs = initial;
372 					nconv = mbrtowc(wcp, buf, n, &mbs);
373 					if (nconv == (size_t)-1) {
374 						fp->_flags |= __SERR;
375 						goto input_failure;
376 					}
377 					if (nconv == 0 && !(flags & SUPPRESS))
378 						*wcp = L'\0';
379 					if (nconv != (size_t)-2) {
380 						nread += n;
381 						width--;
382 						if (!(flags & SUPPRESS))
383 							wcp++;
384 						n = 0;
385 					}
386 					if (fp->_r <= 0 && __srefill(fp)) {
387 						if (n != 0) {
388 							fp->_flags |= __SERR;
389 							goto input_failure;
390 						}
391 						break;
392 					}
393 				}
394 				if (!(flags & SUPPRESS))
395 					nassigned++;
396 			} else if (flags & SUPPRESS) {
397 				size_t sum = 0;
398 				for (;;) {
399 					if ((n = fp->_r) < width) {
400 						sum += n;
401 						width -= n;
402 						fp->_p += n;
403 						if (__srefill(fp)) {
404 							if (sum == 0)
405 							    goto input_failure;
406 							break;
407 						}
408 					} else {
409 						sum += width;
410 						fp->_r -= width;
411 						fp->_p += width;
412 						break;
413 					}
414 				}
415 				nread += sum;
416 			} else {
417 				size_t r = __fread((void *)va_arg(ap, char *), 1,
418 				    width, fp);
419 
420 				if (r == 0)
421 					goto input_failure;
422 				nread += r;
423 				nassigned++;
424 			}
425 			nconversions++;
426 			break;
427 
428 		case CT_CCL:
429 			/* scan a (nonempty) character class (sets NOSKIP) */
430 			if (width == 0)
431 				width = (size_t)~0;	/* `infinity' */
432 			/* take only those things in the class */
433 			if (flags & LONG) {
434 				wchar_t twc;
435 				int nchars;
436 
437 				if ((flags & SUPPRESS) == 0)
438 					wcp = va_arg(ap, wchar_t *);
439 				else
440 					wcp = &twc;
441 				n = 0;
442 				nchars = 0;
443 				while (width != 0) {
444 					if (n == MB_CUR_MAX) {
445 						fp->_flags |= __SERR;
446 						goto input_failure;
447 					}
448 					buf[n++] = *fp->_p;
449 					fp->_p++;
450 					fp->_r--;
451 					mbs = initial;
452 					nconv = mbrtowc(wcp, buf, n, &mbs);
453 					if (nconv == (size_t)-1) {
454 						fp->_flags |= __SERR;
455 						goto input_failure;
456 					}
457 					if (nconv == 0)
458 						*wcp = L'\0';
459 					if (nconv != (size_t)-2) {
460 						if (wctob(*wcp) != EOF &&
461 						    !ccltab[wctob(*wcp)]) {
462 							while (n != 0) {
463 								n--;
464 								__ungetc(buf[n],
465 								    fp);
466 							}
467 							break;
468 						}
469 						nread += n;
470 						width--;
471 						if (!(flags & SUPPRESS))
472 							wcp++;
473 						nchars++;
474 						n = 0;
475 					}
476 					if (fp->_r <= 0 && __srefill(fp)) {
477 						if (n != 0) {
478 							fp->_flags |= __SERR;
479 							goto input_failure;
480 						}
481 						break;
482 					}
483 				}
484 				if (n != 0) {
485 					fp->_flags |= __SERR;
486 					goto input_failure;
487 				}
488 				n = nchars;
489 				if (n == 0)
490 					goto match_failure;
491 				if (!(flags & SUPPRESS)) {
492 					*wcp = L'\0';
493 					nassigned++;
494 				}
495 			} else if (flags & SUPPRESS) {
496 				n = 0;
497 				while (ccltab[*fp->_p]) {
498 					n++, fp->_r--, fp->_p++;
499 					if (--width == 0)
500 						break;
501 					if (fp->_r <= 0 && __srefill(fp)) {
502 						if (n == 0)
503 							goto input_failure;
504 						break;
505 					}
506 				}
507 				if (n == 0)
508 					goto match_failure;
509 			} else {
510 				p0 = p = va_arg(ap, char *);
511 				while (ccltab[*fp->_p]) {
512 					fp->_r--;
513 					*p++ = *fp->_p++;
514 					if (--width == 0)
515 						break;
516 					if (fp->_r <= 0 && __srefill(fp)) {
517 						if (p == p0)
518 							goto input_failure;
519 						break;
520 					}
521 				}
522 				n = p - p0;
523 				if (n == 0)
524 					goto match_failure;
525 				*p = 0;
526 				nassigned++;
527 			}
528 			nread += n;
529 			nconversions++;
530 			break;
531 
532 		case CT_STRING:
533 			/* like CCL, but zero-length string OK, & no NOSKIP */
534 			if (width == 0)
535 				width = (size_t)~0;
536 			if (flags & LONG) {
537 				wchar_t twc;
538 
539 				if ((flags & SUPPRESS) == 0)
540 					wcp = va_arg(ap, wchar_t *);
541 				else
542 					wcp = &twc;
543 				n = 0;
544 				while (!isspace(*fp->_p) && width != 0) {
545 					if (n == MB_CUR_MAX) {
546 						fp->_flags |= __SERR;
547 						goto input_failure;
548 					}
549 					buf[n++] = *fp->_p;
550 					fp->_p++;
551 					fp->_r--;
552 					mbs = initial;
553 					nconv = mbrtowc(wcp, buf, n, &mbs);
554 					if (nconv == (size_t)-1) {
555 						fp->_flags |= __SERR;
556 						goto input_failure;
557 					}
558 					if (nconv == 0)
559 						*wcp = L'\0';
560 					if (nconv != (size_t)-2) {
561 						if (iswspace(*wcp)) {
562 							while (n != 0) {
563 								n--;
564 								__ungetc(buf[n],
565 								    fp);
566 							}
567 							break;
568 						}
569 						nread += n;
570 						width--;
571 						if (!(flags & SUPPRESS))
572 							wcp++;
573 						n = 0;
574 					}
575 					if (fp->_r <= 0 && __srefill(fp)) {
576 						if (n != 0) {
577 							fp->_flags |= __SERR;
578 							goto input_failure;
579 						}
580 						break;
581 					}
582 				}
583 				if (!(flags & SUPPRESS)) {
584 					*wcp = L'\0';
585 					nassigned++;
586 				}
587 			} else if (flags & SUPPRESS) {
588 				n = 0;
589 				while (!isspace(*fp->_p)) {
590 					n++, fp->_r--, fp->_p++;
591 					if (--width == 0)
592 						break;
593 					if (fp->_r <= 0 && __srefill(fp))
594 						break;
595 				}
596 				nread += n;
597 			} else {
598 				p0 = p = va_arg(ap, char *);
599 				while (!isspace(*fp->_p)) {
600 					fp->_r--;
601 					*p++ = *fp->_p++;
602 					if (--width == 0)
603 						break;
604 					if (fp->_r <= 0 && __srefill(fp))
605 						break;
606 				}
607 				*p = 0;
608 				nread += p - p0;
609 				nassigned++;
610 			}
611 			nconversions++;
612 			continue;
613 
614 		case CT_INT:
615 			/* scan an integer as if by the conversion function */
616 #ifdef hardway
617 			if (width == 0 || width > sizeof(buf) - 1)
618 				width = sizeof(buf) - 1;
619 #else
620 			/* size_t is unsigned, hence this optimisation */
621 			if (--width > sizeof(buf) - 2)
622 				width = sizeof(buf) - 2;
623 			width++;
624 #endif
625 			flags |= SIGNOK | NDIGITS | NZDIGITS;
626 			for (p = buf; width; width--) {
627 				c = *fp->_p;
628 				/*
629 				 * Switch on the character; `goto ok'
630 				 * if we accept it as a part of number.
631 				 */
632 				switch (c) {
633 
634 				/*
635 				 * The digit 0 is always legal, but is
636 				 * special.  For %i conversions, if no
637 				 * digits (zero or nonzero) have been
638 				 * scanned (only signs), we will have
639 				 * base==0.  In that case, we should set
640 				 * it to 8 and enable 0x prefixing.
641 				 * Also, if we have not scanned zero digits
642 				 * before this, do not turn off prefixing
643 				 * (someone else will turn it off if we
644 				 * have scanned any nonzero digits).
645 				 */
646 				case '0':
647 					if (base == 0) {
648 						base = 8;
649 						flags |= PFXOK;
650 					}
651 					if (flags & NZDIGITS)
652 					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
653 					else
654 					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
655 					goto ok;
656 
657 				/* 1 through 7 always legal */
658 				case '1': case '2': case '3':
659 				case '4': case '5': case '6': case '7':
660 					base = basefix[base];
661 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
662 					goto ok;
663 
664 				/* digits 8 and 9 ok iff decimal or hex */
665 				case '8': case '9':
666 					base = basefix[base];
667 					if (base <= 8)
668 						break;	/* not legal here */
669 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
670 					goto ok;
671 
672 				/* letters ok iff hex */
673 				case 'A': case 'B': case 'C':
674 				case 'D': case 'E': case 'F':
675 				case 'a': case 'b': case 'c':
676 				case 'd': case 'e': case 'f':
677 					/* no need to fix base here */
678 					if (base <= 10)
679 						break;	/* not legal here */
680 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
681 					goto ok;
682 
683 				/* sign ok only as first character */
684 				case '+': case '-':
685 					if (flags & SIGNOK) {
686 						flags &= ~SIGNOK;
687 						flags |= HAVESIGN;
688 						goto ok;
689 					}
690 					break;
691 
692 				/*
693 				 * x ok iff flag still set & 2nd char (or
694 				 * 3rd char if we have a sign).
695 				 */
696 				case 'x': case 'X':
697 					if (flags & PFXOK && p ==
698 					    buf + 1 + !!(flags & HAVESIGN)) {
699 						base = 16;	/* if %i */
700 						flags &= ~PFXOK;
701 						goto ok;
702 					}
703 					break;
704 				}
705 
706 				/*
707 				 * If we got here, c is not a legal character
708 				 * for a number.  Stop accumulating digits.
709 				 */
710 				break;
711 		ok:
712 				/*
713 				 * c is legal: store it and look at the next.
714 				 */
715 				*p++ = c;
716 				if (--fp->_r > 0)
717 					fp->_p++;
718 				else if (__srefill(fp))
719 					break;		/* EOF */
720 			}
721 			/*
722 			 * If we had only a sign, it is no good; push
723 			 * back the sign.  If the number ends in `x',
724 			 * it was [sign] '0' 'x', so push back the x
725 			 * and treat it as [sign] '0'.
726 			 */
727 			if (flags & NDIGITS) {
728 				if (p > buf)
729 					(void) __ungetc(*(u_char *)--p, fp);
730 				goto match_failure;
731 			}
732 			c = ((u_char *)p)[-1];
733 			if (c == 'x' || c == 'X') {
734 				--p;
735 				(void) __ungetc(c, fp);
736 			}
737 			if ((flags & SUPPRESS) == 0) {
738 				uintmax_t res;
739 
740 				*p = 0;
741 				if ((flags & UNSIGNED) == 0)
742 				    res = strtoimax(buf, (char **)NULL, base);
743 				else
744 				    res = strtoumax(buf, (char **)NULL, base);
745 				if (flags & POINTER)
746 					*va_arg(ap, void **) =
747 							(void *)(uintptr_t)res;
748 				else if (flags & SHORTSHORT)
749 					*va_arg(ap, char *) = res;
750 				else if (flags & SHORT)
751 					*va_arg(ap, short *) = res;
752 				else if (flags & LONG)
753 					*va_arg(ap, long *) = res;
754 				else if (flags & LONGLONG)
755 					*va_arg(ap, long long *) = res;
756 				else if (flags & INTMAXT)
757 					*va_arg(ap, intmax_t *) = res;
758 				else if (flags & PTRDIFFT)
759 					*va_arg(ap, ptrdiff_t *) = res;
760 				else if (flags & SIZET)
761 					*va_arg(ap, size_t *) = res;
762 				else
763 					*va_arg(ap, int *) = res;
764 				nassigned++;
765 			}
766 			nread += p - buf;
767 			nconversions++;
768 			break;
769 
770 #ifndef NO_FLOATING_POINT
771 		case CT_FLOAT:
772 			/* scan a floating point number as if by strtod */
773 			if (width == 0 || width > sizeof(buf) - 1)
774 				width = sizeof(buf) - 1;
775 			if ((width = parsefloat(fp, buf, buf + width)) == 0)
776 				goto match_failure;
777 			if ((flags & SUPPRESS) == 0) {
778 				if (flags & LONGDBL) {
779 					long double res = strtold(buf, &p);
780 					*va_arg(ap, long double *) = res;
781 				} else if (flags & LONG) {
782 					double res = strtod(buf, &p);
783 					*va_arg(ap, double *) = res;
784 				} else {
785 					float res = strtof(buf, &p);
786 					*va_arg(ap, float *) = res;
787 				}
788 				if (__scanfdebug && p - buf != width)
789 					abort();
790 				nassigned++;
791 			}
792 			nread += width;
793 			nconversions++;
794 			break;
795 #endif /* !NO_FLOATING_POINT */
796 		}
797 	}
798 input_failure:
799 	return (nconversions != 0 ? nassigned : EOF);
800 match_failure:
801 	return (nassigned);
802 }
803 
804 /*
805  * Fill in the given table from the scanset at the given format
806  * (just after `[').  Return a pointer to the character past the
807  * closing `]'.  The table has a 1 wherever characters should be
808  * considered part of the scanset.
809  */
810 static const u_char *
811 __sccl(tab, fmt)
812 	char *tab;
813 	const u_char *fmt;
814 {
815 	int c, n, v, i;
816 
817 	/* first `clear' the whole table */
818 	c = *fmt++;		/* first char hat => negated scanset */
819 	if (c == '^') {
820 		v = 1;		/* default => accept */
821 		c = *fmt++;	/* get new first char */
822 	} else
823 		v = 0;		/* default => reject */
824 
825 	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
826 	(void) memset(tab, v, 256);
827 
828 	if (c == 0)
829 		return (fmt - 1);/* format ended before closing ] */
830 
831 	/*
832 	 * Now set the entries corresponding to the actual scanset
833 	 * to the opposite of the above.
834 	 *
835 	 * The first character may be ']' (or '-') without being special;
836 	 * the last character may be '-'.
837 	 */
838 	v = 1 - v;
839 	for (;;) {
840 		tab[c] = v;		/* take character c */
841 doswitch:
842 		n = *fmt++;		/* and examine the next */
843 		switch (n) {
844 
845 		case 0:			/* format ended too soon */
846 			return (fmt - 1);
847 
848 		case '-':
849 			/*
850 			 * A scanset of the form
851 			 *	[01+-]
852 			 * is defined as `the digit 0, the digit 1,
853 			 * the character +, the character -', but
854 			 * the effect of a scanset such as
855 			 *	[a-zA-Z0-9]
856 			 * is implementation defined.  The V7 Unix
857 			 * scanf treats `a-z' as `the letters a through
858 			 * z', but treats `a-a' as `the letter a, the
859 			 * character -, and the letter a'.
860 			 *
861 			 * For compatibility, the `-' is not considerd
862 			 * to define a range if the character following
863 			 * it is either a close bracket (required by ANSI)
864 			 * or is not numerically greater than the character
865 			 * we just stored in the table (c).
866 			 */
867 			n = *fmt;
868 			if (n == ']'
869 			    || (__collate_load_error ? n < c :
870 				__collate_range_cmp (n, c) < 0
871 			       )
872 			   ) {
873 				c = '-';
874 				break;	/* resume the for(;;) */
875 			}
876 			fmt++;
877 			/* fill in the range */
878 			if (__collate_load_error) {
879 				do {
880 					tab[++c] = v;
881 				} while (c < n);
882 			} else {
883 				for (i = 0; i < 256; i ++)
884 					if (   __collate_range_cmp (c, i) < 0
885 					    && __collate_range_cmp (i, n) <= 0
886 					   )
887 						tab[i] = v;
888 			}
889 #if 1	/* XXX another disgusting compatibility hack */
890 			c = n;
891 			/*
892 			 * Alas, the V7 Unix scanf also treats formats
893 			 * such as [a-c-e] as `the letters a through e'.
894 			 * This too is permitted by the standard....
895 			 */
896 			goto doswitch;
897 #else
898 			c = *fmt++;
899 			if (c == 0)
900 				return (fmt - 1);
901 			if (c == ']')
902 				return (fmt);
903 #endif
904 			break;
905 
906 		case ']':		/* end of scanset */
907 			return (fmt);
908 
909 		default:		/* just another character */
910 			c = n;
911 			break;
912 		}
913 	}
914 	/* NOTREACHED */
915 }
916 
917 #ifndef NO_FLOATING_POINT
918 static int
919 parsefloat(FILE *fp, char *buf, char *end)
920 {
921 	char *commit, *p;
922 	int infnanpos = 0;
923 	enum {
924 		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
925 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
926 	} state = S_START;
927 	unsigned char c;
928 	char decpt = *localeconv()->decimal_point;
929 	_Bool gotmantdig = 0, ishex = 0;
930 
931 	/*
932 	 * We set commit = p whenever the string we have read so far
933 	 * constitutes a valid representation of a floating point
934 	 * number by itself.  At some point, the parse will complete
935 	 * or fail, and we will ungetc() back to the last commit point.
936 	 * To ensure that the file offset gets updated properly, it is
937 	 * always necessary to read at least one character that doesn't
938 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
939 	 */
940 	commit = buf - 1;
941 	for (p = buf; p < end; ) {
942 		c = *fp->_p;
943 reswitch:
944 		switch (state) {
945 		case S_START:
946 			state = S_GOTSIGN;
947 			if (c == '-' || c == '+')
948 				break;
949 			else
950 				goto reswitch;
951 		case S_GOTSIGN:
952 			switch (c) {
953 			case '0':
954 				state = S_MAYBEHEX;
955 				commit = p;
956 				break;
957 			case 'I':
958 			case 'i':
959 				state = S_INF;
960 				break;
961 			case 'N':
962 			case 'n':
963 				state = S_NAN;
964 				break;
965 			default:
966 				state = S_DIGITS;
967 				goto reswitch;
968 			}
969 			break;
970 		case S_INF:
971 			if (infnanpos > 6 ||
972 			    (c != "nfinity"[infnanpos] &&
973 			     c != "NFINITY"[infnanpos]))
974 				goto parsedone;
975 			if (infnanpos == 1 || infnanpos == 6)
976 				commit = p;	/* inf or infinity */
977 			infnanpos++;
978 			break;
979 		case S_NAN:
980 			switch (infnanpos) {
981 			case -1:	/* XXX kludge to deal with nan(...) */
982 				goto parsedone;
983 			case 0:
984 				if (c != 'A' && c != 'a')
985 					goto parsedone;
986 				break;
987 			case 1:
988 				if (c != 'N' && c != 'n')
989 					goto parsedone;
990 				else
991 					commit = p;
992 				break;
993 			case 2:
994 				if (c != '(')
995 					goto parsedone;
996 				break;
997 			default:
998 				if (c == ')') {
999 					commit = p;
1000 					infnanpos = -2;
1001 				} else if (!isalnum(c) && c != '_')
1002 					goto parsedone;
1003 				break;
1004 			}
1005 			infnanpos++;
1006 			break;
1007 		case S_MAYBEHEX:
1008 			state = S_DIGITS;
1009 			if (c == 'X' || c == 'x') {
1010 				ishex = 1;
1011 				break;
1012 			} else {	/* we saw a '0', but no 'x' */
1013 				gotmantdig = 1;
1014 				goto reswitch;
1015 			}
1016 		case S_DIGITS:
1017 			if ((ishex && isxdigit(c)) || isdigit(c))
1018 				gotmantdig = 1;
1019 			else {
1020 				state = S_FRAC;
1021 				if (c != decpt)
1022 					goto reswitch;
1023 			}
1024 			if (gotmantdig)
1025 				commit = p;
1026 			break;
1027 		case S_FRAC:
1028 			if (((c == 'E' || c == 'e') && !ishex) ||
1029 			    ((c == 'P' || c == 'p') && ishex)) {
1030 				if (!gotmantdig)
1031 					goto parsedone;
1032 				else
1033 					state = S_EXP;
1034 			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
1035 				commit = p;
1036 				gotmantdig = 1;
1037 			} else
1038 				goto parsedone;
1039 			break;
1040 		case S_EXP:
1041 			state = S_EXPDIGITS;
1042 			if (c == '-' || c == '+')
1043 				break;
1044 			else
1045 				goto reswitch;
1046 		case S_EXPDIGITS:
1047 			if (isdigit(c))
1048 				commit = p;
1049 			else
1050 				goto parsedone;
1051 			break;
1052 		default:
1053 			abort();
1054 		}
1055 		*p++ = c;
1056 		if (--fp->_r > 0)
1057 			fp->_p++;
1058 		else if (__srefill(fp))
1059 			break;	/* EOF */
1060 	}
1061 
1062 parsedone:
1063 	while (commit < --p)
1064 		__ungetc(*(u_char *)p, fp);
1065 	*++commit = '\0';
1066 	return (commit - buf);
1067 }
1068 #endif
1069