xref: /freebsd/lib/libc/stdio/vfscanf.c (revision 42821a2fc9aa8656e89d0353ece77c4799e940bb)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Copyright (c) 2011 The FreeBSD Foundation
6  * All rights reserved.
7  * Portions of this software were developed by David Chisnall
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Chris Torek.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
40 #endif /* LIBC_SCCS and not lint */
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include "namespace.h"
45 #include <ctype.h>
46 #include <inttypes.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <stddef.h>
50 #include <stdarg.h>
51 #include <string.h>
52 #include <wchar.h>
53 #include <wctype.h>
54 #include "un-namespace.h"
55 
56 #include "collate.h"
57 #include "libc_private.h"
58 #include "local.h"
59 #include "xlocale_private.h"
60 
61 #ifndef NO_FLOATING_POINT
62 #include <locale.h>
63 #endif
64 
65 #define	BUF		513	/* Maximum length of numeric string. */
66 
67 /*
68  * Flags used during conversion.
69  */
70 #define	LONG		0x01	/* l: long or double */
71 #define	LONGDBL		0x02	/* L: long double */
72 #define	SHORT		0x04	/* h: short */
73 #define	SUPPRESS	0x08	/* *: suppress assignment */
74 #define	POINTER		0x10	/* p: void * (as hex) */
75 #define	NOSKIP		0x20	/* [ or c: do not skip blanks */
76 #define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
77 #define	INTMAXT		0x800	/* j: intmax_t */
78 #define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
79 #define	SIZET		0x2000	/* z: size_t */
80 #define	SHORTSHORT	0x4000	/* hh: char */
81 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
82 
83 /*
84  * The following are used in integral conversions only:
85  * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
86  */
87 #define	SIGNOK		0x40	/* +/- is (still) legal */
88 #define	NDIGITS		0x80	/* no digits detected */
89 #define	PFXOK		0x100	/* 0x prefix is (still) legal */
90 #define	NZDIGITS	0x200	/* no zero digits detected */
91 #define	HAVESIGN	0x10000	/* sign detected */
92 
93 /*
94  * Conversion types.
95  */
96 #define	CT_CHAR		0	/* %c conversion */
97 #define	CT_CCL		1	/* %[...] conversion */
98 #define	CT_STRING	2	/* %s conversion */
99 #define	CT_INT		3	/* %[dioupxX] conversion */
100 #define	CT_FLOAT	4	/* %[efgEFG] conversion */
101 
102 static const u_char *__sccl(char *, const u_char *);
103 #ifndef NO_FLOATING_POINT
104 static int parsefloat(FILE *, char *, char *, locale_t);
105 #endif
106 
107 __weak_reference(__vfscanf, vfscanf);
108 
109 /*
110  * __vfscanf - MT-safe version
111  */
112 int
113 __vfscanf(FILE *fp, char const *fmt0, va_list ap)
114 {
115 	int ret;
116 
117 	FLOCKFILE(fp);
118 	ret = __svfscanf(fp, __get_locale(), fmt0, ap);
119 	FUNLOCKFILE(fp);
120 	return (ret);
121 }
122 int
123 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
124 {
125 	int ret;
126 	FIX_LOCALE(locale);
127 
128 	FLOCKFILE(fp);
129 	ret = __svfscanf(fp, locale, fmt0, ap);
130 	FUNLOCKFILE(fp);
131 	return (ret);
132 }
133 
134 /*
135  * __svfscanf - non-MT-safe version of __vfscanf
136  */
137 int
138 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
139 {
140 	const u_char *fmt = (const u_char *)fmt0;
141 	int c;			/* character from format, or conversion */
142 	size_t width;		/* field width, or 0 */
143 	char *p;		/* points into all kinds of strings */
144 	int n;			/* handy integer */
145 	int flags;		/* flags as defined above */
146 	char *p0;		/* saves original value of p when necessary */
147 	int nassigned;		/* number of fields assigned */
148 	int nconversions;	/* number of conversions */
149 	int nread;		/* number of characters consumed from fp */
150 	int base;		/* base argument to conversion function */
151 	char ccltab[256];	/* character class table for %[...] */
152 	char buf[BUF];		/* buffer for numeric and mb conversions */
153 	wchar_t *wcp;		/* handy wide character pointer */
154 	size_t nconv;		/* length of multibyte sequence converted */
155 	static const mbstate_t initial;
156 	mbstate_t mbs;
157 
158 	/* `basefix' is used to avoid `if' tests in the integer scanner */
159 	static short basefix[17] =
160 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
161 
162 	ORIENT(fp, -1);
163 
164 	nassigned = 0;
165 	nconversions = 0;
166 	nread = 0;
167 	for (;;) {
168 		c = *fmt++;
169 		if (c == 0)
170 			return (nassigned);
171 		if (isspace(c)) {
172 			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
173 				nread++, fp->_r--, fp->_p++;
174 			continue;
175 		}
176 		if (c != '%')
177 			goto literal;
178 		width = 0;
179 		flags = 0;
180 		/*
181 		 * switch on the format.  continue if done;
182 		 * break once format type is derived.
183 		 */
184 again:		c = *fmt++;
185 		switch (c) {
186 		case '%':
187 literal:
188 			if (fp->_r <= 0 && __srefill(fp))
189 				goto input_failure;
190 			if (*fp->_p != c)
191 				goto match_failure;
192 			fp->_r--, fp->_p++;
193 			nread++;
194 			continue;
195 
196 		case '*':
197 			flags |= SUPPRESS;
198 			goto again;
199 		case 'j':
200 			flags |= INTMAXT;
201 			goto again;
202 		case 'l':
203 			if (flags & LONG) {
204 				flags &= ~LONG;
205 				flags |= LONGLONG;
206 			} else
207 				flags |= LONG;
208 			goto again;
209 		case 'q':
210 			flags |= LONGLONG;	/* not quite */
211 			goto again;
212 		case 't':
213 			flags |= PTRDIFFT;
214 			goto again;
215 		case 'z':
216 			flags |= SIZET;
217 			goto again;
218 		case 'L':
219 			flags |= LONGDBL;
220 			goto again;
221 		case 'h':
222 			if (flags & SHORT) {
223 				flags &= ~SHORT;
224 				flags |= SHORTSHORT;
225 			} else
226 				flags |= SHORT;
227 			goto again;
228 
229 		case '0': case '1': case '2': case '3': case '4':
230 		case '5': case '6': case '7': case '8': case '9':
231 			width = width * 10 + c - '0';
232 			goto again;
233 
234 		/*
235 		 * Conversions.
236 		 */
237 		case 'd':
238 			c = CT_INT;
239 			base = 10;
240 			break;
241 
242 		case 'i':
243 			c = CT_INT;
244 			base = 0;
245 			break;
246 
247 		case 'o':
248 			c = CT_INT;
249 			flags |= UNSIGNED;
250 			base = 8;
251 			break;
252 
253 		case 'u':
254 			c = CT_INT;
255 			flags |= UNSIGNED;
256 			base = 10;
257 			break;
258 
259 		case 'X':
260 		case 'x':
261 			flags |= PFXOK;	/* enable 0x prefixing */
262 			c = CT_INT;
263 			flags |= UNSIGNED;
264 			base = 16;
265 			break;
266 
267 #ifndef NO_FLOATING_POINT
268 		case 'A': case 'E': case 'F': case 'G':
269 		case 'a': case 'e': case 'f': case 'g':
270 			c = CT_FLOAT;
271 			break;
272 #endif
273 
274 		case 'S':
275 			flags |= LONG;
276 			/* FALLTHROUGH */
277 		case 's':
278 			c = CT_STRING;
279 			break;
280 
281 		case '[':
282 			fmt = __sccl(ccltab, fmt);
283 			flags |= NOSKIP;
284 			c = CT_CCL;
285 			break;
286 
287 		case 'C':
288 			flags |= LONG;
289 			/* FALLTHROUGH */
290 		case 'c':
291 			flags |= NOSKIP;
292 			c = CT_CHAR;
293 			break;
294 
295 		case 'p':	/* pointer format is like hex */
296 			flags |= POINTER | PFXOK;
297 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
298 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
299 			base = 16;
300 			break;
301 
302 		case 'n':
303 			nconversions++;
304 			if (flags & SUPPRESS)	/* ??? */
305 				continue;
306 			if (flags & SHORTSHORT)
307 				*va_arg(ap, char *) = nread;
308 			else if (flags & SHORT)
309 				*va_arg(ap, short *) = nread;
310 			else if (flags & LONG)
311 				*va_arg(ap, long *) = nread;
312 			else if (flags & LONGLONG)
313 				*va_arg(ap, long long *) = nread;
314 			else if (flags & INTMAXT)
315 				*va_arg(ap, intmax_t *) = nread;
316 			else if (flags & SIZET)
317 				*va_arg(ap, size_t *) = nread;
318 			else if (flags & PTRDIFFT)
319 				*va_arg(ap, ptrdiff_t *) = nread;
320 			else
321 				*va_arg(ap, int *) = nread;
322 			continue;
323 
324 		default:
325 			goto match_failure;
326 
327 		/*
328 		 * Disgusting backwards compatibility hack.	XXX
329 		 */
330 		case '\0':	/* compat */
331 			return (EOF);
332 		}
333 
334 		/*
335 		 * We have a conversion that requires input.
336 		 */
337 		if (fp->_r <= 0 && __srefill(fp))
338 			goto input_failure;
339 
340 		/*
341 		 * Consume leading white space, except for formats
342 		 * that suppress this.
343 		 */
344 		if ((flags & NOSKIP) == 0) {
345 			while (isspace(*fp->_p)) {
346 				nread++;
347 				if (--fp->_r > 0)
348 					fp->_p++;
349 				else if (__srefill(fp))
350 					goto input_failure;
351 			}
352 			/*
353 			 * Note that there is at least one character in
354 			 * the buffer, so conversions that do not set NOSKIP
355 			 * ca no longer result in an input failure.
356 			 */
357 		}
358 
359 		/*
360 		 * Do the conversion.
361 		 */
362 		switch (c) {
363 
364 		case CT_CHAR:
365 			/* scan arbitrary characters (sets NOSKIP) */
366 			if (width == 0)
367 				width = 1;
368 			if (flags & LONG) {
369 				if ((flags & SUPPRESS) == 0)
370 					wcp = va_arg(ap, wchar_t *);
371 				else
372 					wcp = NULL;
373 				n = 0;
374 				while (width != 0) {
375 					if (n == MB_CUR_MAX) {
376 						fp->_flags |= __SERR;
377 						goto input_failure;
378 					}
379 					buf[n++] = *fp->_p;
380 					fp->_p++;
381 					fp->_r--;
382 					mbs = initial;
383 					nconv = mbrtowc(wcp, buf, n, &mbs);
384 					if (nconv == (size_t)-1) {
385 						fp->_flags |= __SERR;
386 						goto input_failure;
387 					}
388 					if (nconv == 0 && !(flags & SUPPRESS))
389 						*wcp = L'\0';
390 					if (nconv != (size_t)-2) {
391 						nread += n;
392 						width--;
393 						if (!(flags & SUPPRESS))
394 							wcp++;
395 						n = 0;
396 					}
397 					if (fp->_r <= 0 && __srefill(fp)) {
398 						if (n != 0) {
399 							fp->_flags |= __SERR;
400 							goto input_failure;
401 						}
402 						break;
403 					}
404 				}
405 				if (!(flags & SUPPRESS))
406 					nassigned++;
407 			} else if (flags & SUPPRESS) {
408 				size_t sum = 0;
409 				for (;;) {
410 					if ((n = fp->_r) < width) {
411 						sum += n;
412 						width -= n;
413 						fp->_p += n;
414 						if (__srefill(fp)) {
415 							if (sum == 0)
416 							    goto input_failure;
417 							break;
418 						}
419 					} else {
420 						sum += width;
421 						fp->_r -= width;
422 						fp->_p += width;
423 						break;
424 					}
425 				}
426 				nread += sum;
427 			} else {
428 				size_t r = __fread((void *)va_arg(ap, char *), 1,
429 				    width, fp);
430 
431 				if (r == 0)
432 					goto input_failure;
433 				nread += r;
434 				nassigned++;
435 			}
436 			nconversions++;
437 			break;
438 
439 		case CT_CCL:
440 			/* scan a (nonempty) character class (sets NOSKIP) */
441 			if (width == 0)
442 				width = (size_t)~0;	/* `infinity' */
443 			/* take only those things in the class */
444 			if (flags & LONG) {
445 				wchar_t twc;
446 				int nchars;
447 
448 				if ((flags & SUPPRESS) == 0)
449 					wcp = va_arg(ap, wchar_t *);
450 				else
451 					wcp = &twc;
452 				n = 0;
453 				nchars = 0;
454 				while (width != 0) {
455 					if (n == MB_CUR_MAX) {
456 						fp->_flags |= __SERR;
457 						goto input_failure;
458 					}
459 					buf[n++] = *fp->_p;
460 					fp->_p++;
461 					fp->_r--;
462 					mbs = initial;
463 					nconv = mbrtowc(wcp, buf, n, &mbs);
464 					if (nconv == (size_t)-1) {
465 						fp->_flags |= __SERR;
466 						goto input_failure;
467 					}
468 					if (nconv == 0)
469 						*wcp = L'\0';
470 					if (nconv != (size_t)-2) {
471 						if (wctob(*wcp) != EOF &&
472 						    !ccltab[wctob(*wcp)]) {
473 							while (n != 0) {
474 								n--;
475 								__ungetc(buf[n],
476 								    fp);
477 							}
478 							break;
479 						}
480 						nread += n;
481 						width--;
482 						if (!(flags & SUPPRESS))
483 							wcp++;
484 						nchars++;
485 						n = 0;
486 					}
487 					if (fp->_r <= 0 && __srefill(fp)) {
488 						if (n != 0) {
489 							fp->_flags |= __SERR;
490 							goto input_failure;
491 						}
492 						break;
493 					}
494 				}
495 				if (n != 0) {
496 					fp->_flags |= __SERR;
497 					goto input_failure;
498 				}
499 				n = nchars;
500 				if (n == 0)
501 					goto match_failure;
502 				if (!(flags & SUPPRESS)) {
503 					*wcp = L'\0';
504 					nassigned++;
505 				}
506 			} else if (flags & SUPPRESS) {
507 				n = 0;
508 				while (ccltab[*fp->_p]) {
509 					n++, fp->_r--, fp->_p++;
510 					if (--width == 0)
511 						break;
512 					if (fp->_r <= 0 && __srefill(fp)) {
513 						if (n == 0)
514 							goto input_failure;
515 						break;
516 					}
517 				}
518 				if (n == 0)
519 					goto match_failure;
520 			} else {
521 				p0 = p = va_arg(ap, char *);
522 				while (ccltab[*fp->_p]) {
523 					fp->_r--;
524 					*p++ = *fp->_p++;
525 					if (--width == 0)
526 						break;
527 					if (fp->_r <= 0 && __srefill(fp)) {
528 						if (p == p0)
529 							goto input_failure;
530 						break;
531 					}
532 				}
533 				n = p - p0;
534 				if (n == 0)
535 					goto match_failure;
536 				*p = 0;
537 				nassigned++;
538 			}
539 			nread += n;
540 			nconversions++;
541 			break;
542 
543 		case CT_STRING:
544 			/* like CCL, but zero-length string OK, & no NOSKIP */
545 			if (width == 0)
546 				width = (size_t)~0;
547 			if (flags & LONG) {
548 				wchar_t twc;
549 
550 				if ((flags & SUPPRESS) == 0)
551 					wcp = va_arg(ap, wchar_t *);
552 				else
553 					wcp = &twc;
554 				n = 0;
555 				while (!isspace(*fp->_p) && width != 0) {
556 					if (n == MB_CUR_MAX) {
557 						fp->_flags |= __SERR;
558 						goto input_failure;
559 					}
560 					buf[n++] = *fp->_p;
561 					fp->_p++;
562 					fp->_r--;
563 					mbs = initial;
564 					nconv = mbrtowc(wcp, buf, n, &mbs);
565 					if (nconv == (size_t)-1) {
566 						fp->_flags |= __SERR;
567 						goto input_failure;
568 					}
569 					if (nconv == 0)
570 						*wcp = L'\0';
571 					if (nconv != (size_t)-2) {
572 						if (iswspace(*wcp)) {
573 							while (n != 0) {
574 								n--;
575 								__ungetc(buf[n],
576 								    fp);
577 							}
578 							break;
579 						}
580 						nread += n;
581 						width--;
582 						if (!(flags & SUPPRESS))
583 							wcp++;
584 						n = 0;
585 					}
586 					if (fp->_r <= 0 && __srefill(fp)) {
587 						if (n != 0) {
588 							fp->_flags |= __SERR;
589 							goto input_failure;
590 						}
591 						break;
592 					}
593 				}
594 				if (!(flags & SUPPRESS)) {
595 					*wcp = L'\0';
596 					nassigned++;
597 				}
598 			} else if (flags & SUPPRESS) {
599 				n = 0;
600 				while (!isspace(*fp->_p)) {
601 					n++, fp->_r--, fp->_p++;
602 					if (--width == 0)
603 						break;
604 					if (fp->_r <= 0 && __srefill(fp))
605 						break;
606 				}
607 				nread += n;
608 			} else {
609 				p0 = p = va_arg(ap, char *);
610 				while (!isspace(*fp->_p)) {
611 					fp->_r--;
612 					*p++ = *fp->_p++;
613 					if (--width == 0)
614 						break;
615 					if (fp->_r <= 0 && __srefill(fp))
616 						break;
617 				}
618 				*p = 0;
619 				nread += p - p0;
620 				nassigned++;
621 			}
622 			nconversions++;
623 			continue;
624 
625 		case CT_INT:
626 			/* scan an integer as if by the conversion function */
627 #ifdef hardway
628 			if (width == 0 || width > sizeof(buf) - 1)
629 				width = sizeof(buf) - 1;
630 #else
631 			/* size_t is unsigned, hence this optimisation */
632 			if (--width > sizeof(buf) - 2)
633 				width = sizeof(buf) - 2;
634 			width++;
635 #endif
636 			flags |= SIGNOK | NDIGITS | NZDIGITS;
637 			for (p = buf; width; width--) {
638 				c = *fp->_p;
639 				/*
640 				 * Switch on the character; `goto ok'
641 				 * if we accept it as a part of number.
642 				 */
643 				switch (c) {
644 
645 				/*
646 				 * The digit 0 is always legal, but is
647 				 * special.  For %i conversions, if no
648 				 * digits (zero or nonzero) have been
649 				 * scanned (only signs), we will have
650 				 * base==0.  In that case, we should set
651 				 * it to 8 and enable 0x prefixing.
652 				 * Also, if we have not scanned zero digits
653 				 * before this, do not turn off prefixing
654 				 * (someone else will turn it off if we
655 				 * have scanned any nonzero digits).
656 				 */
657 				case '0':
658 					if (base == 0) {
659 						base = 8;
660 						flags |= PFXOK;
661 					}
662 					if (flags & NZDIGITS)
663 					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
664 					else
665 					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
666 					goto ok;
667 
668 				/* 1 through 7 always legal */
669 				case '1': case '2': case '3':
670 				case '4': case '5': case '6': case '7':
671 					base = basefix[base];
672 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
673 					goto ok;
674 
675 				/* digits 8 and 9 ok iff decimal or hex */
676 				case '8': case '9':
677 					base = basefix[base];
678 					if (base <= 8)
679 						break;	/* not legal here */
680 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
681 					goto ok;
682 
683 				/* letters ok iff hex */
684 				case 'A': case 'B': case 'C':
685 				case 'D': case 'E': case 'F':
686 				case 'a': case 'b': case 'c':
687 				case 'd': case 'e': case 'f':
688 					/* no need to fix base here */
689 					if (base <= 10)
690 						break;	/* not legal here */
691 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
692 					goto ok;
693 
694 				/* sign ok only as first character */
695 				case '+': case '-':
696 					if (flags & SIGNOK) {
697 						flags &= ~SIGNOK;
698 						flags |= HAVESIGN;
699 						goto ok;
700 					}
701 					break;
702 
703 				/*
704 				 * x ok iff flag still set & 2nd char (or
705 				 * 3rd char if we have a sign).
706 				 */
707 				case 'x': case 'X':
708 					if (flags & PFXOK && p ==
709 					    buf + 1 + !!(flags & HAVESIGN)) {
710 						base = 16;	/* if %i */
711 						flags &= ~PFXOK;
712 						goto ok;
713 					}
714 					break;
715 				}
716 
717 				/*
718 				 * If we got here, c is not a legal character
719 				 * for a number.  Stop accumulating digits.
720 				 */
721 				break;
722 		ok:
723 				/*
724 				 * c is legal: store it and look at the next.
725 				 */
726 				*p++ = c;
727 				if (--fp->_r > 0)
728 					fp->_p++;
729 				else if (__srefill(fp))
730 					break;		/* EOF */
731 			}
732 			/*
733 			 * If we had only a sign, it is no good; push
734 			 * back the sign.  If the number ends in `x',
735 			 * it was [sign] '0' 'x', so push back the x
736 			 * and treat it as [sign] '0'.
737 			 */
738 			if (flags & NDIGITS) {
739 				if (p > buf)
740 					(void) __ungetc(*(u_char *)--p, fp);
741 				goto match_failure;
742 			}
743 			c = ((u_char *)p)[-1];
744 			if (c == 'x' || c == 'X') {
745 				--p;
746 				(void) __ungetc(c, fp);
747 			}
748 			if ((flags & SUPPRESS) == 0) {
749 				uintmax_t res;
750 
751 				*p = 0;
752 				if ((flags & UNSIGNED) == 0)
753 				    res = strtoimax_l(buf, (char **)NULL, base, locale);
754 				else
755 				    res = strtoumax_l(buf, (char **)NULL, base, locale);
756 				if (flags & POINTER)
757 					*va_arg(ap, void **) =
758 							(void *)(uintptr_t)res;
759 				else if (flags & SHORTSHORT)
760 					*va_arg(ap, char *) = res;
761 				else if (flags & SHORT)
762 					*va_arg(ap, short *) = res;
763 				else if (flags & LONG)
764 					*va_arg(ap, long *) = res;
765 				else if (flags & LONGLONG)
766 					*va_arg(ap, long long *) = res;
767 				else if (flags & INTMAXT)
768 					*va_arg(ap, intmax_t *) = res;
769 				else if (flags & PTRDIFFT)
770 					*va_arg(ap, ptrdiff_t *) = res;
771 				else if (flags & SIZET)
772 					*va_arg(ap, size_t *) = res;
773 				else
774 					*va_arg(ap, int *) = res;
775 				nassigned++;
776 			}
777 			nread += p - buf;
778 			nconversions++;
779 			break;
780 
781 #ifndef NO_FLOATING_POINT
782 		case CT_FLOAT:
783 			/* scan a floating point number as if by strtod */
784 			if (width == 0 || width > sizeof(buf) - 1)
785 				width = sizeof(buf) - 1;
786 			if ((width = parsefloat(fp, buf, buf + width, locale)) == 0)
787 				goto match_failure;
788 			if ((flags & SUPPRESS) == 0) {
789 				if (flags & LONGDBL) {
790 					long double res = strtold_l(buf, &p, locale);
791 					*va_arg(ap, long double *) = res;
792 				} else if (flags & LONG) {
793 					double res = strtod_l(buf, &p, locale);
794 					*va_arg(ap, double *) = res;
795 				} else {
796 					float res = strtof_l(buf, &p, locale);
797 					*va_arg(ap, float *) = res;
798 				}
799 				nassigned++;
800 			}
801 			nread += width;
802 			nconversions++;
803 			break;
804 #endif /* !NO_FLOATING_POINT */
805 		}
806 	}
807 input_failure:
808 	return (nconversions != 0 ? nassigned : EOF);
809 match_failure:
810 	return (nassigned);
811 }
812 
813 /*
814  * Fill in the given table from the scanset at the given format
815  * (just after `[').  Return a pointer to the character past the
816  * closing `]'.  The table has a 1 wherever characters should be
817  * considered part of the scanset.
818  */
819 static const u_char *
820 __sccl(tab, fmt)
821 	char *tab;
822 	const u_char *fmt;
823 {
824 	int c, n, v, i;
825 	struct xlocale_collate *table =
826 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
827 
828 	/* first `clear' the whole table */
829 	c = *fmt++;		/* first char hat => negated scanset */
830 	if (c == '^') {
831 		v = 1;		/* default => accept */
832 		c = *fmt++;	/* get new first char */
833 	} else
834 		v = 0;		/* default => reject */
835 
836 	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
837 	(void) memset(tab, v, 256);
838 
839 	if (c == 0)
840 		return (fmt - 1);/* format ended before closing ] */
841 
842 	/*
843 	 * Now set the entries corresponding to the actual scanset
844 	 * to the opposite of the above.
845 	 *
846 	 * The first character may be ']' (or '-') without being special;
847 	 * the last character may be '-'.
848 	 */
849 	v = 1 - v;
850 	for (;;) {
851 		tab[c] = v;		/* take character c */
852 doswitch:
853 		n = *fmt++;		/* and examine the next */
854 		switch (n) {
855 
856 		case 0:			/* format ended too soon */
857 			return (fmt - 1);
858 
859 		case '-':
860 			/*
861 			 * A scanset of the form
862 			 *	[01+-]
863 			 * is defined as `the digit 0, the digit 1,
864 			 * the character +, the character -', but
865 			 * the effect of a scanset such as
866 			 *	[a-zA-Z0-9]
867 			 * is implementation defined.  The V7 Unix
868 			 * scanf treats `a-z' as `the letters a through
869 			 * z', but treats `a-a' as `the letter a, the
870 			 * character -, and the letter a'.
871 			 *
872 			 * For compatibility, the `-' is not considerd
873 			 * to define a range if the character following
874 			 * it is either a close bracket (required by ANSI)
875 			 * or is not numerically greater than the character
876 			 * we just stored in the table (c).
877 			 */
878 			n = *fmt;
879 			if (n == ']'
880 			    || (table->__collate_load_error ? n < c :
881 				__collate_range_cmp (table, n, c) < 0
882 			       )
883 			   ) {
884 				c = '-';
885 				break;	/* resume the for(;;) */
886 			}
887 			fmt++;
888 			/* fill in the range */
889 			if (table->__collate_load_error) {
890 				do {
891 					tab[++c] = v;
892 				} while (c < n);
893 			} else {
894 				for (i = 0; i < 256; i ++)
895 					if (   __collate_range_cmp (table, c, i) < 0
896 					    && __collate_range_cmp (table, i, n) <= 0
897 					   )
898 						tab[i] = v;
899 			}
900 #if 1	/* XXX another disgusting compatibility hack */
901 			c = n;
902 			/*
903 			 * Alas, the V7 Unix scanf also treats formats
904 			 * such as [a-c-e] as `the letters a through e'.
905 			 * This too is permitted by the standard....
906 			 */
907 			goto doswitch;
908 #else
909 			c = *fmt++;
910 			if (c == 0)
911 				return (fmt - 1);
912 			if (c == ']')
913 				return (fmt);
914 #endif
915 			break;
916 
917 		case ']':		/* end of scanset */
918 			return (fmt);
919 
920 		default:		/* just another character */
921 			c = n;
922 			break;
923 		}
924 	}
925 	/* NOTREACHED */
926 }
927 
928 #ifndef NO_FLOATING_POINT
929 static int
930 parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
931 {
932 	char *commit, *p;
933 	int infnanpos = 0, decptpos = 0;
934 	enum {
935 		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
936 		S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
937 	} state = S_START;
938 	unsigned char c;
939 	const char *decpt = localeconv_l(locale)->decimal_point;
940 	_Bool gotmantdig = 0, ishex = 0;
941 
942 	/*
943 	 * We set commit = p whenever the string we have read so far
944 	 * constitutes a valid representation of a floating point
945 	 * number by itself.  At some point, the parse will complete
946 	 * or fail, and we will ungetc() back to the last commit point.
947 	 * To ensure that the file offset gets updated properly, it is
948 	 * always necessary to read at least one character that doesn't
949 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
950 	 */
951 	commit = buf - 1;
952 	for (p = buf; p < end; ) {
953 		c = *fp->_p;
954 reswitch:
955 		switch (state) {
956 		case S_START:
957 			state = S_GOTSIGN;
958 			if (c == '-' || c == '+')
959 				break;
960 			else
961 				goto reswitch;
962 		case S_GOTSIGN:
963 			switch (c) {
964 			case '0':
965 				state = S_MAYBEHEX;
966 				commit = p;
967 				break;
968 			case 'I':
969 			case 'i':
970 				state = S_INF;
971 				break;
972 			case 'N':
973 			case 'n':
974 				state = S_NAN;
975 				break;
976 			default:
977 				state = S_DIGITS;
978 				goto reswitch;
979 			}
980 			break;
981 		case S_INF:
982 			if (infnanpos > 6 ||
983 			    (c != "nfinity"[infnanpos] &&
984 			     c != "NFINITY"[infnanpos]))
985 				goto parsedone;
986 			if (infnanpos == 1 || infnanpos == 6)
987 				commit = p;	/* inf or infinity */
988 			infnanpos++;
989 			break;
990 		case S_NAN:
991 			switch (infnanpos) {
992 			case 0:
993 				if (c != 'A' && c != 'a')
994 					goto parsedone;
995 				break;
996 			case 1:
997 				if (c != 'N' && c != 'n')
998 					goto parsedone;
999 				else
1000 					commit = p;
1001 				break;
1002 			case 2:
1003 				if (c != '(')
1004 					goto parsedone;
1005 				break;
1006 			default:
1007 				if (c == ')') {
1008 					commit = p;
1009 					state = S_DONE;
1010 				} else if (!isalnum(c) && c != '_')
1011 					goto parsedone;
1012 				break;
1013 			}
1014 			infnanpos++;
1015 			break;
1016 		case S_DONE:
1017 			goto parsedone;
1018 		case S_MAYBEHEX:
1019 			state = S_DIGITS;
1020 			if (c == 'X' || c == 'x') {
1021 				ishex = 1;
1022 				break;
1023 			} else {	/* we saw a '0', but no 'x' */
1024 				gotmantdig = 1;
1025 				goto reswitch;
1026 			}
1027 		case S_DIGITS:
1028 			if ((ishex && isxdigit(c)) || isdigit(c)) {
1029 				gotmantdig = 1;
1030 				commit = p;
1031 				break;
1032 			} else {
1033 				state = S_DECPT;
1034 				goto reswitch;
1035 			}
1036 		case S_DECPT:
1037 			if (c == decpt[decptpos]) {
1038 				if (decpt[++decptpos] == '\0') {
1039 					/* We read the complete decpt seq. */
1040 					state = S_FRAC;
1041 					if (gotmantdig)
1042 						commit = p;
1043 				}
1044 				break;
1045 			} else if (!decptpos) {
1046 				/* We didn't read any decpt characters. */
1047 				state = S_FRAC;
1048 				goto reswitch;
1049 			} else {
1050 				/*
1051 				 * We read part of a multibyte decimal point,
1052 				 * but the rest is invalid, so bail.
1053 				 */
1054 				goto parsedone;
1055 			}
1056 		case S_FRAC:
1057 			if (((c == 'E' || c == 'e') && !ishex) ||
1058 			    ((c == 'P' || c == 'p') && ishex)) {
1059 				if (!gotmantdig)
1060 					goto parsedone;
1061 				else
1062 					state = S_EXP;
1063 			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
1064 				commit = p;
1065 				gotmantdig = 1;
1066 			} else
1067 				goto parsedone;
1068 			break;
1069 		case S_EXP:
1070 			state = S_EXPDIGITS;
1071 			if (c == '-' || c == '+')
1072 				break;
1073 			else
1074 				goto reswitch;
1075 		case S_EXPDIGITS:
1076 			if (isdigit(c))
1077 				commit = p;
1078 			else
1079 				goto parsedone;
1080 			break;
1081 		default:
1082 			abort();
1083 		}
1084 		*p++ = c;
1085 		if (--fp->_r > 0)
1086 			fp->_p++;
1087 		else if (__srefill(fp))
1088 			break;	/* EOF */
1089 	}
1090 
1091 parsedone:
1092 	while (commit < --p)
1093 		__ungetc(*(u_char *)p, fp);
1094 	*++commit = '\0';
1095 	return (commit - buf);
1096 }
1097 #endif
1098