1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Copyright (c) 2011 The FreeBSD Foundation
8 *
9 * Copyright (c) 2023 Dag-Erling Smørgrav
10 *
11 * Portions of this software were developed by David Chisnall
12 * under sponsorship from the FreeBSD Foundation.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Chris Torek.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 */
41
42 #include "namespace.h"
43 #include <ctype.h>
44 #include <inttypes.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <stddef.h>
48 #include <stdarg.h>
49 #include <string.h>
50 #include <wchar.h>
51 #include <wctype.h>
52 #include "un-namespace.h"
53
54 #include "collate.h"
55 #include "libc_private.h"
56 #include "local.h"
57 #include "xlocale_private.h"
58
59 #ifndef NO_FLOATING_POINT
60 #include <locale.h>
61 #endif
62
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[bBoupxX] conversions */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[bBdioupxX] conversion */
#define	CT_FLOAT	4	/* %[aefgAEFG] conversion */

/* Builds the 256-entry scanset table for a %[...] conversion. */
static const u_char *__sccl(char *, const u_char *);
#ifndef NO_FLOATING_POINT
/* Collects the text of a floating point number into a buffer. */
static int parsefloat(FILE *, char *, char *, locale_t);
#endif

__weak_reference(__vfscanf, vfscanf);

/*
 * Conversion functions are passed a pointer to this object instead of
 * a real parameter to indicate that the assignment-suppression (*)
 * flag was specified.  We could use a NULL pointer to indicate this,
 * but that would mask bugs in applications that call scanf() with a
 * NULL pointer.
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/* Zero-initialized conversion state, copied to start each wide conversion. */
static const mbstate_t initial_mbs;
109
110 /*
111 * The following conversion functions return the number of characters consumed,
112 * or -1 on input failure. Character class conversion returns 0 on match
113 * failure.
114 */
115
116 static __inline int
convert_char(FILE * fp,char * p,int width)117 convert_char(FILE *fp, char * p, int width)
118 {
119 int n;
120
121 if (p == SUPPRESS_PTR) {
122 size_t sum = 0;
123 for (;;) {
124 if ((n = fp->_r) < width) {
125 sum += n;
126 width -= n;
127 fp->_p += n;
128 if (__srefill(fp)) {
129 if (sum == 0)
130 return (-1);
131 break;
132 }
133 } else {
134 sum += width;
135 fp->_r -= width;
136 fp->_p += width;
137 break;
138 }
139 }
140 return (sum);
141 } else {
142 size_t r = __fread(p, 1, width, fp);
143
144 if (r == 0)
145 return (-1);
146 return (r);
147 }
148 }
149
150 static __inline int
convert_wchar(FILE * fp,wchar_t * wcp,int width,locale_t locale)151 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
152 {
153 mbstate_t mbs;
154 int n, nread;
155 wint_t wi;
156
157 mbs = initial_mbs;
158 n = 0;
159 while (width-- != 0 &&
160 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
161 if (wcp != SUPPRESS_PTR)
162 *wcp++ = (wchar_t)wi;
163 n += nread;
164 }
165 if (n == 0)
166 return (-1);
167 return (n);
168 }
169
170 static __inline int
convert_ccl(FILE * fp,char * p,int width,const char * ccltab)171 convert_ccl(FILE *fp, char * p, int width, const char *ccltab)
172 {
173 char *p0;
174 int n;
175
176 if (p == SUPPRESS_PTR) {
177 n = 0;
178 while (ccltab[*fp->_p]) {
179 n++, fp->_r--, fp->_p++;
180 if (--width == 0)
181 break;
182 if (fp->_r <= 0 && __srefill(fp)) {
183 if (n == 0)
184 return (-1);
185 break;
186 }
187 }
188 } else {
189 p0 = p;
190 while (ccltab[*fp->_p]) {
191 fp->_r--;
192 *p++ = *fp->_p++;
193 if (--width == 0)
194 break;
195 if (fp->_r <= 0 && __srefill(fp)) {
196 if (p == p0)
197 return (-1);
198 break;
199 }
200 }
201 n = p - p0;
202 if (n == 0)
203 return (0);
204 *p = 0;
205 }
206 return (n);
207 }
208
209 static __inline int
convert_wccl(FILE * fp,wchar_t * wcp,int width,const char * ccltab,locale_t locale)210 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
211 locale_t locale)
212 {
213 mbstate_t mbs;
214 wint_t wi;
215 int n, nread;
216
217 mbs = initial_mbs;
218 n = 0;
219 if (wcp == SUPPRESS_PTR) {
220 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
221 width-- != 0 && ccltab[wctob(wi)])
222 n += nread;
223 if (wi != WEOF)
224 __ungetwc(wi, fp, __get_locale());
225 } else {
226 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
227 width-- != 0 && ccltab[wctob(wi)]) {
228 *wcp++ = (wchar_t)wi;
229 n += nread;
230 }
231 if (wi != WEOF)
232 __ungetwc(wi, fp, __get_locale());
233 if (n == 0)
234 return (0);
235 *wcp = 0;
236 }
237 return (n);
238 }
239
240 static __inline int
convert_string(FILE * fp,char * p,int width)241 convert_string(FILE *fp, char * p, int width)
242 {
243 char *p0;
244 int n;
245
246 if (p == SUPPRESS_PTR) {
247 n = 0;
248 while (!isspace(*fp->_p)) {
249 n++, fp->_r--, fp->_p++;
250 if (--width == 0)
251 break;
252 if (fp->_r <= 0 && __srefill(fp))
253 break;
254 }
255 } else {
256 p0 = p;
257 while (!isspace(*fp->_p)) {
258 fp->_r--;
259 *p++ = *fp->_p++;
260 if (--width == 0)
261 break;
262 if (fp->_r <= 0 && __srefill(fp))
263 break;
264 }
265 *p = 0;
266 n = p - p0;
267 }
268 return (n);
269 }
270
271 static __inline int
convert_wstring(FILE * fp,wchar_t * wcp,int width,locale_t locale)272 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
273 {
274 mbstate_t mbs;
275 wint_t wi;
276 int n, nread;
277
278 mbs = initial_mbs;
279 n = 0;
280 if (wcp == SUPPRESS_PTR) {
281 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
282 width-- != 0 && !iswspace(wi))
283 n += nread;
284 if (wi != WEOF)
285 __ungetwc(wi, fp, __get_locale());
286 } else {
287 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
288 width-- != 0 && !iswspace(wi)) {
289 *wcp++ = (wchar_t)wi;
290 n += nread;
291 }
292 if (wi != WEOF)
293 __ungetwc(wi, fp, __get_locale());
294 *wcp = '\0';
295 }
296 return (n);
297 }
298
/*
 * States of the integer recognizer implemented by parseint_fsm().
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* only a leading '+' or '-' accepted */
	havezero,	/* a first '0' accepted (after an optional sign) */
	haveprefix,	/* "0b"/"0B" or "0x"/"0X" accepted, no digit yet */
	any,		/* at least one further digit accepted */
};

/*
 * Feed one input character to the integer recognizer.
 *
 * c      the character to classify
 * state  in/out: current recognizer state
 * base   in/out: conversion base; 0 means "detect from the input", in
 *        which case it is set to 2, 8, 10 or 16 as soon as the input
 *        determines it
 *
 * Returns 1 if c extends the number read so far (state and possibly
 * base are updated), 0 if c cannot be part of it.
 *
 * With base 0, any digit following a leading zero selects octal.  This
 * includes a second zero, so that input such as "007" is accepted in
 * full instead of stopping after "00".
 */
static __inline int
parseint_fsm(int c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state == begin) {
			*state = havesign;
			return (1);
		}
		return (0);
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/* A digit after a leading zero means octal. */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return (1);
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A leading non-zero digit means decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
	case 'B':
		/* Binary prefix, unless the base says it is a hex digit. */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		digit = 11;
		break;
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		digit = c - 'a' + 10;
		break;
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = c - 'A' + 10;
		break;
	case 'x':
	case 'X':
		/* Hexadecimal prefix; 'x' is never a digit. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return (1);
		}
		return (0);
	default:
		return (0);
	}

	/* c is a digit character; accept it if it is valid in this base. */
	if (*base > digit) {
		*state = any;
		return (1);
	}
	return (0);
}
418
419 /*
420 * Read an integer, storing it in buf.
421 *
422 * Return 0 on a match failure, and the number of characters read
423 * otherwise.
424 */
425 static __inline int
parseint(FILE * fp,char * __restrict buf,int width,int base)426 parseint(FILE *fp, char * __restrict buf, int width, int base)
427 {
428 enum parseint_state state = begin;
429 char *p;
430 int c;
431
432 for (p = buf; width; width--) {
433 c = __sgetc(fp);
434 if (c == EOF)
435 break;
436 if (!parseint_fsm(c, &state, &base))
437 break;
438 *p++ = c;
439 }
440 /*
441 * If we only had a sign, push it back. If we only had a 0b or 0x
442 * prefix (possibly preceded by a sign), we view it as "0" and
443 * push back the letter. In all other cases, if we stopped
444 * because we read a non-number character, push it back.
445 */
446 if (state == havesign) {
447 p--;
448 (void) __ungetc(*(u_char *)p, fp);
449 } else if (state == haveprefix) {
450 p--;
451 (void) __ungetc(c, fp);
452 } else if (width && c != EOF) {
453 (void) __ungetc(c, fp);
454 }
455 return (p - buf);
456 }
457
/*
 * __vfscanf - MT-safe version
 *
 * Locks fp, scans it according to fmt0 using the locale returned by
 * __get_locale(), unlocks fp and returns whatever __svfscanf() returned.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int result;

	FLOCKFILE_CANCELSAFE(fp);
	result = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (result);
}
471 int
vfscanf_l(FILE * fp,locale_t locale,char const * fmt0,va_list ap)472 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
473 {
474 int ret;
475 FIX_LOCALE(locale);
476
477 FLOCKFILE_CANCELSAFE(fp);
478 ret = __svfscanf(fp, locale, fmt0, ap);
479 FUNLOCKFILE_CANCELSAFE();
480 return (ret);
481 }
482
/*
 * __svfscanf - non-MT-safe version of __vfscanf
 *
 * Walks the format string fmt0 and, for each directive, consumes input
 * from fp and stores converted values through the pointers taken from ap.
 *
 * Returns the number of fields assigned.  On an input failure the
 * return value is EOF if no conversion had been completed, and the
 * number of fields assigned otherwise.  A format that ends directly
 * after a '%' also yields EOF.
 */
int
__svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
{
/* Next pointer argument, or SUPPRESS_PTR if the directive had a '*'. */
#define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
	const u_char *fmt = (const u_char *)fmt0;
	int c;			/* character from format, or conversion */
	size_t width;		/* field width, or 0 */
	int flags;		/* flags as defined above */
	int nassigned;		/* number of fields assigned */
	int nconversions;	/* number of conversions */
	int nr;			/* characters read by the current conversion */
	int nread;		/* number of characters consumed from fp */
	int base;		/* base argument to conversion function */
	char ccltab[256];	/* character class table for %[...] */
	char buf[BUF];		/* buffer for numeric conversions */

	ORIENT(fp, -1);

	nassigned = 0;
	nconversions = 0;
	nread = 0;
	for (;;) {
		c = *fmt++;
		if (c == 0)
			return (nassigned);
		/* White space in the format skips any white space in the input. */
		if (isspace(c)) {
			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
				nread++, fp->_r--, fp->_p++;
			continue;
		}
		if (c != '%')
			goto literal;
		width = 0;
		flags = 0;
		/*
		 * switch on the format.  continue if done;
		 * break once format type is derived.
		 */
again:		c = *fmt++;
		switch (c) {
		case '%':
literal:
			/* The next input byte must equal the format byte. */
			if (fp->_r <= 0 && __srefill(fp))
				goto input_failure;
			if (*fp->_p != c)
				goto match_failure;
			fp->_r--, fp->_p++;
			nread++;
			continue;

		case '*':
			flags |= SUPPRESS;
			goto again;
		case 'j':
			flags |= INTMAXT;
			goto again;
		case 'l':
			/* A second 'l' turns LONG into LONGLONG. */
			if (flags & LONG) {
				flags &= ~LONG;
				flags |= LONGLONG;
			} else
				flags |= LONG;
			goto again;
		case 'q':
			flags |= LONGLONG;	/* not quite */
			goto again;
		case 't':
			flags |= PTRDIFFT;
			goto again;
		case 'w':
			/*
			 * Fixed-width integer types.  On all platforms we
			 * support, int8_t is equivalent to char, int16_t
			 * is equivalent to short, int32_t is equivalent
			 * to int, int64_t is equivalent to long long int.
			 * Furthermore, int_fast8_t, int_fast16_t and
			 * int_fast32_t are equivalent to int, and
			 * int_fast64_t is equivalent to long long int.
			 */
			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
			if (fmt[0] == 'f') {
				flags |= FASTINT;
				fmt++;
			} else {
				flags &= ~FASTINT;
			}
			if (fmt[0] == '8') {
				if (!(flags & FASTINT))
					flags |= SHORTSHORT;
				else
					/* no flag set = 32 */ ;
				fmt += 1;
			} else if (fmt[0] == '1' && fmt[1] == '6') {
				if (!(flags & FASTINT))
					flags |= SHORT;
				else
					/* no flag set = 32 */ ;
				fmt += 2;
			} else if (fmt[0] == '3' && fmt[1] == '2') {
				/* no flag set = 32 */ ;
				fmt += 2;
			} else if (fmt[0] == '6' && fmt[1] == '4') {
				flags |= LONGLONG;
				fmt += 2;
			} else {
				/* Any other width is rejected. */
				goto match_failure;
			}
			goto again;
		case 'z':
			flags |= SIZET;
			goto again;
		case 'L':
			flags |= LONGDBL;
			goto again;
		case 'h':
			/* A second 'h' turns SHORT into SHORTSHORT. */
			if (flags & SHORT) {
				flags &= ~SHORT;
				flags |= SHORTSHORT;
			} else
				flags |= SHORT;
			goto again;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* Accumulate the decimal field width. */
			width = width * 10 + c - '0';
			goto again;

		/*
		 * Conversions.
		 */
		case 'B':
		case 'b':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 2;
			break;

		case 'd':
			c = CT_INT;
			base = 10;
			break;

		case 'i':
			c = CT_INT;
			base = 0;	/* base is detected from the input */
			break;

		case 'o':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 8;
			break;

		case 'u':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 10;
			break;

		case 'X':
		case 'x':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 16;
			break;

#ifndef NO_FLOATING_POINT
		case 'A': case 'E': case 'F': case 'G':
		case 'a': case 'e': case 'f': case 'g':
			c = CT_FLOAT;
			break;
#endif

		case 'S':
			flags |= LONG;
			/* FALLTHROUGH */
		case 's':
			c = CT_STRING;
			break;

		case '[':
			/* Build the scanset table and step past the closing ']'. */
			fmt = __sccl(ccltab, fmt);
			flags |= NOSKIP;
			c = CT_CCL;
			break;

		case 'C':
			flags |= LONG;
			/* FALLTHROUGH */
		case 'c':
			flags |= NOSKIP;
			c = CT_CHAR;
			break;

		case 'p':	/* pointer format is like hex */
			flags |= POINTER;
			c = CT_INT;		/* assumes sizeof(uintmax_t) */
			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
			base = 16;
			break;

		case 'n':
			/* Store the count of input consumed so far; reads nothing. */
			if (flags & SUPPRESS)	/* ??? */
				continue;
			if (flags & SHORTSHORT)
				*va_arg(ap, char *) = nread;
			else if (flags & SHORT)
				*va_arg(ap, short *) = nread;
			else if (flags & LONG)
				*va_arg(ap, long *) = nread;
			else if (flags & LONGLONG)
				*va_arg(ap, long long *) = nread;
			else if (flags & INTMAXT)
				*va_arg(ap, intmax_t *) = nread;
			else if (flags & SIZET)
				*va_arg(ap, size_t *) = nread;
			else if (flags & PTRDIFFT)
				*va_arg(ap, ptrdiff_t *) = nread;
			else
				*va_arg(ap, int *) = nread;
			continue;

		default:
			goto match_failure;

		/*
		 * Disgusting backwards compatibility hack.	XXX
		 */
		case '\0':	/* compat */
			return (EOF);
		}

		/*
		 * We have a conversion that requires input.
		 */
		if (fp->_r <= 0 && __srefill(fp))
			goto input_failure;

		/*
		 * Consume leading white space, except for formats
		 * that suppress this.
		 */
		if ((flags & NOSKIP) == 0) {
			while (isspace(*fp->_p)) {
				nread++;
				if (--fp->_r > 0)
					fp->_p++;
				else if (__srefill(fp))
					goto input_failure;
			}
			/*
			 * Note that there is at least one character in
			 * the buffer, so conversions that do not set NOSKIP
			 * can no longer result in an input failure.
			 */
		}

		/*
		 * Do the conversion.
		 */
		switch (c) {

		case CT_CHAR:
			/* scan arbitrary characters (sets NOSKIP) */
			if (width == 0)
				width = 1;
			if (flags & LONG) {
				nr = convert_wchar(fp, GETARG(wchar_t *),
				    width, locale);
			} else {
				nr = convert_char(fp, GETARG(char *), width);
			}
			if (nr < 0)
				goto input_failure;
			break;

		case CT_CCL:
			/* scan a (nonempty) character class (sets NOSKIP) */
			if (width == 0)
				width = (size_t)~0;	/* `infinity' */
			if (flags & LONG) {
				nr = convert_wccl(fp, GETARG(wchar_t *), width,
				    ccltab, locale);
			} else {
				nr = convert_ccl(fp, GETARG(char *), width,
				    ccltab);
			}
			if (nr <= 0) {
				if (nr < 0)
					goto input_failure;
				else /* nr == 0 */
					goto match_failure;
			}
			break;

		case CT_STRING:
			/* like CCL, but zero-length string OK, & no NOSKIP */
			if (width == 0)
				width = (size_t)~0;
			if (flags & LONG) {
				nr = convert_wstring(fp, GETARG(wchar_t *),
				    width, locale);
			} else {
				nr = convert_string(fp, GETARG(char *), width);
			}
			if (nr < 0)
				goto input_failure;
			break;

		case CT_INT:
			/* scan an integer as if by the conversion function */
#ifdef hardway
			if (width == 0 || width > sizeof(buf) - 1)
				width = sizeof(buf) - 1;
#else
			/*
			 * size_t is unsigned, hence this optimisation:
			 * a width of 0 wraps around on decrement and is
			 * caught by the same comparison as an oversized one.
			 */
			if (--width > sizeof(buf) - 2)
				width = sizeof(buf) - 2;
			width++;
#endif
			nr = parseint(fp, buf, width, base);
			if (nr == 0)
				goto match_failure;
			if ((flags & SUPPRESS) == 0) {
				uintmax_t res;

				buf[nr] = '\0';
				if ((flags & UNSIGNED) == 0)
					res = strtoimax_l(buf, (char **)NULL, base, locale);
				else
					res = strtoumax_l(buf, (char **)NULL, base, locale);
				/* Store through a pointer of the requested type. */
				if (flags & POINTER)
					*va_arg(ap, void **) =
					    (void *)(uintptr_t)res;
				else if (flags & SHORTSHORT)
					*va_arg(ap, char *) = res;
				else if (flags & SHORT)
					*va_arg(ap, short *) = res;
				else if (flags & LONG)
					*va_arg(ap, long *) = res;
				else if (flags & LONGLONG)
					*va_arg(ap, long long *) = res;
				else if (flags & INTMAXT)
					*va_arg(ap, intmax_t *) = res;
				else if (flags & PTRDIFFT)
					*va_arg(ap, ptrdiff_t *) = res;
				else if (flags & SIZET)
					*va_arg(ap, size_t *) = res;
				else
					*va_arg(ap, int *) = res;
			}
			break;

#ifndef NO_FLOATING_POINT
		case CT_FLOAT:
			/* scan a floating point number as if by strtod */
			if (width == 0 || width > sizeof(buf) - 1)
				width = sizeof(buf) - 1;
			nr = parsefloat(fp, buf, buf + width, locale);
			if (nr == 0)
				goto match_failure;
			if ((flags & SUPPRESS) == 0) {
				if (flags & LONGDBL) {
					long double res = strtold_l(buf, NULL,
					    locale);
					*va_arg(ap, long double *) = res;
				} else if (flags & LONG) {
					double res = strtod_l(buf, NULL,
					    locale);
					*va_arg(ap, double *) = res;
				} else {
					float res = strtof_l(buf, NULL, locale);
					*va_arg(ap, float *) = res;
				}
			}
			break;
#endif /* !NO_FLOATING_POINT */
		}
		/* The conversion succeeded; update the running totals. */
		if (!(flags & SUPPRESS))
			nassigned++;
		nread += nr;
		nconversions++;
	}
input_failure:
	return (nconversions != 0 ? nassigned : EOF);
match_failure:
	return (nassigned);
}
874
875 /*
876 * Fill in the given table from the scanset at the given format
877 * (just after `['). Return a pointer to the character past the
878 * closing `]'. The table has a 1 wherever characters should be
879 * considered part of the scanset.
880 */
881 static const u_char *
__sccl(char * tab,const u_char * fmt)882 __sccl(char *tab, const u_char *fmt)
883 {
884 int c, n, v, i;
885 struct xlocale_collate *table =
886 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
887
888 /* first `clear' the whole table */
889 c = *fmt++; /* first char hat => negated scanset */
890 if (c == '^') {
891 v = 1; /* default => accept */
892 c = *fmt++; /* get new first char */
893 } else
894 v = 0; /* default => reject */
895
896 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
897 (void) memset(tab, v, 256);
898
899 if (c == 0)
900 return (fmt - 1);/* format ended before closing ] */
901
902 /*
903 * Now set the entries corresponding to the actual scanset
904 * to the opposite of the above.
905 *
906 * The first character may be ']' (or '-') without being special;
907 * the last character may be '-'.
908 */
909 v = 1 - v;
910 for (;;) {
911 tab[c] = v; /* take character c */
912 doswitch:
913 n = *fmt++; /* and examine the next */
914 switch (n) {
915
916 case 0: /* format ended too soon */
917 return (fmt - 1);
918
919 case '-':
920 /*
921 * A scanset of the form
922 * [01+-]
923 * is defined as `the digit 0, the digit 1,
924 * the character +, the character -', but
925 * the effect of a scanset such as
926 * [a-zA-Z0-9]
927 * is implementation defined. The V7 Unix
928 * scanf treats `a-z' as `the letters a through
929 * z', but treats `a-a' as `the letter a, the
930 * character -, and the letter a'.
931 *
932 * For compatibility, the `-' is not considered
933 * to define a range if the character following
934 * it is either a close bracket (required by ANSI)
935 * or is not numerically greater than the character
936 * we just stored in the table (c).
937 */
938 n = *fmt;
939 if (n == ']'
940 || (table->__collate_load_error ? n < c :
941 __collate_range_cmp(n, c) < 0
942 )
943 ) {
944 c = '-';
945 break; /* resume the for(;;) */
946 }
947 fmt++;
948 /* fill in the range */
949 if (table->__collate_load_error) {
950 do {
951 tab[++c] = v;
952 } while (c < n);
953 } else {
954 for (i = 0; i < 256; i ++)
955 if (__collate_range_cmp(c, i) <= 0 &&
956 __collate_range_cmp(i, n) <= 0
957 )
958 tab[i] = v;
959 }
960 #if 1 /* XXX another disgusting compatibility hack */
961 c = n;
962 /*
963 * Alas, the V7 Unix scanf also treats formats
964 * such as [a-c-e] as `the letters a through e'.
965 * This too is permitted by the standard....
966 */
967 goto doswitch;
968 #else
969 c = *fmt++;
970 if (c == 0)
971 return (fmt - 1);
972 if (c == ']')
973 return (fmt);
974 #endif
975 break;
976
977 case ']': /* end of scanset */
978 return (fmt);
979
980 default: /* just another character */
981 c = n;
982 break;
983 }
984 }
985 /* NOTREACHED */
986 }
987
988 #ifndef NO_FLOATING_POINT
/*
 * Collect the text of a floating point number from fp into buf.
 *
 * fp      stream to read from; its buffer is read directly, so it must
 *         already contain at least one byte
 * buf     receives the accepted characters, NUL-terminated
 * end     limit of the scan: no character is stored at or beyond end,
 *         though the terminating NUL may be written at end itself
 * locale  supplies the decimal point string via localeconv_l()
 *
 * Returns the length of the accepted text, which is 0 if the input
 * does not start with a valid number.  Characters read beyond the
 * accepted text are pushed back onto fp.
 */
static int
parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
{
	char *commit, *p;
	/* Progress through "nfinity" / "an(...)" and through decpt. */
	int infnanpos = 0, decptpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
		S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	unsigned char c;
	const char *decpt = localeconv_l(locale)->decimal_point;
	/* gotmantdig: a mantissa digit was seen; ishex: "0x" was seen. */
	_Bool gotmantdig = 0, ishex = 0;

	/*
	 * We set commit = p whenever the string we have read so far
	 * constitutes a valid representation of a floating point
	 * number by itself.  At some point, the parse will complete
	 * or fail, and we will ungetc() back to the last commit point.
	 * To ensure that the file offset gets updated properly, it is
	 * always necessary to read at least one character that doesn't
	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
	 */
	commit = buf - 1;
	for (p = buf; p < end; ) {
		c = *fp->_p;
reswitch:
		/*
		 * A case that ends in `break' accepts c and moves on to
		 * the next character; `goto reswitch' re-examines the
		 * same character in the new state.
		 */
		switch (state) {
		case S_START:
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				state = S_MAYBEHEX;
				commit = p;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* Match the rest of "infinity", ignoring case. */
			if (infnanpos > 6 ||
			    (c != "nfinity"[infnanpos] &&
			     c != "NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			/* Match "an", then an optional "(chars)" suffix. */
			switch (infnanpos) {
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				if (c == ')') {
					commit = p;
					state = S_DONE;
				} else if (!isalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_DONE:
			goto parsedone;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			if ((ishex && isxdigit(c)) || isdigit(c)) {
				gotmantdig = 1;
				commit = p;
				break;
			} else {
				state = S_DECPT;
				goto reswitch;
			}
		case S_DECPT:
			if (c == decpt[decptpos]) {
				if (decpt[++decptpos] == '\0') {
					/* We read the complete decpt seq. */
					state = S_FRAC;
					if (gotmantdig)
						commit = p;
				}
				break;
			} else if (!decptpos) {
				/* We didn't read any decpt characters. */
				state = S_FRAC;
				goto reswitch;
			} else {
				/*
				 * We read part of a multibyte decimal point,
				 * but the rest is invalid, so bail.
				 */
				goto parsedone;
			}
		case S_FRAC:
			/* Exponent marker: e/E for decimal, p/P for hex. */
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
				commit = p;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (isdigit(c))
				commit = p;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		/* c was accepted: store it and advance the stream. */
		*p++ = c;
		if (--fp->_r > 0)
			fp->_p++;
		else if (__srefill(fp))
			break;	/* EOF */
	}

parsedone:
	/* Push back, last character first, everything after the commit point. */
	while (commit < --p)
		__ungetc(*(u_char *)p, fp);
	*++commit = '\0';
	return (commit - buf);
}
1157 #endif
1158