1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Copyright (c) 2011 The FreeBSD Foundation
8 *
9 * Copyright (c) 2023 Dag-Erling Smørgrav
10 *
11 * Portions of this software were developed by David Chisnall
12 * under sponsorship from the FreeBSD Foundation.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Chris Torek.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 */
41
42 #include "namespace.h"
43 #include <ctype.h>
44 #include <inttypes.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <stddef.h>
48 #include <stdarg.h>
49 #include <string.h>
50 #include <wchar.h>
51 #include <wctype.h>
52 #include "un-namespace.h"
53
54 #include "collate.h"
55 #include "libc_private.h"
56 #include "local.h"
57 #include "xlocale_private.h"
58
59 #ifndef NO_FLOATING_POINT
60 #include <locale.h>
61 #endif
62
/*
 * Size of the on-stack buffer that collects the text of a numeric
 * conversion; field widths are clamped so the text plus its NUL fits.
 */
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * LONG doubles as the "wide" selector: for c, s and [ it picks the
 * wchar_t variants of the conversion functions.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */

/* Builds the 256-entry scanset table for a %[...] conversion. */
static const u_char *__sccl(char *, const u_char *);
#ifndef NO_FLOATING_POINT
/* Collects the text of a floating point number into a bounded buffer. */
static int parsefloat(FILE *, char *, char *, locale_t);
#endif

__weak_reference(__vfscanf, vfscanf);

/*
 * Conversion functions are passed a pointer to this object instead of
 * a real parameter to indicate that the assignment-suppression (*)
 * flag was specified.  We could use a NULL pointer to indicate this,
 * but that would mask bugs in applications that call scanf() with a
 * NULL pointer.
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/*
 * Never written, so it keeps the zero value given to objects with static
 * storage duration.  The wide conversion functions copy it into their
 * local mbstate_t before reading.
 */
static const mbstate_t initial_mbs;
109
110 /*
111 * The following conversion functions return the number of characters consumed,
112 * or -1 on input failure. Character class conversion returns 0 on match
113 * failure.
114 */
115
116 static __inline int
convert_char(FILE * fp,char * p,int width)117 convert_char(FILE *fp, char * p, int width)
118 {
119 int n;
120
121 if (p == SUPPRESS_PTR) {
122 size_t sum = 0;
123 for (;;) {
124 if ((n = fp->_r) < width) {
125 sum += n;
126 width -= n;
127 fp->_p += n;
128 if (__srefill(fp)) {
129 if (sum == 0)
130 return (-1);
131 break;
132 }
133 } else {
134 sum += width;
135 fp->_r -= width;
136 fp->_p += width;
137 break;
138 }
139 }
140 return (sum);
141 } else {
142 size_t r = __fread(p, 1, width, fp);
143
144 if (r == 0)
145 return (-1);
146 return (r);
147 }
148 }
149
150 static __inline int
convert_wchar(FILE * fp,wchar_t * wcp,int width,locale_t locale)151 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
152 {
153 mbstate_t mbs;
154 int n, nread;
155 wint_t wi;
156
157 mbs = initial_mbs;
158 n = 0;
159 while (width-- != 0 &&
160 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
161 if (wcp != SUPPRESS_PTR)
162 *wcp++ = (wchar_t)wi;
163 n += nread;
164 }
165 if (n == 0)
166 return (-1);
167 return (n);
168 }
169
170 static __inline int
convert_ccl(FILE * fp,char * p,int width,const char * ccltab)171 convert_ccl(FILE *fp, char * p, int width, const char *ccltab)
172 {
173 char *p0;
174 int n;
175
176 if (p == SUPPRESS_PTR) {
177 n = 0;
178 while (ccltab[*fp->_p]) {
179 n++, fp->_r--, fp->_p++;
180 if (--width == 0)
181 break;
182 if (fp->_r <= 0 && __srefill(fp)) {
183 if (n == 0)
184 return (-1);
185 break;
186 }
187 }
188 } else {
189 p0 = p;
190 while (ccltab[*fp->_p]) {
191 fp->_r--;
192 *p++ = *fp->_p++;
193 if (--width == 0)
194 break;
195 if (fp->_r <= 0 && __srefill(fp)) {
196 if (p == p0)
197 return (-1);
198 break;
199 }
200 }
201 n = p - p0;
202 if (n == 0)
203 return (0);
204 *p = 0;
205 }
206 return (n);
207 }
208
209 static __inline int
convert_wccl(FILE * fp,wchar_t * wcp,int width,const char * ccltab,locale_t locale)210 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
211 locale_t locale)
212 {
213 mbstate_t mbs;
214 wint_t wi;
215 int n, nread;
216
217 mbs = initial_mbs;
218 n = 0;
219 if (wcp == SUPPRESS_PTR) {
220 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
221 width-- != 0 && ccltab[wctob(wi)])
222 n += nread;
223 if (wi != WEOF)
224 __ungetwc(wi, fp, __get_locale());
225 } else {
226 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
227 width-- != 0 && ccltab[wctob(wi)]) {
228 *wcp++ = (wchar_t)wi;
229 n += nread;
230 }
231 if (wi != WEOF)
232 __ungetwc(wi, fp, __get_locale());
233 if (n == 0)
234 return (0);
235 *wcp = 0;
236 }
237 return (n);
238 }
239
240 static __inline int
convert_string(FILE * fp,char * p,int width)241 convert_string(FILE *fp, char * p, int width)
242 {
243 char *p0;
244 int n;
245
246 if (p == SUPPRESS_PTR) {
247 n = 0;
248 while (!isspace(*fp->_p)) {
249 n++, fp->_r--, fp->_p++;
250 if (--width == 0)
251 break;
252 if (fp->_r <= 0 && __srefill(fp))
253 break;
254 }
255 } else {
256 p0 = p;
257 while (!isspace(*fp->_p)) {
258 fp->_r--;
259 *p++ = *fp->_p++;
260 if (--width == 0)
261 break;
262 if (fp->_r <= 0 && __srefill(fp))
263 break;
264 }
265 *p = 0;
266 n = p - p0;
267 }
268 return (n);
269 }
270
271 static __inline int
convert_wstring(FILE * fp,wchar_t * wcp,int width,locale_t locale)272 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
273 {
274 mbstate_t mbs;
275 wint_t wi;
276 int n, nread;
277
278 mbs = initial_mbs;
279 n = 0;
280 if (wcp == SUPPRESS_PTR) {
281 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
282 width-- != 0 && !iswspace(wi))
283 n += nread;
284 if (wi != WEOF)
285 __ungetwc(wi, fp, __get_locale());
286 } else {
287 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
288 width-- != 0 && !iswspace(wi)) {
289 *wcp++ = (wchar_t)wi;
290 n += nread;
291 }
292 if (wi != WEOF)
293 __ungetwc(wi, fp, __get_locale());
294 *wcp = '\0';
295 }
296 return (n);
297 }
298
/*
 * States of the integer recogniser driven by parseint_fsm().
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a leading '+' or '-' */
	havezero,	/* accepted a leading '0' */
	haveprefix,	/* accepted "0b"/"0B" or "0x"/"0X" */
	any,		/* accepted a digit beyond the above */
};

/*
 * Feed one character to the integer recogniser.
 *
 * *state is the current state and *base the radix (0 = not yet known);
 * both are updated when the character is accepted.  Returns 1 if c
 * extends the number, 0 if it cannot be part of it.
 */
static __inline int
parseint_fsm(int c, enum parseint_state *state, int *base)
{
	int prefix_base, value;

	/* A sign is only acceptable as the very first character. */
	if (c == '+' || c == '-') {
		if (*state != begin)
			return (0);
		*state = havesign;
		return (1);
	}

	/*
	 * A radix letter right after a leading zero forms a prefix when
	 * the radix is still open or already agrees with the letter.
	 */
	prefix_base = 0;
	if (c == 'b' || c == 'B')
		prefix_base = 2;
	else if (c == 'x' || c == 'X')
		prefix_base = 16;
	if (prefix_base != 0 && *state == havezero &&
	    (*base == 0 || *base == prefix_base)) {
		*state = haveprefix;
		*base = prefix_base;
		return (1);
	}

	/* Everything else must be a digit of the radix in effect. */
	if (c >= '0' && c <= '9') {
		if (*state == begin || *state == havesign) {
			if (c == '0') {
				*state = havezero;
				return (1);
			}
			/* A leading nonzero digit settles on decimal. */
			if (*base == 0)
				*base = 10;
		} else if (*state == havezero && *base == 0 && c <= '7') {
			/* An octal digit after a leading zero: octal. */
			*base = 8;
		}
		value = c - '0';
	} else if (c >= 'a' && c <= 'f') {
		value = c - 'a' + 10;
	} else if (c >= 'A' && c <= 'F') {
		value = c - 'A' + 10;
	} else {
		/* Not a digit (this includes 'x' outside a prefix). */
		return (0);
	}

	if (*base > value) {
		*state = any;
		return (1);
	}
	return (0);
}
417
418 /*
419 * Read an integer, storing it in buf.
420 *
421 * Return 0 on a match failure, and the number of characters read
422 * otherwise.
423 */
424 static __inline int
parseint(FILE * fp,char * __restrict buf,int width,int base)425 parseint(FILE *fp, char * __restrict buf, int width, int base)
426 {
427 enum parseint_state state = begin;
428 char *p;
429 int c;
430
431 for (p = buf; width; width--) {
432 c = __sgetc(fp);
433 if (c == EOF)
434 break;
435 if (!parseint_fsm(c, &state, &base))
436 break;
437 *p++ = c;
438 }
439 /*
440 * If we only had a sign, push it back. If we only had a 0b or 0x
441 * prefix (possibly preceded by a sign), we view it as "0" and
442 * push back the letter. In all other cases, if we stopped
443 * because we read a non-number character, push it back.
444 */
445 if (state == havesign) {
446 p--;
447 (void) __ungetc(*(u_char *)p, fp);
448 } else if (state == haveprefix) {
449 p--;
450 (void) __ungetc(c, fp);
451 } else if (width && c != EOF) {
452 (void) __ungetc(c, fp);
453 }
454 return (p - buf);
455 }
456
457 /*
458 * __vfscanf - MT-safe version
459 */
/*
 * Scan fp according to fmt0 using the locale returned by __get_locale(),
 * storing results through the pointers in ap.  Returns whatever
 * __svfscanf() returns.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int ret;

	/*
	 * The scanner proper is not MT-safe, so it is bracketed by the
	 * stream locking macros (presumably they lock and unlock fp; the
	 * pair must stay in this order around the call).
	 */
	FLOCKFILE_CANCELSAFE(fp);
	ret = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (ret);
}
/*
 * Same as __vfscanf(), but scans using the caller-supplied locale
 * instead of the one returned by __get_locale().
 */
int
vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
{
	int ret;
	/* NOTE(review): presumably normalises special locale handles. */
	FIX_LOCALE(locale);

	/* Bracket the non-MT-safe scanner with the stream locking macros. */
	FLOCKFILE_CANCELSAFE(fp);
	ret = __svfscanf(fp, locale, fmt0, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (ret);
}
481
482 /*
483 * __svfscanf - non-MT-safe version of __vfscanf
484 */
/*
 * Scan fp as directed by the format string fmt0, storing converted
 * values through the pointers fetched from ap.
 *
 * The format is processed one directive at a time: white space in the
 * format skips white space in the input, any other ordinary character
 * must match the next input byte, and `%' introduces a conversion.
 *
 * Returns the number of values assigned.  If the input runs out before
 * any conversion has completed, or the format ends right after a `%',
 * EOF is returned instead.
 */
int
__svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
{
/*
 * Fetch the next destination pointer, or the suppression sentinel if
 * the `*' flag was given (in which case no argument is consumed).
 */
#define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
	const u_char *fmt = (const u_char *)fmt0;
	int c;			/* character from format, or conversion */
	size_t width;		/* field width, or 0 */
	int flags;		/* flags as defined above */
	int nassigned;		/* number of fields assigned */
	int nconversions;	/* number of conversions */
	int nr;			/* characters read by the current conversion */
	int nread;		/* number of characters consumed from fp */
	int base;		/* base argument to conversion function */
	char ccltab[256];	/* character class table for %[...] */
	char buf[BUF];		/* buffer for numeric conversions */

	ORIENT(fp, -1);

	nassigned = 0;
	nconversions = 0;
	nread = 0;
	for (;;) {
		c = *fmt++;
		if (c == 0)
			return (nassigned);
		if (isspace(c)) {
			/* White space in the format eats any input white space. */
			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
				nread++, fp->_r--, fp->_p++;
			continue;
		}
		if (c != '%')
			goto literal;
		width = 0;
		flags = 0;
		/*
		 * switch on the format.  continue if done;
		 * break once format type is derived.
		 */
again:		c = *fmt++;
		switch (c) {
		case '%':
literal:
			/* An ordinary character (or %%) must match the input. */
			if (fp->_r <= 0 && __srefill(fp))
				goto input_failure;
			if (*fp->_p != c)
				goto match_failure;
			fp->_r--, fp->_p++;
			nread++;
			continue;

		case '*':
			flags |= SUPPRESS;
			goto again;
		case 'j':
			flags |= INTMAXT;
			goto again;
		case 'l':
			/* A second `l' upgrades long to long long. */
			if (flags & LONG) {
				flags &= ~LONG;
				flags |= LONGLONG;
			} else
				flags |= LONG;
			goto again;
		case 'q':
			flags |= LONGLONG;	/* not quite */
			goto again;
		case 't':
			flags |= PTRDIFFT;
			goto again;
		case 'w':
			/*
			 * Fixed-width integer types.  On all platforms we
			 * support, int8_t is equivalent to char, int16_t
			 * is equivalent to short, int32_t is equivalent
			 * to int, int64_t is equivalent to long long int.
			 * Furthermore, int_fast8_t, int_fast16_t and
			 * int_fast32_t are equivalent to int, and
			 * int_fast64_t is equivalent to long long int.
			 */
			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
			if (fmt[0] == 'f') {
				flags |= FASTINT;
				fmt++;
			} else {
				flags &= ~FASTINT;
			}
			if (fmt[0] == '8') {
				if (!(flags & FASTINT))
					flags |= SHORTSHORT;
				else
					/* no flag set = 32 */ ;
				fmt += 1;
			} else if (fmt[0] == '1' && fmt[1] == '6') {
				if (!(flags & FASTINT))
					flags |= SHORT;
				else
					/* no flag set = 32 */ ;
				fmt += 2;
			} else if (fmt[0] == '3' && fmt[1] == '2') {
				/* no flag set = 32 */ ;
				fmt += 2;
			} else if (fmt[0] == '6' && fmt[1] == '4') {
				flags |= LONGLONG;
				fmt += 2;
			} else {
				/* Any other width is not supported. */
				goto match_failure;
			}
			goto again;
		case 'z':
			flags |= SIZET;
			goto again;
		case 'L':
			flags |= LONGDBL;
			goto again;
		case 'h':
			/* A second `h' downgrades short to char. */
			if (flags & SHORT) {
				flags &= ~SHORT;
				flags |= SHORTSHORT;
			} else
				flags |= SHORT;
			goto again;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			width = width * 10 + c - '0';
			goto again;

		/*
		 * Conversions.
		 */
		case 'B':
		case 'b':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 2;
			break;

		case 'd':
			c = CT_INT;
			base = 10;
			break;

		case 'i':
			c = CT_INT;
			base = 0;	/* radix is detected from the input */
			break;

		case 'o':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 8;
			break;

		case 'u':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 10;
			break;

		case 'X':
		case 'x':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 16;
			break;

#ifndef NO_FLOATING_POINT
		case 'A': case 'E': case 'F': case 'G':
		case 'a': case 'e': case 'f': case 'g':
			c = CT_FLOAT;
			break;
#endif

		case 'S':
			flags |= LONG;
			/* FALLTHROUGH */
		case 's':
			c = CT_STRING;
			break;

		case '[':
			/* Build the scanset table and step past the `]'. */
			fmt = __sccl(ccltab, fmt);
			flags |= NOSKIP;
			c = CT_CCL;
			break;

		case 'C':
			flags |= LONG;
			/* FALLTHROUGH */
		case 'c':
			flags |= NOSKIP;
			c = CT_CHAR;
			break;

		case 'p':	/* pointer format is like hex */
			flags |= POINTER;
			c = CT_INT;		/* assumes sizeof(uintmax_t) */
			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
			base = 16;
			break;

		case 'n':
			/*
			 * Report the number of characters consumed so far.
			 * This reads no input and is not counted as an
			 * assignment or a conversion.
			 */
			if (flags & SUPPRESS)	/* ??? */
				continue;
			if (flags & SHORTSHORT)
				*va_arg(ap, char *) = nread;
			else if (flags & SHORT)
				*va_arg(ap, short *) = nread;
			else if (flags & LONG)
				*va_arg(ap, long *) = nread;
			else if (flags & LONGLONG)
				*va_arg(ap, long long *) = nread;
			else if (flags & INTMAXT)
				*va_arg(ap, intmax_t *) = nread;
			else if (flags & SIZET)
				*va_arg(ap, size_t *) = nread;
			else if (flags & PTRDIFFT)
				*va_arg(ap, ptrdiff_t *) = nread;
			else
				*va_arg(ap, int *) = nread;
			continue;

		default:
			goto match_failure;

		/*
		 * Disgusting backwards compatibility hack.	XXX
		 */
		case '\0':	/* compat */
			return (EOF);
		}

		/*
		 * We have a conversion that requires input.
		 */
		if (fp->_r <= 0 && __srefill(fp))
			goto input_failure;

		/*
		 * Consume leading white space, except for formats
		 * that suppress this.
		 */
		if ((flags & NOSKIP) == 0) {
			while (isspace(*fp->_p)) {
				nread++;
				if (--fp->_r > 0)
					fp->_p++;
				else if (__srefill(fp))
					goto input_failure;
			}
			/*
			 * Note that there is at least one character in
			 * the buffer, so conversions that do not set NOSKIP
			 * can no longer result in an input failure.
			 */
		}

		/*
		 * Do the conversion.
		 */
		switch (c) {

		case CT_CHAR:
			/* scan arbitrary characters (sets NOSKIP) */
			if (width == 0)
				width = 1;
			if (flags & LONG) {
				nr = convert_wchar(fp, GETARG(wchar_t *),
				    width, locale);
			} else {
				nr = convert_char(fp, GETARG(char *), width);
			}
			if (nr < 0)
				goto input_failure;
			break;

		case CT_CCL:
			/* scan a (nonempty) character class (sets NOSKIP) */
			if (width == 0)
				width = (size_t)~0;	/* `infinity' */
			if (flags & LONG) {
				nr = convert_wccl(fp, GETARG(wchar_t *), width,
				    ccltab, locale);
			} else {
				nr = convert_ccl(fp, GETARG(char *), width,
				    ccltab);
			}
			if (nr <= 0) {
				if (nr < 0)
					goto input_failure;
				else /* nr == 0 */
					goto match_failure;
			}
			break;

		case CT_STRING:
			/* like CCL, but zero-length string OK, & no NOSKIP */
			if (width == 0)
				width = (size_t)~0;
			if (flags & LONG) {
				nr = convert_wstring(fp, GETARG(wchar_t *),
				    width, locale);
			} else {
				nr = convert_string(fp, GETARG(char *), width);
			}
			if (nr < 0)
				goto input_failure;
			break;

		case CT_INT:
			/* scan an integer as if by the conversion function */
#ifdef hardway
			if (width == 0 || width > sizeof(buf) - 1)
				width = sizeof(buf) - 1;
#else
			/*
			 * size_t is unsigned, hence this optimisation:
			 * a width of 0 wraps to the largest size_t value,
			 * so a single comparison clamps both "no width"
			 * and "too wide" to sizeof(buf) - 1.
			 */
			if (--width > sizeof(buf) - 2)
				width = sizeof(buf) - 2;
			width++;
#endif
			nr = parseint(fp, buf, width, base);
			if (nr == 0)
				goto match_failure;
			if ((flags & SUPPRESS) == 0) {
				uintmax_t res;

				/* parseint() does not terminate the text. */
				buf[nr] = '\0';
				if ((flags & UNSIGNED) == 0)
					res = strtoimax_l(buf, (char **)NULL, base, locale);
				else
					res = strtoumax_l(buf, (char **)NULL, base, locale);
				if (flags & POINTER)
					*va_arg(ap, void **) =
							(void *)(uintptr_t)res;
				else if (flags & SHORTSHORT)
					*va_arg(ap, char *) = res;
				else if (flags & SHORT)
					*va_arg(ap, short *) = res;
				else if (flags & LONG)
					*va_arg(ap, long *) = res;
				else if (flags & LONGLONG)
					*va_arg(ap, long long *) = res;
				else if (flags & INTMAXT)
					*va_arg(ap, intmax_t *) = res;
				else if (flags & PTRDIFFT)
					*va_arg(ap, ptrdiff_t *) = res;
				else if (flags & SIZET)
					*va_arg(ap, size_t *) = res;
				else
					*va_arg(ap, int *) = res;
			}
			break;

#ifndef NO_FLOATING_POINT
		case CT_FLOAT:
			/* scan a floating point number as if by strtod */
			if (width == 0 || width > sizeof(buf) - 1)
				width = sizeof(buf) - 1;
			nr = parsefloat(fp, buf, buf + width, locale);
			if (nr == 0)
				goto match_failure;
			if ((flags & SUPPRESS) == 0) {
				if (flags & LONGDBL) {
					long double res = strtold_l(buf, NULL,
					    locale);
					*va_arg(ap, long double *) = res;
				} else if (flags & LONG) {
					double res = strtod_l(buf, NULL,
					    locale);
					*va_arg(ap, double *) = res;
				} else {
					float res = strtof_l(buf, NULL, locale);
					*va_arg(ap, float *) = res;
				}
			}
			break;
#endif /* !NO_FLOATING_POINT */
		}
		/* Bookkeeping common to every successful conversion. */
		if (!(flags & SUPPRESS))
			nassigned++;
		nread += nr;
		nconversions++;
	}
input_failure:
	/* Running out of input is only EOF if nothing was converted yet. */
	return (nconversions != 0 ? nassigned : EOF);
match_failure:
	return (nassigned);
}
873
874 /*
875 * Fill in the given table from the scanset at the given format
876 * (just after `['). Return a pointer to the character past the
877 * closing `]'. The table has a 1 wherever characters should be
878 * considered part of the scanset.
879 */
880 static const u_char *
__sccl(char * tab,const u_char * fmt)881 __sccl(char *tab, const u_char *fmt)
882 {
883 int c, n, v, i;
884 struct xlocale_collate *table =
885 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
886
887 /* first `clear' the whole table */
888 c = *fmt++; /* first char hat => negated scanset */
889 if (c == '^') {
890 v = 1; /* default => accept */
891 c = *fmt++; /* get new first char */
892 } else
893 v = 0; /* default => reject */
894
895 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
896 (void) memset(tab, v, 256);
897
898 if (c == 0)
899 return (fmt - 1);/* format ended before closing ] */
900
901 /*
902 * Now set the entries corresponding to the actual scanset
903 * to the opposite of the above.
904 *
905 * The first character may be ']' (or '-') without being special;
906 * the last character may be '-'.
907 */
908 v = 1 - v;
909 for (;;) {
910 tab[c] = v; /* take character c */
911 doswitch:
912 n = *fmt++; /* and examine the next */
913 switch (n) {
914
915 case 0: /* format ended too soon */
916 return (fmt - 1);
917
918 case '-':
919 /*
920 * A scanset of the form
921 * [01+-]
922 * is defined as `the digit 0, the digit 1,
923 * the character +, the character -', but
924 * the effect of a scanset such as
925 * [a-zA-Z0-9]
926 * is implementation defined. The V7 Unix
927 * scanf treats `a-z' as `the letters a through
928 * z', but treats `a-a' as `the letter a, the
929 * character -, and the letter a'.
930 *
931 * For compatibility, the `-' is not considered
932 * to define a range if the character following
933 * it is either a close bracket (required by ANSI)
934 * or is not numerically greater than the character
935 * we just stored in the table (c).
936 */
937 n = *fmt;
938 if (n == ']'
939 || (table->__collate_load_error ? n < c :
940 __collate_range_cmp(n, c) < 0
941 )
942 ) {
943 c = '-';
944 break; /* resume the for(;;) */
945 }
946 fmt++;
947 /* fill in the range */
948 if (table->__collate_load_error) {
949 do {
950 tab[++c] = v;
951 } while (c < n);
952 } else {
953 for (i = 0; i < 256; i ++)
954 if (__collate_range_cmp(c, i) <= 0 &&
955 __collate_range_cmp(i, n) <= 0
956 )
957 tab[i] = v;
958 }
959 #if 1 /* XXX another disgusting compatibility hack */
960 c = n;
961 /*
962 * Alas, the V7 Unix scanf also treats formats
963 * such as [a-c-e] as `the letters a through e'.
964 * This too is permitted by the standard....
965 */
966 goto doswitch;
967 #else
968 c = *fmt++;
969 if (c == 0)
970 return (fmt - 1);
971 if (c == ']')
972 return (fmt);
973 #endif
974 break;
975
976 case ']': /* end of scanset */
977 return (fmt);
978
979 default: /* just another character */
980 c = n;
981 break;
982 }
983 }
984 /* NOTREACHED */
985 }
986
987 #ifndef NO_FLOATING_POINT
/*
 * Collect the text of a floating point number from fp.
 *
 * Characters are stored starting at buf and never at or beyond end; the
 * result is NUL-terminated, so the caller must leave room for one more
 * byte at end.  The decimal point is taken from locale.  The caller
 * guarantees that the stream buffer holds at least one byte on entry.
 *
 * Returns the length of the text left in buf, 0 if the input does not
 * start with a number.  Everything read past the last point at which
 * the text formed a complete number is pushed back onto fp.
 */
static int
parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
{
	char *commit, *p;
	int infnanpos = 0, decptpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
		S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	unsigned char c;
	const char *decpt = localeconv_l(locale)->decimal_point;
	_Bool gotmantdig = 0, ishex = 0;

	/*
	 * We set commit = p whenever the string we have read so far
	 * constitutes a valid representation of a floating point
	 * number by itself.  At some point, the parse will complete
	 * or fail, and we will ungetc() back to the last commit point.
	 * To ensure that the file offset gets updated properly, it is
	 * always necessary to read at least one character that doesn't
	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
	 */
	commit = buf - 1;
	for (p = buf; p < end; ) {
		c = *fp->_p;
reswitch:
		/*
		 * `break' out of the switch accepts c and moves on to the
		 * next character; `goto reswitch' re-examines the same c
		 * in the new state; `goto parsedone' rejects c.
		 */
		switch (state) {
		case S_START:
			/* Optional sign. */
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				state = S_MAYBEHEX;
				commit = p;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* infnanpos indexes the letters following the `i'. */
			if (infnanpos > 6 ||
			    (c != "nfinity"[infnanpos] &&
			     c != "NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			/* infnanpos indexes the characters following the `n'. */
			switch (infnanpos) {
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				/* Inside the parentheses of nan(...). */
				if (c == ')') {
					commit = p;
					state = S_DONE;
				} else if (!isalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_DONE:
			goto parsedone;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			/* Integer part of the mantissa. */
			if ((ishex && isxdigit(c)) || isdigit(c)) {
				gotmantdig = 1;
				commit = p;
				break;
			} else {
				state = S_DECPT;
				goto reswitch;
			}
		case S_DECPT:
			/* The decimal point may be several bytes long. */
			if (c == decpt[decptpos]) {
				if (decpt[++decptpos] == '\0') {
					/* We read the complete decpt seq. */
					state = S_FRAC;
					if (gotmantdig)
						commit = p;
				}
				break;
			} else if (!decptpos) {
				/* We didn't read any decpt characters. */
				state = S_FRAC;
				goto reswitch;
			} else {
				/*
				 * We read part of a multibyte decimal point,
				 * but the rest is invalid, so bail.
				 */
				goto parsedone;
			}
		case S_FRAC:
			/* Fraction digits, or the exponent marker. */
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
				commit = p;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			/* Optional exponent sign. */
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (isdigit(c))
				commit = p;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		/* c was accepted: store it and advance the stream. */
		*p++ = c;
		if (--fp->_r > 0)
			fp->_p++;
		else if (__srefill(fp))
			break;	/* EOF */
	}

parsedone:
	/* Return everything after the last commit point to the stream. */
	while (commit < --p)
		__ungetc(*(u_char *)p, fp);
	*++commit = '\0';
	return (commit - buf);
}
1156 #endif
1157