1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1988 AT&T */
28 /* All Rights Reserved */
29
30 #include "lint.h"
31 #include <sys/types.h>
32 #include "mtlib.h"
33 #include "file64.h"
34 #include <stdio.h>
35 #include <ctype.h>
36 #include <stdarg.h>
37 #include <values.h>
38 #include <errno.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <math.h>
42 #include <thread.h>
43 #include <synch.h>
44 #include <stdlib.h>
45 #include <fnmatch.h>
46 #include <limits.h>
47 #include <wchar.h>
48 #include <unistd.h>
49 #include "libc.h"
50 #include "stdiom.h"
51 #include "xpg6.h"
52
/* Number of distinct values a byte can hold; sizes the scanset table. */
#define	NCHARS	(1 << BITSPERBYTE)

/*
 * Byte-level get/unget helpers.  They all expect a variable named `iop'
 * (the FILE being scanned) to be in scope at the point of use.
 *
 * locgetc(cnt) adds one to the lvalue `cnt' and then fetches a byte;
 * locungetc(cnt, x) subtracts one from `cnt' and steps the stream back
 * by one byte unless `x' is EOF.  Note that locungetc() does not store
 * `x': it only moves the pointer back over the byte already there.
 *
 * The wlocgetc()/wlocungetc() variants do no counting.
 */

/* if the _IOWRT flag is set, this must be a call from sscanf */
#define	locgetc(cnt)	((cnt) += 1, (iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	locungetc(cnt, x) ((cnt) -= 1, ((x) == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				(++iop->_cnt, *(--iop->_ptr))))

#define	wlocgetc()	((iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	wlocungetc(x)	(((x) == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				UNGETC((x), iop)))

#define	MAXARGS	30	/* max. number of args for fast positional parameters */
71
/*
 * C does not allow an object of array type on the left-hand side of an
 * assignment, and va_list may be an array type.  Wrapping the va_list in
 * a structure lets a whole argument-list state be copied with a plain
 * `=' assignment.
 */
struct stva_list {
	va_list	ap;
};
typedef struct stva_list stva_list;
81
82 static int number(int *, int *, int, int, int, int, FILE *, va_list *);
83 static int readchar(FILE *, int *);
84 static int string(int *, int *, int, int, int, char *, FILE *, va_list *);
85 static int wstring(int *, int *, int, int, int, FILE *, va_list *);
86 static int wbrstring(int *, int *, int, int, int, FILE *,
87 unsigned char *, va_list *);
88 #ifdef _WIDE
89 static int brstring(int *, int *, int, int, int, FILE *,
90 unsigned char *, va_list *);
91 #endif
92 static int _bi_getwc(FILE *);
93 static int _bi_ungetwc(wint_t, FILE *);
94
95 #ifdef _WIDE
96 static int _mkarglst(const wchar_t *, stva_list, stva_list[]);
97 static wint_t _wd_getwc(int *, FILE *);
98 static wint_t _wd_ungetwc(int *, wchar_t, FILE *);
99 static int _watoi(wchar_t *);
100 #else /* _WIDE */
101 static int _mkarglst(const char *, stva_list, stva_list[]);
102 #endif /* _WIDE */
103
104 #ifndef _WIDE
105 int
_doscan(FILE * iop,const char * fmt,va_list va_Alist)106 _doscan(FILE *iop, const char *fmt, va_list va_Alist)
107 {
108 int ret;
109 rmutex_t *lk;
110
111 if (iop->_flag & _IOWRT)
112 ret = __doscan_u(iop, fmt, va_Alist, 0);
113 else {
114 FLOCKFILE(lk, iop);
115 ret = __doscan_u(iop, fmt, va_Alist, 0);
116 FUNLOCKFILE(lk);
117 }
118 return (ret);
119 }
120 #endif /* _WIDE */
121
122 /* ARGSUSED3 */
123 #ifdef _WIDE
124 int
__wdoscan_u(FILE * iop,const wchar_t * fmt,va_list va_Alist,int scflag __unused)125 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist,
126 int scflag __unused)
127 #else /* _WIDE */
128 int
129 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag __unused)
130 #endif /* _WIDE */
131 {
132 #ifdef _WIDE
133 wchar_t ch;
134 wchar_t inchar, size;
135 int nmatch = 0, len, stow;
136 #else /* _WIDE */
137 int ch;
138 int nmatch = 0, len, inchar, stow, size;
139 #endif /* _WIDE */
140
141 unsigned char *bracket_str = NULL;
142 int chcount, flag_eof;
143 char tab[NCHARS];
144
145 /* variables for postional parameters */
146 #ifdef _WIDE
147 const wchar_t *sformat = fmt; /* save the beginning of the format */
148 #else /* _WIDE */
149 const unsigned char *fmt = (const unsigned char *)sfmt;
150 const char *sformat = sfmt; /* save the beginning of the format */
151 #endif /* _WIDE */
152 int fpos = 1; /* 1 if first postional parameter */
153 stva_list args; /* used to step through the argument list */
154 stva_list sargs; /* used to save start of the argument list */
155 stva_list arglst[MAXARGS];
156 /*
157 * array giving the appropriate values
158 * for va_arg() to retrieve the
159 * corresponding argument:
160 * arglst[0] is the first argument
161 * arglst[1] is the second argument,etc.
162 */
163 /* Check if readable stream */
164 if (!(iop->_flag & (_IOREAD | _IORW))) {
165 errno = EBADF;
166 return (EOF);
167 }
168
169 /*
170 * Initialize args and sargs to the start of the argument list.
171 * We don't know any portable way to copy an arbitrary C object
172 * so we use a system-specific routine(probably a macro) from
173 * stdarg.h. (Remember that if va_list is an array, in_args will
174 * be a pointer and &in_args won't be what we would want for
175 * memcpy.)
176 */
177 va_copy(args.ap, va_Alist);
178
179 sargs = args;
180
181 chcount = 0; flag_eof = 0;
182
183 /*
184 * ****************************************************
185 * Main loop: reads format to determine a pattern,
186 * and then goes to read input stream
187 * in attempt to match the pattern.
188 * ****************************************************
189 */
190 for (; ; ) {
191 if ((ch = *fmt++) == '\0') {
192 return (nmatch); /* end of format */
193 }
194 #ifdef _WIDE
195 if (iswspace(ch)) {
196 if (!flag_eof) {
197 while (iswspace(inchar =
198 _wd_getwc(&chcount, iop)))
199 ;
200 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
201 flag_eof = 1;
202 }
203 continue;
204 }
205 if (ch != '%' || (ch = *fmt++) == '%') {
206 if (ch == '%') {
207 if (!flag_eof) {
208 while (iswspace(inchar =
209 _wd_getwc(&chcount, iop)))
210 ;
211 if (_wd_ungetwc(&chcount, inchar, iop)
212 == WEOF)
213 flag_eof = 1;
214 }
215 }
216 if ((inchar = _wd_getwc(&chcount, iop)) == ch)
217 continue;
218 if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) {
219 return (nmatch); /* failed to match input */
220 }
221 break;
222 }
223 #else /* _WIDE */
224 if (isspace(ch)) {
225 if (!flag_eof) {
226 while (isspace(inchar = locgetc(chcount)))
227 ;
228 if (locungetc(chcount, inchar) == EOF)
229 flag_eof = 1;
230
231 }
232 continue;
233 }
234 if (ch != '%' || (ch = *fmt++) == '%') {
235 if (ch == '%') {
236 if (!flag_eof) {
237 while (isspace(inchar =
238 locgetc(chcount)))
239 ;
240 if (locungetc(chcount, inchar) == EOF)
241 flag_eof = 1;
242 }
243 }
244 if ((inchar = locgetc(chcount)) == ch)
245 continue;
246 if (locungetc(chcount, inchar) != EOF) {
247 return (nmatch); /* failed to match input */
248 }
249 break;
250 }
251 #endif /* _WIDE */
252
253 charswitch: /* target of a goto 8-( */
254
255 if (ch == '*') {
256 stow = 0;
257 ch = *fmt++;
258 } else
259 stow = 1;
260
261 #ifdef _WIDE
262 for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch));
263 ch = *fmt++)
264 len = len * 10 + ch - '0';
265 #else /* _WIDE */
266 for (len = 0; isdigit(ch); ch = *fmt++)
267 len = len * 10 + ch - '0';
268 #endif /* _WIDE */
269
270 if (ch == '$') {
271 /*
272 * positional parameter handling - the number
273 * specified in len gives the argument to which
274 * the next conversion should be applied.
275 * WARNING: This implementation of positional
276 * parameters assumes that the sizes of all pointer
277 * types are the same. (Code similar to that
278 * in the portable doprnt.c should be used if this
279 * assumption does not hold for a particular
280 * port.)
281 */
282 if (fpos) {
283 if (_mkarglst(sformat, sargs, arglst) != 0) {
284 return (EOF);
285 } else {
286 fpos = 0;
287 }
288 }
289 if (len <= MAXARGS) {
290 args = arglst[len - 1];
291 } else {
292 args = arglst[MAXARGS - 1];
293 for (len -= MAXARGS; len > 0; len--)
294 (void) va_arg(args.ap, void *);
295 }
296 len = 0;
297 ch = *fmt++;
298 goto charswitch;
299 }
300
301 if (len == 0)
302 len = MAXINT;
303 #ifdef _WIDE
304 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
305 (size == 'j') || (size == 't') || (size == 'z'))
306 ch = *fmt++;
307 #else /* _WIDE */
308 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
309 (size == 'w') || (size == 'j') || (size == 't') ||
310 (size == 'z'))
311 ch = *fmt++;
312 #endif /* _WIDE */
313 if (size == 'l' && ch == 'l') {
314 size = 'm'; /* size = 'm' if long long */
315 ch = *fmt++;
316 } else if (size == 'h' && ch == 'h') {
317 size = 'b'; /* use size = 'b' if char */
318 ch = *fmt++;
319 } else if ((size == 't') || (size == 'z')) {
320 size = 'l';
321 } else if (size == 'j') {
322 #ifndef _LP64
323 /* check scflag for size of u/intmax_t (32-bit libc) */
324 if (!(scflag & _F_INTMAX32)) {
325 #endif
326 size = 'm';
327 #ifndef _LP64
328 }
329 #endif
330 }
331 if (ch == '\0') {
332 return (EOF); /* unexpected end of format */
333 }
334 #ifdef _WIDE
335 if (ch == '[') {
336 wchar_t c;
337 size_t len;
338 int negflg = 0;
339 wchar_t *p;
340 wchar_t *wbracket_str;
341 size_t wlen, clen;
342
343 /* p points to the address of '[' */
344 p = (wchar_t *)fmt - 1;
345 len = 0;
346 if (*fmt == '^') {
347 len++;
348 fmt++;
349 negflg = 1;
350 }
351 if (((c = *fmt) == ']') || (c == '-')) {
352 len++;
353 fmt++;
354 }
355 while ((c = *fmt) != ']') {
356 if (c == '\0') {
357 return (EOF); /* unexpected EOF */
358 } else {
359 len++;
360 fmt++;
361 }
362 }
363 fmt++;
364 len += 2;
365 wbracket_str = (wchar_t *)
366 malloc(sizeof (wchar_t) * (len + 1));
367 if (wbracket_str == NULL) {
368 errno = ENOMEM;
369 return (EOF);
370 } else {
371 (void) wmemcpy(wbracket_str,
372 (const wchar_t *)p, len);
373 *(wbracket_str + len) = L'\0';
374 if (negflg && *(wbracket_str + 1) == '^') {
375 *(wbracket_str + 1) = L'!';
376 }
377 }
378 wlen = wcslen(wbracket_str);
379 clen = wcstombs((char *)NULL, wbracket_str, 0);
380 if (clen == (size_t)-1) {
381 free(wbracket_str);
382 return (EOF);
383 }
384 bracket_str = (unsigned char *)
385 malloc(sizeof (unsigned char) * (clen + 1));
386 if (bracket_str == NULL) {
387 free(wbracket_str);
388 errno = ENOMEM;
389 return (EOF);
390 }
391 clen = wcstombs((char *)bracket_str, wbracket_str,
392 wlen + 1);
393 free(wbracket_str);
394 if (clen == (size_t)-1) {
395 free(bracket_str);
396 return (EOF);
397 }
398 }
399 #else /* _WIDE */
400 if (ch == '[') {
401 if (size == 'l') {
402 int c, len, i;
403 int negflg = 0;
404 unsigned char *p;
405
406 p = (unsigned char *)(fmt - 1);
407 len = 0;
408 if (*fmt == '^') {
409 len++;
410 fmt++;
411 negflg = 1;
412 }
413 if (((c = *fmt) == ']') || (c == '-')) {
414 len++;
415 fmt++;
416 }
417 while ((c = *fmt) != ']') {
418 if (c == '\0') {
419 return (EOF);
420 } else if (isascii(c)) {
421 len++;
422 fmt++;
423 } else {
424 i = mblen((const char *)fmt,
425 MB_CUR_MAX);
426 if (i <= 0) {
427 return (EOF);
428 } else {
429 len += i;
430 fmt += i;
431 }
432 }
433 }
434 fmt++;
435 len += 2;
436 bracket_str = (unsigned char *)
437 malloc(sizeof (unsigned char) * (len + 1));
438 if (bracket_str == NULL) {
439 errno = ENOMEM;
440 return (EOF);
441 } else {
442 (void) strncpy((char *)bracket_str,
443 (const char *)p, len);
444 *(bracket_str + len) = '\0';
445 if (negflg &&
446 *(bracket_str + 1) == '^') {
447 *(bracket_str + 1) = '!';
448 }
449 }
450 } else {
451 int t = 0;
452 int b, c, d;
453
454 if (*fmt == '^') {
455 t++;
456 fmt++;
457 }
458 (void) memset(tab, !t, NCHARS);
459 if ((c = *fmt) == ']' || c == '-') {
460 tab[c] = t;
461 fmt++;
462 }
463
464 while ((c = *fmt) != ']') {
465 if (c == '\0') {
466 return (EOF);
467 }
468 b = *(fmt - 1);
469 d = *(fmt + 1);
470 if ((c == '-') && (d != ']') &&
471 (b < d)) {
472 (void) memset(&tab[b], t,
473 d - b + 1);
474 fmt += 2;
475 } else {
476 tab[c] = t;
477 fmt++;
478 }
479 }
480 fmt++;
481 }
482 }
483 #endif /* _WIDE */
484
485 #ifdef _WIDE
486 if ((ch >= 0) && (ch < 256) &&
487 isupper((int)ch)) { /* no longer documented */
488 if (_lib_version == c_issue_4) {
489 if (size != 'm' && size != 'L')
490 size = 'l';
491 }
492 ch = _tolower((int)ch);
493 }
494 if (ch != 'n' && !flag_eof) {
495 if (ch != 'c' && ch != 'C' && ch != '[') {
496 while (iswspace(inchar =
497 _wd_getwc(&chcount, iop)))
498 ;
499 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
500 break;
501
502 }
503 }
504 #else /* _WIDE */
505 if (isupper(ch)) { /* no longer documented */
506 if (_lib_version == c_issue_4) {
507 if (size != 'm' && size != 'L')
508 size = 'l';
509 }
510 ch = _tolower(ch);
511 }
512 if (ch != 'n' && !flag_eof) {
513 if (ch != 'c' && ch != 'C' && ch != '[') {
514 while (isspace(inchar = locgetc(chcount)))
515 ;
516 if (locungetc(chcount, inchar) == EOF)
517 break;
518 }
519 }
520 #endif /* _WIDE */
521
522 switch (ch) {
523 case 'C':
524 case 'S':
525 case 'c':
526 case 's':
527 #ifdef _WIDE
528 if ((size == 'l') || (size == 'C') || (size == 'S'))
529 #else /* _WIDE */
530 if ((size == 'w') || (size == 'l') || (size == 'C') ||
531 (size == 'S'))
532 #endif /* _WIDE */
533 {
534 size = wstring(&chcount, &flag_eof, stow,
535 (int)ch, len, iop, &args.ap);
536 } else {
537 size = string(&chcount, &flag_eof, stow,
538 (int)ch, len, tab, iop, &args.ap);
539 }
540 break;
541 case '[':
542 if (size == 'l') {
543 size = wbrstring(&chcount, &flag_eof, stow,
544 (int)ch, len, iop, bracket_str, &args.ap);
545 free(bracket_str);
546 bracket_str = NULL;
547 } else {
548 #ifdef _WIDE
549 size = brstring(&chcount, &flag_eof, stow,
550 (int)ch, len, iop, bracket_str, &args.ap);
551 free(bracket_str);
552 bracket_str = NULL;
553 #else /* _WIDE */
554 size = string(&chcount, &flag_eof, stow,
555 ch, len, tab, iop, &args.ap);
556 #endif /* _WIDE */
557 }
558 break;
559
560 case 'n':
561 if (stow == 0)
562 continue;
563 if (size == 'b') /* char */
564 *va_arg(args.ap, char *) = (char)chcount;
565 else if (size == 'h')
566 *va_arg(args.ap, short *) = (short)chcount;
567 else if (size == 'l')
568 *va_arg(args.ap, long *) = (long)chcount;
569 else if (size == 'm') /* long long */
570 *va_arg(args.ap, long long *) =
571 (long long) chcount;
572 else
573 *va_arg(args.ap, int *) = (int)chcount;
574 continue;
575
576 case 'i':
577 default:
578 size = number(&chcount, &flag_eof, stow, (int)ch,
579 len, (int)size, iop, &args.ap);
580 break;
581 }
582 if (size)
583 nmatch += stow;
584 else {
585 return ((flag_eof && !nmatch) ? EOF : nmatch);
586 }
587 continue;
588 }
589 if (bracket_str)
590 free(bracket_str);
591 return (nmatch != 0 ? nmatch : EOF); /* end of input */
592 }
593
594 /* ****************************************************************** */
595 /* Functions to read the input stream in an attempt to match incoming */
596 /* data to the current pattern from the main loop of _doscan(). */
597 /* ****************************************************************** */
598 static int
number(int * chcount,int * flag_eof,int stow,int type,int len,int size,FILE * iop,va_list * listp)599 number(int *chcount, int *flag_eof, int stow, int type, int len, int size,
600 FILE *iop, va_list *listp)
601 {
602 char numbuf[64];
603 char *np = numbuf;
604 int c, base, inchar, lookahead;
605 int digitseen = 0, floater = 0, negflg = 0;
606 int lc;
607 long long lcval = 0LL;
608
609 switch (type) {
610 case 'e':
611 case 'f':
612 case 'g':
613 /*
614 * lc = 0 corresponds to c90 mode: do not recognize
615 * hexadecimal fp strings; attempt to push back
616 * all unused characters read
617 *
618 * lc = -1 corresponds to c99 mode: recognize hexa-
619 * decimal fp strings; push back at most one
620 * unused character
621 */
622 lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0;
623 floater = 1;
624 break;
625
626 case 'a':
627 lc = -1;
628 floater = 1;
629 break;
630
631 case 'd':
632 case 'u':
633 case 'i':
634 base = 10;
635 break;
636 case 'o':
637 base = 8;
638 break;
639 case 'p':
640 #ifdef _LP64
641 size = 'l'; /* pointers are long in LP64 */
642 #endif /* _LP64 */
643 /* FALLTHROUGH */
644 case 'x':
645 base = 16;
646 break;
647 default:
648 return (0); /* unrecognized conversion character */
649 }
650
651 if (floater != 0) {
652 /*
653 * Handle floating point with
654 * file_to_decimal.
655 */
656 decimal_mode dm;
657 decimal_record dr;
658 fp_exception_field_type efs;
659 enum decimal_string_form form;
660 char *echar;
661 int nread;
662 char buffer[1024+1];
663 char *nb = buffer;
664
665 if (len > 1024)
666 len = 1024;
667 file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread);
668 if (lc == -1) {
669 /*
670 * In C99 mode, the entire string read has to be
671 * accepted in order to qualify as a match
672 */
673 if (nb != buffer + nread)
674 form = invalid_form;
675 }
676 if (stow && (form != invalid_form)) {
677 #if defined(__sparc)
678 dm.rd = _QgetRD();
679 if (size == 'L') { /* long double */
680 if ((int)form < 0)
681 __hex_to_quadruple(&dr, dm.rd,
682 va_arg(*listp, quadruple *), &efs);
683 else
684 decimal_to_quadruple(
685 va_arg(*listp, quadruple *),
686 &dm, &dr, &efs);
687 }
688 #elif defined(__i386) || defined(__amd64)
689 dm.rd = __xgetRD();
690 if (size == 'L') { /* long double */
691 if ((int)form < 0)
692 __hex_to_extended(&dr, dm.rd,
693 va_arg(*listp, extended *), &efs);
694 else
695 decimal_to_extended(
696 va_arg(*listp, extended *),
697 &dm, &dr, &efs);
698 }
699 #else
700 #error Unknown architecture
701 #endif
702 else if (size == 'l') { /* double */
703 if ((int)form < 0)
704 __hex_to_double(&dr, dm.rd,
705 va_arg(*listp, double *), &efs);
706 else
707 decimal_to_double(
708 va_arg(*listp, double *),
709 &dm, &dr, &efs);
710 } else { /* float */
711 if ((int)form < 0)
712 __hex_to_single(&dr, dm.rd,
713 va_arg(*listp, single *), &efs);
714 else
715 decimal_to_single((single *)
716 va_arg(*listp, single *),
717 &dm, &dr, &efs);
718 }
719 if ((efs & (1 << fp_overflow)) != 0) {
720 errno = ERANGE;
721 }
722 if ((efs & (1 << fp_underflow)) != 0) {
723 errno = ERANGE;
724 }
725 }
726 (*chcount) += nread; /* Count characters read. */
727 c = locgetc((*chcount));
728 if (locungetc((*chcount), c) == EOF)
729 *flag_eof = 1;
730 return ((form == invalid_form) ? 0 : 1);
731 /* successful match if non-zero */
732 }
733
734 switch (c = locgetc((*chcount))) {
735 case '-':
736 negflg++;
737 /* FALLTHROUGH */
738 case '+':
739 if (--len <= 0)
740 break;
741 if ((c = locgetc((*chcount))) != '0')
742 break;
743 /* FALLTHROUGH */
744 case '0':
745 /*
746 * If %i or %x, the characters 0x or 0X may optionally precede
747 * the sequence of letters and digits (base 16).
748 */
749 if ((type != 'i' && type != 'x') || (len <= 1))
750 break;
751 if (((inchar = locgetc((*chcount))) == 'x') ||
752 (inchar == 'X')) {
753 lookahead = readchar(iop, chcount);
754 if (isxdigit(lookahead)) {
755 base = 16;
756
757 if (len <= 2) {
758 (void) locungetc((*chcount), lookahead);
759 /* Take into account the 'x' */
760 len -= 1;
761 } else {
762 c = lookahead;
763 /* Take into account '0x' */
764 len -= 2;
765 }
766 } else {
767 (void) locungetc((*chcount), lookahead);
768 (void) locungetc((*chcount), inchar);
769 }
770 } else {
771 /* inchar wans't 'x'. */
772 (void) locungetc((*chcount), inchar); /* Put it back. */
773 if (type == 'i') /* Only %i accepts an octal. */
774 base = 8;
775 }
776 }
777 for (; --len >= 0; *np++ = (char)c, c = locgetc((*chcount))) {
778 if (np > numbuf + 62) {
779 errno = ERANGE;
780 return (0);
781 }
782 if (isdigit(c) || base == 16 && isxdigit(c)) {
783 int digit = c - (isdigit(c) ? '0' :
784 isupper(c) ? 'A' - 10 : 'a' - 10);
785 if (digit >= base)
786 break;
787 if (stow)
788 lcval = base * lcval + digit;
789 digitseen++;
790 continue;
791 }
792 break;
793 }
794
795 if (stow && digitseen) {
796 /* suppress possible overflow on 2's-comp negation */
797 if (negflg && lcval != (1ULL << 63))
798 lcval = -lcval;
799 switch (size) {
800 case 'm':
801 *va_arg(*listp, long long *) = lcval;
802 break;
803 case 'l':
804 *va_arg(*listp, long *) = (long)lcval;
805 break;
806 case 'h':
807 *va_arg(*listp, short *) = (short)lcval;
808 break;
809 case 'b':
810 *va_arg(*listp, char *) = (char)lcval;
811 break;
812 default:
813 *va_arg(*listp, int *) = (int)lcval;
814 break;
815 }
816 }
817 if (locungetc((*chcount), c) == EOF)
818 *flag_eof = 1;
819 return (digitseen); /* successful match if non-zero */
820 }
821
822 /* Get a character. If not using sscanf and at the buffer's end */
823 /* then do a direct read(). Characters read via readchar() */
824 /* can be pushed back on the input stream by locungetc((*chcount),) */
825 /* since there is padding allocated at the end of the stream buffer. */
826 static int
readchar(FILE * iop,int * chcount)827 readchar(FILE *iop, int *chcount)
828 {
829 int inchar;
830 char buf[1];
831
832 if ((iop->_flag & _IOWRT) || (iop->_cnt != 0)) {
833 inchar = locgetc((*chcount));
834 } else {
835 if (_xread(iop, buf, 1) != 1)
836 return (EOF);
837 inchar = (int)buf[0];
838 (*chcount) += 1;
839 }
840 return (inchar);
841 }
842
843 static int
string(int * chcount,int * flag_eof,int stow,int type,int len,char * tab __unused,FILE * iop,va_list * listp)844 string(int *chcount, int *flag_eof, int stow, int type, int len,
845 char *tab __unused, FILE *iop, va_list *listp)
846 {
847 int ch;
848 char *ptr;
849 char *start;
850
851 start = ptr = stow ? va_arg(*listp, char *) : NULL;
852 if (((type == 'c') || (type == 'C')) && len == MAXINT)
853 len = 1;
854 #ifdef _WIDE
855 while ((ch = locgetc((*chcount))) != EOF &&
856 !(((type == 's') || (type == 'S')) && isspace(ch))) {
857 #else /* _WIDE */
858 while ((ch = locgetc((*chcount))) != EOF &&
859 !(((type == 's') || (type == 'S')) &&
860 isspace(ch) || type == '[' && tab[ch])) {
861 #endif /* _WIDE */
862 if (stow)
863 *ptr = (char)ch;
864 ptr++;
865 if (--len <= 0)
866 break;
867 }
868 if (ch == EOF) {
869 (*flag_eof) = 1;
870 (*chcount) -= 1;
871 } else if (len > 0 && locungetc((*chcount), ch) == EOF)
872 (*flag_eof) = 1;
873 if (ptr == start)
874 return (0); /* no match */
875 if (stow && ((type != 'c') && (type != 'C')))
876 *ptr = '\0';
877 return (1); /* successful match */
878 }
879
880 /* This function initializes arglst, to contain the appropriate */
881 /* va_list values for the first MAXARGS arguments. */
882 /* WARNING: this code assumes that the sizes of all pointer types */
883 /* are the same. (Code similar to that in the portable doprnt.c */
884 /* should be used if this assumption is not true for a */
885 /* particular port.) */
886
887 #ifdef _WIDE
888 static int
889 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[])
890 #else /* _WIDE */
891 static int
892 _mkarglst(const char *fmt, stva_list args, stva_list arglst[])
893 #endif /* _WIDE */
894 {
895 #ifdef _WIDE
896 #define STRCHR wcschr
897 #define STRSPN wcsspn
898 #define ATOI(x) _watoi((wchar_t *)x)
899 #define SPNSTR1 L"01234567890"
900 #define SPNSTR2 L"# +-.0123456789hL$"
901 #else /* _WIDE */
902 #define STRCHR strchr
903 #define STRSPN strspn
904 #define ATOI(x) atoi(x)
905 #define SPNSTR1 "01234567890"
906 #define SPNSTR2 "# +-.0123456789hL$"
907 #endif /* _WIDE */
908
909 int maxnum, curargno;
910 size_t n;
911
912 maxnum = -1;
913 curargno = 0;
914
915 while ((fmt = STRCHR(fmt, '%')) != NULL) {
916 fmt++; /* skip % */
917 if (*fmt == '*' || *fmt == '%')
918 continue;
919 if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') {
920 /* convert to zero base */
921 curargno = ATOI(fmt) - 1;
922 fmt += n + 1;
923 }
924
925 if (maxnum < curargno)
926 maxnum = curargno;
927 curargno++; /* default to next in list */
928
929 fmt += STRSPN(fmt, SPNSTR2);
930 if (*fmt == '[') {
931 fmt++; /* has to be at least on item in scan list */
932 if (*fmt == ']') {
933 fmt++;
934 }
935 while (*fmt != ']') {
936 if (*fmt == L'\0') {
937 return (-1); /* bad format */
938 #ifdef _WIDE
939 } else {
940 fmt++;
941 }
942 #else /* _WIDE */
943 } else if (isascii(*fmt)) {
944 fmt++;
945 } else {
946 int i;
947
948 i = mblen((const char *)
949 fmt, MB_CUR_MAX);
950 if (i <= 0) {
951 return (-1);
952 } else {
953 fmt += i;
954 }
955 }
956 #endif /* _WIDE */
957 }
958 }
959 }
960 if (maxnum > MAXARGS)
961 maxnum = MAXARGS;
962 for (n = 0; n <= maxnum; n++) {
963 arglst[n] = args;
964 (void) va_arg(args.ap, void *);
965 }
966 return (0);
967 }
968
969
970 /*
971 * For wide character handling
972 */
973
974 #ifdef _WIDE
975 static int
wstring(int * chcount,int * flag_eof,int stow,int type,int len,FILE * iop,va_list * listp)976 wstring(int *chcount, int *flag_eof, int stow, int type,
977 int len, FILE *iop, va_list *listp)
978 {
979 wint_t wch;
980 wchar_t *ptr;
981 wchar_t *wstart;
982
983 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
984
985 if ((type == 'c') && len == MAXINT)
986 len = 1;
987 while (((wch = _wd_getwc(chcount, iop)) != WEOF) &&
988 !(type == 's' && iswspace(wch))) {
989 if (stow)
990 *ptr = wch;
991 ptr++;
992 if (--len <= 0)
993 break;
994 }
995 if (wch == WEOF) {
996 *flag_eof = 1;
997 (*chcount) -= 1;
998 } else {
999 if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF)
1000 *flag_eof = 1;
1001 }
1002 if (ptr == wstart)
1003 return (0); /* no match */
1004 if (stow && (type != 'c'))
1005 *ptr = '\0';
1006 return (1); /* successful match */
1007 }
1008
1009 #else /* _WIDE */
1010 static int
wstring(int * chcount,int * flag_eof,int stow,int type,int len,FILE * iop,va_list * listp)1011 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop,
1012 va_list *listp)
1013 {
1014 int wch;
1015 wchar_t *ptr;
1016 wchar_t *wstart;
1017
1018 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1019
1020 if ((type == 'c') && len == MAXINT)
1021 len = 1;
1022 while (((wch = _bi_getwc(iop)) != EOF) &&
1023 !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) {
1024 (*chcount) += _scrwidth((wchar_t)wch);
1025 if (stow)
1026 *ptr = wch;
1027 ptr++;
1028 if (--len <= 0)
1029 break;
1030 }
1031 if (wch == EOF) {
1032 (*flag_eof) = 1;
1033 (*chcount) -= 1;
1034 } else {
1035 if (len > 0 && _bi_ungetwc(wch, iop) == EOF)
1036 (*flag_eof) = 1;
1037 }
1038 if (ptr == wstart)
1039 return (0); /* no match */
1040 if (stow && (type != 'c'))
1041 *ptr = '\0';
1042 return (1); /* successful match */
1043 }
1044 #endif /* _WIDE */
1045
1046 #ifdef _WIDE
1047 static wint_t
_wd_getwc(int * chcount,FILE * iop)1048 _wd_getwc(int *chcount, FILE *iop)
1049 {
1050 wint_t wc;
1051 int len;
1052
1053 if (!(iop->_flag & _IOWRT)) {
1054 /* call from fwscanf, wscanf */
1055 wc = __fgetwc_xpg5(iop);
1056 (*chcount)++;
1057 return (wc);
1058 } else {
1059 /* call from swscanf */
1060 if (*iop->_ptr == '\0')
1061 return (WEOF);
1062 len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr,
1063 MB_CUR_MAX);
1064 if (len == -1)
1065 return (WEOF);
1066 iop->_ptr += len;
1067 (*chcount)++;
1068 return (wc);
1069 }
1070 }
1071
1072 static wint_t
_wd_ungetwc(int * chcount,wchar_t wc,FILE * iop)1073 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop)
1074 {
1075 wint_t ret;
1076 int len;
1077 char mbs[MB_LEN_MAX];
1078
1079 if (wc == WEOF)
1080 return (WEOF);
1081
1082 if (!(iop->_flag & _IOWRT)) {
1083 /* call from fwscanf, wscanf */
1084 ret = __ungetwc_xpg5((wint_t)wc, iop);
1085 if (ret != (wint_t)wc)
1086 return (WEOF);
1087 (*chcount)--;
1088 return (ret);
1089 } else {
1090 /* call from swscanf */
1091 len = wctomb(mbs, wc);
1092 if (len == -1)
1093 return (WEOF);
1094 iop->_ptr -= len;
1095 (*chcount)--;
1096 return ((wint_t)wc);
1097 }
1098 }
1099
/*
 * Wide-character counterpart of atoi() for the leading digits of a
 * format.  Converts the run of decimal digits at the start of fmt and
 * stops at the first character that is outside 0..255 or is not a
 * digit.  Returns 0 when fmt does not begin with a digit.
 */
static int
_watoi(wchar_t *fmt)
{
	const wchar_t	*cursor;
	int		value = 0;

	for (cursor = fmt; ; cursor++) {
		wchar_t	wc = *cursor;

		if (wc < 0 || wc >= 256 || !isdigit((int)wc))
			break;
		value = value * 10 + (int)(wc - '0');
	}
	return (value);
}
1117 #endif /* _WIDE */
1118
1119 static int
wbrstring(int * chcount,int * flag_eof,int stow,int type __unused,int len,FILE * iop,unsigned char * brstr,va_list * listp)1120 wbrstring(int *chcount, int *flag_eof, int stow, int type __unused,
1121 int len, FILE *iop, unsigned char *brstr, va_list *listp)
1122 {
1123 wint_t wch;
1124 int i;
1125 char str[MB_LEN_MAX + 1]; /* include null termination */
1126 wchar_t *ptr, *start;
1127 #ifdef _WIDE
1128 int dummy;
1129 #endif /* _WIDE */
1130
1131 start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1132
1133 #ifdef _WIDE
1134 while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
1135 #else /* _WIDE */
1136 while ((wch = _bi_getwc(iop)) != WEOF) {
1137 #endif /* _WIDE */
1138 i = wctomb(str, (wchar_t)wch);
1139 if (i == -1) {
1140 return (0);
1141 }
1142 str[i] = '\0';
1143 if (fnmatch((const char *)brstr, (const char *)str,
1144 FNM_NOESCAPE)) {
1145 break;
1146 } else {
1147 if (len > 0) {
1148 #ifdef _WIDE
1149 (*chcount)++;
1150 #else /* _WIDE */
1151 (*chcount) += _scrwidth(wch);
1152 #endif /* _WIDE */
1153 len--;
1154 if (stow) {
1155 *ptr = wch;
1156 }
1157 ptr++;
1158 if (len <= 0)
1159 break;
1160 } else {
1161 break;
1162 }
1163 }
1164 }
1165 if (wch == WEOF) {
1166 *flag_eof = 1;
1167 } else {
1168 #ifdef _WIDE
1169 if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
1170 #else /* _WIDE */
1171 if (len > 0 && _bi_ungetwc(wch, iop) == WEOF)
1172 #endif /* _WIDE */
1173 *flag_eof = 1;
1174 }
1175 if (ptr == start)
1176 return (0); /* no match */
1177 if (stow)
1178 *ptr = L'\0';
1179 return (1); /* successful match */
1180 }
1181
#ifdef _WIDE
/*
 * Match a scanset ("[...]") conversion in the wide scanner whose result
 * is stored as a multibyte (char) string.  Each wide character read is
 * converted with wctomb() and tested against the bracket expression
 * with fnmatch().
 *
 *   chcount   running count; advanced once per accepted character
 *   flag_eof  set to 1 when end of input is reached
 *   stow      non-zero to store through the next char * argument
 *   len       field width, compared against the encoded byte length of
 *             each character
 *   brstr     bracket expression, as a multibyte fnmatch() pattern
 *
 * Returns 1 if at least one byte was matched, else 0 (also 0 when a
 * character cannot be converted by wctomb()).  The stored string is NUL
 * terminated.
 */
static int
brstring(int *chcount, int *flag_eof, int stow, int type __unused,
    int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	char	*ptr, *p;
	int	nbytes = 0;	/* bytes accepted so far */
	/*
	 * Scratch counter for _wd_getwc()/_wd_ungetwc(); initialized so
	 * that their increment/decrement never reads an indeterminate
	 * value.
	 */
	int	dummy = 0;

	/*
	 * With assignment suppressed there is no destination; progress is
	 * tracked in nbytes rather than by advancing a NULL pointer.
	 */
	ptr = stow ? va_arg(*listp, char *) : NULL;

	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
		p = str;
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE)) {
			break;		/* not in the scanset */
		}
		if (len < i) {
			break;		/* would exceed the field width */
		}
		(*chcount)++;
		len -= i;
		nbytes += i;
		if (stow) {
			while (i-- > 0) {
				*ptr++ = *p++;
			}
		}
		if (len <= 0)
			break;
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
			*flag_eof = 1;
	}
	if (nbytes == 0)
		return (0);				/* no match */
	if (stow)
		*ptr = '\0';
	return (1);					/* successful match */
}
#endif	/* _WIDE */
1238
1239 /*
1240 * Locally define getwc and ungetwc
1241 */
/*
 * Read one character from the byte stream as a wide character.
 *
 * An ASCII byte is returned directly.  Otherwise up to MB_CUR_MAX bytes
 * are collected (stopping early at a newline, which is pushed back, or
 * at EOF) and decoded with mbtowc(); bytes beyond the decoded character
 * are pushed back.
 *
 * Returns the character, or WEOF at end of input.  If the bytes do not
 * form a valid character, errno is set to EILSEQ and WEOF is returned
 * after pushing back every collected byte except the first.
 */
static int
_bi_getwc(FILE *iop)
{
	int c;
	wchar_t intcode;
	int i, nbytes, cur_max;
	char buff[MB_LEN_MAX];

	if ((c = wlocgetc()) == EOF)
		return (WEOF);

	if (isascii(c))	/* ASCII code */
		return ((wint_t)c);

	buff[0] = (char)c;

	cur_max = (int)MB_CUR_MAX;
	/* MB_CUR_MAX doesn't exceed the value of MB_LEN_MAX */
	/* So we use MB_CUR_MAX instead of MB_LEN_MAX for */
	/* improving the performance. */
	for (i = 1; i < cur_max; i++) {
		c = wlocgetc();
		if (c == '\n') {
			(void) wlocungetc(c);
			break;
		}
		if (c == EOF) {
			/* this still may be a valid multibyte character */
			break;
		}
		buff[i] = (char)c;
	}

	if ((nbytes = mbtowc(&intcode, buff, i)) == -1) {
		/*
		 * If mbtowc fails, the input was not a legal character.
		 *	ungetc all but one character.
		 *
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 *
		 * The byte is passed as unsigned char: as a signed char,
		 * 0xFF would compare equal to EOF inside wlocungetc() and
		 * be silently dropped instead of pushed back.
		 */
		while (i-- > 1) {
			(void) wlocungetc((unsigned char)buff[i]);
		}
		errno = EILSEQ;
		return (WEOF); /* Illegal EUC sequence. */
	}

	while (i-- > nbytes) {
		/*
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 *
		 * unsigned char for the same reason as above.
		 */
		(void) wlocungetc((unsigned char)buff[i]);
	}
	return ((int)intcode);
}
1303
1304 static int
1305 _bi_ungetwc(wint_t wc, FILE *iop)
1306 {
1307 char mbs[MB_LEN_MAX];
1308 unsigned char *p;
1309 int n;
1310
1311 if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0))
1312 return (WEOF);
1313
1314 n = wctomb(mbs, (wchar_t)wc);
1315 if (n <= 0)
1316 return (WEOF);
1317
1318 if (iop->_ptr <= iop->_base) {
1319 if (iop->_base == NULL) {
1320 return (WEOF);
1321 }
1322 if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) {
1323 ++iop->_ptr;
1324 } else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) {
1325 return (WEOF);
1326 }
1327 }
1328
1329 p = (unsigned char *)(mbs+n-1); /* p points the last byte */
1330 /* if _IOWRT is set to iop->_flag, it means this is */
1331 /* an invocation from sscanf(), and in that time we */
1332 /* don't touch iop->_cnt. Otherwise, which means an */
1333 /* invocation from fscanf() or scanf(), we touch iop->_cnt */
1334 if ((iop->_flag & _IOWRT) == 0) {
1335 /* scanf() and fscanf() */
1336 iop->_cnt += n;
1337 while (n--) {
1338 *--iop->_ptr = *(p--);
1339 }
1340 } else {
1341 /* sscanf() */
1342 iop->_ptr -= n;
1343 }
1344 return (wc);
1345 }
1346