1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1988 AT&T */
28 /* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 #include "lint.h"
33 #include <sys/types.h>
34 #include "mtlib.h"
35 #include "file64.h"
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #include <values.h>
40 #include <errno.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <math.h>
44 #include <thread.h>
45 #include <synch.h>
46 #include <stdlib.h>
47 #include <fnmatch.h>
48 #include <limits.h>
49 #include <wchar.h>
50 #include <unistd.h>
51 #include "libc.h"
52 #include "stdiom.h"
53 #include "xpg6.h"
54
#define	NCHARS	(1 << BITSPERBYTE)	/* number of distinct byte values */

/*
 * Byte-level input helpers.  All four macros expect a local variable named
 * "iop" (the FILE being scanned) to be in scope where they are expanded.
 *
 * If the _IOWRT flag is set, this must be a call from sscanf: the input is
 * then a NUL-terminated string walked through iop->_ptr, and the
 * terminating '\0' is reported as EOF without being consumed.
 *
 * locgetc(cnt)		fetch one byte; cnt is incremented first, even when
 *			the result turns out to be EOF.
 * locungetc(cnt, x)	decrement cnt (even when x is EOF) and, unless x is
 *			EOF, step back over one byte.
 * wlocgetc()		as locgetc(), without a character count.
 * wlocungetc(x)	as locungetc(), without a character count; for real
 *			streams the byte is handed to UNGETC().
 *
 * NOTE(review): locungetc() never stores x; it only moves iop->_ptr back
 * (and bumps iop->_cnt for real streams), so it presumably relies on the
 * byte still being present in the stream buffer -- confirm against GETC().
 *
 * Macro arguments are parenthesized so that any expression may be passed.
 */
#define	locgetc(cnt)	((cnt) += 1, (iop->_flag & _IOWRT) ? \
	((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
	GETC(iop))
#define	locungetc(cnt, x)	((cnt) -= 1, ((x) == EOF) ? EOF : \
	((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
	(++iop->_cnt, *(--iop->_ptr))))

#define	wlocgetc()	((iop->_flag & _IOWRT) ? \
	((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
	GETC(iop))
#define	wlocungetc(x)	(((x) == EOF) ? EOF : \
	((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
	UNGETC((x), iop)))

#define	MAXARGS	30	/* max. number of args for fast positional parameters */
73
/*
 * C does not allow an object of array type on the left-hand side of an
 * assignment operator, and va_list may be an array type.  Wrapping the
 * va_list in a structure lets a complete argument-list state be copied
 * with a plain structure assignment.
 */
struct stva_list {
	va_list	ap;
};
typedef struct stva_list stva_list;
83
84 static int number(int *, int *, int, int, int, int, FILE *, va_list *);
85 static int readchar(FILE *, int *);
86 static int string(int *, int *, int, int, int, char *, FILE *, va_list *);
87 static int wstring(int *, int *, int, int, int, FILE *, va_list *);
88 static int wbrstring(int *, int *, int, int, int, FILE *,
89 unsigned char *, va_list *);
90 #ifdef _WIDE
91 static int brstring(int *, int *, int, int, int, FILE *,
92 unsigned char *, va_list *);
93 #endif
94 static int _bi_getwc(FILE *);
95 static int _bi_ungetwc(wint_t, FILE *);
96
97 #ifdef _WIDE
98 static int _mkarglst(const wchar_t *, stva_list, stva_list[]);
99 static wint_t _wd_getwc(int *, FILE *);
100 static wint_t _wd_ungetwc(int *, wchar_t, FILE *);
101 static int _watoi(wchar_t *);
102 #else /* _WIDE */
103 static int _mkarglst(const char *, stva_list, stva_list[]);
104 #endif /* _WIDE */
105
106 #ifndef _WIDE
107 int
_doscan(FILE * iop,const char * fmt,va_list va_Alist)108 _doscan(FILE *iop, const char *fmt, va_list va_Alist)
109 {
110 int ret;
111 rmutex_t *lk;
112
113 if (iop->_flag & _IOWRT)
114 ret = __doscan_u(iop, fmt, va_Alist, 0);
115 else {
116 FLOCKFILE(lk, iop);
117 ret = __doscan_u(iop, fmt, va_Alist, 0);
118 FUNLOCKFILE(lk);
119 }
120 return (ret);
121 }
122 #endif /* _WIDE */
123
124 /* ARGSUSED3 */
125 #ifdef _WIDE
126 int
__wdoscan_u(FILE * iop,const wchar_t * fmt,va_list va_Alist,int scflag)127 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, int scflag)
128 #else /* _WIDE */
129 int
130 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag)
131 #endif /* _WIDE */
132 {
133 #ifdef _WIDE
134 wchar_t ch;
135 wchar_t inchar, size;
136 int nmatch = 0, len, stow;
137 #else /* _WIDE */
138 int ch;
139 int nmatch = 0, len, inchar, stow, size;
140 #endif /* _WIDE */
141
142 unsigned char *bracket_str = NULL;
143 int chcount, flag_eof;
144 char tab[NCHARS];
145
146 /* variables for postional parameters */
147 #ifdef _WIDE
148 const wchar_t *sformat = fmt; /* save the beginning of the format */
149 #else /* _WIDE */
150 const unsigned char *fmt = (const unsigned char *)sfmt;
151 const char *sformat = sfmt; /* save the beginning of the format */
152 #endif /* _WIDE */
153 int fpos = 1; /* 1 if first postional parameter */
154 stva_list args; /* used to step through the argument list */
155 stva_list sargs; /* used to save start of the argument list */
156 stva_list arglst[MAXARGS];
157 /*
158 * array giving the appropriate values
159 * for va_arg() to retrieve the
160 * corresponding argument:
161 * arglst[0] is the first argument
162 * arglst[1] is the second argument,etc.
163 */
164 /* Check if readable stream */
165 if (!(iop->_flag & (_IOREAD | _IORW))) {
166 errno = EBADF;
167 return (EOF);
168 }
169
170 /*
171 * Initialize args and sargs to the start of the argument list.
172 * We don't know any portable way to copy an arbitrary C object
173 * so we use a system-specific routine(probably a macro) from
174 * stdarg.h. (Remember that if va_list is an array, in_args will
175 * be a pointer and &in_args won't be what we would want for
176 * memcpy.)
177 */
178 va_copy(args.ap, va_Alist);
179
180 sargs = args;
181
182 chcount = 0; flag_eof = 0;
183
184 /*
185 * ****************************************************
186 * Main loop: reads format to determine a pattern,
187 * and then goes to read input stream
188 * in attempt to match the pattern.
189 * ****************************************************
190 */
191 for (; ; ) {
192 if ((ch = *fmt++) == '\0') {
193 return (nmatch); /* end of format */
194 }
195 #ifdef _WIDE
196 if (iswspace(ch)) {
197 if (!flag_eof) {
198 while (iswspace(inchar =
199 _wd_getwc(&chcount, iop)))
200 ;
201 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
202 flag_eof = 1;
203 }
204 continue;
205 }
206 if (ch != '%' || (ch = *fmt++) == '%') {
207 if (ch == '%') {
208 if (!flag_eof) {
209 while (iswspace(inchar =
210 _wd_getwc(&chcount, iop)))
211 ;
212 if (_wd_ungetwc(&chcount, inchar, iop)
213 == WEOF)
214 flag_eof = 1;
215 }
216 }
217 if ((inchar = _wd_getwc(&chcount, iop)) == ch)
218 continue;
219 if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) {
220 return (nmatch); /* failed to match input */
221 }
222 break;
223 }
224 #else /* _WIDE */
225 if (isspace(ch)) {
226 if (!flag_eof) {
227 while (isspace(inchar = locgetc(chcount)))
228 ;
229 if (locungetc(chcount, inchar) == EOF)
230 flag_eof = 1;
231
232 }
233 continue;
234 }
235 if (ch != '%' || (ch = *fmt++) == '%') {
236 if (ch == '%') {
237 if (!flag_eof) {
238 while (isspace(inchar =
239 locgetc(chcount)))
240 ;
241 if (locungetc(chcount, inchar) == EOF)
242 flag_eof = 1;
243 }
244 }
245 if ((inchar = locgetc(chcount)) == ch)
246 continue;
247 if (locungetc(chcount, inchar) != EOF) {
248 return (nmatch); /* failed to match input */
249 }
250 break;
251 }
252 #endif /* _WIDE */
253
254 charswitch: /* target of a goto 8-( */
255
256 if (ch == '*') {
257 stow = 0;
258 ch = *fmt++;
259 } else
260 stow = 1;
261
262 #ifdef _WIDE
263 for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch));
264 ch = *fmt++)
265 len = len * 10 + ch - '0';
266 #else /* _WIDE */
267 for (len = 0; isdigit(ch); ch = *fmt++)
268 len = len * 10 + ch - '0';
269 #endif /* _WIDE */
270
271 if (ch == '$') {
272 /*
273 * positional parameter handling - the number
274 * specified in len gives the argument to which
275 * the next conversion should be applied.
276 * WARNING: This implementation of positional
277 * parameters assumes that the sizes of all pointer
278 * types are the same. (Code similar to that
279 * in the portable doprnt.c should be used if this
280 * assumption does not hold for a particular
281 * port.)
282 */
283 if (fpos) {
284 if (_mkarglst(sformat, sargs, arglst) != 0) {
285 return (EOF);
286 } else {
287 fpos = 0;
288 }
289 }
290 if (len <= MAXARGS) {
291 args = arglst[len - 1];
292 } else {
293 args = arglst[MAXARGS - 1];
294 for (len -= MAXARGS; len > 0; len--)
295 (void) va_arg(args.ap, void *);
296 }
297 len = 0;
298 ch = *fmt++;
299 goto charswitch;
300 }
301
302 if (len == 0)
303 len = MAXINT;
304 #ifdef _WIDE
305 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
306 (size == 'j') || (size == 't') || (size == 'z'))
307 ch = *fmt++;
308 #else /* _WIDE */
309 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
310 (size == 'w') || (size == 'j') || (size == 't') ||
311 (size == 'z'))
312 ch = *fmt++;
313 #endif /* _WIDE */
314 if (size == 'l' && ch == 'l') {
315 size = 'm'; /* size = 'm' if long long */
316 ch = *fmt++;
317 } else if (size == 'h' && ch == 'h') {
318 size = 'b'; /* use size = 'b' if char */
319 ch = *fmt++;
320 } else if ((size == 't') || (size == 'z')) {
321 size = 'l';
322 } else if (size == 'j') {
323 #ifndef _LP64
324 /* check scflag for size of u/intmax_t (32-bit libc) */
325 if (!(scflag & _F_INTMAX32)) {
326 #endif
327 size = 'm';
328 #ifndef _LP64
329 }
330 #endif
331 }
332 if (ch == '\0') {
333 return (EOF); /* unexpected end of format */
334 }
335 #ifdef _WIDE
336 if (ch == '[') {
337 wchar_t c;
338 size_t len;
339 int negflg = 0;
340 wchar_t *p;
341 wchar_t *wbracket_str;
342 size_t wlen, clen;
343
344 /* p points to the address of '[' */
345 p = (wchar_t *)fmt - 1;
346 len = 0;
347 if (*fmt == '^') {
348 len++;
349 fmt++;
350 negflg = 1;
351 }
352 if (((c = *fmt) == ']') || (c == '-')) {
353 len++;
354 fmt++;
355 }
356 while ((c = *fmt) != ']') {
357 if (c == '\0') {
358 return (EOF); /* unexpected EOF */
359 } else {
360 len++;
361 fmt++;
362 }
363 }
364 fmt++;
365 len += 2;
366 wbracket_str = (wchar_t *)
367 malloc(sizeof (wchar_t) * (len + 1));
368 if (wbracket_str == NULL) {
369 errno = ENOMEM;
370 return (EOF);
371 } else {
372 (void) wmemcpy(wbracket_str,
373 (const wchar_t *)p, len);
374 *(wbracket_str + len) = L'\0';
375 if (negflg && *(wbracket_str + 1) == '^') {
376 *(wbracket_str + 1) = L'!';
377 }
378 }
379 wlen = wcslen(wbracket_str);
380 clen = wcstombs((char *)NULL, wbracket_str, 0);
381 if (clen == (size_t)-1) {
382 free(wbracket_str);
383 return (EOF);
384 }
385 bracket_str = (unsigned char *)
386 malloc(sizeof (unsigned char) * (clen + 1));
387 if (bracket_str == NULL) {
388 free(wbracket_str);
389 errno = ENOMEM;
390 return (EOF);
391 }
392 clen = wcstombs((char *)bracket_str, wbracket_str,
393 wlen + 1);
394 free(wbracket_str);
395 if (clen == (size_t)-1) {
396 free(bracket_str);
397 return (EOF);
398 }
399 }
400 #else /* _WIDE */
401 if (ch == '[') {
402 if (size == 'l') {
403 int c, len, i;
404 int negflg = 0;
405 unsigned char *p;
406
407 p = (unsigned char *)(fmt - 1);
408 len = 0;
409 if (*fmt == '^') {
410 len++;
411 fmt++;
412 negflg = 1;
413 }
414 if (((c = *fmt) == ']') || (c == '-')) {
415 len++;
416 fmt++;
417 }
418 while ((c = *fmt) != ']') {
419 if (c == '\0') {
420 return (EOF);
421 } else if (isascii(c)) {
422 len++;
423 fmt++;
424 } else {
425 i = mblen((const char *)fmt,
426 MB_CUR_MAX);
427 if (i <= 0) {
428 return (EOF);
429 } else {
430 len += i;
431 fmt += i;
432 }
433 }
434 }
435 fmt++;
436 len += 2;
437 bracket_str = (unsigned char *)
438 malloc(sizeof (unsigned char) * (len + 1));
439 if (bracket_str == NULL) {
440 errno = ENOMEM;
441 return (EOF);
442 } else {
443 (void) strncpy((char *)bracket_str,
444 (const char *)p, len);
445 *(bracket_str + len) = '\0';
446 if (negflg &&
447 *(bracket_str + 1) == '^') {
448 *(bracket_str + 1) = '!';
449 }
450 }
451 } else {
452 int t = 0;
453 int b, c, d;
454
455 if (*fmt == '^') {
456 t++;
457 fmt++;
458 }
459 (void) memset(tab, !t, NCHARS);
460 if ((c = *fmt) == ']' || c == '-') {
461 tab[c] = t;
462 fmt++;
463 }
464
465 while ((c = *fmt) != ']') {
466 if (c == '\0') {
467 return (EOF);
468 }
469 b = *(fmt - 1);
470 d = *(fmt + 1);
471 if ((c == '-') && (d != ']') &&
472 (b < d)) {
473 (void) memset(&tab[b], t,
474 d - b + 1);
475 fmt += 2;
476 } else {
477 tab[c] = t;
478 fmt++;
479 }
480 }
481 fmt++;
482 }
483 }
484 #endif /* _WIDE */
485
486 #ifdef _WIDE
487 if ((ch >= 0) && (ch < 256) &&
488 isupper((int)ch)) { /* no longer documented */
489 if (_lib_version == c_issue_4) {
490 if (size != 'm' && size != 'L')
491 size = 'l';
492 }
493 ch = _tolower((int)ch);
494 }
495 if (ch != 'n' && !flag_eof) {
496 if (ch != 'c' && ch != 'C' && ch != '[') {
497 while (iswspace(inchar =
498 _wd_getwc(&chcount, iop)))
499 ;
500 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
501 break;
502
503 }
504 }
505 #else /* _WIDE */
506 if (isupper(ch)) { /* no longer documented */
507 if (_lib_version == c_issue_4) {
508 if (size != 'm' && size != 'L')
509 size = 'l';
510 }
511 ch = _tolower(ch);
512 }
513 if (ch != 'n' && !flag_eof) {
514 if (ch != 'c' && ch != 'C' && ch != '[') {
515 while (isspace(inchar = locgetc(chcount)))
516 ;
517 if (locungetc(chcount, inchar) == EOF)
518 break;
519 }
520 }
521 #endif /* _WIDE */
522
523 switch (ch) {
524 case 'C':
525 case 'S':
526 case 'c':
527 case 's':
528 #ifdef _WIDE
529 if ((size == 'l') || (size == 'C') || (size == 'S'))
530 #else /* _WIDE */
531 if ((size == 'w') || (size == 'l') || (size == 'C') ||
532 (size == 'S'))
533 #endif /* _WIDE */
534 {
535 size = wstring(&chcount, &flag_eof, stow,
536 (int)ch, len, iop, &args.ap);
537 } else {
538 size = string(&chcount, &flag_eof, stow,
539 (int)ch, len, tab, iop, &args.ap);
540 }
541 break;
542 case '[':
543 if (size == 'l') {
544 size = wbrstring(&chcount, &flag_eof, stow,
545 (int)ch, len, iop, bracket_str, &args.ap);
546 free(bracket_str);
547 bracket_str = NULL;
548 } else {
549 #ifdef _WIDE
550 size = brstring(&chcount, &flag_eof, stow,
551 (int)ch, len, iop, bracket_str, &args.ap);
552 free(bracket_str);
553 bracket_str = NULL;
554 #else /* _WIDE */
555 size = string(&chcount, &flag_eof, stow,
556 ch, len, tab, iop, &args.ap);
557 #endif /* _WIDE */
558 }
559 break;
560
561 case 'n':
562 if (stow == 0)
563 continue;
564 if (size == 'b') /* char */
565 *va_arg(args.ap, char *) = (char)chcount;
566 else if (size == 'h')
567 *va_arg(args.ap, short *) = (short)chcount;
568 else if (size == 'l')
569 *va_arg(args.ap, long *) = (long)chcount;
570 else if (size == 'm') /* long long */
571 *va_arg(args.ap, long long *) =
572 (long long) chcount;
573 else
574 *va_arg(args.ap, int *) = (int)chcount;
575 continue;
576
577 case 'i':
578 default:
579 size = number(&chcount, &flag_eof, stow, (int)ch,
580 len, (int)size, iop, &args.ap);
581 break;
582 }
583 if (size)
584 nmatch += stow;
585 else {
586 return ((flag_eof && !nmatch) ? EOF : nmatch);
587 }
588 continue;
589 }
590 if (bracket_str)
591 free(bracket_str);
592 return (nmatch != 0 ? nmatch : EOF); /* end of input */
593 }
594
595 /* ****************************************************************** */
596 /* Functions to read the input stream in an attempt to match incoming */
597 /* data to the current pattern from the main loop of _doscan(). */
598 /* ****************************************************************** */
599 static int
number(int * chcount,int * flag_eof,int stow,int type,int len,int size,FILE * iop,va_list * listp)600 number(int *chcount, int *flag_eof, int stow, int type, int len, int size,
601 FILE *iop, va_list *listp)
602 {
603 char numbuf[64];
604 char *np = numbuf;
605 int c, base, inchar, lookahead;
606 int digitseen = 0, floater = 0, negflg = 0;
607 int lc;
608 long long lcval = 0LL;
609
610 switch (type) {
611 case 'e':
612 case 'f':
613 case 'g':
614 /*
615 * lc = 0 corresponds to c90 mode: do not recognize
616 * hexadecimal fp strings; attempt to push back
617 * all unused characters read
618 *
619 * lc = -1 corresponds to c99 mode: recognize hexa-
620 * decimal fp strings; push back at most one
621 * unused character
622 */
623 lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0;
624 floater = 1;
625 break;
626
627 case 'a':
628 lc = -1;
629 floater = 1;
630 break;
631
632 case 'd':
633 case 'u':
634 case 'i':
635 base = 10;
636 break;
637 case 'o':
638 base = 8;
639 break;
640 case 'p':
641 #ifdef _LP64
642 size = 'l'; /* pointers are long in LP64 */
643 #endif /* _LP64 */
644 /* FALLTHROUGH */
645 case 'x':
646 base = 16;
647 break;
648 default:
649 return (0); /* unrecognized conversion character */
650 }
651
652 if (floater != 0) {
653 /*
654 * Handle floating point with
655 * file_to_decimal.
656 */
657 decimal_mode dm;
658 decimal_record dr;
659 fp_exception_field_type efs;
660 enum decimal_string_form form;
661 char *echar;
662 int nread;
663 char buffer[1024+1];
664 char *nb = buffer;
665
666 if (len > 1024)
667 len = 1024;
668 file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread);
669 if (lc == -1) {
670 /*
671 * In C99 mode, the entire string read has to be
672 * accepted in order to qualify as a match
673 */
674 if (nb != buffer + nread)
675 form = invalid_form;
676 }
677 if (stow && (form != invalid_form)) {
678 #if defined(__sparc)
679 dm.rd = _QgetRD();
680 if (size == 'L') { /* long double */
681 if ((int)form < 0)
682 __hex_to_quadruple(&dr, dm.rd,
683 va_arg(*listp, quadruple *), &efs);
684 else
685 decimal_to_quadruple(
686 va_arg(*listp, quadruple *),
687 &dm, &dr, &efs);
688 }
689 #elif defined(__i386) || defined(__amd64)
690 dm.rd = __xgetRD();
691 if (size == 'L') { /* long double */
692 if ((int)form < 0)
693 __hex_to_extended(&dr, dm.rd,
694 va_arg(*listp, extended *), &efs);
695 else
696 decimal_to_extended(
697 va_arg(*listp, extended *),
698 &dm, &dr, &efs);
699 }
700 #else
701 #error Unknown architecture
702 #endif
703 else if (size == 'l') { /* double */
704 if ((int)form < 0)
705 __hex_to_double(&dr, dm.rd,
706 va_arg(*listp, double *), &efs);
707 else
708 decimal_to_double(
709 va_arg(*listp, double *),
710 &dm, &dr, &efs);
711 } else { /* float */
712 if ((int)form < 0)
713 __hex_to_single(&dr, dm.rd,
714 va_arg(*listp, single *), &efs);
715 else
716 decimal_to_single((single *)
717 va_arg(*listp, single *),
718 &dm, &dr, &efs);
719 }
720 if ((efs & (1 << fp_overflow)) != 0) {
721 errno = ERANGE;
722 }
723 if ((efs & (1 << fp_underflow)) != 0) {
724 errno = ERANGE;
725 }
726 }
727 (*chcount) += nread; /* Count characters read. */
728 c = locgetc((*chcount));
729 if (locungetc((*chcount), c) == EOF)
730 *flag_eof = 1;
731 return ((form == invalid_form) ? 0 : 1);
732 /* successful match if non-zero */
733 }
734
735 switch (c = locgetc((*chcount))) {
736 case '-':
737 negflg++;
738 /* FALLTHROUGH */
739 case '+':
740 if (--len <= 0)
741 break;
742 if ((c = locgetc((*chcount))) != '0')
743 break;
744 /* FALLTHROUGH */
745 case '0':
746 /*
747 * If %i or %x, the characters 0x or 0X may optionally precede
748 * the sequence of letters and digits (base 16).
749 */
750 if ((type != 'i' && type != 'x') || (len <= 1))
751 break;
752 if (((inchar = locgetc((*chcount))) == 'x') ||
753 (inchar == 'X')) {
754 lookahead = readchar(iop, chcount);
755 if (isxdigit(lookahead)) {
756 base = 16;
757
758 if (len <= 2) {
759 (void) locungetc((*chcount), lookahead);
760 /* Take into account the 'x' */
761 len -= 1;
762 } else {
763 c = lookahead;
764 /* Take into account '0x' */
765 len -= 2;
766 }
767 } else {
768 (void) locungetc((*chcount), lookahead);
769 (void) locungetc((*chcount), inchar);
770 }
771 } else {
772 /* inchar wans't 'x'. */
773 (void) locungetc((*chcount), inchar); /* Put it back. */
774 if (type == 'i') /* Only %i accepts an octal. */
775 base = 8;
776 }
777 }
778 for (; --len >= 0; *np++ = (char)c, c = locgetc((*chcount))) {
779 if (np > numbuf + 62) {
780 errno = ERANGE;
781 return (0);
782 }
783 if (isdigit(c) || base == 16 && isxdigit(c)) {
784 int digit = c - (isdigit(c) ? '0' :
785 isupper(c) ? 'A' - 10 : 'a' - 10);
786 if (digit >= base)
787 break;
788 if (stow)
789 lcval = base * lcval + digit;
790 digitseen++;
791 continue;
792 }
793 break;
794 }
795
796 if (stow && digitseen) {
797 /* suppress possible overflow on 2's-comp negation */
798 if (negflg && lcval != (1ULL << 63))
799 lcval = -lcval;
800 switch (size) {
801 case 'm':
802 *va_arg(*listp, long long *) = lcval;
803 break;
804 case 'l':
805 *va_arg(*listp, long *) = (long)lcval;
806 break;
807 case 'h':
808 *va_arg(*listp, short *) = (short)lcval;
809 break;
810 case 'b':
811 *va_arg(*listp, char *) = (char)lcval;
812 break;
813 default:
814 *va_arg(*listp, int *) = (int)lcval;
815 break;
816 }
817 }
818 if (locungetc((*chcount), c) == EOF)
819 *flag_eof = 1;
820 return (digitseen); /* successful match if non-zero */
821 }
822
823 /* Get a character. If not using sscanf and at the buffer's end */
824 /* then do a direct read(). Characters read via readchar() */
825 /* can be pushed back on the input stream by locungetc((*chcount),) */
826 /* since there is padding allocated at the end of the stream buffer. */
827 static int
readchar(FILE * iop,int * chcount)828 readchar(FILE *iop, int *chcount)
829 {
830 int inchar;
831 char buf[1];
832
833 if ((iop->_flag & _IOWRT) || (iop->_cnt != 0))
834 inchar = locgetc((*chcount));
835 else {
836 if (read(FILENO(iop), buf, 1) != 1)
837 return (EOF);
838 inchar = (int)buf[0];
839 (*chcount) += 1;
840 }
841 return (inchar);
842 }
843
844 static int
string(int * chcount,int * flag_eof,int stow,int type,int len,char * tab,FILE * iop,va_list * listp)845 string(int *chcount, int *flag_eof, int stow, int type, int len, char *tab,
846 FILE *iop, va_list *listp)
847 {
848 int ch;
849 char *ptr;
850 char *start;
851
852 start = ptr = stow ? va_arg(*listp, char *) : NULL;
853 if (((type == 'c') || (type == 'C')) && len == MAXINT)
854 len = 1;
855 #ifdef _WIDE
856 while ((ch = locgetc((*chcount))) != EOF &&
857 !(((type == 's') || (type == 'S')) && isspace(ch))) {
858 #else /* _WIDE */
859 while ((ch = locgetc((*chcount))) != EOF &&
860 !(((type == 's') || (type == 'S')) &&
861 isspace(ch) || type == '[' && tab[ch])) {
862 #endif /* _WIDE */
863 if (stow)
864 *ptr = (char)ch;
865 ptr++;
866 if (--len <= 0)
867 break;
868 }
869 if (ch == EOF) {
870 (*flag_eof) = 1;
871 (*chcount) -= 1;
872 } else if (len > 0 && locungetc((*chcount), ch) == EOF)
873 (*flag_eof) = 1;
874 if (ptr == start)
875 return (0); /* no match */
876 if (stow && ((type != 'c') && (type != 'C')))
877 *ptr = '\0';
878 return (1); /* successful match */
879 }
880
881 /* This function initializes arglst, to contain the appropriate */
882 /* va_list values for the first MAXARGS arguments. */
883 /* WARNING: this code assumes that the sizes of all pointer types */
884 /* are the same. (Code similar to that in the portable doprnt.c */
885 /* should be used if this assumption is not true for a */
886 /* particular port.) */
887
888 #ifdef _WIDE
889 static int
890 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[])
891 #else /* _WIDE */
892 static int
893 _mkarglst(const char *fmt, stva_list args, stva_list arglst[])
894 #endif /* _WIDE */
895 {
896 #ifdef _WIDE
897 #define STRCHR wcschr
898 #define STRSPN wcsspn
899 #define ATOI(x) _watoi((wchar_t *)x)
900 #define SPNSTR1 L"01234567890"
901 #define SPNSTR2 L"# +-.0123456789hL$"
902 #else /* _WIDE */
903 #define STRCHR strchr
904 #define STRSPN strspn
905 #define ATOI(x) atoi(x)
906 #define SPNSTR1 "01234567890"
907 #define SPNSTR2 "# +-.0123456789hL$"
908 #endif /* _WIDE */
909
910 int maxnum, curargno;
911 size_t n;
912
913 maxnum = -1;
914 curargno = 0;
915
916 while ((fmt = STRCHR(fmt, '%')) != NULL) {
917 fmt++; /* skip % */
918 if (*fmt == '*' || *fmt == '%')
919 continue;
920 if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') {
921 /* convert to zero base */
922 curargno = ATOI(fmt) - 1;
923 fmt += n + 1;
924 }
925
926 if (maxnum < curargno)
927 maxnum = curargno;
928 curargno++; /* default to next in list */
929
930 fmt += STRSPN(fmt, SPNSTR2);
931 if (*fmt == '[') {
932 int i;
933 fmt++; /* has to be at least on item in scan list */
934 if (*fmt == ']') {
935 fmt++;
936 }
937 while (*fmt != ']') {
938 if (*fmt == L'\0') {
939 return (-1); /* bad format */
940 #ifdef _WIDE
941 } else {
942 fmt++;
943 }
944 #else /* _WIDE */
945 } else if (isascii(*fmt)) {
946 fmt++;
947 } else {
948 i = mblen((const char *)
949 fmt, MB_CUR_MAX);
950 if (i <= 0) {
951 return (-1);
952 } else {
953 fmt += i;
954 }
955 }
956 #endif /* _WIDE */
957 }
958 }
959 }
960 if (maxnum > MAXARGS)
961 maxnum = MAXARGS;
962 for (n = 0; n <= maxnum; n++) {
963 arglst[n] = args;
964 (void) va_arg(args.ap, void *);
965 }
966 return (0);
967 }
968
969
970 /*
971 * For wide character handling
972 */
973
974 #ifdef _WIDE
975 static int
wstring(int * chcount,int * flag_eof,int stow,int type,int len,FILE * iop,va_list * listp)976 wstring(int *chcount, int *flag_eof, int stow, int type,
977 int len, FILE *iop, va_list *listp)
978 {
979 wint_t wch;
980 wchar_t *ptr;
981 wchar_t *wstart;
982 int dummy;
983
984 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
985
986 if ((type == 'c') && len == MAXINT)
987 len = 1;
988 while (((wch = _wd_getwc(chcount, iop)) != WEOF) &&
989 !(type == 's' && iswspace(wch))) {
990 if (stow)
991 *ptr = wch;
992 ptr++;
993 if (--len <= 0)
994 break;
995 }
996 if (wch == WEOF) {
997 *flag_eof = 1;
998 (*chcount) -= 1;
999 } else {
1000 if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF)
1001 *flag_eof = 1;
1002 }
1003 if (ptr == wstart)
1004 return (0); /* no match */
1005 if (stow && (type != 'c'))
1006 *ptr = '\0';
1007 return (1); /* successful match */
1008 }
1009
1010 #else /* _WIDE */
1011 static int
wstring(int * chcount,int * flag_eof,int stow,int type,int len,FILE * iop,va_list * listp)1012 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop,
1013 va_list *listp)
1014 {
1015 int wch;
1016 wchar_t *ptr;
1017 wchar_t *wstart;
1018
1019 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1020
1021 if ((type == 'c') && len == MAXINT)
1022 len = 1;
1023 while (((wch = _bi_getwc(iop)) != EOF) &&
1024 !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) {
1025 (*chcount) += _scrwidth((wchar_t)wch);
1026 if (stow)
1027 *ptr = wch;
1028 ptr++;
1029 if (--len <= 0)
1030 break;
1031 }
1032 if (wch == EOF) {
1033 (*flag_eof) = 1;
1034 (*chcount) -= 1;
1035 } else {
1036 if (len > 0 && _bi_ungetwc(wch, iop) == EOF)
1037 (*flag_eof) = 1;
1038 }
1039 if (ptr == wstart)
1040 return (0); /* no match */
1041 if (stow && (type != 'c'))
1042 *ptr = '\0';
1043 return (1); /* successful match */
1044 }
1045 #endif /* _WIDE */
1046
1047 #ifdef _WIDE
1048 static wint_t
_wd_getwc(int * chcount,FILE * iop)1049 _wd_getwc(int *chcount, FILE *iop)
1050 {
1051 wint_t wc;
1052 int len;
1053
1054 if (!(iop->_flag & _IOWRT)) {
1055 /* call from fwscanf, wscanf */
1056 wc = __fgetwc_xpg5(iop);
1057 (*chcount)++;
1058 return (wc);
1059 } else {
1060 /* call from swscanf */
1061 if (*iop->_ptr == '\0')
1062 return (WEOF);
1063 len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr,
1064 MB_CUR_MAX);
1065 if (len == -1)
1066 return (WEOF);
1067 iop->_ptr += len;
1068 (*chcount)++;
1069 return (wc);
1070 }
1071 }
1072
1073 static wint_t
_wd_ungetwc(int * chcount,wchar_t wc,FILE * iop)1074 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop)
1075 {
1076 wint_t ret;
1077 int len;
1078 char mbs[MB_LEN_MAX];
1079
1080 if (wc == WEOF)
1081 return (WEOF);
1082
1083 if (!(iop->_flag & _IOWRT)) {
1084 /* call from fwscanf, wscanf */
1085 ret = __ungetwc_xpg5((wint_t)wc, iop);
1086 if (ret != (wint_t)wc)
1087 return (WEOF);
1088 (*chcount)--;
1089 return (ret);
1090 } else {
1091 /* call from swscanf */
1092 len = wctomb(mbs, wc);
1093 if (len == -1)
1094 return (WEOF);
1095 iop->_ptr -= len;
1096 (*chcount)--;
1097 return ((wint_t)wc);
1098 }
1099 }
1100
/*
 * Convert the run of decimal digits at the start of the wide string fmt
 * to an int.  Only characters in the range 0..255 that isdigit() accepts
 * count as digits.  Returns 0 when the string does not start with a digit.
 */
static int
_watoi(wchar_t *fmt)
{
	int	value = 0;
	wchar_t	*cursor;

	for (cursor = fmt; ; cursor++) {
		wchar_t	wc = *cursor;

		if (!((wc >= 0) && (wc < 256) && isdigit((int)wc)))
			break;
		value = value * 10 + (wc - '0');
	}
	return (value);
}
1118 #endif /* _WIDE */
1119
/*
 * Match a %l[ conversion: accept wide characters for as long as each one,
 * converted back to multibyte form, matches the bracket pattern brstr
 * under fnmatch(), and store them as wide characters.
 *
 *	chcount		running count; incremented by 1 (wide build) or by
 *			_scrwidth() (narrow build) per accepted character.
 *	flag_eof	set to 1 when end of input is reached.
 *	stow		non-zero if the characters are to be stored.
 *	type		unused.
 *	len		maximum number of characters to accept.
 *	brstr		scanset in fnmatch() bracket-expression form.
 *	listp		argument list from which the destination is taken.
 *
 * Returns 1 if at least one character was accepted, 0 otherwise (also when
 * a character cannot be converted by wctomb()).  The stored string is
 * terminated with a null wide character.
 */
/* ARGSUSED3 */
static int
wbrstring(int *chcount, int *flag_eof, int stow, int type,
    int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	int	naccepted = 0;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	wchar_t	*ptr;
#ifdef _WIDE
	/*
	 * Scratch counter for _wd_getwc()/_wd_ungetwc(); initialized
	 * because those functions increment/decrement it.
	 */
	int	dummy = 0;
#endif /* _WIDE */

	ptr = stow ? va_arg(*listp, wchar_t *) : NULL;

	for (;;) {
#ifdef _WIDE
		wch = _wd_getwc(&dummy, iop);
#else  /* _WIDE */
		wch = _bi_getwc(iop);
#endif /* _WIDE */
		if (wch == WEOF)
			break;
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when str is not in the set. */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE) != 0)
			break;
		if (len <= 0)
			break;
#ifdef _WIDE
		(*chcount)++;
#else  /* _WIDE */
		(*chcount) += _scrwidth(wch);
#endif /* _WIDE */
		len--;
		/*
		 * Count accepted characters separately instead of advancing
		 * a possibly-NULL pointer when assignment is suppressed.
		 */
		if (stow)
			*ptr++ = wch;
		naccepted++;
		if (len <= 0)
			break;
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
#ifdef _WIDE
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
#else  /* _WIDE */
		if (len > 0 && _bi_ungetwc(wch, iop) == WEOF)
#endif /* _WIDE */
			*flag_eof = 1;
	}
	if (naccepted == 0)
		return (0);				/* no match */
	if (stow)
		*ptr = L'\0';
	return (1);					/* successful match */
}
1183
#ifdef _WIDE
/*
 * Wide build: match a %[ conversion whose result is stored as a multibyte
 * (char) string.  Wide characters are read, converted with wctomb(), and
 * accepted for as long as they match the bracket pattern brstr under
 * fnmatch() and their multibyte form still fits in the field width.
 *
 *	chcount		running count; incremented once per accepted
 *			character.
 *	flag_eof	set to 1 when end of input is reached.
 *	stow		non-zero if the bytes are to be stored.
 *	type		unused.
 *	len		field width, counted in bytes of output.
 *	brstr		scanset in fnmatch() bracket-expression form.
 *	listp		argument list from which the destination is taken.
 *
 * Returns 1 if at least one character was accepted, 0 otherwise (also when
 * a character cannot be converted by wctomb()).  The stored string is
 * NUL-terminated.
 */
static int
brstring(int *chcount, int *flag_eof, int stow, int type,
    int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i, k;
	int	naccepted = 0;	/* bytes accepted so far */
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	char	*ptr;
	/*
	 * Scratch counter for _wd_getwc()/_wd_ungetwc(); initialized
	 * because those functions increment/decrement it.
	 */
	int	dummy = 0;

	ptr = stow ? va_arg(*listp, char *) : NULL;

	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when str is not in the set. */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE) != 0)
			break;
		if (len < i)
			break;	/* does not fit in the remaining width */
		(*chcount)++;
		len -= i;
		/*
		 * Count accepted bytes separately instead of advancing a
		 * possibly-NULL pointer when assignment is suppressed.
		 */
		if (stow) {
			for (k = 0; k < i; k++)
				*ptr++ = str[k];
		}
		naccepted += i;
		if (len <= 0)
			break;
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
			*flag_eof = 1;
	}
	if (naccepted == 0)
		return (0);				/* no match */
	if (stow)
		*ptr = '\0';
	return (1);					/* successful match */
}
#endif /* _WIDE */
1240
1241 /*
1242 * Locally define getwc and ungetwc
1243 */
/*
 * Read one multibyte character from iop and return it as a wide character
 * code.
 *
 * An ASCII byte is returned directly.  Otherwise up to MB_CUR_MAX bytes
 * are collected (stopping early at a newline, which is pushed back, or at
 * EOF) and decoded with mbtowc(); bytes read beyond the decoded character
 * are pushed back.
 *
 * Returns the character, or WEOF at end of input or on an illegal
 * sequence (errno = EILSEQ; all but the first byte are pushed back).
 */
static int
_bi_getwc(FILE *iop)
{
	int c;
	wchar_t intcode;
	int i, nbytes, cur_max;
	char buff[MB_LEN_MAX];

	if ((c = wlocgetc()) == EOF)
		return (WEOF);

	if (isascii(c))	/* ASCII code */
		return ((wint_t)c);

	buff[0] = (char)c;

	cur_max = (int)MB_CUR_MAX;
	/* MB_CUR_MAX doesn't exceed the value of MB_LEN_MAX */
	/* So we use MB_CUR_MAX instead of MB_LEN_MAX for */
	/* improving the performance. */
	for (i = 1; i < cur_max; i++) {
		c = wlocgetc();
		if (c == '\n') {
			(void) wlocungetc(c);
			break;
		}
		if (c == EOF) {
			/* this still may be a valid multibyte character */
			break;
		}
		buff[i] = (char)c;
	}

	if ((nbytes = mbtowc(&intcode, buff, i)) == -1) {
		/*
		 * If mbtowc fails, the input was not a legal character.
		 *	ungetc all but one character.
		 *
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 *
		 * The byte is passed as unsigned char: as a signed char
		 * the byte 0xFF would equal EOF, and wlocungetc() does
		 * nothing when handed EOF.
		 */
		while (i-- > 1) {
			(void) wlocungetc((unsigned char)buff[i]);
		}
		errno = EILSEQ;
		return (WEOF); /* Illegal EUC sequence. */
	}

	while (i-- > nbytes) {
		/*
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 */
		(void) wlocungetc((unsigned char)buff[i]);
	}
	return ((int)intcode);
}
1305
1306 static int
1307 _bi_ungetwc(wint_t wc, FILE *iop)
1308 {
1309 char mbs[MB_LEN_MAX];
1310 unsigned char *p;
1311 int n;
1312
1313 if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0))
1314 return (WEOF);
1315
1316 n = wctomb(mbs, (wchar_t)wc);
1317 if (n <= 0)
1318 return (WEOF);
1319
1320 if (iop->_ptr <= iop->_base) {
1321 if (iop->_base == NULL) {
1322 return (WEOF);
1323 }
1324 if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) {
1325 ++iop->_ptr;
1326 } else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) {
1327 return (WEOF);
1328 }
1329 }
1330
1331 p = (unsigned char *)(mbs+n-1); /* p points the last byte */
1332 /* if _IOWRT is set to iop->_flag, it means this is */
1333 /* an invocation from sscanf(), and in that time we */
1334 /* don't touch iop->_cnt. Otherwise, which means an */
1335 /* invocation from fscanf() or scanf(), we touch iop->_cnt */
1336 if ((iop->_flag & _IOWRT) == 0) {
1337 /* scanf() and fscanf() */
1338 iop->_cnt += n;
1339 while (n--) {
1340 *--iop->_ptr = *(p--);
1341 }
1342 } else {
1343 /* sscanf() */
1344 iop->_ptr -= n;
1345 }
1346 return (wc);
1347 }
1348