xref: /titanic_41/usr/src/lib/libc/port/stdio/doscan.c (revision dd49f125507979bb2ab505a8daf2a46d1be27051)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include "lint.h"
33 #include <sys/types.h>
34 #include "mtlib.h"
35 #include "file64.h"
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #include <values.h>
40 #include <errno.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <math.h>
44 #include <thread.h>
45 #include <synch.h>
46 #include <stdlib.h>
47 #include <fnmatch.h>
48 #include <limits.h>
49 #include <wchar.h>
50 #include <unistd.h>
51 #include "libc.h"
52 #include "stdiom.h"
53 #include "xpg6.h"
54 
#define	NCHARS	(1 << BITSPERBYTE)

/*
 * Single-byte input primitives.  All of them expect a variable named
 * "iop" (the FILE being scanned) to be in scope at the point of use.
 *
 * If the _IOWRT flag is set, this must be a call from sscanf: the input
 * is then the NUL-terminated string at iop->_ptr and the terminating
 * NUL is reported as EOF.  Otherwise the character comes from GETC().
 *
 * locgetc() always adds one to cnt, even when it returns EOF, and
 * locungetc() always subtracts one, even when asked to push back EOF;
 * callers rely on that pairing to keep the character count balanced.
 *
 * Macro arguments are parenthesized so that any lvalue expression
 * (e.g. *chcount) may be passed safely.
 */
#define	locgetc(cnt)	((cnt) += 1, (iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	locungetc(cnt, x) ((cnt) -= 1, ((x) == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    (++iop->_cnt, *(--iop->_ptr))))

/* Same as above, but without maintaining a character count. */
#define	wlocgetc()	((iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	wlocungetc(x) (((x) == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    UNGETC((x), iop)))

#define	MAXARGS	30	/* max. number of args for fast positional parameters */
73 
/*
 * C does not allow an object of array type on the left-hand side of an
 * assignment, and va_list may be an array type.  Wrapping the va_list
 * in a structure lets the whole thing be copied with a plain "a = b".
 */
typedef struct stva_list { va_list ap; } stva_list;
83 
84 static int number(int *, int *, int, int, int, int, FILE *, va_list *);
85 static int readchar(FILE *, int *);
86 static int string(int *, int *, int, int, int, char *, FILE *, va_list *);
87 static int wstring(int *, int *, int, int, int, FILE *, va_list *);
88 static int	wbrstring(int *, int *, int, int, int, FILE *,
89 	unsigned char *, va_list *);
90 #ifdef	_WIDE
91 static int	brstring(int *, int *, int, int, int, FILE *,
92 	unsigned char *, va_list *);
93 #endif
94 static int _bi_getwc(FILE *);
95 static int _bi_ungetwc(wint_t, FILE *);
96 
97 #ifdef	_WIDE
98 static int _mkarglst(const wchar_t *, stva_list, stva_list[]);
99 static wint_t	_wd_getwc(int *, FILE *);
100 static wint_t	_wd_ungetwc(int *, wchar_t, FILE *);
101 static int	_watoi(wchar_t *);
102 #else  /* _WIDE */
103 static int _mkarglst(const char *, stva_list, stva_list[]);
104 #endif /* _WIDE */
105 
106 #ifndef	_WIDE
107 int
108 _doscan(FILE *iop, const char *fmt, va_list va_Alist)
109 {
110 	int ret;
111 	rmutex_t *lk;
112 
113 	if (iop->_flag & _IOWRT)
114 		ret = __doscan_u(iop, fmt, va_Alist, 0);
115 	else {
116 		FLOCKFILE(lk, iop);
117 		ret = __doscan_u(iop, fmt, va_Alist, 0);
118 		FUNLOCKFILE(lk);
119 	}
120 	return (ret);
121 }
122 #endif  /* _WIDE */
123 
124 /* ARGSUSED3 */
125 #ifdef	_WIDE
126 int
127 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, int scflag)
128 #else  /* _WIDE */
129 int
130 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag)
131 #endif /* _WIDE */
132 {
133 #ifdef	_WIDE
134 	wchar_t	ch;
135 	wchar_t	inchar, size;
136 	int	nmatch = 0, len, stow;
137 #else  /* _WIDE */
138 	int	ch;
139 	int		nmatch = 0, len, inchar, stow, size;
140 #endif /* _WIDE */
141 
142 	unsigned char	*bracket_str = NULL;
143 	int		chcount, flag_eof;
144 	char	tab[NCHARS];
145 
146 	/* variables for postional parameters */
147 #ifdef	_WIDE
148 	const wchar_t	*sformat = fmt;	/* save the beginning of the format */
149 #else  /* _WIDE */
150 	const unsigned char	*fmt = (const unsigned char *)sfmt;
151 	const char	*sformat = sfmt; /* save the beginning of the format */
152 #endif /* _WIDE */
153 	int		fpos = 1;	/* 1 if first postional parameter */
154 	stva_list	args;	/* used to step through the argument list */
155 	stva_list	sargs;	/* used to save start of the argument list */
156 	stva_list	arglst[MAXARGS];
157 					/*
158 					 * array giving the appropriate values
159 					 * for va_arg() to retrieve the
160 					 * corresponding argument:
161 					 * arglst[0] is the first argument
162 					 * arglst[1] is the second argument,etc.
163 					 */
164 	/* Check if readable stream */
165 	if (!(iop->_flag & (_IOREAD | _IORW))) {
166 		errno = EBADF;
167 		return (EOF);
168 	}
169 
170 	/*
171 	 * Initialize args and sargs to the start of the argument list.
172 	 * We don't know any portable way to copy an arbitrary C object
173 	 * so we use a system-specific routine(probably a macro) from
174 	 * stdarg.h.  (Remember that if va_list is an array, in_args will
175 	 * be a pointer and &in_args won't be what we would want for
176 	 * memcpy.)
177 	 */
178 	va_copy(args.ap, va_Alist);
179 
180 	sargs = args;
181 
182 	chcount = 0; flag_eof = 0;
183 
184 	/*
185 	 * ****************************************************
186 	 * Main loop: reads format to determine a pattern,
187 	 *		and then goes to read input stream
188 	 *		in attempt to match the pattern.
189 	 * ****************************************************
190 	 */
191 	for (; ; ) {
192 		if ((ch = *fmt++) == '\0') {
193 			return (nmatch); /* end of format */
194 		}
195 #ifdef	_WIDE
196 		if (iswspace(ch)) {
197 			if (!flag_eof) {
198 				while (iswspace(inchar =
199 				    _wd_getwc(&chcount, iop)))
200 					;
201 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
202 					flag_eof = 1;
203 			}
204 			continue;
205 		}
206 		if (ch != '%' || (ch = *fmt++) == '%') {
207 			if (ch == '%') {
208 				if (!flag_eof) {
209 					while (iswspace(inchar =
210 					    _wd_getwc(&chcount, iop)))
211 						;
212 					if (_wd_ungetwc(&chcount, inchar, iop)
213 					    == WEOF)
214 						flag_eof = 1;
215 				}
216 			}
217 			if ((inchar = _wd_getwc(&chcount, iop)) == ch)
218 				continue;
219 			if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) {
220 				return (nmatch); /* failed to match input */
221 			}
222 			break;
223 		}
224 #else  /* _WIDE */
225 		if (isspace(ch)) {
226 			if (!flag_eof) {
227 				while (isspace(inchar = locgetc(chcount)))
228 					;
229 				if (locungetc(chcount, inchar) == EOF)
230 					flag_eof = 1;
231 
232 			}
233 			continue;
234 		}
235 		if (ch != '%' || (ch = *fmt++) == '%') {
236 			if (ch == '%') {
237 				if (!flag_eof) {
238 					while (isspace(inchar =
239 					    locgetc(chcount)))
240 						;
241 					if (locungetc(chcount, inchar) == EOF)
242 						flag_eof = 1;
243 				}
244 			}
245 			if ((inchar = locgetc(chcount)) == ch)
246 				continue;
247 			if (locungetc(chcount, inchar) != EOF) {
248 				return (nmatch); /* failed to match input */
249 			}
250 			break;
251 		}
252 #endif /* _WIDE */
253 
254 charswitch:	/* target of a goto 8-( */
255 
256 		if (ch == '*') {
257 			stow = 0;
258 			ch = *fmt++;
259 		} else
260 			stow = 1;
261 
262 #ifdef	_WIDE
263 		for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch));
264 		    ch = *fmt++)
265 			len = len * 10 + ch - '0';
266 #else  /* _WIDE */
267 		for (len = 0; isdigit(ch); ch = *fmt++)
268 			len = len * 10 + ch - '0';
269 #endif /* _WIDE */
270 
271 		if (ch == '$') {
272 			/*
273 			 * positional parameter handling - the number
274 			 * specified in len gives the argument to which
275 			 * the next conversion should be applied.
276 			 * WARNING: This implementation of positional
277 			 * parameters assumes that the sizes of all pointer
278 			 * types are the same. (Code similar to that
279 			 * in the portable doprnt.c should be used if this
280 			 * assumption does not hold for a particular
281 			 * port.)
282 			 */
283 			if (fpos) {
284 				if (_mkarglst(sformat, sargs, arglst) != 0) {
285 					return (EOF);
286 				} else {
287 					fpos = 0;
288 				}
289 			}
290 			if (len <= MAXARGS) {
291 				args = arglst[len - 1];
292 			} else {
293 				args = arglst[MAXARGS - 1];
294 				for (len -= MAXARGS; len > 0; len--)
295 					(void) va_arg(args.ap, void *);
296 			}
297 			len = 0;
298 			ch = *fmt++;
299 			goto charswitch;
300 		}
301 
302 		if (len == 0)
303 			len = MAXINT;
304 #ifdef	_WIDE
305 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
306 		    (size == 'j') || (size == 't') || (size == 'z'))
307 			ch = *fmt++;
308 #else  /* _WIDE */
309 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
310 		    (size == 'w') || (size == 'j') || (size == 't') ||
311 		    (size == 'z'))
312 			ch = *fmt++;
313 #endif /* _WIDE */
314 		if (size == 'l' && ch == 'l') {
315 			size = 'm';		/* size = 'm' if long long */
316 			ch = *fmt++;
317 		} else if (size == 'h' && ch == 'h') {
318 			size = 'b';		/* use size = 'b' if char */
319 			ch = *fmt++;
320 		} else if ((size == 't') || (size == 'z')) {
321 			size = 'l';
322 		} else if (size == 'j') {
323 #ifndef _LP64
324 			/* check scflag for size of u/intmax_t (32-bit libc) */
325 			if (!(scflag & _F_INTMAX32)) {
326 #endif
327 				size = 'm';
328 #ifndef _LP64
329 			}
330 #endif
331 		}
332 		if (ch == '\0') {
333 			return (EOF);		/* unexpected end of format */
334 		}
335 #ifdef	_WIDE
336 		if (ch == '[') {
337 			wchar_t	c;
338 			size_t	len;
339 			int	negflg = 0;
340 			wchar_t	*p;
341 			wchar_t	*wbracket_str;
342 			size_t	wlen, clen;
343 
344 			/* p points to the address of '[' */
345 			p = (wchar_t *)fmt - 1;
346 			len = 0;
347 			if (*fmt == '^') {
348 				len++;
349 				fmt++;
350 				negflg = 1;
351 			}
352 			if (((c = *fmt) == ']') || (c == '-')) {
353 				len++;
354 				fmt++;
355 			}
356 			while ((c = *fmt) != ']') {
357 				if (c == '\0') {
358 					return (EOF); /* unexpected EOF */
359 				} else {
360 					len++;
361 					fmt++;
362 				}
363 			}
364 			fmt++;
365 			len += 2;
366 			wbracket_str = (wchar_t *)
367 			    malloc(sizeof (wchar_t) * (len + 1));
368 			if (wbracket_str == NULL) {
369 				errno = ENOMEM;
370 				return (EOF);
371 			} else {
372 				(void) wmemcpy(wbracket_str,
373 				    (const wchar_t *)p, len);
374 				*(wbracket_str + len) = L'\0';
375 				if (negflg && *(wbracket_str + 1) == '^') {
376 					*(wbracket_str + 1) = L'!';
377 				}
378 			}
379 			wlen = wcslen(wbracket_str);
380 			clen = wcstombs((char *)NULL, wbracket_str, 0);
381 			if (clen == (size_t)-1) {
382 				free(wbracket_str);
383 				return (EOF);
384 			}
385 			bracket_str = (unsigned char *)
386 			    malloc(sizeof (unsigned char) * (clen + 1));
387 			if (bracket_str == NULL) {
388 				free(wbracket_str);
389 				errno = ENOMEM;
390 				return (EOF);
391 			}
392 			clen = wcstombs((char *)bracket_str, wbracket_str,
393 			    wlen + 1);
394 			free(wbracket_str);
395 			if (clen == (size_t)-1) {
396 				free(bracket_str);
397 				return (EOF);
398 			}
399 		}
400 #else  /* _WIDE */
401 		if (ch == '[') {
402 			if (size == 'l') {
403 				int	c, len, i;
404 				int	negflg = 0;
405 				unsigned char 	*p;
406 
407 				p = (unsigned char *)(fmt - 1);
408 				len = 0;
409 				if (*fmt == '^') {
410 					len++;
411 					fmt++;
412 					negflg = 1;
413 				}
414 				if (((c = *fmt) == ']') || (c == '-')) {
415 					len++;
416 					fmt++;
417 				}
418 				while ((c = *fmt) != ']') {
419 					if (c == '\0') {
420 						return (EOF);
421 					} else if (isascii(c)) {
422 						len++;
423 						fmt++;
424 					} else {
425 						i = mblen((const char *)fmt,
426 						    MB_CUR_MAX);
427 						if (i <= 0) {
428 							return (EOF);
429 						} else {
430 							len += i;
431 							fmt += i;
432 						}
433 					}
434 				}
435 				fmt++;
436 				len += 2;
437 				bracket_str = (unsigned char *)
438 				    malloc(sizeof (unsigned char) * (len + 1));
439 				if (bracket_str == NULL) {
440 					errno = ENOMEM;
441 					return (EOF);
442 				} else {
443 					(void) strncpy((char *)bracket_str,
444 					    (const char *)p, len);
445 					*(bracket_str + len) = '\0';
446 					if (negflg &&
447 					    *(bracket_str + 1) == '^') {
448 						*(bracket_str + 1) = '!';
449 					}
450 				}
451 			} else {
452 				int	t = 0;
453 				int	b, c, d;
454 
455 				if (*fmt == '^') {
456 					t++;
457 					fmt++;
458 				}
459 				(void) memset(tab, !t, NCHARS);
460 				if ((c = *fmt) == ']' || c == '-') {
461 					tab[c] = t;
462 					fmt++;
463 				}
464 
465 				while ((c = *fmt) != ']') {
466 					if (c == '\0') {
467 						return (EOF);
468 					}
469 					b = *(fmt - 1);
470 					d = *(fmt + 1);
471 					if ((c == '-') && (d != ']') &&
472 					    (b < d)) {
473 						(void) memset(&tab[b], t,
474 						    d - b + 1);
475 						fmt += 2;
476 					} else {
477 						tab[c] = t;
478 						fmt++;
479 					}
480 				}
481 				fmt++;
482 			}
483 		}
484 #endif /* _WIDE */
485 
486 #ifdef	_WIDE
487 		if ((ch >= 0) && (ch < 256) &&
488 		    isupper((int)ch)) { /* no longer documented */
489 			if (_lib_version == c_issue_4) {
490 				if (size != 'm' && size != 'L')
491 					size = 'l';
492 			}
493 			ch = _tolower((int)ch);
494 		}
495 		if (ch != 'n' && !flag_eof) {
496 			if (ch != 'c' && ch != 'C' && ch != '[') {
497 				while (iswspace(inchar =
498 				    _wd_getwc(&chcount, iop)))
499 					;
500 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
501 					break;
502 
503 			}
504 		}
505 #else  /* _WIDE */
506 		if (isupper(ch)) { /* no longer documented */
507 			if (_lib_version == c_issue_4) {
508 				if (size != 'm' && size != 'L')
509 					size = 'l';
510 			}
511 			ch = _tolower(ch);
512 		}
513 		if (ch != 'n' && !flag_eof) {
514 			if (ch != 'c' && ch != 'C' && ch != '[') {
515 				while (isspace(inchar = locgetc(chcount)))
516 					;
517 				if (locungetc(chcount, inchar) == EOF)
518 					break;
519 			}
520 		}
521 #endif /* _WIDE */
522 
523 		switch (ch) {
524 		case 'C':
525 		case 'S':
526 		case 'c':
527 		case 's':
528 #ifdef	_WIDE
529 			if ((size == 'l') || (size == 'C') || (size == 'S'))
530 #else  /* _WIDE */
531 			if ((size == 'w') || (size == 'l') || (size == 'C') ||
532 			    (size == 'S'))
533 #endif /* _WIDE */
534 			{
535 				size = wstring(&chcount, &flag_eof, stow,
536 				    (int)ch, len, iop, &args.ap);
537 			} else {
538 				size = string(&chcount, &flag_eof, stow,
539 				    (int)ch, len, tab, iop, &args.ap);
540 			}
541 			break;
542 		case '[':
543 			if (size == 'l') {
544 				size = wbrstring(&chcount, &flag_eof, stow,
545 				    (int)ch, len, iop, bracket_str, &args.ap);
546 				free(bracket_str);
547 				bracket_str = NULL;
548 			} else {
549 #ifdef	_WIDE
550 				size = brstring(&chcount, &flag_eof, stow,
551 				    (int)ch, len, iop, bracket_str, &args.ap);
552 				free(bracket_str);
553 				bracket_str = NULL;
554 #else  /* _WIDE */
555 				size = string(&chcount, &flag_eof, stow,
556 				    ch, len, tab, iop, &args.ap);
557 #endif /* _WIDE */
558 			}
559 			break;
560 
561 		case 'n':
562 			if (stow == 0)
563 				continue;
564 			if (size == 'b')	/* char */
565 				*va_arg(args.ap, char *) = (char)chcount;
566 			else if (size == 'h')
567 				*va_arg(args.ap, short *) = (short)chcount;
568 			else if (size == 'l')
569 				*va_arg(args.ap, long *) = (long)chcount;
570 			else if (size == 'm') /* long long */
571 				*va_arg(args.ap, long long *) =
572 				    (long long) chcount;
573 			else
574 				*va_arg(args.ap, int *) = (int)chcount;
575 			continue;
576 
577 		case 'i':
578 		default:
579 			size = number(&chcount, &flag_eof, stow, (int)ch,
580 			    len, (int)size, iop, &args.ap);
581 			break;
582 		}
583 		if (size)
584 			nmatch += stow;
585 		else {
586 			return ((flag_eof && !nmatch) ? EOF : nmatch);
587 		}
588 		continue;
589 	}
590 	if (bracket_str)
591 		free(bracket_str);
592 	return (nmatch != 0 ? nmatch : EOF); /* end of input */
593 }
594 
595 /* ****************************************************************** */
596 /* Functions to read the input stream in an attempt to match incoming */
597 /* data to the current pattern from the main loop of _doscan(). */
598 /* ****************************************************************** */
599 static int
600 number(int *chcount, int *flag_eof, int stow, int type, int len, int size,
601 	FILE *iop, va_list *listp)
602 {
603 	char	numbuf[64];
604 	char	*np = numbuf;
605 	int	c, base, inchar, lookahead;
606 	int	digitseen = 0, floater = 0, negflg = 0;
607 	int	lc;
608 	long long	lcval = 0LL;
609 
610 	switch (type) {
611 	case 'e':
612 	case 'f':
613 	case 'g':
614 		/*
615 		 * lc = 0 corresponds to c90 mode: do not recognize
616 		 *	hexadecimal fp strings; attempt to push back
617 		 *	all unused characters read
618 		 *
619 		 * lc = -1 corresponds to c99 mode: recognize hexa-
620 		 *	decimal fp strings; push back at most one
621 		 *	unused character
622 		 */
623 		lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0;
624 		floater = 1;
625 		break;
626 
627 	case 'a':
628 		lc = -1;
629 		floater = 1;
630 		break;
631 
632 	case 'd':
633 	case 'u':
634 	case 'i':
635 		base = 10;
636 		break;
637 	case 'o':
638 		base = 8;
639 		break;
640 	case 'p':
641 #ifdef	_LP64
642 		size = 'l'; /* pointers are long in LP64 */
643 #endif	/*	_LP64	*/
644 		/* FALLTHROUGH */
645 	case 'x':
646 		base = 16;
647 		break;
648 	default:
649 		return (0); /* unrecognized conversion character */
650 	}
651 
652 	if (floater != 0) {
653 		/*
654 		 * Handle floating point with
655 		 * file_to_decimal.
656 		 */
657 		decimal_mode		dm;
658 		decimal_record		dr;
659 		fp_exception_field_type	efs;
660 		enum decimal_string_form form;
661 		char			*echar;
662 		int			nread;
663 		char			buffer[1024+1];
664 		char			*nb = buffer;
665 
666 		if (len > 1024)
667 			len = 1024;
668 		file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread);
669 		if (lc == -1) {
670 			/*
671 			 * In C99 mode, the entire string read has to be
672 			 * accepted in order to qualify as a match
673 			 */
674 			if (nb != buffer + nread)
675 				form = invalid_form;
676 		}
677 		if (stow && (form != invalid_form)) {
678 #if defined(__sparc)
679 			dm.rd = _QgetRD();
680 			if (size == 'L') {		/* long double */
681 				if ((int)form < 0)
682 					__hex_to_quadruple(&dr, dm.rd,
683 					    va_arg(*listp, quadruple *), &efs);
684 				else
685 					decimal_to_quadruple(
686 					    va_arg(*listp, quadruple *),
687 					    &dm, &dr, &efs);
688 			}
689 #elif defined(__i386) || defined(__amd64)
690 			dm.rd = __xgetRD();
691 			if (size == 'L') {		/* long double */
692 				if ((int)form < 0)
693 					__hex_to_extended(&dr, dm.rd,
694 					    va_arg(*listp, extended *), &efs);
695 				else
696 					decimal_to_extended(
697 					    va_arg(*listp, extended *),
698 					    &dm, &dr, &efs);
699 			}
700 #else
701 #error Unknown architecture
702 #endif
703 			else if (size == 'l') {		/* double */
704 				if ((int)form < 0)
705 					__hex_to_double(&dr, dm.rd,
706 					    va_arg(*listp, double *), &efs);
707 				else
708 					decimal_to_double(
709 					    va_arg(*listp, double *),
710 					    &dm, &dr, &efs);
711 			} else {			/* float */
712 				if ((int)form < 0)
713 					__hex_to_single(&dr, dm.rd,
714 					    va_arg(*listp, single *), &efs);
715 				else
716 					decimal_to_single((single *)
717 					    va_arg(*listp, single *),
718 					    &dm, &dr, &efs);
719 			}
720 			if ((efs & (1 << fp_overflow)) != 0) {
721 				errno = ERANGE;
722 			}
723 			if ((efs & (1 << fp_underflow)) != 0) {
724 				errno = ERANGE;
725 			}
726 		}
727 		(*chcount) += nread;	/* Count characters read. */
728 		c = locgetc((*chcount));
729 		if (locungetc((*chcount), c) == EOF)
730 			*flag_eof = 1;
731 		return ((form == invalid_form) ? 0 : 1);
732 				/* successful match if non-zero */
733 	}
734 
735 	switch (c = locgetc((*chcount))) {
736 	case '-':
737 		negflg++;
738 		/* FALLTHROUGH */
739 	case '+':
740 		if (--len <= 0)
741 			break;
742 		if ((c = locgetc((*chcount))) != '0')
743 			break;
744 		/* FALLTHROUGH */
745 	case '0':
746 		/*
747 		 * If %i or %x, the characters 0x or 0X may optionally precede
748 		 * the sequence of letters and digits (base 16).
749 		 */
750 		if ((type != 'i' && type != 'x') || (len <= 1))
751 			break;
752 		if (((inchar = locgetc((*chcount))) == 'x') ||
753 		    (inchar == 'X')) {
754 			lookahead = readchar(iop, chcount);
755 			if (isxdigit(lookahead)) {
756 				base = 16;
757 
758 				if (len <= 2) {
759 					(void) locungetc((*chcount), lookahead);
760 					/* Take into account the 'x' */
761 					len -= 1;
762 				} else {
763 					c = lookahead;
764 					/* Take into account '0x' */
765 					len -= 2;
766 				}
767 			} else {
768 				(void) locungetc((*chcount), lookahead);
769 				(void) locungetc((*chcount), inchar);
770 			}
771 		} else {
772 			/* inchar wans't 'x'. */
773 			(void) locungetc((*chcount), inchar); /* Put it back. */
774 			if (type == 'i') /* Only %i accepts an octal. */
775 				base = 8;
776 		}
777 	}
778 	for (; --len  >= 0; *np++ = (char)c, c = locgetc((*chcount))) {
779 		if (np > numbuf + 62) {
780 			errno = ERANGE;
781 			return (0);
782 		}
783 		if (isdigit(c) || base == 16 && isxdigit(c)) {
784 			int digit = c - (isdigit(c) ? '0' :
785 			    isupper(c) ? 'A' - 10 : 'a' - 10);
786 			if (digit >= base)
787 				break;
788 			if (stow)
789 				lcval = base * lcval + digit;
790 			digitseen++;
791 			continue;
792 		}
793 		break;
794 	}
795 
796 	if (stow && digitseen) {
797 		/* suppress possible overflow on 2's-comp negation */
798 		if (negflg && lcval != (1ULL << 63))
799 			lcval = -lcval;
800 		switch (size) {
801 			case 'm':
802 				*va_arg(*listp, long long *) = lcval;
803 				break;
804 			case 'l':
805 				*va_arg(*listp, long *) = (long)lcval;
806 				break;
807 			case 'h':
808 				*va_arg(*listp, short *) = (short)lcval;
809 				break;
810 			case 'b':
811 				*va_arg(*listp, char *) = (char)lcval;
812 				break;
813 			default:
814 				*va_arg(*listp, int *) = (int)lcval;
815 				break;
816 		}
817 	}
818 	if (locungetc((*chcount), c) == EOF)
819 		*flag_eof = 1;
820 	return (digitseen); /* successful match if non-zero */
821 }
822 
823 /* Get a character. If not using sscanf and at the buffer's end */
824 /* then do a direct read(). Characters read via readchar() */
825 /* can be  pushed back on the input stream by locungetc((*chcount),) */
826 /* since there is padding allocated at the end of the stream buffer. */
827 static int
828 readchar(FILE *iop, int *chcount)
829 {
830 	int	inchar;
831 	char	buf[1];
832 
833 	if ((iop->_flag & _IOWRT) || (iop->_cnt != 0))
834 		inchar = locgetc((*chcount));
835 	else {
836 		if (read(FILENO(iop), buf, 1) != 1)
837 			return (EOF);
838 		inchar = (int)buf[0];
839 		(*chcount) += 1;
840 	}
841 	return (inchar);
842 }
843 
844 static int
845 string(int *chcount, int *flag_eof, int stow, int type, int len, char *tab,
846 	FILE *iop, va_list *listp)
847 {
848 	int	ch;
849 	char	*ptr;
850 	char	*start;
851 
852 	start = ptr = stow ? va_arg(*listp, char *) : NULL;
853 	if (((type == 'c') || (type == 'C')) && len == MAXINT)
854 		len = 1;
855 #ifdef	_WIDE
856 	while ((ch = locgetc((*chcount))) != EOF &&
857 	    !(((type == 's') || (type == 'S')) && isspace(ch))) {
858 #else  /* _WIDE */
859 	while ((ch = locgetc((*chcount))) != EOF &&
860 	    !(((type == 's') || (type == 'S')) &&
861 	    isspace(ch) || type == '[' && tab[ch])) {
862 #endif /* _WIDE */
863 		if (stow)
864 			*ptr = (char)ch;
865 		ptr++;
866 		if (--len <= 0)
867 			break;
868 	}
869 	if (ch == EOF) {
870 		(*flag_eof) = 1;
871 		(*chcount) -= 1;
872 	} else if (len > 0 && locungetc((*chcount), ch) == EOF)
873 		(*flag_eof) = 1;
874 	if (ptr == start)
875 		return (0);	/* no match */
876 	if (stow && ((type != 'c') && (type != 'C')))
877 		*ptr = '\0';
878 	return (1);	/* successful match */
879 }
880 
881 /* This function initializes arglst, to contain the appropriate */
882 /* va_list values for the first MAXARGS arguments. */
883 /* WARNING: this code assumes that the sizes of all pointer types */
884 /* are the same. (Code similar to that in the portable doprnt.c */
885 /* should be used if this assumption is not true for a */
886 /* particular port.) */
887 
888 #ifdef	_WIDE
889 static int
890 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[])
891 #else  /* _WIDE */
892 static int
893 _mkarglst(const char *fmt, stva_list args, stva_list arglst[])
894 #endif /* _WIDE */
895 {
896 #ifdef	_WIDE
897 #define	STRCHR	wcschr
898 #define	STRSPN	wcsspn
899 #define	ATOI(x)	_watoi((wchar_t *)x)
900 #define	SPNSTR1	L"01234567890"
901 #define	SPNSTR2	L"# +-.0123456789hL$"
902 #else  /* _WIDE */
903 #define	STRCHR	strchr
904 #define	STRSPN	strspn
905 #define	ATOI(x)	atoi(x)
906 #define	SPNSTR1	"01234567890"
907 #define	SPNSTR2	"# +-.0123456789hL$"
908 #endif /* _WIDE */
909 
910 	int maxnum, curargno;
911 	size_t n;
912 
913 	maxnum = -1;
914 	curargno = 0;
915 
916 	while ((fmt = STRCHR(fmt, '%')) != NULL) {
917 		fmt++;	/* skip % */
918 		if (*fmt == '*' || *fmt == '%')
919 			continue;
920 		if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') {
921 			/* convert to zero base */
922 			curargno = ATOI(fmt) - 1;
923 			fmt += n + 1;
924 		}
925 
926 		if (maxnum < curargno)
927 			maxnum = curargno;
928 		curargno++;	/* default to next in list */
929 
930 		fmt += STRSPN(fmt, SPNSTR2);
931 		if (*fmt == '[') {
932 			int	i;
933 			fmt++; /* has to be at least on item in scan list */
934 			if (*fmt == ']') {
935 				fmt++;
936 			}
937 			while (*fmt != ']') {
938 				if (*fmt == L'\0') {
939 					return (-1); /* bad format */
940 #ifdef	_WIDE
941 				} else {
942 					fmt++;
943 				}
944 #else  /* _WIDE */
945 				} else if (isascii(*fmt)) {
946 					fmt++;
947 				} else {
948 					i = mblen((const char *)
949 					    fmt, MB_CUR_MAX);
950 					if (i <= 0) {
951 						return (-1);
952 					} else {
953 						fmt += i;
954 					}
955 				}
956 #endif /* _WIDE */
957 			}
958 		}
959 	}
960 	if (maxnum > MAXARGS)
961 		maxnum = MAXARGS;
962 	for (n = 0; n <= maxnum; n++) {
963 		arglst[n] = args;
964 		(void) va_arg(args.ap, void *);
965 	}
966 	return (0);
967 }
968 
969 
970 /*
971  * For wide character handling
972  */
973 
974 #ifdef	_WIDE
975 static int
976 wstring(int *chcount, int *flag_eof, int stow, int type,
977 	int len, FILE *iop, va_list *listp)
978 {
979 	wint_t	wch;
980 	wchar_t	*ptr;
981 	wchar_t	*wstart;
982 	int	dummy;
983 
984 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
985 
986 	if ((type == 'c') && len == MAXINT)
987 		len = 1;
988 	while (((wch = _wd_getwc(chcount, iop)) != WEOF) &&
989 	    !(type == 's' && iswspace(wch))) {
990 		if (stow)
991 			*ptr = wch;
992 		ptr++;
993 		if (--len <= 0)
994 			break;
995 	}
996 	if (wch == WEOF) {
997 		*flag_eof = 1;
998 		(*chcount) -= 1;
999 	} else {
1000 		if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF)
1001 			*flag_eof = 1;
1002 	}
1003 	if (ptr == wstart)
1004 		return (0); /* no match */
1005 	if (stow && (type != 'c'))
1006 		*ptr = '\0';
1007 	return (1); /* successful match */
1008 }
1009 
1010 #else  /* _WIDE */
1011 static int
1012 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop,
1013 	va_list *listp)
1014 {
1015 	int	wch;
1016 	wchar_t	*ptr;
1017 	wchar_t	*wstart;
1018 
1019 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1020 
1021 	if ((type == 'c') && len == MAXINT)
1022 		len = 1;
1023 	while (((wch = _bi_getwc(iop)) != EOF) &&
1024 	    !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) {
1025 		(*chcount) += _scrwidth((wchar_t)wch);
1026 		if (stow)
1027 			*ptr = wch;
1028 		ptr++;
1029 		if (--len <= 0)
1030 			break;
1031 	}
1032 	if (wch == EOF) {
1033 		(*flag_eof) = 1;
1034 		(*chcount) -= 1;
1035 	} else {
1036 		if (len > 0 && _bi_ungetwc(wch, iop) == EOF)
1037 			(*flag_eof) = 1;
1038 	}
1039 	if (ptr == wstart)
1040 		return (0); /* no match */
1041 	if (stow && (type != 'c'))
1042 		*ptr = '\0';
1043 	return (1); /* successful match */
1044 }
1045 #endif /* _WIDE */
1046 
1047 #ifdef	_WIDE
1048 static wint_t
1049 _wd_getwc(int *chcount, FILE *iop)
1050 {
1051 	wint_t	wc;
1052 	int	len;
1053 
1054 	if (!(iop->_flag & _IOWRT)) {
1055 		/* call from fwscanf, wscanf */
1056 		wc = __fgetwc_xpg5(iop);
1057 		(*chcount)++;
1058 		return (wc);
1059 	} else {
1060 		/* call from swscanf */
1061 		if (*iop->_ptr == '\0')
1062 			return (WEOF);
1063 		len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr,
1064 		    MB_CUR_MAX);
1065 		if (len == -1)
1066 			return (WEOF);
1067 		iop->_ptr += len;
1068 		(*chcount)++;
1069 		return (wc);
1070 	}
1071 }
1072 
1073 static wint_t
1074 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop)
1075 {
1076 	wint_t	ret;
1077 	int	len;
1078 	char	mbs[MB_LEN_MAX];
1079 
1080 	if (wc == WEOF)
1081 		return (WEOF);
1082 
1083 	if (!(iop->_flag & _IOWRT)) {
1084 		/* call from fwscanf, wscanf */
1085 		ret = __ungetwc_xpg5((wint_t)wc, iop);
1086 		if (ret != (wint_t)wc)
1087 			return (WEOF);
1088 		(*chcount)--;
1089 		return (ret);
1090 	} else {
1091 		/* call from swscanf */
1092 		len = wctomb(mbs, wc);
1093 		if (len == -1)
1094 			return (WEOF);
1095 		iop->_ptr -= len;
1096 		(*chcount)--;
1097 		return ((wint_t)wc);
1098 	}
1099 }
1100 
/*
 * _watoi - convert the run of decimal digits at the start of a wide
 * string to an int.  Only wide characters in the range 0..255 that
 * satisfy isdigit() are accepted; returns 0 if there are none.
 */
static int
_watoi(wchar_t *fmt)
{
	const wchar_t	*cursor = fmt;
	int		value = 0;

	for (;;) {
		wchar_t	wc = *cursor;

		if (wc < 0 || wc >= 256 || !isdigit((int)wc))
			break;
		value *= 10;
		value += wc - '0';
		cursor++;
	}
	return (value);
}
1118 #endif /* _WIDE */
1119 
/*
 * wbrstring - match a %l[ conversion into a wchar_t buffer.
 *
 * Each input character is converted to its multibyte form and tested
 * against the scan set brstr with fnmatch(); scanning stops at the
 * first character that does not match, at end of input, or after len
 * characters.  The result is always terminated when stored.
 *
 * Returns 1 if at least one character was matched, 0 otherwise
 * (including when wctomb() rejects a character), and sets *flag_eof
 * when end of input is seen.
 */
/* ARGSUSED3 */
static int
wbrstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	wchar_t	*ptr, *start;
#ifdef	_WIDE
	/*
	 * Scratch counter for _wd_getwc()/_wd_ungetwc(), which modify
	 * it in place; the real count is kept by hand below.  It is
	 * initialized so that it is never read while indeterminate.
	 */
	int	dummy = 0;
#endif /* _WIDE */

	start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;

#ifdef	_WIDE
	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
#else  /* _WIDE */
	while ((wch = _bi_getwc(iop)) != WEOF) {
#endif /* _WIDE */
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when the character is not in the set */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE)) {
			break;
		} else {
			if (len > 0) {
#ifdef	_WIDE
				(*chcount)++;
#else  /* _WIDE */
				(*chcount) += _scrwidth(wch);
#endif /* _WIDE */
				len--;
				if (stow) {
					*ptr = wch;
				}
				ptr++;
				if (len <= 0)
					break;
			} else {
				break;
			}
		}
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
#ifdef	_WIDE
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
#else  /* _WIDE */
		if (len > 0 && _bi_ungetwc(wch, iop) == WEOF)
#endif /* _WIDE */
			*flag_eof = 1;
	}
	if (ptr == start)
		return (0);				/* no match */
	if (stow)
		*ptr = L'\0';
	return (1);					/* successful match */
}
1183 
1184 #ifdef	_WIDE
/*
 * brstring - scan wide input against a bracket pattern and deliver the
 * accepted characters as a multibyte (char) string.
 *
 * Wide characters are read from iop with _wd_getwc().  Each is converted
 * to its multibyte form with wctomb() and tested against brstr with
 * fnmatch(FNM_NOESCAPE).  Scanning stops at the first character that does
 * not match, when the next character's multibyte form no longer fits in
 * the remaining len bytes, when len reaches zero, or at end of input.
 *
 *	chcount		incremented once for every accepted character
 *	flag_eof	set to 1 when input is exhausted or when the
 *			terminating character cannot be pushed back
 *	stow		non-zero to store the result; the destination
 *			(char *) is then taken from *listp
 *	type		unused
 *	len		maximum number of bytes to accept
 *	iop		stream being scanned
 *	brstr		pattern handed to fnmatch()
 *	listp		caller's argument list
 *
 * Returns 1 when at least one byte was accepted (a stored result is null
 * terminated), 0 when none was accepted.  Also returns 0 immediately when
 * wctomb() cannot convert a character that was read; in that case nothing
 * is pushed back and a partially stored result is not terminated.
 */
/* ARGSUSED3 */
static int
brstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i, k;
	int	nbytes = 0;	/* bytes accepted so far */
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	char	*ptr;
	int	dummy;

	ptr = stow ? va_arg(*listp, char *) : NULL;

	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';

		/* fnmatch() returns non-zero when str is not in the set */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE) != 0)
			break;
		/* the whole multibyte character must fit in what is left */
		if (len < i)
			break;

		(*chcount)++;
		len -= i;
		/*
		 * Accepted bytes are tallied in nbytes; ptr is only advanced
		 * when it really points at a destination buffer, so no
		 * arithmetic is ever done on a null pointer.
		 */
		if (stow) {
			for (k = 0; k < i; k++)
				*ptr++ = str[k];
		}
		nbytes += i;
		if (len <= 0)
			break;
	}

	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/*
		 * With field width remaining, the last character read was
		 * not accepted: give it back to the stream.
		 */
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
			*flag_eof = 1;
	}
	if (nbytes == 0)
		return (0);				/* no match */
	if (stow)
		*ptr = '\0';
	return (1);					/* successful match */
}
1239 #endif /* _WIDE */
1240 
1241 /*
1242  * Locally define getwc and ungetwc
1243  */
/*
 * _bi_getwc - read one multibyte character from iop and return it as a
 * wide character.
 *
 * An ASCII byte is returned directly.  Otherwise up to MB_CUR_MAX bytes
 * are gathered (stopping early at a newline, which is pushed back, or at
 * EOF) and converted with mbtowc().  Bytes read beyond the converted
 * character are pushed back.
 *
 * Returns WEOF at end of input.  When the bytes do not form a legal
 * character, all but the first byte are pushed back, errno is set to
 * EILSEQ and WEOF is returned.
 */
static int
_bi_getwc(FILE *iop)
{
	char	mbbuf[MB_LEN_MAX];
	wchar_t	wc;
	int	byte;
	int	got;		/* number of bytes held in mbbuf */
	int	used;		/* number of bytes mbtowc() consumed */
	int	limit;
	int	k;

	byte = wlocgetc();
	if (byte == EOF)
		return (WEOF);

	if (isascii(byte))	/* ASCII code */
		return ((wint_t)byte);

	mbbuf[0] = (char)byte;

	/*
	 * MB_CUR_MAX doesn't exceed the value of MB_LEN_MAX, so
	 * MB_CUR_MAX is used as the bound to improve performance.
	 */
	limit = (int)MB_CUR_MAX;
	got = 1;
	while (got < limit) {
		byte = wlocgetc();
		if (byte == '\n') {
			(void) wlocungetc(byte);
			break;
		}
		if (byte == EOF) {
			/* what we have may still be a valid character */
			break;
		}
		mbbuf[got] = (char)byte;
		got++;
	}

	used = mbtowc(&wc, mbbuf, got);
	if (used == -1) {
		/*
		 * The input was not a legal character: push back every
		 * byte except the first one.
		 *
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 *
		 * NOTE(review): the byte is handed over as a signed char,
		 *	so 0xFF becomes -1; if wlocungetc() compares its
		 *	argument with EOF that byte may not be pushed
		 *	back - confirm against the macro definition.
		 */
		for (k = got - 1; k >= 1; k--) {
			(void) wlocungetc((signed char)mbbuf[k]);
		}
		errno = EILSEQ;
		return (WEOF); /* Illegal EUC sequence. */
	}

	/*
	 * Return the bytes that were read ahead but are not part of the
	 * converted character.  The pushback limit noted above applies
	 * here as well.
	 */
	for (k = got - 1; k >= used; k--) {
		(void) wlocungetc((signed char)mbbuf[k]);
	}
	return ((int)wc);
}
1305 
1306 static int
1307 _bi_ungetwc(wint_t wc, FILE *iop)
1308 {
1309 	char mbs[MB_LEN_MAX];
1310 	unsigned char *p;
1311 	int n;
1312 
1313 	if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0))
1314 		return (WEOF);
1315 
1316 	n = wctomb(mbs, (wchar_t)wc);
1317 	if (n <= 0)
1318 		return (WEOF);
1319 
1320 	if (iop->_ptr <= iop->_base) {
1321 		if (iop->_base == NULL) {
1322 			return (WEOF);
1323 		}
1324 		if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) {
1325 			++iop->_ptr;
1326 		} else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) {
1327 			return (WEOF);
1328 		}
1329 	}
1330 
1331 	p = (unsigned char *)(mbs+n-1); /* p points the last byte */
1332 	/* if _IOWRT is set to iop->_flag, it means this is */
1333 	/* an invocation from sscanf(), and in that time we */
1334 	/* don't touch iop->_cnt.  Otherwise, which means an */
1335 	/* invocation from fscanf() or scanf(), we touch iop->_cnt */
1336 	if ((iop->_flag & _IOWRT) == 0) {
1337 		/* scanf() and fscanf() */
1338 		iop->_cnt += n;
1339 		while (n--) {
1340 			*--iop->_ptr = *(p--);
1341 		}
1342 	} else {
1343 		/* sscanf() */
1344 		iop->_ptr -= n;
1345 	}
1346 	return (wc);
1347 }
1348