xref: /illumos-gate/usr/src/lib/libc/port/stdio/doscan.c (revision dcafa541382944b24abd3a40c357b47e04f314e2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include "lint.h"
33 #include <sys/types.h>
34 #include "mtlib.h"
35 #include "file64.h"
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #include <values.h>
40 #include <errno.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <math.h>
44 #include <thread.h>
45 #include <synch.h>
46 #include <stdlib.h>
47 #include <fnmatch.h>
48 #include <limits.h>
49 #include <wchar.h>
50 #include <unistd.h>
51 #include "libc.h"
52 #include "stdiom.h"
53 #include "xpg6.h"
54 
/* Size of the per-byte lookup table used for %[ scan sets. */
55 #define	NCHARS	(1 << BITSPERBYTE)
56 
/*
 * locgetc()/locungetc() fetch and push back one byte while keeping the
 * caller's running character count (cnt) in step: the count is adjusted
 * even when the result is EOF.  Both macros rely on a variable named
 * "iop" being in scope at the point of use.  Pushback only moves the
 * buffer pointer (and _cnt for real streams); it does not store x.
 */
57 /* if the _IOWRT flag is set, this must be a call from sscanf */
58 #define	locgetc(cnt)	(cnt += 1, (iop->_flag & _IOWRT) ? \
59 				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
60 				GETC(iop))
61 #define	locungetc(cnt, x) (cnt -= 1, (x == EOF) ? EOF : \
62 				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
63 				    (++iop->_cnt, *(--iop->_ptr))))
64 
/*
 * wlocgetc()/wlocungetc() are the same byte-level operations without any
 * character counting; for real streams pushback goes through UNGETC().
 */
65 #define	wlocgetc()	((iop->_flag & _IOWRT) ? \
66 				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
67 				GETC(iop))
68 #define	wlocungetc(x) ((x == EOF) ? EOF : \
69 				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
70 				    UNGETC(x, iop)))
71 
72 #define	MAXARGS	30	/* max. number of args for fast positional parameters */
73 
74 /*
75  * stva_list is used to subvert C's restriction that a variable with an
76  * array type can not appear on the left hand side of an assignment operator.
77  * By putting the array inside a structure, the functionality of assigning to
78  * the whole array through a simple assignment is achieved.
79  */
80 typedef struct stva_list {
81 	va_list	ap;
82 } stva_list;
83 
/* Conversion helpers; each returns non-zero on a successful match. */
84 static int number(int *, int *, int, int, int, int, FILE *, va_list *);
85 static int readchar(FILE *, int *);
86 static int string(int *, int *, int, int, int, char *, FILE *, va_list *);
87 static int wstring(int *, int *, int, int, int, FILE *, va_list *);
88 static int	wbrstring(int *, int *, int, int, int, FILE *,
89 	unsigned char *, va_list *);
90 #ifdef	_WIDE
91 static int	brstring(int *, int *, int, int, int, FILE *,
92 	unsigned char *, va_list *);
93 #endif
/* Multibyte-aware get/unget used by the byte-oriented build. */
94 static int _bi_getwc(FILE *);
95 static int _bi_ungetwc(wint_t, FILE *);
96 
/* Positional-argument support and wide-character input primitives. */
97 #ifdef	_WIDE
98 static int _mkarglst(const wchar_t *, stva_list, stva_list[]);
99 static wint_t	_wd_getwc(int *, FILE *);
100 static wint_t	_wd_ungetwc(int *, wchar_t, FILE *);
101 static int	_watoi(wchar_t *);
102 #else  /* _WIDE */
103 static int _mkarglst(const char *, stva_list, stva_list[]);
104 #endif /* _WIDE */
105 
106 #ifndef	_WIDE
107 int
108 _doscan(FILE *iop, const char *fmt, va_list va_Alist)
109 {
110 	int ret;
111 	rmutex_t *lk;
112 
113 	if (iop->_flag & _IOWRT)
114 		ret = __doscan_u(iop, fmt, va_Alist, 0);
115 	else {
116 		FLOCKFILE(lk, iop);
117 		ret = __doscan_u(iop, fmt, va_Alist, 0);
118 		FUNLOCKFILE(lk);
119 	}
120 	return (ret);
121 }
122 #endif  /* _WIDE */
123 
124 /* ARGSUSED3 */
125 #ifdef	_WIDE
126 int
127 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, int scflag)
128 #else  /* _WIDE */
129 int
130 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag)
131 #endif /* _WIDE */
132 {
133 #ifdef	_WIDE
134 	wchar_t	ch;
135 	wchar_t	inchar, size;
136 	int	nmatch = 0, len, stow;
137 #else  /* _WIDE */
138 	int	ch;
139 	int		nmatch = 0, len, inchar, stow, size;
140 #endif /* _WIDE */
141 
142 	unsigned char	*bracket_str = NULL;
143 	int		chcount, flag_eof;
144 	char	tab[NCHARS];
145 
146 	/* variables for postional parameters */
147 #ifdef	_WIDE
148 	const wchar_t	*sformat = fmt;	/* save the beginning of the format */
149 #else  /* _WIDE */
150 	const unsigned char	*fmt = (const unsigned char *)sfmt;
151 	const char	*sformat = sfmt; /* save the beginning of the format */
152 #endif /* _WIDE */
153 	int		fpos = 1;	/* 1 if first postional parameter */
154 	stva_list	args,	/* used to step through the argument list */
155 		sargs;	/* used to save the start of the argument list */
156 	stva_list	arglst[MAXARGS];
157 					/*
158 					 * array giving the appropriate values
159 					 * for va_arg() to retrieve the
160 					 * corresponding argument:
161 					 * arglst[0] is the first argument
162 					 * arglst[1] is the second argument,etc.
163 					 */
164 	/* Check if readable stream */
165 	if (!(iop->_flag & (_IOREAD | _IORW))) {
166 		errno = EBADF;
167 		return (EOF);
168 	}
169 
170 	/*
171 	 * Initialize args and sargs to the start of the argument list.
172 	 * We don't know any portable way to copy an arbitrary C object
173 	 * so we use a system-specific routine(probably a macro) from
174 	 * stdarg.h.  (Remember that if va_list is an array, in_args will
175 	 * be a pointer and &in_args won't be what we would want for
176 	 * memcpy.)
177 	 */
178 	va_copy(args.ap, va_Alist);
179 
180 	sargs = args;
181 
182 	chcount = 0; flag_eof = 0;
183 
184 	/*
185 	 * ****************************************************
186 	 * Main loop: reads format to determine a pattern,
187 	 *		and then goes to read input stream
188 	 *		in attempt to match the pattern.
189 	 * ****************************************************
190 	 */
191 	for (; ; ) {
192 		if ((ch = *fmt++) == '\0') {
193 			return (nmatch); /* end of format */
194 		}
195 #ifdef	_WIDE
196 		if (iswspace(ch)) {
197 			if (!flag_eof) {
198 				while (iswspace(inchar =
199 					    _wd_getwc(&chcount, iop)))
200 					;
201 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
202 					flag_eof = 1;
203 			}
204 			continue;
205 		}
206 		if (ch != '%' || (ch = *fmt++) == '%') {
207 			if ((inchar = _wd_getwc(&chcount, iop)) == ch)
208 				continue;
209 			if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) {
210 				return (nmatch); /* failed to match input */
211 			}
212 			break;
213 		}
214 #else  /* _WIDE */
215 		if (isspace(ch)) {
216 			if (!flag_eof) {
217 				while (isspace(inchar = locgetc(chcount)))
218 					;
219 				if (locungetc(chcount, inchar) == EOF)
220 					flag_eof = 1;
221 
222 			}
223 			continue;
224 		}
225 		if (ch != '%' || (ch = *fmt++) == '%') {
226 			if ((inchar = locgetc(chcount)) == ch)
227 				continue;
228 			if (locungetc(chcount, inchar) != EOF) {
229 				return (nmatch); /* failed to match input */
230 			}
231 			break;
232 		}
233 #endif /* _WIDE */
234 
235 charswitch:	/* target of a goto 8-( */
236 
237 		if (ch == '*') {
238 			stow = 0;
239 			ch = *fmt++;
240 		} else
241 			stow = 1;
242 
243 #ifdef	_WIDE
244 		for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch));
245 								ch = *fmt++)
246 			len = len * 10 + ch - '0';
247 #else  /* _WIDE */
248 		for (len = 0; isdigit(ch); ch = *fmt++)
249 			len = len * 10 + ch - '0';
250 #endif /* _WIDE */
251 
252 		if (ch == '$') {
253 			/*
254 			 * positional parameter handling - the number
255 			 * specified in len gives the argument to which
256 			 * the next conversion should be applied.
257 			 * WARNING: This implementation of positional
258 			 * parameters assumes that the sizes of all pointer
259 			 * types are the same. (Code similar to that
260 			 * in the portable doprnt.c should be used if this
261 			 * assumption does not hold for a particular
262 			 * port.)
263 			 */
264 			if (fpos) {
265 				if (_mkarglst(sformat, sargs, arglst) != 0) {
266 					return (EOF);
267 				} else {
268 					fpos = 0;
269 				}
270 			}
271 			if (len <= MAXARGS) {
272 				args = arglst[len - 1];
273 			} else {
274 				args = arglst[MAXARGS - 1];
275 				for (len -= MAXARGS; len > 0; len--)
276 					(void) va_arg(args.ap, void *);
277 			}
278 			len = 0;
279 			ch = *fmt++;
280 			goto charswitch;
281 		}
282 
283 		if (len == 0)
284 			len = MAXINT;
285 #ifdef	_WIDE
286 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
287 			(size == 'j') || (size == 't') || (size == 'z'))
288 			ch = *fmt++;
289 #else  /* _WIDE */
290 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
291 			(size == 'w') || (size == 'j') || (size == 't') ||
292 			(size == 'z'))
293 			ch = *fmt++;
294 #endif /* _WIDE */
295 		if (size == 'l' && ch == 'l') {
296 			size = 'm';		/* size = 'm' if long long */
297 			ch = *fmt++;
298 		} else if (size == 'h' && ch == 'h') {
299 			size = 'b';		/* use size = 'b' if char */
300 			ch = *fmt++;
301 		} else if ((size == 't') || (size == 'z')) {
302 			size = 'l';
303 		} else if (size == 'j') {
304 #ifndef _LP64
305 			/* check scflag for size of u/intmax_t (32-bit libc) */
306 			if (!(scflag & _F_INTMAX32))
307 			{
308 #endif
309 				size = 'm';
310 #ifndef _LP64
311 			}
312 #endif
313 		}
314 		if (ch == '\0') {
315 			return (EOF);		/* unexpected end of format */
316 		}
317 #ifdef	_WIDE
318 		if (ch == '[') {
319 			wchar_t	c;
320 			size_t	len;
321 			int	negflg = 0;
322 			wchar_t	*p;
323 			wchar_t	*wbracket_str;
324 			size_t	wlen, clen;
325 
326 			/* p points to the address of '[' */
327 			p = (wchar_t *)fmt - 1;
328 			len = 0;
329 			if (*fmt == '^') {
330 				len++;
331 				fmt++;
332 				negflg = 1;
333 			}
334 			if (((c = *fmt) == ']') || (c == '-')) {
335 				len++;
336 				fmt++;
337 			}
338 			while ((c = *fmt) != ']') {
339 				if (c == '\0') {
340 					return (EOF); /* unexpected EOF */
341 				} else {
342 					len++;
343 					fmt++;
344 				}
345 			}
346 			fmt++;
347 			len += 2;
348 			wbracket_str = (wchar_t *)
349 				malloc(sizeof (wchar_t) * (len + 1));
350 			if (wbracket_str == NULL) {
351 				errno = ENOMEM;
352 				return (EOF);
353 			} else {
354 				(void) wmemcpy(wbracket_str,
355 						(const wchar_t *)p, len);
356 				*(wbracket_str + len) = L'\0';
357 				if (negflg && *(wbracket_str + 1) == '^') {
358 					*(wbracket_str + 1) = L'!';
359 				}
360 			}
361 			wlen = wcslen(wbracket_str);
362 			clen = wcstombs((char *)NULL, wbracket_str, 0);
363 			if (clen == (size_t)-1) {
364 				free(wbracket_str);
365 				return (EOF);
366 			}
367 			bracket_str = (unsigned char *)
368 				malloc(sizeof (unsigned char) * (clen + 1));
369 			if (bracket_str == NULL) {
370 				free(wbracket_str);
371 				errno = ENOMEM;
372 				return (EOF);
373 			}
374 			clen = wcstombs((char *)bracket_str, wbracket_str,
375 				wlen + 1);
376 			free(wbracket_str);
377 			if (clen == (size_t)-1) {
378 				free(bracket_str);
379 				return (EOF);
380 			}
381 		}
382 #else  /* _WIDE */
383 		if (ch == '[') {
384 			if (size == 'l') {
385 				int	c, len, i;
386 				int	negflg = 0;
387 				unsigned char 	*p;
388 
389 				p = (unsigned char *)(fmt - 1);
390 				len = 0;
391 				if (*fmt == '^') {
392 					len++;
393 					fmt++;
394 					negflg = 1;
395 				}
396 				if (((c = *fmt) == ']') || (c == '-')) {
397 					len++;
398 					fmt++;
399 				}
400 				while ((c = *fmt) != ']') {
401 					if (c == '\0') {
402 						return (EOF);
403 					} else if (isascii(c)) {
404 						len++;
405 						fmt++;
406 					} else {
407 						i = mblen((const char *)fmt,
408 							MB_CUR_MAX);
409 						if (i <= 0) {
410 							return (EOF);
411 						} else {
412 							len += i;
413 							fmt += i;
414 						}
415 					}
416 				}
417 				fmt++;
418 				len += 2;
419 				bracket_str = (unsigned char *)
420 					malloc(sizeof (unsigned char) *
421 					(len + 1));
422 				if (bracket_str == NULL) {
423 					errno = ENOMEM;
424 					return (EOF);
425 				} else {
426 					(void) strncpy((char *)bracket_str,
427 						(const char *)p, len);
428 					*(bracket_str + len) = '\0';
429 					if (negflg &&
430 					    *(bracket_str + 1) == '^') {
431 						*(bracket_str + 1) = '!';
432 					}
433 				}
434 			} else {
435 				int	t = 0;
436 				int	b, c, d;
437 
438 				if (*fmt == '^') {
439 					t++;
440 					fmt++;
441 				}
442 				(void) memset(tab, !t, NCHARS);
443 				if ((c = *fmt) == ']' || c == '-') {
444 					tab[c] = t;
445 					fmt++;
446 				}
447 
448 				while ((c = *fmt) != ']') {
449 					if (c == '\0') {
450 						return (EOF);
451 					}
452 					b = *(fmt - 1);
453 					d = *(fmt + 1);
454 					if ((c == '-') && (d != ']') &&
455 								(b < d)) {
456 						(void) memset(&tab[b], t,
457 								d - b + 1);
458 						fmt += 2;
459 					} else {
460 						tab[c] = t;
461 						fmt++;
462 					}
463 				}
464 				fmt++;
465 			}
466 		}
467 #endif /* _WIDE */
468 
469 #ifdef	_WIDE
470 		if ((ch >= 0) && (ch < 256) &&
471 			isupper((int)ch)) { /* no longer documented */
472 			if (_lib_version == c_issue_4) {
473 				if (size != 'm' && size != 'L')
474 					size = 'l';
475 			}
476 			ch = _tolower((int)ch);
477 		}
478 		if (ch != 'n' && !flag_eof) {
479 			if (ch != 'c' && ch != 'C' && ch != '[') {
480 				while (iswspace(inchar =
481 						_wd_getwc(&chcount, iop)))
482 					;
483 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
484 					break;
485 
486 			}
487 		}
488 #else  /* _WIDE */
489 		if (isupper(ch)) { /* no longer documented */
490 			if (_lib_version == c_issue_4) {
491 				if (size != 'm' && size != 'L')
492 					size = 'l';
493 			}
494 			ch = _tolower(ch);
495 		}
496 		if (ch != 'n' && !flag_eof) {
497 			if (ch != 'c' && ch != 'C' && ch != '[') {
498 				while (isspace(inchar = locgetc(chcount)))
499 					;
500 				if (locungetc(chcount, inchar) == EOF)
501 					break;
502 			}
503 		}
504 #endif /* _WIDE */
505 
506 		switch (ch) {
507 		case 'C':
508 		case 'S':
509 		case 'c':
510 		case 's':
511 #ifdef	_WIDE
512 			if ((size == 'l') || (size == 'C') || (size == 'S'))
513 #else  /* _WIDE */
514 			if ((size == 'w') || (size == 'l') || (size == 'C') ||
515 				(size == 'S'))
516 #endif /* _WIDE */
517 			{
518 				size = wstring(&chcount, &flag_eof, stow,
519 					(int)ch, len, iop, &args.ap);
520 			} else {
521 				size = string(&chcount, &flag_eof, stow,
522 					(int)ch, len, tab, iop, &args.ap);
523 			}
524 			break;
525 		case '[':
526 			if (size == 'l') {
527 				size = wbrstring(&chcount, &flag_eof, stow,
528 					(int)ch, len, iop, bracket_str,
529 					&args.ap);
530 				free(bracket_str);
531 				bracket_str = NULL;
532 			} else {
533 #ifdef	_WIDE
534 				size = brstring(&chcount, &flag_eof, stow,
535 					(int)ch, len, iop, bracket_str,
536 					&args.ap);
537 				free(bracket_str);
538 				bracket_str = NULL;
539 #else  /* _WIDE */
540 				size = string(&chcount, &flag_eof, stow,
541 					ch, len, tab, iop, &args.ap);
542 #endif /* _WIDE */
543 			}
544 			break;
545 
546 		case 'n':
547 			if (stow == 0)
548 				continue;
549 			if (size == 'b')	/* char */
550 				*va_arg(args.ap, char *) = (char)chcount;
551 			else if (size == 'h')
552 				*va_arg(args.ap, short *) = (short)chcount;
553 			else if (size == 'l')
554 				*va_arg(args.ap, long *) = (long)chcount;
555 			else if (size == 'm') /* long long */
556 				*va_arg(args.ap, long long *) =
557 					(long long) chcount;
558 			else
559 				*va_arg(args.ap, int *) = (int)chcount;
560 			continue;
561 
562 		case 'i':
563 		default:
564 			size = number(&chcount, &flag_eof, stow, (int)ch,
565 				len, (int)size, iop, &args.ap);
566 			break;
567 		}
568 		if (size)
569 			nmatch += stow;
570 		else {
571 			return ((flag_eof && !nmatch) ? EOF : nmatch);
572 		}
573 		continue;
574 	}
575 	if (bracket_str)
576 		free(bracket_str);
577 	return (nmatch != 0 ? nmatch : EOF); /* end of input */
578 }
579 
580 /* ****************************************************************** */
581 /* Functions to read the input stream in an attempt to match incoming */
582 /* data to the current pattern from the main loop of _doscan(). */
583 /* ****************************************************************** */
/*
 * number() - match one numeric field of at most len characters.
 *
 * type is the (lower-cased) conversion character: e/f/g/a select the
 * floating point path, d/u/i/o/x/p the integer path; anything else is
 * rejected with a return of 0.  size is the length-modifier code chosen
 * by the caller ('m' long long, 'l' long/double, 'h' short, 'b' char,
 * 'L' long double) and picks the type of the destination fetched from
 * *listp.  Nothing is stored when stow is zero.
 *
 * *chcount is advanced by the characters consumed and *flag_eof is set
 * when the character following the field is EOF.  The return value is
 * non-zero on a successful match: 1 for floating point, the number of
 * digits seen for integers.
 */
584 static int
585 number(int *chcount, int *flag_eof, int stow, int type, int len, int size,
586 	FILE *iop, va_list *listp)
587 {
588 	char	numbuf[64];
589 	char	*np = numbuf;
590 	int	c, base, inchar, lookahead;
591 	int		digitseen = 0, floater = 0, negflg = 0;
592 	int		lc;
593 	long long	lcval = 0LL;
594 
595 	switch (type) {
596 	case 'e':
597 	case 'f':
598 	case 'g':
599 		/*
600 		 * lc = 0 corresponds to c90 mode: do not recognize
601 		 *	hexadecimal fp strings; attempt to push back
602 		 *	all unused characters read
603 		 *
604 		 * lc = -1 corresponds to c99 mode: recognize hexa-
605 		 *	decimal fp strings; push back at most one
606 		 *	unused character
607 		 */
608 		lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0;
609 		floater = 1;
610 		break;
611 
612 	case 'a':
		/* %a always uses the c99 (hex-aware) behaviour */
613 		lc = -1;
614 		floater = 1;
615 		break;
616 
617 	case 'd':
618 	case 'u':
619 	case 'i':
		/* %i may switch to base 8 or 16 below, from the prefix */
620 		base = 10;
621 		break;
622 	case 'o':
623 		base = 8;
624 		break;
625 	case 'p':
626 #ifdef	_LP64
627 		size = 'l'; /* pointers are long in LP64 */
628 #endif	/*	_LP64	*/
629 		/* FALLTHROUGH */
630 	case 'x':
631 		base = 16;
632 		break;
633 	default:
634 		return (0); /* unrecognized conversion character */
635 	}
636 
637 	if (floater != 0) {
638 		/*
639 		 * Handle floating point with
640 		 * file_to_decimal.
641 		 */
642 		decimal_mode		dm;
643 		decimal_record		dr;
644 		fp_exception_field_type	efs;
645 		enum decimal_string_form form;
646 		char			*echar;
647 		int			nread;
648 		char			buffer[1024+1];
649 		char			*nb = buffer;
650 
		/* the field is collected in buffer[], so cap the width */
651 		if (len > 1024)
652 			len = 1024;
653 		file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread);
654 		if (lc == -1) {
655 			/*
656 			 * In C99 mode, the entire string read has to be
657 			 * accepted in order to qualify as a match
658 			 */
659 			if (nb != buffer + nread)
660 				form = invalid_form;
661 		}
662 		if (stow && (form != invalid_form)) {
			/*
			 * A negative form value selects the __hex_to_*()
			 * converters, otherwise decimal_to_*() is used.
			 */
663 #if defined(__sparc)
664 			dm.rd = _QgetRD();
665 			if (size == 'L') {		/* long double */
666 				if ((int)form < 0)
667 					__hex_to_quadruple(&dr, dm.rd,
668 					    va_arg(*listp, quadruple *),
669 					    &efs);
670 				else
671 					decimal_to_quadruple(
672 					    va_arg(*listp, quadruple *),
673 					    &dm, &dr, &efs);
674 			}
675 #elif defined(__i386) || defined(__amd64)
676 			dm.rd = __xgetRD();
677 			if (size == 'L') {		/* long double */
678 				if ((int)form < 0)
679 					__hex_to_extended(&dr, dm.rd,
680 					    va_arg(*listp, extended *),
681 					    &efs);
682 				else
683 					decimal_to_extended(
684 					    va_arg(*listp, extended *),
685 					    &dm, &dr, &efs);
686 			}
687 #else
688 #error Unknown architecture
689 #endif
690 			else if (size == 'l') {		/* double */
691 				if ((int)form < 0)
692 					__hex_to_double(&dr, dm.rd,
693 					    va_arg(*listp, double *), &efs);
694 				else
695 					decimal_to_double(
696 					    va_arg(*listp, double *),
697 					    &dm, &dr, &efs);
698 			} else {			/* float */
699 				if ((int)form < 0)
700 					__hex_to_single(&dr, dm.rd,
701 					    va_arg(*listp, single *), &efs);
702 				else
703 					decimal_to_single((single *)
704 					    va_arg(*listp, single *),
705 					    &dm, &dr, &efs);
706 			}
			/* overflow and underflow are both reported as ERANGE */
707 			if ((efs & (1 << fp_overflow)) != 0) {
708 				errno = ERANGE;
709 			}
710 			if ((efs & (1 << fp_underflow)) != 0) {
711 				errno = ERANGE;
712 			}
713 		}
714 		(*chcount) += nread;	/* Count characters read. */
		/* peek one character ahead purely to detect end of input */
715 		c = locgetc((*chcount));
716 		if (locungetc((*chcount), c) == EOF)
717 			*flag_eof = 1;
718 		return ((form == invalid_form) ? 0 : 1);
719 				/* successful match if non-zero */
720 	}
721 
	/*
	 * Integer path.  Consume an optional sign and an optional "0x"/"0"
	 * prefix; on leaving the switch, c holds the first character that
	 * the digit loop below has yet to examine.
	 */
722 	switch (c = locgetc((*chcount))) {
723 	case '-':
724 		negflg++;
725 		/* FALLTHROUGH */
726 	case '+':
727 		if (--len <= 0)
728 			break;
729 		if ((c = locgetc((*chcount))) != '0')
730 			break;
731 		/* FALLTHROUGH */
732 	case '0':
733 		/*
734 		 * If %i or %x, the characters 0x or 0X may optionally precede
735 		 * the sequence of letters and digits (base 16).
736 		 */
737 		if ((type != 'i' && type != 'x') || (len <= 1))
738 			break;
739 		if (((inchar = locgetc((*chcount))) == 'x') ||
740 		    (inchar == 'X')) {
			/* "0x" only counts as a prefix if a hex digit follows */
741 			lookahead = readchar(iop, chcount);
742 			if (isxdigit(lookahead)) {
743 				base = 16;
744 
745 				if (len <= 2) {
746 					(void) locungetc((*chcount), lookahead);
747 					/* Take into account the 'x' */
748 					len -= 1;
749 				} else {
750 					c = lookahead;
751 					/* Take into account '0x' */
752 					len -= 2;
753 				}
754 			} else {
755 				(void) locungetc((*chcount), lookahead);
756 				(void) locungetc((*chcount), inchar);
757 			}
758 		} else {
759 			/* inchar wasn't 'x'. */
760 			(void) locungetc((*chcount), inchar); /* Put it back. */
761 			if (type == 'i') /* Only %i accepts an octal. */
762 				base = 8;
763 		}
764 	}
	/*
	 * Accumulate digits valid in the chosen base.  numbuf is used only
	 * to bound the field: once 63 characters have been taken the
	 * conversion fails with ERANGE.
	 */
765 	for (; --len  >= 0; *np++ = (char)c, c = locgetc((*chcount))) {
766 		if (np > numbuf + 62) {
767 			errno = ERANGE;
768 			return (0);
769 		}
770 		if (isdigit(c) || base == 16 && isxdigit(c)) {
771 			int digit = c - (isdigit(c) ? '0' :
772 			    isupper(c) ? 'A' - 10 : 'a' - 10);
773 			if (digit >= base)
774 				break;
775 			if (stow)
776 				lcval = base * lcval + digit;
777 			digitseen++;
778 			continue;
779 		}
780 		break;
781 	}
782 
783 	if (stow && digitseen) {
784 		/* suppress possible overflow on 2's-comp negation */
785 		if (negflg && lcval != (1ULL << 63))
786 			lcval = -lcval;
		/* narrow the value to the destination selected by size */
787 		switch (size) {
788 			case 'm':
789 				*va_arg(*listp, long long *) = lcval;
790 				break;
791 			case 'l':
792 				*va_arg(*listp, long *) = (long)lcval;
793 				break;
794 			case 'h':
795 				*va_arg(*listp, short *) = (short)lcval;
796 				break;
797 			case 'b':
798 				*va_arg(*listp, char *) = (char)lcval;
799 				break;
800 			default:
801 				*va_arg(*listp, int *) = (int)lcval;
802 				break;
803 		}
804 	}
	/* return the terminating character to the input */
805 	if (locungetc((*chcount), c) == EOF)
806 		*flag_eof = 1;
807 	return (digitseen); /* successful match if non-zero */
808 }
809 
810 /* Get a character. If not using sscanf and at the buffer's end */
811 /* then do a direct read(). Characters read via readchar() */
812 /* can be  pushed back on the input stream by locungetc((*chcount),) */
813 /* since there is padding allocated at the end of the stream buffer. */
814 static int
815 readchar(FILE *iop, int *chcount)
816 {
817 	int	inchar;
818 	char	buf[1];
819 
820 	if ((iop->_flag & _IOWRT) || (iop->_cnt != 0))
821 		inchar = locgetc((*chcount));
822 	else {
823 		if (read(FILENO(iop), buf, 1) != 1)
824 			return (EOF);
825 		inchar = (int)buf[0];
826 		(*chcount) += 1;
827 	}
828 	return (inchar);
829 }
830 
831 static int
832 string(int *chcount, int *flag_eof, int stow, int type, int len, char *tab,
833 	FILE *iop, va_list *listp)
834 {
835 	int	ch;
836 	char	*ptr;
837 	char	*start;
838 
839 	start = ptr = stow ? va_arg(*listp, char *) : NULL;
840 	if (((type == 'c') || (type == 'C')) && len == MAXINT)
841 		len = 1;
842 #ifdef	_WIDE
843 	while ((ch = locgetc((*chcount))) != EOF &&
844 	    !(((type == 's') || (type == 'S')) && isspace(ch))) {
845 #else  /* _WIDE */
846 	while ((ch = locgetc((*chcount))) != EOF &&
847 	    !(((type == 's') || (type == 'S')) &&
848 	    isspace(ch) || type == '[' && tab[ch])) {
849 #endif /* _WIDE */
850 		if (stow)
851 			*ptr = (char)ch;
852 		ptr++;
853 		if (--len <= 0)
854 			break;
855 	}
856 	if (ch == EOF) {
857 		(*flag_eof) = 1;
858 		(*chcount) -= 1;
859 	} else if (len > 0 && locungetc((*chcount), ch) == EOF)
860 		(*flag_eof) = 1;
861 	if (ptr == start)
862 		return (0);	/* no match */
863 	if (stow && ((type != 'c') && (type != 'C')))
864 		*ptr = '\0';
865 	return (1);	/* successful match */
866 }
867 
868 /* This function initializes arglst, to contain the appropriate */
869 /* va_list values for the first MAXARGS arguments. */
870 /* WARNING: this code assumes that the sizes of all pointer types */
871 /* are the same. (Code similar to that in the portable doprnt.c */
872 /* should be used if this assumption is not true for a */
873 /* particular port.) */
874 
875 #ifdef	_WIDE
876 static int
877 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[])
878 #else  /* _WIDE */
879 static int
880 _mkarglst(const char *fmt, stva_list args, stva_list arglst[])
881 #endif /* _WIDE */
882 {
883 #ifdef	_WIDE
884 #define	STRCHR	wcschr
885 #define	STRSPN	wcsspn
886 #define	ATOI(x)	_watoi((wchar_t *)x)
887 #define	SPNSTR1	L"01234567890"
888 #define	SPNSTR2	L"# +-.0123456789hL$"
889 #else  /* _WIDE */
890 #define	STRCHR	strchr
891 #define	STRSPN	strspn
892 #define	ATOI(x)	atoi(x)
893 #define	SPNSTR1	"01234567890"
894 #define	SPNSTR2	"# +-.0123456789hL$"
895 #endif /* _WIDE */
896 
897 	int maxnum, curargno;
898 	size_t n;
899 
900 	maxnum = -1;
901 	curargno = 0;
902 
903 	while ((fmt = STRCHR(fmt, '%')) != NULL) {
904 		fmt++;	/* skip % */
905 		if (*fmt == '*' || *fmt == '%')
906 			continue;
907 		if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') {
908 			/* convert to zero base */
909 			curargno = ATOI(fmt) - 1;
910 			fmt += n + 1;
911 		}
912 
913 		if (maxnum < curargno)
914 			maxnum = curargno;
915 		curargno++;	/* default to next in list */
916 
917 		fmt += STRSPN(fmt, SPNSTR2);
918 		if (*fmt == '[') {
919 			int		i;
920 			fmt++; /* has to be at least on item in scan list */
921 			if (*fmt == ']') {
922 				fmt++;
923 			}
924 			while (*fmt != ']') {
925 				if (*fmt == L'\0') {
926 					return (-1); /* bad format */
927 #ifdef	_WIDE
928 				} else {
929 					fmt++;
930 				}
931 #else  /* _WIDE */
932 				} else if (isascii(*fmt)) {
933 					fmt++;
934 				} else {
935 					i = mblen((const char *)
936 						fmt, MB_CUR_MAX);
937 					if (i <= 0) {
938 						return (-1);
939 					} else {
940 						fmt += i;
941 					}
942 				}
943 #endif /* _WIDE */
944 			}
945 		}
946 	}
947 	if (maxnum > MAXARGS)
948 		maxnum = MAXARGS;
949 	for (n = 0; n <= maxnum; n++) {
950 		arglst[n] = args;
951 		(void) va_arg(args.ap, void *);
952 	}
953 	return (0);
954 }
955 
956 
957 /*
958  * For wide character handling
959  */
960 #ifdef	_WIDE
961 static int
962 wstring(int *chcount, int *flag_eof, int stow, int type,
963 	int len, FILE *iop, va_list *listp)
964 {
965 	wint_t	wch;
966 	wchar_t	*ptr;
967 	wchar_t	*wstart;
968 	int	dummy;
969 
970 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
971 
972 	if ((type == 'c') && len == MAXINT)
973 		len = 1;
974 	while (((wch = _wd_getwc(chcount, iop)) != WEOF) &&
975 	    !(type == 's' && iswspace(wch))) {
976 		if (stow)
977 			*ptr = wch;
978 		ptr++;
979 		if (--len <= 0)
980 			break;
981 	}
982 	if (wch == WEOF) {
983 		*flag_eof = 1;
984 		(*chcount) -= 1;
985 	} else {
986 		if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF)
987 			*flag_eof = 1;
988 	}
989 	if (ptr == wstart)
990 		return (0); /* no match */
991 	if (stow && (type != 'c'))
992 		*ptr = '\0';
993 	return (1); /* successful match */
994 }
995 #else  /* _WIDE */
996 static int
997 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop,
998 	va_list *listp)
999 {
1000 	int	wch;
1001 	wchar_t	*ptr;
1002 	wchar_t	*wstart;
1003 
1004 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1005 
1006 	if ((type == 'c') && len == MAXINT)
1007 		len = 1;
1008 	while (((wch = _bi_getwc(iop)) != EOF) &&
1009 	    !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) {
1010 		(*chcount) += _scrwidth((wchar_t)wch);
1011 		if (stow)
1012 			*ptr = wch;
1013 		ptr++;
1014 		if (--len <= 0)
1015 			break;
1016 	}
1017 	if (wch == EOF) {
1018 		(*flag_eof) = 1;
1019 		(*chcount) -= 1;
1020 	} else {
1021 		if (len > 0 && _bi_ungetwc(wch, iop) == EOF)
1022 			(*flag_eof) = 1;
1023 	}
1024 	if (ptr == wstart)
1025 		return (0); /* no match */
1026 	if (stow && (type != 'c'))
1027 		*ptr = '\0';
1028 	return (1); /* successful match */
1029 }
1030 #endif /* _WIDE */
1031 
1032 #ifdef	_WIDE
1033 static wint_t
1034 _wd_getwc(int *chcount, FILE *iop)
1035 {
1036 	wint_t	wc;
1037 	int	len;
1038 
1039 	if (!(iop->_flag & _IOWRT)) {
1040 		/* call from fwscanf, wscanf */
1041 		wc = __fgetwc_xpg5(iop);
1042 		(*chcount)++;
1043 		return (wc);
1044 	} else {
1045 		/* call from swscanf */
1046 		if (*iop->_ptr == '\0')
1047 			return (WEOF);
1048 		len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr,
1049 		    MB_CUR_MAX);
1050 		if (len == -1)
1051 			return (WEOF);
1052 		iop->_ptr += len;
1053 		(*chcount)++;
1054 		return (wc);
1055 	}
1056 }
1057 
1058 static wint_t
1059 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop)
1060 {
1061 	wint_t	ret;
1062 	int	len;
1063 	char	mbs[MB_LEN_MAX];
1064 
1065 	if (wc == WEOF)
1066 		return (WEOF);
1067 
1068 	if (!(iop->_flag & _IOWRT)) {
1069 		/* call from fwscanf, wscanf */
1070 		ret = __ungetwc_xpg5((wint_t)wc, iop);
1071 		if (ret != (wint_t)wc)
1072 			return (WEOF);
1073 		(*chcount)--;
1074 		return (ret);
1075 	} else {
1076 		/* call from swscanf */
1077 		len = wctomb(mbs, wc);
1078 		if (len == -1)
1079 			return (WEOF);
1080 		iop->_ptr -= len;
1081 		(*chcount)--;
1082 		return ((wint_t)wc);
1083 	}
1084 }
1085 
/*
 * _watoi() - convert a leading run of ASCII digits in a wide string.
 *
 * Only wide characters in the range 0..255 that isdigit() accepts are
 * consumed; there is no sign or white-space handling.  Returns the
 * decimal value of the run, 0 if the string does not start with a
 * digit, and INT_MAX if the value does not fit in an int (rather than
 * overflowing a signed int).
 */
static int
_watoi(wchar_t *fmt)
{
	int	n = 0;
	wchar_t	ch;

	for (ch = *fmt; (ch >= 0) && (ch < 256) && isdigit((int)ch);
	    ch = *++fmt) {
		int	digit = ch - '0';

		/* would n * 10 + digit exceed INT_MAX? */
		if (n > (INT_MAX - digit) / 10)
			return (INT_MAX);
		n = n * 10 + digit;
	}
	return (n);
}
1103 #endif /* _WIDE */
1104 
/*
 * wbrstring() - match a %l[ field, storing wide characters.
 *
 * Each input character is converted back to a multibyte string and
 * tested against brstr, the scan set rewritten by the caller as an
 * fnmatch() pattern.  Characters are accepted while they match and
 * while len (the field width) is not exhausted.  Accepted characters
 * are stored through the wchar_t * fetched from *listp when stow is set
 * and the result is null terminated.  *chcount is advanced only for
 * accepted characters.  type is not used.
 *
 * Sets *flag_eof when input is exhausted.  Returns 1 if at least one
 * character was matched, 0 otherwise (including when a character cannot
 * be converted with wctomb()).
 */
/* ARGSUSED3 */
static int
wbrstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	wchar_t	*ptr, *start;
#ifdef	_WIDE
	/*
	 * Throw-away counter for _wd_getwc()/_wd_ungetwc(), which
	 * increment and decrement it; it must start out initialized.
	 */
	int	dummy = 0;
#endif /* _WIDE */

	start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;

#ifdef	_WIDE
	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
#else  /* _WIDE */
	while ((wch = _bi_getwc(iop)) != WEOF) {
#endif /* _WIDE */
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when the character is rejected */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE)) {
			break;
		}
		if (len <= 0)
			break;
#ifdef	_WIDE
		(*chcount)++;
#else  /* _WIDE */
		(*chcount) += _scrwidth(wch);
#endif /* _WIDE */
		len--;
		if (stow) {
			*ptr = wch;
		}
		ptr++;
		if (len <= 0)
			break;
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/* width not exhausted: wch was rejected, give it back */
#ifdef	_WIDE
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
#else  /* _WIDE */
		if (len > 0 && _bi_ungetwc(wch, iop) == WEOF)
#endif /* _WIDE */
			*flag_eof = 1;
	}
	if (ptr == start)
		return (0);				/* no match */
	if (stow)
		*ptr = L'\0';
	return (1);					/* successful match */
}
1168 
1169 #ifdef	_WIDE
/*
 * brstring() - wide build: match a %[ field, storing multibyte chars.
 *
 * Each wide input character is converted to its multibyte form and
 * tested against brstr, the scan set rewritten by the caller as an
 * fnmatch() pattern.  A character is accepted while it matches and
 * while its multibyte length still fits in len, which is therefore
 * consumed in bytes.  Accepted bytes are stored through the char *
 * fetched from *listp when stow is set and the result is NUL
 * terminated.  *chcount is advanced by one per accepted character.
 * type is not used.
 *
 * Sets *flag_eof when input is exhausted.  Returns 1 if at least one
 * character was matched, 0 otherwise (including when a character cannot
 * be converted with wctomb()).
 */
static int
brstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	char	*ptr, *start, *p;
	/*
	 * Throw-away counter for _wd_getwc()/_wd_ungetwc(), which
	 * increment and decrement it; it must start out initialized.
	 */
	int	dummy = 0;

	start = ptr = stow ? va_arg(*listp, char *) : NULL;

	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
		p = str;
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when the character is rejected */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE)) {
			break;
		}
		if (len < i)
			break;		/* would not fit in the field */
		(*chcount)++;
		len -= i;
		if (stow) {
			while (i-- > 0) {
				*ptr++ = *p++;
			}
		} else {
			/* only track how much would have been stored */
			while (i-- > 0) {
				ptr++;
			}
		}
		if (len <= 0)
			break;
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/* width not exhausted: wch was not consumed, give it back */
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
			*flag_eof = 1;
	}
	if (ptr == start)
		return (0);				/* no match */
	if (stow)
		*ptr = '\0';
	return (1);					/* successful match */
}
1224 #endif /* _WIDE */
1225 
1226 /*
1227  * Locally define getwc and ungetwc
1228  */
/*
 * _bi_getwc - read one wide character from iop using wlocgetc().
 *
 * An ASCII byte is returned as is.  Otherwise up to MB_CUR_MAX bytes are
 * collected (stopping early at a newline, which is pushed back, or at
 * EOF) and converted with mbtowc().  Bytes read beyond the ones mbtowc()
 * consumed are pushed back with wlocungetc().
 *
 * Returns the wide character, or WEOF at end of input.  If the bytes do
 * not form a legal character, all but the first byte are pushed back,
 * errno is set to EILSEQ and WEOF is returned.
 */
static int
_bi_getwc(FILE *iop)
{
	int c;
	wchar_t intcode;
	int i, nbytes, cur_max;
	char buff[MB_LEN_MAX];

	if ((c = wlocgetc()) == EOF)
		return (WEOF);

	if (isascii(c))	/* ASCII code */
		return ((wint_t)c);

	buff[0] = (char)c;

	/*
	 * MB_CUR_MAX never exceeds MB_LEN_MAX, so it is a safe (and
	 * cheaper) bound on the number of bytes to collect.
	 */
	cur_max = (int)MB_CUR_MAX;
	for (i = 1; i < cur_max; i++) {
		c = wlocgetc();
		if (c == '\n') {
			(void) wlocungetc(c);
			break;
		}
		if (c == EOF) {
			/* this still may be a valid multibyte character */
			break;
		}
		buff[i] = (char)c;
	}

	/*
	 * Note on both pushback loops below:
	 *	the number of pushback characters that ungetc() can
	 *	handle must be >= (MB_LEN_MAX - 1).  In Solaris 2.x, the
	 *	number of pushback characters is 4.
	 *
	 *	The byte is converted through unsigned char so that its
	 *	value is always in the range 0..UCHAR_MAX.  A signed
	 *	conversion would turn the byte 0xFF into -1, which is
	 *	indistinguishable from EOF.
	 */
	if ((nbytes = mbtowc(&intcode, buff, i)) == -1) {
		/*
		 * If mbtowc fails, the input was not a legal character.
		 *	ungetc all but one character.
		 */
		while (i-- > 1) {
			(void) wlocungetc((unsigned char)buff[i]);
		}
		errno = EILSEQ;
		return (WEOF); /* Illegal EUC sequence. */
	}

	/* Return the bytes that were read but not used by mbtowc(). */
	while (i-- > nbytes) {
		(void) wlocungetc((unsigned char)buff[i]);
	}
	return ((int)intcode);
}
1290 
1291 static int
1292 _bi_ungetwc(wint_t wc, FILE *iop)
1293 {
1294 	char mbs[MB_LEN_MAX];
1295 	unsigned char *p;
1296 	int n;
1297 
1298 	if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0))
1299 		return (WEOF);
1300 
1301 	n = wctomb(mbs, (wchar_t)wc);
1302 	if (n <= 0)
1303 		return (WEOF);
1304 
1305 	if (iop->_ptr <= iop->_base) {
1306 		if (iop->_base == NULL) {
1307 			return (WEOF);
1308 		}
1309 		if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) {
1310 			++iop->_ptr;
1311 		} else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) {
1312 			return (WEOF);
1313 		}
1314 	}
1315 
1316 	p = (unsigned char *)(mbs+n-1); /* p points the last byte */
1317 	/* if _IOWRT is set to iop->_flag, it means this is */
1318 	/* an invocation from sscanf(), and in that time we */
1319 	/* don't touch iop->_cnt.  Otherwise, which means an */
1320 	/* invocation from fscanf() or scanf(), we touch iop->_cnt */
1321 	if ((iop->_flag & _IOWRT) == 0) {
1322 		/* scanf() and fscanf() */
1323 		iop->_cnt += n;
1324 		while (n--) {
1325 			*--iop->_ptr = *(p--);
1326 		}
1327 	} else {
1328 		/* sscanf() */
1329 		iop->_ptr -= n;
1330 	}
1331 	return (wc);
1332 }
1333