xref: /illumos-gate/usr/src/lib/libc/port/stdio/doscan.c (revision 12042ab213b3af68474f48555504db816a449211)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 #include "lint.h"
31 #include <sys/types.h>
32 #include "mtlib.h"
33 #include "file64.h"
34 #include <stdio.h>
35 #include <ctype.h>
36 #include <stdarg.h>
37 #include <values.h>
38 #include <errno.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <math.h>
42 #include <thread.h>
43 #include <synch.h>
44 #include <stdlib.h>
45 #include <fnmatch.h>
46 #include <limits.h>
47 #include <wchar.h>
48 #include <unistd.h>
49 #include "libc.h"
50 #include "stdiom.h"
51 #include "xpg6.h"
52 
#define	NCHARS	(1 << BITSPERBYTE)

/*
 * Byte-level input primitives.  Every macro below expects a FILE pointer
 * named `iop' to be in scope where it is expanded.
 *
 * If the _IOWRT flag is set, this must be a call from sscanf: the input is
 * then the NUL-terminated string addressed by iop->_ptr, and the terminating
 * NUL is reported as EOF without being consumed.  Otherwise the byte is
 * obtained with GETC().
 *
 * locgetc(cnt) adds one to cnt on every call, including the call that
 * returns EOF; locungetc(cnt, x) subtracts one on every call, including a
 * push-back of EOF.  Note that locungetc() does not store x: unless x is
 * EOF it only steps iop->_ptr back by one (and, for a real stream, bumps
 * iop->_cnt) and yields the byte found there.
 */
#define	locgetc(cnt)	((cnt) += 1, (iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	locungetc(cnt, x) ((cnt) -= 1, ((x) == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    (++iop->_cnt, *(--iop->_ptr))))

/*
 * Same as above but without a character counter; push-back on a real
 * stream goes through UNGETC().
 */
#define	wlocgetc()	((iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	wlocungetc(x) (((x) == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    UNGETC(x, iop)))

#define	MAXARGS	30	/* max. number of args for fast positional parameters */

/*
 * stva_list is used to subvert C's restriction that a variable with an
 * array type can not appear on the left hand side of an assignment operator.
 * By putting the array inside a structure, the functionality of assigning to
 * the whole array through a simple assignment is achieved.
 */
typedef struct stva_list {
	va_list	ap;
} stva_list;

static int number(int *, int *, int, int, int, int, FILE *, va_list *);
static int readchar(FILE *, int *);
static int string(int *, int *, int, int, int, char *, FILE *, va_list *);
static int wstring(int *, int *, int, int, int, FILE *, va_list *);
static int	wbrstring(int *, int *, int, int, int, FILE *,
	unsigned char *, va_list *);
#ifdef	_WIDE
static int	brstring(int *, int *, int, int, int, FILE *,
	unsigned char *, va_list *);
#endif
static int _bi_getwc(FILE *);
static int _bi_ungetwc(wint_t, FILE *);

#ifdef	_WIDE
static int _mkarglst(const wchar_t *, stva_list, stva_list[]);
static wint_t	_wd_getwc(int *, FILE *);
static wint_t	_wd_ungetwc(int *, wchar_t, FILE *);
static int	_watoi(wchar_t *);
#else  /* _WIDE */
static int _mkarglst(const char *, stva_list, stva_list[]);
#endif /* _WIDE */
103 
104 #ifndef	_WIDE
105 int
106 _doscan(FILE *iop, const char *fmt, va_list va_Alist)
107 {
108 	int ret;
109 	rmutex_t *lk;
110 
111 	if (iop->_flag & _IOWRT)
112 		ret = __doscan_u(iop, fmt, va_Alist, 0);
113 	else {
114 		FLOCKFILE(lk, iop);
115 		ret = __doscan_u(iop, fmt, va_Alist, 0);
116 		FUNLOCKFILE(lk);
117 	}
118 	return (ret);
119 }
120 #endif  /* _WIDE */
121 
122 /* ARGSUSED3 */
123 #ifdef	_WIDE
124 int
125 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, int scflag)
126 #else  /* _WIDE */
127 int
128 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag)
129 #endif /* _WIDE */
130 {
131 #ifdef	_WIDE
132 	wchar_t	ch;
133 	wchar_t	inchar, size;
134 	int	nmatch = 0, len, stow;
135 #else  /* _WIDE */
136 	int	ch;
137 	int		nmatch = 0, len, inchar, stow, size;
138 #endif /* _WIDE */
139 
140 	unsigned char	*bracket_str = NULL;
141 	int		chcount, flag_eof;
142 	char	tab[NCHARS];
143 
144 	/* variables for postional parameters */
145 #ifdef	_WIDE
146 	const wchar_t	*sformat = fmt;	/* save the beginning of the format */
147 #else  /* _WIDE */
148 	const unsigned char	*fmt = (const unsigned char *)sfmt;
149 	const char	*sformat = sfmt; /* save the beginning of the format */
150 #endif /* _WIDE */
151 	int		fpos = 1;	/* 1 if first postional parameter */
152 	stva_list	args;	/* used to step through the argument list */
153 	stva_list	sargs;	/* used to save start of the argument list */
154 	stva_list	arglst[MAXARGS];
155 					/*
156 					 * array giving the appropriate values
157 					 * for va_arg() to retrieve the
158 					 * corresponding argument:
159 					 * arglst[0] is the first argument
160 					 * arglst[1] is the second argument,etc.
161 					 */
162 	/* Check if readable stream */
163 	if (!(iop->_flag & (_IOREAD | _IORW))) {
164 		errno = EBADF;
165 		return (EOF);
166 	}
167 
168 	/*
169 	 * Initialize args and sargs to the start of the argument list.
170 	 * We don't know any portable way to copy an arbitrary C object
171 	 * so we use a system-specific routine(probably a macro) from
172 	 * stdarg.h.  (Remember that if va_list is an array, in_args will
173 	 * be a pointer and &in_args won't be what we would want for
174 	 * memcpy.)
175 	 */
176 	va_copy(args.ap, va_Alist);
177 
178 	sargs = args;
179 
180 	chcount = 0; flag_eof = 0;
181 
182 	/*
183 	 * ****************************************************
184 	 * Main loop: reads format to determine a pattern,
185 	 *		and then goes to read input stream
186 	 *		in attempt to match the pattern.
187 	 * ****************************************************
188 	 */
189 	for (; ; ) {
190 		if ((ch = *fmt++) == '\0') {
191 			return (nmatch); /* end of format */
192 		}
193 #ifdef	_WIDE
194 		if (iswspace(ch)) {
195 			if (!flag_eof) {
196 				while (iswspace(inchar =
197 				    _wd_getwc(&chcount, iop)))
198 					;
199 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
200 					flag_eof = 1;
201 			}
202 			continue;
203 		}
204 		if (ch != '%' || (ch = *fmt++) == '%') {
205 			if (ch == '%') {
206 				if (!flag_eof) {
207 					while (iswspace(inchar =
208 					    _wd_getwc(&chcount, iop)))
209 						;
210 					if (_wd_ungetwc(&chcount, inchar, iop)
211 					    == WEOF)
212 						flag_eof = 1;
213 				}
214 			}
215 			if ((inchar = _wd_getwc(&chcount, iop)) == ch)
216 				continue;
217 			if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) {
218 				return (nmatch); /* failed to match input */
219 			}
220 			break;
221 		}
222 #else  /* _WIDE */
223 		if (isspace(ch)) {
224 			if (!flag_eof) {
225 				while (isspace(inchar = locgetc(chcount)))
226 					;
227 				if (locungetc(chcount, inchar) == EOF)
228 					flag_eof = 1;
229 
230 			}
231 			continue;
232 		}
233 		if (ch != '%' || (ch = *fmt++) == '%') {
234 			if (ch == '%') {
235 				if (!flag_eof) {
236 					while (isspace(inchar =
237 					    locgetc(chcount)))
238 						;
239 					if (locungetc(chcount, inchar) == EOF)
240 						flag_eof = 1;
241 				}
242 			}
243 			if ((inchar = locgetc(chcount)) == ch)
244 				continue;
245 			if (locungetc(chcount, inchar) != EOF) {
246 				return (nmatch); /* failed to match input */
247 			}
248 			break;
249 		}
250 #endif /* _WIDE */
251 
252 charswitch:	/* target of a goto 8-( */
253 
254 		if (ch == '*') {
255 			stow = 0;
256 			ch = *fmt++;
257 		} else
258 			stow = 1;
259 
260 #ifdef	_WIDE
261 		for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch));
262 		    ch = *fmt++)
263 			len = len * 10 + ch - '0';
264 #else  /* _WIDE */
265 		for (len = 0; isdigit(ch); ch = *fmt++)
266 			len = len * 10 + ch - '0';
267 #endif /* _WIDE */
268 
269 		if (ch == '$') {
270 			/*
271 			 * positional parameter handling - the number
272 			 * specified in len gives the argument to which
273 			 * the next conversion should be applied.
274 			 * WARNING: This implementation of positional
275 			 * parameters assumes that the sizes of all pointer
276 			 * types are the same. (Code similar to that
277 			 * in the portable doprnt.c should be used if this
278 			 * assumption does not hold for a particular
279 			 * port.)
280 			 */
281 			if (fpos) {
282 				if (_mkarglst(sformat, sargs, arglst) != 0) {
283 					return (EOF);
284 				} else {
285 					fpos = 0;
286 				}
287 			}
288 			if (len <= MAXARGS) {
289 				args = arglst[len - 1];
290 			} else {
291 				args = arglst[MAXARGS - 1];
292 				for (len -= MAXARGS; len > 0; len--)
293 					(void) va_arg(args.ap, void *);
294 			}
295 			len = 0;
296 			ch = *fmt++;
297 			goto charswitch;
298 		}
299 
300 		if (len == 0)
301 			len = MAXINT;
302 #ifdef	_WIDE
303 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
304 		    (size == 'j') || (size == 't') || (size == 'z'))
305 			ch = *fmt++;
306 #else  /* _WIDE */
307 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
308 		    (size == 'w') || (size == 'j') || (size == 't') ||
309 		    (size == 'z'))
310 			ch = *fmt++;
311 #endif /* _WIDE */
312 		if (size == 'l' && ch == 'l') {
313 			size = 'm';		/* size = 'm' if long long */
314 			ch = *fmt++;
315 		} else if (size == 'h' && ch == 'h') {
316 			size = 'b';		/* use size = 'b' if char */
317 			ch = *fmt++;
318 		} else if ((size == 't') || (size == 'z')) {
319 			size = 'l';
320 		} else if (size == 'j') {
321 #ifndef _LP64
322 			/* check scflag for size of u/intmax_t (32-bit libc) */
323 			if (!(scflag & _F_INTMAX32)) {
324 #endif
325 				size = 'm';
326 #ifndef _LP64
327 			}
328 #endif
329 		}
330 		if (ch == '\0') {
331 			return (EOF);		/* unexpected end of format */
332 		}
333 #ifdef	_WIDE
334 		if (ch == '[') {
335 			wchar_t	c;
336 			size_t	len;
337 			int	negflg = 0;
338 			wchar_t	*p;
339 			wchar_t	*wbracket_str;
340 			size_t	wlen, clen;
341 
342 			/* p points to the address of '[' */
343 			p = (wchar_t *)fmt - 1;
344 			len = 0;
345 			if (*fmt == '^') {
346 				len++;
347 				fmt++;
348 				negflg = 1;
349 			}
350 			if (((c = *fmt) == ']') || (c == '-')) {
351 				len++;
352 				fmt++;
353 			}
354 			while ((c = *fmt) != ']') {
355 				if (c == '\0') {
356 					return (EOF); /* unexpected EOF */
357 				} else {
358 					len++;
359 					fmt++;
360 				}
361 			}
362 			fmt++;
363 			len += 2;
364 			wbracket_str = (wchar_t *)
365 			    malloc(sizeof (wchar_t) * (len + 1));
366 			if (wbracket_str == NULL) {
367 				errno = ENOMEM;
368 				return (EOF);
369 			} else {
370 				(void) wmemcpy(wbracket_str,
371 				    (const wchar_t *)p, len);
372 				*(wbracket_str + len) = L'\0';
373 				if (negflg && *(wbracket_str + 1) == '^') {
374 					*(wbracket_str + 1) = L'!';
375 				}
376 			}
377 			wlen = wcslen(wbracket_str);
378 			clen = wcstombs((char *)NULL, wbracket_str, 0);
379 			if (clen == (size_t)-1) {
380 				free(wbracket_str);
381 				return (EOF);
382 			}
383 			bracket_str = (unsigned char *)
384 			    malloc(sizeof (unsigned char) * (clen + 1));
385 			if (bracket_str == NULL) {
386 				free(wbracket_str);
387 				errno = ENOMEM;
388 				return (EOF);
389 			}
390 			clen = wcstombs((char *)bracket_str, wbracket_str,
391 			    wlen + 1);
392 			free(wbracket_str);
393 			if (clen == (size_t)-1) {
394 				free(bracket_str);
395 				return (EOF);
396 			}
397 		}
398 #else  /* _WIDE */
399 		if (ch == '[') {
400 			if (size == 'l') {
401 				int	c, len, i;
402 				int	negflg = 0;
403 				unsigned char	*p;
404 
405 				p = (unsigned char *)(fmt - 1);
406 				len = 0;
407 				if (*fmt == '^') {
408 					len++;
409 					fmt++;
410 					negflg = 1;
411 				}
412 				if (((c = *fmt) == ']') || (c == '-')) {
413 					len++;
414 					fmt++;
415 				}
416 				while ((c = *fmt) != ']') {
417 					if (c == '\0') {
418 						return (EOF);
419 					} else if (isascii(c)) {
420 						len++;
421 						fmt++;
422 					} else {
423 						i = mblen((const char *)fmt,
424 						    MB_CUR_MAX);
425 						if (i <= 0) {
426 							return (EOF);
427 						} else {
428 							len += i;
429 							fmt += i;
430 						}
431 					}
432 				}
433 				fmt++;
434 				len += 2;
435 				bracket_str = (unsigned char *)
436 				    malloc(sizeof (unsigned char) * (len + 1));
437 				if (bracket_str == NULL) {
438 					errno = ENOMEM;
439 					return (EOF);
440 				} else {
441 					(void) strncpy((char *)bracket_str,
442 					    (const char *)p, len);
443 					*(bracket_str + len) = '\0';
444 					if (negflg &&
445 					    *(bracket_str + 1) == '^') {
446 						*(bracket_str + 1) = '!';
447 					}
448 				}
449 			} else {
450 				int	t = 0;
451 				int	b, c, d;
452 
453 				if (*fmt == '^') {
454 					t++;
455 					fmt++;
456 				}
457 				(void) memset(tab, !t, NCHARS);
458 				if ((c = *fmt) == ']' || c == '-') {
459 					tab[c] = t;
460 					fmt++;
461 				}
462 
463 				while ((c = *fmt) != ']') {
464 					if (c == '\0') {
465 						return (EOF);
466 					}
467 					b = *(fmt - 1);
468 					d = *(fmt + 1);
469 					if ((c == '-') && (d != ']') &&
470 					    (b < d)) {
471 						(void) memset(&tab[b], t,
472 						    d - b + 1);
473 						fmt += 2;
474 					} else {
475 						tab[c] = t;
476 						fmt++;
477 					}
478 				}
479 				fmt++;
480 			}
481 		}
482 #endif /* _WIDE */
483 
484 #ifdef	_WIDE
485 		if ((ch >= 0) && (ch < 256) &&
486 		    isupper((int)ch)) { /* no longer documented */
487 			if (_lib_version == c_issue_4) {
488 				if (size != 'm' && size != 'L')
489 					size = 'l';
490 			}
491 			ch = _tolower((int)ch);
492 		}
493 		if (ch != 'n' && !flag_eof) {
494 			if (ch != 'c' && ch != 'C' && ch != '[') {
495 				while (iswspace(inchar =
496 				    _wd_getwc(&chcount, iop)))
497 					;
498 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
499 					break;
500 
501 			}
502 		}
503 #else  /* _WIDE */
504 		if (isupper(ch)) { /* no longer documented */
505 			if (_lib_version == c_issue_4) {
506 				if (size != 'm' && size != 'L')
507 					size = 'l';
508 			}
509 			ch = _tolower(ch);
510 		}
511 		if (ch != 'n' && !flag_eof) {
512 			if (ch != 'c' && ch != 'C' && ch != '[') {
513 				while (isspace(inchar = locgetc(chcount)))
514 					;
515 				if (locungetc(chcount, inchar) == EOF)
516 					break;
517 			}
518 		}
519 #endif /* _WIDE */
520 
521 		switch (ch) {
522 		case 'C':
523 		case 'S':
524 		case 'c':
525 		case 's':
526 #ifdef	_WIDE
527 			if ((size == 'l') || (size == 'C') || (size == 'S'))
528 #else  /* _WIDE */
529 			if ((size == 'w') || (size == 'l') || (size == 'C') ||
530 			    (size == 'S'))
531 #endif /* _WIDE */
532 			{
533 				size = wstring(&chcount, &flag_eof, stow,
534 				    (int)ch, len, iop, &args.ap);
535 			} else {
536 				size = string(&chcount, &flag_eof, stow,
537 				    (int)ch, len, tab, iop, &args.ap);
538 			}
539 			break;
540 		case '[':
541 			if (size == 'l') {
542 				size = wbrstring(&chcount, &flag_eof, stow,
543 				    (int)ch, len, iop, bracket_str, &args.ap);
544 				free(bracket_str);
545 				bracket_str = NULL;
546 			} else {
547 #ifdef	_WIDE
548 				size = brstring(&chcount, &flag_eof, stow,
549 				    (int)ch, len, iop, bracket_str, &args.ap);
550 				free(bracket_str);
551 				bracket_str = NULL;
552 #else  /* _WIDE */
553 				size = string(&chcount, &flag_eof, stow,
554 				    ch, len, tab, iop, &args.ap);
555 #endif /* _WIDE */
556 			}
557 			break;
558 
559 		case 'n':
560 			if (stow == 0)
561 				continue;
562 			if (size == 'b')	/* char */
563 				*va_arg(args.ap, char *) = (char)chcount;
564 			else if (size == 'h')
565 				*va_arg(args.ap, short *) = (short)chcount;
566 			else if (size == 'l')
567 				*va_arg(args.ap, long *) = (long)chcount;
568 			else if (size == 'm') /* long long */
569 				*va_arg(args.ap, long long *) =
570 				    (long long) chcount;
571 			else
572 				*va_arg(args.ap, int *) = (int)chcount;
573 			continue;
574 
575 		case 'i':
576 		default:
577 			size = number(&chcount, &flag_eof, stow, (int)ch,
578 			    len, (int)size, iop, &args.ap);
579 			break;
580 		}
581 		if (size)
582 			nmatch += stow;
583 		else {
584 			return ((flag_eof && !nmatch) ? EOF : nmatch);
585 		}
586 		continue;
587 	}
588 	if (bracket_str)
589 		free(bracket_str);
590 	return (nmatch != 0 ? nmatch : EOF); /* end of input */
591 }
592 
593 /* ****************************************************************** */
594 /* Functions to read the input stream in an attempt to match incoming */
595 /* data to the current pattern from the main loop of _doscan(). */
596 /* ****************************************************************** */
597 static int
598 number(int *chcount, int *flag_eof, int stow, int type, int len, int size,
599 	FILE *iop, va_list *listp)
600 {
601 	char	numbuf[64];
602 	char	*np = numbuf;
603 	int	c, base, inchar, lookahead;
604 	int	digitseen = 0, floater = 0, negflg = 0;
605 	int	lc;
606 	long long	lcval = 0LL;
607 
608 	switch (type) {
609 	case 'e':
610 	case 'f':
611 	case 'g':
612 		/*
613 		 * lc = 0 corresponds to c90 mode: do not recognize
614 		 *	hexadecimal fp strings; attempt to push back
615 		 *	all unused characters read
616 		 *
617 		 * lc = -1 corresponds to c99 mode: recognize hexa-
618 		 *	decimal fp strings; push back at most one
619 		 *	unused character
620 		 */
621 		lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0;
622 		floater = 1;
623 		break;
624 
625 	case 'a':
626 		lc = -1;
627 		floater = 1;
628 		break;
629 
630 	case 'd':
631 	case 'u':
632 	case 'i':
633 		base = 10;
634 		break;
635 	case 'o':
636 		base = 8;
637 		break;
638 	case 'p':
639 #ifdef	_LP64
640 		size = 'l'; /* pointers are long in LP64 */
641 #endif	/*	_LP64	*/
642 		/* FALLTHROUGH */
643 	case 'x':
644 		base = 16;
645 		break;
646 	default:
647 		return (0); /* unrecognized conversion character */
648 	}
649 
650 	if (floater != 0) {
651 		/*
652 		 * Handle floating point with
653 		 * file_to_decimal.
654 		 */
655 		decimal_mode		dm;
656 		decimal_record		dr;
657 		fp_exception_field_type	efs;
658 		enum decimal_string_form form;
659 		char			*echar;
660 		int			nread;
661 		char			buffer[1024+1];
662 		char			*nb = buffer;
663 
664 		if (len > 1024)
665 			len = 1024;
666 		file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread);
667 		if (lc == -1) {
668 			/*
669 			 * In C99 mode, the entire string read has to be
670 			 * accepted in order to qualify as a match
671 			 */
672 			if (nb != buffer + nread)
673 				form = invalid_form;
674 		}
675 		if (stow && (form != invalid_form)) {
676 #if defined(__sparc)
677 			dm.rd = _QgetRD();
678 			if (size == 'L') {		/* long double */
679 				if ((int)form < 0)
680 					__hex_to_quadruple(&dr, dm.rd,
681 					    va_arg(*listp, quadruple *), &efs);
682 				else
683 					decimal_to_quadruple(
684 					    va_arg(*listp, quadruple *),
685 					    &dm, &dr, &efs);
686 			}
687 #elif defined(__i386) || defined(__amd64)
688 			dm.rd = __xgetRD();
689 			if (size == 'L') {		/* long double */
690 				if ((int)form < 0)
691 					__hex_to_extended(&dr, dm.rd,
692 					    va_arg(*listp, extended *), &efs);
693 				else
694 					decimal_to_extended(
695 					    va_arg(*listp, extended *),
696 					    &dm, &dr, &efs);
697 			}
698 #else
699 #error Unknown architecture
700 #endif
701 			else if (size == 'l') {		/* double */
702 				if ((int)form < 0)
703 					__hex_to_double(&dr, dm.rd,
704 					    va_arg(*listp, double *), &efs);
705 				else
706 					decimal_to_double(
707 					    va_arg(*listp, double *),
708 					    &dm, &dr, &efs);
709 			} else {			/* float */
710 				if ((int)form < 0)
711 					__hex_to_single(&dr, dm.rd,
712 					    va_arg(*listp, single *), &efs);
713 				else
714 					decimal_to_single((single *)
715 					    va_arg(*listp, single *),
716 					    &dm, &dr, &efs);
717 			}
718 			if ((efs & (1 << fp_overflow)) != 0) {
719 				errno = ERANGE;
720 			}
721 			if ((efs & (1 << fp_underflow)) != 0) {
722 				errno = ERANGE;
723 			}
724 		}
725 		(*chcount) += nread;	/* Count characters read. */
726 		c = locgetc((*chcount));
727 		if (locungetc((*chcount), c) == EOF)
728 			*flag_eof = 1;
729 		return ((form == invalid_form) ? 0 : 1);
730 				/* successful match if non-zero */
731 	}
732 
733 	switch (c = locgetc((*chcount))) {
734 	case '-':
735 		negflg++;
736 		/* FALLTHROUGH */
737 	case '+':
738 		if (--len <= 0)
739 			break;
740 		if ((c = locgetc((*chcount))) != '0')
741 			break;
742 		/* FALLTHROUGH */
743 	case '0':
744 		/*
745 		 * If %i or %x, the characters 0x or 0X may optionally precede
746 		 * the sequence of letters and digits (base 16).
747 		 */
748 		if ((type != 'i' && type != 'x') || (len <= 1))
749 			break;
750 		if (((inchar = locgetc((*chcount))) == 'x') ||
751 		    (inchar == 'X')) {
752 			lookahead = readchar(iop, chcount);
753 			if (isxdigit(lookahead)) {
754 				base = 16;
755 
756 				if (len <= 2) {
757 					(void) locungetc((*chcount), lookahead);
758 					/* Take into account the 'x' */
759 					len -= 1;
760 				} else {
761 					c = lookahead;
762 					/* Take into account '0x' */
763 					len -= 2;
764 				}
765 			} else {
766 				(void) locungetc((*chcount), lookahead);
767 				(void) locungetc((*chcount), inchar);
768 			}
769 		} else {
770 			/* inchar wans't 'x'. */
771 			(void) locungetc((*chcount), inchar); /* Put it back. */
772 			if (type == 'i') /* Only %i accepts an octal. */
773 				base = 8;
774 		}
775 	}
776 	for (; --len  >= 0; *np++ = (char)c, c = locgetc((*chcount))) {
777 		if (np > numbuf + 62) {
778 			errno = ERANGE;
779 			return (0);
780 		}
781 		if (isdigit(c) || base == 16 && isxdigit(c)) {
782 			int digit = c - (isdigit(c) ? '0' :
783 			    isupper(c) ? 'A' - 10 : 'a' - 10);
784 			if (digit >= base)
785 				break;
786 			if (stow)
787 				lcval = base * lcval + digit;
788 			digitseen++;
789 			continue;
790 		}
791 		break;
792 	}
793 
794 	if (stow && digitseen) {
795 		/* suppress possible overflow on 2's-comp negation */
796 		if (negflg && lcval != (1ULL << 63))
797 			lcval = -lcval;
798 		switch (size) {
799 			case 'm':
800 				*va_arg(*listp, long long *) = lcval;
801 				break;
802 			case 'l':
803 				*va_arg(*listp, long *) = (long)lcval;
804 				break;
805 			case 'h':
806 				*va_arg(*listp, short *) = (short)lcval;
807 				break;
808 			case 'b':
809 				*va_arg(*listp, char *) = (char)lcval;
810 				break;
811 			default:
812 				*va_arg(*listp, int *) = (int)lcval;
813 				break;
814 		}
815 	}
816 	if (locungetc((*chcount), c) == EOF)
817 		*flag_eof = 1;
818 	return (digitseen); /* successful match if non-zero */
819 }
820 
821 /* Get a character. If not using sscanf and at the buffer's end */
822 /* then do a direct read(). Characters read via readchar() */
823 /* can be  pushed back on the input stream by locungetc((*chcount),) */
824 /* since there is padding allocated at the end of the stream buffer. */
825 static int
826 readchar(FILE *iop, int *chcount)
827 {
828 	int	inchar;
829 	char	buf[1];
830 
831 	if ((iop->_flag & _IOWRT) || (iop->_cnt != 0)) {
832 		inchar = locgetc((*chcount));
833 	} else {
834 		if (_xread(iop, buf, 1) != 1)
835 			return (EOF);
836 		inchar = (int)buf[0];
837 		(*chcount) += 1;
838 	}
839 	return (inchar);
840 }
841 
842 static int
843 string(int *chcount, int *flag_eof, int stow, int type, int len, char *tab,
844 	FILE *iop, va_list *listp)
845 {
846 	int	ch;
847 	char	*ptr;
848 	char	*start;
849 
850 	start = ptr = stow ? va_arg(*listp, char *) : NULL;
851 	if (((type == 'c') || (type == 'C')) && len == MAXINT)
852 		len = 1;
853 #ifdef	_WIDE
854 	while ((ch = locgetc((*chcount))) != EOF &&
855 	    !(((type == 's') || (type == 'S')) && isspace(ch))) {
856 #else  /* _WIDE */
857 	while ((ch = locgetc((*chcount))) != EOF &&
858 	    !(((type == 's') || (type == 'S')) &&
859 	    isspace(ch) || type == '[' && tab[ch])) {
860 #endif /* _WIDE */
861 		if (stow)
862 			*ptr = (char)ch;
863 		ptr++;
864 		if (--len <= 0)
865 			break;
866 	}
867 	if (ch == EOF) {
868 		(*flag_eof) = 1;
869 		(*chcount) -= 1;
870 	} else if (len > 0 && locungetc((*chcount), ch) == EOF)
871 		(*flag_eof) = 1;
872 	if (ptr == start)
873 		return (0);	/* no match */
874 	if (stow && ((type != 'c') && (type != 'C')))
875 		*ptr = '\0';
876 	return (1);	/* successful match */
877 }
878 
879 /* This function initializes arglst, to contain the appropriate */
880 /* va_list values for the first MAXARGS arguments. */
881 /* WARNING: this code assumes that the sizes of all pointer types */
882 /* are the same. (Code similar to that in the portable doprnt.c */
883 /* should be used if this assumption is not true for a */
884 /* particular port.) */
885 
886 #ifdef	_WIDE
887 static int
888 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[])
889 #else  /* _WIDE */
890 static int
891 _mkarglst(const char *fmt, stva_list args, stva_list arglst[])
892 #endif /* _WIDE */
893 {
894 #ifdef	_WIDE
895 #define	STRCHR	wcschr
896 #define	STRSPN	wcsspn
897 #define	ATOI(x)	_watoi((wchar_t *)x)
898 #define	SPNSTR1	L"01234567890"
899 #define	SPNSTR2	L"# +-.0123456789hL$"
900 #else  /* _WIDE */
901 #define	STRCHR	strchr
902 #define	STRSPN	strspn
903 #define	ATOI(x)	atoi(x)
904 #define	SPNSTR1	"01234567890"
905 #define	SPNSTR2	"# +-.0123456789hL$"
906 #endif /* _WIDE */
907 
908 	int maxnum, curargno;
909 	size_t n;
910 
911 	maxnum = -1;
912 	curargno = 0;
913 
914 	while ((fmt = STRCHR(fmt, '%')) != NULL) {
915 		fmt++;	/* skip % */
916 		if (*fmt == '*' || *fmt == '%')
917 			continue;
918 		if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') {
919 			/* convert to zero base */
920 			curargno = ATOI(fmt) - 1;
921 			fmt += n + 1;
922 		}
923 
924 		if (maxnum < curargno)
925 			maxnum = curargno;
926 		curargno++;	/* default to next in list */
927 
928 		fmt += STRSPN(fmt, SPNSTR2);
929 		if (*fmt == '[') {
930 			int	i;
931 			fmt++; /* has to be at least on item in scan list */
932 			if (*fmt == ']') {
933 				fmt++;
934 			}
935 			while (*fmt != ']') {
936 				if (*fmt == L'\0') {
937 					return (-1); /* bad format */
938 #ifdef	_WIDE
939 				} else {
940 					fmt++;
941 				}
942 #else  /* _WIDE */
943 				} else if (isascii(*fmt)) {
944 					fmt++;
945 				} else {
946 					i = mblen((const char *)
947 					    fmt, MB_CUR_MAX);
948 					if (i <= 0) {
949 						return (-1);
950 					} else {
951 						fmt += i;
952 					}
953 				}
954 #endif /* _WIDE */
955 			}
956 		}
957 	}
958 	if (maxnum > MAXARGS)
959 		maxnum = MAXARGS;
960 	for (n = 0; n <= maxnum; n++) {
961 		arglst[n] = args;
962 		(void) va_arg(args.ap, void *);
963 	}
964 	return (0);
965 }
966 
967 
968 /*
969  * For wide character handling
970  */
971 
972 #ifdef	_WIDE
973 static int
974 wstring(int *chcount, int *flag_eof, int stow, int type,
975 	int len, FILE *iop, va_list *listp)
976 {
977 	wint_t	wch;
978 	wchar_t	*ptr;
979 	wchar_t	*wstart;
980 	int	dummy;
981 
982 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
983 
984 	if ((type == 'c') && len == MAXINT)
985 		len = 1;
986 	while (((wch = _wd_getwc(chcount, iop)) != WEOF) &&
987 	    !(type == 's' && iswspace(wch))) {
988 		if (stow)
989 			*ptr = wch;
990 		ptr++;
991 		if (--len <= 0)
992 			break;
993 	}
994 	if (wch == WEOF) {
995 		*flag_eof = 1;
996 		(*chcount) -= 1;
997 	} else {
998 		if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF)
999 			*flag_eof = 1;
1000 	}
1001 	if (ptr == wstart)
1002 		return (0); /* no match */
1003 	if (stow && (type != 'c'))
1004 		*ptr = '\0';
1005 	return (1); /* successful match */
1006 }
1007 
1008 #else  /* _WIDE */
1009 static int
1010 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop,
1011 	va_list *listp)
1012 {
1013 	int	wch;
1014 	wchar_t	*ptr;
1015 	wchar_t	*wstart;
1016 
1017 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1018 
1019 	if ((type == 'c') && len == MAXINT)
1020 		len = 1;
1021 	while (((wch = _bi_getwc(iop)) != EOF) &&
1022 	    !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) {
1023 		(*chcount) += _scrwidth((wchar_t)wch);
1024 		if (stow)
1025 			*ptr = wch;
1026 		ptr++;
1027 		if (--len <= 0)
1028 			break;
1029 	}
1030 	if (wch == EOF) {
1031 		(*flag_eof) = 1;
1032 		(*chcount) -= 1;
1033 	} else {
1034 		if (len > 0 && _bi_ungetwc(wch, iop) == EOF)
1035 			(*flag_eof) = 1;
1036 	}
1037 	if (ptr == wstart)
1038 		return (0); /* no match */
1039 	if (stow && (type != 'c'))
1040 		*ptr = '\0';
1041 	return (1); /* successful match */
1042 }
1043 #endif /* _WIDE */
1044 
1045 #ifdef	_WIDE
1046 static wint_t
1047 _wd_getwc(int *chcount, FILE *iop)
1048 {
1049 	wint_t	wc;
1050 	int	len;
1051 
1052 	if (!(iop->_flag & _IOWRT)) {
1053 		/* call from fwscanf, wscanf */
1054 		wc = __fgetwc_xpg5(iop);
1055 		(*chcount)++;
1056 		return (wc);
1057 	} else {
1058 		/* call from swscanf */
1059 		if (*iop->_ptr == '\0')
1060 			return (WEOF);
1061 		len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr,
1062 		    MB_CUR_MAX);
1063 		if (len == -1)
1064 			return (WEOF);
1065 		iop->_ptr += len;
1066 		(*chcount)++;
1067 		return (wc);
1068 	}
1069 }
1070 
1071 static wint_t
1072 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop)
1073 {
1074 	wint_t	ret;
1075 	int	len;
1076 	char	mbs[MB_LEN_MAX];
1077 
1078 	if (wc == WEOF)
1079 		return (WEOF);
1080 
1081 	if (!(iop->_flag & _IOWRT)) {
1082 		/* call from fwscanf, wscanf */
1083 		ret = __ungetwc_xpg5((wint_t)wc, iop);
1084 		if (ret != (wint_t)wc)
1085 			return (WEOF);
1086 		(*chcount)--;
1087 		return (ret);
1088 	} else {
1089 		/* call from swscanf */
1090 		len = wctomb(mbs, wc);
1091 		if (len == -1)
1092 			return (WEOF);
1093 		iop->_ptr -= len;
1094 		(*chcount)--;
1095 		return ((wint_t)wc);
1096 	}
1097 }
1098 
/*
 * _watoi() - convert the run of decimal digits at the start of a wide
 * string to an int.  Returns 0 when the string does not start with a
 * digit.  No sign and no leading white space is accepted.
 */
static int
_watoi(wchar_t *fmt)
{
	int	value = 0;
	wchar_t	*cp;

	for (cp = fmt; ; cp++) {
		wchar_t	wc = *cp;

		/* only values in 0 .. 255 are handed to isdigit() */
		if (wc < 0 || wc >= 256 || !isdigit((int)wc))
			break;
		value *= 10;
		value += wc - '0';
	}
	return (value);
}
1116 #endif /* _WIDE */
1117 
/*
 * wbrstring() - match a %l[ conversion, storing wide characters.
 *
 * brstr is the scan set, brackets included, in the form of an fnmatch()
 * bracket expression.  Every character read is converted to a multibyte
 * string and tested against brstr; scanning stops at the first character
 * that does not match, after len characters, or at the end of input.  With
 * stow set the accepted characters are stored through the wchar_t pointer
 * taken from *listp and terminated.
 *
 * Returns 1 if at least one character was accepted, 0 otherwise (also when
 * a character can not be converted by wctomb()); *flag_eof is set when the
 * end of input was reached.
 */
/* ARGSUSED3 */
static int
wbrstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	wchar_t	*ptr, *start;
#ifdef	_WIDE
	/*
	 * Scratch counter for _wd_getwc()/_wd_ungetwc(), which increment
	 * and decrement it; it has to start out with a defined value.
	 */
	int	dummy = 0;
#endif /* _WIDE */

	start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;

#ifdef	_WIDE
	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
#else  /* _WIDE */
	while ((wch = _bi_getwc(iop)) != WEOF) {
#endif /* _WIDE */
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when str is not in the set */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE)) {
			break;
		}
		if (len <= 0)
			break;
		/* only accepted characters are counted */
#ifdef	_WIDE
		(*chcount)++;
#else  /* _WIDE */
		(*chcount) += _scrwidth(wch);
#endif /* _WIDE */
		len--;
		if (stow) {
			*ptr = wch;
		}
		ptr++;
		if (len <= 0)
			break;
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/* len > 0 means we stopped on a character we did not take */
#ifdef	_WIDE
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
#else  /* _WIDE */
		if (len > 0 && _bi_ungetwc(wch, iop) == WEOF)
#endif /* _WIDE */
			*flag_eof = 1;
	}
	if (ptr == start)
		return (0);				/* no match */
	if (stow)
		*ptr = L'\0';
	return (1);					/* successful match */
}
1181 
1182 #ifdef	_WIDE
/*
 * Scan a run of characters accepted by the bracket pattern brstr and,
 * when stow is non-zero, store their multibyte form through the next
 * char * argument taken from listp.
 *
 * Each wide character read with _wd_getwc() is converted with wctomb()
 * and tested against brstr with fnmatch(FNM_NOESCAPE).  The scan stops
 * at the first character that does not match, when the remaining width
 * len (counted in bytes) is smaller than the character's multibyte
 * length, once len reaches zero, or when the reader returns WEOF.  A
 * character that stopped the scan while width remained (len > 0) is
 * pushed back onto iop.
 *
 * *chcount is incremented once per accepted character.  *flag_eof is
 * set to 1 when the reader returns WEOF or the push back fails.  The
 * type argument is not used.
 *
 * Returns 1 if at least one byte was accepted (the stored string is
 * then null terminated), otherwise 0.  0 is also returned at once,
 * without terminating the destination, if wctomb() cannot convert a
 * character.
 */
static int
brstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	int	nbytes = 0;	/* multibyte bytes accepted so far */
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	char	*ptr, *p;
	int	dummy;

	/*
	 * ptr is only dereferenced or advanced when stow is set; accepted
	 * bytes are counted separately so that no arithmetic is ever
	 * performed on a null pointer.
	 */
	ptr = stow ? va_arg(*listp, char *) : NULL;

	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';

		/* fnmatch() returns non-zero when str is not in the set */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE) != 0) {
			break;
		}
		/* not enough width left for the whole character */
		if (len < i) {
			break;
		}

		(*chcount)++;
		len -= i;
		nbytes += i;
		if (stow) {
			for (p = str; i > 0; i--) {
				*ptr++ = *p++;
			}
		}
		if (len <= 0)
			break;
	}

	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/* give back the character that terminated the scan */
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
			*flag_eof = 1;
	}

	if (nbytes == 0)
		return (0);				/* no match */
	if (stow)
		*ptr = '\0';
	return (1);					/* successful match */
}
1237 #endif /* _WIDE */
1238 
1239 /*
1240  * Locally define getwc and ungetwc
1241  */
/*
 * Read one character from iop and return it as a wide character code.
 *
 * An ASCII byte is returned directly.  Otherwise up to MB_CUR_MAX bytes
 * are collected (stopping early at a newline, which is pushed back, or
 * at EOF) and converted with mbtowc(); bytes read beyond the converted
 * character are pushed back.
 *
 * Returns WEOF at end of input.  If the bytes do not form a legal
 * character, all but the first byte are pushed back, errno is set to
 * EILSEQ and WEOF is returned.
 */
static int
_bi_getwc(FILE *iop)
{
	int c;
	wchar_t intcode;
	int i, nbytes, cur_max;
	char buff[MB_LEN_MAX];

	if ((c = wlocgetc()) == EOF)
		return (WEOF);

	if (isascii(c))	/* ASCII code */
		return ((wint_t)c);

	buff[0] = (char)c;

	cur_max = (int)MB_CUR_MAX;
	/*
	 * MB_CUR_MAX doesn't exceed the value of MB_LEN_MAX, so we use
	 * MB_CUR_MAX instead of MB_LEN_MAX to improve performance.
	 */
	for (i = 1; i < cur_max; i++) {
		c = wlocgetc();
		if (c == '\n') {
			(void) wlocungetc(c);
			break;
		}
		if (c == EOF) {
			/* this still may be a valid multibyte character */
			break;
		}
		buff[i] = (char)c;
	}

	if ((nbytes = mbtowc(&intcode, buff, i)) == -1) {
		/*
		 * If mbtowc fails, the input was not a legal character.
		 *	ungetc all but one character.
		 *
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 *
		 * The byte is passed as an unsigned char value, matching
		 * what the reader returned.  A signed char cast would turn
		 * byte 0xff into -1, which is indistinguishable from EOF;
		 * wlocungetc() presumably ignores EOF the way locungetc()
		 * does, so that byte would not be pushed back.
		 */
		while (i-- > 1) {
			(void) wlocungetc((unsigned char)buff[i]);
		}
		errno = EILSEQ;
		return (WEOF); /* Illegal EUC sequence. */
	}

	while (i-- > nbytes) {
		/*
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 *
		 * Pushed back as an unsigned char value so that byte
		 * 0xff is not mistaken for EOF.
		 */
		(void) wlocungetc((unsigned char)buff[i]);
	}
	return ((int)intcode);
}
1303 
1304 static int
1305 _bi_ungetwc(wint_t wc, FILE *iop)
1306 {
1307 	char mbs[MB_LEN_MAX];
1308 	unsigned char *p;
1309 	int n;
1310 
1311 	if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0))
1312 		return (WEOF);
1313 
1314 	n = wctomb(mbs, (wchar_t)wc);
1315 	if (n <= 0)
1316 		return (WEOF);
1317 
1318 	if (iop->_ptr <= iop->_base) {
1319 		if (iop->_base == NULL) {
1320 			return (WEOF);
1321 		}
1322 		if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) {
1323 			++iop->_ptr;
1324 		} else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) {
1325 			return (WEOF);
1326 		}
1327 	}
1328 
1329 	p = (unsigned char *)(mbs+n-1); /* p points the last byte */
1330 	/* if _IOWRT is set to iop->_flag, it means this is */
1331 	/* an invocation from sscanf(), and in that time we */
1332 	/* don't touch iop->_cnt.  Otherwise, which means an */
1333 	/* invocation from fscanf() or scanf(), we touch iop->_cnt */
1334 	if ((iop->_flag & _IOWRT) == 0) {
1335 		/* scanf() and fscanf() */
1336 		iop->_cnt += n;
1337 		while (n--) {
1338 			*--iop->_ptr = *(p--);
1339 		}
1340 	} else {
1341 		/* sscanf() */
1342 		iop->_ptr -= n;
1343 	}
1344 	return (wc);
1345 }
1346