xref: /titanic_50/usr/src/lib/libc/port/stdio/doscan.c (revision 14ea4bb737263733ad80a36b4f73f681c30a6b45)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1988 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include "synonyms.h"
34 #include <sys/types.h>
35 #include "mtlib.h"
36 #include "file64.h"
37 #include <stdio.h>
38 #include <ctype.h>
39 #include <stdarg.h>
40 #include <values.h>
41 #include <errno.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <math.h>
45 #include <thread.h>
46 #include <synch.h>
47 #include <stdlib.h>
48 #include <fnmatch.h>
49 #include <limits.h>
50 #include <wchar.h>
51 #include <unistd.h>
52 #include "libc.h"
53 #include "stdiom.h"
54 #include "xpg6.h"
55 
/* Number of distinct values a char can take; size of the scanset table. */
#define	NCHARS	(1 << BITSPERBYTE)

/*
 * Character get/unget helpers.  They all operate on a variable named
 * `iop' that must be in scope at the point of use.
 *
 * If the _IOWRT flag is set, this must be a call from sscanf: the input
 * is then a NUL-terminated string reached through iop->_ptr and the
 * terminating NUL is reported as EOF.
 *
 * locgetc/locungetc additionally maintain the caller's character count:
 * the count is adjusted on every call, including calls that yield EOF.
 * The macro arguments are parenthesized so that arbitrary expressions
 * may be passed safely.
 */
#define	locgetc(cnt)	((cnt) += 1, (iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	locungetc(cnt, x) ((cnt) -= 1, ((x) == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    (++iop->_cnt, *(--iop->_ptr))))

/* Same as above, but without any character counting. */
#define	wlocgetc()	((iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	wlocungetc(x) (((x) == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    UNGETC((x), iop)))

#define	MAXARGS	30	/* max. number of args for fast positional parameters */
74 
/*
 * stva_list is used to subvert C's restriction that a variable with an
 * array type cannot appear on the left-hand side of an assignment operator.
 * By putting the array inside a structure, the functionality of assigning to
 * the whole array through a simple assignment is achieved.
 */
81 typedef struct stva_list {
82 	va_list	ap;
83 } stva_list;
84 
85 static int number(int *, int *, int, int, int, int, FILE *, va_list *);
86 static int readchar(FILE *, int *);
87 static int string(int *, int *, int, int, int, char *, FILE *, va_list *);
88 static int wstring(int *, int *, int, int, int, FILE *, va_list *);
89 static int	wbrstring(int *, int *, int, int, int, FILE *,
90 	unsigned char *, va_list *);
91 #ifdef	_WIDE
92 static int	brstring(int *, int *, int, int, int, FILE *,
93 	unsigned char *, va_list *);
94 #endif
95 static int _bi_getwc(FILE *);
96 static int _bi_ungetwc(wint_t, FILE *);
97 
98 #ifdef	_WIDE
99 static int _mkarglst(const wchar_t *, stva_list, stva_list[]);
100 static wint_t	_wd_getwc(int *, FILE *);
101 static wint_t	_wd_ungetwc(int *, wchar_t, FILE *);
102 static int	_watoi(wchar_t *);
103 #else  /* _WIDE */
104 static int _mkarglst(const char *, stva_list, stva_list[]);
105 #endif /* _WIDE */
106 
107 #ifndef	_WIDE
108 int
109 _doscan(FILE *iop, const char *fmt, va_list va_Alist)
110 {
111 	int ret;
112 	rmutex_t *lk;
113 
114 	if (iop->_flag & _IOWRT)
115 		ret = __doscan_u(iop, fmt, va_Alist, 0);
116 	else {
117 		FLOCKFILE(lk, iop);
118 		ret = __doscan_u(iop, fmt, va_Alist, 0);
119 		FUNLOCKFILE(lk);
120 	}
121 	return (ret);
122 }
123 #endif  /* _WIDE */
124 
125 /* ARGSUSED3 */
126 #ifdef	_WIDE
127 int
128 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, int scflag)
129 #else  /* _WIDE */
130 int
131 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag)
132 #endif /* _WIDE */
133 {
134 #ifdef	_WIDE
135 	wchar_t	ch;
136 	wchar_t	inchar, size;
137 	int	nmatch = 0, len, stow;
138 #else  /* _WIDE */
139 	int	ch;
140 	int		nmatch = 0, len, inchar, stow, size;
141 #endif /* _WIDE */
142 
143 	unsigned char	*bracket_str = NULL;
144 	int		chcount, flag_eof;
145 	char	tab[NCHARS];
146 
147 	/* variables for postional parameters */
148 #ifdef	_WIDE
149 	const wchar_t	*sformat = fmt;	/* save the beginning of the format */
150 #else  /* _WIDE */
151 	const unsigned char	*fmt = (const unsigned char *)sfmt;
152 	const char	*sformat = sfmt; /* save the beginning of the format */
153 #endif /* _WIDE */
154 	int		fpos = 1;	/* 1 if first postional parameter */
155 	stva_list	args,	/* used to step through the argument list */
156 		sargs;	/* used to save the start of the argument list */
157 	stva_list	arglst[MAXARGS];
158 					/*
159 					 * array giving the appropriate values
160 					 * for va_arg() to retrieve the
161 					 * corresponding argument:
162 					 * arglst[0] is the first argument
163 					 * arglst[1] is the second argument,etc.
164 					 */
165 	/* Check if readable stream */
166 	if (!(iop->_flag & (_IOREAD | _IORW))) {
167 		errno = EBADF;
168 		return (EOF);
169 	}
170 
171 	/*
172 	 * Initialize args and sargs to the start of the argument list.
173 	 * We don't know any portable way to copy an arbitrary C object
174 	 * so we use a system-specific routine(probably a macro) from
175 	 * stdarg.h.  (Remember that if va_list is an array, in_args will
176 	 * be a pointer and &in_args won't be what we would want for
177 	 * memcpy.)
178 	 */
179 	va_copy(args.ap, va_Alist);
180 
181 	sargs = args;
182 
183 	chcount = 0; flag_eof = 0;
184 
185 	/*
186 	 * ****************************************************
187 	 * Main loop: reads format to determine a pattern,
188 	 *		and then goes to read input stream
189 	 *		in attempt to match the pattern.
190 	 * ****************************************************
191 	 */
192 	for (; ; ) {
193 		if ((ch = *fmt++) == '\0') {
194 			return (nmatch); /* end of format */
195 		}
196 #ifdef	_WIDE
197 		if (iswspace(ch)) {
198 			if (!flag_eof) {
199 				while (iswspace(inchar =
200 					    _wd_getwc(&chcount, iop)))
201 					;
202 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
203 					flag_eof = 1;
204 			}
205 			continue;
206 		}
207 		if (ch != '%' || (ch = *fmt++) == '%') {
208 			if ((inchar = _wd_getwc(&chcount, iop)) == ch)
209 				continue;
210 			if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) {
211 				return (nmatch); /* failed to match input */
212 			}
213 			break;
214 		}
215 #else  /* _WIDE */
216 		if (isspace(ch)) {
217 			if (!flag_eof) {
218 				while (isspace(inchar = locgetc(chcount)))
219 					;
220 				if (locungetc(chcount, inchar) == EOF)
221 					flag_eof = 1;
222 
223 			}
224 			continue;
225 		}
226 		if (ch != '%' || (ch = *fmt++) == '%') {
227 			if ((inchar = locgetc(chcount)) == ch)
228 				continue;
229 			if (locungetc(chcount, inchar) != EOF) {
230 				return (nmatch); /* failed to match input */
231 			}
232 			break;
233 		}
234 #endif /* _WIDE */
235 
236 charswitch:	/* target of a goto 8-( */
237 
238 		if (ch == '*') {
239 			stow = 0;
240 			ch = *fmt++;
241 		} else
242 			stow = 1;
243 
244 #ifdef	_WIDE
245 		for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch));
246 								ch = *fmt++)
247 			len = len * 10 + ch - '0';
248 #else  /* _WIDE */
249 		for (len = 0; isdigit(ch); ch = *fmt++)
250 			len = len * 10 + ch - '0';
251 #endif /* _WIDE */
252 
253 		if (ch == '$') {
254 			/*
255 			 * positional parameter handling - the number
256 			 * specified in len gives the argument to which
257 			 * the next conversion should be applied.
258 			 * WARNING: This implementation of positional
259 			 * parameters assumes that the sizes of all pointer
260 			 * types are the same. (Code similar to that
261 			 * in the portable doprnt.c should be used if this
262 			 * assumption does not hold for a particular
263 			 * port.)
264 			 */
265 			if (fpos) {
266 				if (_mkarglst(sformat, sargs, arglst) != 0) {
267 					return (EOF);
268 				} else {
269 					fpos = 0;
270 				}
271 			}
272 			if (len <= MAXARGS) {
273 				args = arglst[len - 1];
274 			} else {
275 				args = arglst[MAXARGS - 1];
276 				for (len -= MAXARGS; len > 0; len--)
277 					(void) va_arg(args.ap, void *);
278 			}
279 			len = 0;
280 			ch = *fmt++;
281 			goto charswitch;
282 		}
283 
284 		if (len == 0)
285 			len = MAXINT;
286 #ifdef	_WIDE
287 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
288 			(size == 'j') || (size == 't') || (size == 'z'))
289 			ch = *fmt++;
290 #else  /* _WIDE */
291 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
292 			(size == 'w') || (size == 'j') || (size == 't') ||
293 			(size == 'z'))
294 			ch = *fmt++;
295 #endif /* _WIDE */
296 		if (size == 'l' && ch == 'l') {
297 			size = 'm';		/* size = 'm' if long long */
298 			ch = *fmt++;
299 		} else if (size == 'h' && ch == 'h') {
300 			size = 'b';		/* use size = 'b' if char */
301 			ch = *fmt++;
302 		} else if ((size == 't') || (size == 'z')) {
303 			size = 'l';
304 		} else if (size == 'j') {
305 #ifndef _LP64
306 			/* check scflag for size of u/intmax_t (32-bit libc) */
307 			if (!(scflag & _F_INTMAX32))
308 			{
309 #endif
310 				size = 'm';
311 #ifndef _LP64
312 			}
313 #endif
314 		}
315 		if (ch == '\0') {
316 			return (EOF);		/* unexpected end of format */
317 		}
318 #ifdef	_WIDE
319 		if (ch == '[') {
320 			wchar_t	c;
321 			size_t	len;
322 			int	negflg = 0;
323 			wchar_t	*p;
324 			wchar_t	*wbracket_str;
325 			size_t	wlen, clen;
326 
327 			/* p points to the address of '[' */
328 			p = (wchar_t *)fmt - 1;
329 			len = 0;
330 			if (*fmt == '^') {
331 				len++;
332 				fmt++;
333 				negflg = 1;
334 			}
335 			if (((c = *fmt) == ']') || (c == '-')) {
336 				len++;
337 				fmt++;
338 			}
339 			while ((c = *fmt) != ']') {
340 				if (c == '\0') {
341 					return (EOF); /* unexpected EOF */
342 				} else {
343 					len++;
344 					fmt++;
345 				}
346 			}
347 			fmt++;
348 			len += 2;
349 			wbracket_str = (wchar_t *)
350 				malloc(sizeof (wchar_t) * (len + 1));
351 			if (wbracket_str == NULL) {
352 				errno = ENOMEM;
353 				return (EOF);
354 			} else {
355 				(void) wmemcpy(wbracket_str,
356 						(const wchar_t *)p, len);
357 				*(wbracket_str + len) = L'\0';
358 				if (negflg && *(wbracket_str + 1) == '^') {
359 					*(wbracket_str + 1) = L'!';
360 				}
361 			}
362 			wlen = wcslen(wbracket_str);
363 			clen = wcstombs((char *)NULL, wbracket_str, 0);
364 			if (clen == (size_t)-1) {
365 				free(wbracket_str);
366 				return (EOF);
367 			}
368 			bracket_str = (unsigned char *)
369 				malloc(sizeof (unsigned char) * (clen + 1));
370 			if (bracket_str == NULL) {
371 				free(wbracket_str);
372 				errno = ENOMEM;
373 				return (EOF);
374 			}
375 			clen = wcstombs((char *)bracket_str, wbracket_str,
376 				wlen + 1);
377 			free(wbracket_str);
378 			if (clen == (size_t)-1) {
379 				free(bracket_str);
380 				return (EOF);
381 			}
382 		}
383 #else  /* _WIDE */
384 		if (ch == '[') {
385 			if (size == 'l') {
386 				int	c, len, i;
387 				int	negflg = 0;
388 				unsigned char 	*p;
389 
390 				p = (unsigned char *)(fmt - 1);
391 				len = 0;
392 				if (*fmt == '^') {
393 					len++;
394 					fmt++;
395 					negflg = 1;
396 				}
397 				if (((c = *fmt) == ']') || (c == '-')) {
398 					len++;
399 					fmt++;
400 				}
401 				while ((c = *fmt) != ']') {
402 					if (c == '\0') {
403 						return (EOF);
404 					} else if (isascii(c)) {
405 						len++;
406 						fmt++;
407 					} else {
408 						i = mblen((const char *)fmt,
409 							MB_CUR_MAX);
410 						if (i <= 0) {
411 							return (EOF);
412 						} else {
413 							len += i;
414 							fmt += i;
415 						}
416 					}
417 				}
418 				fmt++;
419 				len += 2;
420 				bracket_str = (unsigned char *)
421 					malloc(sizeof (unsigned char) *
422 					(len + 1));
423 				if (bracket_str == NULL) {
424 					errno = ENOMEM;
425 					return (EOF);
426 				} else {
427 					(void) strncpy((char *)bracket_str,
428 						(const char *)p, len);
429 					*(bracket_str + len) = '\0';
430 					if (negflg &&
431 					    *(bracket_str + 1) == '^') {
432 						*(bracket_str + 1) = '!';
433 					}
434 				}
435 			} else {
436 				int	t = 0;
437 				int	b, c, d;
438 
439 				if (*fmt == '^') {
440 					t++;
441 					fmt++;
442 				}
443 				(void) memset(tab, !t, NCHARS);
444 				if ((c = *fmt) == ']' || c == '-') {
445 					tab[c] = t;
446 					fmt++;
447 				}
448 
449 				while ((c = *fmt) != ']') {
450 					if (c == '\0') {
451 						return (EOF);
452 					}
453 					b = *(fmt - 1);
454 					d = *(fmt + 1);
455 					if ((c == '-') && (d != ']') &&
456 								(b < d)) {
457 						(void) memset(&tab[b], t,
458 								d - b + 1);
459 						fmt += 2;
460 					} else {
461 						tab[c] = t;
462 						fmt++;
463 					}
464 				}
465 				fmt++;
466 			}
467 		}
468 #endif /* _WIDE */
469 
470 #ifdef	_WIDE
471 		if ((ch >= 0) && (ch < 256) &&
472 			isupper((int)ch)) { /* no longer documented */
473 			if (_lib_version == c_issue_4) {
474 				if (size != 'm' && size != 'L')
475 					size = 'l';
476 			}
477 			ch = _tolower((int)ch);
478 		}
479 		if (ch != 'n' && !flag_eof) {
480 			if (ch != 'c' && ch != 'C' && ch != '[') {
481 				while (iswspace(inchar =
482 						_wd_getwc(&chcount, iop)))
483 					;
484 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
485 					break;
486 
487 			}
488 		}
489 #else  /* _WIDE */
490 		if (isupper(ch)) { /* no longer documented */
491 			if (_lib_version == c_issue_4) {
492 				if (size != 'm' && size != 'L')
493 					size = 'l';
494 			}
495 			ch = _tolower(ch);
496 		}
497 		if (ch != 'n' && !flag_eof) {
498 			if (ch != 'c' && ch != 'C' && ch != '[') {
499 				while (isspace(inchar = locgetc(chcount)))
500 					;
501 				if (locungetc(chcount, inchar) == EOF)
502 					break;
503 			}
504 		}
505 #endif /* _WIDE */
506 
507 		switch (ch) {
508 		case 'C':
509 		case 'S':
510 		case 'c':
511 		case 's':
512 #ifdef	_WIDE
513 			if ((size == 'l') || (size == 'C') || (size == 'S'))
514 #else  /* _WIDE */
515 			if ((size == 'w') || (size == 'l') || (size == 'C') ||
516 				(size == 'S'))
517 #endif /* _WIDE */
518 			{
519 				size = wstring(&chcount, &flag_eof, stow,
520 					(int)ch, len, iop, &args.ap);
521 			} else {
522 				size = string(&chcount, &flag_eof, stow,
523 					(int)ch, len, tab, iop, &args.ap);
524 			}
525 			break;
526 		case '[':
527 			if (size == 'l') {
528 				size = wbrstring(&chcount, &flag_eof, stow,
529 					(int)ch, len, iop, bracket_str,
530 					&args.ap);
531 				free(bracket_str);
532 				bracket_str = NULL;
533 			} else {
534 #ifdef	_WIDE
535 				size = brstring(&chcount, &flag_eof, stow,
536 					(int)ch, len, iop, bracket_str,
537 					&args.ap);
538 				free(bracket_str);
539 				bracket_str = NULL;
540 #else  /* _WIDE */
541 				size = string(&chcount, &flag_eof, stow,
542 					ch, len, tab, iop, &args.ap);
543 #endif /* _WIDE */
544 			}
545 			break;
546 
547 		case 'n':
548 			if (stow == 0)
549 				continue;
550 			if (size == 'b')	/* char */
551 				*va_arg(args.ap, char *) = (char)chcount;
552 			else if (size == 'h')
553 				*va_arg(args.ap, short *) = (short)chcount;
554 			else if (size == 'l')
555 				*va_arg(args.ap, long *) = (long)chcount;
556 			else if (size == 'm') /* long long */
557 				*va_arg(args.ap, long long *) =
558 					(long long) chcount;
559 			else
560 				*va_arg(args.ap, int *) = (int)chcount;
561 			continue;
562 
563 		case 'i':
564 		default:
565 			size = number(&chcount, &flag_eof, stow, (int)ch,
566 				len, (int)size, iop, &args.ap);
567 			break;
568 		}
569 		if (size)
570 			nmatch += stow;
571 		else {
572 			return ((flag_eof && !nmatch) ? EOF : nmatch);
573 		}
574 		continue;
575 	}
576 	if (bracket_str)
577 		free(bracket_str);
578 	return (nmatch != 0 ? nmatch : EOF); /* end of input */
579 }
580 
581 /* ****************************************************************** */
582 /* Functions to read the input stream in an attempt to match incoming */
583 /* data to the current pattern from the main loop of _doscan(). */
584 /* ****************************************************************** */
585 static int
586 number(int *chcount, int *flag_eof, int stow, int type, int len, int size,
587 	FILE *iop, va_list *listp)
588 {
589 	char	numbuf[64];
590 	char	*np = numbuf;
591 	int	c, base, inchar, lookahead;
592 	int		digitseen = 0, floater = 0, negflg = 0;
593 	int		lc;
594 	long long	lcval = 0LL;
595 
596 	switch (type) {
597 	case 'e':
598 	case 'f':
599 	case 'g':
600 		/*
601 		 * lc = 0 corresponds to c90 mode: do not recognize
602 		 *	hexadecimal fp strings; attempt to push back
603 		 *	all unused characters read
604 		 *
605 		 * lc = -1 corresponds to c99 mode: recognize hexa-
606 		 *	decimal fp strings; push back at most one
607 		 *	unused character
608 		 */
609 		lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0;
610 		floater = 1;
611 		break;
612 
613 	case 'a':
614 		lc = -1;
615 		floater = 1;
616 		break;
617 
618 	case 'd':
619 	case 'u':
620 	case 'i':
621 		base = 10;
622 		break;
623 	case 'o':
624 		base = 8;
625 		break;
626 	case 'p':
627 #ifdef	_LP64
628 		size = 'l'; /* pointers are long in LP64 */
629 #endif	/*	_LP64	*/
630 		/* FALLTHROUGH */
631 	case 'x':
632 		base = 16;
633 		break;
634 	default:
635 		return (0); /* unrecognized conversion character */
636 	}
637 
638 	if (floater != 0) {
639 		/*
640 		 * Handle floating point with
641 		 * file_to_decimal.
642 		 */
643 		decimal_mode		dm;
644 		decimal_record		dr;
645 		fp_exception_field_type	efs;
646 		enum decimal_string_form form;
647 		char			*echar;
648 		int			nread;
649 		char			buffer[1024+1];
650 		char			*nb = buffer;
651 
652 		if (len > 1024)
653 			len = 1024;
654 		file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread);
655 		if (lc == -1) {
656 			/*
657 			 * In C99 mode, the entire string read has to be
658 			 * accepted in order to qualify as a match
659 			 */
660 			if (nb != buffer + nread)
661 				form = invalid_form;
662 		}
663 		if (stow && (form != invalid_form)) {
664 #if defined(__sparc)
665 			dm.rd = _QgetRD();
666 			if (size == 'L') {		/* long double */
667 				if ((int)form < 0)
668 					__hex_to_quadruple(&dr, dm.rd,
669 					    va_arg(*listp, quadruple *),
670 					    &efs);
671 				else
672 					decimal_to_quadruple(
673 					    va_arg(*listp, quadruple *),
674 					    &dm, &dr, &efs);
675 			}
676 #elif defined(__i386) || defined(__amd64)
677 			dm.rd = __xgetRD();
678 			if (size == 'L') {		/* long double */
679 				if ((int)form < 0)
680 					__hex_to_extended(&dr, dm.rd,
681 					    va_arg(*listp, extended *),
682 					    &efs);
683 				else
684 					decimal_to_extended(
685 					    va_arg(*listp, extended *),
686 					    &dm, &dr, &efs);
687 			}
688 #else
689 #error Unknown architecture
690 #endif
691 			else if (size == 'l') {		/* double */
692 				if ((int)form < 0)
693 					__hex_to_double(&dr, dm.rd,
694 					    va_arg(*listp, double *), &efs);
695 				else
696 					decimal_to_double(
697 					    va_arg(*listp, double *),
698 					    &dm, &dr, &efs);
699 			} else {			/* float */
700 				if ((int)form < 0)
701 					__hex_to_single(&dr, dm.rd,
702 					    va_arg(*listp, single *), &efs);
703 				else
704 					decimal_to_single((single *)
705 					    va_arg(*listp, single *),
706 					    &dm, &dr, &efs);
707 			}
708 			if ((efs & (1 << fp_overflow)) != 0) {
709 				errno = ERANGE;
710 			}
711 			if ((efs & (1 << fp_underflow)) != 0) {
712 				errno = ERANGE;
713 			}
714 		}
715 		(*chcount) += nread;	/* Count characters read. */
716 		c = locgetc((*chcount));
717 		if (locungetc((*chcount), c) == EOF)
718 			*flag_eof = 1;
719 		return ((form == invalid_form) ? 0 : 1);
720 				/* successful match if non-zero */
721 	}
722 
723 	switch (c = locgetc((*chcount))) {
724 	case '-':
725 		negflg++;
726 		/* FALLTHROUGH */
727 	case '+':
728 		if (--len <= 0)
729 			break;
730 		if ((c = locgetc((*chcount))) != '0')
731 			break;
732 		/* FALLTHROUGH */
733 	case '0':
734 		/*
735 		 * If %i or %x, the characters 0x or 0X may optionally precede
736 		 * the sequence of letters and digits (base 16).
737 		 */
738 		if ((type != 'i' && type != 'x') || (len <= 1))
739 			break;
740 		if (((inchar = locgetc((*chcount))) == 'x') ||
741 			(inchar == 'X')) {
742 			lookahead = readchar(iop, chcount);
743 			if (isxdigit(lookahead)) {
744 				base = 16;
745 
746 				if (len <= 2) {
747 					(void) locungetc((*chcount), lookahead);
748 					/* Take into account the 'x' */
749 					len -= 1;
750 				} else {
751 					c = lookahead;
752 					/* Take into account '0x' */
753 					len -= 2;
754 				}
755 			} else {
756 				(void) locungetc((*chcount), lookahead);
757 				(void) locungetc((*chcount), inchar);
758 			}
759 		} else {
760 			/* inchar wans't 'x'. */
761 			(void) locungetc((*chcount), inchar); /* Put it back. */
762 			if (type == 'i') /* Only %i accepts an octal. */
763 				base = 8;
764 		}
765 	}
766 	for (; --len  >= 0; *np++ = (char)c, c = locgetc((*chcount))) {
767 		if (np > numbuf + 62) {
768 		    errno = ERANGE;
769 		    return (0);
770 		}
771 		if (isdigit(c) || base == 16 && isxdigit(c)) {
772 			int digit = c - (isdigit(c) ? '0' :
773 				isupper(c) ? 'A' - 10 : 'a' - 10);
774 			if (digit >= base)
775 				break;
776 			if (stow)
777 				lcval = base * lcval + digit;
778 			digitseen++;
779 			continue;
780 		}
781 		break;
782 	}
783 
784 	if (stow && digitseen) {
785 		/* suppress possible overflow on 2's-comp negation */
786 		if (negflg && lcval != (1ULL << 63))
787 			lcval = -lcval;
788 		switch (size) {
789 			case 'm':
790 				*va_arg(*listp, long long *) = lcval;
791 				break;
792 			case 'l':
793 				*va_arg(*listp, long *) = (long)lcval;
794 				break;
795 			case 'h':
796 				*va_arg(*listp, short *) = (short)lcval;
797 				break;
798 			case 'b':
799 				*va_arg(*listp, char *) = (char)lcval;
800 				break;
801 			default:
802 				*va_arg(*listp, int *) = (int)lcval;
803 				break;
804 		}
805 	}
806 	if (locungetc((*chcount), c) == EOF)
807 	    *flag_eof = 1;
808 	return (digitseen); /* successful match if non-zero */
809 }
810 
811 /* Get a character. If not using sscanf and at the buffer's end */
812 /* then do a direct read(). Characters read via readchar() */
813 /* can be  pushed back on the input stream by locungetc((*chcount),) */
814 /* since there is padding allocated at the end of the stream buffer. */
815 static int
816 readchar(FILE *iop, int *chcount)
817 {
818 	int	inchar;
819 	char	buf[1];
820 
821 	if ((iop->_flag & _IOWRT) || (iop->_cnt != 0))
822 		inchar = locgetc((*chcount));
823 	else {
824 		if (read(FILENO(iop), buf, 1) != 1)
825 			return (EOF);
826 		inchar = (int)buf[0];
827 		(*chcount) += 1;
828 	}
829 	return (inchar);
830 }
831 
832 static int
833 string(int *chcount, int *flag_eof, int stow, int type, int len, char *tab,
834 	FILE *iop, va_list *listp)
835 {
836 	int	ch;
837 	char	*ptr;
838 	char	*start;
839 
840 	start = ptr = stow ? va_arg(*listp, char *) : NULL;
841 	if (((type == 'c') || (type == 'C')) && len == MAXINT)
842 		len = 1;
843 #ifdef	_WIDE
844 	while ((ch = locgetc((*chcount))) != EOF &&
845 		!(((type == 's') || (type == 'S')) && isspace(ch))) {
846 #else  /* _WIDE */
847 	while ((ch = locgetc((*chcount))) != EOF &&
848 		!(((type == 's') || (type == 'S')) &&
849 			isspace(ch) || type == '[' && tab[ch])) {
850 #endif /* _WIDE */
851 		if (stow)
852 			*ptr = (char)ch;
853 		ptr++;
854 		if (--len <= 0)
855 			break;
856 	}
857 	if (ch == EOF) {
858 		(*flag_eof) = 1;
859 		(*chcount) -= 1;
860 	} else if (len > 0 && locungetc((*chcount), ch) == EOF)
861 		(*flag_eof) = 1;
862 	if (ptr == start)
863 		return (0);	/* no match */
864 	if (stow && ((type != 'c') && (type != 'C')))
865 		*ptr = '\0';
866 	return (1);	/* successful match */
867 }
868 
869 /* This function initializes arglst, to contain the appropriate */
870 /* va_list values for the first MAXARGS arguments. */
871 /* WARNING: this code assumes that the sizes of all pointer types */
872 /* are the same. (Code similar to that in the portable doprnt.c */
873 /* should be used if this assumption is not true for a */
874 /* particular port.) */
875 
876 #ifdef	_WIDE
877 static int
878 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[])
879 #else  /* _WIDE */
880 static int
881 _mkarglst(const char *fmt, stva_list args, stva_list arglst[])
882 #endif /* _WIDE */
883 {
884 #ifdef	_WIDE
885 #define	STRCHR	wcschr
886 #define	STRSPN	wcsspn
887 #define	ATOI(x)	_watoi((wchar_t *)x)
888 #define	SPNSTR1	L"01234567890"
889 #define	SPNSTR2	L"# +-.0123456789hL$"
890 #else  /* _WIDE */
891 #define	STRCHR	strchr
892 #define	STRSPN	strspn
893 #define	ATOI(x)	atoi(x)
894 #define	SPNSTR1	"01234567890"
895 #define	SPNSTR2	"# +-.0123456789hL$"
896 #endif /* _WIDE */
897 
898 	int maxnum, curargno;
899 	size_t n;
900 
901 	maxnum = -1;
902 	curargno = 0;
903 
904 	while ((fmt = STRCHR(fmt, '%')) != NULL) {
905 		fmt++;	/* skip % */
906 		if (*fmt == '*' || *fmt == '%')
907 			continue;
908 		if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') {
909 			/* convert to zero base */
910 			curargno = ATOI(fmt) - 1;
911 			fmt += n + 1;
912 		}
913 
914 		if (maxnum < curargno)
915 			maxnum = curargno;
916 		curargno++;	/* default to next in list */
917 
918 		fmt += STRSPN(fmt, SPNSTR2);
919 		if (*fmt == '[') {
920 			int		i;
921 			fmt++; /* has to be at least on item in scan list */
922 			if (*fmt == ']') {
923 				fmt++;
924 			}
925 			while (*fmt != ']') {
926 				if (*fmt == L'\0') {
927 					return (-1); /* bad format */
928 #ifdef	_WIDE
929 				} else {
930 					fmt++;
931 				}
932 #else  /* _WIDE */
933 				} else if (isascii(*fmt)) {
934 					fmt++;
935 				} else {
936 					i = mblen((const char *)
937 						fmt, MB_CUR_MAX);
938 					if (i <= 0) {
939 						return (-1);
940 					} else {
941 						fmt += i;
942 					}
943 				}
944 #endif /* _WIDE */
945 			}
946 		}
947 	}
948 	if (maxnum > MAXARGS)
949 		maxnum = MAXARGS;
950 	for (n = 0; n <= maxnum; n++) {
951 		arglst[n] = args;
952 		(void) va_arg(args.ap, void *);
953 	}
954 	return (0);
955 }
956 
957 
958 /*
959  * For wide character handling
960  */
961 #ifdef	_WIDE
962 static int
963 wstring(int *chcount, int *flag_eof, int stow, int type,
964 	int len, FILE *iop, va_list *listp)
965 {
966 	wint_t	wch;
967 	wchar_t	*ptr;
968 	wchar_t	*wstart;
969 	int	dummy;
970 
971 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
972 
973 	if ((type == 'c') && len == MAXINT)
974 		len = 1;
975 	while (((wch = _wd_getwc(chcount, iop)) != WEOF) &&
976 		!(type == 's' && iswspace(wch))) {
977 		if (stow)
978 			*ptr = wch;
979 		ptr++;
980 		if (--len <= 0)
981 			break;
982 	}
983 	if (wch == WEOF) {
984 		*flag_eof = 1;
985 		(*chcount) -= 1;
986 	} else {
987 		if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF)
988 			*flag_eof = 1;
989 	}
990 	if (ptr == wstart)
991 		return (0); /* no match */
992 	if (stow && (type != 'c'))
993 		*ptr = '\0';
994 	return (1); /* successful match */
995 }
996 #else  /* _WIDE */
997 static int
998 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop,
999 	va_list *listp)
1000 {
1001 	int	wch;
1002 	wchar_t	*ptr;
1003 	wchar_t	*wstart;
1004 
1005 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1006 
1007 	if ((type == 'c') && len == MAXINT)
1008 		len = 1;
1009 	while (((wch = _bi_getwc(iop)) != EOF) &&
1010 		!(type == 's' && (isascii(wch) ? isspace(wch) : 0))) {
1011 		(*chcount) += _scrwidth((wchar_t)wch);
1012 		if (stow)
1013 			*ptr = wch;
1014 		ptr++;
1015 		if (--len <= 0)
1016 			break;
1017 	}
1018 	if (wch == EOF) {
1019 		(*flag_eof) = 1;
1020 		(*chcount) -= 1;
1021 	} else {
1022 		if (len > 0 && _bi_ungetwc(wch, iop) == EOF)
1023 			(*flag_eof) = 1;
1024 	}
1025 	if (ptr == wstart)
1026 		return (0); /* no match */
1027 	if (stow && (type != 'c'))
1028 		*ptr = '\0';
1029 	return (1); /* successful match */
1030 }
1031 #endif /* _WIDE */
1032 
1033 #ifdef	_WIDE
1034 static wint_t
1035 _wd_getwc(int *chcount, FILE *iop)
1036 {
1037 	wint_t	wc;
1038 	int	len;
1039 
1040 	if (!(iop->_flag & _IOWRT)) {
1041 		/* call from fwscanf, wscanf */
1042 		wc = __fgetwc_xpg5(iop);
1043 		(*chcount)++;
1044 		return (wc);
1045 	} else {
1046 		/* call from swscanf */
1047 		if (*iop->_ptr == '\0')
1048 			return (WEOF);
1049 		len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr,
1050 								MB_CUR_MAX);
1051 		if (len == -1)
1052 			return (WEOF);
1053 		iop->_ptr += len;
1054 		(*chcount)++;
1055 		return (wc);
1056 	}
1057 }
1058 
1059 static wint_t
1060 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop)
1061 {
1062 	wint_t	ret;
1063 	int	len;
1064 	char	mbs[MB_LEN_MAX];
1065 
1066 	if (wc == WEOF)
1067 		return (WEOF);
1068 
1069 	if (!(iop->_flag & _IOWRT)) {
1070 		/* call from fwscanf, wscanf */
1071 		ret = __ungetwc_xpg5((wint_t)wc, iop);
1072 		if (ret != (wint_t)wc)
1073 			return (WEOF);
1074 		(*chcount)--;
1075 		return (ret);
1076 	} else {
1077 		/* call from swscanf */
1078 		len = wctomb(mbs, wc);
1079 		if (len == -1)
1080 			return (WEOF);
1081 		iop->_ptr -= len;
1082 		(*chcount)--;
1083 		return ((wint_t)wc);
1084 	}
1085 }
1086 
/*
 * Convert the leading run of decimal digits in the wide string fmt to
 * an int.  Only characters in the range 0..255 that satisfy isdigit()
 * are accepted; conversion stops at the first other character.
 * Returns 0 if the string does not start with a digit.
 */
static int
_watoi(wchar_t *fmt)
{
	int	value = 0;

	for (; (*fmt >= 0) && (*fmt < 256) && isdigit((int)*fmt); fmt++)
		value = value * 10 + (*fmt - '0');

	return (value);
}
1104 #endif /* _WIDE */
1105 
/*
 * Match a %l[...] conversion: accept input characters for as long as
 * each one, converted to a multibyte string, matches the fnmatch
 * pattern brstr, storing them as wide characters.
 *
 *	chcount		running count of characters consumed (updated)
 *	flag_eof	set to 1 when end of input is reached
 *	stow		non-zero if the characters are to be stored
 *	type		conversion character (unused)
 *	len		maximum number of characters to accept
 *	brstr		scanset in fnmatch bracket-expression form
 *	listp		argument list from which the destination is taken
 *
 * Returns 1 if at least one character was accepted, 0 otherwise
 * (including when a character cannot be converted by wctomb()).
 */
/* ARGSUSED3 */
static int
wbrstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	wchar_t	*ptr, *start;
#ifdef	_WIDE
	/*
	 * Scratch counter for _wd_getwc()/_wd_ungetwc(), which modify
	 * the integer they are handed; it must start out initialized.
	 * The real count is maintained explicitly below.
	 */
	int	dummy = 0;
#endif /* _WIDE */

	start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;

#ifdef	_WIDE
	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
#else  /* _WIDE */
	while ((wch = _bi_getwc(iop)) != WEOF) {
#endif /* _WIDE */
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when str is not in the set */
		if (fnmatch((const char *)brstr, (const char *)str,
			FNM_NOESCAPE)) {
			break;
		} else {
			if (len > 0) {
#ifdef	_WIDE
				(*chcount)++;
#else  /* _WIDE */
				(*chcount) += _scrwidth(wch);
#endif /* _WIDE */
				len--;
				if (stow) {
					*ptr = wch;
				}
				ptr++;
				if (len <= 0)
					break;
			} else {
				break;
			}
		}
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/* Put back the character that ended the match. */
#ifdef	_WIDE
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
#else  /* _WIDE */
		if (len > 0 && _bi_ungetwc(wch, iop) == WEOF)
#endif /* _WIDE */
			*flag_eof = 1;
	}
	if (ptr == start)
		return (0);				/* no match */
	if (stow)
		*ptr = L'\0';
	return (1);					/* successful match */
}
1169 
1170 #ifdef	_WIDE
/*
 * Match a %[...] conversion in the wide build: accept input characters
 * for as long as each one, converted to a multibyte string, matches the
 * fnmatch pattern brstr, storing the multibyte bytes into a char buffer.
 *
 *	chcount		running count of characters consumed (updated)
 *	flag_eof	set to 1 when end of input is reached
 *	stow		non-zero if the bytes are to be stored
 *	type		conversion character (unused)
 *	len		maximum number of bytes to accept
 *	brstr		scanset in fnmatch bracket-expression form
 *	listp		argument list from which the destination is taken
 *
 * Returns 1 if at least one byte was accepted, 0 otherwise (including
 * when a character cannot be converted by wctomb()).
 *
 * Accepted bytes are counted separately from the destination pointer so
 * that no arithmetic is performed on a null pointer when assignment is
 * suppressed.
 */
static int
brstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	int	naccepted = 0;	/* bytes accepted so far */
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	char	*ptr, *p;
	/*
	 * Scratch counter for _wd_getwc()/_wd_ungetwc(), which modify
	 * the integer they are handed; it must start out initialized.
	 */
	int	dummy = 0;

	ptr = stow ? va_arg(*listp, char *) : NULL;

	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
		p = str;
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when str is not in the set */
		if (fnmatch((const char *)brstr, (const char *)str,
			FNM_NOESCAPE)) {
			break;
		} else {
			/* Accept only if the whole character still fits. */
			if (len >= i) {
				(*chcount)++;
				len -= i;
				naccepted += i;
				if (stow) {
					while (i-- > 0) {
						*ptr++ = *p++;
					}
				}
				if (len <= 0)
					break;
			} else {
				break;
			}
		}
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/* Put back the character that ended the match. */
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
			*flag_eof = 1;
	}
	if (naccepted == 0)
		return (0);				/* no match */
	if (stow)
		*ptr = '\0';
	return (1);					/* successful match */
}
1224 }
1225 #endif /* _WIDE */
1226 
/*
 * Locally define getwc and ungetwc
 */

/*
 * _bi_getwc: read one character from iop through wlocgetc() and return
 * it as a wide character code.
 *
 * An ASCII byte is returned directly.  Otherwise up to MB_CUR_MAX bytes
 * are gathered (stopping early at a newline, which is handed back, or at
 * EOF) and converted with mbtowc().  Bytes gathered beyond what mbtowc()
 * consumed are handed back with wlocungetc().
 *
 * Returns WEOF at end of input.  If mbtowc() rejects the sequence, every
 * gathered byte except the first is handed back, errno is set to EILSEQ
 * and WEOF is returned.
 */
static int
_bi_getwc(FILE *iop)
{
	char	mbbuf[MB_LEN_MAX];
	wchar_t	wc;
	int	byte;
	int	nread;		/* bytes gathered in mbbuf */
	int	used;		/* bytes mbtowc() consumed */
	int	limit;
	int	k;

	byte = wlocgetc();
	if (byte == EOF)
		return (WEOF);

	/* ASCII code: no conversion needed */
	if (isascii(byte))
		return ((wint_t)byte);

	mbbuf[0] = (char)byte;

	/*
	 * MB_CUR_MAX doesn't exceed the value of MB_LEN_MAX, so it is
	 * used as the gathering limit to avoid reading more than the
	 * current locale can need.
	 */
	limit = (int)MB_CUR_MAX;
	nread = 1;
	while (nread < limit) {
		byte = wlocgetc();
		if (byte == '\n') {
			(void) wlocungetc(byte);
			break;
		}
		if (byte == EOF) {
			/* this still may be a valid multibyte character */
			break;
		}
		mbbuf[nread++] = (char)byte;
	}

	/*
	 * Note for both pushback loops below:  the number of pushback
	 *	characters that ungetc() can handle must be
	 *	>= (MB_LEN_MAX - 1).  In Solaris 2.x, the number of
	 *	pushback characters is 4.
	 */
	used = mbtowc(&wc, mbbuf, nread);
	if (used == -1) {
		/*
		 * The input was not a legal character: give back all but
		 * the first byte, last byte first.
		 */
		for (k = nread - 1; k >= 1; k--) {
			(void) wlocungetc((signed char)mbbuf[k]);
		}
		errno = EILSEQ;
		return (WEOF); /* Illegal EUC sequence. */
	}

	/* give back the bytes that were gathered but not consumed */
	for (k = nread - 1; k >= used; k--) {
		(void) wlocungetc((signed char)mbbuf[k]);
	}
	return ((int)wc);
}
1291 
1292 static int
1293 _bi_ungetwc(wint_t wc, FILE *iop)
1294 {
1295 	char mbs[MB_LEN_MAX];
1296 	unsigned char *p;
1297 	int n;
1298 
1299 	if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0))
1300 		return (WEOF);
1301 
1302 	n = wctomb(mbs, (wchar_t)wc);
1303 	if (n <= 0)
1304 		return (WEOF);
1305 
1306 	if (iop->_ptr <= iop->_base) {
1307 		if (iop->_base == NULL) {
1308 			return (WEOF);
1309 		}
1310 		if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) {
1311 			++iop->_ptr;
1312 		} else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) {
1313 			return (WEOF);
1314 		}
1315 	}
1316 
1317 	p = (unsigned char *)(mbs+n-1); /* p points the last byte */
1318 	/* if _IOWRT is set to iop->_flag, it means this is */
1319 	/* an invocation from sscanf(), and in that time we */
1320 	/* don't touch iop->_cnt.  Otherwise, which means an */
1321 	/* invocation from fscanf() or scanf(), we touch iop->_cnt */
1322 	if ((iop->_flag & _IOWRT) == 0) {
1323 		/* scanf() and fscanf() */
1324 		iop->_cnt += n;
1325 		while (n--) {
1326 			*--iop->_ptr = *(p--);
1327 		}
1328 	} else {
1329 		/* sscanf() */
1330 		iop->_ptr -= n;
1331 	}
1332 	return (wc);
1333 }
1334