xref: /titanic_50/usr/src/lib/libbc/libc/stdio/common/doscan.c (revision b65731f1f612238279eb4d997f43589b535c5646)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*LINTLIBRARY*/
33 #include <stdio.h>
34 #include <ctype.h>
35 #include <stdarg.h>
36 #include <values.h>
37 #include <floatingpoint.h>
38 #include <errno.h>
39 #include <memory.h>
40 
/* Size of a table indexed by every possible byte value. */
#define	NCHARS		(1 << BITSPERBYTE)

/*
 * Fetch / push back one character on the stream being scanned while
 * keeping chcount (the number of characters consumed so far) in step.
 * Both macros expect a FILE * named `iop' to be visible where they are used.
 */
#define	locgetc()	(chcount += 1, getc(iop))
#define	locungetc(x)	(chcount -= 1, ungetc((x), iop))

/* Per-call scanner state; both are reset at the top of _doscan(). */
static int	chcount;	/* characters consumed so far (reported by %n) */
static int	flag_eof;	/* non-zero once EOF has been seen on input */

static int		number(int, int, int, int, FILE *, va_list *);
static int		string(int, int, int, char *, FILE *, va_list *);
static unsigned char	*setup(unsigned char *, char *);
50 
#ifdef S5EMUL
/* System V emulation: whitespace is whatever isspace() says it is. */
#define	isws(c)		isspace(c)
#else
/*
 * _sptab[c + 1] is 1 iff 'c' is a white space character according to the
 * 4.2BSD "scanf" definition - namely, SP, TAB, and NL are the only
 * whitespace characters.
 *
 * The table is offset by one so that it can be indexed directly with the
 * value returned by getc(): slot 0 corresponds to EOF (-1).  Only the
 * first 1 + 128 slots are spelled out; the remaining slots are implicitly
 * zero, i.e. no character with the high bit set is whitespace.
 * The table is never modified, hence const.
 */
static const char _sptab[1 + 256] = {
	0,					/* EOF - not a whitespace char */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,	/* 0x00: TAB, NL */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x20: SP */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x30 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x40 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x50 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x60 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x70 */
};

/* 'c' must be EOF or a value in the range 0..255. */
#define	isws(c)		((_sptab + 1)[c] != 0)
#endif
81 
82 int
83 _doscan(FILE *iop, unsigned char *fmt, va_list va_alist)
84 {
85 	char tab[NCHARS];
86 	int ch;
87 	int nmatch = 0, len, inchar, stow, size;
88 	chcount=0; flag_eof=0;
89 
90 	/*******************************************************
91 	 * Main loop: reads format to determine a pattern,
92 	 *		and then goes to read input stream
93 	 *		in attempt to match the pattern.
94 	 *******************************************************/
95 	for ( ; ; )
96 	{
97 		if ( (ch = *fmt++) == '\0')
98 			return(nmatch); /* end of format */
99 		if (isws(ch))
100 		{
101 		  	if (!flag_eof)
102 			{
103 			   while (isws(inchar = locgetc()))
104 				;
105 			   if (inchar == EOF) {
106 				chcount--;
107 				flag_eof = 1;
108 			   }
109 			   else if (locungetc(inchar) == EOF)
110 				flag_eof = 1;
111 			}
112 		  continue;
113 		}
114 		if (ch != '%' || (ch = *fmt++) == '%')
115                 {
116 			if ( (inchar = locgetc()) == ch )
117 				continue;
118 			if (inchar != EOF) {
119 				if (locungetc(inchar) != EOF)
120 					return(nmatch); /* failed to match input */
121 			} else {
122 				chcount--;
123 			}
124 			break;
125 		}
126 		if (ch == '*')
127 		{
128 			stow = 0;
129 			ch = *fmt++;
130 		}
131 		else
132 			stow = 1;
133 
134 		for (len = 0; isdigit(ch); ch = *fmt++)
135 			len = len * 10 + ch - '0';
136 		if (len == 0)
137 			len = MAXINT;
138 		if ( (size = ch) == 'l' || (size == 'h') || (size == 'L') )
139 			ch = *fmt++;
140 		if (ch == '\0' ||
141 		    ch == '[' && (fmt = setup(fmt, tab)) == NULL)
142 			return(EOF); /* unexpected end of format */
143 		if (isupper(ch))  /* no longer documented */
144 		{
145 			/*
146 			 * The rationale behind excluding the size
147 			 * of 'L' is that the 'L' size specifier was
148 			 * introduced in ANSI/ISO-C.  If the user
149 			 * specifies a format of %LG, it can mean
150 			 * nothing other than "long double", be the
151 			 * code ANSI or not.  Mapping it to "double"
152 			 * makes no sense.
153 			 */
154 			if (size != 'L')
155 				size = 'l';
156 #ifdef S5EMUL
157 			ch = _tolower(ch);
158 #else
159 			ch = tolower(ch);
160 #endif
161 		}
162 		switch(ch)
163 		{
164 		 case 'c':
165 		 case 's':
166 		 case '[':
167 			  if ((size = string(stow,ch,len,tab,iop,&va_alist)) < 0)
168 				goto out;	/* EOF seen, nothing converted */
169 			  break;
170                  case 'n':
171 			  if (stow == 0)
172 				continue;
173 			  if (size == 'h')
174 				*va_arg(va_alist, short *) = (short) chcount;
175 		          else if (size == 'l')
176 				*va_arg(va_alist, long *) = (long) chcount;
177 			  else
178 			  	*va_arg(va_alist, int *) = (int) chcount;
179 			  continue;
180                  default:
181 			 if ((size = number(stow, ch, len, size, iop, &va_alist)) < 0)
182 				goto out;	/* EOF seen, nothing converted */
183 			 break;
184                  }
185 		   if (size)
186 			nmatch += stow;
187 		   else
188 			return((flag_eof && !nmatch) ? EOF : nmatch);
189 		continue;
190 	}
191 out:
192 	return (nmatch != 0 ? nmatch : EOF); /* end of input */
193 }
194 
195 /*
196  **************************************************************
197  * Functions to read the input stream in an attempt to match incoming
198  * data to the current pattern from the main loop of _doscan().
199  **************************************************************
200  */
201 static int
202 number(int stow, int type, int len, int size, FILE *iop, va_list *listp)
203 {
204 	char numbuf[64], inchar, lookahead;
205 	char *np = numbuf;
206 	int c, base;
207 	int digitseen = 0, floater = 0, negflg = 0;
208 	long lcval = 0;
209 	switch(type)
210 	{
211 	case 'e':
212 	case 'f':
213 	case 'g':
214 		floater++;
215 	case 'd':
216 	case 'u':
217 	case 'i':
218 		base = 10;
219 		break;
220 	case 'o':
221 		base = 8;
222 		break;
223 	case 'x':
224 		base = 16;
225 		break;
226 	default:
227 		return(0); /* unrecognized conversion character */
228 	}
229 	if (!flag_eof)
230 	{
231 		while (isws(c = locgetc()))
232 			;
233 	}
234 	else
235 		c = locgetc();
236 	if (c == EOF) {
237 		chcount--;
238 		return(-1);	/* EOF before match */
239 	}
240         if (floater != 0) {     /* Handle floating point with
241                                  * file_to_decimal. */
242                 decimal_mode    dm;
243                 decimal_record  dr;
244                 fp_exception_field_type efs;
245                 enum decimal_string_form form;
246                 char           *echar;
247                 int             nread, ic;
248                 char            buffer[1024];
249                 char           *nb = buffer;
250 
251                 locungetc(c);
252 		if (len > 1024)
253 			len = 1024;
254                 file_to_decimal(&nb, len, 0, &dr, &form, &echar, iop, &nread);
255                 if (stow && (form != invalid_form)) {
256                         dm.rd = fp_direction;
257                         if (size == 'l') {      /* double */
258                                 decimal_to_double((double *) va_arg(*listp, double *), &dm, &dr, &efs);
259                         } else if (size == 'L') {      /* quad */
260                                 decimal_to_quadruple((quadruple *)va_arg(*listp, double *), &dm, &dr, &efs);
261                         } else {/* single */
262                                 decimal_to_single((float *) va_arg(*listp, float *), &dm, &dr, &efs);
263                         }
264 			if ((efs & (1 << fp_overflow)) != 0) {
265 				errno = ERANGE;
266 			}
267 			if ((efs & (1 << fp_underflow)) != 0) {
268 				errno = ERANGE;
269                         }
270                 }
271 		chcount += nread;	/* Count characters read. */
272                 c = *nb;        /* Get first unused character. */
273                 ic = c;
274                 if (c == NULL) {
275                         ic = locgetc();
276                         c = ic;
277                         /*
278                          * If null, first unused may have been put back
279                          * already.
280                          */
281                 }
282                 if (ic == EOF) {
283                         chcount--;
284                         flag_eof = 1;
285                 } else if (locungetc(c) == EOF)
286                         flag_eof = 1;
287                 return ((form == invalid_form) ? 0 : 1);        /* successful match if
288                                                                  * non-zero */
289         }
290 	switch(c) {
291 	case '-':
292 		negflg++;
293 		if (type == 'u')
294 			break;
295 	case '+': /* fall-through */
296 		if (--len <= 0)
297 			break;
298 		if ( (c = locgetc()) != '0')
299 			break;
300         case '0':
301                 if ( (type != 'i') || (len <= 1) )
302 		   break;
303 	        if ( ((inchar = locgetc()) == 'x') || (inchar == 'X') )
304 	        {
305 		      /* If not using sscanf and *
306 		       * at the buffer's end     *
307 		       * then LOOK ahead         */
308 
309                    if ( (iop->_flag & _IOSTRG) || (iop->_cnt != 0) )
310 		      lookahead = locgetc();
311 		   else
312 		   {
313 		      if ( read(fileno(iop),np,1) == 1)
314 		         lookahead = *np;
315                       else
316 		         lookahead = EOF;
317                       chcount += 1;
318                    }
319 		   if ( isxdigit(lookahead) )
320 		   {
321 		       base =16;
322 
323 		       if ( len <= 2)
324 		       {
325 			  locungetc(lookahead);
326 			  len -= 1;            /* Take into account the 'x'*/
327                        }
328 		       else
329 		       {
330 		          c = lookahead;
331 			  len -= 2;           /* Take into account '0x'*/
332 		       }
333                    }
334 	           else
335 	           {
336 	               locungetc(lookahead);
337 	               locungetc(inchar);
338                    }
339 		}
340 	        else
341 	        {
342 		    locungetc(inchar);
343 	            base = 8;
344                 }
345 	}
346 	if (!negflg || type != 'u')
347 	    for (; --len  >= 0 ; *np++ = c, c = locgetc())
348 	    {
349 		if (np > numbuf + 62)
350 		{
351 		    errno = ERANGE;
352 		    return(0);
353                 }
354 		if (isdigit(c))
355 		{
356 			int digit;
357 			digit = c - '0';
358 			if (base == 8)
359 			{
360 				if (digit >= 8)
361 					break;
362 				if (stow)
363 					lcval = (lcval<<3) + digit;
364 			}
365 			else
366 			{
367 				if (stow)
368 				{
369 					if (base == 10)
370 						lcval = (((lcval<<2) + lcval)<<1) + digit;
371 					else /* base == 16 */
372 						lcval = (lcval<<4) + digit;
373 				}
374 			}
375 			digitseen++;
376 
377 
378 			continue;
379 		}
380 		else if (base == 16 && isxdigit(c))
381 		{
382 			int digit;
383 			digit = c - (isupper(c) ? 'A' - 10 : 'a' - 10);
384 			if (stow)
385 				lcval = (lcval<<4) + digit;
386 			digitseen++;
387 			continue;
388 		}
389 		break;
390 	    }
391 
392 
393 	if (stow && digitseen)
394 		{
395 	 	/* suppress possible overflow on 2's-comp negation */
396 			if (negflg && lcval != HIBITL)
397 				lcval = -lcval;
398 			if (size == 'l')
399 				*va_arg(*listp, long *) = lcval;
400 			else if (size == 'h')
401 				*va_arg(*listp, short *) = (short)lcval;
402 			else
403 				*va_arg(*listp, int *) = (int)lcval;
404 		}
405 	if (c == EOF) {
406 		chcount--;
407 		flag_eof=1;
408 	} else if (locungetc(c) == EOF)
409 		flag_eof=1;
410 	return (digitseen); /* successful match if non-zero */
411 }
412 
413 static int
414 string(int stow, int type, int len, char *tab, FILE *iop, va_list *listp)
415 {
416 	int ch;
417 	char *ptr;
418 	char *start;
419 
420 	start = ptr = stow ? va_arg(*listp, char *) : NULL;
421 	if (type == 's')
422 	{
423 		if (!flag_eof)
424 		{
425 			while (isws(ch = locgetc()))
426 				;
427 		}
428 		else
429 			ch = locgetc();
430 		if (ch == EOF)
431 			return(-1);	/* EOF before match */
432 		while (ch != EOF && !isws(ch))
433 		{
434 			if (stow)
435 				*ptr = ch;
436 			ptr++;
437 			if (--len <= 0)
438 				break;
439 			ch = locgetc();
440 		}
441 	} else if (type == 'c') {
442 		if (len == MAXINT)
443 			len = 1;
444 		while ( (ch = locgetc()) != EOF)
445 		{
446 			if (stow)
447 				*ptr = ch;
448 			ptr++;
449 			if (--len <= 0)
450 				break;
451 		}
452 	} else { /* type == '[' */
453 		while ( (ch = locgetc()) != EOF && !tab[ch])
454 		{
455 			if (stow)
456 				*ptr = ch;
457 			ptr++;
458 			if (--len <= 0)
459 				break;
460 		}
461 	}
462 	if (ch == EOF )
463 	{
464 		chcount-=1;
465 		flag_eof = 1;
466 	}
467 	else if (len > 0 && locungetc(ch) == EOF)
468 		flag_eof = 1;
469 	if (ptr == start)
470 		return(0); /* no match */
471 	if (stow && type != 'c')
472 		*ptr = '\0';
473 	return (1); /* successful match */
474 }
475 
476 static unsigned char *
477 setup(unsigned char *fmt, char *tab)
478 {
479 	int b, c, d, t = 0;
480 
481 	if (*fmt == '^')
482 	{
483 		t++;
484 		fmt++;
485 	}
486 	(void) memset(tab, !t, NCHARS);
487 	if ( (c = *fmt) == ']' || c == '-')  /* first char is special */
488 	{
489 		tab[c] = t;
490 		fmt++;
491 	}
492 	while ( (c = *fmt++) != ']')
493 	{
494 		if (c == '\0')
495 			return(NULL); /* unexpected end of format */
496 		if (c == '-' && (d = *fmt) != ']' && (b = fmt[-2]) < d)
497 		{
498 			(void) memset(&tab[b], t, d - b + 1);
499 			fmt++;
500 		}
501 		else
502 			tab[c] = t;
503 	}
504 	return (fmt);
505 }
506