xref: /freebsd/contrib/sendmail/libsm/vfscanf.c (revision 9f44a47fd07924afc035991af15d84e6585dea4f)
1 /*
2  * Copyright (c) 2000-2001, 2004 Proofpoint, Inc. and its suppliers.
3  *      All rights reserved.
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Torek.
9  *
10  * By using this file, you agree to the terms and conditions set
11  * forth in the LICENSE file which can be found at the top level of
12  * the sendmail distribution.
13  */
14 
15 #include <sm/gen.h>
16 SM_IDSTR(id, "@(#)$Id: vfscanf.c,v 1.55 2013-11-22 20:51:44 ca Exp $")
17 
18 #include <ctype.h>
19 #include <stdlib.h>
20 #include <errno.h>
21 #include <setjmp.h>
22 #include <sm/time.h>
23 #include <sm/varargs.h>
24 #include <sm/config.h>
25 #include <sm/io.h>
26 #include <sm/signal.h>
27 #include <sm/clock.h>
28 #include <sm/string.h>
29 #include "local.h"
30 
/*
**  Size of the scratch buffer used for numeric conversions.  The
**  scanner caps the field width at sizeof(buf) - 1, so at most 512
**  characters are collected and the terminating NUL always fits.
*/

#define BUF		513	/* numeric string buffer size (incl. NUL) */

/* Flags used during conversion. */
#define LONG		0x01	/* l: long or double */
#define SHORT		0x04	/* h: short */
#define QUAD		0x08	/* q: quad (same as ll) */
#define SUPPRESS	0x10	/* suppress assignment */
#define POINTER		0x20	/* weird %p pointer (`fake hex') */
#define NOSKIP		0x40	/* do not skip blanks */

/*
**  The following are used in numeric conversions only:
**  SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
**  SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
**
**  DPTOK/PFXOK and EXPOK/NZDIGITS intentionally share bit values:
**  a single conversion is either floating point or integral, so the
**  two sets are never in use at the same time.
*/

#define SIGNOK		0x080	/* +/- is (still) legal */
#define NDIGITS		0x100	/* no digits detected */

#define DPTOK		0x200	/* (float) decimal point is still legal */
#define EXPOK		0x400	/* (float) exponent (e+3, etc) still legal */

#define PFXOK		0x200	/* 0x prefix is (still) legal */
#define NZDIGITS	0x400	/* no zero digits detected */

/* Conversion types. */
#define CT_CHAR		0	/* %c conversion */
#define CT_CCL		1	/* %[...] conversion */
#define CT_STRING	2	/* %s conversion */
#define CT_INT		3	/* integer, i.e., strtoll or strtoull */
#define CT_FLOAT	4	/* floating, i.e., strtod */

static void		scanalrm __P((int));
static unsigned char	*sm_sccl __P((char *, unsigned char *));

/*
**  Jump target for the scan timeout: sm_vfscanf() calls setjmp() on it
**  before arming the timeout event and scanalrm() longjmp()s back to it.
*/

static jmp_buf		ScanTimeOut;
66 
/*
**  SCANALRM -- handler when timeout activated for sm_vfscanf()
**
**  Returns flow of control to where setjmp(ScanTimeOut) was set.
**  sm_vfscanf() registers this routine with sm_seteventm(); when it
**  runs, setjmp(ScanTimeOut) in sm_vfscanf() appears to return 1 and
**  sm_vfscanf() fails with errno set to EAGAIN.
**
**	Parameters:
**		sig -- unused
**
**	Returns:
**		does not return
**
**	Side Effects:
**		returns flow of control to setjmp(ScanTimeOut).
**
**	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
**		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
**		DOING.
*/

/* ARGSUSED0 */
static void
scanalrm(sig)
	int sig;
{
	/* non-local exit: unwinds straight back into sm_vfscanf() */
	longjmp(ScanTimeOut, 1);
}
93 
94 /*
95 **  SM_VFSCANF -- convert input into data units
96 **
97 **	Parameters:
98 **		fp -- file pointer for input data
99 **		timeout -- time intvl allowed to complete (milliseconds)
100 **		fmt0 -- format for finding data units
101 **		ap -- vectors for memory location for storing data units
102 **
103 **	Results:
104 **		Success: number of data units assigned
105 **		Failure: SM_IO_EOF
106 */
107 
108 int
109 sm_vfscanf(fp, timeout, fmt0, ap)
110 	register SM_FILE_T *fp;
111 	int SM_NONVOLATILE timeout;
112 	char const *fmt0;
113 	va_list ap;
114 {
115 	register unsigned char *SM_NONVOLATILE fmt = (unsigned char *) fmt0;
116 	register int c;		/* character from format, or conversion */
117 	register size_t width;	/* field width, or 0 */
118 	register char *p;	/* points into all kinds of strings */
119 	register int n;		/* handy integer */
120 	register int flags;	/* flags as defined above */
121 	register char *p0;	/* saves original value of p when necessary */
122 	int nassigned;		/* number of fields assigned */
123 	int nread;		/* number of characters consumed from fp */
124 	int base;		/* base argument to strtoll/strtoull */
125 
126 	/* conversion function (strtoll/strtoull) */
127 	ULONGLONG_T (*ccfn) __P((const char *, char **, int));
128 	char ccltab[256];	/* character class table for %[...] */
129 	char buf[BUF];		/* buffer for numeric conversions */
130 	SM_EVENT *evt = NULL;
131 
132 	/* `basefix' is used to avoid `if' tests in the integer scanner */
133 	static short basefix[17] =
134 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
135 
136 	if (timeout == SM_TIME_DEFAULT)
137 		timeout = fp->f_timeout;
138 	if (timeout == SM_TIME_IMMEDIATE)
139 	{
140 		/*
141 		**  Filling the buffer will take time and we are wanted to
142 		**  return immediately. So...
143 		*/
144 
145 		errno = EAGAIN;
146 		return SM_IO_EOF;
147 	}
148 
149 	if (timeout != SM_TIME_FOREVER)
150 	{
151 		if (setjmp(ScanTimeOut) != 0)
152 		{
153 			errno = EAGAIN;
154 			return SM_IO_EOF;
155 		}
156 
157 		evt = sm_seteventm(timeout, scanalrm, 0);
158 	}
159 
160 	nassigned = 0;
161 	nread = 0;
162 	base = 0;		/* XXX just to keep gcc happy */
163 	ccfn = NULL;		/* XXX just to keep gcc happy */
164 	for (;;)
165 	{
166 		c = *fmt++;
167 		if (c == 0)
168 		{
169 			if (evt != NULL)
170 				sm_clrevent(evt); /*  undo our timeout */
171 			return nassigned;
172 		}
173 		if (isspace(c))
174 		{
175 			while ((fp->f_r > 0 || sm_refill(fp, SM_TIME_FOREVER)
176 						== 0) &&
177 			    isspace(*fp->f_p))
178 				nread++, fp->f_r--, fp->f_p++;
179 			continue;
180 		}
181 		if (c != '%')
182 			goto literal;
183 		width = 0;
184 		flags = 0;
185 
186 		/*
187 		**  switch on the format.  continue if done;
188 		**  break once format type is derived.
189 		*/
190 
191 again:		c = *fmt++;
192 		switch (c)
193 		{
194 		  case '%':
195 literal:
196 			if (fp->f_r <= 0 && sm_refill(fp, SM_TIME_FOREVER))
197 				goto input_failure;
198 			if (*fp->f_p != c)
199 				goto match_failure;
200 			fp->f_r--, fp->f_p++;
201 			nread++;
202 			continue;
203 
204 		  case '*':
205 			flags |= SUPPRESS;
206 			goto again;
207 		  case 'h':
208 			flags |= SHORT;
209 			goto again;
210 		  case 'l':
211 			if (*fmt == 'l')
212 			{
213 				fmt++;
214 				flags |= QUAD;
215 			}
216 			else
217 			{
218 				flags |= LONG;
219 			}
220 			goto again;
221 		  case 'q':
222 			flags |= QUAD;
223 			goto again;
224 
225 		  case '0': case '1': case '2': case '3': case '4':
226 		  case '5': case '6': case '7': case '8': case '9':
227 			width = width * 10 + c - '0';
228 			goto again;
229 
230 		/*
231 		**  Conversions.
232 		**  Those marked `compat' are for 4.[123]BSD compatibility.
233 		**
234 		**  (According to ANSI, E and X formats are supposed
235 		**  to the same as e and x.  Sorry about that.)
236 		*/
237 
238 		  case 'D':	/* compat */
239 			flags |= LONG;
240 			/* FALLTHROUGH */
241 		  case 'd':
242 			c = CT_INT;
243 			ccfn = (ULONGLONG_T (*)())sm_strtoll;
244 			base = 10;
245 			break;
246 
247 		  case 'i':
248 			c = CT_INT;
249 			ccfn = (ULONGLONG_T (*)())sm_strtoll;
250 			base = 0;
251 			break;
252 
253 		  case 'O':	/* compat */
254 			flags |= LONG;
255 			/* FALLTHROUGH */
256 		  case 'o':
257 			c = CT_INT;
258 			ccfn = sm_strtoull;
259 			base = 8;
260 			break;
261 
262 		  case 'u':
263 			c = CT_INT;
264 			ccfn = sm_strtoull;
265 			base = 10;
266 			break;
267 
268 		  case 'X':
269 		  case 'x':
270 			flags |= PFXOK;	/* enable 0x prefixing */
271 			c = CT_INT;
272 			ccfn = sm_strtoull;
273 			base = 16;
274 			break;
275 
276 		  case 'E':
277 		  case 'G':
278 		  case 'e':
279 		  case 'f':
280 		  case 'g':
281 			c = CT_FLOAT;
282 			break;
283 
284 		  case 's':
285 			c = CT_STRING;
286 			break;
287 
288 		  case '[':
289 			fmt = sm_sccl(ccltab, fmt);
290 			flags |= NOSKIP;
291 			c = CT_CCL;
292 			break;
293 
294 		  case 'c':
295 			flags |= NOSKIP;
296 			c = CT_CHAR;
297 			break;
298 
299 		  case 'p':	/* pointer format is like hex */
300 			flags |= POINTER | PFXOK;
301 			c = CT_INT;
302 			ccfn = sm_strtoull;
303 			base = 16;
304 			break;
305 
306 		  case 'n':
307 			if (flags & SUPPRESS)	/* ??? */
308 				continue;
309 			if (flags & SHORT)
310 				*SM_VA_ARG(ap, short *) = nread;
311 			else if (flags & LONG)
312 				*SM_VA_ARG(ap, long *) = nread;
313 			else
314 				*SM_VA_ARG(ap, int *) = nread;
315 			continue;
316 
317 		/* Disgusting backwards compatibility hacks.	XXX */
318 		  case '\0':	/* compat */
319 			if (evt != NULL)
320 				sm_clrevent(evt); /*  undo our timeout */
321 			return SM_IO_EOF;
322 
323 		  default:	/* compat */
324 			if (isupper(c))
325 				flags |= LONG;
326 			c = CT_INT;
327 			ccfn = (ULONGLONG_T (*)()) sm_strtoll;
328 			base = 10;
329 			break;
330 		}
331 
332 		/* We have a conversion that requires input. */
333 		if (fp->f_r <= 0 && sm_refill(fp, SM_TIME_FOREVER))
334 			goto input_failure;
335 
336 		/*
337 		**  Consume leading white space, except for formats
338 		**  that suppress this.
339 		*/
340 
341 		if ((flags & NOSKIP) == 0)
342 		{
343 			while (isspace(*fp->f_p))
344 			{
345 				nread++;
346 				if (--fp->f_r > 0)
347 					fp->f_p++;
348 				else if (sm_refill(fp, SM_TIME_FOREVER))
349 					goto input_failure;
350 			}
351 			/*
352 			**  Note that there is at least one character in
353 			**  the buffer, so conversions that do not set NOSKIP
354 			**  can no longer result in an input failure.
355 			*/
356 		}
357 
358 		/* Do the conversion. */
359 		switch (c)
360 		{
361 		  case CT_CHAR:
362 			/* scan arbitrary characters (sets NOSKIP) */
363 			if (width == 0)
364 				width = 1;
365 			if (flags & SUPPRESS)
366 			{
367 				size_t sum = 0;
368 				for (;;)
369 				{
370 					if ((size_t) (n = fp->f_r) < width)
371 					{
372 						sum += n;
373 						width -= n;
374 						fp->f_p += n;
375 						if (sm_refill(fp,
376 							      SM_TIME_FOREVER))
377 						{
378 							if (sum == 0)
379 								goto input_failure;
380 							break;
381 						}
382 					}
383 					else
384 					{
385 						sum += width;
386 						fp->f_r -= width;
387 						fp->f_p += width;
388 						break;
389 					}
390 				}
391 				nread += sum;
392 			}
393 			else
394 			{
395 				size_t r;
396 
397 				r = sm_io_read(fp, SM_TIME_FOREVER,
398 						(void *) SM_VA_ARG(ap, char *),
399 						width);
400 				if (r == 0)
401 					goto input_failure;
402 				nread += r;
403 				nassigned++;
404 			}
405 			break;
406 
407 		  case CT_CCL:
408 			/* scan a (nonempty) character class (sets NOSKIP) */
409 			if (width == 0)
410 				width = (size_t)~0;	/* `infinity' */
411 
412 			/* take only those things in the class */
413 			if (flags & SUPPRESS)
414 			{
415 				n = 0;
416 				while (ccltab[*fp->f_p] != '\0')
417 				{
418 					n++, fp->f_r--, fp->f_p++;
419 					if (--width == 0)
420 						break;
421 					if (fp->f_r <= 0 &&
422 					    sm_refill(fp, SM_TIME_FOREVER))
423 					{
424 						if (n == 0) /* XXX how? */
425 							goto input_failure;
426 						break;
427 					}
428 				}
429 				if (n == 0)
430 					goto match_failure;
431 			}
432 			else
433 			{
434 				p0 = p = SM_VA_ARG(ap, char *);
435 				while (ccltab[*fp->f_p] != '\0')
436 				{
437 					fp->f_r--;
438 					*p++ = *fp->f_p++;
439 					if (--width == 0)
440 						break;
441 					if (fp->f_r <= 0 &&
442 					    sm_refill(fp, SM_TIME_FOREVER))
443 					{
444 						if (p == p0)
445 							goto input_failure;
446 						break;
447 					}
448 				}
449 				n = p - p0;
450 				if (n == 0)
451 					goto match_failure;
452 				*p = 0;
453 				nassigned++;
454 			}
455 			nread += n;
456 			break;
457 
458 		  case CT_STRING:
459 			/* like CCL, but zero-length string OK, & no NOSKIP */
460 			if (width == 0)
461 				width = (size_t)~0;
462 			if (flags & SUPPRESS)
463 			{
464 				n = 0;
465 				while (!isspace(*fp->f_p))
466 				{
467 					n++, fp->f_r--, fp->f_p++;
468 					if (--width == 0)
469 						break;
470 					if (fp->f_r <= 0 &&
471 					    sm_refill(fp, SM_TIME_FOREVER))
472 						break;
473 				}
474 				nread += n;
475 			}
476 			else
477 			{
478 				p0 = p = SM_VA_ARG(ap, char *);
479 				while (!isspace(*fp->f_p))
480 				{
481 					fp->f_r--;
482 					*p++ = *fp->f_p++;
483 					if (--width == 0)
484 						break;
485 					if (fp->f_r <= 0 &&
486 					    sm_refill(fp, SM_TIME_FOREVER))
487 						break;
488 				}
489 				*p = 0;
490 				nread += p - p0;
491 				nassigned++;
492 			}
493 			continue;
494 
495 		  case CT_INT:
496 			/* scan an integer as if by strtoll/strtoull */
497 #if SM_CONF_BROKEN_SIZE_T
498 			if (width == 0 || width > sizeof(buf) - 1)
499 				width = sizeof(buf) - 1;
500 #else /* SM_CONF_BROKEN_SIZE_T */
501 			/* size_t is unsigned, hence this optimisation */
502 			if (--width > sizeof(buf) - 2)
503 				width = sizeof(buf) - 2;
504 			width++;
505 #endif /* SM_CONF_BROKEN_SIZE_T */
506 			flags |= SIGNOK | NDIGITS | NZDIGITS;
507 			for (p = buf; width > 0; width--)
508 			{
509 				c = *fp->f_p;
510 
511 				/*
512 				**  Switch on the character; `goto ok'
513 				**  if we accept it as a part of number.
514 				*/
515 
516 				switch (c)
517 				{
518 
519 				/*
520 				**  The digit 0 is always legal, but is
521 				**  special.  For %i conversions, if no
522 				**  digits (zero or nonzero) have been
523 				**  scanned (only signs), we will have
524 				**  base==0.  In that case, we should set
525 				**  it to 8 and enable 0x prefixing.
526 				**  Also, if we have not scanned zero digits
527 				**  before this, do not turn off prefixing
528 				**  (someone else will turn it off if we
529 				**  have scanned any nonzero digits).
530 				*/
531 
532 				  case '0':
533 					if (base == 0)
534 					{
535 						base = 8;
536 						flags |= PFXOK;
537 					}
538 					if (flags & NZDIGITS)
539 					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
540 					else
541 					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
542 					goto ok;
543 
544 				/* 1 through 7 always legal */
545 				  case '1': case '2': case '3':
546 				  case '4': case '5': case '6': case '7':
547 					base = basefix[base];
548 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
549 					goto ok;
550 
551 				/* digits 8 and 9 ok iff decimal or hex */
552 				  case '8': case '9':
553 					base = basefix[base];
554 					if (base <= 8)
555 						break;	/* not legal here */
556 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
557 					goto ok;
558 
559 				/* letters ok iff hex */
560 				  case 'A': case 'B': case 'C':
561 				  case 'D': case 'E': case 'F':
562 				  case 'a': case 'b': case 'c':
563 				  case 'd': case 'e': case 'f':
564 
565 					/* no need to fix base here */
566 					if (base <= 10)
567 						break;	/* not legal here */
568 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
569 					goto ok;
570 
571 				/* sign ok only as first character */
572 				  case '+': case '-':
573 					if (flags & SIGNOK)
574 					{
575 						flags &= ~SIGNOK;
576 						goto ok;
577 					}
578 					break;
579 
580 				/* x ok iff flag still set & 2nd char */
581 				  case 'x': case 'X':
582 					if (flags & PFXOK && p == buf + 1)
583 					{
584 						base = 16;	/* if %i */
585 						flags &= ~PFXOK;
586 						goto ok;
587 					}
588 					break;
589 				}
590 
591 				/*
592 				**  If we got here, c is not a legal character
593 				**  for a number.  Stop accumulating digits.
594 				*/
595 
596 				break;
597 		ok:
598 				/* c is legal: store it and look at the next. */
599 				*p++ = c;
600 				if (--fp->f_r > 0)
601 					fp->f_p++;
602 				else if (sm_refill(fp, SM_TIME_FOREVER))
603 					break;		/* SM_IO_EOF */
604 			}
605 
606 			/*
607 			**  If we had only a sign, it is no good; push
608 			**  back the sign.  If the number ends in `x',
609 			**  it was [sign] '0' 'x', so push back the x
610 			**  and treat it as [sign] '0'.
611 			*/
612 
613 			if (flags & NDIGITS)
614 			{
615 				if (p > buf)
616 					(void) sm_io_ungetc(fp, SM_TIME_DEFAULT,
617 							    *(unsigned char *)--p);
618 				goto match_failure;
619 			}
620 			c = ((unsigned char *)p)[-1];
621 			if (c == 'x' || c == 'X')
622 			{
623 				--p;
624 				(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
625 			}
626 			if ((flags & SUPPRESS) == 0)
627 			{
628 				ULONGLONG_T res;
629 
630 				*p = 0;
631 				res = (*ccfn)(buf, (char **)NULL, base);
632 				if (flags & POINTER)
633 					*SM_VA_ARG(ap, void **) =
634 					    (void *)(long) res;
635 				else if (flags & QUAD)
636 					*SM_VA_ARG(ap, LONGLONG_T *) = res;
637 				else if (flags & LONG)
638 					*SM_VA_ARG(ap, long *) = res;
639 				else if (flags & SHORT)
640 					*SM_VA_ARG(ap, short *) = res;
641 				else
642 					*SM_VA_ARG(ap, int *) = res;
643 				nassigned++;
644 			}
645 			nread += p - buf;
646 			break;
647 
648 		  case CT_FLOAT:
649 			/* scan a floating point number as if by strtod */
650 			if (width == 0 || width > sizeof(buf) - 1)
651 				width = sizeof(buf) - 1;
652 			flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
653 			for (p = buf; width; width--)
654 			{
655 				c = *fp->f_p;
656 
657 				/*
658 				**  This code mimicks the integer conversion
659 				**  code, but is much simpler.
660 				*/
661 
662 				switch (c)
663 				{
664 
665 				  case '0': case '1': case '2': case '3':
666 				  case '4': case '5': case '6': case '7':
667 				  case '8': case '9':
668 					flags &= ~(SIGNOK | NDIGITS);
669 					goto fok;
670 
671 				  case '+': case '-':
672 					if (flags & SIGNOK)
673 					{
674 						flags &= ~SIGNOK;
675 						goto fok;
676 					}
677 					break;
678 				  case '.':
679 					if (flags & DPTOK)
680 					{
681 						flags &= ~(SIGNOK | DPTOK);
682 						goto fok;
683 					}
684 					break;
685 				  case 'e': case 'E':
686 
687 					/* no exponent without some digits */
688 					if ((flags&(NDIGITS|EXPOK)) == EXPOK)
689 					{
690 						flags =
691 						    (flags & ~(EXPOK|DPTOK)) |
692 						    SIGNOK | NDIGITS;
693 						goto fok;
694 					}
695 					break;
696 				}
697 				break;
698 		fok:
699 				*p++ = c;
700 				if (--fp->f_r > 0)
701 					fp->f_p++;
702 				else if (sm_refill(fp, SM_TIME_FOREVER))
703 					break;	/* SM_IO_EOF */
704 			}
705 
706 			/*
707 			**  If no digits, might be missing exponent digits
708 			**  (just give back the exponent) or might be missing
709 			**  regular digits, but had sign and/or decimal point.
710 			*/
711 
712 			if (flags & NDIGITS)
713 			{
714 				if (flags & EXPOK)
715 				{
716 					/* no digits at all */
717 					while (p > buf)
718 						(void) sm_io_ungetc(fp,
719 							     SM_TIME_DEFAULT,
720 							     *(unsigned char *)--p);
721 					goto match_failure;
722 				}
723 
724 				/* just a bad exponent (e and maybe sign) */
725 				c = *(unsigned char *) --p;
726 				if (c != 'e' && c != 'E')
727 				{
728 					(void) sm_io_ungetc(fp, SM_TIME_DEFAULT,
729 							    c); /* sign */
730 					c = *(unsigned char *)--p;
731 				}
732 				(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
733 			}
734 			if ((flags & SUPPRESS) == 0)
735 			{
736 				double res;
737 
738 				*p = 0;
739 				res = strtod(buf, (char **) NULL);
740 				if (flags & LONG)
741 					*SM_VA_ARG(ap, double *) = res;
742 				else
743 					*SM_VA_ARG(ap, float *) = res;
744 				nassigned++;
745 			}
746 			nread += p - buf;
747 			break;
748 		}
749 	}
750 input_failure:
751 	if (evt != NULL)
752 		sm_clrevent(evt); /*  undo our timeout */
753 	return nassigned ? nassigned : -1;
754 match_failure:
755 	if (evt != NULL)
756 		sm_clrevent(evt); /*  undo our timeout */
757 	return nassigned;
758 }
759 
/*
**  SM_SCCL -- sequenced character comparison list
**
**  Fill in the given table from the scanset at the given format
**  (just after `[').  Return a pointer to the character past the
**  closing `]'.  The table has a 1 wherever characters should be
**  considered part of the scanset.
**
**	Parameters:
**		tab -- array flagging "active" char's to match (returned);
**			all 256 entries are written
**		fmt -- character list (within "[]")
**
**	Results:
**		Pointer to the character following the closing `]', or
**		to the terminating NUL of the format if it ended before
**		a closing `]' was found.
*/

static unsigned char *
sm_sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^')
	{
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	}
	else
		v = 0;		/* default => reject */

	/* should probably use memset here */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return fmt - 1;	/* format ended before closing ] */

	/*
	**  Now set the entries corresponding to the actual scanset
	**  to the opposite of the above.
	**
	**  The first character may be ']' (or '-') without being special;
	**  the last character may be '-'.
	*/

	v = 1 - v;
	for (;;)
	{
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n)
		{

		  case 0:			/* format ended too soon */
			return fmt - 1;

		  case '-':
			/*
			**  A scanset of the form
			**	[01+-]
			**  is defined as `the digit 0, the digit 1,
			**  the character +, the character -', but
			**  the effect of a scanset such as
			**	[a-zA-Z0-9]
			**  is implementation defined.  The V7 Unix
			**  scanf treats `a-z' as `the letters a through
			**  z', but treats `a-a' as `the letter a, the
			**  character -, and the letter a'.
			**
			**  For compatibility, the `-' is not considered
			**  to define a range if the character following
			**  it is either a close bracket (required by ANSI)
			**  or is not numerically greater than the character
			**  we just stored in the table (c).
			**
			**  The test must be `<=', not `<': with n == c the
			**  fill loop below would mark c + 1 as well, and
			**  for c == 255 it would write past the end of the
			**  256-entry table.
			*/

			n = *fmt;
			if (n == ']' || n <= c)
			{
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do
			{
				/* fill in the range; n > c, so ++c <= n <= 255 */
				tab[++c] = v;
			} while (c < n);
#if 1	/* XXX another disgusting compatibility hack */

			/*
			**  Alas, the V7 Unix scanf also treats formats
			**  such as [a-c-e] as `the letters a through e'.
			**  This too is permitted by the standard....
			*/

			goto doswitch;
#else
			c = *fmt++;
			if (c == 0)
				return fmt - 1;
			if (c == ']')
				return fmt;
			break;
#endif

		  case ']':		/* end of scanset */
			return fmt;

		  default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
878