1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Torek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/ctype.h>
40 #include <sys/limits.h>
41 #include <sys/stdarg.h>
42 #include <sys/stddef.h>
43
#define	BUF		32	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * These are OR-ed into the `flags' local of vsscanf() while a single
 * `%' directive is being parsed and are reset to 0 for every directive.
 */
#define	LONG		0x01	/* l: long */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */
#define	QUAD		0x400	/* q or ll: quad_t */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * The floating point and integral groups deliberately share bit values
 * (0x100 and 0x200); a given conversion only ever uses one of the groups.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 *
 * Once the conversion character of a directive has been recognised,
 * vsscanf() replaces it with one of these values and dispatches on it.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */

/*
 * Signature of the string-to-integer routine used for CT_INT; vsscanf()
 * stores either strtouq or (through a cast) strtoq in a variable of
 * this type.
 */
typedef u_quad_t (*ccfntype)(const char *, char **, int);

/* Scanset parser for %[...]; defined at the end of this file. */
static const u_char *__sccl(char *, const u_char *);
84
/*
 * Scan the NUL-terminated string `ibuf' according to `fmt', storing the
 * converted values through the pointer arguments that follow.
 *
 * This is only the variadic front end: the argument list is packaged
 * into a va_list and all of the work is done by vsscanf(), whose return
 * value is passed back unchanged.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;
	int nmatched;

	va_start(args, fmt);
	nmatched = vsscanf(ibuf, fmt, args);
	va_end(args);

	return (nmatched);
}
96
97 int
vsscanf(const char * inp,char const * fmt0,va_list ap)98 vsscanf(const char *inp, char const *fmt0, va_list ap)
99 {
100 int inr;
101 const u_char *fmt = (const u_char *)fmt0;
102 int c; /* character from format, or conversion */
103 size_t width; /* field width, or 0 */
104 char *p; /* points into all kinds of strings */
105 int n; /* handy integer */
106 int flags; /* flags as defined above */
107 char *p0; /* saves original value of p when necessary */
108 int nassigned; /* number of fields assigned */
109 int nconversions; /* number of conversions */
110 int nread; /* number of characters consumed from fp */
111 int base; /* base argument to strtoq/strtouq */
112 ccfntype ccfn; /* conversion function (strtoq/strtouq) */
113 char ccltab[256]; /* character class table for %[...] */
114 char buf[BUF]; /* buffer for numeric conversions */
115
116 /* `basefix' is used to avoid `if' tests in the integer scanner */
117 static short basefix[17] =
118 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
119
120 inr = strlen(inp);
121
122 nassigned = 0;
123 nconversions = 0;
124 nread = 0;
125 base = 0; /* XXX just to keep gcc happy */
126 ccfn = NULL; /* XXX just to keep gcc happy */
127 for (;;) {
128 c = *fmt++;
129 if (c == 0)
130 return (nassigned);
131 if (isspace(c)) {
132 while (inr > 0 && isspace(*inp))
133 nread++, inr--, inp++;
134 continue;
135 }
136 if (c != '%')
137 goto literal;
138 width = 0;
139 flags = 0;
140 /*
141 * switch on the format. continue if done;
142 * break once format type is derived.
143 */
144 again: c = *fmt++;
145 switch (c) {
146 case '%':
147 literal:
148 if (inr <= 0)
149 goto input_failure;
150 if (*inp != c)
151 goto match_failure;
152 inr--, inp++;
153 nread++;
154 continue;
155
156 case '*':
157 flags |= SUPPRESS;
158 goto again;
159 case 'j':
160 flags |= INTMAXT;
161 goto again;
162 case 'l':
163 if (flags & LONG){
164 flags &= ~LONG;
165 flags |= QUAD;
166 } else {
167 flags |= LONG;
168 }
169 goto again;
170 case 'q':
171 flags |= QUAD;
172 goto again;
173 case 't':
174 flags |= PTRDIFFT;
175 goto again;
176 case 'z':
177 flags |= SIZET;
178 goto again;
179 case 'h':
180 if (flags & SHORT){
181 flags &= ~SHORT;
182 flags |= SHORTSHORT;
183 } else {
184 flags |= SHORT;
185 }
186 goto again;
187
188 case '0': case '1': case '2': case '3': case '4':
189 case '5': case '6': case '7': case '8': case '9':
190 width = width * 10 + c - '0';
191 goto again;
192
193 /*
194 * Conversions.
195 *
196 */
197 case 'd':
198 c = CT_INT;
199 ccfn = (ccfntype)strtoq;
200 base = 10;
201 break;
202
203 case 'i':
204 c = CT_INT;
205 ccfn = (ccfntype)strtoq;
206 base = 0;
207 break;
208
209 case 'o':
210 c = CT_INT;
211 ccfn = strtouq;
212 base = 8;
213 break;
214
215 case 'u':
216 c = CT_INT;
217 ccfn = strtouq;
218 base = 10;
219 break;
220
221 case 'x':
222 flags |= PFXOK; /* enable 0x prefixing */
223 c = CT_INT;
224 ccfn = strtouq;
225 base = 16;
226 break;
227
228 case 's':
229 c = CT_STRING;
230 break;
231
232 case '[':
233 fmt = __sccl(ccltab, fmt);
234 flags |= NOSKIP;
235 c = CT_CCL;
236 break;
237
238 case 'c':
239 flags |= NOSKIP;
240 c = CT_CHAR;
241 break;
242
243 case 'p': /* pointer format is like hex */
244 flags |= POINTER | PFXOK;
245 c = CT_INT;
246 ccfn = strtouq;
247 base = 16;
248 break;
249
250 case 'n':
251 nconversions++;
252 if (flags & SUPPRESS) /* ??? */
253 continue;
254 if (flags & SHORTSHORT)
255 *va_arg(ap, char *) = nread;
256 else if (flags & SHORT)
257 *va_arg(ap, short *) = nread;
258 else if (flags & LONG)
259 *va_arg(ap, long *) = nread;
260 else if (flags & QUAD)
261 *va_arg(ap, quad_t *) = nread;
262 else if (flags & INTMAXT)
263 *va_arg(ap, intmax_t *) = nread;
264 else if (flags & SIZET)
265 *va_arg(ap, size_t *) = nread;
266 else if (flags & PTRDIFFT)
267 *va_arg(ap, ptrdiff_t *) = nread;
268 else
269 *va_arg(ap, int *) = nread;
270 continue;
271 }
272
273 /*
274 * We have a conversion that requires input.
275 */
276 if (inr <= 0)
277 goto input_failure;
278
279 /*
280 * Consume leading white space, except for formats
281 * that suppress this.
282 */
283 if ((flags & NOSKIP) == 0) {
284 while (isspace(*inp)) {
285 nread++;
286 if (--inr > 0)
287 inp++;
288 else
289 goto input_failure;
290 }
291 /*
292 * Note that there is at least one character in
293 * the buffer, so conversions that do not set NOSKIP
294 * can no longer result in an input failure.
295 */
296 }
297
298 /*
299 * Do the conversion.
300 */
301 switch (c) {
302 case CT_CHAR:
303 /* scan arbitrary characters (sets NOSKIP) */
304 if (width == 0)
305 width = 1;
306 if (flags & SUPPRESS) {
307 size_t sum = 0;
308 for (;;) {
309 if ((n = inr) < width) {
310 sum += n;
311 width -= n;
312 inp += n;
313 if (sum == 0)
314 goto input_failure;
315 break;
316 } else {
317 sum += width;
318 inr -= width;
319 inp += width;
320 break;
321 }
322 }
323 nread += sum;
324 } else {
325 bcopy(inp, va_arg(ap, char *), width);
326 inr -= width;
327 inp += width;
328 nread += width;
329 nassigned++;
330 }
331 nconversions++;
332 break;
333
334 case CT_CCL:
335 /* scan a (nonempty) character class (sets NOSKIP) */
336 if (width == 0)
337 width = (size_t)~0; /* `infinity' */
338 /* take only those things in the class */
339 if (flags & SUPPRESS) {
340 n = 0;
341 while (ccltab[(unsigned char)*inp]) {
342 n++, inr--, inp++;
343 if (--width == 0)
344 break;
345 if (inr <= 0) {
346 if (n == 0)
347 goto input_failure;
348 break;
349 }
350 }
351 if (n == 0)
352 goto match_failure;
353 } else {
354 p0 = p = va_arg(ap, char *);
355 while (ccltab[(unsigned char)*inp]) {
356 inr--;
357 *p++ = *inp++;
358 if (--width == 0)
359 break;
360 if (inr <= 0) {
361 if (p == p0)
362 goto input_failure;
363 break;
364 }
365 }
366 n = p - p0;
367 if (n == 0)
368 goto match_failure;
369 *p = 0;
370 nassigned++;
371 }
372 nread += n;
373 nconversions++;
374 break;
375
376 case CT_STRING:
377 /* like CCL, but zero-length string OK, & no NOSKIP */
378 if (width == 0)
379 width = (size_t)~0;
380 if (flags & SUPPRESS) {
381 n = 0;
382 while (!isspace(*inp)) {
383 n++, inr--, inp++;
384 if (--width == 0)
385 break;
386 if (inr <= 0)
387 break;
388 }
389 nread += n;
390 } else {
391 p0 = p = va_arg(ap, char *);
392 while (!isspace(*inp)) {
393 inr--;
394 *p++ = *inp++;
395 if (--width == 0)
396 break;
397 if (inr <= 0)
398 break;
399 }
400 *p = 0;
401 nread += p - p0;
402 nassigned++;
403 }
404 nconversions++;
405 continue;
406
407 case CT_INT:
408 /* scan an integer as if by strtoq/strtouq */
409 #ifdef hardway
410 if (width == 0 || width > sizeof(buf) - 1)
411 width = sizeof(buf) - 1;
412 #else
413 /* size_t is unsigned, hence this optimisation */
414 if (--width > sizeof(buf) - 2)
415 width = sizeof(buf) - 2;
416 width++;
417 #endif
418 flags |= SIGNOK | NDIGITS | NZDIGITS;
419 for (p = buf; width; width--) {
420 c = *inp;
421 /*
422 * Switch on the character; `goto ok'
423 * if we accept it as a part of number.
424 */
425 switch (c) {
426 /*
427 * The digit 0 is always legal, but is
428 * special. For %i conversions, if no
429 * digits (zero or nonzero) have been
430 * scanned (only signs), we will have
431 * base==0. In that case, we should set
432 * it to 8 and enable 0x prefixing.
433 * Also, if we have not scanned zero digits
434 * before this, do not turn off prefixing
435 * (someone else will turn it off if we
436 * have scanned any nonzero digits).
437 */
438 case '0':
439 if (base == 0) {
440 base = 8;
441 flags |= PFXOK;
442 }
443 if (flags & NZDIGITS)
444 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
445 else
446 flags &= ~(SIGNOK|PFXOK|NDIGITS);
447 goto ok;
448
449 /* 1 through 7 always legal */
450 case '1': case '2': case '3':
451 case '4': case '5': case '6': case '7':
452 base = basefix[base];
453 flags &= ~(SIGNOK | PFXOK | NDIGITS);
454 goto ok;
455
456 /* digits 8 and 9 ok iff decimal or hex */
457 case '8': case '9':
458 base = basefix[base];
459 if (base <= 8)
460 break; /* not legal here */
461 flags &= ~(SIGNOK | PFXOK | NDIGITS);
462 goto ok;
463
464 /* letters ok iff hex */
465 case 'A': case 'B': case 'C':
466 case 'D': case 'E': case 'F':
467 case 'a': case 'b': case 'c':
468 case 'd': case 'e': case 'f':
469 /* no need to fix base here */
470 if (base <= 10)
471 break; /* not legal here */
472 flags &= ~(SIGNOK | PFXOK | NDIGITS);
473 goto ok;
474
475 /* sign ok only as first character */
476 case '+': case '-':
477 if (flags & SIGNOK) {
478 flags &= ~SIGNOK;
479 goto ok;
480 }
481 break;
482
483 /* x ok iff flag still set & 2nd char */
484 case 'x': case 'X':
485 if (flags & PFXOK && p == buf + 1) {
486 base = 16; /* if %i */
487 flags &= ~PFXOK;
488 goto ok;
489 }
490 break;
491 }
492
493 /*
494 * If we got here, c is not a legal character
495 * for a number. Stop accumulating digits.
496 */
497 break;
498 ok:
499 /*
500 * c is legal: store it and look at the next.
501 */
502 *p++ = c;
503 if (--inr > 0)
504 inp++;
505 else
506 break; /* end of input */
507 }
508 /*
509 * If we had only a sign, it is no good; push
510 * back the sign. If the number ends in `x',
511 * it was [sign] '0' 'x', so push back the x
512 * and treat it as [sign] '0'.
513 */
514 if (flags & NDIGITS) {
515 if (p > buf) {
516 inp--;
517 inr++;
518 }
519 goto match_failure;
520 }
521 c = ((u_char *)p)[-1];
522 if (c == 'x' || c == 'X') {
523 --p;
524 inp--;
525 inr++;
526 }
527 if ((flags & SUPPRESS) == 0) {
528 u_quad_t res;
529
530 *p = 0;
531 res = (*ccfn)(buf, (char **)NULL, base);
532 if (flags & POINTER)
533 *va_arg(ap, void **) =
534 (void *)(uintptr_t)res;
535 else if (flags & SHORTSHORT)
536 *va_arg(ap, char *) = res;
537 else if (flags & SHORT)
538 *va_arg(ap, short *) = res;
539 else if (flags & LONG)
540 *va_arg(ap, long *) = res;
541 else if (flags & QUAD)
542 *va_arg(ap, quad_t *) = res;
543 else if (flags & INTMAXT)
544 *va_arg(ap, intmax_t *) = res;
545 else if (flags & PTRDIFFT)
546 *va_arg(ap, ptrdiff_t *) = res;
547 else if (flags & SIZET)
548 *va_arg(ap, size_t *) = res;
549 else
550 *va_arg(ap, int *) = res;
551 nassigned++;
552 }
553 nread += p - buf;
554 nconversions++;
555 break;
556 }
557 }
558 input_failure:
559 return (nconversions != 0 ? nassigned : -1);
560 match_failure:
561 return (nassigned);
562 }
563
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * a closing `]' is seen.  The table has a 1 wherever characters
 * should be considered part of the scanset and a 0 elsewhere.
 *
 * `tab' must have room for 256 entries, one per possible byte value;
 * all 256 are written on every call.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* first preset the whole table to the default */
	for (n = 0; n < 256; n++)
		tab[n] = v;

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {
		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': with n == c the
			 * fill loop below would still store tab[c + 1],
			 * adding a character that is not in the set and,
			 * for c == 255, writing past the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;		/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; n > c, so this ends at tab[n] */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
654