1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Chris Torek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/ctype.h> 40 #include <sys/limits.h> 41 #include <sys/stddef.h> 42 43 /* 44 * Note that stdarg.h and the ANSI style va_start macro is used for both 45 * ANSI and traditional C compilers. 46 */ 47 #include <machine/stdarg.h> 48 49 #define BUF 32 /* Maximum length of numeric string. */ 50 51 /* 52 * Flags used during conversion. 53 */ 54 #define LONG 0x01 /* l: long or double */ 55 #define SHORT 0x04 /* h: short */ 56 #define SUPPRESS 0x08 /* suppress assignment */ 57 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 58 #define NOSKIP 0x20 /* do not skip blanks */ 59 #define QUAD 0x400 60 #define INTMAXT 0x800 /* j: intmax_t */ 61 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 62 #define SIZET 0x2000 /* z: size_t */ 63 #define SHORTSHORT 0x4000 /** hh: char */ 64 65 /* 66 * The following are used in numeric conversions only: 67 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 68 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 69 */ 70 #define SIGNOK 0x40 /* +/- is (still) legal */ 71 #define NDIGITS 0x80 /* no digits detected */ 72 73 #define DPTOK 0x100 /* (float) decimal point is still legal */ 74 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 75 76 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 77 #define NZDIGITS 0x200 /* no zero digits detected */ 78 79 /* 80 * Conversion types. 81 */ 82 #define CT_CHAR 0 /* %c conversion */ 83 #define CT_CCL 1 /* %[...] conversion */ 84 #define CT_STRING 2 /* %s conversion */ 85 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 86 typedef u_quad_t (*ccfntype)(const char *, char **, int); 87 88 static const u_char *__sccl(char *, const u_char *); 89 90 int 91 sscanf(const char *ibuf, const char *fmt, ...) 92 { 93 va_list ap; 94 int ret; 95 96 va_start(ap, fmt); 97 ret = vsscanf(ibuf, fmt, ap); 98 va_end(ap); 99 return(ret); 100 } 101 102 int 103 vsscanf(const char *inp, char const *fmt0, va_list ap) 104 { 105 int inr; 106 const u_char *fmt = (const u_char *)fmt0; 107 int c; /* character from format, or conversion */ 108 size_t width; /* field width, or 0 */ 109 char *p; /* points into all kinds of strings */ 110 int n; /* handy integer */ 111 int flags; /* flags as defined above */ 112 char *p0; /* saves original value of p when necessary */ 113 int nassigned; /* number of fields assigned */ 114 int nconversions; /* number of conversions */ 115 int nread; /* number of characters consumed from fp */ 116 int base; /* base argument to strtoq/strtouq */ 117 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 118 char ccltab[256]; /* character class table for %[...] */ 119 char buf[BUF]; /* buffer for numeric conversions */ 120 121 /* `basefix' is used to avoid `if' tests in the integer scanner */ 122 static short basefix[17] = 123 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 124 125 inr = strlen(inp); 126 127 nassigned = 0; 128 nconversions = 0; 129 nread = 0; 130 base = 0; /* XXX just to keep gcc happy */ 131 ccfn = NULL; /* XXX just to keep gcc happy */ 132 for (;;) { 133 c = *fmt++; 134 if (c == 0) 135 return (nassigned); 136 if (isspace(c)) { 137 while (inr > 0 && isspace(*inp)) 138 nread++, inr--, inp++; 139 continue; 140 } 141 if (c != '%') 142 goto literal; 143 width = 0; 144 flags = 0; 145 /* 146 * switch on the format. continue if done; 147 * break once format type is derived. 148 */ 149 again: c = *fmt++; 150 switch (c) { 151 case '%': 152 literal: 153 if (inr <= 0) 154 goto input_failure; 155 if (*inp != c) 156 goto match_failure; 157 inr--, inp++; 158 nread++; 159 continue; 160 161 case '*': 162 flags |= SUPPRESS; 163 goto again; 164 case 'j': 165 flags |= INTMAXT; 166 goto again; 167 case 'l': 168 if (flags & LONG){ 169 flags &= ~LONG; 170 flags |= QUAD; 171 } else { 172 flags |= LONG; 173 } 174 goto again; 175 case 'q': 176 flags |= QUAD; 177 goto again; 178 case 't': 179 flags |= PTRDIFFT; 180 goto again; 181 case 'z': 182 flags |= SIZET; 183 goto again; 184 case 'h': 185 if (flags & SHORT){ 186 flags &= ~SHORT; 187 flags |= SHORTSHORT; 188 } else { 189 flags |= SHORT; 190 } 191 goto again; 192 193 case '0': case '1': case '2': case '3': case '4': 194 case '5': case '6': case '7': case '8': case '9': 195 width = width * 10 + c - '0'; 196 goto again; 197 198 /* 199 * Conversions. 200 * 201 */ 202 case 'd': 203 c = CT_INT; 204 ccfn = (ccfntype)strtoq; 205 base = 10; 206 break; 207 208 case 'i': 209 c = CT_INT; 210 ccfn = (ccfntype)strtoq; 211 base = 0; 212 break; 213 214 case 'o': 215 c = CT_INT; 216 ccfn = strtouq; 217 base = 8; 218 break; 219 220 case 'u': 221 c = CT_INT; 222 ccfn = strtouq; 223 base = 10; 224 break; 225 226 case 'x': 227 flags |= PFXOK; /* enable 0x prefixing */ 228 c = CT_INT; 229 ccfn = strtouq; 230 base = 16; 231 break; 232 233 case 's': 234 c = CT_STRING; 235 break; 236 237 case '[': 238 fmt = __sccl(ccltab, fmt); 239 flags |= NOSKIP; 240 c = CT_CCL; 241 break; 242 243 case 'c': 244 flags |= NOSKIP; 245 c = CT_CHAR; 246 break; 247 248 case 'p': /* pointer format is like hex */ 249 flags |= POINTER | PFXOK; 250 c = CT_INT; 251 ccfn = strtouq; 252 base = 16; 253 break; 254 255 case 'n': 256 nconversions++; 257 if (flags & SUPPRESS) /* ??? */ 258 continue; 259 if (flags & SHORTSHORT) 260 *va_arg(ap, char *) = nread; 261 else if (flags & SHORT) 262 *va_arg(ap, short *) = nread; 263 else if (flags & LONG) 264 *va_arg(ap, long *) = nread; 265 else if (flags & QUAD) 266 *va_arg(ap, quad_t *) = nread; 267 else if (flags & INTMAXT) 268 *va_arg(ap, intmax_t *) = nread; 269 else if (flags & SIZET) 270 *va_arg(ap, size_t *) = nread; 271 else if (flags & PTRDIFFT) 272 *va_arg(ap, ptrdiff_t *) = nread; 273 else 274 *va_arg(ap, int *) = nread; 275 continue; 276 } 277 278 /* 279 * We have a conversion that requires input. 280 */ 281 if (inr <= 0) 282 goto input_failure; 283 284 /* 285 * Consume leading white space, except for formats 286 * that suppress this. 287 */ 288 if ((flags & NOSKIP) == 0) { 289 while (isspace(*inp)) { 290 nread++; 291 if (--inr > 0) 292 inp++; 293 else 294 goto input_failure; 295 } 296 /* 297 * Note that there is at least one character in 298 * the buffer, so conversions that do not set NOSKIP 299 * can no longer result in an input failure. 300 */ 301 } 302 303 /* 304 * Do the conversion. 305 */ 306 switch (c) { 307 case CT_CHAR: 308 /* scan arbitrary characters (sets NOSKIP) */ 309 if (width == 0) 310 width = 1; 311 if (flags & SUPPRESS) { 312 size_t sum = 0; 313 for (;;) { 314 if ((n = inr) < width) { 315 sum += n; 316 width -= n; 317 inp += n; 318 if (sum == 0) 319 goto input_failure; 320 break; 321 } else { 322 sum += width; 323 inr -= width; 324 inp += width; 325 break; 326 } 327 } 328 nread += sum; 329 } else { 330 bcopy(inp, va_arg(ap, char *), width); 331 inr -= width; 332 inp += width; 333 nread += width; 334 nassigned++; 335 } 336 nconversions++; 337 break; 338 339 case CT_CCL: 340 /* scan a (nonempty) character class (sets NOSKIP) */ 341 if (width == 0) 342 width = (size_t)~0; /* `infinity' */ 343 /* take only those things in the class */ 344 if (flags & SUPPRESS) { 345 n = 0; 346 while (ccltab[(unsigned char)*inp]) { 347 n++, inr--, inp++; 348 if (--width == 0) 349 break; 350 if (inr <= 0) { 351 if (n == 0) 352 goto input_failure; 353 break; 354 } 355 } 356 if (n == 0) 357 goto match_failure; 358 } else { 359 p0 = p = va_arg(ap, char *); 360 while (ccltab[(unsigned char)*inp]) { 361 inr--; 362 *p++ = *inp++; 363 if (--width == 0) 364 break; 365 if (inr <= 0) { 366 if (p == p0) 367 goto input_failure; 368 break; 369 } 370 } 371 n = p - p0; 372 if (n == 0) 373 goto match_failure; 374 *p = 0; 375 nassigned++; 376 } 377 nread += n; 378 nconversions++; 379 break; 380 381 case CT_STRING: 382 /* like CCL, but zero-length string OK, & no NOSKIP */ 383 if (width == 0) 384 width = (size_t)~0; 385 if (flags & SUPPRESS) { 386 n = 0; 387 while (!isspace(*inp)) { 388 n++, inr--, inp++; 389 if (--width == 0) 390 break; 391 if (inr <= 0) 392 break; 393 } 394 nread += n; 395 } else { 396 p0 = p = va_arg(ap, char *); 397 while (!isspace(*inp)) { 398 inr--; 399 *p++ = *inp++; 400 if (--width == 0) 401 break; 402 if (inr <= 0) 403 break; 404 } 405 *p = 0; 406 nread += p - p0; 407 nassigned++; 408 } 409 nconversions++; 410 continue; 411 412 case CT_INT: 413 /* scan an integer as if by strtoq/strtouq */ 414 #ifdef hardway 415 if (width == 0 || width > sizeof(buf) - 1) 416 width = sizeof(buf) - 1; 417 #else 418 /* size_t is unsigned, hence this optimisation */ 419 if (--width > sizeof(buf) - 2) 420 width = sizeof(buf) - 2; 421 width++; 422 #endif 423 flags |= SIGNOK | NDIGITS | NZDIGITS; 424 for (p = buf; width; width--) { 425 c = *inp; 426 /* 427 * Switch on the character; `goto ok' 428 * if we accept it as a part of number. 429 */ 430 switch (c) { 431 /* 432 * The digit 0 is always legal, but is 433 * special. For %i conversions, if no 434 * digits (zero or nonzero) have been 435 * scanned (only signs), we will have 436 * base==0. In that case, we should set 437 * it to 8 and enable 0x prefixing. 438 * Also, if we have not scanned zero digits 439 * before this, do not turn off prefixing 440 * (someone else will turn it off if we 441 * have scanned any nonzero digits). 442 */ 443 case '0': 444 if (base == 0) { 445 base = 8; 446 flags |= PFXOK; 447 } 448 if (flags & NZDIGITS) 449 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 450 else 451 flags &= ~(SIGNOK|PFXOK|NDIGITS); 452 goto ok; 453 454 /* 1 through 7 always legal */ 455 case '1': case '2': case '3': 456 case '4': case '5': case '6': case '7': 457 base = basefix[base]; 458 flags &= ~(SIGNOK | PFXOK | NDIGITS); 459 goto ok; 460 461 /* digits 8 and 9 ok iff decimal or hex */ 462 case '8': case '9': 463 base = basefix[base]; 464 if (base <= 8) 465 break; /* not legal here */ 466 flags &= ~(SIGNOK | PFXOK | NDIGITS); 467 goto ok; 468 469 /* letters ok iff hex */ 470 case 'A': case 'B': case 'C': 471 case 'D': case 'E': case 'F': 472 case 'a': case 'b': case 'c': 473 case 'd': case 'e': case 'f': 474 /* no need to fix base here */ 475 if (base <= 10) 476 break; /* not legal here */ 477 flags &= ~(SIGNOK | PFXOK | NDIGITS); 478 goto ok; 479 480 /* sign ok only as first character */ 481 case '+': case '-': 482 if (flags & SIGNOK) { 483 flags &= ~SIGNOK; 484 goto ok; 485 } 486 break; 487 488 /* x ok iff flag still set & 2nd char */ 489 case 'x': case 'X': 490 if (flags & PFXOK && p == buf + 1) { 491 base = 16; /* if %i */ 492 flags &= ~PFXOK; 493 goto ok; 494 } 495 break; 496 } 497 498 /* 499 * If we got here, c is not a legal character 500 * for a number. Stop accumulating digits. 501 */ 502 break; 503 ok: 504 /* 505 * c is legal: store it and look at the next. 506 */ 507 *p++ = c; 508 if (--inr > 0) 509 inp++; 510 else 511 break; /* end of input */ 512 } 513 /* 514 * If we had only a sign, it is no good; push 515 * back the sign. If the number ends in `x', 516 * it was [sign] '0' 'x', so push back the x 517 * and treat it as [sign] '0'. 518 */ 519 if (flags & NDIGITS) { 520 if (p > buf) { 521 inp--; 522 inr++; 523 } 524 goto match_failure; 525 } 526 c = ((u_char *)p)[-1]; 527 if (c == 'x' || c == 'X') { 528 --p; 529 inp--; 530 inr++; 531 } 532 if ((flags & SUPPRESS) == 0) { 533 u_quad_t res; 534 535 *p = 0; 536 res = (*ccfn)(buf, (char **)NULL, base); 537 if (flags & POINTER) 538 *va_arg(ap, void **) = 539 (void *)(uintptr_t)res; 540 else if (flags & SHORTSHORT) 541 *va_arg(ap, char *) = res; 542 else if (flags & SHORT) 543 *va_arg(ap, short *) = res; 544 else if (flags & LONG) 545 *va_arg(ap, long *) = res; 546 else if (flags & QUAD) 547 *va_arg(ap, quad_t *) = res; 548 else if (flags & INTMAXT) 549 *va_arg(ap, intmax_t *) = res; 550 else if (flags & PTRDIFFT) 551 *va_arg(ap, ptrdiff_t *) = res; 552 else if (flags & SIZET) 553 *va_arg(ap, size_t *) = res; 554 else 555 *va_arg(ap, int *) = res; 556 nassigned++; 557 } 558 nread += p - buf; 559 nconversions++; 560 break; 561 } 562 } 563 input_failure: 564 return (nconversions != 0 ? nassigned : -1); 565 match_failure: 566 return (nassigned); 567 } 568 569 /* 570 * Fill in the given table from the scanset at the given format 571 * (just after `['). Return a pointer to the character past the 572 * closing `]'. The table has a 1 wherever characters should be 573 * considered part of the scanset. 574 */ 575 static const u_char * 576 __sccl(char *tab, const u_char *fmt) 577 { 578 int c, n, v; 579 580 /* first `clear' the whole table */ 581 c = *fmt++; /* first char hat => negated scanset */ 582 if (c == '^') { 583 v = 1; /* default => accept */ 584 c = *fmt++; /* get new first char */ 585 } else 586 v = 0; /* default => reject */ 587 588 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 589 for (n = 0; n < 256; n++) 590 tab[n] = v; /* memset(tab, v, 256) */ 591 592 if (c == 0) 593 return (fmt - 1);/* format ended before closing ] */ 594 595 /* 596 * Now set the entries corresponding to the actual scanset 597 * to the opposite of the above. 598 * 599 * The first character may be ']' (or '-') without being special; 600 * the last character may be '-'. 601 */ 602 v = 1 - v; 603 for (;;) { 604 tab[c] = v; /* take character c */ 605 doswitch: 606 n = *fmt++; /* and examine the next */ 607 switch (n) { 608 case 0: /* format ended too soon */ 609 return (fmt - 1); 610 611 case '-': 612 /* 613 * A scanset of the form 614 * [01+-] 615 * is defined as `the digit 0, the digit 1, 616 * the character +, the character -', but 617 * the effect of a scanset such as 618 * [a-zA-Z0-9] 619 * is implementation defined. The V7 Unix 620 * scanf treats `a-z' as `the letters a through 621 * z', but treats `a-a' as `the letter a, the 622 * character -, and the letter a'. 623 * 624 * For compatibility, the `-' is not considered 625 * to define a range if the character following 626 * it is either a close bracket (required by ANSI) 627 * or is not numerically greater than the character 628 * we just stored in the table (c). 629 */ 630 n = *fmt; 631 if (n == ']' || n < c) { 632 c = '-'; 633 break; /* resume the for(;;) */ 634 } 635 fmt++; 636 /* fill in the range */ 637 do { 638 tab[++c] = v; 639 } while (c < n); 640 c = n; 641 /* 642 * Alas, the V7 Unix scanf also treats formats 643 * such as [a-c-e] as `the letters a through e'. 644 * This too is permitted by the standard.... 645 */ 646 goto doswitch; 647 break; 648 649 case ']': /* end of scanset */ 650 return (fmt); 651 652 default: /* just another character */ 653 c = n; 654 break; 655 } 656 } 657 /* NOTREACHED */ 658 } 659