1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Chris Torek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 35 */ 36 37 #include <sys/cdefs.h> 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/ctype.h> 41 #include <sys/limits.h> 42 #include <sys/stddef.h> 43 44 /* 45 * Note that stdarg.h and the ANSI style va_start macro is used for both 46 * ANSI and traditional C compilers. 47 */ 48 #include <machine/stdarg.h> 49 50 #define BUF 32 /* Maximum length of numeric string. */ 51 52 /* 53 * Flags used during conversion. 54 */ 55 #define LONG 0x01 /* l: long or double */ 56 #define SHORT 0x04 /* h: short */ 57 #define SUPPRESS 0x08 /* suppress assignment */ 58 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 59 #define NOSKIP 0x20 /* do not skip blanks */ 60 #define QUAD 0x400 61 #define INTMAXT 0x800 /* j: intmax_t */ 62 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 63 #define SIZET 0x2000 /* z: size_t */ 64 #define SHORTSHORT 0x4000 /** hh: char */ 65 66 /* 67 * The following are used in numeric conversions only: 68 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 69 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 70 */ 71 #define SIGNOK 0x40 /* +/- is (still) legal */ 72 #define NDIGITS 0x80 /* no digits detected */ 73 74 #define DPTOK 0x100 /* (float) decimal point is still legal */ 75 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 76 77 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 78 #define NZDIGITS 0x200 /* no zero digits detected */ 79 80 /* 81 * Conversion types. 82 */ 83 #define CT_CHAR 0 /* %c conversion */ 84 #define CT_CCL 1 /* %[...] conversion */ 85 #define CT_STRING 2 /* %s conversion */ 86 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 87 typedef u_quad_t (*ccfntype)(const char *, char **, int); 88 89 static const u_char *__sccl(char *, const u_char *); 90 91 int 92 sscanf(const char *ibuf, const char *fmt, ...) 93 { 94 va_list ap; 95 int ret; 96 97 va_start(ap, fmt); 98 ret = vsscanf(ibuf, fmt, ap); 99 va_end(ap); 100 return(ret); 101 } 102 103 int 104 vsscanf(const char *inp, char const *fmt0, va_list ap) 105 { 106 int inr; 107 const u_char *fmt = (const u_char *)fmt0; 108 int c; /* character from format, or conversion */ 109 size_t width; /* field width, or 0 */ 110 char *p; /* points into all kinds of strings */ 111 int n; /* handy integer */ 112 int flags; /* flags as defined above */ 113 char *p0; /* saves original value of p when necessary */ 114 int nassigned; /* number of fields assigned */ 115 int nconversions; /* number of conversions */ 116 int nread; /* number of characters consumed from fp */ 117 int base; /* base argument to strtoq/strtouq */ 118 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 119 char ccltab[256]; /* character class table for %[...] */ 120 char buf[BUF]; /* buffer for numeric conversions */ 121 122 /* `basefix' is used to avoid `if' tests in the integer scanner */ 123 static short basefix[17] = 124 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 125 126 inr = strlen(inp); 127 128 nassigned = 0; 129 nconversions = 0; 130 nread = 0; 131 base = 0; /* XXX just to keep gcc happy */ 132 ccfn = NULL; /* XXX just to keep gcc happy */ 133 for (;;) { 134 c = *fmt++; 135 if (c == 0) 136 return (nassigned); 137 if (isspace(c)) { 138 while (inr > 0 && isspace(*inp)) 139 nread++, inr--, inp++; 140 continue; 141 } 142 if (c != '%') 143 goto literal; 144 width = 0; 145 flags = 0; 146 /* 147 * switch on the format. continue if done; 148 * break once format type is derived. 149 */ 150 again: c = *fmt++; 151 switch (c) { 152 case '%': 153 literal: 154 if (inr <= 0) 155 goto input_failure; 156 if (*inp != c) 157 goto match_failure; 158 inr--, inp++; 159 nread++; 160 continue; 161 162 case '*': 163 flags |= SUPPRESS; 164 goto again; 165 case 'j': 166 flags |= INTMAXT; 167 goto again; 168 case 'l': 169 if (flags & LONG){ 170 flags &= ~LONG; 171 flags |= QUAD; 172 } else { 173 flags |= LONG; 174 } 175 goto again; 176 case 'q': 177 flags |= QUAD; 178 goto again; 179 case 't': 180 flags |= PTRDIFFT; 181 goto again; 182 case 'z': 183 flags |= SIZET; 184 goto again; 185 case 'h': 186 if (flags & SHORT){ 187 flags &= ~SHORT; 188 flags |= SHORTSHORT; 189 } else { 190 flags |= SHORT; 191 } 192 goto again; 193 194 case '0': case '1': case '2': case '3': case '4': 195 case '5': case '6': case '7': case '8': case '9': 196 width = width * 10 + c - '0'; 197 goto again; 198 199 /* 200 * Conversions. 201 * 202 */ 203 case 'd': 204 c = CT_INT; 205 ccfn = (ccfntype)strtoq; 206 base = 10; 207 break; 208 209 case 'i': 210 c = CT_INT; 211 ccfn = (ccfntype)strtoq; 212 base = 0; 213 break; 214 215 case 'o': 216 c = CT_INT; 217 ccfn = strtouq; 218 base = 8; 219 break; 220 221 case 'u': 222 c = CT_INT; 223 ccfn = strtouq; 224 base = 10; 225 break; 226 227 case 'x': 228 flags |= PFXOK; /* enable 0x prefixing */ 229 c = CT_INT; 230 ccfn = strtouq; 231 base = 16; 232 break; 233 234 case 's': 235 c = CT_STRING; 236 break; 237 238 case '[': 239 fmt = __sccl(ccltab, fmt); 240 flags |= NOSKIP; 241 c = CT_CCL; 242 break; 243 244 case 'c': 245 flags |= NOSKIP; 246 c = CT_CHAR; 247 break; 248 249 case 'p': /* pointer format is like hex */ 250 flags |= POINTER | PFXOK; 251 c = CT_INT; 252 ccfn = strtouq; 253 base = 16; 254 break; 255 256 case 'n': 257 nconversions++; 258 if (flags & SUPPRESS) /* ??? */ 259 continue; 260 if (flags & SHORTSHORT) 261 *va_arg(ap, char *) = nread; 262 else if (flags & SHORT) 263 *va_arg(ap, short *) = nread; 264 else if (flags & LONG) 265 *va_arg(ap, long *) = nread; 266 else if (flags & QUAD) 267 *va_arg(ap, quad_t *) = nread; 268 else if (flags & INTMAXT) 269 *va_arg(ap, intmax_t *) = nread; 270 else if (flags & SIZET) 271 *va_arg(ap, size_t *) = nread; 272 else if (flags & PTRDIFFT) 273 *va_arg(ap, ptrdiff_t *) = nread; 274 else 275 *va_arg(ap, int *) = nread; 276 continue; 277 } 278 279 /* 280 * We have a conversion that requires input. 281 */ 282 if (inr <= 0) 283 goto input_failure; 284 285 /* 286 * Consume leading white space, except for formats 287 * that suppress this. 288 */ 289 if ((flags & NOSKIP) == 0) { 290 while (isspace(*inp)) { 291 nread++; 292 if (--inr > 0) 293 inp++; 294 else 295 goto input_failure; 296 } 297 /* 298 * Note that there is at least one character in 299 * the buffer, so conversions that do not set NOSKIP 300 * can no longer result in an input failure. 301 */ 302 } 303 304 /* 305 * Do the conversion. 306 */ 307 switch (c) { 308 case CT_CHAR: 309 /* scan arbitrary characters (sets NOSKIP) */ 310 if (width == 0) 311 width = 1; 312 if (flags & SUPPRESS) { 313 size_t sum = 0; 314 for (;;) { 315 if ((n = inr) < width) { 316 sum += n; 317 width -= n; 318 inp += n; 319 if (sum == 0) 320 goto input_failure; 321 break; 322 } else { 323 sum += width; 324 inr -= width; 325 inp += width; 326 break; 327 } 328 } 329 nread += sum; 330 } else { 331 bcopy(inp, va_arg(ap, char *), width); 332 inr -= width; 333 inp += width; 334 nread += width; 335 nassigned++; 336 } 337 nconversions++; 338 break; 339 340 case CT_CCL: 341 /* scan a (nonempty) character class (sets NOSKIP) */ 342 if (width == 0) 343 width = (size_t)~0; /* `infinity' */ 344 /* take only those things in the class */ 345 if (flags & SUPPRESS) { 346 n = 0; 347 while (ccltab[(unsigned char)*inp]) { 348 n++, inr--, inp++; 349 if (--width == 0) 350 break; 351 if (inr <= 0) { 352 if (n == 0) 353 goto input_failure; 354 break; 355 } 356 } 357 if (n == 0) 358 goto match_failure; 359 } else { 360 p0 = p = va_arg(ap, char *); 361 while (ccltab[(unsigned char)*inp]) { 362 inr--; 363 *p++ = *inp++; 364 if (--width == 0) 365 break; 366 if (inr <= 0) { 367 if (p == p0) 368 goto input_failure; 369 break; 370 } 371 } 372 n = p - p0; 373 if (n == 0) 374 goto match_failure; 375 *p = 0; 376 nassigned++; 377 } 378 nread += n; 379 nconversions++; 380 break; 381 382 case CT_STRING: 383 /* like CCL, but zero-length string OK, & no NOSKIP */ 384 if (width == 0) 385 width = (size_t)~0; 386 if (flags & SUPPRESS) { 387 n = 0; 388 while (!isspace(*inp)) { 389 n++, inr--, inp++; 390 if (--width == 0) 391 break; 392 if (inr <= 0) 393 break; 394 } 395 nread += n; 396 } else { 397 p0 = p = va_arg(ap, char *); 398 while (!isspace(*inp)) { 399 inr--; 400 *p++ = *inp++; 401 if (--width == 0) 402 break; 403 if (inr <= 0) 404 break; 405 } 406 *p = 0; 407 nread += p - p0; 408 nassigned++; 409 } 410 nconversions++; 411 continue; 412 413 case CT_INT: 414 /* scan an integer as if by strtoq/strtouq */ 415 #ifdef hardway 416 if (width == 0 || width > sizeof(buf) - 1) 417 width = sizeof(buf) - 1; 418 #else 419 /* size_t is unsigned, hence this optimisation */ 420 if (--width > sizeof(buf) - 2) 421 width = sizeof(buf) - 2; 422 width++; 423 #endif 424 flags |= SIGNOK | NDIGITS | NZDIGITS; 425 for (p = buf; width; width--) { 426 c = *inp; 427 /* 428 * Switch on the character; `goto ok' 429 * if we accept it as a part of number. 430 */ 431 switch (c) { 432 /* 433 * The digit 0 is always legal, but is 434 * special. For %i conversions, if no 435 * digits (zero or nonzero) have been 436 * scanned (only signs), we will have 437 * base==0. In that case, we should set 438 * it to 8 and enable 0x prefixing. 439 * Also, if we have not scanned zero digits 440 * before this, do not turn off prefixing 441 * (someone else will turn it off if we 442 * have scanned any nonzero digits). 443 */ 444 case '0': 445 if (base == 0) { 446 base = 8; 447 flags |= PFXOK; 448 } 449 if (flags & NZDIGITS) 450 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 451 else 452 flags &= ~(SIGNOK|PFXOK|NDIGITS); 453 goto ok; 454 455 /* 1 through 7 always legal */ 456 case '1': case '2': case '3': 457 case '4': case '5': case '6': case '7': 458 base = basefix[base]; 459 flags &= ~(SIGNOK | PFXOK | NDIGITS); 460 goto ok; 461 462 /* digits 8 and 9 ok iff decimal or hex */ 463 case '8': case '9': 464 base = basefix[base]; 465 if (base <= 8) 466 break; /* not legal here */ 467 flags &= ~(SIGNOK | PFXOK | NDIGITS); 468 goto ok; 469 470 /* letters ok iff hex */ 471 case 'A': case 'B': case 'C': 472 case 'D': case 'E': case 'F': 473 case 'a': case 'b': case 'c': 474 case 'd': case 'e': case 'f': 475 /* no need to fix base here */ 476 if (base <= 10) 477 break; /* not legal here */ 478 flags &= ~(SIGNOK | PFXOK | NDIGITS); 479 goto ok; 480 481 /* sign ok only as first character */ 482 case '+': case '-': 483 if (flags & SIGNOK) { 484 flags &= ~SIGNOK; 485 goto ok; 486 } 487 break; 488 489 /* x ok iff flag still set & 2nd char */ 490 case 'x': case 'X': 491 if (flags & PFXOK && p == buf + 1) { 492 base = 16; /* if %i */ 493 flags &= ~PFXOK; 494 goto ok; 495 } 496 break; 497 } 498 499 /* 500 * If we got here, c is not a legal character 501 * for a number. Stop accumulating digits. 502 */ 503 break; 504 ok: 505 /* 506 * c is legal: store it and look at the next. 507 */ 508 *p++ = c; 509 if (--inr > 0) 510 inp++; 511 else 512 break; /* end of input */ 513 } 514 /* 515 * If we had only a sign, it is no good; push 516 * back the sign. If the number ends in `x', 517 * it was [sign] '0' 'x', so push back the x 518 * and treat it as [sign] '0'. 519 */ 520 if (flags & NDIGITS) { 521 if (p > buf) { 522 inp--; 523 inr++; 524 } 525 goto match_failure; 526 } 527 c = ((u_char *)p)[-1]; 528 if (c == 'x' || c == 'X') { 529 --p; 530 inp--; 531 inr++; 532 } 533 if ((flags & SUPPRESS) == 0) { 534 u_quad_t res; 535 536 *p = 0; 537 res = (*ccfn)(buf, (char **)NULL, base); 538 if (flags & POINTER) 539 *va_arg(ap, void **) = 540 (void *)(uintptr_t)res; 541 else if (flags & SHORTSHORT) 542 *va_arg(ap, char *) = res; 543 else if (flags & SHORT) 544 *va_arg(ap, short *) = res; 545 else if (flags & LONG) 546 *va_arg(ap, long *) = res; 547 else if (flags & QUAD) 548 *va_arg(ap, quad_t *) = res; 549 else if (flags & INTMAXT) 550 *va_arg(ap, intmax_t *) = res; 551 else if (flags & PTRDIFFT) 552 *va_arg(ap, ptrdiff_t *) = res; 553 else if (flags & SIZET) 554 *va_arg(ap, size_t *) = res; 555 else 556 *va_arg(ap, int *) = res; 557 nassigned++; 558 } 559 nread += p - buf; 560 nconversions++; 561 break; 562 } 563 } 564 input_failure: 565 return (nconversions != 0 ? nassigned : -1); 566 match_failure: 567 return (nassigned); 568 } 569 570 /* 571 * Fill in the given table from the scanset at the given format 572 * (just after `['). Return a pointer to the character past the 573 * closing `]'. The table has a 1 wherever characters should be 574 * considered part of the scanset. 575 */ 576 static const u_char * 577 __sccl(char *tab, const u_char *fmt) 578 { 579 int c, n, v; 580 581 /* first `clear' the whole table */ 582 c = *fmt++; /* first char hat => negated scanset */ 583 if (c == '^') { 584 v = 1; /* default => accept */ 585 c = *fmt++; /* get new first char */ 586 } else 587 v = 0; /* default => reject */ 588 589 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 590 for (n = 0; n < 256; n++) 591 tab[n] = v; /* memset(tab, v, 256) */ 592 593 if (c == 0) 594 return (fmt - 1);/* format ended before closing ] */ 595 596 /* 597 * Now set the entries corresponding to the actual scanset 598 * to the opposite of the above. 599 * 600 * The first character may be ']' (or '-') without being special; 601 * the last character may be '-'. 602 */ 603 v = 1 - v; 604 for (;;) { 605 tab[c] = v; /* take character c */ 606 doswitch: 607 n = *fmt++; /* and examine the next */ 608 switch (n) { 609 case 0: /* format ended too soon */ 610 return (fmt - 1); 611 612 case '-': 613 /* 614 * A scanset of the form 615 * [01+-] 616 * is defined as `the digit 0, the digit 1, 617 * the character +, the character -', but 618 * the effect of a scanset such as 619 * [a-zA-Z0-9] 620 * is implementation defined. The V7 Unix 621 * scanf treats `a-z' as `the letters a through 622 * z', but treats `a-a' as `the letter a, the 623 * character -, and the letter a'. 624 * 625 * For compatibility, the `-' is not considered 626 * to define a range if the character following 627 * it is either a close bracket (required by ANSI) 628 * or is not numerically greater than the character 629 * we just stored in the table (c). 630 */ 631 n = *fmt; 632 if (n == ']' || n < c) { 633 c = '-'; 634 break; /* resume the for(;;) */ 635 } 636 fmt++; 637 /* fill in the range */ 638 do { 639 tab[++c] = v; 640 } while (c < n); 641 c = n; 642 /* 643 * Alas, the V7 Unix scanf also treats formats 644 * such as [a-c-e] as `the letters a through e'. 645 * This too is permitted by the standard.... 646 */ 647 goto doswitch; 648 break; 649 650 case ']': /* end of scanset */ 651 return (fmt); 652 653 default: /* just another character */ 654 c = n; 655 break; 656 } 657 } 658 /* NOTREACHED */ 659 } 660