1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * $FreeBSD$ 37 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <machine/limits.h> 43 44 /* 45 * Note that stdarg.h and the ANSI style va_start macro is used for both 46 * ANSI and traditional C compilers. 47 */ 48 #include <machine/stdarg.h> 49 50 #define BUF 32 /* Maximum length of numeric string. */ 51 52 /* 53 * Flags used during conversion. 54 */ 55 #define LONG 0x01 /* l: long or double */ 56 #define SHORT 0x04 /* h: short */ 57 #define SUPPRESS 0x08 /* suppress assignment */ 58 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 59 #define NOSKIP 0x20 /* do not skip blanks */ 60 #define QUAD 0x400 61 62 /* 63 * The following are used in numeric conversions only: 64 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 65 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 66 */ 67 #define SIGNOK 0x40 /* +/- is (still) legal */ 68 #define NDIGITS 0x80 /* no digits detected */ 69 70 #define DPTOK 0x100 /* (float) decimal point is still legal */ 71 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 72 73 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 74 #define NZDIGITS 0x200 /* no zero digits detected */ 75 76 /* 77 * Conversion types. 78 */ 79 #define CT_CHAR 0 /* %c conversion */ 80 #define CT_CCL 1 /* %[...] conversion */ 81 #define CT_STRING 2 /* %s conversion */ 82 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 83 typedef u_quad_t (*ccfntype)(const char *, const char **, int); 84 85 #define isspace(c) ((c) == ' ' || (c) == '\t' || \ 86 (c) == '\r' || (c) == '\n') 87 #define isascii(c) (((c) & ~0x7f) == 0) 88 #define isupper(c) ((c) >= 'A' && (c) <= 'Z') 89 #define islower(c) ((c) >= 'a' && (c) <= 'z') 90 #define isalpha(c) (isupper(c) || (islower(c))) 91 #define isdigit(c) ((c) >= '0' && (c) <= '9') 92 93 static const u_char *__sccl(char *, const u_char *); 94 95 int 96 sscanf(const char *ibuf, const char *fmt, ...) 97 { 98 va_list ap; 99 int ret; 100 101 va_start(ap, fmt); 102 ret = vsscanf(ibuf, fmt, ap); 103 va_end(ap); 104 return(ret); 105 } 106 107 int 108 vsscanf(const char *inp, char const *fmt0, va_list ap) 109 { 110 int inr; 111 const u_char *fmt = (const u_char *)fmt0; 112 int c; /* character from format, or conversion */ 113 size_t width; /* field width, or 0 */ 114 char *p; /* points into all kinds of strings */ 115 int n; /* handy integer */ 116 int flags; /* flags as defined above */ 117 char *p0; /* saves original value of p when necessary */ 118 int nassigned; /* number of fields assigned */ 119 int nconversions; /* number of conversions */ 120 int nread; /* number of characters consumed from fp */ 121 int base; /* base argument to strtoq/strtouq */ 122 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 123 char ccltab[256]; /* character class table for %[...] */ 124 char buf[BUF]; /* buffer for numeric conversions */ 125 126 /* `basefix' is used to avoid `if' tests in the integer scanner */ 127 static short basefix[17] = 128 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 129 130 inr = strlen(inp); 131 132 nassigned = 0; 133 nconversions = 0; 134 nread = 0; 135 base = 0; /* XXX just to keep gcc happy */ 136 ccfn = NULL; /* XXX just to keep gcc happy */ 137 for (;;) { 138 c = *fmt++; 139 if (c == 0) 140 return (nassigned); 141 if (isspace(c)) { 142 while (inr > 0 && isspace(*inp)) 143 nread++, inr--, inp++; 144 continue; 145 } 146 if (c != '%') 147 goto literal; 148 width = 0; 149 flags = 0; 150 /* 151 * switch on the format. continue if done; 152 * break once format type is derived. 153 */ 154 again: c = *fmt++; 155 switch (c) { 156 case '%': 157 literal: 158 if (inr <= 0) 159 goto input_failure; 160 if (*inp != c) 161 goto match_failure; 162 inr--, inp++; 163 nread++; 164 continue; 165 166 case '*': 167 flags |= SUPPRESS; 168 goto again; 169 case 'l': 170 flags |= LONG; 171 goto again; 172 case 'q': 173 flags |= QUAD; 174 goto again; 175 case 'h': 176 flags |= SHORT; 177 goto again; 178 179 case '0': case '1': case '2': case '3': case '4': 180 case '5': case '6': case '7': case '8': case '9': 181 width = width * 10 + c - '0'; 182 goto again; 183 184 /* 185 * Conversions. 186 * 187 */ 188 case 'd': 189 c = CT_INT; 190 ccfn = (ccfntype)strtoq; 191 base = 10; 192 break; 193 194 case 'i': 195 c = CT_INT; 196 ccfn = (ccfntype)strtoq; 197 base = 0; 198 break; 199 200 case 'o': 201 c = CT_INT; 202 ccfn = strtouq; 203 base = 8; 204 break; 205 206 case 'u': 207 c = CT_INT; 208 ccfn = strtouq; 209 base = 10; 210 break; 211 212 case 'x': 213 flags |= PFXOK; /* enable 0x prefixing */ 214 c = CT_INT; 215 ccfn = strtouq; 216 base = 16; 217 break; 218 219 case 's': 220 c = CT_STRING; 221 break; 222 223 case '[': 224 fmt = __sccl(ccltab, fmt); 225 flags |= NOSKIP; 226 c = CT_CCL; 227 break; 228 229 case 'c': 230 flags |= NOSKIP; 231 c = CT_CHAR; 232 break; 233 234 case 'p': /* pointer format is like hex */ 235 flags |= POINTER | PFXOK; 236 c = CT_INT; 237 ccfn = strtouq; 238 base = 16; 239 break; 240 241 case 'n': 242 nconversions++; 243 if (flags & SUPPRESS) /* ??? */ 244 continue; 245 if (flags & SHORT) 246 *va_arg(ap, short *) = nread; 247 else if (flags & LONG) 248 *va_arg(ap, long *) = nread; 249 else if (flags & QUAD) 250 *va_arg(ap, quad_t *) = nread; 251 else 252 *va_arg(ap, int *) = nread; 253 continue; 254 } 255 256 /* 257 * We have a conversion that requires input. 258 */ 259 if (inr <= 0) 260 goto input_failure; 261 262 /* 263 * Consume leading white space, except for formats 264 * that suppress this. 265 */ 266 if ((flags & NOSKIP) == 0) { 267 while (isspace(*inp)) { 268 nread++; 269 if (--inr > 0) 270 inp++; 271 else 272 goto input_failure; 273 } 274 /* 275 * Note that there is at least one character in 276 * the buffer, so conversions that do not set NOSKIP 277 * can no longer result in an input failure. 278 */ 279 } 280 281 /* 282 * Do the conversion. 283 */ 284 switch (c) { 285 286 case CT_CHAR: 287 /* scan arbitrary characters (sets NOSKIP) */ 288 if (width == 0) 289 width = 1; 290 if (flags & SUPPRESS) { 291 size_t sum = 0; 292 for (;;) { 293 if ((n = inr) < width) { 294 sum += n; 295 width -= n; 296 inp += n; 297 if (sum == 0) 298 goto input_failure; 299 break; 300 } else { 301 sum += width; 302 inr -= width; 303 inp += width; 304 break; 305 } 306 } 307 nread += sum; 308 } else { 309 bcopy(inp, va_arg(ap, char *), width); 310 inr -= width; 311 inp += width; 312 nread += width; 313 nassigned++; 314 } 315 nconversions++; 316 break; 317 318 case CT_CCL: 319 /* scan a (nonempty) character class (sets NOSKIP) */ 320 if (width == 0) 321 width = (size_t)~0; /* `infinity' */ 322 /* take only those things in the class */ 323 if (flags & SUPPRESS) { 324 n = 0; 325 while (ccltab[(unsigned char)*inp]) { 326 n++, inr--, inp++; 327 if (--width == 0) 328 break; 329 if (inr <= 0) { 330 if (n == 0) 331 goto input_failure; 332 break; 333 } 334 } 335 if (n == 0) 336 goto match_failure; 337 } else { 338 p0 = p = va_arg(ap, char *); 339 while (ccltab[(unsigned char)*inp]) { 340 inr--; 341 *p++ = *inp++; 342 if (--width == 0) 343 break; 344 if (inr <= 0) { 345 if (p == p0) 346 goto input_failure; 347 break; 348 } 349 } 350 n = p - p0; 351 if (n == 0) 352 goto match_failure; 353 *p = 0; 354 nassigned++; 355 } 356 nread += n; 357 nconversions++; 358 break; 359 360 case CT_STRING: 361 /* like CCL, but zero-length string OK, & no NOSKIP */ 362 if (width == 0) 363 width = (size_t)~0; 364 if (flags & SUPPRESS) { 365 n = 0; 366 while (!isspace(*inp)) { 367 n++, inr--, inp++; 368 if (--width == 0) 369 break; 370 if (inr <= 0) 371 break; 372 } 373 nread += n; 374 } else { 375 p0 = p = va_arg(ap, char *); 376 while (!isspace(*inp)) { 377 inr--; 378 *p++ = *inp++; 379 if (--width == 0) 380 break; 381 if (inr <= 0) 382 break; 383 } 384 *p = 0; 385 nread += p - p0; 386 nassigned++; 387 } 388 nconversions++; 389 continue; 390 391 case CT_INT: 392 /* scan an integer as if by strtoq/strtouq */ 393 #ifdef hardway 394 if (width == 0 || width > sizeof(buf) - 1) 395 width = sizeof(buf) - 1; 396 #else 397 /* size_t is unsigned, hence this optimisation */ 398 if (--width > sizeof(buf) - 2) 399 width = sizeof(buf) - 2; 400 width++; 401 #endif 402 flags |= SIGNOK | NDIGITS | NZDIGITS; 403 for (p = buf; width; width--) { 404 c = *inp; 405 /* 406 * Switch on the character; `goto ok' 407 * if we accept it as a part of number. 408 */ 409 switch (c) { 410 411 /* 412 * The digit 0 is always legal, but is 413 * special. For %i conversions, if no 414 * digits (zero or nonzero) have been 415 * scanned (only signs), we will have 416 * base==0. In that case, we should set 417 * it to 8 and enable 0x prefixing. 418 * Also, if we have not scanned zero digits 419 * before this, do not turn off prefixing 420 * (someone else will turn it off if we 421 * have scanned any nonzero digits). 422 */ 423 case '0': 424 if (base == 0) { 425 base = 8; 426 flags |= PFXOK; 427 } 428 if (flags & NZDIGITS) 429 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 430 else 431 flags &= ~(SIGNOK|PFXOK|NDIGITS); 432 goto ok; 433 434 /* 1 through 7 always legal */ 435 case '1': case '2': case '3': 436 case '4': case '5': case '6': case '7': 437 base = basefix[base]; 438 flags &= ~(SIGNOK | PFXOK | NDIGITS); 439 goto ok; 440 441 /* digits 8 and 9 ok iff decimal or hex */ 442 case '8': case '9': 443 base = basefix[base]; 444 if (base <= 8) 445 break; /* not legal here */ 446 flags &= ~(SIGNOK | PFXOK | NDIGITS); 447 goto ok; 448 449 /* letters ok iff hex */ 450 case 'A': case 'B': case 'C': 451 case 'D': case 'E': case 'F': 452 case 'a': case 'b': case 'c': 453 case 'd': case 'e': case 'f': 454 /* no need to fix base here */ 455 if (base <= 10) 456 break; /* not legal here */ 457 flags &= ~(SIGNOK | PFXOK | NDIGITS); 458 goto ok; 459 460 /* sign ok only as first character */ 461 case '+': case '-': 462 if (flags & SIGNOK) { 463 flags &= ~SIGNOK; 464 goto ok; 465 } 466 break; 467 468 /* x ok iff flag still set & 2nd char */ 469 case 'x': case 'X': 470 if (flags & PFXOK && p == buf + 1) { 471 base = 16; /* if %i */ 472 flags &= ~PFXOK; 473 goto ok; 474 } 475 break; 476 } 477 478 /* 479 * If we got here, c is not a legal character 480 * for a number. Stop accumulating digits. 481 */ 482 break; 483 ok: 484 /* 485 * c is legal: store it and look at the next. 486 */ 487 *p++ = c; 488 if (--inr > 0) 489 inp++; 490 else 491 break; /* end of input */ 492 } 493 /* 494 * If we had only a sign, it is no good; push 495 * back the sign. If the number ends in `x', 496 * it was [sign] '0' 'x', so push back the x 497 * and treat it as [sign] '0'. 498 */ 499 if (flags & NDIGITS) { 500 if (p > buf) { 501 inp--; 502 inr++; 503 } 504 goto match_failure; 505 } 506 c = ((u_char *)p)[-1]; 507 if (c == 'x' || c == 'X') { 508 --p; 509 inp--; 510 inr++; 511 } 512 if ((flags & SUPPRESS) == 0) { 513 u_quad_t res; 514 515 *p = 0; 516 res = (*ccfn)(buf, (const char **)NULL, base); 517 if (flags & POINTER) 518 *va_arg(ap, void **) = 519 (void *)(uintptr_t)res; 520 else if (flags & SHORT) 521 *va_arg(ap, short *) = res; 522 else if (flags & LONG) 523 *va_arg(ap, long *) = res; 524 else if (flags & QUAD) 525 *va_arg(ap, quad_t *) = res; 526 else 527 *va_arg(ap, int *) = res; 528 nassigned++; 529 } 530 nread += p - buf; 531 nconversions++; 532 break; 533 534 } 535 } 536 input_failure: 537 return (nconversions != 0 ? nassigned : -1); 538 match_failure: 539 return (nassigned); 540 } 541 542 /* 543 * Fill in the given table from the scanset at the given format 544 * (just after `['). Return a pointer to the character past the 545 * closing `]'. The table has a 1 wherever characters should be 546 * considered part of the scanset. 547 */ 548 static const u_char * 549 __sccl(char *tab, const u_char *fmt) 550 { 551 int c, n, v; 552 553 /* first `clear' the whole table */ 554 c = *fmt++; /* first char hat => negated scanset */ 555 if (c == '^') { 556 v = 1; /* default => accept */ 557 c = *fmt++; /* get new first char */ 558 } else 559 v = 0; /* default => reject */ 560 561 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 562 for (n = 0; n < 256; n++) 563 tab[n] = v; /* memset(tab, v, 256) */ 564 565 if (c == 0) 566 return (fmt - 1);/* format ended before closing ] */ 567 568 /* 569 * Now set the entries corresponding to the actual scanset 570 * to the opposite of the above. 571 * 572 * The first character may be ']' (or '-') without being special; 573 * the last character may be '-'. 574 */ 575 v = 1 - v; 576 for (;;) { 577 tab[c] = v; /* take character c */ 578 doswitch: 579 n = *fmt++; /* and examine the next */ 580 switch (n) { 581 582 case 0: /* format ended too soon */ 583 return (fmt - 1); 584 585 case '-': 586 /* 587 * A scanset of the form 588 * [01+-] 589 * is defined as `the digit 0, the digit 1, 590 * the character +, the character -', but 591 * the effect of a scanset such as 592 * [a-zA-Z0-9] 593 * is implementation defined. The V7 Unix 594 * scanf treats `a-z' as `the letters a through 595 * z', but treats `a-a' as `the letter a, the 596 * character -, and the letter a'. 597 * 598 * For compatibility, the `-' is not considerd 599 * to define a range if the character following 600 * it is either a close bracket (required by ANSI) 601 * or is not numerically greater than the character 602 * we just stored in the table (c). 603 */ 604 n = *fmt; 605 if (n == ']' || n < c) { 606 c = '-'; 607 break; /* resume the for(;;) */ 608 } 609 fmt++; 610 /* fill in the range */ 611 do { 612 tab[++c] = v; 613 } while (c < n); 614 c = n; 615 /* 616 * Alas, the V7 Unix scanf also treats formats 617 * such as [a-c-e] as `the letters a through e'. 618 * This too is permitted by the standard.... 619 */ 620 goto doswitch; 621 break; 622 623 case ']': /* end of scanset */ 624 return (fmt); 625 626 default: /* just another character */ 627 c = n; 628 break; 629 } 630 } 631 /* NOTREACHED */ 632 } 633 634 /* 635 * Convert a string to an unsigned quad integer. 636 * 637 * Ignores `locale' stuff. Assumes that the upper and lower case 638 * alphabets and digits are each contiguous. 639 */ 640 u_quad_t 641 strtouq(const char *nptr, const char **endptr, int base) 642 { 643 const char *s = nptr; 644 u_quad_t acc; 645 unsigned char c; 646 u_quad_t qbase, cutoff; 647 int neg, any, cutlim; 648 649 /* 650 * See strtoq for comments as to the logic used. 651 */ 652 s = nptr; 653 do { 654 c = *s++; 655 } while (isspace(c)); 656 if (c == '-') { 657 neg = 1; 658 c = *s++; 659 } else { 660 neg = 0; 661 if (c == '+') 662 c = *s++; 663 } 664 if ((base == 0 || base == 16) && 665 c == '0' && (*s == 'x' || *s == 'X')) { 666 c = s[1]; 667 s += 2; 668 base = 16; 669 } 670 if (base == 0) 671 base = c == '0' ? 8 : 10; 672 qbase = (unsigned)base; 673 cutoff = (u_quad_t)UQUAD_MAX / qbase; 674 cutlim = (u_quad_t)UQUAD_MAX % qbase; 675 for (acc = 0, any = 0;; c = *s++) { 676 if (!isascii(c)) 677 break; 678 if (isdigit(c)) 679 c -= '0'; 680 else if (isalpha(c)) 681 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 682 else 683 break; 684 if (c >= base) 685 break; 686 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 687 any = -1; 688 else { 689 any = 1; 690 acc *= qbase; 691 acc += c; 692 } 693 } 694 if (any < 0) { 695 acc = UQUAD_MAX; 696 } else if (neg) 697 acc = -acc; 698 if (endptr != 0) 699 *endptr = (const char *)(any ? s - 1 : nptr); 700 return (acc); 701 } 702 703 /* 704 * Convert a string to a quad integer. 705 * 706 * Ignores `locale' stuff. Assumes that the upper and lower case 707 * alphabets and digits are each contiguous. 708 */ 709 quad_t 710 strtoq(const char *nptr, const char **endptr, int base) 711 { 712 const char *s; 713 u_quad_t acc; 714 unsigned char c; 715 u_quad_t qbase, cutoff; 716 int neg, any, cutlim; 717 718 /* 719 * Skip white space and pick up leading +/- sign if any. 720 * If base is 0, allow 0x for hex and 0 for octal, else 721 * assume decimal; if base is already 16, allow 0x. 722 */ 723 s = nptr; 724 do { 725 c = *s++; 726 } while (isspace(c)); 727 if (c == '-') { 728 neg = 1; 729 c = *s++; 730 } else { 731 neg = 0; 732 if (c == '+') 733 c = *s++; 734 } 735 if ((base == 0 || base == 16) && 736 c == '0' && (*s == 'x' || *s == 'X')) { 737 c = s[1]; 738 s += 2; 739 base = 16; 740 } 741 if (base == 0) 742 base = c == '0' ? 8 : 10; 743 744 /* 745 * Compute the cutoff value between legal numbers and illegal 746 * numbers. That is the largest legal value, divided by the 747 * base. An input number that is greater than this value, if 748 * followed by a legal input character, is too big. One that 749 * is equal to this value may be valid or not; the limit 750 * between valid and invalid numbers is then based on the last 751 * digit. For instance, if the range for quads is 752 * [-9223372036854775808..9223372036854775807] and the input base 753 * is 10, cutoff will be set to 922337203685477580 and cutlim to 754 * either 7 (neg==0) or 8 (neg==1), meaning that if we have 755 * accumulated a value > 922337203685477580, or equal but the 756 * next digit is > 7 (or 8), the number is too big, and we will 757 * return a range error. 758 * 759 * Set any if any `digits' consumed; make it negative to indicate 760 * overflow. 761 */ 762 qbase = (unsigned)base; 763 cutoff = neg ? (u_quad_t)-(QUAD_MIN + QUAD_MAX) + QUAD_MAX : QUAD_MAX; 764 cutlim = cutoff % qbase; 765 cutoff /= qbase; 766 for (acc = 0, any = 0;; c = *s++) { 767 if (!isascii(c)) 768 break; 769 if (isdigit(c)) 770 c -= '0'; 771 else if (isalpha(c)) 772 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 773 else 774 break; 775 if (c >= base) 776 break; 777 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 778 any = -1; 779 else { 780 any = 1; 781 acc *= qbase; 782 acc += c; 783 } 784 } 785 if (any < 0) { 786 acc = neg ? QUAD_MIN : QUAD_MAX; 787 } else if (neg) 788 acc = -acc; 789 if (endptr != 0) 790 *endptr = (const char *)(any ? s - 1 : nptr); 791 return (acc); 792 } 793