1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 37 * From: static char sccsid[] = "@(#)strtol.c 8.1 (Berkeley) 6/4/93"; 38 * From: static char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93"; 39 */ 40 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/ctype.h> 47 #include <sys/limits.h> 48 49 /* 50 * Note that stdarg.h and the ANSI style va_start macro is used for both 51 * ANSI and traditional C compilers. 52 */ 53 #include <machine/stdarg.h> 54 55 #define BUF 32 /* Maximum length of numeric string. */ 56 57 /* 58 * Flags used during conversion. 59 */ 60 #define LONG 0x01 /* l: long or double */ 61 #define SHORT 0x04 /* h: short */ 62 #define SUPPRESS 0x08 /* suppress assignment */ 63 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 64 #define NOSKIP 0x20 /* do not skip blanks */ 65 #define QUAD 0x400 66 67 /* 68 * The following are used in numeric conversions only: 69 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 70 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 71 */ 72 #define SIGNOK 0x40 /* +/- is (still) legal */ 73 #define NDIGITS 0x80 /* no digits detected */ 74 75 #define DPTOK 0x100 /* (float) decimal point is still legal */ 76 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 77 78 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 79 #define NZDIGITS 0x200 /* no zero digits detected */ 80 81 /* 82 * Conversion types. 83 */ 84 #define CT_CHAR 0 /* %c conversion */ 85 #define CT_CCL 1 /* %[...] conversion */ 86 #define CT_STRING 2 /* %s conversion */ 87 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 88 typedef u_quad_t (*ccfntype)(const char *, char **, int); 89 90 static const u_char *__sccl(char *, const u_char *); 91 92 int 93 sscanf(const char *ibuf, const char *fmt, ...) 94 { 95 va_list ap; 96 int ret; 97 98 va_start(ap, fmt); 99 ret = vsscanf(ibuf, fmt, ap); 100 va_end(ap); 101 return(ret); 102 } 103 104 int 105 vsscanf(const char *inp, char const *fmt0, va_list ap) 106 { 107 int inr; 108 const u_char *fmt = (const u_char *)fmt0; 109 int c; /* character from format, or conversion */ 110 size_t width; /* field width, or 0 */ 111 char *p; /* points into all kinds of strings */ 112 int n; /* handy integer */ 113 int flags; /* flags as defined above */ 114 char *p0; /* saves original value of p when necessary */ 115 int nassigned; /* number of fields assigned */ 116 int nconversions; /* number of conversions */ 117 int nread; /* number of characters consumed from fp */ 118 int base; /* base argument to strtoq/strtouq */ 119 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 120 char ccltab[256]; /* character class table for %[...] */ 121 char buf[BUF]; /* buffer for numeric conversions */ 122 123 /* `basefix' is used to avoid `if' tests in the integer scanner */ 124 static short basefix[17] = 125 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 126 127 inr = strlen(inp); 128 129 nassigned = 0; 130 nconversions = 0; 131 nread = 0; 132 base = 0; /* XXX just to keep gcc happy */ 133 ccfn = NULL; /* XXX just to keep gcc happy */ 134 for (;;) { 135 c = *fmt++; 136 if (c == 0) 137 return (nassigned); 138 if (isspace(c)) { 139 while (inr > 0 && isspace(*inp)) 140 nread++, inr--, inp++; 141 continue; 142 } 143 if (c != '%') 144 goto literal; 145 width = 0; 146 flags = 0; 147 /* 148 * switch on the format. continue if done; 149 * break once format type is derived. 150 */ 151 again: c = *fmt++; 152 switch (c) { 153 case '%': 154 literal: 155 if (inr <= 0) 156 goto input_failure; 157 if (*inp != c) 158 goto match_failure; 159 inr--, inp++; 160 nread++; 161 continue; 162 163 case '*': 164 flags |= SUPPRESS; 165 goto again; 166 case 'l': 167 flags |= LONG; 168 goto again; 169 case 'q': 170 flags |= QUAD; 171 goto again; 172 case 'h': 173 flags |= SHORT; 174 goto again; 175 176 case '0': case '1': case '2': case '3': case '4': 177 case '5': case '6': case '7': case '8': case '9': 178 width = width * 10 + c - '0'; 179 goto again; 180 181 /* 182 * Conversions. 183 * 184 */ 185 case 'd': 186 c = CT_INT; 187 ccfn = (ccfntype)strtoq; 188 base = 10; 189 break; 190 191 case 'i': 192 c = CT_INT; 193 ccfn = (ccfntype)strtoq; 194 base = 0; 195 break; 196 197 case 'o': 198 c = CT_INT; 199 ccfn = strtouq; 200 base = 8; 201 break; 202 203 case 'u': 204 c = CT_INT; 205 ccfn = strtouq; 206 base = 10; 207 break; 208 209 case 'x': 210 flags |= PFXOK; /* enable 0x prefixing */ 211 c = CT_INT; 212 ccfn = strtouq; 213 base = 16; 214 break; 215 216 case 's': 217 c = CT_STRING; 218 break; 219 220 case '[': 221 fmt = __sccl(ccltab, fmt); 222 flags |= NOSKIP; 223 c = CT_CCL; 224 break; 225 226 case 'c': 227 flags |= NOSKIP; 228 c = CT_CHAR; 229 break; 230 231 case 'p': /* pointer format is like hex */ 232 flags |= POINTER | PFXOK; 233 c = CT_INT; 234 ccfn = strtouq; 235 base = 16; 236 break; 237 238 case 'n': 239 nconversions++; 240 if (flags & SUPPRESS) /* ??? */ 241 continue; 242 if (flags & SHORT) 243 *va_arg(ap, short *) = nread; 244 else if (flags & LONG) 245 *va_arg(ap, long *) = nread; 246 else if (flags & QUAD) 247 *va_arg(ap, quad_t *) = nread; 248 else 249 *va_arg(ap, int *) = nread; 250 continue; 251 } 252 253 /* 254 * We have a conversion that requires input. 255 */ 256 if (inr <= 0) 257 goto input_failure; 258 259 /* 260 * Consume leading white space, except for formats 261 * that suppress this. 262 */ 263 if ((flags & NOSKIP) == 0) { 264 while (isspace(*inp)) { 265 nread++; 266 if (--inr > 0) 267 inp++; 268 else 269 goto input_failure; 270 } 271 /* 272 * Note that there is at least one character in 273 * the buffer, so conversions that do not set NOSKIP 274 * can no longer result in an input failure. 275 */ 276 } 277 278 /* 279 * Do the conversion. 280 */ 281 switch (c) { 282 283 case CT_CHAR: 284 /* scan arbitrary characters (sets NOSKIP) */ 285 if (width == 0) 286 width = 1; 287 if (flags & SUPPRESS) { 288 size_t sum = 0; 289 for (;;) { 290 if ((n = inr) < width) { 291 sum += n; 292 width -= n; 293 inp += n; 294 if (sum == 0) 295 goto input_failure; 296 break; 297 } else { 298 sum += width; 299 inr -= width; 300 inp += width; 301 break; 302 } 303 } 304 nread += sum; 305 } else { 306 bcopy(inp, va_arg(ap, char *), width); 307 inr -= width; 308 inp += width; 309 nread += width; 310 nassigned++; 311 } 312 nconversions++; 313 break; 314 315 case CT_CCL: 316 /* scan a (nonempty) character class (sets NOSKIP) */ 317 if (width == 0) 318 width = (size_t)~0; /* `infinity' */ 319 /* take only those things in the class */ 320 if (flags & SUPPRESS) { 321 n = 0; 322 while (ccltab[(unsigned char)*inp]) { 323 n++, inr--, inp++; 324 if (--width == 0) 325 break; 326 if (inr <= 0) { 327 if (n == 0) 328 goto input_failure; 329 break; 330 } 331 } 332 if (n == 0) 333 goto match_failure; 334 } else { 335 p0 = p = va_arg(ap, char *); 336 while (ccltab[(unsigned char)*inp]) { 337 inr--; 338 *p++ = *inp++; 339 if (--width == 0) 340 break; 341 if (inr <= 0) { 342 if (p == p0) 343 goto input_failure; 344 break; 345 } 346 } 347 n = p - p0; 348 if (n == 0) 349 goto match_failure; 350 *p = 0; 351 nassigned++; 352 } 353 nread += n; 354 nconversions++; 355 break; 356 357 case CT_STRING: 358 /* like CCL, but zero-length string OK, & no NOSKIP */ 359 if (width == 0) 360 width = (size_t)~0; 361 if (flags & SUPPRESS) { 362 n = 0; 363 while (!isspace(*inp)) { 364 n++, inr--, inp++; 365 if (--width == 0) 366 break; 367 if (inr <= 0) 368 break; 369 } 370 nread += n; 371 } else { 372 p0 = p = va_arg(ap, char *); 373 while (!isspace(*inp)) { 374 inr--; 375 *p++ = *inp++; 376 if (--width == 0) 377 break; 378 if (inr <= 0) 379 break; 380 } 381 *p = 0; 382 nread += p - p0; 383 nassigned++; 384 } 385 nconversions++; 386 continue; 387 388 case CT_INT: 389 /* scan an integer as if by strtoq/strtouq */ 390 #ifdef hardway 391 if (width == 0 || width > sizeof(buf) - 1) 392 width = sizeof(buf) - 1; 393 #else 394 /* size_t is unsigned, hence this optimisation */ 395 if (--width > sizeof(buf) - 2) 396 width = sizeof(buf) - 2; 397 width++; 398 #endif 399 flags |= SIGNOK | NDIGITS | NZDIGITS; 400 for (p = buf; width; width--) { 401 c = *inp; 402 /* 403 * Switch on the character; `goto ok' 404 * if we accept it as a part of number. 405 */ 406 switch (c) { 407 408 /* 409 * The digit 0 is always legal, but is 410 * special. For %i conversions, if no 411 * digits (zero or nonzero) have been 412 * scanned (only signs), we will have 413 * base==0. In that case, we should set 414 * it to 8 and enable 0x prefixing. 415 * Also, if we have not scanned zero digits 416 * before this, do not turn off prefixing 417 * (someone else will turn it off if we 418 * have scanned any nonzero digits). 419 */ 420 case '0': 421 if (base == 0) { 422 base = 8; 423 flags |= PFXOK; 424 } 425 if (flags & NZDIGITS) 426 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 427 else 428 flags &= ~(SIGNOK|PFXOK|NDIGITS); 429 goto ok; 430 431 /* 1 through 7 always legal */ 432 case '1': case '2': case '3': 433 case '4': case '5': case '6': case '7': 434 base = basefix[base]; 435 flags &= ~(SIGNOK | PFXOK | NDIGITS); 436 goto ok; 437 438 /* digits 8 and 9 ok iff decimal or hex */ 439 case '8': case '9': 440 base = basefix[base]; 441 if (base <= 8) 442 break; /* not legal here */ 443 flags &= ~(SIGNOK | PFXOK | NDIGITS); 444 goto ok; 445 446 /* letters ok iff hex */ 447 case 'A': case 'B': case 'C': 448 case 'D': case 'E': case 'F': 449 case 'a': case 'b': case 'c': 450 case 'd': case 'e': case 'f': 451 /* no need to fix base here */ 452 if (base <= 10) 453 break; /* not legal here */ 454 flags &= ~(SIGNOK | PFXOK | NDIGITS); 455 goto ok; 456 457 /* sign ok only as first character */ 458 case '+': case '-': 459 if (flags & SIGNOK) { 460 flags &= ~SIGNOK; 461 goto ok; 462 } 463 break; 464 465 /* x ok iff flag still set & 2nd char */ 466 case 'x': case 'X': 467 if (flags & PFXOK && p == buf + 1) { 468 base = 16; /* if %i */ 469 flags &= ~PFXOK; 470 goto ok; 471 } 472 break; 473 } 474 475 /* 476 * If we got here, c is not a legal character 477 * for a number. Stop accumulating digits. 478 */ 479 break; 480 ok: 481 /* 482 * c is legal: store it and look at the next. 483 */ 484 *p++ = c; 485 if (--inr > 0) 486 inp++; 487 else 488 break; /* end of input */ 489 } 490 /* 491 * If we had only a sign, it is no good; push 492 * back the sign. If the number ends in `x', 493 * it was [sign] '0' 'x', so push back the x 494 * and treat it as [sign] '0'. 495 */ 496 if (flags & NDIGITS) { 497 if (p > buf) { 498 inp--; 499 inr++; 500 } 501 goto match_failure; 502 } 503 c = ((u_char *)p)[-1]; 504 if (c == 'x' || c == 'X') { 505 --p; 506 inp--; 507 inr++; 508 } 509 if ((flags & SUPPRESS) == 0) { 510 u_quad_t res; 511 512 *p = 0; 513 res = (*ccfn)(buf, (char **)NULL, base); 514 if (flags & POINTER) 515 *va_arg(ap, void **) = 516 (void *)(uintptr_t)res; 517 else if (flags & SHORT) 518 *va_arg(ap, short *) = res; 519 else if (flags & LONG) 520 *va_arg(ap, long *) = res; 521 else if (flags & QUAD) 522 *va_arg(ap, quad_t *) = res; 523 else 524 *va_arg(ap, int *) = res; 525 nassigned++; 526 } 527 nread += p - buf; 528 nconversions++; 529 break; 530 531 } 532 } 533 input_failure: 534 return (nconversions != 0 ? nassigned : -1); 535 match_failure: 536 return (nassigned); 537 } 538 539 /* 540 * Fill in the given table from the scanset at the given format 541 * (just after `['). Return a pointer to the character past the 542 * closing `]'. The table has a 1 wherever characters should be 543 * considered part of the scanset. 544 */ 545 static const u_char * 546 __sccl(char *tab, const u_char *fmt) 547 { 548 int c, n, v; 549 550 /* first `clear' the whole table */ 551 c = *fmt++; /* first char hat => negated scanset */ 552 if (c == '^') { 553 v = 1; /* default => accept */ 554 c = *fmt++; /* get new first char */ 555 } else 556 v = 0; /* default => reject */ 557 558 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 559 for (n = 0; n < 256; n++) 560 tab[n] = v; /* memset(tab, v, 256) */ 561 562 if (c == 0) 563 return (fmt - 1);/* format ended before closing ] */ 564 565 /* 566 * Now set the entries corresponding to the actual scanset 567 * to the opposite of the above. 568 * 569 * The first character may be ']' (or '-') without being special; 570 * the last character may be '-'. 571 */ 572 v = 1 - v; 573 for (;;) { 574 tab[c] = v; /* take character c */ 575 doswitch: 576 n = *fmt++; /* and examine the next */ 577 switch (n) { 578 579 case 0: /* format ended too soon */ 580 return (fmt - 1); 581 582 case '-': 583 /* 584 * A scanset of the form 585 * [01+-] 586 * is defined as `the digit 0, the digit 1, 587 * the character +, the character -', but 588 * the effect of a scanset such as 589 * [a-zA-Z0-9] 590 * is implementation defined. The V7 Unix 591 * scanf treats `a-z' as `the letters a through 592 * z', but treats `a-a' as `the letter a, the 593 * character -, and the letter a'. 594 * 595 * For compatibility, the `-' is not considerd 596 * to define a range if the character following 597 * it is either a close bracket (required by ANSI) 598 * or is not numerically greater than the character 599 * we just stored in the table (c). 600 */ 601 n = *fmt; 602 if (n == ']' || n < c) { 603 c = '-'; 604 break; /* resume the for(;;) */ 605 } 606 fmt++; 607 /* fill in the range */ 608 do { 609 tab[++c] = v; 610 } while (c < n); 611 c = n; 612 /* 613 * Alas, the V7 Unix scanf also treats formats 614 * such as [a-c-e] as `the letters a through e'. 615 * This too is permitted by the standard.... 616 */ 617 goto doswitch; 618 break; 619 620 case ']': /* end of scanset */ 621 return (fmt); 622 623 default: /* just another character */ 624 c = n; 625 break; 626 } 627 } 628 /* NOTREACHED */ 629 } 630 631