1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Chris Torek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/ctype.h> 40 #include <sys/limits.h> 41 #include <sys/stdarg.h> 42 #include <sys/stddef.h> 43 44 #define BUF 32 /* Maximum length of numeric string. */ 45 46 /* 47 * Flags used during conversion. 48 */ 49 #define LONG 0x01 /* l: long or double */ 50 #define SHORT 0x04 /* h: short */ 51 #define SUPPRESS 0x08 /* suppress assignment */ 52 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 53 #define NOSKIP 0x20 /* do not skip blanks */ 54 #define QUAD 0x400 55 #define INTMAXT 0x800 /* j: intmax_t */ 56 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 57 #define SIZET 0x2000 /* z: size_t */ 58 #define SHORTSHORT 0x4000 /** hh: char */ 59 60 /* 61 * The following are used in numeric conversions only: 62 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 63 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 64 */ 65 #define SIGNOK 0x40 /* +/- is (still) legal */ 66 #define NDIGITS 0x80 /* no digits detected */ 67 68 #define DPTOK 0x100 /* (float) decimal point is still legal */ 69 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 70 71 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 72 #define NZDIGITS 0x200 /* no zero digits detected */ 73 74 /* 75 * Conversion types. 76 */ 77 #define CT_CHAR 0 /* %c conversion */ 78 #define CT_CCL 1 /* %[...] conversion */ 79 #define CT_STRING 2 /* %s conversion */ 80 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 81 typedef u_quad_t (*ccfntype)(const char *, char **, int); 82 83 static const u_char *__sccl(char *, const u_char *); 84 85 int 86 sscanf(const char *ibuf, const char *fmt, ...) 87 { 88 va_list ap; 89 int ret; 90 91 va_start(ap, fmt); 92 ret = vsscanf(ibuf, fmt, ap); 93 va_end(ap); 94 return(ret); 95 } 96 97 int 98 vsscanf(const char *inp, char const *fmt0, va_list ap) 99 { 100 int inr; 101 const u_char *fmt = (const u_char *)fmt0; 102 int c; /* character from format, or conversion */ 103 size_t width; /* field width, or 0 */ 104 char *p; /* points into all kinds of strings */ 105 int n; /* handy integer */ 106 int flags; /* flags as defined above */ 107 char *p0; /* saves original value of p when necessary */ 108 int nassigned; /* number of fields assigned */ 109 int nconversions; /* number of conversions */ 110 int nread; /* number of characters consumed from fp */ 111 int base; /* base argument to strtoq/strtouq */ 112 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 113 char ccltab[256]; /* character class table for %[...] */ 114 char buf[BUF]; /* buffer for numeric conversions */ 115 116 /* `basefix' is used to avoid `if' tests in the integer scanner */ 117 static short basefix[17] = 118 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 119 120 inr = strlen(inp); 121 122 nassigned = 0; 123 nconversions = 0; 124 nread = 0; 125 base = 0; /* XXX just to keep gcc happy */ 126 ccfn = NULL; /* XXX just to keep gcc happy */ 127 for (;;) { 128 c = *fmt++; 129 if (c == 0) 130 return (nassigned); 131 if (isspace(c)) { 132 while (inr > 0 && isspace(*inp)) 133 nread++, inr--, inp++; 134 continue; 135 } 136 if (c != '%') 137 goto literal; 138 width = 0; 139 flags = 0; 140 /* 141 * switch on the format. continue if done; 142 * break once format type is derived. 143 */ 144 again: c = *fmt++; 145 switch (c) { 146 case '%': 147 literal: 148 if (inr <= 0) 149 goto input_failure; 150 if (*inp != c) 151 goto match_failure; 152 inr--, inp++; 153 nread++; 154 continue; 155 156 case '*': 157 flags |= SUPPRESS; 158 goto again; 159 case 'j': 160 flags |= INTMAXT; 161 goto again; 162 case 'l': 163 if (flags & LONG){ 164 flags &= ~LONG; 165 flags |= QUAD; 166 } else { 167 flags |= LONG; 168 } 169 goto again; 170 case 'q': 171 flags |= QUAD; 172 goto again; 173 case 't': 174 flags |= PTRDIFFT; 175 goto again; 176 case 'z': 177 flags |= SIZET; 178 goto again; 179 case 'h': 180 if (flags & SHORT){ 181 flags &= ~SHORT; 182 flags |= SHORTSHORT; 183 } else { 184 flags |= SHORT; 185 } 186 goto again; 187 188 case '0': case '1': case '2': case '3': case '4': 189 case '5': case '6': case '7': case '8': case '9': 190 width = width * 10 + c - '0'; 191 goto again; 192 193 /* 194 * Conversions. 195 * 196 */ 197 case 'd': 198 c = CT_INT; 199 ccfn = (ccfntype)strtoq; 200 base = 10; 201 break; 202 203 case 'i': 204 c = CT_INT; 205 ccfn = (ccfntype)strtoq; 206 base = 0; 207 break; 208 209 case 'o': 210 c = CT_INT; 211 ccfn = strtouq; 212 base = 8; 213 break; 214 215 case 'u': 216 c = CT_INT; 217 ccfn = strtouq; 218 base = 10; 219 break; 220 221 case 'x': 222 flags |= PFXOK; /* enable 0x prefixing */ 223 c = CT_INT; 224 ccfn = strtouq; 225 base = 16; 226 break; 227 228 case 's': 229 c = CT_STRING; 230 break; 231 232 case '[': 233 fmt = __sccl(ccltab, fmt); 234 flags |= NOSKIP; 235 c = CT_CCL; 236 break; 237 238 case 'c': 239 flags |= NOSKIP; 240 c = CT_CHAR; 241 break; 242 243 case 'p': /* pointer format is like hex */ 244 flags |= POINTER | PFXOK; 245 c = CT_INT; 246 ccfn = strtouq; 247 base = 16; 248 break; 249 250 case 'n': 251 nconversions++; 252 if (flags & SUPPRESS) /* ??? */ 253 continue; 254 if (flags & SHORTSHORT) 255 *va_arg(ap, char *) = nread; 256 else if (flags & SHORT) 257 *va_arg(ap, short *) = nread; 258 else if (flags & LONG) 259 *va_arg(ap, long *) = nread; 260 else if (flags & QUAD) 261 *va_arg(ap, quad_t *) = nread; 262 else if (flags & INTMAXT) 263 *va_arg(ap, intmax_t *) = nread; 264 else if (flags & SIZET) 265 *va_arg(ap, size_t *) = nread; 266 else if (flags & PTRDIFFT) 267 *va_arg(ap, ptrdiff_t *) = nread; 268 else 269 *va_arg(ap, int *) = nread; 270 continue; 271 } 272 273 /* 274 * We have a conversion that requires input. 275 */ 276 if (inr <= 0) 277 goto input_failure; 278 279 /* 280 * Consume leading white space, except for formats 281 * that suppress this. 282 */ 283 if ((flags & NOSKIP) == 0) { 284 while (isspace(*inp)) { 285 nread++; 286 if (--inr > 0) 287 inp++; 288 else 289 goto input_failure; 290 } 291 /* 292 * Note that there is at least one character in 293 * the buffer, so conversions that do not set NOSKIP 294 * can no longer result in an input failure. 295 */ 296 } 297 298 /* 299 * Do the conversion. 300 */ 301 switch (c) { 302 case CT_CHAR: 303 /* scan arbitrary characters (sets NOSKIP) */ 304 if (width == 0) 305 width = 1; 306 if (flags & SUPPRESS) { 307 size_t sum = 0; 308 for (;;) { 309 if ((n = inr) < width) { 310 sum += n; 311 width -= n; 312 inp += n; 313 if (sum == 0) 314 goto input_failure; 315 break; 316 } else { 317 sum += width; 318 inr -= width; 319 inp += width; 320 break; 321 } 322 } 323 nread += sum; 324 } else { 325 bcopy(inp, va_arg(ap, char *), width); 326 inr -= width; 327 inp += width; 328 nread += width; 329 nassigned++; 330 } 331 nconversions++; 332 break; 333 334 case CT_CCL: 335 /* scan a (nonempty) character class (sets NOSKIP) */ 336 if (width == 0) 337 width = (size_t)~0; /* `infinity' */ 338 /* take only those things in the class */ 339 if (flags & SUPPRESS) { 340 n = 0; 341 while (ccltab[(unsigned char)*inp]) { 342 n++, inr--, inp++; 343 if (--width == 0) 344 break; 345 if (inr <= 0) { 346 if (n == 0) 347 goto input_failure; 348 break; 349 } 350 } 351 if (n == 0) 352 goto match_failure; 353 } else { 354 p0 = p = va_arg(ap, char *); 355 while (ccltab[(unsigned char)*inp]) { 356 inr--; 357 *p++ = *inp++; 358 if (--width == 0) 359 break; 360 if (inr <= 0) { 361 if (p == p0) 362 goto input_failure; 363 break; 364 } 365 } 366 n = p - p0; 367 if (n == 0) 368 goto match_failure; 369 *p = 0; 370 nassigned++; 371 } 372 nread += n; 373 nconversions++; 374 break; 375 376 case CT_STRING: 377 /* like CCL, but zero-length string OK, & no NOSKIP */ 378 if (width == 0) 379 width = (size_t)~0; 380 if (flags & SUPPRESS) { 381 n = 0; 382 while (!isspace(*inp)) { 383 n++, inr--, inp++; 384 if (--width == 0) 385 break; 386 if (inr <= 0) 387 break; 388 } 389 nread += n; 390 } else { 391 p0 = p = va_arg(ap, char *); 392 while (!isspace(*inp)) { 393 inr--; 394 *p++ = *inp++; 395 if (--width == 0) 396 break; 397 if (inr <= 0) 398 break; 399 } 400 *p = 0; 401 nread += p - p0; 402 nassigned++; 403 } 404 nconversions++; 405 continue; 406 407 case CT_INT: 408 /* scan an integer as if by strtoq/strtouq */ 409 #ifdef hardway 410 if (width == 0 || width > sizeof(buf) - 1) 411 width = sizeof(buf) - 1; 412 #else 413 /* size_t is unsigned, hence this optimisation */ 414 if (--width > sizeof(buf) - 2) 415 width = sizeof(buf) - 2; 416 width++; 417 #endif 418 flags |= SIGNOK | NDIGITS | NZDIGITS; 419 for (p = buf; width; width--) { 420 c = *inp; 421 /* 422 * Switch on the character; `goto ok' 423 * if we accept it as a part of number. 424 */ 425 switch (c) { 426 /* 427 * The digit 0 is always legal, but is 428 * special. For %i conversions, if no 429 * digits (zero or nonzero) have been 430 * scanned (only signs), we will have 431 * base==0. In that case, we should set 432 * it to 8 and enable 0x prefixing. 433 * Also, if we have not scanned zero digits 434 * before this, do not turn off prefixing 435 * (someone else will turn it off if we 436 * have scanned any nonzero digits). 437 */ 438 case '0': 439 if (base == 0) { 440 base = 8; 441 flags |= PFXOK; 442 } 443 if (flags & NZDIGITS) 444 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 445 else 446 flags &= ~(SIGNOK|PFXOK|NDIGITS); 447 goto ok; 448 449 /* 1 through 7 always legal */ 450 case '1': case '2': case '3': 451 case '4': case '5': case '6': case '7': 452 base = basefix[base]; 453 flags &= ~(SIGNOK | PFXOK | NDIGITS); 454 goto ok; 455 456 /* digits 8 and 9 ok iff decimal or hex */ 457 case '8': case '9': 458 base = basefix[base]; 459 if (base <= 8) 460 break; /* not legal here */ 461 flags &= ~(SIGNOK | PFXOK | NDIGITS); 462 goto ok; 463 464 /* letters ok iff hex */ 465 case 'A': case 'B': case 'C': 466 case 'D': case 'E': case 'F': 467 case 'a': case 'b': case 'c': 468 case 'd': case 'e': case 'f': 469 /* no need to fix base here */ 470 if (base <= 10) 471 break; /* not legal here */ 472 flags &= ~(SIGNOK | PFXOK | NDIGITS); 473 goto ok; 474 475 /* sign ok only as first character */ 476 case '+': case '-': 477 if (flags & SIGNOK) { 478 flags &= ~SIGNOK; 479 goto ok; 480 } 481 break; 482 483 /* x ok iff flag still set & 2nd char */ 484 case 'x': case 'X': 485 if (flags & PFXOK && p == buf + 1) { 486 base = 16; /* if %i */ 487 flags &= ~PFXOK; 488 goto ok; 489 } 490 break; 491 } 492 493 /* 494 * If we got here, c is not a legal character 495 * for a number. Stop accumulating digits. 496 */ 497 break; 498 ok: 499 /* 500 * c is legal: store it and look at the next. 501 */ 502 *p++ = c; 503 if (--inr > 0) 504 inp++; 505 else 506 break; /* end of input */ 507 } 508 /* 509 * If we had only a sign, it is no good; push 510 * back the sign. If the number ends in `x', 511 * it was [sign] '0' 'x', so push back the x 512 * and treat it as [sign] '0'. 513 */ 514 if (flags & NDIGITS) { 515 if (p > buf) { 516 inp--; 517 inr++; 518 } 519 goto match_failure; 520 } 521 c = ((u_char *)p)[-1]; 522 if (c == 'x' || c == 'X') { 523 --p; 524 inp--; 525 inr++; 526 } 527 if ((flags & SUPPRESS) == 0) { 528 u_quad_t res; 529 530 *p = 0; 531 res = (*ccfn)(buf, (char **)NULL, base); 532 if (flags & POINTER) 533 *va_arg(ap, void **) = 534 (void *)(uintptr_t)res; 535 else if (flags & SHORTSHORT) 536 *va_arg(ap, char *) = res; 537 else if (flags & SHORT) 538 *va_arg(ap, short *) = res; 539 else if (flags & LONG) 540 *va_arg(ap, long *) = res; 541 else if (flags & QUAD) 542 *va_arg(ap, quad_t *) = res; 543 else if (flags & INTMAXT) 544 *va_arg(ap, intmax_t *) = res; 545 else if (flags & PTRDIFFT) 546 *va_arg(ap, ptrdiff_t *) = res; 547 else if (flags & SIZET) 548 *va_arg(ap, size_t *) = res; 549 else 550 *va_arg(ap, int *) = res; 551 nassigned++; 552 } 553 nread += p - buf; 554 nconversions++; 555 break; 556 } 557 } 558 input_failure: 559 return (nconversions != 0 ? nassigned : -1); 560 match_failure: 561 return (nassigned); 562 } 563 564 /* 565 * Fill in the given table from the scanset at the given format 566 * (just after `['). Return a pointer to the character past the 567 * closing `]'. The table has a 1 wherever characters should be 568 * considered part of the scanset. 569 */ 570 static const u_char * 571 __sccl(char *tab, const u_char *fmt) 572 { 573 int c, n, v; 574 575 /* first `clear' the whole table */ 576 c = *fmt++; /* first char hat => negated scanset */ 577 if (c == '^') { 578 v = 1; /* default => accept */ 579 c = *fmt++; /* get new first char */ 580 } else 581 v = 0; /* default => reject */ 582 583 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 584 for (n = 0; n < 256; n++) 585 tab[n] = v; /* memset(tab, v, 256) */ 586 587 if (c == 0) 588 return (fmt - 1);/* format ended before closing ] */ 589 590 /* 591 * Now set the entries corresponding to the actual scanset 592 * to the opposite of the above. 593 * 594 * The first character may be ']' (or '-') without being special; 595 * the last character may be '-'. 596 */ 597 v = 1 - v; 598 for (;;) { 599 tab[c] = v; /* take character c */ 600 doswitch: 601 n = *fmt++; /* and examine the next */ 602 switch (n) { 603 case 0: /* format ended too soon */ 604 return (fmt - 1); 605 606 case '-': 607 /* 608 * A scanset of the form 609 * [01+-] 610 * is defined as `the digit 0, the digit 1, 611 * the character +, the character -', but 612 * the effect of a scanset such as 613 * [a-zA-Z0-9] 614 * is implementation defined. The V7 Unix 615 * scanf treats `a-z' as `the letters a through 616 * z', but treats `a-a' as `the letter a, the 617 * character -, and the letter a'. 618 * 619 * For compatibility, the `-' is not considered 620 * to define a range if the character following 621 * it is either a close bracket (required by ANSI) 622 * or is not numerically greater than the character 623 * we just stored in the table (c). 624 */ 625 n = *fmt; 626 if (n == ']' || n < c) { 627 c = '-'; 628 break; /* resume the for(;;) */ 629 } 630 fmt++; 631 /* fill in the range */ 632 do { 633 tab[++c] = v; 634 } while (c < n); 635 c = n; 636 /* 637 * Alas, the V7 Unix scanf also treats formats 638 * such as [a-c-e] as `the letters a through e'. 639 * This too is permitted by the standard.... 640 */ 641 goto doswitch; 642 break; 643 644 case ']': /* end of scanset */ 645 return (fmt); 646 647 default: /* just another character */ 648 c = n; 649 break; 650 } 651 } 652 /* NOTREACHED */ 653 } 654