1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Chris Torek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 35 * From: static char sccsid[] = "@(#)strtol.c 8.1 (Berkeley) 6/4/93"; 36 * From: static char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93"; 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/ctype.h> 45 #include <sys/limits.h> 46 47 /* 48 * Note that stdarg.h and the ANSI style va_start macro is used for both 49 * ANSI and traditional C compilers. 50 */ 51 #include <machine/stdarg.h> 52 53 #define BUF 32 /* Maximum length of numeric string. */ 54 55 /* 56 * Flags used during conversion. 57 */ 58 #define LONG 0x01 /* l: long or double */ 59 #define SHORT 0x04 /* h: short */ 60 #define SUPPRESS 0x08 /* suppress assignment */ 61 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 62 #define NOSKIP 0x20 /* do not skip blanks */ 63 #define QUAD 0x400 64 #define SHORTSHORT 0x4000 /** hh: char */ 65 66 /* 67 * The following are used in numeric conversions only: 68 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 69 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 70 */ 71 #define SIGNOK 0x40 /* +/- is (still) legal */ 72 #define NDIGITS 0x80 /* no digits detected */ 73 74 #define DPTOK 0x100 /* (float) decimal point is still legal */ 75 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 76 77 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 78 #define NZDIGITS 0x200 /* no zero digits detected */ 79 80 /* 81 * Conversion types. 82 */ 83 #define CT_CHAR 0 /* %c conversion */ 84 #define CT_CCL 1 /* %[...] conversion */ 85 #define CT_STRING 2 /* %s conversion */ 86 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 87 typedef u_quad_t (*ccfntype)(const char *, char **, int); 88 89 static const u_char *__sccl(char *, const u_char *); 90 91 int 92 sscanf(const char *ibuf, const char *fmt, ...) 93 { 94 va_list ap; 95 int ret; 96 97 va_start(ap, fmt); 98 ret = vsscanf(ibuf, fmt, ap); 99 va_end(ap); 100 return(ret); 101 } 102 103 int 104 vsscanf(const char *inp, char const *fmt0, va_list ap) 105 { 106 int inr; 107 const u_char *fmt = (const u_char *)fmt0; 108 int c; /* character from format, or conversion */ 109 size_t width; /* field width, or 0 */ 110 char *p; /* points into all kinds of strings */ 111 int n; /* handy integer */ 112 int flags; /* flags as defined above */ 113 char *p0; /* saves original value of p when necessary */ 114 int nassigned; /* number of fields assigned */ 115 int nconversions; /* number of conversions */ 116 int nread; /* number of characters consumed from fp */ 117 int base; /* base argument to strtoq/strtouq */ 118 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 119 char ccltab[256]; /* character class table for %[...] */ 120 char buf[BUF]; /* buffer for numeric conversions */ 121 122 /* `basefix' is used to avoid `if' tests in the integer scanner */ 123 static short basefix[17] = 124 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 125 126 inr = strlen(inp); 127 128 nassigned = 0; 129 nconversions = 0; 130 nread = 0; 131 base = 0; /* XXX just to keep gcc happy */ 132 ccfn = NULL; /* XXX just to keep gcc happy */ 133 for (;;) { 134 c = *fmt++; 135 if (c == 0) 136 return (nassigned); 137 if (isspace(c)) { 138 while (inr > 0 && isspace(*inp)) 139 nread++, inr--, inp++; 140 continue; 141 } 142 if (c != '%') 143 goto literal; 144 width = 0; 145 flags = 0; 146 /* 147 * switch on the format. continue if done; 148 * break once format type is derived. 149 */ 150 again: c = *fmt++; 151 switch (c) { 152 case '%': 153 literal: 154 if (inr <= 0) 155 goto input_failure; 156 if (*inp != c) 157 goto match_failure; 158 inr--, inp++; 159 nread++; 160 continue; 161 162 case '*': 163 flags |= SUPPRESS; 164 goto again; 165 case 'l': 166 if (flags & LONG){ 167 flags &= ~LONG; 168 flags |= QUAD; 169 } else { 170 flags |= LONG; 171 } 172 goto again; 173 case 'q': 174 flags |= QUAD; 175 goto again; 176 case 'h': 177 if (flags & SHORT){ 178 flags &= ~SHORT; 179 flags |= SHORTSHORT; 180 } else { 181 flags |= SHORT; 182 } 183 goto again; 184 185 case '0': case '1': case '2': case '3': case '4': 186 case '5': case '6': case '7': case '8': case '9': 187 width = width * 10 + c - '0'; 188 goto again; 189 190 /* 191 * Conversions. 192 * 193 */ 194 case 'd': 195 c = CT_INT; 196 ccfn = (ccfntype)strtoq; 197 base = 10; 198 break; 199 200 case 'i': 201 c = CT_INT; 202 ccfn = (ccfntype)strtoq; 203 base = 0; 204 break; 205 206 case 'o': 207 c = CT_INT; 208 ccfn = strtouq; 209 base = 8; 210 break; 211 212 case 'u': 213 c = CT_INT; 214 ccfn = strtouq; 215 base = 10; 216 break; 217 218 case 'x': 219 flags |= PFXOK; /* enable 0x prefixing */ 220 c = CT_INT; 221 ccfn = strtouq; 222 base = 16; 223 break; 224 225 case 's': 226 c = CT_STRING; 227 break; 228 229 case '[': 230 fmt = __sccl(ccltab, fmt); 231 flags |= NOSKIP; 232 c = CT_CCL; 233 break; 234 235 case 'c': 236 flags |= NOSKIP; 237 c = CT_CHAR; 238 break; 239 240 case 'p': /* pointer format is like hex */ 241 flags |= POINTER | PFXOK; 242 c = CT_INT; 243 ccfn = strtouq; 244 base = 16; 245 break; 246 247 case 'n': 248 nconversions++; 249 if (flags & SUPPRESS) /* ??? */ 250 continue; 251 if (flags & SHORTSHORT) 252 *va_arg(ap, char *) = nread; 253 else if (flags & SHORT) 254 *va_arg(ap, short *) = nread; 255 else if (flags & LONG) 256 *va_arg(ap, long *) = nread; 257 else if (flags & QUAD) 258 *va_arg(ap, quad_t *) = nread; 259 else 260 *va_arg(ap, int *) = nread; 261 continue; 262 } 263 264 /* 265 * We have a conversion that requires input. 266 */ 267 if (inr <= 0) 268 goto input_failure; 269 270 /* 271 * Consume leading white space, except for formats 272 * that suppress this. 273 */ 274 if ((flags & NOSKIP) == 0) { 275 while (isspace(*inp)) { 276 nread++; 277 if (--inr > 0) 278 inp++; 279 else 280 goto input_failure; 281 } 282 /* 283 * Note that there is at least one character in 284 * the buffer, so conversions that do not set NOSKIP 285 * can no longer result in an input failure. 286 */ 287 } 288 289 /* 290 * Do the conversion. 291 */ 292 switch (c) { 293 294 case CT_CHAR: 295 /* scan arbitrary characters (sets NOSKIP) */ 296 if (width == 0) 297 width = 1; 298 if (flags & SUPPRESS) { 299 size_t sum = 0; 300 for (;;) { 301 if ((n = inr) < width) { 302 sum += n; 303 width -= n; 304 inp += n; 305 if (sum == 0) 306 goto input_failure; 307 break; 308 } else { 309 sum += width; 310 inr -= width; 311 inp += width; 312 break; 313 } 314 } 315 nread += sum; 316 } else { 317 bcopy(inp, va_arg(ap, char *), width); 318 inr -= width; 319 inp += width; 320 nread += width; 321 nassigned++; 322 } 323 nconversions++; 324 break; 325 326 case CT_CCL: 327 /* scan a (nonempty) character class (sets NOSKIP) */ 328 if (width == 0) 329 width = (size_t)~0; /* `infinity' */ 330 /* take only those things in the class */ 331 if (flags & SUPPRESS) { 332 n = 0; 333 while (ccltab[(unsigned char)*inp]) { 334 n++, inr--, inp++; 335 if (--width == 0) 336 break; 337 if (inr <= 0) { 338 if (n == 0) 339 goto input_failure; 340 break; 341 } 342 } 343 if (n == 0) 344 goto match_failure; 345 } else { 346 p0 = p = va_arg(ap, char *); 347 while (ccltab[(unsigned char)*inp]) { 348 inr--; 349 *p++ = *inp++; 350 if (--width == 0) 351 break; 352 if (inr <= 0) { 353 if (p == p0) 354 goto input_failure; 355 break; 356 } 357 } 358 n = p - p0; 359 if (n == 0) 360 goto match_failure; 361 *p = 0; 362 nassigned++; 363 } 364 nread += n; 365 nconversions++; 366 break; 367 368 case CT_STRING: 369 /* like CCL, but zero-length string OK, & no NOSKIP */ 370 if (width == 0) 371 width = (size_t)~0; 372 if (flags & SUPPRESS) { 373 n = 0; 374 while (!isspace(*inp)) { 375 n++, inr--, inp++; 376 if (--width == 0) 377 break; 378 if (inr <= 0) 379 break; 380 } 381 nread += n; 382 } else { 383 p0 = p = va_arg(ap, char *); 384 while (!isspace(*inp)) { 385 inr--; 386 *p++ = *inp++; 387 if (--width == 0) 388 break; 389 if (inr <= 0) 390 break; 391 } 392 *p = 0; 393 nread += p - p0; 394 nassigned++; 395 } 396 nconversions++; 397 continue; 398 399 case CT_INT: 400 /* scan an integer as if by strtoq/strtouq */ 401 #ifdef hardway 402 if (width == 0 || width > sizeof(buf) - 1) 403 width = sizeof(buf) - 1; 404 #else 405 /* size_t is unsigned, hence this optimisation */ 406 if (--width > sizeof(buf) - 2) 407 width = sizeof(buf) - 2; 408 width++; 409 #endif 410 flags |= SIGNOK | NDIGITS | NZDIGITS; 411 for (p = buf; width; width--) { 412 c = *inp; 413 /* 414 * Switch on the character; `goto ok' 415 * if we accept it as a part of number. 416 */ 417 switch (c) { 418 419 /* 420 * The digit 0 is always legal, but is 421 * special. For %i conversions, if no 422 * digits (zero or nonzero) have been 423 * scanned (only signs), we will have 424 * base==0. In that case, we should set 425 * it to 8 and enable 0x prefixing. 426 * Also, if we have not scanned zero digits 427 * before this, do not turn off prefixing 428 * (someone else will turn it off if we 429 * have scanned any nonzero digits). 430 */ 431 case '0': 432 if (base == 0) { 433 base = 8; 434 flags |= PFXOK; 435 } 436 if (flags & NZDIGITS) 437 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 438 else 439 flags &= ~(SIGNOK|PFXOK|NDIGITS); 440 goto ok; 441 442 /* 1 through 7 always legal */ 443 case '1': case '2': case '3': 444 case '4': case '5': case '6': case '7': 445 base = basefix[base]; 446 flags &= ~(SIGNOK | PFXOK | NDIGITS); 447 goto ok; 448 449 /* digits 8 and 9 ok iff decimal or hex */ 450 case '8': case '9': 451 base = basefix[base]; 452 if (base <= 8) 453 break; /* not legal here */ 454 flags &= ~(SIGNOK | PFXOK | NDIGITS); 455 goto ok; 456 457 /* letters ok iff hex */ 458 case 'A': case 'B': case 'C': 459 case 'D': case 'E': case 'F': 460 case 'a': case 'b': case 'c': 461 case 'd': case 'e': case 'f': 462 /* no need to fix base here */ 463 if (base <= 10) 464 break; /* not legal here */ 465 flags &= ~(SIGNOK | PFXOK | NDIGITS); 466 goto ok; 467 468 /* sign ok only as first character */ 469 case '+': case '-': 470 if (flags & SIGNOK) { 471 flags &= ~SIGNOK; 472 goto ok; 473 } 474 break; 475 476 /* x ok iff flag still set & 2nd char */ 477 case 'x': case 'X': 478 if (flags & PFXOK && p == buf + 1) { 479 base = 16; /* if %i */ 480 flags &= ~PFXOK; 481 goto ok; 482 } 483 break; 484 } 485 486 /* 487 * If we got here, c is not a legal character 488 * for a number. Stop accumulating digits. 489 */ 490 break; 491 ok: 492 /* 493 * c is legal: store it and look at the next. 494 */ 495 *p++ = c; 496 if (--inr > 0) 497 inp++; 498 else 499 break; /* end of input */ 500 } 501 /* 502 * If we had only a sign, it is no good; push 503 * back the sign. If the number ends in `x', 504 * it was [sign] '0' 'x', so push back the x 505 * and treat it as [sign] '0'. 506 */ 507 if (flags & NDIGITS) { 508 if (p > buf) { 509 inp--; 510 inr++; 511 } 512 goto match_failure; 513 } 514 c = ((u_char *)p)[-1]; 515 if (c == 'x' || c == 'X') { 516 --p; 517 inp--; 518 inr++; 519 } 520 if ((flags & SUPPRESS) == 0) { 521 u_quad_t res; 522 523 *p = 0; 524 res = (*ccfn)(buf, (char **)NULL, base); 525 if (flags & POINTER) 526 *va_arg(ap, void **) = 527 (void *)(uintptr_t)res; 528 else if (flags & SHORTSHORT) 529 *va_arg(ap, char *) = res; 530 else if (flags & SHORT) 531 *va_arg(ap, short *) = res; 532 else if (flags & LONG) 533 *va_arg(ap, long *) = res; 534 else if (flags & QUAD) 535 *va_arg(ap, quad_t *) = res; 536 else 537 *va_arg(ap, int *) = res; 538 nassigned++; 539 } 540 nread += p - buf; 541 nconversions++; 542 break; 543 544 } 545 } 546 input_failure: 547 return (nconversions != 0 ? nassigned : -1); 548 match_failure: 549 return (nassigned); 550 } 551 552 /* 553 * Fill in the given table from the scanset at the given format 554 * (just after `['). Return a pointer to the character past the 555 * closing `]'. The table has a 1 wherever characters should be 556 * considered part of the scanset. 557 */ 558 static const u_char * 559 __sccl(char *tab, const u_char *fmt) 560 { 561 int c, n, v; 562 563 /* first `clear' the whole table */ 564 c = *fmt++; /* first char hat => negated scanset */ 565 if (c == '^') { 566 v = 1; /* default => accept */ 567 c = *fmt++; /* get new first char */ 568 } else 569 v = 0; /* default => reject */ 570 571 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 572 for (n = 0; n < 256; n++) 573 tab[n] = v; /* memset(tab, v, 256) */ 574 575 if (c == 0) 576 return (fmt - 1);/* format ended before closing ] */ 577 578 /* 579 * Now set the entries corresponding to the actual scanset 580 * to the opposite of the above. 581 * 582 * The first character may be ']' (or '-') without being special; 583 * the last character may be '-'. 584 */ 585 v = 1 - v; 586 for (;;) { 587 tab[c] = v; /* take character c */ 588 doswitch: 589 n = *fmt++; /* and examine the next */ 590 switch (n) { 591 592 case 0: /* format ended too soon */ 593 return (fmt - 1); 594 595 case '-': 596 /* 597 * A scanset of the form 598 * [01+-] 599 * is defined as `the digit 0, the digit 1, 600 * the character +, the character -', but 601 * the effect of a scanset such as 602 * [a-zA-Z0-9] 603 * is implementation defined. The V7 Unix 604 * scanf treats `a-z' as `the letters a through 605 * z', but treats `a-a' as `the letter a, the 606 * character -, and the letter a'. 607 * 608 * For compatibility, the `-' is not considered 609 * to define a range if the character following 610 * it is either a close bracket (required by ANSI) 611 * or is not numerically greater than the character 612 * we just stored in the table (c). 613 */ 614 n = *fmt; 615 if (n == ']' || n < c) { 616 c = '-'; 617 break; /* resume the for(;;) */ 618 } 619 fmt++; 620 /* fill in the range */ 621 do { 622 tab[++c] = v; 623 } while (c < n); 624 c = n; 625 /* 626 * Alas, the V7 Unix scanf also treats formats 627 * such as [a-c-e] as `the letters a through e'. 628 * This too is permitted by the standard.... 629 */ 630 goto doswitch; 631 break; 632 633 case ']': /* end of scanset */ 634 return (fmt); 635 636 default: /* just another character */ 637 c = n; 638 break; 639 } 640 } 641 /* NOTREACHED */ 642 } 643 644