1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1988 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include "lint.h" 33 #include <sys/types.h> 34 #include "mtlib.h" 35 #include "file64.h" 36 #include <stdio.h> 37 #include <ctype.h> 38 #include <stdarg.h> 39 #include <values.h> 40 #include <errno.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <math.h> 44 #include <thread.h> 45 #include <synch.h> 46 #include <stdlib.h> 47 #include <fnmatch.h> 48 #include <limits.h> 49 #include <wchar.h> 50 #include <unistd.h> 51 #include "libc.h" 52 #include "stdiom.h" 53 #include "xpg6.h" 54 55 #define NCHARS (1 << BITSPERBYTE) 56 57 /* if the _IOWRT flag is set, this must be a call from sscanf */ 58 #define locgetc(cnt) (cnt += 1, (iop->_flag & _IOWRT) ? \ 59 ((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \ 60 GETC(iop)) 61 #define locungetc(cnt, x) (cnt -= 1, (x == EOF) ? EOF : \ 62 ((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \ 63 (++iop->_cnt, *(--iop->_ptr)))) 64 65 #define wlocgetc() ((iop->_flag & _IOWRT) ? \ 66 ((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \ 67 GETC(iop)) 68 #define wlocungetc(x) ((x == EOF) ? EOF : \ 69 ((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \ 70 UNGETC(x, iop))) 71 72 #define MAXARGS 30 /* max. number of args for fast positional paramters */ 73 74 /* 75 * stva_list is used to subvert C's restriction that a variable with an 76 * array type can not appear on the left hand side of an assignment operator. 77 * By putting the array inside a structure, the functionality of assigning to 78 * the whole array through a simple assignment is achieved.. 79 */ 80 typedef struct stva_list { 81 va_list ap; 82 } stva_list; 83 84 static int number(int *, int *, int, int, int, int, FILE *, va_list *); 85 static int readchar(FILE *, int *); 86 static int string(int *, int *, int, int, int, char *, FILE *, va_list *); 87 static int wstring(int *, int *, int, int, int, FILE *, va_list *); 88 static int wbrstring(int *, int *, int, int, int, FILE *, 89 unsigned char *, va_list *); 90 #ifdef _WIDE 91 static int brstring(int *, int *, int, int, int, FILE *, 92 unsigned char *, va_list *); 93 #endif 94 static int _bi_getwc(FILE *); 95 static int _bi_ungetwc(wint_t, FILE *); 96 97 #ifdef _WIDE 98 static int _mkarglst(const wchar_t *, stva_list, stva_list[]); 99 static wint_t _wd_getwc(int *, FILE *); 100 static wint_t _wd_ungetwc(int *, wchar_t, FILE *); 101 static int _watoi(wchar_t *); 102 #else /* _WIDE */ 103 static int _mkarglst(const char *, stva_list, stva_list[]); 104 #endif /* _WIDE */ 105 106 #ifndef _WIDE 107 int 108 _doscan(FILE *iop, const char *fmt, va_list va_Alist) 109 { 110 int ret; 111 rmutex_t *lk; 112 113 if (iop->_flag & _IOWRT) 114 ret = __doscan_u(iop, fmt, va_Alist, 0); 115 else { 116 FLOCKFILE(lk, iop); 117 ret = __doscan_u(iop, fmt, va_Alist, 0); 118 FUNLOCKFILE(lk); 119 } 120 return (ret); 121 } 122 #endif /* _WIDE */ 123 124 /* ARGSUSED3 */ 125 #ifdef _WIDE 126 int 127 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, int scflag) 128 #else /* _WIDE */ 129 int 130 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag) 131 #endif /* _WIDE */ 132 { 133 #ifdef _WIDE 134 wchar_t ch; 135 wchar_t inchar, size; 136 int nmatch = 0, len, stow; 137 #else /* _WIDE */ 138 int ch; 139 int nmatch = 0, len, inchar, stow, size; 140 #endif /* _WIDE */ 141 142 unsigned char *bracket_str = NULL; 143 int chcount, flag_eof; 144 char tab[NCHARS]; 145 146 /* variables for postional parameters */ 147 #ifdef _WIDE 148 const wchar_t *sformat = fmt; /* save the beginning of the format */ 149 #else /* _WIDE */ 150 const unsigned char *fmt = (const unsigned char *)sfmt; 151 const char *sformat = sfmt; /* save the beginning of the format */ 152 #endif /* _WIDE */ 153 int fpos = 1; /* 1 if first postional parameter */ 154 stva_list args; /* used to step through the argument list */ 155 stva_list sargs; /* used to save start of the argument list */ 156 stva_list arglst[MAXARGS]; 157 /* 158 * array giving the appropriate values 159 * for va_arg() to retrieve the 160 * corresponding argument: 161 * arglst[0] is the first argument 162 * arglst[1] is the second argument,etc. 163 */ 164 /* Check if readable stream */ 165 if (!(iop->_flag & (_IOREAD | _IORW))) { 166 errno = EBADF; 167 return (EOF); 168 } 169 170 /* 171 * Initialize args and sargs to the start of the argument list. 172 * We don't know any portable way to copy an arbitrary C object 173 * so we use a system-specific routine(probably a macro) from 174 * stdarg.h. (Remember that if va_list is an array, in_args will 175 * be a pointer and &in_args won't be what we would want for 176 * memcpy.) 177 */ 178 va_copy(args.ap, va_Alist); 179 180 sargs = args; 181 182 chcount = 0; flag_eof = 0; 183 184 /* 185 * **************************************************** 186 * Main loop: reads format to determine a pattern, 187 * and then goes to read input stream 188 * in attempt to match the pattern. 189 * **************************************************** 190 */ 191 for (; ; ) { 192 if ((ch = *fmt++) == '\0') { 193 return (nmatch); /* end of format */ 194 } 195 #ifdef _WIDE 196 if (iswspace(ch)) { 197 if (!flag_eof) { 198 while (iswspace(inchar = 199 _wd_getwc(&chcount, iop))) 200 ; 201 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF) 202 flag_eof = 1; 203 } 204 continue; 205 } 206 if (ch != '%' || (ch = *fmt++) == '%') { 207 if (ch == '%') { 208 if (!flag_eof) { 209 while (iswspace(inchar = 210 _wd_getwc(&chcount, iop))) 211 ; 212 if (_wd_ungetwc(&chcount, inchar, iop) 213 == WEOF) 214 flag_eof = 1; 215 } 216 } 217 if ((inchar = _wd_getwc(&chcount, iop)) == ch) 218 continue; 219 if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) { 220 return (nmatch); /* failed to match input */ 221 } 222 break; 223 } 224 #else /* _WIDE */ 225 if (isspace(ch)) { 226 if (!flag_eof) { 227 while (isspace(inchar = locgetc(chcount))) 228 ; 229 if (locungetc(chcount, inchar) == EOF) 230 flag_eof = 1; 231 232 } 233 continue; 234 } 235 if (ch != '%' || (ch = *fmt++) == '%') { 236 if (ch == '%') { 237 if (!flag_eof) { 238 while (isspace(inchar = 239 locgetc(chcount))) 240 ; 241 if (locungetc(chcount, inchar) == EOF) 242 flag_eof = 1; 243 } 244 } 245 if ((inchar = locgetc(chcount)) == ch) 246 continue; 247 if (locungetc(chcount, inchar) != EOF) { 248 return (nmatch); /* failed to match input */ 249 } 250 break; 251 } 252 #endif /* _WIDE */ 253 254 charswitch: /* target of a goto 8-( */ 255 256 if (ch == '*') { 257 stow = 0; 258 ch = *fmt++; 259 } else 260 stow = 1; 261 262 #ifdef _WIDE 263 for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch)); 264 ch = *fmt++) 265 len = len * 10 + ch - '0'; 266 #else /* _WIDE */ 267 for (len = 0; isdigit(ch); ch = *fmt++) 268 len = len * 10 + ch - '0'; 269 #endif /* _WIDE */ 270 271 if (ch == '$') { 272 /* 273 * positional parameter handling - the number 274 * specified in len gives the argument to which 275 * the next conversion should be applied. 276 * WARNING: This implementation of positional 277 * parameters assumes that the sizes of all pointer 278 * types are the same. (Code similar to that 279 * in the portable doprnt.c should be used if this 280 * assumption does not hold for a particular 281 * port.) 282 */ 283 if (fpos) { 284 if (_mkarglst(sformat, sargs, arglst) != 0) { 285 return (EOF); 286 } else { 287 fpos = 0; 288 } 289 } 290 if (len <= MAXARGS) { 291 args = arglst[len - 1]; 292 } else { 293 args = arglst[MAXARGS - 1]; 294 for (len -= MAXARGS; len > 0; len--) 295 (void) va_arg(args.ap, void *); 296 } 297 len = 0; 298 ch = *fmt++; 299 goto charswitch; 300 } 301 302 if (len == 0) 303 len = MAXINT; 304 #ifdef _WIDE 305 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') || 306 (size == 'j') || (size == 't') || (size == 'z')) 307 ch = *fmt++; 308 #else /* _WIDE */ 309 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') || 310 (size == 'w') || (size == 'j') || (size == 't') || 311 (size == 'z')) 312 ch = *fmt++; 313 #endif /* _WIDE */ 314 if (size == 'l' && ch == 'l') { 315 size = 'm'; /* size = 'm' if long long */ 316 ch = *fmt++; 317 } else if (size == 'h' && ch == 'h') { 318 size = 'b'; /* use size = 'b' if char */ 319 ch = *fmt++; 320 } else if ((size == 't') || (size == 'z')) { 321 size = 'l'; 322 } else if (size == 'j') { 323 #ifndef _LP64 324 /* check scflag for size of u/intmax_t (32-bit libc) */ 325 if (!(scflag & _F_INTMAX32)) { 326 #endif 327 size = 'm'; 328 #ifndef _LP64 329 } 330 #endif 331 } 332 if (ch == '\0') { 333 return (EOF); /* unexpected end of format */ 334 } 335 #ifdef _WIDE 336 if (ch == '[') { 337 wchar_t c; 338 size_t len; 339 int negflg = 0; 340 wchar_t *p; 341 wchar_t *wbracket_str; 342 size_t wlen, clen; 343 344 /* p points to the address of '[' */ 345 p = (wchar_t *)fmt - 1; 346 len = 0; 347 if (*fmt == '^') { 348 len++; 349 fmt++; 350 negflg = 1; 351 } 352 if (((c = *fmt) == ']') || (c == '-')) { 353 len++; 354 fmt++; 355 } 356 while ((c = *fmt) != ']') { 357 if (c == '\0') { 358 return (EOF); /* unexpected EOF */ 359 } else { 360 len++; 361 fmt++; 362 } 363 } 364 fmt++; 365 len += 2; 366 wbracket_str = (wchar_t *) 367 malloc(sizeof (wchar_t) * (len + 1)); 368 if (wbracket_str == NULL) { 369 errno = ENOMEM; 370 return (EOF); 371 } else { 372 (void) wmemcpy(wbracket_str, 373 (const wchar_t *)p, len); 374 *(wbracket_str + len) = L'\0'; 375 if (negflg && *(wbracket_str + 1) == '^') { 376 *(wbracket_str + 1) = L'!'; 377 } 378 } 379 wlen = wcslen(wbracket_str); 380 clen = wcstombs((char *)NULL, wbracket_str, 0); 381 if (clen == (size_t)-1) { 382 free(wbracket_str); 383 return (EOF); 384 } 385 bracket_str = (unsigned char *) 386 malloc(sizeof (unsigned char) * (clen + 1)); 387 if (bracket_str == NULL) { 388 free(wbracket_str); 389 errno = ENOMEM; 390 return (EOF); 391 } 392 clen = wcstombs((char *)bracket_str, wbracket_str, 393 wlen + 1); 394 free(wbracket_str); 395 if (clen == (size_t)-1) { 396 free(bracket_str); 397 return (EOF); 398 } 399 } 400 #else /* _WIDE */ 401 if (ch == '[') { 402 if (size == 'l') { 403 int c, len, i; 404 int negflg = 0; 405 unsigned char *p; 406 407 p = (unsigned char *)(fmt - 1); 408 len = 0; 409 if (*fmt == '^') { 410 len++; 411 fmt++; 412 negflg = 1; 413 } 414 if (((c = *fmt) == ']') || (c == '-')) { 415 len++; 416 fmt++; 417 } 418 while ((c = *fmt) != ']') { 419 if (c == '\0') { 420 return (EOF); 421 } else if (isascii(c)) { 422 len++; 423 fmt++; 424 } else { 425 i = mblen((const char *)fmt, 426 MB_CUR_MAX); 427 if (i <= 0) { 428 return (EOF); 429 } else { 430 len += i; 431 fmt += i; 432 } 433 } 434 } 435 fmt++; 436 len += 2; 437 bracket_str = (unsigned char *) 438 malloc(sizeof (unsigned char) * (len + 1)); 439 if (bracket_str == NULL) { 440 errno = ENOMEM; 441 return (EOF); 442 } else { 443 (void) strncpy((char *)bracket_str, 444 (const char *)p, len); 445 *(bracket_str + len) = '\0'; 446 if (negflg && 447 *(bracket_str + 1) == '^') { 448 *(bracket_str + 1) = '!'; 449 } 450 } 451 } else { 452 int t = 0; 453 int b, c, d; 454 455 if (*fmt == '^') { 456 t++; 457 fmt++; 458 } 459 (void) memset(tab, !t, NCHARS); 460 if ((c = *fmt) == ']' || c == '-') { 461 tab[c] = t; 462 fmt++; 463 } 464 465 while ((c = *fmt) != ']') { 466 if (c == '\0') { 467 return (EOF); 468 } 469 b = *(fmt - 1); 470 d = *(fmt + 1); 471 if ((c == '-') && (d != ']') && 472 (b < d)) { 473 (void) memset(&tab[b], t, 474 d - b + 1); 475 fmt += 2; 476 } else { 477 tab[c] = t; 478 fmt++; 479 } 480 } 481 fmt++; 482 } 483 } 484 #endif /* _WIDE */ 485 486 #ifdef _WIDE 487 if ((ch >= 0) && (ch < 256) && 488 isupper((int)ch)) { /* no longer documented */ 489 if (_lib_version == c_issue_4) { 490 if (size != 'm' && size != 'L') 491 size = 'l'; 492 } 493 ch = _tolower((int)ch); 494 } 495 if (ch != 'n' && !flag_eof) { 496 if (ch != 'c' && ch != 'C' && ch != '[') { 497 while (iswspace(inchar = 498 _wd_getwc(&chcount, iop))) 499 ; 500 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF) 501 break; 502 503 } 504 } 505 #else /* _WIDE */ 506 if (isupper(ch)) { /* no longer documented */ 507 if (_lib_version == c_issue_4) { 508 if (size != 'm' && size != 'L') 509 size = 'l'; 510 } 511 ch = _tolower(ch); 512 } 513 if (ch != 'n' && !flag_eof) { 514 if (ch != 'c' && ch != 'C' && ch != '[') { 515 while (isspace(inchar = locgetc(chcount))) 516 ; 517 if (locungetc(chcount, inchar) == EOF) 518 break; 519 } 520 } 521 #endif /* _WIDE */ 522 523 switch (ch) { 524 case 'C': 525 case 'S': 526 case 'c': 527 case 's': 528 #ifdef _WIDE 529 if ((size == 'l') || (size == 'C') || (size == 'S')) 530 #else /* _WIDE */ 531 if ((size == 'w') || (size == 'l') || (size == 'C') || 532 (size == 'S')) 533 #endif /* _WIDE */ 534 { 535 size = wstring(&chcount, &flag_eof, stow, 536 (int)ch, len, iop, &args.ap); 537 } else { 538 size = string(&chcount, &flag_eof, stow, 539 (int)ch, len, tab, iop, &args.ap); 540 } 541 break; 542 case '[': 543 if (size == 'l') { 544 size = wbrstring(&chcount, &flag_eof, stow, 545 (int)ch, len, iop, bracket_str, &args.ap); 546 free(bracket_str); 547 bracket_str = NULL; 548 } else { 549 #ifdef _WIDE 550 size = brstring(&chcount, &flag_eof, stow, 551 (int)ch, len, iop, bracket_str, &args.ap); 552 free(bracket_str); 553 bracket_str = NULL; 554 #else /* _WIDE */ 555 size = string(&chcount, &flag_eof, stow, 556 ch, len, tab, iop, &args.ap); 557 #endif /* _WIDE */ 558 } 559 break; 560 561 case 'n': 562 if (stow == 0) 563 continue; 564 if (size == 'b') /* char */ 565 *va_arg(args.ap, char *) = (char)chcount; 566 else if (size == 'h') 567 *va_arg(args.ap, short *) = (short)chcount; 568 else if (size == 'l') 569 *va_arg(args.ap, long *) = (long)chcount; 570 else if (size == 'm') /* long long */ 571 *va_arg(args.ap, long long *) = 572 (long long) chcount; 573 else 574 *va_arg(args.ap, int *) = (int)chcount; 575 continue; 576 577 case 'i': 578 default: 579 size = number(&chcount, &flag_eof, stow, (int)ch, 580 len, (int)size, iop, &args.ap); 581 break; 582 } 583 if (size) 584 nmatch += stow; 585 else { 586 return ((flag_eof && !nmatch) ? EOF : nmatch); 587 } 588 continue; 589 } 590 if (bracket_str) 591 free(bracket_str); 592 return (nmatch != 0 ? nmatch : EOF); /* end of input */ 593 } 594 595 /* ****************************************************************** */ 596 /* Functions to read the input stream in an attempt to match incoming */ 597 /* data to the current pattern from the main loop of _doscan(). */ 598 /* ****************************************************************** */ 599 static int 600 number(int *chcount, int *flag_eof, int stow, int type, int len, int size, 601 FILE *iop, va_list *listp) 602 { 603 char numbuf[64]; 604 char *np = numbuf; 605 int c, base, inchar, lookahead; 606 int digitseen = 0, floater = 0, negflg = 0; 607 int lc; 608 long long lcval = 0LL; 609 610 switch (type) { 611 case 'e': 612 case 'f': 613 case 'g': 614 /* 615 * lc = 0 corresponds to c90 mode: do not recognize 616 * hexadecimal fp strings; attempt to push back 617 * all unused characters read 618 * 619 * lc = -1 corresponds to c99 mode: recognize hexa- 620 * decimal fp strings; push back at most one 621 * unused character 622 */ 623 lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0; 624 floater = 1; 625 break; 626 627 case 'a': 628 lc = -1; 629 floater = 1; 630 break; 631 632 case 'd': 633 case 'u': 634 case 'i': 635 base = 10; 636 break; 637 case 'o': 638 base = 8; 639 break; 640 case 'p': 641 #ifdef _LP64 642 size = 'l'; /* pointers are long in LP64 */ 643 #endif /* _LP64 */ 644 /* FALLTHROUGH */ 645 case 'x': 646 base = 16; 647 break; 648 default: 649 return (0); /* unrecognized conversion character */ 650 } 651 652 if (floater != 0) { 653 /* 654 * Handle floating point with 655 * file_to_decimal. 656 */ 657 decimal_mode dm; 658 decimal_record dr; 659 fp_exception_field_type efs; 660 enum decimal_string_form form; 661 char *echar; 662 int nread; 663 char buffer[1024+1]; 664 char *nb = buffer; 665 666 if (len > 1024) 667 len = 1024; 668 file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread); 669 if (lc == -1) { 670 /* 671 * In C99 mode, the entire string read has to be 672 * accepted in order to qualify as a match 673 */ 674 if (nb != buffer + nread) 675 form = invalid_form; 676 } 677 if (stow && (form != invalid_form)) { 678 #if defined(__sparc) 679 dm.rd = _QgetRD(); 680 if (size == 'L') { /* long double */ 681 if ((int)form < 0) 682 __hex_to_quadruple(&dr, dm.rd, 683 va_arg(*listp, quadruple *), &efs); 684 else 685 decimal_to_quadruple( 686 va_arg(*listp, quadruple *), 687 &dm, &dr, &efs); 688 } 689 #elif defined(__i386) || defined(__amd64) 690 dm.rd = __xgetRD(); 691 if (size == 'L') { /* long double */ 692 if ((int)form < 0) 693 __hex_to_extended(&dr, dm.rd, 694 va_arg(*listp, extended *), &efs); 695 else 696 decimal_to_extended( 697 va_arg(*listp, extended *), 698 &dm, &dr, &efs); 699 } 700 #else 701 #error Unknown architecture 702 #endif 703 else if (size == 'l') { /* double */ 704 if ((int)form < 0) 705 __hex_to_double(&dr, dm.rd, 706 va_arg(*listp, double *), &efs); 707 else 708 decimal_to_double( 709 va_arg(*listp, double *), 710 &dm, &dr, &efs); 711 } else { /* float */ 712 if ((int)form < 0) 713 __hex_to_single(&dr, dm.rd, 714 va_arg(*listp, single *), &efs); 715 else 716 decimal_to_single((single *) 717 va_arg(*listp, single *), 718 &dm, &dr, &efs); 719 } 720 if ((efs & (1 << fp_overflow)) != 0) { 721 errno = ERANGE; 722 } 723 if ((efs & (1 << fp_underflow)) != 0) { 724 errno = ERANGE; 725 } 726 } 727 (*chcount) += nread; /* Count characters read. */ 728 c = locgetc((*chcount)); 729 if (locungetc((*chcount), c) == EOF) 730 *flag_eof = 1; 731 return ((form == invalid_form) ? 0 : 1); 732 /* successful match if non-zero */ 733 } 734 735 switch (c = locgetc((*chcount))) { 736 case '-': 737 negflg++; 738 /* FALLTHROUGH */ 739 case '+': 740 if (--len <= 0) 741 break; 742 if ((c = locgetc((*chcount))) != '0') 743 break; 744 /* FALLTHROUGH */ 745 case '0': 746 /* 747 * If %i or %x, the characters 0x or 0X may optionally precede 748 * the sequence of letters and digits (base 16). 749 */ 750 if ((type != 'i' && type != 'x') || (len <= 1)) 751 break; 752 if (((inchar = locgetc((*chcount))) == 'x') || 753 (inchar == 'X')) { 754 lookahead = readchar(iop, chcount); 755 if (isxdigit(lookahead)) { 756 base = 16; 757 758 if (len <= 2) { 759 (void) locungetc((*chcount), lookahead); 760 /* Take into account the 'x' */ 761 len -= 1; 762 } else { 763 c = lookahead; 764 /* Take into account '0x' */ 765 len -= 2; 766 } 767 } else { 768 (void) locungetc((*chcount), lookahead); 769 (void) locungetc((*chcount), inchar); 770 } 771 } else { 772 /* inchar wans't 'x'. */ 773 (void) locungetc((*chcount), inchar); /* Put it back. */ 774 if (type == 'i') /* Only %i accepts an octal. */ 775 base = 8; 776 } 777 } 778 for (; --len >= 0; *np++ = (char)c, c = locgetc((*chcount))) { 779 if (np > numbuf + 62) { 780 errno = ERANGE; 781 return (0); 782 } 783 if (isdigit(c) || base == 16 && isxdigit(c)) { 784 int digit = c - (isdigit(c) ? '0' : 785 isupper(c) ? 'A' - 10 : 'a' - 10); 786 if (digit >= base) 787 break; 788 if (stow) 789 lcval = base * lcval + digit; 790 digitseen++; 791 continue; 792 } 793 break; 794 } 795 796 if (stow && digitseen) { 797 /* suppress possible overflow on 2's-comp negation */ 798 if (negflg && lcval != (1ULL << 63)) 799 lcval = -lcval; 800 switch (size) { 801 case 'm': 802 *va_arg(*listp, long long *) = lcval; 803 break; 804 case 'l': 805 *va_arg(*listp, long *) = (long)lcval; 806 break; 807 case 'h': 808 *va_arg(*listp, short *) = (short)lcval; 809 break; 810 case 'b': 811 *va_arg(*listp, char *) = (char)lcval; 812 break; 813 default: 814 *va_arg(*listp, int *) = (int)lcval; 815 break; 816 } 817 } 818 if (locungetc((*chcount), c) == EOF) 819 *flag_eof = 1; 820 return (digitseen); /* successful match if non-zero */ 821 } 822 823 /* Get a character. If not using sscanf and at the buffer's end */ 824 /* then do a direct read(). Characters read via readchar() */ 825 /* can be pushed back on the input stream by locungetc((*chcount),) */ 826 /* since there is padding allocated at the end of the stream buffer. */ 827 static int 828 readchar(FILE *iop, int *chcount) 829 { 830 int inchar; 831 char buf[1]; 832 833 if ((iop->_flag & _IOWRT) || (iop->_cnt != 0)) 834 inchar = locgetc((*chcount)); 835 else { 836 if (read(FILENO(iop), buf, 1) != 1) 837 return (EOF); 838 inchar = (int)buf[0]; 839 (*chcount) += 1; 840 } 841 return (inchar); 842 } 843 844 static int 845 string(int *chcount, int *flag_eof, int stow, int type, int len, char *tab, 846 FILE *iop, va_list *listp) 847 { 848 int ch; 849 char *ptr; 850 char *start; 851 852 start = ptr = stow ? va_arg(*listp, char *) : NULL; 853 if (((type == 'c') || (type == 'C')) && len == MAXINT) 854 len = 1; 855 #ifdef _WIDE 856 while ((ch = locgetc((*chcount))) != EOF && 857 !(((type == 's') || (type == 'S')) && isspace(ch))) { 858 #else /* _WIDE */ 859 while ((ch = locgetc((*chcount))) != EOF && 860 !(((type == 's') || (type == 'S')) && 861 isspace(ch) || type == '[' && tab[ch])) { 862 #endif /* _WIDE */ 863 if (stow) 864 *ptr = (char)ch; 865 ptr++; 866 if (--len <= 0) 867 break; 868 } 869 if (ch == EOF) { 870 (*flag_eof) = 1; 871 (*chcount) -= 1; 872 } else if (len > 0 && locungetc((*chcount), ch) == EOF) 873 (*flag_eof) = 1; 874 if (ptr == start) 875 return (0); /* no match */ 876 if (stow && ((type != 'c') && (type != 'C'))) 877 *ptr = '\0'; 878 return (1); /* successful match */ 879 } 880 881 /* This function initializes arglst, to contain the appropriate */ 882 /* va_list values for the first MAXARGS arguments. */ 883 /* WARNING: this code assumes that the sizes of all pointer types */ 884 /* are the same. (Code similar to that in the portable doprnt.c */ 885 /* should be used if this assumption is not true for a */ 886 /* particular port.) */ 887 888 #ifdef _WIDE 889 static int 890 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[]) 891 #else /* _WIDE */ 892 static int 893 _mkarglst(const char *fmt, stva_list args, stva_list arglst[]) 894 #endif /* _WIDE */ 895 { 896 #ifdef _WIDE 897 #define STRCHR wcschr 898 #define STRSPN wcsspn 899 #define ATOI(x) _watoi((wchar_t *)x) 900 #define SPNSTR1 L"01234567890" 901 #define SPNSTR2 L"# +-.0123456789hL$" 902 #else /* _WIDE */ 903 #define STRCHR strchr 904 #define STRSPN strspn 905 #define ATOI(x) atoi(x) 906 #define SPNSTR1 "01234567890" 907 #define SPNSTR2 "# +-.0123456789hL$" 908 #endif /* _WIDE */ 909 910 int maxnum, curargno; 911 size_t n; 912 913 maxnum = -1; 914 curargno = 0; 915 916 while ((fmt = STRCHR(fmt, '%')) != NULL) { 917 fmt++; /* skip % */ 918 if (*fmt == '*' || *fmt == '%') 919 continue; 920 if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') { 921 /* convert to zero base */ 922 curargno = ATOI(fmt) - 1; 923 fmt += n + 1; 924 } 925 926 if (maxnum < curargno) 927 maxnum = curargno; 928 curargno++; /* default to next in list */ 929 930 fmt += STRSPN(fmt, SPNSTR2); 931 if (*fmt == '[') { 932 int i; 933 fmt++; /* has to be at least on item in scan list */ 934 if (*fmt == ']') { 935 fmt++; 936 } 937 while (*fmt != ']') { 938 if (*fmt == L'\0') { 939 return (-1); /* bad format */ 940 #ifdef _WIDE 941 } else { 942 fmt++; 943 } 944 #else /* _WIDE */ 945 } else if (isascii(*fmt)) { 946 fmt++; 947 } else { 948 i = mblen((const char *) 949 fmt, MB_CUR_MAX); 950 if (i <= 0) { 951 return (-1); 952 } else { 953 fmt += i; 954 } 955 } 956 #endif /* _WIDE */ 957 } 958 } 959 } 960 if (maxnum > MAXARGS) 961 maxnum = MAXARGS; 962 for (n = 0; n <= maxnum; n++) { 963 arglst[n] = args; 964 (void) va_arg(args.ap, void *); 965 } 966 return (0); 967 } 968 969 970 /* 971 * For wide character handling 972 */ 973 974 #ifdef _WIDE 975 static int 976 wstring(int *chcount, int *flag_eof, int stow, int type, 977 int len, FILE *iop, va_list *listp) 978 { 979 wint_t wch; 980 wchar_t *ptr; 981 wchar_t *wstart; 982 int dummy; 983 984 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 985 986 if ((type == 'c') && len == MAXINT) 987 len = 1; 988 while (((wch = _wd_getwc(chcount, iop)) != WEOF) && 989 !(type == 's' && iswspace(wch))) { 990 if (stow) 991 *ptr = wch; 992 ptr++; 993 if (--len <= 0) 994 break; 995 } 996 if (wch == WEOF) { 997 *flag_eof = 1; 998 (*chcount) -= 1; 999 } else { 1000 if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF) 1001 *flag_eof = 1; 1002 } 1003 if (ptr == wstart) 1004 return (0); /* no match */ 1005 if (stow && (type != 'c')) 1006 *ptr = '\0'; 1007 return (1); /* successful match */ 1008 } 1009 1010 #else /* _WIDE */ 1011 static int 1012 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop, 1013 va_list *listp) 1014 { 1015 int wch; 1016 wchar_t *ptr; 1017 wchar_t *wstart; 1018 1019 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 1020 1021 if ((type == 'c') && len == MAXINT) 1022 len = 1; 1023 while (((wch = _bi_getwc(iop)) != EOF) && 1024 !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) { 1025 (*chcount) += _scrwidth((wchar_t)wch); 1026 if (stow) 1027 *ptr = wch; 1028 ptr++; 1029 if (--len <= 0) 1030 break; 1031 } 1032 if (wch == EOF) { 1033 (*flag_eof) = 1; 1034 (*chcount) -= 1; 1035 } else { 1036 if (len > 0 && _bi_ungetwc(wch, iop) == EOF) 1037 (*flag_eof) = 1; 1038 } 1039 if (ptr == wstart) 1040 return (0); /* no match */ 1041 if (stow && (type != 'c')) 1042 *ptr = '\0'; 1043 return (1); /* successful match */ 1044 } 1045 #endif /* _WIDE */ 1046 1047 #ifdef _WIDE 1048 static wint_t 1049 _wd_getwc(int *chcount, FILE *iop) 1050 { 1051 wint_t wc; 1052 int len; 1053 1054 if (!(iop->_flag & _IOWRT)) { 1055 /* call from fwscanf, wscanf */ 1056 wc = __fgetwc_xpg5(iop); 1057 (*chcount)++; 1058 return (wc); 1059 } else { 1060 /* call from swscanf */ 1061 if (*iop->_ptr == '\0') 1062 return (WEOF); 1063 len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr, 1064 MB_CUR_MAX); 1065 if (len == -1) 1066 return (WEOF); 1067 iop->_ptr += len; 1068 (*chcount)++; 1069 return (wc); 1070 } 1071 } 1072 1073 static wint_t 1074 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop) 1075 { 1076 wint_t ret; 1077 int len; 1078 char mbs[MB_LEN_MAX]; 1079 1080 if (wc == WEOF) 1081 return (WEOF); 1082 1083 if (!(iop->_flag & _IOWRT)) { 1084 /* call from fwscanf, wscanf */ 1085 ret = __ungetwc_xpg5((wint_t)wc, iop); 1086 if (ret != (wint_t)wc) 1087 return (WEOF); 1088 (*chcount)--; 1089 return (ret); 1090 } else { 1091 /* call from swscanf */ 1092 len = wctomb(mbs, wc); 1093 if (len == -1) 1094 return (WEOF); 1095 iop->_ptr -= len; 1096 (*chcount)--; 1097 return ((wint_t)wc); 1098 } 1099 } 1100 1101 static int 1102 _watoi(wchar_t *fmt) 1103 { 1104 int n = 0; 1105 wchar_t ch; 1106 1107 ch = *fmt; 1108 if ((ch >= 0) && (ch < 256) && isdigit((int)ch)) { 1109 n = ch - '0'; 1110 while (((ch = *++fmt) >= 0) && (ch < 256) && 1111 isdigit((int)ch)) { 1112 n *= 10; 1113 n += ch - '0'; 1114 } 1115 } 1116 return (n); 1117 } 1118 #endif /* _WIDE */ 1119 1120 /* ARGSUSED3 */ 1121 static int 1122 wbrstring(int *chcount, int *flag_eof, int stow, int type, 1123 int len, FILE *iop, unsigned char *brstr, va_list *listp) 1124 { 1125 wint_t wch; 1126 int i; 1127 char str[MB_LEN_MAX + 1]; /* include null termination */ 1128 wchar_t *ptr, *start; 1129 #ifdef _WIDE 1130 int dummy; 1131 #endif /* _WIDE */ 1132 1133 start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 1134 1135 #ifdef _WIDE 1136 while ((wch = _wd_getwc(&dummy, iop)) != WEOF) { 1137 #else /* _WIDE */ 1138 while ((wch = _bi_getwc(iop)) != WEOF) { 1139 #endif /* _WIDE */ 1140 i = wctomb(str, (wchar_t)wch); 1141 if (i == -1) { 1142 return (0); 1143 } 1144 str[i] = '\0'; 1145 if (fnmatch((const char *)brstr, (const char *)str, 1146 FNM_NOESCAPE)) { 1147 break; 1148 } else { 1149 if (len > 0) { 1150 #ifdef _WIDE 1151 (*chcount)++; 1152 #else /* _WIDE */ 1153 (*chcount) += _scrwidth(wch); 1154 #endif /* _WIDE */ 1155 len--; 1156 if (stow) { 1157 *ptr = wch; 1158 } 1159 ptr++; 1160 if (len <= 0) 1161 break; 1162 } else { 1163 break; 1164 } 1165 } 1166 } 1167 if (wch == WEOF) { 1168 *flag_eof = 1; 1169 } else { 1170 #ifdef _WIDE 1171 if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF) 1172 #else /* _WIDE */ 1173 if (len > 0 && _bi_ungetwc(wch, iop) == WEOF) 1174 #endif /* _WIDE */ 1175 *flag_eof = 1; 1176 } 1177 if (ptr == start) 1178 return (0); /* no match */ 1179 if (stow) 1180 *ptr = L'\0'; 1181 return (1); /* successful match */ 1182 } 1183 1184 #ifdef _WIDE 1185 static int 1186 brstring(int *chcount, int *flag_eof, int stow, int type, 1187 int len, FILE *iop, unsigned char *brstr, va_list *listp) 1188 { 1189 wint_t wch; 1190 int i; 1191 char str[MB_LEN_MAX + 1]; /* include null termination */ 1192 char *ptr, *start, *p; 1193 int dummy; 1194 1195 start = ptr = stow ? va_arg(*listp, char *) : NULL; 1196 1197 while ((wch = _wd_getwc(&dummy, iop)) != WEOF) { 1198 p = str; 1199 i = wctomb(str, (wchar_t)wch); 1200 if (i == -1) { 1201 return (0); 1202 } 1203 str[i] = '\0'; 1204 if (fnmatch((const char *)brstr, (const char *)str, 1205 FNM_NOESCAPE)) { 1206 break; 1207 } else { 1208 if (len >= i) { 1209 (*chcount)++; 1210 len -= i; 1211 if (stow) { 1212 while (i-- > 0) { 1213 *ptr++ = *p++; 1214 } 1215 } else { 1216 while (i-- > 0) { 1217 ptr++; 1218 } 1219 } 1220 if (len <= 0) 1221 break; 1222 } else { 1223 break; 1224 } 1225 } 1226 } 1227 if (wch == WEOF) { 1228 *flag_eof = 1; 1229 } else { 1230 if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF) 1231 *flag_eof = 1; 1232 } 1233 if (ptr == start) 1234 return (0); /* no match */ 1235 if (stow) 1236 *ptr = '\0'; 1237 return (1); /* successful match */ 1238 } 1239 #endif /* _WIDE */ 1240 1241 /* 1242 * Locally define getwc and ungetwc 1243 */ 1244 static int 1245 _bi_getwc(FILE *iop) 1246 { 1247 int c; 1248 wchar_t intcode; 1249 int i, nbytes, cur_max; 1250 char buff[MB_LEN_MAX]; 1251 1252 if ((c = wlocgetc()) == EOF) 1253 return (WEOF); 1254 1255 if (isascii(c)) /* ASCII code */ 1256 return ((wint_t)c); 1257 1258 buff[0] = (char)c; 1259 1260 cur_max = (int)MB_CUR_MAX; 1261 /* MB_CUR_MAX doen't exeed the value of MB_LEN_MAX */ 1262 /* So we use MB_CUR_MAX instead of MB_LEN_MAX for */ 1263 /* improving the performance. */ 1264 for (i = 1; i < cur_max; i++) { 1265 c = wlocgetc(); 1266 if (c == '\n') { 1267 (void) wlocungetc(c); 1268 break; 1269 } 1270 if (c == EOF) { 1271 /* this still may be a valid multibyte character */ 1272 break; 1273 } 1274 buff[i] = (char)c; 1275 } 1276 1277 if ((nbytes = mbtowc(&intcode, buff, i)) == -1) { 1278 /* 1279 * If mbtowc fails, the input was not a legal character. 1280 * ungetc all but one character. 1281 * 1282 * Note: the number of pushback characters that 1283 * ungetc() can handle must be >= (MB_LEN_MAX - 1). 1284 * In Solaris 2.x, the number of pushback 1285 * characters is 4. 1286 */ 1287 while (i-- > 1) { 1288 (void) wlocungetc((signed char)buff[i]); 1289 } 1290 errno = EILSEQ; 1291 return (WEOF); /* Illegal EUC sequence. */ 1292 } 1293 1294 while (i-- > nbytes) { 1295 /* 1296 * Note: the number of pushback characters that 1297 * ungetc() can handle must be >= (MB_LEN_MAX - 1). 1298 * In Solaris 2.x, the number of pushback 1299 * characters is 4. 1300 */ 1301 (void) wlocungetc((signed char)buff[i]); 1302 } 1303 return ((int)intcode); 1304 } 1305 1306 static int 1307 _bi_ungetwc(wint_t wc, FILE *iop) 1308 { 1309 char mbs[MB_LEN_MAX]; 1310 unsigned char *p; 1311 int n; 1312 1313 if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0)) 1314 return (WEOF); 1315 1316 n = wctomb(mbs, (wchar_t)wc); 1317 if (n <= 0) 1318 return (WEOF); 1319 1320 if (iop->_ptr <= iop->_base) { 1321 if (iop->_base == NULL) { 1322 return (WEOF); 1323 } 1324 if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) { 1325 ++iop->_ptr; 1326 } else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) { 1327 return (WEOF); 1328 } 1329 } 1330 1331 p = (unsigned char *)(mbs+n-1); /* p points the last byte */ 1332 /* if _IOWRT is set to iop->_flag, it means this is */ 1333 /* an invocation from sscanf(), and in that time we */ 1334 /* don't touch iop->_cnt. Otherwise, which means an */ 1335 /* invocation from fscanf() or scanf(), we touch iop->_cnt */ 1336 if ((iop->_flag & _IOWRT) == 0) { 1337 /* scanf() and fscanf() */ 1338 iop->_cnt += n; 1339 while (n--) { 1340 *--iop->_ptr = *(p--); 1341 } 1342 } else { 1343 /* sscanf() */ 1344 iop->_ptr -= n; 1345 } 1346 return (wc); 1347 } 1348