1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1988 AT&T */ 28 /* All Rights Reserved */ 29 30 #include "lint.h" 31 #include <sys/types.h> 32 #include "mtlib.h" 33 #include "file64.h" 34 #include <stdio.h> 35 #include <ctype.h> 36 #include <stdarg.h> 37 #include <values.h> 38 #include <errno.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <math.h> 42 #include <thread.h> 43 #include <synch.h> 44 #include <stdlib.h> 45 #include <fnmatch.h> 46 #include <limits.h> 47 #include <wchar.h> 48 #include <unistd.h> 49 #include "libc.h" 50 #include "stdiom.h" 51 #include "xpg6.h" 52 53 #define NCHARS (1 << BITSPERBYTE) 54 55 /* if the _IOWRT flag is set, this must be a call from sscanf */ 56 #define locgetc(cnt) (cnt += 1, (iop->_flag & _IOWRT) ? \ 57 ((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \ 58 GETC(iop)) 59 #define locungetc(cnt, x) (cnt -= 1, (x == EOF) ? EOF : \ 60 ((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \ 61 (++iop->_cnt, *(--iop->_ptr)))) 62 63 #define wlocgetc() ((iop->_flag & _IOWRT) ? \ 64 ((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \ 65 GETC(iop)) 66 #define wlocungetc(x) ((x == EOF) ? EOF : \ 67 ((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \ 68 UNGETC(x, iop))) 69 70 #define MAXARGS 30 /* max. number of args for fast positional paramters */ 71 72 /* 73 * stva_list is used to subvert C's restriction that a variable with an 74 * array type can not appear on the left hand side of an assignment operator. 75 * By putting the array inside a structure, the functionality of assigning to 76 * the whole array through a simple assignment is achieved.. 77 */ 78 typedef struct stva_list { 79 va_list ap; 80 } stva_list; 81 82 static int number(int *, int *, int, int, int, int, FILE *, va_list *); 83 static int readchar(FILE *, int *); 84 static int string(int *, int *, int, int, int, char *, FILE *, va_list *); 85 static int wstring(int *, int *, int, int, int, FILE *, va_list *); 86 static int wbrstring(int *, int *, int, int, int, FILE *, 87 unsigned char *, va_list *); 88 #ifdef _WIDE 89 static int brstring(int *, int *, int, int, int, FILE *, 90 unsigned char *, va_list *); 91 #endif 92 static int _bi_getwc(FILE *); 93 static int _bi_ungetwc(wint_t, FILE *); 94 95 #ifdef _WIDE 96 static int _mkarglst(const wchar_t *, stva_list, stva_list[]); 97 static wint_t _wd_getwc(int *, FILE *); 98 static wint_t _wd_ungetwc(int *, wchar_t, FILE *); 99 static int _watoi(wchar_t *); 100 #else /* _WIDE */ 101 static int _mkarglst(const char *, stva_list, stva_list[]); 102 #endif /* _WIDE */ 103 104 #ifndef _WIDE 105 int 106 _doscan(FILE *iop, const char *fmt, va_list va_Alist) 107 { 108 int ret; 109 rmutex_t *lk; 110 111 if (iop->_flag & _IOWRT) 112 ret = __doscan_u(iop, fmt, va_Alist, 0); 113 else { 114 FLOCKFILE(lk, iop); 115 ret = __doscan_u(iop, fmt, va_Alist, 0); 116 FUNLOCKFILE(lk); 117 } 118 return (ret); 119 } 120 #endif /* _WIDE */ 121 122 /* ARGSUSED3 */ 123 #ifdef _WIDE 124 int 125 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, int scflag) 126 #else /* _WIDE */ 127 int 128 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag) 129 #endif /* _WIDE */ 130 { 131 #ifdef _WIDE 132 wchar_t ch; 133 wchar_t inchar, size; 134 int nmatch = 0, len, stow; 135 #else /* _WIDE */ 136 int ch; 137 int nmatch = 0, len, inchar, stow, size; 138 #endif /* _WIDE */ 139 140 unsigned char *bracket_str = NULL; 141 int chcount, flag_eof; 142 char tab[NCHARS]; 143 144 /* variables for postional parameters */ 145 #ifdef _WIDE 146 const wchar_t *sformat = fmt; /* save the beginning of the format */ 147 #else /* _WIDE */ 148 const unsigned char *fmt = (const unsigned char *)sfmt; 149 const char *sformat = sfmt; /* save the beginning of the format */ 150 #endif /* _WIDE */ 151 int fpos = 1; /* 1 if first postional parameter */ 152 stva_list args; /* used to step through the argument list */ 153 stva_list sargs; /* used to save start of the argument list */ 154 stva_list arglst[MAXARGS]; 155 /* 156 * array giving the appropriate values 157 * for va_arg() to retrieve the 158 * corresponding argument: 159 * arglst[0] is the first argument 160 * arglst[1] is the second argument,etc. 161 */ 162 /* Check if readable stream */ 163 if (!(iop->_flag & (_IOREAD | _IORW))) { 164 errno = EBADF; 165 return (EOF); 166 } 167 168 /* 169 * Initialize args and sargs to the start of the argument list. 170 * We don't know any portable way to copy an arbitrary C object 171 * so we use a system-specific routine(probably a macro) from 172 * stdarg.h. (Remember that if va_list is an array, in_args will 173 * be a pointer and &in_args won't be what we would want for 174 * memcpy.) 175 */ 176 va_copy(args.ap, va_Alist); 177 178 sargs = args; 179 180 chcount = 0; flag_eof = 0; 181 182 /* 183 * **************************************************** 184 * Main loop: reads format to determine a pattern, 185 * and then goes to read input stream 186 * in attempt to match the pattern. 187 * **************************************************** 188 */ 189 for (; ; ) { 190 if ((ch = *fmt++) == '\0') { 191 return (nmatch); /* end of format */ 192 } 193 #ifdef _WIDE 194 if (iswspace(ch)) { 195 if (!flag_eof) { 196 while (iswspace(inchar = 197 _wd_getwc(&chcount, iop))) 198 ; 199 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF) 200 flag_eof = 1; 201 } 202 continue; 203 } 204 if (ch != '%' || (ch = *fmt++) == '%') { 205 if (ch == '%') { 206 if (!flag_eof) { 207 while (iswspace(inchar = 208 _wd_getwc(&chcount, iop))) 209 ; 210 if (_wd_ungetwc(&chcount, inchar, iop) 211 == WEOF) 212 flag_eof = 1; 213 } 214 } 215 if ((inchar = _wd_getwc(&chcount, iop)) == ch) 216 continue; 217 if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) { 218 return (nmatch); /* failed to match input */ 219 } 220 break; 221 } 222 #else /* _WIDE */ 223 if (isspace(ch)) { 224 if (!flag_eof) { 225 while (isspace(inchar = locgetc(chcount))) 226 ; 227 if (locungetc(chcount, inchar) == EOF) 228 flag_eof = 1; 229 230 } 231 continue; 232 } 233 if (ch != '%' || (ch = *fmt++) == '%') { 234 if (ch == '%') { 235 if (!flag_eof) { 236 while (isspace(inchar = 237 locgetc(chcount))) 238 ; 239 if (locungetc(chcount, inchar) == EOF) 240 flag_eof = 1; 241 } 242 } 243 if ((inchar = locgetc(chcount)) == ch) 244 continue; 245 if (locungetc(chcount, inchar) != EOF) { 246 return (nmatch); /* failed to match input */ 247 } 248 break; 249 } 250 #endif /* _WIDE */ 251 252 charswitch: /* target of a goto 8-( */ 253 254 if (ch == '*') { 255 stow = 0; 256 ch = *fmt++; 257 } else 258 stow = 1; 259 260 #ifdef _WIDE 261 for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch)); 262 ch = *fmt++) 263 len = len * 10 + ch - '0'; 264 #else /* _WIDE */ 265 for (len = 0; isdigit(ch); ch = *fmt++) 266 len = len * 10 + ch - '0'; 267 #endif /* _WIDE */ 268 269 if (ch == '$') { 270 /* 271 * positional parameter handling - the number 272 * specified in len gives the argument to which 273 * the next conversion should be applied. 274 * WARNING: This implementation of positional 275 * parameters assumes that the sizes of all pointer 276 * types are the same. (Code similar to that 277 * in the portable doprnt.c should be used if this 278 * assumption does not hold for a particular 279 * port.) 280 */ 281 if (fpos) { 282 if (_mkarglst(sformat, sargs, arglst) != 0) { 283 return (EOF); 284 } else { 285 fpos = 0; 286 } 287 } 288 if (len <= MAXARGS) { 289 args = arglst[len - 1]; 290 } else { 291 args = arglst[MAXARGS - 1]; 292 for (len -= MAXARGS; len > 0; len--) 293 (void) va_arg(args.ap, void *); 294 } 295 len = 0; 296 ch = *fmt++; 297 goto charswitch; 298 } 299 300 if (len == 0) 301 len = MAXINT; 302 #ifdef _WIDE 303 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') || 304 (size == 'j') || (size == 't') || (size == 'z')) 305 ch = *fmt++; 306 #else /* _WIDE */ 307 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') || 308 (size == 'w') || (size == 'j') || (size == 't') || 309 (size == 'z')) 310 ch = *fmt++; 311 #endif /* _WIDE */ 312 if (size == 'l' && ch == 'l') { 313 size = 'm'; /* size = 'm' if long long */ 314 ch = *fmt++; 315 } else if (size == 'h' && ch == 'h') { 316 size = 'b'; /* use size = 'b' if char */ 317 ch = *fmt++; 318 } else if ((size == 't') || (size == 'z')) { 319 size = 'l'; 320 } else if (size == 'j') { 321 #ifndef _LP64 322 /* check scflag for size of u/intmax_t (32-bit libc) */ 323 if (!(scflag & _F_INTMAX32)) { 324 #endif 325 size = 'm'; 326 #ifndef _LP64 327 } 328 #endif 329 } 330 if (ch == '\0') { 331 return (EOF); /* unexpected end of format */ 332 } 333 #ifdef _WIDE 334 if (ch == '[') { 335 wchar_t c; 336 size_t len; 337 int negflg = 0; 338 wchar_t *p; 339 wchar_t *wbracket_str; 340 size_t wlen, clen; 341 342 /* p points to the address of '[' */ 343 p = (wchar_t *)fmt - 1; 344 len = 0; 345 if (*fmt == '^') { 346 len++; 347 fmt++; 348 negflg = 1; 349 } 350 if (((c = *fmt) == ']') || (c == '-')) { 351 len++; 352 fmt++; 353 } 354 while ((c = *fmt) != ']') { 355 if (c == '\0') { 356 return (EOF); /* unexpected EOF */ 357 } else { 358 len++; 359 fmt++; 360 } 361 } 362 fmt++; 363 len += 2; 364 wbracket_str = (wchar_t *) 365 malloc(sizeof (wchar_t) * (len + 1)); 366 if (wbracket_str == NULL) { 367 errno = ENOMEM; 368 return (EOF); 369 } else { 370 (void) wmemcpy(wbracket_str, 371 (const wchar_t *)p, len); 372 *(wbracket_str + len) = L'\0'; 373 if (negflg && *(wbracket_str + 1) == '^') { 374 *(wbracket_str + 1) = L'!'; 375 } 376 } 377 wlen = wcslen(wbracket_str); 378 clen = wcstombs((char *)NULL, wbracket_str, 0); 379 if (clen == (size_t)-1) { 380 free(wbracket_str); 381 return (EOF); 382 } 383 bracket_str = (unsigned char *) 384 malloc(sizeof (unsigned char) * (clen + 1)); 385 if (bracket_str == NULL) { 386 free(wbracket_str); 387 errno = ENOMEM; 388 return (EOF); 389 } 390 clen = wcstombs((char *)bracket_str, wbracket_str, 391 wlen + 1); 392 free(wbracket_str); 393 if (clen == (size_t)-1) { 394 free(bracket_str); 395 return (EOF); 396 } 397 } 398 #else /* _WIDE */ 399 if (ch == '[') { 400 if (size == 'l') { 401 int c, len, i; 402 int negflg = 0; 403 unsigned char *p; 404 405 p = (unsigned char *)(fmt - 1); 406 len = 0; 407 if (*fmt == '^') { 408 len++; 409 fmt++; 410 negflg = 1; 411 } 412 if (((c = *fmt) == ']') || (c == '-')) { 413 len++; 414 fmt++; 415 } 416 while ((c = *fmt) != ']') { 417 if (c == '\0') { 418 return (EOF); 419 } else if (isascii(c)) { 420 len++; 421 fmt++; 422 } else { 423 i = mblen((const char *)fmt, 424 MB_CUR_MAX); 425 if (i <= 0) { 426 return (EOF); 427 } else { 428 len += i; 429 fmt += i; 430 } 431 } 432 } 433 fmt++; 434 len += 2; 435 bracket_str = (unsigned char *) 436 malloc(sizeof (unsigned char) * (len + 1)); 437 if (bracket_str == NULL) { 438 errno = ENOMEM; 439 return (EOF); 440 } else { 441 (void) strncpy((char *)bracket_str, 442 (const char *)p, len); 443 *(bracket_str + len) = '\0'; 444 if (negflg && 445 *(bracket_str + 1) == '^') { 446 *(bracket_str + 1) = '!'; 447 } 448 } 449 } else { 450 int t = 0; 451 int b, c, d; 452 453 if (*fmt == '^') { 454 t++; 455 fmt++; 456 } 457 (void) memset(tab, !t, NCHARS); 458 if ((c = *fmt) == ']' || c == '-') { 459 tab[c] = t; 460 fmt++; 461 } 462 463 while ((c = *fmt) != ']') { 464 if (c == '\0') { 465 return (EOF); 466 } 467 b = *(fmt - 1); 468 d = *(fmt + 1); 469 if ((c == '-') && (d != ']') && 470 (b < d)) { 471 (void) memset(&tab[b], t, 472 d - b + 1); 473 fmt += 2; 474 } else { 475 tab[c] = t; 476 fmt++; 477 } 478 } 479 fmt++; 480 } 481 } 482 #endif /* _WIDE */ 483 484 #ifdef _WIDE 485 if ((ch >= 0) && (ch < 256) && 486 isupper((int)ch)) { /* no longer documented */ 487 if (_lib_version == c_issue_4) { 488 if (size != 'm' && size != 'L') 489 size = 'l'; 490 } 491 ch = _tolower((int)ch); 492 } 493 if (ch != 'n' && !flag_eof) { 494 if (ch != 'c' && ch != 'C' && ch != '[') { 495 while (iswspace(inchar = 496 _wd_getwc(&chcount, iop))) 497 ; 498 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF) 499 break; 500 501 } 502 } 503 #else /* _WIDE */ 504 if (isupper(ch)) { /* no longer documented */ 505 if (_lib_version == c_issue_4) { 506 if (size != 'm' && size != 'L') 507 size = 'l'; 508 } 509 ch = _tolower(ch); 510 } 511 if (ch != 'n' && !flag_eof) { 512 if (ch != 'c' && ch != 'C' && ch != '[') { 513 while (isspace(inchar = locgetc(chcount))) 514 ; 515 if (locungetc(chcount, inchar) == EOF) 516 break; 517 } 518 } 519 #endif /* _WIDE */ 520 521 switch (ch) { 522 case 'C': 523 case 'S': 524 case 'c': 525 case 's': 526 #ifdef _WIDE 527 if ((size == 'l') || (size == 'C') || (size == 'S')) 528 #else /* _WIDE */ 529 if ((size == 'w') || (size == 'l') || (size == 'C') || 530 (size == 'S')) 531 #endif /* _WIDE */ 532 { 533 size = wstring(&chcount, &flag_eof, stow, 534 (int)ch, len, iop, &args.ap); 535 } else { 536 size = string(&chcount, &flag_eof, stow, 537 (int)ch, len, tab, iop, &args.ap); 538 } 539 break; 540 case '[': 541 if (size == 'l') { 542 size = wbrstring(&chcount, &flag_eof, stow, 543 (int)ch, len, iop, bracket_str, &args.ap); 544 free(bracket_str); 545 bracket_str = NULL; 546 } else { 547 #ifdef _WIDE 548 size = brstring(&chcount, &flag_eof, stow, 549 (int)ch, len, iop, bracket_str, &args.ap); 550 free(bracket_str); 551 bracket_str = NULL; 552 #else /* _WIDE */ 553 size = string(&chcount, &flag_eof, stow, 554 ch, len, tab, iop, &args.ap); 555 #endif /* _WIDE */ 556 } 557 break; 558 559 case 'n': 560 if (stow == 0) 561 continue; 562 if (size == 'b') /* char */ 563 *va_arg(args.ap, char *) = (char)chcount; 564 else if (size == 'h') 565 *va_arg(args.ap, short *) = (short)chcount; 566 else if (size == 'l') 567 *va_arg(args.ap, long *) = (long)chcount; 568 else if (size == 'm') /* long long */ 569 *va_arg(args.ap, long long *) = 570 (long long) chcount; 571 else 572 *va_arg(args.ap, int *) = (int)chcount; 573 continue; 574 575 case 'i': 576 default: 577 size = number(&chcount, &flag_eof, stow, (int)ch, 578 len, (int)size, iop, &args.ap); 579 break; 580 } 581 if (size) 582 nmatch += stow; 583 else { 584 return ((flag_eof && !nmatch) ? EOF : nmatch); 585 } 586 continue; 587 } 588 if (bracket_str) 589 free(bracket_str); 590 return (nmatch != 0 ? nmatch : EOF); /* end of input */ 591 } 592 593 /* ****************************************************************** */ 594 /* Functions to read the input stream in an attempt to match incoming */ 595 /* data to the current pattern from the main loop of _doscan(). */ 596 /* ****************************************************************** */ 597 static int 598 number(int *chcount, int *flag_eof, int stow, int type, int len, int size, 599 FILE *iop, va_list *listp) 600 { 601 char numbuf[64]; 602 char *np = numbuf; 603 int c, base, inchar, lookahead; 604 int digitseen = 0, floater = 0, negflg = 0; 605 int lc; 606 long long lcval = 0LL; 607 608 switch (type) { 609 case 'e': 610 case 'f': 611 case 'g': 612 /* 613 * lc = 0 corresponds to c90 mode: do not recognize 614 * hexadecimal fp strings; attempt to push back 615 * all unused characters read 616 * 617 * lc = -1 corresponds to c99 mode: recognize hexa- 618 * decimal fp strings; push back at most one 619 * unused character 620 */ 621 lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0; 622 floater = 1; 623 break; 624 625 case 'a': 626 lc = -1; 627 floater = 1; 628 break; 629 630 case 'd': 631 case 'u': 632 case 'i': 633 base = 10; 634 break; 635 case 'o': 636 base = 8; 637 break; 638 case 'p': 639 #ifdef _LP64 640 size = 'l'; /* pointers are long in LP64 */ 641 #endif /* _LP64 */ 642 /* FALLTHROUGH */ 643 case 'x': 644 base = 16; 645 break; 646 default: 647 return (0); /* unrecognized conversion character */ 648 } 649 650 if (floater != 0) { 651 /* 652 * Handle floating point with 653 * file_to_decimal. 654 */ 655 decimal_mode dm; 656 decimal_record dr; 657 fp_exception_field_type efs; 658 enum decimal_string_form form; 659 char *echar; 660 int nread; 661 char buffer[1024+1]; 662 char *nb = buffer; 663 664 if (len > 1024) 665 len = 1024; 666 file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread); 667 if (lc == -1) { 668 /* 669 * In C99 mode, the entire string read has to be 670 * accepted in order to qualify as a match 671 */ 672 if (nb != buffer + nread) 673 form = invalid_form; 674 } 675 if (stow && (form != invalid_form)) { 676 #if defined(__sparc) 677 dm.rd = _QgetRD(); 678 if (size == 'L') { /* long double */ 679 if ((int)form < 0) 680 __hex_to_quadruple(&dr, dm.rd, 681 va_arg(*listp, quadruple *), &efs); 682 else 683 decimal_to_quadruple( 684 va_arg(*listp, quadruple *), 685 &dm, &dr, &efs); 686 } 687 #elif defined(__i386) || defined(__amd64) 688 dm.rd = __xgetRD(); 689 if (size == 'L') { /* long double */ 690 if ((int)form < 0) 691 __hex_to_extended(&dr, dm.rd, 692 va_arg(*listp, extended *), &efs); 693 else 694 decimal_to_extended( 695 va_arg(*listp, extended *), 696 &dm, &dr, &efs); 697 } 698 #else 699 #error Unknown architecture 700 #endif 701 else if (size == 'l') { /* double */ 702 if ((int)form < 0) 703 __hex_to_double(&dr, dm.rd, 704 va_arg(*listp, double *), &efs); 705 else 706 decimal_to_double( 707 va_arg(*listp, double *), 708 &dm, &dr, &efs); 709 } else { /* float */ 710 if ((int)form < 0) 711 __hex_to_single(&dr, dm.rd, 712 va_arg(*listp, single *), &efs); 713 else 714 decimal_to_single((single *) 715 va_arg(*listp, single *), 716 &dm, &dr, &efs); 717 } 718 if ((efs & (1 << fp_overflow)) != 0) { 719 errno = ERANGE; 720 } 721 if ((efs & (1 << fp_underflow)) != 0) { 722 errno = ERANGE; 723 } 724 } 725 (*chcount) += nread; /* Count characters read. */ 726 c = locgetc((*chcount)); 727 if (locungetc((*chcount), c) == EOF) 728 *flag_eof = 1; 729 return ((form == invalid_form) ? 0 : 1); 730 /* successful match if non-zero */ 731 } 732 733 switch (c = locgetc((*chcount))) { 734 case '-': 735 negflg++; 736 /* FALLTHROUGH */ 737 case '+': 738 if (--len <= 0) 739 break; 740 if ((c = locgetc((*chcount))) != '0') 741 break; 742 /* FALLTHROUGH */ 743 case '0': 744 /* 745 * If %i or %x, the characters 0x or 0X may optionally precede 746 * the sequence of letters and digits (base 16). 747 */ 748 if ((type != 'i' && type != 'x') || (len <= 1)) 749 break; 750 if (((inchar = locgetc((*chcount))) == 'x') || 751 (inchar == 'X')) { 752 lookahead = readchar(iop, chcount); 753 if (isxdigit(lookahead)) { 754 base = 16; 755 756 if (len <= 2) { 757 (void) locungetc((*chcount), lookahead); 758 /* Take into account the 'x' */ 759 len -= 1; 760 } else { 761 c = lookahead; 762 /* Take into account '0x' */ 763 len -= 2; 764 } 765 } else { 766 (void) locungetc((*chcount), lookahead); 767 (void) locungetc((*chcount), inchar); 768 } 769 } else { 770 /* inchar wans't 'x'. */ 771 (void) locungetc((*chcount), inchar); /* Put it back. */ 772 if (type == 'i') /* Only %i accepts an octal. */ 773 base = 8; 774 } 775 } 776 for (; --len >= 0; *np++ = (char)c, c = locgetc((*chcount))) { 777 if (np > numbuf + 62) { 778 errno = ERANGE; 779 return (0); 780 } 781 if (isdigit(c) || base == 16 && isxdigit(c)) { 782 int digit = c - (isdigit(c) ? '0' : 783 isupper(c) ? 'A' - 10 : 'a' - 10); 784 if (digit >= base) 785 break; 786 if (stow) 787 lcval = base * lcval + digit; 788 digitseen++; 789 continue; 790 } 791 break; 792 } 793 794 if (stow && digitseen) { 795 /* suppress possible overflow on 2's-comp negation */ 796 if (negflg && lcval != (1ULL << 63)) 797 lcval = -lcval; 798 switch (size) { 799 case 'm': 800 *va_arg(*listp, long long *) = lcval; 801 break; 802 case 'l': 803 *va_arg(*listp, long *) = (long)lcval; 804 break; 805 case 'h': 806 *va_arg(*listp, short *) = (short)lcval; 807 break; 808 case 'b': 809 *va_arg(*listp, char *) = (char)lcval; 810 break; 811 default: 812 *va_arg(*listp, int *) = (int)lcval; 813 break; 814 } 815 } 816 if (locungetc((*chcount), c) == EOF) 817 *flag_eof = 1; 818 return (digitseen); /* successful match if non-zero */ 819 } 820 821 /* Get a character. If not using sscanf and at the buffer's end */ 822 /* then do a direct read(). Characters read via readchar() */ 823 /* can be pushed back on the input stream by locungetc((*chcount),) */ 824 /* since there is padding allocated at the end of the stream buffer. */ 825 static int 826 readchar(FILE *iop, int *chcount) 827 { 828 int inchar; 829 char buf[1]; 830 831 if ((iop->_flag & _IOWRT) || (iop->_cnt != 0)) { 832 inchar = locgetc((*chcount)); 833 } else { 834 if (_xread(iop, buf, 1) != 1) 835 return (EOF); 836 inchar = (int)buf[0]; 837 (*chcount) += 1; 838 } 839 return (inchar); 840 } 841 842 static int 843 string(int *chcount, int *flag_eof, int stow, int type, int len, char *tab, 844 FILE *iop, va_list *listp) 845 { 846 int ch; 847 char *ptr; 848 char *start; 849 850 start = ptr = stow ? va_arg(*listp, char *) : NULL; 851 if (((type == 'c') || (type == 'C')) && len == MAXINT) 852 len = 1; 853 #ifdef _WIDE 854 while ((ch = locgetc((*chcount))) != EOF && 855 !(((type == 's') || (type == 'S')) && isspace(ch))) { 856 #else /* _WIDE */ 857 while ((ch = locgetc((*chcount))) != EOF && 858 !(((type == 's') || (type == 'S')) && 859 isspace(ch) || type == '[' && tab[ch])) { 860 #endif /* _WIDE */ 861 if (stow) 862 *ptr = (char)ch; 863 ptr++; 864 if (--len <= 0) 865 break; 866 } 867 if (ch == EOF) { 868 (*flag_eof) = 1; 869 (*chcount) -= 1; 870 } else if (len > 0 && locungetc((*chcount), ch) == EOF) 871 (*flag_eof) = 1; 872 if (ptr == start) 873 return (0); /* no match */ 874 if (stow && ((type != 'c') && (type != 'C'))) 875 *ptr = '\0'; 876 return (1); /* successful match */ 877 } 878 879 /* This function initializes arglst, to contain the appropriate */ 880 /* va_list values for the first MAXARGS arguments. */ 881 /* WARNING: this code assumes that the sizes of all pointer types */ 882 /* are the same. (Code similar to that in the portable doprnt.c */ 883 /* should be used if this assumption is not true for a */ 884 /* particular port.) */ 885 886 #ifdef _WIDE 887 static int 888 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[]) 889 #else /* _WIDE */ 890 static int 891 _mkarglst(const char *fmt, stva_list args, stva_list arglst[]) 892 #endif /* _WIDE */ 893 { 894 #ifdef _WIDE 895 #define STRCHR wcschr 896 #define STRSPN wcsspn 897 #define ATOI(x) _watoi((wchar_t *)x) 898 #define SPNSTR1 L"01234567890" 899 #define SPNSTR2 L"# +-.0123456789hL$" 900 #else /* _WIDE */ 901 #define STRCHR strchr 902 #define STRSPN strspn 903 #define ATOI(x) atoi(x) 904 #define SPNSTR1 "01234567890" 905 #define SPNSTR2 "# +-.0123456789hL$" 906 #endif /* _WIDE */ 907 908 int maxnum, curargno; 909 size_t n; 910 911 maxnum = -1; 912 curargno = 0; 913 914 while ((fmt = STRCHR(fmt, '%')) != NULL) { 915 fmt++; /* skip % */ 916 if (*fmt == '*' || *fmt == '%') 917 continue; 918 if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') { 919 /* convert to zero base */ 920 curargno = ATOI(fmt) - 1; 921 fmt += n + 1; 922 } 923 924 if (maxnum < curargno) 925 maxnum = curargno; 926 curargno++; /* default to next in list */ 927 928 fmt += STRSPN(fmt, SPNSTR2); 929 if (*fmt == '[') { 930 int i; 931 fmt++; /* has to be at least on item in scan list */ 932 if (*fmt == ']') { 933 fmt++; 934 } 935 while (*fmt != ']') { 936 if (*fmt == L'\0') { 937 return (-1); /* bad format */ 938 #ifdef _WIDE 939 } else { 940 fmt++; 941 } 942 #else /* _WIDE */ 943 } else if (isascii(*fmt)) { 944 fmt++; 945 } else { 946 i = mblen((const char *) 947 fmt, MB_CUR_MAX); 948 if (i <= 0) { 949 return (-1); 950 } else { 951 fmt += i; 952 } 953 } 954 #endif /* _WIDE */ 955 } 956 } 957 } 958 if (maxnum > MAXARGS) 959 maxnum = MAXARGS; 960 for (n = 0; n <= maxnum; n++) { 961 arglst[n] = args; 962 (void) va_arg(args.ap, void *); 963 } 964 return (0); 965 } 966 967 968 /* 969 * For wide character handling 970 */ 971 972 #ifdef _WIDE 973 static int 974 wstring(int *chcount, int *flag_eof, int stow, int type, 975 int len, FILE *iop, va_list *listp) 976 { 977 wint_t wch; 978 wchar_t *ptr; 979 wchar_t *wstart; 980 int dummy; 981 982 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 983 984 if ((type == 'c') && len == MAXINT) 985 len = 1; 986 while (((wch = _wd_getwc(chcount, iop)) != WEOF) && 987 !(type == 's' && iswspace(wch))) { 988 if (stow) 989 *ptr = wch; 990 ptr++; 991 if (--len <= 0) 992 break; 993 } 994 if (wch == WEOF) { 995 *flag_eof = 1; 996 (*chcount) -= 1; 997 } else { 998 if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF) 999 *flag_eof = 1; 1000 } 1001 if (ptr == wstart) 1002 return (0); /* no match */ 1003 if (stow && (type != 'c')) 1004 *ptr = '\0'; 1005 return (1); /* successful match */ 1006 } 1007 1008 #else /* _WIDE */ 1009 static int 1010 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop, 1011 va_list *listp) 1012 { 1013 int wch; 1014 wchar_t *ptr; 1015 wchar_t *wstart; 1016 1017 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 1018 1019 if ((type == 'c') && len == MAXINT) 1020 len = 1; 1021 while (((wch = _bi_getwc(iop)) != EOF) && 1022 !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) { 1023 (*chcount) += _scrwidth((wchar_t)wch); 1024 if (stow) 1025 *ptr = wch; 1026 ptr++; 1027 if (--len <= 0) 1028 break; 1029 } 1030 if (wch == EOF) { 1031 (*flag_eof) = 1; 1032 (*chcount) -= 1; 1033 } else { 1034 if (len > 0 && _bi_ungetwc(wch, iop) == EOF) 1035 (*flag_eof) = 1; 1036 } 1037 if (ptr == wstart) 1038 return (0); /* no match */ 1039 if (stow && (type != 'c')) 1040 *ptr = '\0'; 1041 return (1); /* successful match */ 1042 } 1043 #endif /* _WIDE */ 1044 1045 #ifdef _WIDE 1046 static wint_t 1047 _wd_getwc(int *chcount, FILE *iop) 1048 { 1049 wint_t wc; 1050 int len; 1051 1052 if (!(iop->_flag & _IOWRT)) { 1053 /* call from fwscanf, wscanf */ 1054 wc = __fgetwc_xpg5(iop); 1055 (*chcount)++; 1056 return (wc); 1057 } else { 1058 /* call from swscanf */ 1059 if (*iop->_ptr == '\0') 1060 return (WEOF); 1061 len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr, 1062 MB_CUR_MAX); 1063 if (len == -1) 1064 return (WEOF); 1065 iop->_ptr += len; 1066 (*chcount)++; 1067 return (wc); 1068 } 1069 } 1070 1071 static wint_t 1072 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop) 1073 { 1074 wint_t ret; 1075 int len; 1076 char mbs[MB_LEN_MAX]; 1077 1078 if (wc == WEOF) 1079 return (WEOF); 1080 1081 if (!(iop->_flag & _IOWRT)) { 1082 /* call from fwscanf, wscanf */ 1083 ret = __ungetwc_xpg5((wint_t)wc, iop); 1084 if (ret != (wint_t)wc) 1085 return (WEOF); 1086 (*chcount)--; 1087 return (ret); 1088 } else { 1089 /* call from swscanf */ 1090 len = wctomb(mbs, wc); 1091 if (len == -1) 1092 return (WEOF); 1093 iop->_ptr -= len; 1094 (*chcount)--; 1095 return ((wint_t)wc); 1096 } 1097 } 1098 1099 static int 1100 _watoi(wchar_t *fmt) 1101 { 1102 int n = 0; 1103 wchar_t ch; 1104 1105 ch = *fmt; 1106 if ((ch >= 0) && (ch < 256) && isdigit((int)ch)) { 1107 n = ch - '0'; 1108 while (((ch = *++fmt) >= 0) && (ch < 256) && 1109 isdigit((int)ch)) { 1110 n *= 10; 1111 n += ch - '0'; 1112 } 1113 } 1114 return (n); 1115 } 1116 #endif /* _WIDE */ 1117 1118 /* ARGSUSED3 */ 1119 static int 1120 wbrstring(int *chcount, int *flag_eof, int stow, int type, 1121 int len, FILE *iop, unsigned char *brstr, va_list *listp) 1122 { 1123 wint_t wch; 1124 int i; 1125 char str[MB_LEN_MAX + 1]; /* include null termination */ 1126 wchar_t *ptr, *start; 1127 #ifdef _WIDE 1128 int dummy; 1129 #endif /* _WIDE */ 1130 1131 start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 1132 1133 #ifdef _WIDE 1134 while ((wch = _wd_getwc(&dummy, iop)) != WEOF) { 1135 #else /* _WIDE */ 1136 while ((wch = _bi_getwc(iop)) != WEOF) { 1137 #endif /* _WIDE */ 1138 i = wctomb(str, (wchar_t)wch); 1139 if (i == -1) { 1140 return (0); 1141 } 1142 str[i] = '\0'; 1143 if (fnmatch((const char *)brstr, (const char *)str, 1144 FNM_NOESCAPE)) { 1145 break; 1146 } else { 1147 if (len > 0) { 1148 #ifdef _WIDE 1149 (*chcount)++; 1150 #else /* _WIDE */ 1151 (*chcount) += _scrwidth(wch); 1152 #endif /* _WIDE */ 1153 len--; 1154 if (stow) { 1155 *ptr = wch; 1156 } 1157 ptr++; 1158 if (len <= 0) 1159 break; 1160 } else { 1161 break; 1162 } 1163 } 1164 } 1165 if (wch == WEOF) { 1166 *flag_eof = 1; 1167 } else { 1168 #ifdef _WIDE 1169 if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF) 1170 #else /* _WIDE */ 1171 if (len > 0 && _bi_ungetwc(wch, iop) == WEOF) 1172 #endif /* _WIDE */ 1173 *flag_eof = 1; 1174 } 1175 if (ptr == start) 1176 return (0); /* no match */ 1177 if (stow) 1178 *ptr = L'\0'; 1179 return (1); /* successful match */ 1180 } 1181 1182 #ifdef _WIDE 1183 static int 1184 brstring(int *chcount, int *flag_eof, int stow, int type, 1185 int len, FILE *iop, unsigned char *brstr, va_list *listp) 1186 { 1187 wint_t wch; 1188 int i; 1189 char str[MB_LEN_MAX + 1]; /* include null termination */ 1190 char *ptr, *start, *p; 1191 int dummy; 1192 1193 start = ptr = stow ? va_arg(*listp, char *) : NULL; 1194 1195 while ((wch = _wd_getwc(&dummy, iop)) != WEOF) { 1196 p = str; 1197 i = wctomb(str, (wchar_t)wch); 1198 if (i == -1) { 1199 return (0); 1200 } 1201 str[i] = '\0'; 1202 if (fnmatch((const char *)brstr, (const char *)str, 1203 FNM_NOESCAPE)) { 1204 break; 1205 } else { 1206 if (len >= i) { 1207 (*chcount)++; 1208 len -= i; 1209 if (stow) { 1210 while (i-- > 0) { 1211 *ptr++ = *p++; 1212 } 1213 } else { 1214 while (i-- > 0) { 1215 ptr++; 1216 } 1217 } 1218 if (len <= 0) 1219 break; 1220 } else { 1221 break; 1222 } 1223 } 1224 } 1225 if (wch == WEOF) { 1226 *flag_eof = 1; 1227 } else { 1228 if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF) 1229 *flag_eof = 1; 1230 } 1231 if (ptr == start) 1232 return (0); /* no match */ 1233 if (stow) 1234 *ptr = '\0'; 1235 return (1); /* successful match */ 1236 } 1237 #endif /* _WIDE */ 1238 1239 /* 1240 * Locally define getwc and ungetwc 1241 */ 1242 static int 1243 _bi_getwc(FILE *iop) 1244 { 1245 int c; 1246 wchar_t intcode; 1247 int i, nbytes, cur_max; 1248 char buff[MB_LEN_MAX]; 1249 1250 if ((c = wlocgetc()) == EOF) 1251 return (WEOF); 1252 1253 if (isascii(c)) /* ASCII code */ 1254 return ((wint_t)c); 1255 1256 buff[0] = (char)c; 1257 1258 cur_max = (int)MB_CUR_MAX; 1259 /* MB_CUR_MAX doen't exeed the value of MB_LEN_MAX */ 1260 /* So we use MB_CUR_MAX instead of MB_LEN_MAX for */ 1261 /* improving the performance. */ 1262 for (i = 1; i < cur_max; i++) { 1263 c = wlocgetc(); 1264 if (c == '\n') { 1265 (void) wlocungetc(c); 1266 break; 1267 } 1268 if (c == EOF) { 1269 /* this still may be a valid multibyte character */ 1270 break; 1271 } 1272 buff[i] = (char)c; 1273 } 1274 1275 if ((nbytes = mbtowc(&intcode, buff, i)) == -1) { 1276 /* 1277 * If mbtowc fails, the input was not a legal character. 1278 * ungetc all but one character. 1279 * 1280 * Note: the number of pushback characters that 1281 * ungetc() can handle must be >= (MB_LEN_MAX - 1). 1282 * In Solaris 2.x, the number of pushback 1283 * characters is 4. 1284 */ 1285 while (i-- > 1) { 1286 (void) wlocungetc((signed char)buff[i]); 1287 } 1288 errno = EILSEQ; 1289 return (WEOF); /* Illegal EUC sequence. */ 1290 } 1291 1292 while (i-- > nbytes) { 1293 /* 1294 * Note: the number of pushback characters that 1295 * ungetc() can handle must be >= (MB_LEN_MAX - 1). 1296 * In Solaris 2.x, the number of pushback 1297 * characters is 4. 1298 */ 1299 (void) wlocungetc((signed char)buff[i]); 1300 } 1301 return ((int)intcode); 1302 } 1303 1304 static int 1305 _bi_ungetwc(wint_t wc, FILE *iop) 1306 { 1307 char mbs[MB_LEN_MAX]; 1308 unsigned char *p; 1309 int n; 1310 1311 if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0)) 1312 return (WEOF); 1313 1314 n = wctomb(mbs, (wchar_t)wc); 1315 if (n <= 0) 1316 return (WEOF); 1317 1318 if (iop->_ptr <= iop->_base) { 1319 if (iop->_base == NULL) { 1320 return (WEOF); 1321 } 1322 if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) { 1323 ++iop->_ptr; 1324 } else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) { 1325 return (WEOF); 1326 } 1327 } 1328 1329 p = (unsigned char *)(mbs+n-1); /* p points the last byte */ 1330 /* if _IOWRT is set to iop->_flag, it means this is */ 1331 /* an invocation from sscanf(), and in that time we */ 1332 /* don't touch iop->_cnt. Otherwise, which means an */ 1333 /* invocation from fscanf() or scanf(), we touch iop->_cnt */ 1334 if ((iop->_flag & _IOWRT) == 0) { 1335 /* scanf() and fscanf() */ 1336 iop->_cnt += n; 1337 while (n--) { 1338 *--iop->_ptr = *(p--); 1339 } 1340 } else { 1341 /* sscanf() */ 1342 iop->_ptr -= n; 1343 } 1344 return (wc); 1345 } 1346