1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1988 AT&T */ 28 /* All Rights Reserved */ 29 30 #include "lint.h" 31 #include <sys/types.h> 32 #include "mtlib.h" 33 #include "file64.h" 34 #include <stdio.h> 35 #include <ctype.h> 36 #include <stdarg.h> 37 #include <values.h> 38 #include <errno.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <math.h> 42 #include <thread.h> 43 #include <synch.h> 44 #include <stdlib.h> 45 #include <fnmatch.h> 46 #include <limits.h> 47 #include <wchar.h> 48 #include <unistd.h> 49 #include "libc.h" 50 #include "stdiom.h" 51 #include "xpg6.h" 52 53 #define NCHARS (1 << BITSPERBYTE) 54 55 /* if the _IOWRT flag is set, this must be a call from sscanf */ 56 #define locgetc(cnt) (cnt += 1, (iop->_flag & _IOWRT) ? \ 57 ((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \ 58 GETC(iop)) 59 #define locungetc(cnt, x) (cnt -= 1, (x == EOF) ? EOF : \ 60 ((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \ 61 (++iop->_cnt, *(--iop->_ptr)))) 62 63 #define wlocgetc() ((iop->_flag & _IOWRT) ? \ 64 ((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \ 65 GETC(iop)) 66 #define wlocungetc(x) ((x == EOF) ? EOF : \ 67 ((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \ 68 UNGETC(x, iop))) 69 70 #define MAXARGS 30 /* max. number of args for fast positional paramters */ 71 72 /* 73 * stva_list is used to subvert C's restriction that a variable with an 74 * array type can not appear on the left hand side of an assignment operator. 75 * By putting the array inside a structure, the functionality of assigning to 76 * the whole array through a simple assignment is achieved.. 77 */ 78 typedef struct stva_list { 79 va_list ap; 80 } stva_list; 81 82 static int number(int *, int *, int, int, int, int, FILE *, va_list *); 83 static int readchar(FILE *, int *); 84 static int string(int *, int *, int, int, int, char *, FILE *, va_list *); 85 static int wstring(int *, int *, int, int, int, FILE *, va_list *); 86 static int wbrstring(int *, int *, int, int, int, FILE *, 87 unsigned char *, va_list *); 88 #ifdef _WIDE 89 static int brstring(int *, int *, int, int, int, FILE *, 90 unsigned char *, va_list *); 91 #endif 92 static int _bi_getwc(FILE *); 93 static int _bi_ungetwc(wint_t, FILE *); 94 95 #ifdef _WIDE 96 static int _mkarglst(const wchar_t *, stva_list, stva_list[]); 97 static wint_t _wd_getwc(int *, FILE *); 98 static wint_t _wd_ungetwc(int *, wchar_t, FILE *); 99 static int _watoi(wchar_t *); 100 #else /* _WIDE */ 101 static int _mkarglst(const char *, stva_list, stva_list[]); 102 #endif /* _WIDE */ 103 104 #ifndef _WIDE 105 int 106 _doscan(FILE *iop, const char *fmt, va_list va_Alist) 107 { 108 int ret; 109 rmutex_t *lk; 110 111 if (iop->_flag & _IOWRT) 112 ret = __doscan_u(iop, fmt, va_Alist, 0); 113 else { 114 FLOCKFILE(lk, iop); 115 ret = __doscan_u(iop, fmt, va_Alist, 0); 116 FUNLOCKFILE(lk); 117 } 118 return (ret); 119 } 120 #endif /* _WIDE */ 121 122 /* ARGSUSED3 */ 123 #ifdef _WIDE 124 int 125 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, 126 int scflag __unused) 127 #else /* _WIDE */ 128 int 129 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag __unused) 130 #endif /* _WIDE */ 131 { 132 #ifdef _WIDE 133 wchar_t ch; 134 wchar_t inchar, size; 135 int nmatch = 0, len, stow; 136 #else /* _WIDE */ 137 int ch; 138 int nmatch = 0, len, inchar, stow, size; 139 #endif /* _WIDE */ 140 141 unsigned char *bracket_str = NULL; 142 int chcount, flag_eof; 143 char tab[NCHARS]; 144 145 /* variables for postional parameters */ 146 #ifdef _WIDE 147 const wchar_t *sformat = fmt; /* save the beginning of the format */ 148 #else /* _WIDE */ 149 const unsigned char *fmt = (const unsigned char *)sfmt; 150 const char *sformat = sfmt; /* save the beginning of the format */ 151 #endif /* _WIDE */ 152 int fpos = 1; /* 1 if first postional parameter */ 153 stva_list args; /* used to step through the argument list */ 154 stva_list sargs; /* used to save start of the argument list */ 155 stva_list arglst[MAXARGS]; 156 /* 157 * array giving the appropriate values 158 * for va_arg() to retrieve the 159 * corresponding argument: 160 * arglst[0] is the first argument 161 * arglst[1] is the second argument,etc. 162 */ 163 /* Check if readable stream */ 164 if (!(iop->_flag & (_IOREAD | _IORW))) { 165 errno = EBADF; 166 return (EOF); 167 } 168 169 /* 170 * Initialize args and sargs to the start of the argument list. 171 * We don't know any portable way to copy an arbitrary C object 172 * so we use a system-specific routine(probably a macro) from 173 * stdarg.h. (Remember that if va_list is an array, in_args will 174 * be a pointer and &in_args won't be what we would want for 175 * memcpy.) 176 */ 177 va_copy(args.ap, va_Alist); 178 179 sargs = args; 180 181 chcount = 0; flag_eof = 0; 182 183 /* 184 * **************************************************** 185 * Main loop: reads format to determine a pattern, 186 * and then goes to read input stream 187 * in attempt to match the pattern. 188 * **************************************************** 189 */ 190 for (; ; ) { 191 if ((ch = *fmt++) == '\0') { 192 return (nmatch); /* end of format */ 193 } 194 #ifdef _WIDE 195 if (iswspace(ch)) { 196 if (!flag_eof) { 197 while (iswspace(inchar = 198 _wd_getwc(&chcount, iop))) 199 ; 200 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF) 201 flag_eof = 1; 202 } 203 continue; 204 } 205 if (ch != '%' || (ch = *fmt++) == '%') { 206 if (ch == '%') { 207 if (!flag_eof) { 208 while (iswspace(inchar = 209 _wd_getwc(&chcount, iop))) 210 ; 211 if (_wd_ungetwc(&chcount, inchar, iop) 212 == WEOF) 213 flag_eof = 1; 214 } 215 } 216 if ((inchar = _wd_getwc(&chcount, iop)) == ch) 217 continue; 218 if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) { 219 return (nmatch); /* failed to match input */ 220 } 221 break; 222 } 223 #else /* _WIDE */ 224 if (isspace(ch)) { 225 if (!flag_eof) { 226 while (isspace(inchar = locgetc(chcount))) 227 ; 228 if (locungetc(chcount, inchar) == EOF) 229 flag_eof = 1; 230 231 } 232 continue; 233 } 234 if (ch != '%' || (ch = *fmt++) == '%') { 235 if (ch == '%') { 236 if (!flag_eof) { 237 while (isspace(inchar = 238 locgetc(chcount))) 239 ; 240 if (locungetc(chcount, inchar) == EOF) 241 flag_eof = 1; 242 } 243 } 244 if ((inchar = locgetc(chcount)) == ch) 245 continue; 246 if (locungetc(chcount, inchar) != EOF) { 247 return (nmatch); /* failed to match input */ 248 } 249 break; 250 } 251 #endif /* _WIDE */ 252 253 charswitch: /* target of a goto 8-( */ 254 255 if (ch == '*') { 256 stow = 0; 257 ch = *fmt++; 258 } else 259 stow = 1; 260 261 #ifdef _WIDE 262 for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch)); 263 ch = *fmt++) 264 len = len * 10 + ch - '0'; 265 #else /* _WIDE */ 266 for (len = 0; isdigit(ch); ch = *fmt++) 267 len = len * 10 + ch - '0'; 268 #endif /* _WIDE */ 269 270 if (ch == '$') { 271 /* 272 * positional parameter handling - the number 273 * specified in len gives the argument to which 274 * the next conversion should be applied. 275 * WARNING: This implementation of positional 276 * parameters assumes that the sizes of all pointer 277 * types are the same. (Code similar to that 278 * in the portable doprnt.c should be used if this 279 * assumption does not hold for a particular 280 * port.) 281 */ 282 if (fpos) { 283 if (_mkarglst(sformat, sargs, arglst) != 0) { 284 return (EOF); 285 } else { 286 fpos = 0; 287 } 288 } 289 if (len <= MAXARGS) { 290 args = arglst[len - 1]; 291 } else { 292 args = arglst[MAXARGS - 1]; 293 for (len -= MAXARGS; len > 0; len--) 294 (void) va_arg(args.ap, void *); 295 } 296 len = 0; 297 ch = *fmt++; 298 goto charswitch; 299 } 300 301 if (len == 0) 302 len = MAXINT; 303 #ifdef _WIDE 304 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') || 305 (size == 'j') || (size == 't') || (size == 'z')) 306 ch = *fmt++; 307 #else /* _WIDE */ 308 if ((size = ch) == 'l' || (size == 'h') || (size == 'L') || 309 (size == 'w') || (size == 'j') || (size == 't') || 310 (size == 'z')) 311 ch = *fmt++; 312 #endif /* _WIDE */ 313 if (size == 'l' && ch == 'l') { 314 size = 'm'; /* size = 'm' if long long */ 315 ch = *fmt++; 316 } else if (size == 'h' && ch == 'h') { 317 size = 'b'; /* use size = 'b' if char */ 318 ch = *fmt++; 319 } else if ((size == 't') || (size == 'z')) { 320 size = 'l'; 321 } else if (size == 'j') { 322 #ifndef _LP64 323 /* check scflag for size of u/intmax_t (32-bit libc) */ 324 if (!(scflag & _F_INTMAX32)) { 325 #endif 326 size = 'm'; 327 #ifndef _LP64 328 } 329 #endif 330 } 331 if (ch == '\0') { 332 return (EOF); /* unexpected end of format */ 333 } 334 #ifdef _WIDE 335 if (ch == '[') { 336 wchar_t c; 337 size_t len; 338 int negflg = 0; 339 wchar_t *p; 340 wchar_t *wbracket_str; 341 size_t wlen, clen; 342 343 /* p points to the address of '[' */ 344 p = (wchar_t *)fmt - 1; 345 len = 0; 346 if (*fmt == '^') { 347 len++; 348 fmt++; 349 negflg = 1; 350 } 351 if (((c = *fmt) == ']') || (c == '-')) { 352 len++; 353 fmt++; 354 } 355 while ((c = *fmt) != ']') { 356 if (c == '\0') { 357 return (EOF); /* unexpected EOF */ 358 } else { 359 len++; 360 fmt++; 361 } 362 } 363 fmt++; 364 len += 2; 365 wbracket_str = (wchar_t *) 366 malloc(sizeof (wchar_t) * (len + 1)); 367 if (wbracket_str == NULL) { 368 errno = ENOMEM; 369 return (EOF); 370 } else { 371 (void) wmemcpy(wbracket_str, 372 (const wchar_t *)p, len); 373 *(wbracket_str + len) = L'\0'; 374 if (negflg && *(wbracket_str + 1) == '^') { 375 *(wbracket_str + 1) = L'!'; 376 } 377 } 378 wlen = wcslen(wbracket_str); 379 clen = wcstombs((char *)NULL, wbracket_str, 0); 380 if (clen == (size_t)-1) { 381 free(wbracket_str); 382 return (EOF); 383 } 384 bracket_str = (unsigned char *) 385 malloc(sizeof (unsigned char) * (clen + 1)); 386 if (bracket_str == NULL) { 387 free(wbracket_str); 388 errno = ENOMEM; 389 return (EOF); 390 } 391 clen = wcstombs((char *)bracket_str, wbracket_str, 392 wlen + 1); 393 free(wbracket_str); 394 if (clen == (size_t)-1) { 395 free(bracket_str); 396 return (EOF); 397 } 398 } 399 #else /* _WIDE */ 400 if (ch == '[') { 401 if (size == 'l') { 402 int c, len, i; 403 int negflg = 0; 404 unsigned char *p; 405 406 p = (unsigned char *)(fmt - 1); 407 len = 0; 408 if (*fmt == '^') { 409 len++; 410 fmt++; 411 negflg = 1; 412 } 413 if (((c = *fmt) == ']') || (c == '-')) { 414 len++; 415 fmt++; 416 } 417 while ((c = *fmt) != ']') { 418 if (c == '\0') { 419 return (EOF); 420 } else if (isascii(c)) { 421 len++; 422 fmt++; 423 } else { 424 i = mblen((const char *)fmt, 425 MB_CUR_MAX); 426 if (i <= 0) { 427 return (EOF); 428 } else { 429 len += i; 430 fmt += i; 431 } 432 } 433 } 434 fmt++; 435 len += 2; 436 bracket_str = (unsigned char *) 437 malloc(sizeof (unsigned char) * (len + 1)); 438 if (bracket_str == NULL) { 439 errno = ENOMEM; 440 return (EOF); 441 } else { 442 (void) strncpy((char *)bracket_str, 443 (const char *)p, len); 444 *(bracket_str + len) = '\0'; 445 if (negflg && 446 *(bracket_str + 1) == '^') { 447 *(bracket_str + 1) = '!'; 448 } 449 } 450 } else { 451 int t = 0; 452 int b, c, d; 453 454 if (*fmt == '^') { 455 t++; 456 fmt++; 457 } 458 (void) memset(tab, !t, NCHARS); 459 if ((c = *fmt) == ']' || c == '-') { 460 tab[c] = t; 461 fmt++; 462 } 463 464 while ((c = *fmt) != ']') { 465 if (c == '\0') { 466 return (EOF); 467 } 468 b = *(fmt - 1); 469 d = *(fmt + 1); 470 if ((c == '-') && (d != ']') && 471 (b < d)) { 472 (void) memset(&tab[b], t, 473 d - b + 1); 474 fmt += 2; 475 } else { 476 tab[c] = t; 477 fmt++; 478 } 479 } 480 fmt++; 481 } 482 } 483 #endif /* _WIDE */ 484 485 #ifdef _WIDE 486 if ((ch >= 0) && (ch < 256) && 487 isupper((int)ch)) { /* no longer documented */ 488 if (_lib_version == c_issue_4) { 489 if (size != 'm' && size != 'L') 490 size = 'l'; 491 } 492 ch = _tolower((int)ch); 493 } 494 if (ch != 'n' && !flag_eof) { 495 if (ch != 'c' && ch != 'C' && ch != '[') { 496 while (iswspace(inchar = 497 _wd_getwc(&chcount, iop))) 498 ; 499 if (_wd_ungetwc(&chcount, inchar, iop) == WEOF) 500 break; 501 502 } 503 } 504 #else /* _WIDE */ 505 if (isupper(ch)) { /* no longer documented */ 506 if (_lib_version == c_issue_4) { 507 if (size != 'm' && size != 'L') 508 size = 'l'; 509 } 510 ch = _tolower(ch); 511 } 512 if (ch != 'n' && !flag_eof) { 513 if (ch != 'c' && ch != 'C' && ch != '[') { 514 while (isspace(inchar = locgetc(chcount))) 515 ; 516 if (locungetc(chcount, inchar) == EOF) 517 break; 518 } 519 } 520 #endif /* _WIDE */ 521 522 switch (ch) { 523 case 'C': 524 case 'S': 525 case 'c': 526 case 's': 527 #ifdef _WIDE 528 if ((size == 'l') || (size == 'C') || (size == 'S')) 529 #else /* _WIDE */ 530 if ((size == 'w') || (size == 'l') || (size == 'C') || 531 (size == 'S')) 532 #endif /* _WIDE */ 533 { 534 size = wstring(&chcount, &flag_eof, stow, 535 (int)ch, len, iop, &args.ap); 536 } else { 537 size = string(&chcount, &flag_eof, stow, 538 (int)ch, len, tab, iop, &args.ap); 539 } 540 break; 541 case '[': 542 if (size == 'l') { 543 size = wbrstring(&chcount, &flag_eof, stow, 544 (int)ch, len, iop, bracket_str, &args.ap); 545 free(bracket_str); 546 bracket_str = NULL; 547 } else { 548 #ifdef _WIDE 549 size = brstring(&chcount, &flag_eof, stow, 550 (int)ch, len, iop, bracket_str, &args.ap); 551 free(bracket_str); 552 bracket_str = NULL; 553 #else /* _WIDE */ 554 size = string(&chcount, &flag_eof, stow, 555 ch, len, tab, iop, &args.ap); 556 #endif /* _WIDE */ 557 } 558 break; 559 560 case 'n': 561 if (stow == 0) 562 continue; 563 if (size == 'b') /* char */ 564 *va_arg(args.ap, char *) = (char)chcount; 565 else if (size == 'h') 566 *va_arg(args.ap, short *) = (short)chcount; 567 else if (size == 'l') 568 *va_arg(args.ap, long *) = (long)chcount; 569 else if (size == 'm') /* long long */ 570 *va_arg(args.ap, long long *) = 571 (long long) chcount; 572 else 573 *va_arg(args.ap, int *) = (int)chcount; 574 continue; 575 576 case 'i': 577 default: 578 size = number(&chcount, &flag_eof, stow, (int)ch, 579 len, (int)size, iop, &args.ap); 580 break; 581 } 582 if (size) 583 nmatch += stow; 584 else { 585 return ((flag_eof && !nmatch) ? EOF : nmatch); 586 } 587 continue; 588 } 589 if (bracket_str) 590 free(bracket_str); 591 return (nmatch != 0 ? nmatch : EOF); /* end of input */ 592 } 593 594 /* ****************************************************************** */ 595 /* Functions to read the input stream in an attempt to match incoming */ 596 /* data to the current pattern from the main loop of _doscan(). */ 597 /* ****************************************************************** */ 598 static int 599 number(int *chcount, int *flag_eof, int stow, int type, int len, int size, 600 FILE *iop, va_list *listp) 601 { 602 char numbuf[64]; 603 char *np = numbuf; 604 int c, base, inchar, lookahead; 605 int digitseen = 0, floater = 0, negflg = 0; 606 int lc; 607 long long lcval = 0LL; 608 609 switch (type) { 610 case 'e': 611 case 'f': 612 case 'g': 613 /* 614 * lc = 0 corresponds to c90 mode: do not recognize 615 * hexadecimal fp strings; attempt to push back 616 * all unused characters read 617 * 618 * lc = -1 corresponds to c99 mode: recognize hexa- 619 * decimal fp strings; push back at most one 620 * unused character 621 */ 622 lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0; 623 floater = 1; 624 break; 625 626 case 'a': 627 lc = -1; 628 floater = 1; 629 break; 630 631 case 'd': 632 case 'u': 633 case 'i': 634 base = 10; 635 break; 636 case 'o': 637 base = 8; 638 break; 639 case 'p': 640 #ifdef _LP64 641 size = 'l'; /* pointers are long in LP64 */ 642 #endif /* _LP64 */ 643 /* FALLTHROUGH */ 644 case 'x': 645 base = 16; 646 break; 647 default: 648 return (0); /* unrecognized conversion character */ 649 } 650 651 if (floater != 0) { 652 /* 653 * Handle floating point with 654 * file_to_decimal. 655 */ 656 decimal_mode dm; 657 decimal_record dr; 658 fp_exception_field_type efs; 659 enum decimal_string_form form; 660 char *echar; 661 int nread; 662 char buffer[1024+1]; 663 char *nb = buffer; 664 665 if (len > 1024) 666 len = 1024; 667 file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread); 668 if (lc == -1) { 669 /* 670 * In C99 mode, the entire string read has to be 671 * accepted in order to qualify as a match 672 */ 673 if (nb != buffer + nread) 674 form = invalid_form; 675 } 676 if (stow && (form != invalid_form)) { 677 #if defined(__sparc) 678 dm.rd = _QgetRD(); 679 if (size == 'L') { /* long double */ 680 if ((int)form < 0) 681 __hex_to_quadruple(&dr, dm.rd, 682 va_arg(*listp, quadruple *), &efs); 683 else 684 decimal_to_quadruple( 685 va_arg(*listp, quadruple *), 686 &dm, &dr, &efs); 687 } 688 #elif defined(__i386) || defined(__amd64) 689 dm.rd = __xgetRD(); 690 if (size == 'L') { /* long double */ 691 if ((int)form < 0) 692 __hex_to_extended(&dr, dm.rd, 693 va_arg(*listp, extended *), &efs); 694 else 695 decimal_to_extended( 696 va_arg(*listp, extended *), 697 &dm, &dr, &efs); 698 } 699 #else 700 #error Unknown architecture 701 #endif 702 else if (size == 'l') { /* double */ 703 if ((int)form < 0) 704 __hex_to_double(&dr, dm.rd, 705 va_arg(*listp, double *), &efs); 706 else 707 decimal_to_double( 708 va_arg(*listp, double *), 709 &dm, &dr, &efs); 710 } else { /* float */ 711 if ((int)form < 0) 712 __hex_to_single(&dr, dm.rd, 713 va_arg(*listp, single *), &efs); 714 else 715 decimal_to_single((single *) 716 va_arg(*listp, single *), 717 &dm, &dr, &efs); 718 } 719 if ((efs & (1 << fp_overflow)) != 0) { 720 errno = ERANGE; 721 } 722 if ((efs & (1 << fp_underflow)) != 0) { 723 errno = ERANGE; 724 } 725 } 726 (*chcount) += nread; /* Count characters read. */ 727 c = locgetc((*chcount)); 728 if (locungetc((*chcount), c) == EOF) 729 *flag_eof = 1; 730 return ((form == invalid_form) ? 0 : 1); 731 /* successful match if non-zero */ 732 } 733 734 switch (c = locgetc((*chcount))) { 735 case '-': 736 negflg++; 737 /* FALLTHROUGH */ 738 case '+': 739 if (--len <= 0) 740 break; 741 if ((c = locgetc((*chcount))) != '0') 742 break; 743 /* FALLTHROUGH */ 744 case '0': 745 /* 746 * If %i or %x, the characters 0x or 0X may optionally precede 747 * the sequence of letters and digits (base 16). 748 */ 749 if ((type != 'i' && type != 'x') || (len <= 1)) 750 break; 751 if (((inchar = locgetc((*chcount))) == 'x') || 752 (inchar == 'X')) { 753 lookahead = readchar(iop, chcount); 754 if (isxdigit(lookahead)) { 755 base = 16; 756 757 if (len <= 2) { 758 (void) locungetc((*chcount), lookahead); 759 /* Take into account the 'x' */ 760 len -= 1; 761 } else { 762 c = lookahead; 763 /* Take into account '0x' */ 764 len -= 2; 765 } 766 } else { 767 (void) locungetc((*chcount), lookahead); 768 (void) locungetc((*chcount), inchar); 769 } 770 } else { 771 /* inchar wans't 'x'. */ 772 (void) locungetc((*chcount), inchar); /* Put it back. */ 773 if (type == 'i') /* Only %i accepts an octal. */ 774 base = 8; 775 } 776 } 777 for (; --len >= 0; *np++ = (char)c, c = locgetc((*chcount))) { 778 if (np > numbuf + 62) { 779 errno = ERANGE; 780 return (0); 781 } 782 if (isdigit(c) || base == 16 && isxdigit(c)) { 783 int digit = c - (isdigit(c) ? '0' : 784 isupper(c) ? 'A' - 10 : 'a' - 10); 785 if (digit >= base) 786 break; 787 if (stow) 788 lcval = base * lcval + digit; 789 digitseen++; 790 continue; 791 } 792 break; 793 } 794 795 if (stow && digitseen) { 796 /* suppress possible overflow on 2's-comp negation */ 797 if (negflg && lcval != (1ULL << 63)) 798 lcval = -lcval; 799 switch (size) { 800 case 'm': 801 *va_arg(*listp, long long *) = lcval; 802 break; 803 case 'l': 804 *va_arg(*listp, long *) = (long)lcval; 805 break; 806 case 'h': 807 *va_arg(*listp, short *) = (short)lcval; 808 break; 809 case 'b': 810 *va_arg(*listp, char *) = (char)lcval; 811 break; 812 default: 813 *va_arg(*listp, int *) = (int)lcval; 814 break; 815 } 816 } 817 if (locungetc((*chcount), c) == EOF) 818 *flag_eof = 1; 819 return (digitseen); /* successful match if non-zero */ 820 } 821 822 /* Get a character. If not using sscanf and at the buffer's end */ 823 /* then do a direct read(). Characters read via readchar() */ 824 /* can be pushed back on the input stream by locungetc((*chcount),) */ 825 /* since there is padding allocated at the end of the stream buffer. */ 826 static int 827 readchar(FILE *iop, int *chcount) 828 { 829 int inchar; 830 char buf[1]; 831 832 if ((iop->_flag & _IOWRT) || (iop->_cnt != 0)) { 833 inchar = locgetc((*chcount)); 834 } else { 835 if (_xread(iop, buf, 1) != 1) 836 return (EOF); 837 inchar = (int)buf[0]; 838 (*chcount) += 1; 839 } 840 return (inchar); 841 } 842 843 static int 844 string(int *chcount, int *flag_eof, int stow, int type, int len, 845 char *tab __unused, FILE *iop, va_list *listp) 846 { 847 int ch; 848 char *ptr; 849 char *start; 850 851 start = ptr = stow ? va_arg(*listp, char *) : NULL; 852 if (((type == 'c') || (type == 'C')) && len == MAXINT) 853 len = 1; 854 #ifdef _WIDE 855 while ((ch = locgetc((*chcount))) != EOF && 856 !(((type == 's') || (type == 'S')) && isspace(ch))) { 857 #else /* _WIDE */ 858 while ((ch = locgetc((*chcount))) != EOF && 859 !(((type == 's') || (type == 'S')) && 860 isspace(ch) || type == '[' && tab[ch])) { 861 #endif /* _WIDE */ 862 if (stow) 863 *ptr = (char)ch; 864 ptr++; 865 if (--len <= 0) 866 break; 867 } 868 if (ch == EOF) { 869 (*flag_eof) = 1; 870 (*chcount) -= 1; 871 } else if (len > 0 && locungetc((*chcount), ch) == EOF) 872 (*flag_eof) = 1; 873 if (ptr == start) 874 return (0); /* no match */ 875 if (stow && ((type != 'c') && (type != 'C'))) 876 *ptr = '\0'; 877 return (1); /* successful match */ 878 } 879 880 /* This function initializes arglst, to contain the appropriate */ 881 /* va_list values for the first MAXARGS arguments. */ 882 /* WARNING: this code assumes that the sizes of all pointer types */ 883 /* are the same. (Code similar to that in the portable doprnt.c */ 884 /* should be used if this assumption is not true for a */ 885 /* particular port.) */ 886 887 #ifdef _WIDE 888 static int 889 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[]) 890 #else /* _WIDE */ 891 static int 892 _mkarglst(const char *fmt, stva_list args, stva_list arglst[]) 893 #endif /* _WIDE */ 894 { 895 #ifdef _WIDE 896 #define STRCHR wcschr 897 #define STRSPN wcsspn 898 #define ATOI(x) _watoi((wchar_t *)x) 899 #define SPNSTR1 L"01234567890" 900 #define SPNSTR2 L"# +-.0123456789hL$" 901 #else /* _WIDE */ 902 #define STRCHR strchr 903 #define STRSPN strspn 904 #define ATOI(x) atoi(x) 905 #define SPNSTR1 "01234567890" 906 #define SPNSTR2 "# +-.0123456789hL$" 907 #endif /* _WIDE */ 908 909 int maxnum, curargno; 910 size_t n; 911 912 maxnum = -1; 913 curargno = 0; 914 915 while ((fmt = STRCHR(fmt, '%')) != NULL) { 916 fmt++; /* skip % */ 917 if (*fmt == '*' || *fmt == '%') 918 continue; 919 if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') { 920 /* convert to zero base */ 921 curargno = ATOI(fmt) - 1; 922 fmt += n + 1; 923 } 924 925 if (maxnum < curargno) 926 maxnum = curargno; 927 curargno++; /* default to next in list */ 928 929 fmt += STRSPN(fmt, SPNSTR2); 930 if (*fmt == '[') { 931 fmt++; /* has to be at least on item in scan list */ 932 if (*fmt == ']') { 933 fmt++; 934 } 935 while (*fmt != ']') { 936 if (*fmt == L'\0') { 937 return (-1); /* bad format */ 938 #ifdef _WIDE 939 } else { 940 fmt++; 941 } 942 #else /* _WIDE */ 943 } else if (isascii(*fmt)) { 944 fmt++; 945 } else { 946 int i; 947 948 i = mblen((const char *) 949 fmt, MB_CUR_MAX); 950 if (i <= 0) { 951 return (-1); 952 } else { 953 fmt += i; 954 } 955 } 956 #endif /* _WIDE */ 957 } 958 } 959 } 960 if (maxnum > MAXARGS) 961 maxnum = MAXARGS; 962 for (n = 0; n <= maxnum; n++) { 963 arglst[n] = args; 964 (void) va_arg(args.ap, void *); 965 } 966 return (0); 967 } 968 969 970 /* 971 * For wide character handling 972 */ 973 974 #ifdef _WIDE 975 static int 976 wstring(int *chcount, int *flag_eof, int stow, int type, 977 int len, FILE *iop, va_list *listp) 978 { 979 wint_t wch; 980 wchar_t *ptr; 981 wchar_t *wstart; 982 983 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 984 985 if ((type == 'c') && len == MAXINT) 986 len = 1; 987 while (((wch = _wd_getwc(chcount, iop)) != WEOF) && 988 !(type == 's' && iswspace(wch))) { 989 if (stow) 990 *ptr = wch; 991 ptr++; 992 if (--len <= 0) 993 break; 994 } 995 if (wch == WEOF) { 996 *flag_eof = 1; 997 (*chcount) -= 1; 998 } else { 999 if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF) 1000 *flag_eof = 1; 1001 } 1002 if (ptr == wstart) 1003 return (0); /* no match */ 1004 if (stow && (type != 'c')) 1005 *ptr = '\0'; 1006 return (1); /* successful match */ 1007 } 1008 1009 #else /* _WIDE */ 1010 static int 1011 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop, 1012 va_list *listp) 1013 { 1014 int wch; 1015 wchar_t *ptr; 1016 wchar_t *wstart; 1017 1018 wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 1019 1020 if ((type == 'c') && len == MAXINT) 1021 len = 1; 1022 while (((wch = _bi_getwc(iop)) != EOF) && 1023 !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) { 1024 (*chcount) += _scrwidth((wchar_t)wch); 1025 if (stow) 1026 *ptr = wch; 1027 ptr++; 1028 if (--len <= 0) 1029 break; 1030 } 1031 if (wch == EOF) { 1032 (*flag_eof) = 1; 1033 (*chcount) -= 1; 1034 } else { 1035 if (len > 0 && _bi_ungetwc(wch, iop) == EOF) 1036 (*flag_eof) = 1; 1037 } 1038 if (ptr == wstart) 1039 return (0); /* no match */ 1040 if (stow && (type != 'c')) 1041 *ptr = '\0'; 1042 return (1); /* successful match */ 1043 } 1044 #endif /* _WIDE */ 1045 1046 #ifdef _WIDE 1047 static wint_t 1048 _wd_getwc(int *chcount, FILE *iop) 1049 { 1050 wint_t wc; 1051 int len; 1052 1053 if (!(iop->_flag & _IOWRT)) { 1054 /* call from fwscanf, wscanf */ 1055 wc = __fgetwc_xpg5(iop); 1056 (*chcount)++; 1057 return (wc); 1058 } else { 1059 /* call from swscanf */ 1060 if (*iop->_ptr == '\0') 1061 return (WEOF); 1062 len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr, 1063 MB_CUR_MAX); 1064 if (len == -1) 1065 return (WEOF); 1066 iop->_ptr += len; 1067 (*chcount)++; 1068 return (wc); 1069 } 1070 } 1071 1072 static wint_t 1073 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop) 1074 { 1075 wint_t ret; 1076 int len; 1077 char mbs[MB_LEN_MAX]; 1078 1079 if (wc == WEOF) 1080 return (WEOF); 1081 1082 if (!(iop->_flag & _IOWRT)) { 1083 /* call from fwscanf, wscanf */ 1084 ret = __ungetwc_xpg5((wint_t)wc, iop); 1085 if (ret != (wint_t)wc) 1086 return (WEOF); 1087 (*chcount)--; 1088 return (ret); 1089 } else { 1090 /* call from swscanf */ 1091 len = wctomb(mbs, wc); 1092 if (len == -1) 1093 return (WEOF); 1094 iop->_ptr -= len; 1095 (*chcount)--; 1096 return ((wint_t)wc); 1097 } 1098 } 1099 1100 static int 1101 _watoi(wchar_t *fmt) 1102 { 1103 int n = 0; 1104 wchar_t ch; 1105 1106 ch = *fmt; 1107 if ((ch >= 0) && (ch < 256) && isdigit((int)ch)) { 1108 n = ch - '0'; 1109 while (((ch = *++fmt) >= 0) && (ch < 256) && 1110 isdigit((int)ch)) { 1111 n *= 10; 1112 n += ch - '0'; 1113 } 1114 } 1115 return (n); 1116 } 1117 #endif /* _WIDE */ 1118 1119 static int 1120 wbrstring(int *chcount, int *flag_eof, int stow, int type __unused, 1121 int len, FILE *iop, unsigned char *brstr, va_list *listp) 1122 { 1123 wint_t wch; 1124 int i; 1125 char str[MB_LEN_MAX + 1]; /* include null termination */ 1126 wchar_t *ptr, *start; 1127 #ifdef _WIDE 1128 int dummy; 1129 #endif /* _WIDE */ 1130 1131 start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL; 1132 1133 #ifdef _WIDE 1134 while ((wch = _wd_getwc(&dummy, iop)) != WEOF) { 1135 #else /* _WIDE */ 1136 while ((wch = _bi_getwc(iop)) != WEOF) { 1137 #endif /* _WIDE */ 1138 i = wctomb(str, (wchar_t)wch); 1139 if (i == -1) { 1140 return (0); 1141 } 1142 str[i] = '\0'; 1143 if (fnmatch((const char *)brstr, (const char *)str, 1144 FNM_NOESCAPE)) { 1145 break; 1146 } else { 1147 if (len > 0) { 1148 #ifdef _WIDE 1149 (*chcount)++; 1150 #else /* _WIDE */ 1151 (*chcount) += _scrwidth(wch); 1152 #endif /* _WIDE */ 1153 len--; 1154 if (stow) { 1155 *ptr = wch; 1156 } 1157 ptr++; 1158 if (len <= 0) 1159 break; 1160 } else { 1161 break; 1162 } 1163 } 1164 } 1165 if (wch == WEOF) { 1166 *flag_eof = 1; 1167 } else { 1168 #ifdef _WIDE 1169 if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF) 1170 #else /* _WIDE */ 1171 if (len > 0 && _bi_ungetwc(wch, iop) == WEOF) 1172 #endif /* _WIDE */ 1173 *flag_eof = 1; 1174 } 1175 if (ptr == start) 1176 return (0); /* no match */ 1177 if (stow) 1178 *ptr = L'\0'; 1179 return (1); /* successful match */ 1180 } 1181 1182 #ifdef _WIDE 1183 static int 1184 brstring(int *chcount, int *flag_eof, int stow, int type __unused, 1185 int len, FILE *iop, unsigned char *brstr, va_list *listp) 1186 { 1187 wint_t wch; 1188 int i; 1189 char str[MB_LEN_MAX + 1]; /* include null termination */ 1190 char *ptr, *start, *p; 1191 int dummy; 1192 1193 start = ptr = stow ? va_arg(*listp, char *) : NULL; 1194 1195 while ((wch = _wd_getwc(&dummy, iop)) != WEOF) { 1196 p = str; 1197 i = wctomb(str, (wchar_t)wch); 1198 if (i == -1) { 1199 return (0); 1200 } 1201 str[i] = '\0'; 1202 if (fnmatch((const char *)brstr, (const char *)str, 1203 FNM_NOESCAPE)) { 1204 break; 1205 } else { 1206 if (len >= i) { 1207 (*chcount)++; 1208 len -= i; 1209 if (stow) { 1210 while (i-- > 0) { 1211 *ptr++ = *p++; 1212 } 1213 } else { 1214 while (i-- > 0) { 1215 ptr++; 1216 } 1217 } 1218 if (len <= 0) 1219 break; 1220 } else { 1221 break; 1222 } 1223 } 1224 } 1225 if (wch == WEOF) { 1226 *flag_eof = 1; 1227 } else { 1228 if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF) 1229 *flag_eof = 1; 1230 } 1231 if (ptr == start) 1232 return (0); /* no match */ 1233 if (stow) 1234 *ptr = '\0'; 1235 return (1); /* successful match */ 1236 } 1237 #endif /* _WIDE */ 1238 1239 /* 1240 * Locally define getwc and ungetwc 1241 */ 1242 static int 1243 _bi_getwc(FILE *iop) 1244 { 1245 int c; 1246 wchar_t intcode; 1247 int i, nbytes, cur_max; 1248 char buff[MB_LEN_MAX]; 1249 1250 if ((c = wlocgetc()) == EOF) 1251 return (WEOF); 1252 1253 if (isascii(c)) /* ASCII code */ 1254 return ((wint_t)c); 1255 1256 buff[0] = (char)c; 1257 1258 cur_max = (int)MB_CUR_MAX; 1259 /* MB_CUR_MAX doen't exeed the value of MB_LEN_MAX */ 1260 /* So we use MB_CUR_MAX instead of MB_LEN_MAX for */ 1261 /* improving the performance. */ 1262 for (i = 1; i < cur_max; i++) { 1263 c = wlocgetc(); 1264 if (c == '\n') { 1265 (void) wlocungetc(c); 1266 break; 1267 } 1268 if (c == EOF) { 1269 /* this still may be a valid multibyte character */ 1270 break; 1271 } 1272 buff[i] = (char)c; 1273 } 1274 1275 if ((nbytes = mbtowc(&intcode, buff, i)) == -1) { 1276 /* 1277 * If mbtowc fails, the input was not a legal character. 1278 * ungetc all but one character. 1279 * 1280 * Note: the number of pushback characters that 1281 * ungetc() can handle must be >= (MB_LEN_MAX - 1). 1282 * In Solaris 2.x, the number of pushback 1283 * characters is 4. 1284 */ 1285 while (i-- > 1) { 1286 (void) wlocungetc((signed char)buff[i]); 1287 } 1288 errno = EILSEQ; 1289 return (WEOF); /* Illegal EUC sequence. */ 1290 } 1291 1292 while (i-- > nbytes) { 1293 /* 1294 * Note: the number of pushback characters that 1295 * ungetc() can handle must be >= (MB_LEN_MAX - 1). 1296 * In Solaris 2.x, the number of pushback 1297 * characters is 4. 1298 */ 1299 (void) wlocungetc((signed char)buff[i]); 1300 } 1301 return ((int)intcode); 1302 } 1303 1304 static int 1305 _bi_ungetwc(wint_t wc, FILE *iop) 1306 { 1307 char mbs[MB_LEN_MAX]; 1308 unsigned char *p; 1309 int n; 1310 1311 if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0)) 1312 return (WEOF); 1313 1314 n = wctomb(mbs, (wchar_t)wc); 1315 if (n <= 0) 1316 return (WEOF); 1317 1318 if (iop->_ptr <= iop->_base) { 1319 if (iop->_base == NULL) { 1320 return (WEOF); 1321 } 1322 if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) { 1323 ++iop->_ptr; 1324 } else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) { 1325 return (WEOF); 1326 } 1327 } 1328 1329 p = (unsigned char *)(mbs+n-1); /* p points the last byte */ 1330 /* if _IOWRT is set to iop->_flag, it means this is */ 1331 /* an invocation from sscanf(), and in that time we */ 1332 /* don't touch iop->_cnt. Otherwise, which means an */ 1333 /* invocation from fscanf() or scanf(), we touch iop->_cnt */ 1334 if ((iop->_flag & _IOWRT) == 0) { 1335 /* scanf() and fscanf() */ 1336 iop->_cnt += n; 1337 while (n--) { 1338 *--iop->_ptr = *(p--); 1339 } 1340 } else { 1341 /* sscanf() */ 1342 iop->_ptr -= n; 1343 } 1344 return (wc); 1345 } 1346