1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This file contains the common part of the functions string_to_decimal, 29 * func_to_decimal, and file_to_decimal. Much of this code has been dupli- 30 * cated in wstring_to_decimal (see wstod.c) with some simplifications and 31 * appropriate modifications for wide characters. DO NOT fix a bug here 32 * without fixing the same bug in wstring_to_decimal, if it applies. 33 * 34 * The code below makes the following assumptions. 35 * 36 * 1. The first six parameters to the function are declared with the 37 * following names and types: 38 * 39 * char **ppc; 40 * int nmax; 41 * int fortran_conventions; 42 * decimal_record *pd; 43 * enum decimal_string_form *pform; 44 * char **pechar; 45 * 46 * 2. Before this file is #included, the following variables have been 47 * defined and initialized as shown: 48 * 49 * char *cp; 50 * char *good = *ppc - 1; 51 * int current; 52 * int nread; 53 * locale_t loc; 54 * 55 * If the first character can be read successfully, then current is set 56 * to the value of the first character, cp is set to *ppc, (char)current 57 * is stored at *cp, and nread = 1. If the first character cannot be 58 * read successfully, then current = EOF and nread = 0. 59 * loc should be set to the desired locale in which to perform the 60 * conversion. 61 * 62 * 3. The macro NEXT is defined to expand to code that implements 63 * the following logic: 64 * 65 * if (nread < nmax) { 66 * current = <next character>; 67 * if (current != EOF) { 68 * *++cp = (char)current; 69 * nread++; 70 * } 71 * } else 72 * current = EOF; 73 * 74 * Note that nread always reflects the number of characters successfully 75 * read, the buffer pointed to by *ppc gets filled only with characters 76 * that have been successfully read, and cp always points to the location 77 * in the buffer that was filled by the last character successfully read. 78 * current == EOF if and only if we can't read any more, either because 79 * we've reached the end of the input file or the buffer is full (i.e., 80 * we've read nmax characters). 81 * 82 * 4. After this file is #included, the following variables may be used 83 * and will have the specified values: 84 * 85 * *ppc, *pd, *pform, and *pechar will be set as documented in the 86 * manual page; 87 * nmax and fortran_conventions will be unchanged; 88 * nread will be the number of characters actually read; 89 * cp will point to the last character actually read, provided at least 90 * one character was read successfully (in which case cp >= *ppc). 91 */ 92 93 #define UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c) 94 95 #define NZDIGIT(c) (('1' <= c && c <= '9') || ((int)form < 0 && \ 96 (('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')))) 97 98 { 99 static const char *infstring = "INFINITY"; 100 static const char *nanstring = "NAN"; 101 102 int sigfound, spacefound = 0; 103 int ids = 0; 104 int i, agree; 105 int nzbp = 0; /* number of zeros before point */ 106 int nzap = 0; /* number of zeros after point */ 107 char decpt; 108 int nfast, nfastlimit; 109 char *pfast; 110 int e, esign; 111 int expshift = 0; 112 enum decimal_string_form form; 113 114 /* 115 * This routine assumes that the radix point is a single 116 * ASCII character, so that following this assignment, the 117 * condition (current == decpt) will correctly detect it. 118 */ 119 if (fortran_conventions > 0) 120 decpt = '.'; 121 else 122 decpt = *(localeconv_l(loc)->decimal_point); 123 124 /* input is invalid until we find something */ 125 pd->fpclass = fp_signaling; 126 pd->sign = 0; 127 pd->exponent = 0; 128 pd->ds[0] = '\0'; 129 pd->more = 0; 130 pd->ndigits = 0; 131 *pform = form = invalid_form; 132 *pechar = NULL; 133 134 /* skip white space */ 135 while (isspace_l(current, loc)) { 136 spacefound = 1; 137 NEXT; 138 } 139 140 if (fortran_conventions >= 2 && spacefound) { 141 /* 142 * We found at least one white space character. For 143 * Fortran formatted input, accept this; if we don't 144 * find anything else, we'll interpret it as a valid zero. 145 */ 146 pd->fpclass = fp_zero; 147 form = whitespace_form; 148 sigfound = 0; /* 0 = only zeros found so far */ 149 if (current == EOF) { 150 good = cp; 151 goto done; 152 } else { 153 good = cp - 1; 154 } 155 } else { 156 sigfound = -1; /* -1 = no digits found yet */ 157 } 158 159 /* look for optional leading sign */ 160 if (current == '+') { 161 NEXT; 162 } else if (current == '-') { 163 pd->sign = 1; 164 NEXT; 165 } 166 167 /* 168 * Admissible first non-white-space, non-sign characters are 169 * 0-9, i, I, n, N, or the radix point. 170 */ 171 if ('1' <= current && current <= '9') { 172 good = cp; 173 pd->fpclass = fp_normal; 174 form = fixed_int_form; 175 sigfound = 1; /* 1 = significant digits found */ 176 pd->ds[ids++] = (char)current; 177 NEXT; 178 } else { 179 switch (current) { 180 case ' ': 181 if (fortran_conventions < 2) 182 goto done; 183 /* 184 * When fortran_conventions >= 2, treat leading 185 * blanks the same as leading zeroes. 186 */ 187 /*FALLTHRU*/ 188 189 case '0': 190 /* 191 * Accept the leading zero and set pd->fpclass 192 * accordingly, but don't set sigfound until we 193 * determine that this isn't a "fake" hex string 194 * (i.e., 0x.p...). 195 */ 196 good = cp; 197 pd->fpclass = fp_zero; 198 if (fortran_conventions < 0) { 199 /* look for a hex fp string */ 200 NEXT; 201 if (current == 'X' || current == 'x') { 202 /* assume hex fp form */ 203 form = (enum decimal_string_form)-1; 204 expshift = 2; 205 NEXT; 206 /* 207 * Only a digit or radix point can 208 * follow "0x". 209 */ 210 if (NZDIGIT(current)) { 211 pd->fpclass = fp_normal; 212 good = cp; 213 sigfound = 1; 214 pd->ds[ids++] = (char)current; 215 NEXT; 216 break; 217 } else if (current == decpt) { 218 NEXT; 219 goto afterpoint; 220 } else if (current != '0') { 221 /* not hex fp after all */ 222 form = fixed_int_form; 223 expshift = 0; 224 goto done; 225 } 226 } else { 227 form = fixed_int_form; 228 } 229 } else { 230 form = fixed_int_form; 231 } 232 233 /* skip all leading zeros */ 234 while (current == '0' || (current == ' ' && 235 fortran_conventions >= 2)) { 236 NEXT; 237 } 238 sigfound = 0; /* 0 = only zeros found so far */ 239 if (current == EOF) { 240 good = cp; 241 goto done; 242 } else { 243 good = cp - 1; 244 } 245 break; 246 247 case 'i': 248 case 'I': 249 /* look for inf or infinity */ 250 NEXT; 251 agree = 1; 252 while (agree <= 7 && 253 UCASE(current) == infstring[agree]) { 254 NEXT; 255 agree++; 256 } 257 if (agree < 3) 258 goto done; 259 /* found valid infinity */ 260 pd->fpclass = fp_infinity; 261 sigfound = 1; 262 __inf_read = 1; 263 if (agree < 8) { 264 good = (current == EOF)? cp + 3 - agree : 265 cp + 2 - agree; 266 form = inf_form; 267 } else { 268 good = (current == EOF)? cp : cp - 1; 269 form = infinity_form; 270 } 271 /* 272 * Accept trailing blanks if no extra characters 273 * intervene. 274 */ 275 if (fortran_conventions >= 2 && (agree == 3 || 276 agree == 8)) { 277 while (current == ' ') { 278 NEXT; 279 } 280 good = (current == EOF)? cp : cp - 1; 281 } 282 goto done; 283 284 case 'n': 285 case 'N': 286 /* look for nan or nan(string) */ 287 NEXT; 288 agree = 1; 289 while (agree <= 2 && 290 UCASE(current) == nanstring[agree]) { 291 NEXT; 292 agree++; 293 } 294 if (agree < 3) 295 goto done; 296 /* found valid NaN */ 297 good = (current == EOF)? cp : cp - 1; 298 pd->fpclass = fp_quiet; 299 form = nan_form; 300 sigfound = 1; 301 __nan_read = 1; 302 if (current == '(') { 303 /* accept parenthesized string */ 304 NEXT; 305 if (fortran_conventions < 0) { 306 while ((isalnum_l(current, loc) || 307 current == '_') && 308 ids < DECIMAL_STRING_LENGTH - 1) { 309 pd->ds[ids++] = (char)current; 310 NEXT; 311 } 312 while (isalnum_l(current, loc) || 313 current == '_') { 314 pd->more = 1; 315 NEXT; 316 } 317 } else { 318 while (current > 0 && current != ')' && 319 ids < DECIMAL_STRING_LENGTH - 1) { 320 pd->ds[ids++] = (char)current; 321 NEXT; 322 } 323 while (current > 0 && current != ')') { 324 pd->more = 1; 325 NEXT; 326 } 327 } 328 if (current != ')') 329 goto done; 330 good = cp; 331 form = nanstring_form; 332 /* prepare for loop below */ 333 if (fortran_conventions >= 2) { 334 NEXT; 335 } 336 } 337 /* accept trailing blanks */ 338 if (fortran_conventions >= 2) { 339 while (current == ' ') { 340 NEXT; 341 } 342 good = (current == EOF)? cp : cp - 1; 343 } 344 goto done; 345 346 default: 347 if (current == decpt) { 348 /* 349 * Don't accept the radix point just yet; 350 * we need to see at least one digit. 351 */ 352 NEXT; 353 goto afterpoint; 354 } 355 goto done; 356 } 357 } 358 359 nextnumber: 360 /* 361 * Admissible characters after the first digit are a valid digit, 362 * an exponent delimiter (E or e for any decimal form; +, -, D, d, 363 * Q, or q when fortran_conventions >= 2; P or p for hex form), 364 * or the radix point. (Note that we can't get here unless we've 365 * already found a digit.) 366 */ 367 if (NZDIGIT(current)) { 368 /* 369 * Found another nonzero digit. If there's enough room 370 * in pd->ds, store any intervening zeros we've found so far 371 * and then store this digit. Otherwise, stop storing 372 * digits in pd->ds and set pd->more. 373 */ 374 if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) { 375 for (i = 0; i < nzbp; i++) 376 pd->ds[ids++] = '0'; 377 pd->ds[ids++] = (char)current; 378 } else { 379 pd->exponent += (nzbp + 1) << expshift; 380 pd->more = 1; 381 if (ids < DECIMAL_STRING_LENGTH) { 382 pd->ds[ids] = '\0'; 383 pd->ndigits = ids; 384 /* don't store any more digits */ 385 ids = DECIMAL_STRING_LENGTH; 386 } 387 } 388 pd->fpclass = fp_normal; 389 sigfound = 1; 390 nzbp = 0; 391 NEXT; 392 393 /* 394 * Use an optimized loop to grab a consecutive sequence 395 * of nonzero digits quickly. 396 */ 397 nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids; 398 for (nfast = 0, pfast = &(pd->ds[ids]); 399 nfast < nfastlimit && NZDIGIT(current); 400 nfast++) { 401 *pfast++ = (char)current; 402 NEXT; 403 } 404 ids += nfast; 405 if (current == '0') 406 goto nextnumberzero; /* common case */ 407 /* advance good to the last accepted digit */ 408 good = (current == EOF)? cp : cp - 1; 409 goto nextnumber; 410 } else { 411 switch (current) { 412 case ' ': 413 if (fortran_conventions < 2) 414 goto done; 415 if (fortran_conventions == 2) { 416 while (current == ' ') { 417 NEXT; 418 } 419 good = (current == EOF)? cp : cp - 1; 420 goto nextnumber; 421 } 422 /* 423 * When fortran_conventions > 2, treat internal 424 * blanks the same as zeroes. 425 */ 426 /*FALLTHRU*/ 427 428 case '0': 429 nextnumberzero: 430 /* 431 * Count zeros before the radix point. Later we 432 * will either put these zeros into pd->ds or add 433 * nzbp to pd->exponent to account for them. 434 */ 435 while (current == '0' || (current == ' ' && 436 fortran_conventions > 2)) { 437 nzbp++; 438 NEXT; 439 } 440 good = (current == EOF)? cp : cp - 1; 441 goto nextnumber; 442 443 case '+': 444 case '-': 445 case 'D': 446 case 'd': 447 case 'Q': 448 case 'q': 449 /* 450 * Only accept these as the start of the exponent 451 * field if fortran_conventions is positive. 452 */ 453 if (fortran_conventions <= 0) 454 goto done; 455 /*FALLTHRU*/ 456 457 case 'E': 458 case 'e': 459 if ((int)form < 0) 460 goto done; 461 goto exponent; 462 463 case 'P': 464 case 'p': 465 if ((int)form > 0) 466 goto done; 467 goto exponent; 468 469 default: 470 if (current == decpt) { 471 /* accept the radix point */ 472 good = cp; 473 if (form == fixed_int_form) 474 form = fixed_intdot_form; 475 NEXT; 476 goto afterpoint; 477 } 478 goto done; 479 } 480 } 481 482 afterpoint: 483 /* 484 * Admissible characters after the radix point are a valid digit 485 * or an exponent delimiter. (Note that it is possible to get 486 * here even though we haven't found any digits yet.) 487 */ 488 if (NZDIGIT(current)) { 489 /* found a digit after the point; revise form */ 490 if (form == invalid_form || form == whitespace_form) 491 form = fixed_dotfrac_form; 492 else if (form == fixed_intdot_form) 493 form = fixed_intdotfrac_form; 494 good = cp; 495 if (sigfound < 1) { 496 /* no significant digits found until now */ 497 pd->fpclass = fp_normal; 498 sigfound = 1; 499 pd->ds[ids++] = (char)current; 500 pd->exponent = (-(nzap + 1)) << expshift; 501 } else { 502 /* significant digits have been found */ 503 if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) { 504 for (i = 0; i < nzbp + nzap; i++) 505 pd->ds[ids++] = '0'; 506 pd->ds[ids++] = (char)current; 507 pd->exponent -= (nzap + 1) << expshift; 508 } else { 509 pd->exponent += nzbp << expshift; 510 pd->more = 1; 511 if (ids < DECIMAL_STRING_LENGTH) { 512 pd->ds[ids] = '\0'; 513 pd->ndigits = ids; 514 /* don't store any more digits */ 515 ids = DECIMAL_STRING_LENGTH; 516 } 517 } 518 } 519 nzbp = 0; 520 nzap = 0; 521 NEXT; 522 523 /* 524 * Use an optimized loop to grab a consecutive sequence 525 * of nonzero digits quickly. 526 */ 527 nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids; 528 for (nfast = 0, pfast = &(pd->ds[ids]); 529 nfast < nfastlimit && NZDIGIT(current); 530 nfast++) { 531 *pfast++ = (char)current; 532 NEXT; 533 } 534 ids += nfast; 535 pd->exponent -= nfast << expshift; 536 if (current == '0') 537 goto zeroafterpoint; 538 /* advance good to the last accepted digit */ 539 good = (current == EOF)? cp : cp - 1; 540 goto afterpoint; 541 } else { 542 switch (current) { 543 case ' ': 544 if (fortran_conventions < 2) 545 goto done; 546 if (fortran_conventions == 2) { 547 /* 548 * Treat a radix point followed by blanks 549 * but no digits as zero so we'll pass FCVS. 550 */ 551 if (sigfound == -1) { 552 pd->fpclass = fp_zero; 553 sigfound = 0; 554 } 555 while (current == ' ') { 556 NEXT; 557 } 558 good = (current == EOF)? cp : cp - 1; 559 goto afterpoint; 560 } 561 /* 562 * when fortran_conventions > 2, treat internal 563 * blanks the same as zeroes 564 */ 565 /*FALLTHRU*/ 566 567 case '0': 568 /* found a digit after the point; revise form */ 569 if (form == invalid_form || form == whitespace_form) 570 form = fixed_dotfrac_form; 571 else if (form == fixed_intdot_form) 572 form = fixed_intdotfrac_form; 573 if (sigfound == -1) { 574 pd->fpclass = fp_zero; 575 sigfound = 0; 576 } 577 zeroafterpoint: 578 /* 579 * Count zeros after the radix point. If we find 580 * any more nonzero digits later, we will put these 581 * zeros into pd->ds and decrease pd->exponent by 582 * nzap. 583 */ 584 while (current == '0' || (current == ' ' && 585 fortran_conventions > 2)) { 586 nzap++; 587 NEXT; 588 } 589 if (current == EOF) { 590 good = cp; 591 goto done; 592 } else { 593 good = cp - 1; 594 } 595 goto afterpoint; 596 597 case '+': 598 case '-': 599 case 'D': 600 case 'd': 601 case 'Q': 602 case 'q': 603 /* 604 * Only accept these as the start of the exponent 605 * field if fortran_conventions is positive. 606 */ 607 if (fortran_conventions <= 0) 608 goto done; 609 /*FALLTHRU*/ 610 611 case 'E': 612 case 'e': 613 /* don't accept exponent without preceding digits */ 614 if (sigfound == -1 || (int)form < 0) 615 goto done; 616 break; 617 618 case 'P': 619 case 'p': 620 /* don't accept exponent without preceding digits */ 621 if (sigfound == -1 || (int)form > 0) 622 goto done; 623 break; 624 625 default: 626 goto done; 627 } 628 } 629 630 exponent: 631 /* 632 * Set *pechar to point to the character that looks like the 633 * beginning of the exponent field, then attempt to parse it. 634 */ 635 *pechar = cp; 636 if (current != '+' && current != '-') { 637 /* skip the exponent character and following blanks */ 638 NEXT; 639 if (fortran_conventions >= 2 && current == ' ') { 640 while (current == ' ') { 641 NEXT; 642 } 643 if (fortran_conventions > 2) 644 good = (current == EOF)? cp : cp - 1; 645 } 646 } 647 648 e = 0; 649 esign = 0; 650 651 /* look for optional exponent sign */ 652 if (current == '+') { 653 NEXT; 654 } else if (current == '-') { 655 esign = 1; 656 NEXT; 657 } 658 659 /* 660 * Accumulate explicit exponent. Note that if we don't find at 661 * least one digit, good won't be updated and e will remain 0. 662 * Also, we keep e from getting too large so we don't overflow 663 * the range of int (but notice that the threshold is large 664 * enough that any larger e would cause the result to underflow 665 * or overflow anyway). 666 */ 667 while (('0' <= current && current <= '9') || current == ' ') { 668 if (current == ' ') { 669 if (fortran_conventions < 2) 670 break; 671 if (fortran_conventions == 2) { 672 NEXT; 673 continue; 674 } 675 current = '0'; 676 } 677 good = cp; 678 if (e <= 1000000) 679 e = 10 * e + current - '0'; 680 NEXT; 681 if (fortran_conventions == 2 && current == ' ') { 682 /* accept trailing blanks */ 683 while (current == ' ') { 684 NEXT; 685 } 686 good = (current == EOF)? cp : cp - 1; 687 } 688 } 689 if (esign == 1) 690 pd->exponent -= e; 691 else 692 pd->exponent += e; 693 694 /* 695 * If we successfully parsed an exponent field, update form 696 * accordingly. If we didn't, don't set *pechar. 697 */ 698 if (good >= *pechar) { 699 switch (form) { 700 case whitespace_form: 701 case fixed_int_form: 702 form = floating_int_form; 703 break; 704 705 case fixed_intdot_form: 706 form = floating_intdot_form; 707 break; 708 709 case fixed_dotfrac_form: 710 form = floating_dotfrac_form; 711 break; 712 713 case fixed_intdotfrac_form: 714 form = floating_intdotfrac_form; 715 break; 716 } 717 } else { 718 *pechar = NULL; 719 } 720 721 done: 722 /* 723 * If we found any zeros before the radix point that were not 724 * accounted for earlier, adjust the exponent. (This is only 725 * relevant when pd->fpclass == fp_normal, but it's harmless 726 * in all other cases.) 727 */ 728 pd->exponent += nzbp << expshift; 729 730 /* terminate pd->ds if we haven't already */ 731 if (ids < DECIMAL_STRING_LENGTH) { 732 pd->ds[ids] = '\0'; 733 pd->ndigits = ids; 734 } 735 736 /* 737 * If we accepted any characters, advance *ppc to point to the 738 * first character we didn't accept; otherwise, pass back a 739 * signaling nan. 740 */ 741 if (good >= *ppc) { 742 *ppc = good + 1; 743 } else { 744 pd->fpclass = fp_signaling; 745 pd->sign = 0; 746 form = invalid_form; 747 } 748 749 *pform = form; 750 } 751