1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This file contains the common part of the functions string_to_decimal, 29 * func_to_decimal, and file_to_decimal. Much of this code has been dupli- 30 * cated in wstring_to_decimal (see wstod.c) with some simplifications and 31 * appropriate modifications for wide characters. DO NOT fix a bug here 32 * without fixing the same bug in wstring_to_decimal, if it applies. 33 * 34 * The code below makes the following assumptions. 35 * 36 * 1. The first six parameters to the function are declared with the 37 * following names and types: 38 * 39 * char **ppc; 40 * int nmax; 41 * int fortran_conventions; 42 * decimal_record *pd; 43 * enum decimal_string_form *pform; 44 * char **pechar; 45 * 46 * 2. Before this file is #included, the following variables have been 47 * defined and initialized as shown: 48 * 49 * char *cp; 50 * char *good = *ppc - 1; 51 * int current; 52 * int nread; 53 * 54 * If the first character can be read successfully, then current is set 55 * to the value of the first character, cp is set to *ppc, (char)current 56 * is stored at *cp, and nread = 1. If the first character cannot be 57 * read successfully, then current = EOF and nread = 0. 58 * 59 * 3. The macro NEXT is defined to expand to code that implements 60 * the following logic: 61 * 62 * if (nread < nmax) { 63 * current = <next character>; 64 * if (current != EOF) { 65 * *++cp = (char)current; 66 * nread++; 67 * } 68 * } else 69 * current = EOF; 70 * 71 * Note that nread always reflects the number of characters successfully 72 * read, the buffer pointed to by *ppc gets filled only with characters 73 * that have been successfully read, and cp always points to the location 74 * in the buffer that was filled by the last character successfully read. 75 * current == EOF if and only if we can't read any more, either because 76 * we've reached the end of the input file or the buffer is full (i.e., 77 * we've read nmax characters). 78 * 79 * 4. After this file is #included, the following variables may be used 80 * and will have the specified values: 81 * 82 * *ppc, *pd, *pform, and *pechar will be set as documented in the 83 * manual page; 84 * nmax and fortran_conventions will be unchanged; 85 * nread will be the number of characters actually read; 86 * cp will point to the last character actually read, provided at least 87 * one character was read successfully (in which case cp >= *ppc). 88 */ 89 90 #define UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c) 91 92 #define NZDIGIT(c) (('1' <= c && c <= '9') || ((int)form < 0 && \ 93 (('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')))) 94 95 { 96 static const char *infstring = "INFINITY"; 97 static const char *nanstring = "NAN"; 98 99 int sigfound, spacefound = 0; 100 int ids = 0; 101 int i, agree; 102 int nzbp = 0; /* number of zeros before point */ 103 int nzap = 0; /* number of zeros after point */ 104 char decpt; 105 int nfast, nfastlimit; 106 char *pfast; 107 int e, esign; 108 int expshift = 0; 109 enum decimal_string_form form; 110 111 /* 112 * This routine assumes that the radix point is a single 113 * ASCII character, so that following this assignment, the 114 * condition (current == decpt) will correctly detect it. 115 */ 116 if (fortran_conventions > 0) 117 decpt = '.'; 118 else 119 decpt = *(localeconv()->decimal_point); 120 121 /* input is invalid until we find something */ 122 pd->fpclass = fp_signaling; 123 pd->sign = 0; 124 pd->exponent = 0; 125 pd->ds[0] = '\0'; 126 pd->more = 0; 127 pd->ndigits = 0; 128 *pform = form = invalid_form; 129 *pechar = NULL; 130 131 /* skip white space */ 132 while (isspace(current)) { 133 spacefound = 1; 134 NEXT; 135 } 136 137 if (fortran_conventions >= 2 && spacefound) { 138 /* 139 * We found at least one white space character. For 140 * Fortran formatted input, accept this; if we don't 141 * find anything else, we'll interpret it as a valid zero. 142 */ 143 pd->fpclass = fp_zero; 144 form = whitespace_form; 145 sigfound = 0; /* 0 = only zeros found so far */ 146 if (current == EOF) { 147 good = cp; 148 goto done; 149 } else { 150 good = cp - 1; 151 } 152 } else { 153 sigfound = -1; /* -1 = no digits found yet */ 154 } 155 156 /* look for optional leading sign */ 157 if (current == '+') { 158 NEXT; 159 } else if (current == '-') { 160 pd->sign = 1; 161 NEXT; 162 } 163 164 /* 165 * Admissible first non-white-space, non-sign characters are 166 * 0-9, i, I, n, N, or the radix point. 167 */ 168 if ('1' <= current && current <= '9') { 169 good = cp; 170 pd->fpclass = fp_normal; 171 form = fixed_int_form; 172 sigfound = 1; /* 1 = significant digits found */ 173 pd->ds[ids++] = (char)current; 174 NEXT; 175 } else { 176 switch (current) { 177 case ' ': 178 if (fortran_conventions < 2) 179 goto done; 180 /* 181 * When fortran_conventions >= 2, treat leading 182 * blanks the same as leading zeroes. 183 */ 184 /*FALLTHRU*/ 185 186 case '0': 187 /* 188 * Accept the leading zero and set pd->fpclass 189 * accordingly, but don't set sigfound until we 190 * determine that this isn't a "fake" hex string 191 * (i.e., 0x.p...). 192 */ 193 good = cp; 194 pd->fpclass = fp_zero; 195 if (fortran_conventions < 0) { 196 /* look for a hex fp string */ 197 NEXT; 198 if (current == 'X' || current == 'x') { 199 /* assume hex fp form */ 200 form = (enum decimal_string_form)-1; 201 expshift = 2; 202 NEXT; 203 /* 204 * Only a digit or radix point can 205 * follow "0x". 206 */ 207 if (NZDIGIT(current)) { 208 pd->fpclass = fp_normal; 209 good = cp; 210 sigfound = 1; 211 pd->ds[ids++] = (char)current; 212 NEXT; 213 break; 214 } else if (current == decpt) { 215 NEXT; 216 goto afterpoint; 217 } else if (current != '0') { 218 /* not hex fp after all */ 219 form = fixed_int_form; 220 expshift = 0; 221 goto done; 222 } 223 } else { 224 form = fixed_int_form; 225 } 226 } else { 227 form = fixed_int_form; 228 } 229 230 /* skip all leading zeros */ 231 while (current == '0' || (current == ' ' && 232 fortran_conventions >= 2)) { 233 NEXT; 234 } 235 sigfound = 0; /* 0 = only zeros found so far */ 236 if (current == EOF) { 237 good = cp; 238 goto done; 239 } else { 240 good = cp - 1; 241 } 242 break; 243 244 case 'i': 245 case 'I': 246 /* look for inf or infinity */ 247 NEXT; 248 agree = 1; 249 while (agree <= 7 && 250 UCASE(current) == infstring[agree]) { 251 NEXT; 252 agree++; 253 } 254 if (agree < 3) 255 goto done; 256 /* found valid infinity */ 257 pd->fpclass = fp_infinity; 258 sigfound = 1; 259 __inf_read = 1; 260 if (agree < 8) { 261 good = (current == EOF)? cp + 3 - agree : 262 cp + 2 - agree; 263 form = inf_form; 264 } else { 265 good = (current == EOF)? cp : cp - 1; 266 form = infinity_form; 267 } 268 /* 269 * Accept trailing blanks if no extra characters 270 * intervene. 271 */ 272 if (fortran_conventions >= 2 && (agree == 3 || 273 agree == 8)) { 274 while (current == ' ') { 275 NEXT; 276 } 277 good = (current == EOF)? cp : cp - 1; 278 } 279 goto done; 280 281 case 'n': 282 case 'N': 283 /* look for nan or nan(string) */ 284 NEXT; 285 agree = 1; 286 while (agree <= 2 && 287 UCASE(current) == nanstring[agree]) { 288 NEXT; 289 agree++; 290 } 291 if (agree < 3) 292 goto done; 293 /* found valid NaN */ 294 good = (current == EOF)? cp : cp - 1; 295 pd->fpclass = fp_quiet; 296 form = nan_form; 297 sigfound = 1; 298 __nan_read = 1; 299 if (current == '(') { 300 /* accept parenthesized string */ 301 NEXT; 302 if (fortran_conventions < 0) { 303 while ((isalnum(current) || 304 current == '_') && 305 ids < DECIMAL_STRING_LENGTH - 1) { 306 pd->ds[ids++] = (char)current; 307 NEXT; 308 } 309 while (isalnum(current) || 310 current == '_') { 311 pd->more = 1; 312 NEXT; 313 } 314 } else { 315 while (current > 0 && current != ')' && 316 ids < DECIMAL_STRING_LENGTH - 1) { 317 pd->ds[ids++] = (char)current; 318 NEXT; 319 } 320 while (current > 0 && current != ')') { 321 pd->more = 1; 322 NEXT; 323 } 324 } 325 if (current != ')') 326 goto done; 327 good = cp; 328 form = nanstring_form; 329 /* prepare for loop below */ 330 if (fortran_conventions >= 2) { 331 NEXT; 332 } 333 } 334 /* accept trailing blanks */ 335 if (fortran_conventions >= 2) { 336 while (current == ' ') { 337 NEXT; 338 } 339 good = (current == EOF)? cp : cp - 1; 340 } 341 goto done; 342 343 default: 344 if (current == decpt) { 345 /* 346 * Don't accept the radix point just yet; 347 * we need to see at least one digit. 348 */ 349 NEXT; 350 goto afterpoint; 351 } 352 goto done; 353 } 354 } 355 356 nextnumber: 357 /* 358 * Admissible characters after the first digit are a valid digit, 359 * an exponent delimiter (E or e for any decimal form; +, -, D, d, 360 * Q, or q when fortran_conventions >= 2; P or p for hex form), 361 * or the radix point. (Note that we can't get here unless we've 362 * already found a digit.) 363 */ 364 if (NZDIGIT(current)) { 365 /* 366 * Found another nonzero digit. If there's enough room 367 * in pd->ds, store any intervening zeros we've found so far 368 * and then store this digit. Otherwise, stop storing 369 * digits in pd->ds and set pd->more. 370 */ 371 if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) { 372 for (i = 0; i < nzbp; i++) 373 pd->ds[ids++] = '0'; 374 pd->ds[ids++] = (char)current; 375 } else { 376 pd->exponent += (nzbp + 1) << expshift; 377 pd->more = 1; 378 if (ids < DECIMAL_STRING_LENGTH) { 379 pd->ds[ids] = '\0'; 380 pd->ndigits = ids; 381 /* don't store any more digits */ 382 ids = DECIMAL_STRING_LENGTH; 383 } 384 } 385 pd->fpclass = fp_normal; 386 sigfound = 1; 387 nzbp = 0; 388 NEXT; 389 390 /* 391 * Use an optimized loop to grab a consecutive sequence 392 * of nonzero digits quickly. 393 */ 394 nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids; 395 for (nfast = 0, pfast = &(pd->ds[ids]); 396 nfast < nfastlimit && NZDIGIT(current); 397 nfast++) { 398 *pfast++ = (char)current; 399 NEXT; 400 } 401 ids += nfast; 402 if (current == '0') 403 goto nextnumberzero; /* common case */ 404 /* advance good to the last accepted digit */ 405 good = (current == EOF)? cp : cp - 1; 406 goto nextnumber; 407 } else { 408 switch (current) { 409 case ' ': 410 if (fortran_conventions < 2) 411 goto done; 412 if (fortran_conventions == 2) { 413 while (current == ' ') { 414 NEXT; 415 } 416 good = (current == EOF)? cp : cp - 1; 417 goto nextnumber; 418 } 419 /* 420 * When fortran_conventions > 2, treat internal 421 * blanks the same as zeroes. 422 */ 423 /*FALLTHRU*/ 424 425 case '0': 426 nextnumberzero: 427 /* 428 * Count zeros before the radix point. Later we 429 * will either put these zeros into pd->ds or add 430 * nzbp to pd->exponent to account for them. 431 */ 432 while (current == '0' || (current == ' ' && 433 fortran_conventions > 2)) { 434 nzbp++; 435 NEXT; 436 } 437 good = (current == EOF)? cp : cp - 1; 438 goto nextnumber; 439 440 case '+': 441 case '-': 442 case 'D': 443 case 'd': 444 case 'Q': 445 case 'q': 446 /* 447 * Only accept these as the start of the exponent 448 * field if fortran_conventions is positive. 449 */ 450 if (fortran_conventions <= 0) 451 goto done; 452 /*FALLTHRU*/ 453 454 case 'E': 455 case 'e': 456 if ((int)form < 0) 457 goto done; 458 goto exponent; 459 460 case 'P': 461 case 'p': 462 if ((int)form > 0) 463 goto done; 464 goto exponent; 465 466 default: 467 if (current == decpt) { 468 /* accept the radix point */ 469 good = cp; 470 if (form == fixed_int_form) 471 form = fixed_intdot_form; 472 NEXT; 473 goto afterpoint; 474 } 475 goto done; 476 } 477 } 478 479 afterpoint: 480 /* 481 * Admissible characters after the radix point are a valid digit 482 * or an exponent delimiter. (Note that it is possible to get 483 * here even though we haven't found any digits yet.) 484 */ 485 if (NZDIGIT(current)) { 486 /* found a digit after the point; revise form */ 487 if (form == invalid_form || form == whitespace_form) 488 form = fixed_dotfrac_form; 489 else if (form == fixed_intdot_form) 490 form = fixed_intdotfrac_form; 491 good = cp; 492 if (sigfound < 1) { 493 /* no significant digits found until now */ 494 pd->fpclass = fp_normal; 495 sigfound = 1; 496 pd->ds[ids++] = (char)current; 497 pd->exponent = (-(nzap + 1)) << expshift; 498 } else { 499 /* significant digits have been found */ 500 if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) { 501 for (i = 0; i < nzbp + nzap; i++) 502 pd->ds[ids++] = '0'; 503 pd->ds[ids++] = (char)current; 504 pd->exponent -= (nzap + 1) << expshift; 505 } else { 506 pd->exponent += nzbp << expshift; 507 pd->more = 1; 508 if (ids < DECIMAL_STRING_LENGTH) { 509 pd->ds[ids] = '\0'; 510 pd->ndigits = ids; 511 /* don't store any more digits */ 512 ids = DECIMAL_STRING_LENGTH; 513 } 514 } 515 } 516 nzbp = 0; 517 nzap = 0; 518 NEXT; 519 520 /* 521 * Use an optimized loop to grab a consecutive sequence 522 * of nonzero digits quickly. 523 */ 524 nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids; 525 for (nfast = 0, pfast = &(pd->ds[ids]); 526 nfast < nfastlimit && NZDIGIT(current); 527 nfast++) { 528 *pfast++ = (char)current; 529 NEXT; 530 } 531 ids += nfast; 532 pd->exponent -= nfast << expshift; 533 if (current == '0') 534 goto zeroafterpoint; 535 /* advance good to the last accepted digit */ 536 good = (current == EOF)? cp : cp - 1; 537 goto afterpoint; 538 } else { 539 switch (current) { 540 case ' ': 541 if (fortran_conventions < 2) 542 goto done; 543 if (fortran_conventions == 2) { 544 /* 545 * Treat a radix point followed by blanks 546 * but no digits as zero so we'll pass FCVS. 547 */ 548 if (sigfound == -1) { 549 pd->fpclass = fp_zero; 550 sigfound = 0; 551 } 552 while (current == ' ') { 553 NEXT; 554 } 555 good = (current == EOF)? cp : cp - 1; 556 goto afterpoint; 557 } 558 /* 559 * when fortran_conventions > 2, treat internal 560 * blanks the same as zeroes 561 */ 562 /*FALLTHRU*/ 563 564 case '0': 565 /* found a digit after the point; revise form */ 566 if (form == invalid_form || form == whitespace_form) 567 form = fixed_dotfrac_form; 568 else if (form == fixed_intdot_form) 569 form = fixed_intdotfrac_form; 570 if (sigfound == -1) { 571 pd->fpclass = fp_zero; 572 sigfound = 0; 573 } 574 zeroafterpoint: 575 /* 576 * Count zeros after the radix point. If we find 577 * any more nonzero digits later, we will put these 578 * zeros into pd->ds and decrease pd->exponent by 579 * nzap. 580 */ 581 while (current == '0' || (current == ' ' && 582 fortran_conventions > 2)) { 583 nzap++; 584 NEXT; 585 } 586 if (current == EOF) { 587 good = cp; 588 goto done; 589 } else { 590 good = cp - 1; 591 } 592 goto afterpoint; 593 594 case '+': 595 case '-': 596 case 'D': 597 case 'd': 598 case 'Q': 599 case 'q': 600 /* 601 * Only accept these as the start of the exponent 602 * field if fortran_conventions is positive. 603 */ 604 if (fortran_conventions <= 0) 605 goto done; 606 /*FALLTHRU*/ 607 608 case 'E': 609 case 'e': 610 /* don't accept exponent without preceding digits */ 611 if (sigfound == -1 || (int)form < 0) 612 goto done; 613 break; 614 615 case 'P': 616 case 'p': 617 /* don't accept exponent without preceding digits */ 618 if (sigfound == -1 || (int)form > 0) 619 goto done; 620 break; 621 622 default: 623 goto done; 624 } 625 } 626 627 exponent: 628 /* 629 * Set *pechar to point to the character that looks like the 630 * beginning of the exponent field, then attempt to parse it. 631 */ 632 *pechar = cp; 633 if (current != '+' && current != '-') { 634 /* skip the exponent character and following blanks */ 635 NEXT; 636 if (fortran_conventions >= 2 && current == ' ') { 637 while (current == ' ') { 638 NEXT; 639 } 640 if (fortran_conventions > 2) 641 good = (current == EOF)? cp : cp - 1; 642 } 643 } 644 645 e = 0; 646 esign = 0; 647 648 /* look for optional exponent sign */ 649 if (current == '+') { 650 NEXT; 651 } else if (current == '-') { 652 esign = 1; 653 NEXT; 654 } 655 656 /* 657 * Accumulate explicit exponent. Note that if we don't find at 658 * least one digit, good won't be updated and e will remain 0. 659 * Also, we keep e from getting too large so we don't overflow 660 * the range of int (but notice that the threshold is large 661 * enough that any larger e would cause the result to underflow 662 * or overflow anyway). 663 */ 664 while (('0' <= current && current <= '9') || current == ' ') { 665 if (current == ' ') { 666 if (fortran_conventions < 2) 667 break; 668 if (fortran_conventions == 2) { 669 NEXT; 670 continue; 671 } 672 current = '0'; 673 } 674 good = cp; 675 if (e <= 1000000) 676 e = 10 * e + current - '0'; 677 NEXT; 678 if (fortran_conventions == 2 && current == ' ') { 679 /* accept trailing blanks */ 680 while (current == ' ') { 681 NEXT; 682 } 683 good = (current == EOF)? cp : cp - 1; 684 } 685 } 686 if (esign == 1) 687 pd->exponent -= e; 688 else 689 pd->exponent += e; 690 691 /* 692 * If we successfully parsed an exponent field, update form 693 * accordingly. If we didn't, don't set *pechar. 694 */ 695 if (good >= *pechar) { 696 switch (form) { 697 case whitespace_form: 698 case fixed_int_form: 699 form = floating_int_form; 700 break; 701 702 case fixed_intdot_form: 703 form = floating_intdot_form; 704 break; 705 706 case fixed_dotfrac_form: 707 form = floating_dotfrac_form; 708 break; 709 710 case fixed_intdotfrac_form: 711 form = floating_intdotfrac_form; 712 break; 713 } 714 } else { 715 *pechar = NULL; 716 } 717 718 done: 719 /* 720 * If we found any zeros before the radix point that were not 721 * accounted for earlier, adjust the exponent. (This is only 722 * relevant when pd->fpclass == fp_normal, but it's harmless 723 * in all other cases.) 724 */ 725 pd->exponent += nzbp << expshift; 726 727 /* terminate pd->ds if we haven't already */ 728 if (ids < DECIMAL_STRING_LENGTH) { 729 pd->ds[ids] = '\0'; 730 pd->ndigits = ids; 731 } 732 733 /* 734 * If we accepted any characters, advance *ppc to point to the 735 * first character we didn't accept; otherwise, pass back a 736 * signaling nan. 737 */ 738 if (good >= *ppc) { 739 *ppc = good + 1; 740 } else { 741 pd->fpclass = fp_signaling; 742 pd->sign = 0; 743 form = invalid_form; 744 } 745 746 *pform = form; 747 } 748