xref: /illumos-gate/usr/src/lib/libc/port/fp/char_to_decimal.h (revision f37b3cbb6f67aaea5eec1c335bdc7bf432867d64)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This file contains the common part of the functions string_to_decimal,
29  * func_to_decimal, and file_to_decimal.  Much of this code has been dupli-
30  * cated in wstring_to_decimal (see wstod.c) with some simplifications and
31  * appropriate modifications for wide characters.  DO NOT fix a bug here
32  * without fixing the same bug in wstring_to_decimal, if it applies.
33  *
34  * The code below makes the following assumptions.
35  *
36  * 1. The first six parameters to the function are declared with the
37  *    following names and types:
38  *
39  *    char **ppc;
40  *    int nmax;
41  *    int fortran_conventions;
42  *    decimal_record *pd;
43  *    enum decimal_string_form *pform;
44  *    char **pechar;
45  *
46  * 2. Before this file is #included, the following variables have been
47  *    defined and initialized as shown:
48  *
49  *    char *cp;
50  *    char *good = *ppc - 1;
51  *    int current;
52  *    int nread;
53  *
54  *    If the first character can be read successfully, then current is set
55  *    to the value of the first character, cp is set to *ppc, (char)current
56  *    is stored at *cp, and nread = 1.  If the first character cannot be
57  *    read successfully, then current = EOF and nread = 0.
58  *
59  * 3. The macro NEXT is defined to expand to code that implements
60  *    the following logic:
61  *
62  *      if (nread < nmax) {
63  *          current = <next character>;
64  *          if (current != EOF) {
65  *             *++cp = (char)current;
66  *             nread++;
67  *          }
68  *      } else
69  *          current = EOF;
70  *
71  *    Note that nread always reflects the number of characters successfully
72  *    read, the buffer pointed to by *ppc gets filled only with characters
73  *    that have been successfully read, and cp always points to the location
74  *    in the buffer that was filled by the last character successfully read.
75  *    current == EOF if and only if we can't read any more, either because
76  *    we've reached the end of the input file or the buffer is full (i.e.,
77  *    we've read nmax characters).
78  *
79  * 4. After this file is #included, the following variables may be used
80  *    and will have the specified values:
81  *
82  *    *ppc, *pd, *pform, and *pechar will be set as documented in the
83  *      manual page;
84  *    nmax and fortran_conventions will be unchanged;
85  *    nread will be the number of characters actually read;
86  *    cp will point to the last character actually read, provided at least
87  *      one character was read successfully (in which case cp >= *ppc).
88  */
89 
90 #define	UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c)
91 
92 #define	NZDIGIT(c)	(('1' <= c && c <= '9') || ((int)form < 0 && \
93 			(('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))))
94 
95 {
96 	static const char    *infstring = "INFINITY";
97 	static const char    *nanstring = "NAN";
98 
99 	int	sigfound, spacefound = 0;
100 	int	ids = 0;
101 	int	i, agree;
102 	int	nzbp = 0; /* number of zeros before point */
103 	int	nzap = 0; /* number of zeros after point */
104 	char	decpt;
105 	int	nfast, nfastlimit;
106 	char	*pfast;
107 	int	e, esign;
108 	int	expshift = 0;
109 	enum decimal_string_form	form;
110 
111 	/*
112 	 * This routine assumes that the radix point is a single
113 	 * ASCII character, so that following this assignment, the
114 	 * condition (current == decpt) will correctly detect it.
115 	 */
116 	if (fortran_conventions > 0)
117 		decpt = '.';
118 	else
119 		decpt = *(localeconv()->decimal_point);
120 
121 	/* input is invalid until we find something */
122 	pd->fpclass = fp_signaling;
123 	pd->sign = 0;
124 	pd->exponent = 0;
125 	pd->ds[0] = '\0';
126 	pd->more = 0;
127 	pd->ndigits = 0;
128 	*pform = form = invalid_form;
129 	*pechar = NULL;
130 
131 	/* skip white space */
132 	while (isspace(current)) {
133 		spacefound = 1;
134 		NEXT;
135 	}
136 
137 	if (fortran_conventions >= 2 && spacefound) {
138 		/*
139 		 * We found at least one white space character.  For
140 		 * Fortran formatted input, accept this; if we don't
141 		 * find anything else, we'll interpret it as a valid zero.
142 		 */
143 		pd->fpclass = fp_zero;
144 		form = whitespace_form;
145 		sigfound = 0;		/* 0 = only zeros found so far */
146 		if (current == EOF) {
147 			good = cp;
148 			goto done;
149 		} else {
150 			good = cp - 1;
151 		}
152 	} else {
153 		sigfound = -1;		/* -1 = no digits found yet */
154 	}
155 
156 	/* look for optional leading sign */
157 	if (current == '+') {
158 		NEXT;
159 	} else if (current == '-') {
160 		pd->sign = 1;
161 		NEXT;
162 	}
163 
164 	/*
165 	 * Admissible first non-white-space, non-sign characters are
166 	 * 0-9, i, I, n, N, or the radix point.
167 	 */
168 	if ('1' <= current && current <= '9') {
169 		good = cp;
170 		pd->fpclass = fp_normal;
171 		form = fixed_int_form;
172 		sigfound = 1;		/* 1 = significant digits found */
173 		pd->ds[ids++] = (char)current;
174 		NEXT;
175 	} else {
176 		switch (current) {
177 		case ' ':
178 			if (fortran_conventions < 2)
179 				goto done;
180 			/*
181 			 * When fortran_conventions >= 2, treat leading
182 			 * blanks the same as leading zeroes.
183 			 */
184 			/*FALLTHRU*/
185 
186 		case '0':
187 			/*
188 			 * Accept the leading zero and set pd->fpclass
189 			 * accordingly, but don't set sigfound until we
190 			 * determine that this isn't a "fake" hex string
191 			 * (i.e., 0x.p...).
192 			 */
193 			good = cp;
194 			pd->fpclass = fp_zero;
195 			if (fortran_conventions < 0) {
196 				/* look for a hex fp string */
197 				NEXT;
198 				if (current == 'X' || current == 'x') {
199 					/* assume hex fp form */
200 					form = (enum decimal_string_form)-1;
201 					expshift = 2;
202 					NEXT;
203 					/*
204 					 * Only a digit or radix point can
205 					 * follow "0x".
206 					 */
207 					if (NZDIGIT(current)) {
208 						pd->fpclass = fp_normal;
209 						good = cp;
210 						sigfound = 1;
211 						pd->ds[ids++] = (char)current;
212 						NEXT;
213 						break;
214 					} else if (current == decpt) {
215 						NEXT;
216 						goto afterpoint;
217 					} else if (current != '0') {
218 						/* not hex fp after all */
219 						form = fixed_int_form;
220 						expshift = 0;
221 						goto done;
222 					}
223 				} else {
224 					form = fixed_int_form;
225 				}
226 			} else {
227 				form = fixed_int_form;
228 			}
229 
230 			/* skip all leading zeros */
231 			while (current == '0' || (current == ' ' &&
232 			    fortran_conventions >= 2)) {
233 				NEXT;
234 			}
235 			sigfound = 0;	/* 0 = only zeros found so far */
236 			if (current == EOF) {
237 				good = cp;
238 				goto done;
239 			} else {
240 				good = cp - 1;
241 			}
242 			break;
243 
244 		case 'i':
245 		case 'I':
246 			/* look for inf or infinity */
247 			NEXT;
248 			agree = 1;
249 			while (agree <= 7 &&
250 			    UCASE(current) == infstring[agree]) {
251 				NEXT;
252 				agree++;
253 			}
254 			if (agree < 3)
255 				goto done;
256 			/* found valid infinity */
257 			pd->fpclass = fp_infinity;
258 			sigfound = 1;
259 			__inf_read = 1;
260 			if (agree < 8) {
261 				good = (current == EOF)? cp + 3 - agree :
262 				    cp + 2 - agree;
263 				form = inf_form;
264 			} else {
265 				good = (current == EOF)? cp : cp - 1;
266 				form = infinity_form;
267 			}
268 			/*
269 			 * Accept trailing blanks if no extra characters
270 			 * intervene.
271 			 */
272 			if (fortran_conventions >= 2 && (agree == 3 ||
273 			    agree == 8)) {
274 				while (current == ' ') {
275 					NEXT;
276 				}
277 				good = (current == EOF)? cp : cp - 1;
278 			}
279 			goto done;
280 
281 		case 'n':
282 		case 'N':
283 			/* look for nan or nan(string) */
284 			NEXT;
285 			agree = 1;
286 			while (agree <= 2 &&
287 			    UCASE(current) == nanstring[agree]) {
288 				NEXT;
289 				agree++;
290 			}
291 			if (agree < 3)
292 				goto done;
293 			/* found valid NaN */
294 			good = (current == EOF)? cp : cp - 1;
295 			pd->fpclass = fp_quiet;
296 			form = nan_form;
297 			sigfound = 1;
298 			__nan_read = 1;
299 			if (current == '(') {
300 				/* accept parenthesized string */
301 				NEXT;
302 				if (fortran_conventions < 0) {
303 					while ((isalnum(current) ||
304 					    current == '_') &&
305 					    ids < DECIMAL_STRING_LENGTH - 1) {
306 						pd->ds[ids++] = (char)current;
307 						NEXT;
308 					}
309 					while (isalnum(current) ||
310 					    current == '_') {
311 						pd->more = 1;
312 						NEXT;
313 					}
314 				} else {
315 					while (current > 0 && current != ')' &&
316 					    ids < DECIMAL_STRING_LENGTH - 1) {
317 						pd->ds[ids++] = (char)current;
318 						NEXT;
319 					}
320 					while (current > 0 && current != ')') {
321 						pd->more = 1;
322 						NEXT;
323 					}
324 				}
325 				if (current != ')')
326 					goto done;
327 				good = cp;
328 				form = nanstring_form;
329 				/* prepare for loop below */
330 				if (fortran_conventions >= 2) {
331 					NEXT;
332 				}
333 			}
334 			/* accept trailing blanks */
335 			if (fortran_conventions >= 2) {
336 				while (current == ' ') {
337 					NEXT;
338 				}
339 				good = (current == EOF)? cp : cp - 1;
340 			}
341 			goto done;
342 
343 		default:
344 			if (current == decpt) {
345 				/*
346 				 * Don't accept the radix point just yet;
347 				 * we need to see at least one digit.
348 				 */
349 				NEXT;
350 				goto afterpoint;
351 			}
352 			goto done;
353 		}
354 	}
355 
356 nextnumber:
357 	/*
358 	 * Admissible characters after the first digit are a valid digit,
359 	 * an exponent delimiter (E or e for any decimal form; +, -, D, d,
360 	 * Q, or q when fortran_conventions >= 2; P or p for hex form),
361 	 * or the radix point.  (Note that we can't get here unless we've
362 	 * already found a digit.)
363 	 */
364 	if (NZDIGIT(current)) {
365 		/*
366 		 * Found another nonzero digit.  If there's enough room
367 		 * in pd->ds, store any intervening zeros we've found so far
368 		 * and then store this digit.  Otherwise, stop storing
369 		 * digits in pd->ds and set pd->more.
370 		 */
371 		if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
372 			for (i = 0; i < nzbp; i++)
373 				pd->ds[ids++] = '0';
374 			pd->ds[ids++] = (char)current;
375 		} else {
376 			pd->exponent += (nzbp + 1) << expshift;
377 			pd->more = 1;
378 			if (ids < DECIMAL_STRING_LENGTH) {
379 				pd->ds[ids] = '\0';
380 				pd->ndigits = ids;
381 				/* don't store any more digits */
382 				ids = DECIMAL_STRING_LENGTH;
383 			}
384 		}
385 		pd->fpclass = fp_normal;
386 		sigfound = 1;
387 		nzbp = 0;
388 		NEXT;
389 
390 		/*
391 		 * Use an optimized loop to grab a consecutive sequence
392 		 * of nonzero digits quickly.
393 		 */
394 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
395 		for (nfast = 0, pfast = &(pd->ds[ids]);
396 		    nfast < nfastlimit && NZDIGIT(current);
397 		    nfast++) {
398 			*pfast++ = (char)current;
399 			NEXT;
400 		}
401 		ids += nfast;
402 		if (current == '0')
403 			goto nextnumberzero;	/* common case */
404 		/* advance good to the last accepted digit */
405 		good = (current == EOF)? cp : cp - 1;
406 		goto nextnumber;
407 	} else {
408 		switch (current) {
409 		case ' ':
410 			if (fortran_conventions < 2)
411 				goto done;
412 			if (fortran_conventions == 2) {
413 				while (current == ' ') {
414 					NEXT;
415 				}
416 				good = (current == EOF)? cp : cp - 1;
417 				goto nextnumber;
418 			}
419 			/*
420 			 * When fortran_conventions > 2, treat internal
421 			 * blanks the same as zeroes.
422 			 */
423 			/*FALLTHRU*/
424 
425 		case '0':
426 nextnumberzero:
427 			/*
428 			 * Count zeros before the radix point.  Later we
429 			 * will either put these zeros into pd->ds or add
430 			 * nzbp to pd->exponent to account for them.
431 			 */
432 			while (current == '0' || (current == ' ' &&
433 			    fortran_conventions > 2)) {
434 				nzbp++;
435 				NEXT;
436 			}
437 			good = (current == EOF)? cp : cp - 1;
438 			goto nextnumber;
439 
440 		case '+':
441 		case '-':
442 		case 'D':
443 		case 'd':
444 		case 'Q':
445 		case 'q':
446 			/*
447 			 * Only accept these as the start of the exponent
448 			 * field if fortran_conventions is positive.
449 			 */
450 			if (fortran_conventions <= 0)
451 				goto done;
452 			/*FALLTHRU*/
453 
454 		case 'E':
455 		case 'e':
456 			if ((int)form < 0)
457 				goto done;
458 			goto exponent;
459 
460 		case 'P':
461 		case 'p':
462 			if ((int)form > 0)
463 				goto done;
464 			goto exponent;
465 
466 		default:
467 			if (current == decpt) {
468 				/* accept the radix point */
469 				good = cp;
470 				if (form == fixed_int_form)
471 					form = fixed_intdot_form;
472 				NEXT;
473 				goto afterpoint;
474 			}
475 			goto done;
476 		}
477 	}
478 
479 afterpoint:
480 	/*
481 	 * Admissible characters after the radix point are a valid digit
482 	 * or an exponent delimiter.  (Note that it is possible to get
483 	 * here even though we haven't found any digits yet.)
484 	 */
485 	if (NZDIGIT(current)) {
486 		/* found a digit after the point; revise form */
487 		if (form == invalid_form || form == whitespace_form)
488 			form = fixed_dotfrac_form;
489 		else if (form == fixed_intdot_form)
490 			form = fixed_intdotfrac_form;
491 		good = cp;
492 		if (sigfound < 1) {
493 			/* no significant digits found until now */
494 			pd->fpclass = fp_normal;
495 			sigfound = 1;
496 			pd->ds[ids++] = (char)current;
497 			pd->exponent = (-(nzap + 1)) << expshift;
498 		} else {
499 			/* significant digits have been found */
500 			if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
501 				for (i = 0; i < nzbp + nzap; i++)
502 					pd->ds[ids++] = '0';
503 				pd->ds[ids++] = (char)current;
504 				pd->exponent -= (nzap + 1) << expshift;
505 			} else {
506 				pd->exponent += nzbp << expshift;
507 				pd->more = 1;
508 				if (ids < DECIMAL_STRING_LENGTH) {
509 					pd->ds[ids] = '\0';
510 					pd->ndigits = ids;
511 					/* don't store any more digits */
512 					ids = DECIMAL_STRING_LENGTH;
513 				}
514 			}
515 		}
516 		nzbp = 0;
517 		nzap = 0;
518 		NEXT;
519 
520 		/*
521 		 * Use an optimized loop to grab a consecutive sequence
522 		 * of nonzero digits quickly.
523 		 */
524 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
525 		for (nfast = 0, pfast = &(pd->ds[ids]);
526 		    nfast < nfastlimit && NZDIGIT(current);
527 		    nfast++) {
528 			*pfast++ = (char)current;
529 			NEXT;
530 		}
531 		ids += nfast;
532 		pd->exponent -= nfast << expshift;
533 		if (current == '0')
534 			goto zeroafterpoint;
535 		/* advance good to the last accepted digit */
536 		good = (current == EOF)? cp : cp - 1;
537 		goto afterpoint;
538 	} else {
539 		switch (current) {
540 		case ' ':
541 			if (fortran_conventions < 2)
542 				goto done;
543 			if (fortran_conventions == 2) {
544 				/*
545 				 * Treat a radix point followed by blanks
546 				 * but no digits as zero so we'll pass FCVS.
547 				 */
548 				if (sigfound == -1) {
549 					pd->fpclass = fp_zero;
550 					sigfound = 0;
551 				}
552 				while (current == ' ') {
553 					NEXT;
554 				}
555 				good = (current == EOF)? cp : cp - 1;
556 				goto afterpoint;
557 			}
558 			/*
559 			 * when fortran_conventions > 2, treat internal
560 			 * blanks the same as zeroes
561 			 */
562 			/*FALLTHRU*/
563 
564 		case '0':
565 			/* found a digit after the point; revise form */
566 			if (form == invalid_form || form == whitespace_form)
567 				form = fixed_dotfrac_form;
568 			else if (form == fixed_intdot_form)
569 				form = fixed_intdotfrac_form;
570 			if (sigfound == -1) {
571 				pd->fpclass = fp_zero;
572 				sigfound = 0;
573 			}
574 zeroafterpoint:
575 			/*
576 			 * Count zeros after the radix point.  If we find
577 			 * any more nonzero digits later, we will put these
578 			 * zeros into pd->ds and decrease pd->exponent by
579 			 * nzap.
580 			 */
581 			while (current == '0' || (current == ' ' &&
582 			    fortran_conventions > 2)) {
583 				nzap++;
584 				NEXT;
585 			}
586 			if (current == EOF) {
587 				good = cp;
588 				goto done;
589 			} else {
590 				good = cp - 1;
591 			}
592 			goto afterpoint;
593 
594 		case '+':
595 		case '-':
596 		case 'D':
597 		case 'd':
598 		case 'Q':
599 		case 'q':
600 			/*
601 			 * Only accept these as the start of the exponent
602 			 * field if fortran_conventions is positive.
603 			 */
604 			if (fortran_conventions <= 0)
605 				goto done;
606 			/*FALLTHRU*/
607 
608 		case 'E':
609 		case 'e':
610 			/* don't accept exponent without preceding digits */
611 			if (sigfound == -1 || (int)form < 0)
612 				goto done;
613 			break;
614 
615 		case 'P':
616 		case 'p':
617 			/* don't accept exponent without preceding digits */
618 			if (sigfound == -1 || (int)form > 0)
619 				goto done;
620 			break;
621 
622 		default:
623 			goto done;
624 		}
625 	}
626 
627 exponent:
628 	/*
629 	 * Set *pechar to point to the character that looks like the
630 	 * beginning of the exponent field, then attempt to parse it.
631 	 */
632 	*pechar = cp;
633 	if (current != '+' && current != '-') {
634 		/* skip the exponent character and following blanks */
635 		NEXT;
636 		if (fortran_conventions >= 2 && current == ' ') {
637 			while (current == ' ') {
638 				NEXT;
639 			}
640 			if (fortran_conventions > 2)
641 				good = (current == EOF)? cp : cp - 1;
642 		}
643 	}
644 
645 	e = 0;
646 	esign = 0;
647 
648 	/* look for optional exponent sign */
649 	if (current == '+') {
650 		NEXT;
651 	} else if (current == '-') {
652 		esign = 1;
653 		NEXT;
654 	}
655 
656 	/*
657 	 * Accumulate explicit exponent.  Note that if we don't find at
658 	 * least one digit, good won't be updated and e will remain 0.
659 	 * Also, we keep e from getting too large so we don't overflow
660 	 * the range of int (but notice that the threshold is large
661 	 * enough that any larger e would cause the result to underflow
662 	 * or overflow anyway).
663 	 */
664 	while (('0' <= current && current <= '9') || current == ' ') {
665 		if (current == ' ') {
666 			if (fortran_conventions < 2)
667 				break;
668 			if (fortran_conventions == 2) {
669 				NEXT;
670 				continue;
671 			}
672 			current = '0';
673 		}
674 		good = cp;
675 		if (e <= 1000000)
676 			e = 10 * e + current - '0';
677 		NEXT;
678 		if (fortran_conventions == 2 && current == ' ') {
679 			/* accept trailing blanks */
680 			while (current == ' ') {
681 				NEXT;
682 			}
683 			good = (current == EOF)? cp : cp - 1;
684 		}
685 	}
686 	if (esign == 1)
687 		pd->exponent -= e;
688 	else
689 		pd->exponent += e;
690 
691 	/*
692 	 * If we successfully parsed an exponent field, update form
693 	 * accordingly.  If we didn't, don't set *pechar.
694 	 */
695 	if (good >= *pechar) {
696 		switch (form) {
697 		case whitespace_form:
698 		case fixed_int_form:
699 			form = floating_int_form;
700 			break;
701 
702 		case fixed_intdot_form:
703 			form = floating_intdot_form;
704 			break;
705 
706 		case fixed_dotfrac_form:
707 			form = floating_dotfrac_form;
708 			break;
709 
710 		case fixed_intdotfrac_form:
711 			form = floating_intdotfrac_form;
712 			break;
713 		}
714 	} else {
715 		*pechar = NULL;
716 	}
717 
718 done:
719 	/*
720 	 * If we found any zeros before the radix point that were not
721 	 * accounted for earlier, adjust the exponent.  (This is only
722 	 * relevant when pd->fpclass == fp_normal, but it's harmless
723 	 * in all other cases.)
724 	 */
725 	pd->exponent += nzbp << expshift;
726 
727 	/* terminate pd->ds if we haven't already */
728 	if (ids < DECIMAL_STRING_LENGTH) {
729 		pd->ds[ids] = '\0';
730 		pd->ndigits = ids;
731 	}
732 
733 	/*
734 	 * If we accepted any characters, advance *ppc to point to the
735 	 * first character we didn't accept; otherwise, pass back a
736 	 * signaling nan.
737 	 */
738 	if (good >= *ppc) {
739 		*ppc = good + 1;
740 	} else {
741 		pd->fpclass = fp_signaling;
742 		pd->sign = 0;
743 		form = invalid_form;
744 	}
745 
746 	*pform = form;
747 }
748