xref: /illumos-gate/usr/src/lib/libc/port/fp/char_to_decimal.h (revision 4f2483e5d0c339c7ac30db66a67c108da0b33ca6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This file contains the common part of the functions string_to_decimal,
29  * func_to_decimal, and file_to_decimal.  Much of this code has been dupli-
30  * cated in wstring_to_decimal (see wstod.c) with some simplifications and
31  * appropriate modifications for wide characters.  DO NOT fix a bug here
32  * without fixing the same bug in wstring_to_decimal, if it applies.
33  *
34  * The code below makes the following assumptions.
35  *
36  * 1. The first six parameters to the function are declared with the
37  *    following names and types:
38  *
39  *    char **ppc;
40  *    int nmax;
41  *    int fortran_conventions;
42  *    decimal_record *pd;
43  *    enum decimal_string_form *pform;
44  *    char **pechar;
45  *
46  * 2. Before this file is #included, the following variables have been
47  *    defined and initialized as shown:
48  *
49  *    char *cp;
50  *    char *good = *ppc - 1;
51  *    int current;
52  *    int nread;
53  *    locale_t loc;
54  *
55  *    If the first character can be read successfully, then current is set
56  *    to the value of the first character, cp is set to *ppc, (char)current
57  *    is stored at *cp, and nread = 1.  If the first character cannot be
58  *    read successfully, then current = EOF and nread = 0.
59  *    loc should be set to the desired locale in which to perform the
60  *    conversion.
61  *
62  * 3. The macro NEXT is defined to expand to code that implements
63  *    the following logic:
64  *
65  *      if (nread < nmax) {
66  *          current = <next character>;
67  *          if (current != EOF) {
68  *             *++cp = (char)current;
69  *             nread++;
70  *          }
71  *      } else
72  *          current = EOF;
73  *
74  *    Note that nread always reflects the number of characters successfully
75  *    read, the buffer pointed to by *ppc gets filled only with characters
76  *    that have been successfully read, and cp always points to the location
77  *    in the buffer that was filled by the last character successfully read.
78  *    current == EOF if and only if we can't read any more, either because
79  *    we've reached the end of the input file or the buffer is full (i.e.,
80  *    we've read nmax characters).
81  *
82  * 4. After this file is #included, the following variables may be used
83  *    and will have the specified values:
84  *
85  *    *ppc, *pd, *pform, and *pechar will be set as documented in the
86  *      manual page;
87  *    nmax and fortran_conventions will be unchanged;
88  *    nread will be the number of characters actually read;
89  *    cp will point to the last character actually read, provided at least
90  *      one character was read successfully (in which case cp >= *ppc).
91  */
92 
93 #define	UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c)
94 
95 #define	NZDIGIT(c)	(('1' <= c && c <= '9') || ((int)form < 0 && \
96 			(('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))))
97 
98 {
99 	static const char    *infstring = "INFINITY";
100 	static const char    *nanstring = "NAN";
101 
102 	int	sigfound, spacefound = 0;
103 	int	ids = 0;
104 	int	i, agree;
105 	int	nzbp = 0; /* number of zeros before point */
106 	int	nzap = 0; /* number of zeros after point */
107 	char	decpt;
108 	int	nfast, nfastlimit;
109 	char	*pfast;
110 	int	e, esign;
111 	int	expshift = 0;
112 	enum decimal_string_form	form;
113 
114 	/*
115 	 * This routine assumes that the radix point is a single
116 	 * ASCII character, so that following this assignment, the
117 	 * condition (current == decpt) will correctly detect it.
118 	 */
119 	if (fortran_conventions > 0)
120 		decpt = '.';
121 	else
122 		decpt = *(localeconv_l(loc)->decimal_point);
123 
124 	/* input is invalid until we find something */
125 	pd->fpclass = fp_signaling;
126 	pd->sign = 0;
127 	pd->exponent = 0;
128 	pd->ds[0] = '\0';
129 	pd->more = 0;
130 	pd->ndigits = 0;
131 	*pform = form = invalid_form;
132 	*pechar = NULL;
133 
134 	/* skip white space */
135 	while (isspace_l(current, loc)) {
136 		spacefound = 1;
137 		NEXT;
138 	}
139 
140 	if (fortran_conventions >= 2 && spacefound) {
141 		/*
142 		 * We found at least one white space character.  For
143 		 * Fortran formatted input, accept this; if we don't
144 		 * find anything else, we'll interpret it as a valid zero.
145 		 */
146 		pd->fpclass = fp_zero;
147 		form = whitespace_form;
148 		sigfound = 0;		/* 0 = only zeros found so far */
149 		if (current == EOF) {
150 			good = cp;
151 			goto done;
152 		} else {
153 			good = cp - 1;
154 		}
155 	} else {
156 		sigfound = -1;		/* -1 = no digits found yet */
157 	}
158 
159 	/* look for optional leading sign */
160 	if (current == '+') {
161 		NEXT;
162 	} else if (current == '-') {
163 		pd->sign = 1;
164 		NEXT;
165 	}
166 
167 	/*
168 	 * Admissible first non-white-space, non-sign characters are
169 	 * 0-9, i, I, n, N, or the radix point.
170 	 */
171 	if ('1' <= current && current <= '9') {
172 		good = cp;
173 		pd->fpclass = fp_normal;
174 		form = fixed_int_form;
175 		sigfound = 1;		/* 1 = significant digits found */
176 		pd->ds[ids++] = (char)current;
177 		NEXT;
178 	} else {
179 		switch (current) {
180 		case ' ':
181 			if (fortran_conventions < 2)
182 				goto done;
183 			/*
184 			 * When fortran_conventions >= 2, treat leading
185 			 * blanks the same as leading zeroes.
186 			 */
187 			/*FALLTHRU*/
188 
189 		case '0':
190 			/*
191 			 * Accept the leading zero and set pd->fpclass
192 			 * accordingly, but don't set sigfound until we
193 			 * determine that this isn't a "fake" hex string
194 			 * (i.e., 0x.p...).
195 			 */
196 			good = cp;
197 			pd->fpclass = fp_zero;
198 			if (fortran_conventions < 0) {
199 				/* look for a hex fp string */
200 				NEXT;
201 				if (current == 'X' || current == 'x') {
202 					/* assume hex fp form */
203 					form = (enum decimal_string_form)-1;
204 					expshift = 2;
205 					NEXT;
206 					/*
207 					 * Only a digit or radix point can
208 					 * follow "0x".
209 					 */
210 					if (NZDIGIT(current)) {
211 						pd->fpclass = fp_normal;
212 						good = cp;
213 						sigfound = 1;
214 						pd->ds[ids++] = (char)current;
215 						NEXT;
216 						break;
217 					} else if (current == decpt) {
218 						NEXT;
219 						goto afterpoint;
220 					} else if (current != '0') {
221 						/* not hex fp after all */
222 						form = fixed_int_form;
223 						expshift = 0;
224 						goto done;
225 					}
226 				} else {
227 					form = fixed_int_form;
228 				}
229 			} else {
230 				form = fixed_int_form;
231 			}
232 
233 			/* skip all leading zeros */
234 			while (current == '0' || (current == ' ' &&
235 			    fortran_conventions >= 2)) {
236 				NEXT;
237 			}
238 			sigfound = 0;	/* 0 = only zeros found so far */
239 			if (current == EOF) {
240 				good = cp;
241 				goto done;
242 			} else {
243 				good = cp - 1;
244 			}
245 			break;
246 
247 		case 'i':
248 		case 'I':
249 			/* look for inf or infinity */
250 			NEXT;
251 			agree = 1;
252 			while (agree <= 7 &&
253 			    UCASE(current) == infstring[agree]) {
254 				NEXT;
255 				agree++;
256 			}
257 			if (agree < 3)
258 				goto done;
259 			/* found valid infinity */
260 			pd->fpclass = fp_infinity;
261 			sigfound = 1;
262 			__inf_read = 1;
263 			if (agree < 8) {
264 				good = (current == EOF)? cp + 3 - agree :
265 				    cp + 2 - agree;
266 				form = inf_form;
267 			} else {
268 				good = (current == EOF)? cp : cp - 1;
269 				form = infinity_form;
270 			}
271 			/*
272 			 * Accept trailing blanks if no extra characters
273 			 * intervene.
274 			 */
275 			if (fortran_conventions >= 2 && (agree == 3 ||
276 			    agree == 8)) {
277 				while (current == ' ') {
278 					NEXT;
279 				}
280 				good = (current == EOF)? cp : cp - 1;
281 			}
282 			goto done;
283 
284 		case 'n':
285 		case 'N':
286 			/* look for nan or nan(string) */
287 			NEXT;
288 			agree = 1;
289 			while (agree <= 2 &&
290 			    UCASE(current) == nanstring[agree]) {
291 				NEXT;
292 				agree++;
293 			}
294 			if (agree < 3)
295 				goto done;
296 			/* found valid NaN */
297 			good = (current == EOF)? cp : cp - 1;
298 			pd->fpclass = fp_quiet;
299 			form = nan_form;
300 			sigfound = 1;
301 			__nan_read = 1;
302 			if (current == '(') {
303 				/* accept parenthesized string */
304 				NEXT;
305 				if (fortran_conventions < 0) {
306 					while ((isalnum_l(current, loc) ||
307 					    current == '_') &&
308 					    ids < DECIMAL_STRING_LENGTH - 1) {
309 						pd->ds[ids++] = (char)current;
310 						NEXT;
311 					}
312 					while (isalnum_l(current, loc) ||
313 					    current == '_') {
314 						pd->more = 1;
315 						NEXT;
316 					}
317 				} else {
318 					while (current > 0 && current != ')' &&
319 					    ids < DECIMAL_STRING_LENGTH - 1) {
320 						pd->ds[ids++] = (char)current;
321 						NEXT;
322 					}
323 					while (current > 0 && current != ')') {
324 						pd->more = 1;
325 						NEXT;
326 					}
327 				}
328 				if (current != ')')
329 					goto done;
330 				good = cp;
331 				form = nanstring_form;
332 				/* prepare for loop below */
333 				if (fortran_conventions >= 2) {
334 					NEXT;
335 				}
336 			}
337 			/* accept trailing blanks */
338 			if (fortran_conventions >= 2) {
339 				while (current == ' ') {
340 					NEXT;
341 				}
342 				good = (current == EOF)? cp : cp - 1;
343 			}
344 			goto done;
345 
346 		default:
347 			if (current == decpt) {
348 				/*
349 				 * Don't accept the radix point just yet;
350 				 * we need to see at least one digit.
351 				 */
352 				NEXT;
353 				goto afterpoint;
354 			}
355 			goto done;
356 		}
357 	}
358 
359 nextnumber:
360 	/*
361 	 * Admissible characters after the first digit are a valid digit,
362 	 * an exponent delimiter (E or e for any decimal form; +, -, D, d,
363 	 * Q, or q when fortran_conventions >= 2; P or p for hex form),
364 	 * or the radix point.  (Note that we can't get here unless we've
365 	 * already found a digit.)
366 	 */
367 	if (NZDIGIT(current)) {
368 		/*
369 		 * Found another nonzero digit.  If there's enough room
370 		 * in pd->ds, store any intervening zeros we've found so far
371 		 * and then store this digit.  Otherwise, stop storing
372 		 * digits in pd->ds and set pd->more.
373 		 */
374 		if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
375 			for (i = 0; i < nzbp; i++)
376 				pd->ds[ids++] = '0';
377 			pd->ds[ids++] = (char)current;
378 		} else {
379 			pd->exponent += (nzbp + 1) << expshift;
380 			pd->more = 1;
381 			if (ids < DECIMAL_STRING_LENGTH) {
382 				pd->ds[ids] = '\0';
383 				pd->ndigits = ids;
384 				/* don't store any more digits */
385 				ids = DECIMAL_STRING_LENGTH;
386 			}
387 		}
388 		pd->fpclass = fp_normal;
389 		sigfound = 1;
390 		nzbp = 0;
391 		NEXT;
392 
393 		/*
394 		 * Use an optimized loop to grab a consecutive sequence
395 		 * of nonzero digits quickly.
396 		 */
397 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
398 		for (nfast = 0, pfast = &(pd->ds[ids]);
399 		    nfast < nfastlimit && NZDIGIT(current);
400 		    nfast++) {
401 			*pfast++ = (char)current;
402 			NEXT;
403 		}
404 		ids += nfast;
405 		if (current == '0')
406 			goto nextnumberzero;	/* common case */
407 		/* advance good to the last accepted digit */
408 		good = (current == EOF)? cp : cp - 1;
409 		goto nextnumber;
410 	} else {
411 		switch (current) {
412 		case ' ':
413 			if (fortran_conventions < 2)
414 				goto done;
415 			if (fortran_conventions == 2) {
416 				while (current == ' ') {
417 					NEXT;
418 				}
419 				good = (current == EOF)? cp : cp - 1;
420 				goto nextnumber;
421 			}
422 			/*
423 			 * When fortran_conventions > 2, treat internal
424 			 * blanks the same as zeroes.
425 			 */
426 			/*FALLTHRU*/
427 
428 		case '0':
429 nextnumberzero:
430 			/*
431 			 * Count zeros before the radix point.  Later we
432 			 * will either put these zeros into pd->ds or add
433 			 * nzbp to pd->exponent to account for them.
434 			 */
435 			while (current == '0' || (current == ' ' &&
436 			    fortran_conventions > 2)) {
437 				nzbp++;
438 				NEXT;
439 			}
440 			good = (current == EOF)? cp : cp - 1;
441 			goto nextnumber;
442 
443 		case '+':
444 		case '-':
445 		case 'D':
446 		case 'd':
447 		case 'Q':
448 		case 'q':
449 			/*
450 			 * Only accept these as the start of the exponent
451 			 * field if fortran_conventions is positive.
452 			 */
453 			if (fortran_conventions <= 0)
454 				goto done;
455 			/*FALLTHRU*/
456 
457 		case 'E':
458 		case 'e':
459 			if ((int)form < 0)
460 				goto done;
461 			goto exponent;
462 
463 		case 'P':
464 		case 'p':
465 			if ((int)form > 0)
466 				goto done;
467 			goto exponent;
468 
469 		default:
470 			if (current == decpt) {
471 				/* accept the radix point */
472 				good = cp;
473 				if (form == fixed_int_form)
474 					form = fixed_intdot_form;
475 				NEXT;
476 				goto afterpoint;
477 			}
478 			goto done;
479 		}
480 	}
481 
482 afterpoint:
483 	/*
484 	 * Admissible characters after the radix point are a valid digit
485 	 * or an exponent delimiter.  (Note that it is possible to get
486 	 * here even though we haven't found any digits yet.)
487 	 */
488 	if (NZDIGIT(current)) {
489 		/* found a digit after the point; revise form */
490 		if (form == invalid_form || form == whitespace_form)
491 			form = fixed_dotfrac_form;
492 		else if (form == fixed_intdot_form)
493 			form = fixed_intdotfrac_form;
494 		good = cp;
495 		if (sigfound < 1) {
496 			/* no significant digits found until now */
497 			pd->fpclass = fp_normal;
498 			sigfound = 1;
499 			pd->ds[ids++] = (char)current;
500 			pd->exponent = (-(nzap + 1)) << expshift;
501 		} else {
502 			/* significant digits have been found */
503 			if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
504 				for (i = 0; i < nzbp + nzap; i++)
505 					pd->ds[ids++] = '0';
506 				pd->ds[ids++] = (char)current;
507 				pd->exponent -= (nzap + 1) << expshift;
508 			} else {
509 				pd->exponent += nzbp << expshift;
510 				pd->more = 1;
511 				if (ids < DECIMAL_STRING_LENGTH) {
512 					pd->ds[ids] = '\0';
513 					pd->ndigits = ids;
514 					/* don't store any more digits */
515 					ids = DECIMAL_STRING_LENGTH;
516 				}
517 			}
518 		}
519 		nzbp = 0;
520 		nzap = 0;
521 		NEXT;
522 
523 		/*
524 		 * Use an optimized loop to grab a consecutive sequence
525 		 * of nonzero digits quickly.
526 		 */
527 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
528 		for (nfast = 0, pfast = &(pd->ds[ids]);
529 		    nfast < nfastlimit && NZDIGIT(current);
530 		    nfast++) {
531 			*pfast++ = (char)current;
532 			NEXT;
533 		}
534 		ids += nfast;
535 		pd->exponent -= nfast << expshift;
536 		if (current == '0')
537 			goto zeroafterpoint;
538 		/* advance good to the last accepted digit */
539 		good = (current == EOF)? cp : cp - 1;
540 		goto afterpoint;
541 	} else {
542 		switch (current) {
543 		case ' ':
544 			if (fortran_conventions < 2)
545 				goto done;
546 			if (fortran_conventions == 2) {
547 				/*
548 				 * Treat a radix point followed by blanks
549 				 * but no digits as zero so we'll pass FCVS.
550 				 */
551 				if (sigfound == -1) {
552 					pd->fpclass = fp_zero;
553 					sigfound = 0;
554 				}
555 				while (current == ' ') {
556 					NEXT;
557 				}
558 				good = (current == EOF)? cp : cp - 1;
559 				goto afterpoint;
560 			}
561 			/*
562 			 * when fortran_conventions > 2, treat internal
563 			 * blanks the same as zeroes
564 			 */
565 			/*FALLTHRU*/
566 
567 		case '0':
568 			/* found a digit after the point; revise form */
569 			if (form == invalid_form || form == whitespace_form)
570 				form = fixed_dotfrac_form;
571 			else if (form == fixed_intdot_form)
572 				form = fixed_intdotfrac_form;
573 			if (sigfound == -1) {
574 				pd->fpclass = fp_zero;
575 				sigfound = 0;
576 			}
577 zeroafterpoint:
578 			/*
579 			 * Count zeros after the radix point.  If we find
580 			 * any more nonzero digits later, we will put these
581 			 * zeros into pd->ds and decrease pd->exponent by
582 			 * nzap.
583 			 */
584 			while (current == '0' || (current == ' ' &&
585 			    fortran_conventions > 2)) {
586 				nzap++;
587 				NEXT;
588 			}
589 			if (current == EOF) {
590 				good = cp;
591 				goto done;
592 			} else {
593 				good = cp - 1;
594 			}
595 			goto afterpoint;
596 
597 		case '+':
598 		case '-':
599 		case 'D':
600 		case 'd':
601 		case 'Q':
602 		case 'q':
603 			/*
604 			 * Only accept these as the start of the exponent
605 			 * field if fortran_conventions is positive.
606 			 */
607 			if (fortran_conventions <= 0)
608 				goto done;
609 			/*FALLTHRU*/
610 
611 		case 'E':
612 		case 'e':
613 			/* don't accept exponent without preceding digits */
614 			if (sigfound == -1 || (int)form < 0)
615 				goto done;
616 			break;
617 
618 		case 'P':
619 		case 'p':
620 			/* don't accept exponent without preceding digits */
621 			if (sigfound == -1 || (int)form > 0)
622 				goto done;
623 			break;
624 
625 		default:
626 			goto done;
627 		}
628 	}
629 
630 exponent:
631 	/*
632 	 * Set *pechar to point to the character that looks like the
633 	 * beginning of the exponent field, then attempt to parse it.
634 	 */
635 	*pechar = cp;
636 	if (current != '+' && current != '-') {
637 		/* skip the exponent character and following blanks */
638 		NEXT;
639 		if (fortran_conventions >= 2 && current == ' ') {
640 			while (current == ' ') {
641 				NEXT;
642 			}
643 			if (fortran_conventions > 2)
644 				good = (current == EOF)? cp : cp - 1;
645 		}
646 	}
647 
648 	e = 0;
649 	esign = 0;
650 
651 	/* look for optional exponent sign */
652 	if (current == '+') {
653 		NEXT;
654 	} else if (current == '-') {
655 		esign = 1;
656 		NEXT;
657 	}
658 
659 	/*
660 	 * Accumulate explicit exponent.  Note that if we don't find at
661 	 * least one digit, good won't be updated and e will remain 0.
662 	 * Also, we keep e from getting too large so we don't overflow
663 	 * the range of int (but notice that the threshold is large
664 	 * enough that any larger e would cause the result to underflow
665 	 * or overflow anyway).
666 	 */
667 	while (('0' <= current && current <= '9') || current == ' ') {
668 		if (current == ' ') {
669 			if (fortran_conventions < 2)
670 				break;
671 			if (fortran_conventions == 2) {
672 				NEXT;
673 				continue;
674 			}
675 			current = '0';
676 		}
677 		good = cp;
678 		if (e <= 1000000)
679 			e = 10 * e + current - '0';
680 		NEXT;
681 		if (fortran_conventions == 2 && current == ' ') {
682 			/* accept trailing blanks */
683 			while (current == ' ') {
684 				NEXT;
685 			}
686 			good = (current == EOF)? cp : cp - 1;
687 		}
688 	}
689 	if (esign == 1)
690 		pd->exponent -= e;
691 	else
692 		pd->exponent += e;
693 
694 	/*
695 	 * If we successfully parsed an exponent field, update form
696 	 * accordingly.  If we didn't, don't set *pechar.
697 	 */
698 	if (good >= *pechar) {
699 		switch (form) {
700 		case whitespace_form:
701 		case fixed_int_form:
702 			form = floating_int_form;
703 			break;
704 
705 		case fixed_intdot_form:
706 			form = floating_intdot_form;
707 			break;
708 
709 		case fixed_dotfrac_form:
710 			form = floating_dotfrac_form;
711 			break;
712 
713 		case fixed_intdotfrac_form:
714 			form = floating_intdotfrac_form;
715 			break;
716 		}
717 	} else {
718 		*pechar = NULL;
719 	}
720 
721 done:
722 	/*
723 	 * If we found any zeros before the radix point that were not
724 	 * accounted for earlier, adjust the exponent.  (This is only
725 	 * relevant when pd->fpclass == fp_normal, but it's harmless
726 	 * in all other cases.)
727 	 */
728 	pd->exponent += nzbp << expshift;
729 
730 	/* terminate pd->ds if we haven't already */
731 	if (ids < DECIMAL_STRING_LENGTH) {
732 		pd->ds[ids] = '\0';
733 		pd->ndigits = ids;
734 	}
735 
736 	/*
737 	 * If we accepted any characters, advance *ppc to point to the
738 	 * first character we didn't accept; otherwise, pass back a
739 	 * signaling nan.
740 	 */
741 	if (good >= *ppc) {
742 		*ppc = good + 1;
743 	} else {
744 		pd->fpclass = fp_signaling;
745 		pd->sign = 0;
746 		form = invalid_form;
747 	}
748 
749 	*pform = form;
750 }
751