xref: /illumos-gate/usr/src/lib/libc/port/i18n/wstod.c (revision a30583cb7f21a4667897c305d2bb4bacd936d85f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * This file is based on /usr/src/lib/libc/port/gen/strtod.c and
32  * /usr/src/lib/libc/sparc/fp/string_decim.c
33  */
34 
35 #pragma weak _wcstod = wcstod
36 #pragma weak _wstod = wstod
37 
38 #include "lint.h"
39 #include <errno.h>
40 #include <stdio.h>
41 #include <values.h>
42 #include <floatingpoint.h>
43 #include <stddef.h>
44 #include <wctype.h>
45 #include "base_conversion.h"	/* from usr/src/lib/libc/inc */
46 #include <locale.h>
47 #include "libc.h"
48 #include "xpg6.h"
49 
50 static void wstring_to_decimal(const wchar_t **, int, decimal_record *, int *);
51 
52 double
53 wcstod(const wchar_t *cp, wchar_t **ptr)
54 {
55 	double		x;
56 	decimal_mode	mr;
57 	decimal_record	dr;
58 	fp_exception_field_type fs;
59 	int 		form;
60 
61 	wstring_to_decimal(&cp, __xpg6 & _C99SUSv3_recognize_hexfp, &dr, &form);
62 	if (ptr != NULL)
63 		*ptr = (wchar_t *)cp;
64 	if (form == 0)
65 		return (0.0);	/* Shameful kluge for SVID's sake. */
66 #if defined(__i386) || defined(__amd64)
67 	mr.rd = __xgetRD();
68 #elif defined(__sparc)
69 	mr.rd = _QgetRD();
70 #else
71 #error Unknown architecture!
72 #endif
73 	if (form < 0)
74 		__hex_to_double(&dr, mr.rd, &x, &fs);
75 	else
76 		decimal_to_double(&x, &mr, &dr, &fs);
77 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
78 		errno = ERANGE;
79 	return (x);
80 }
81 
82 float
83 wcstof(const wchar_t *cp, wchar_t **ptr)
84 {
85 	float		x;
86 	decimal_mode	mr;
87 	decimal_record	dr;
88 	fp_exception_field_type fs;
89 	int		form;
90 
91 	wstring_to_decimal(&cp, 1, &dr, &form);
92 	if (ptr != NULL)
93 		*ptr = (wchar_t *)cp;
94 	if (form == 0)
95 		return (0.0f);
96 #if defined(__i386) || defined(__amd64)
97 	mr.rd = __xgetRD();
98 #elif defined(__sparc)
99 	mr.rd = _QgetRD();
100 #else
101 #error Unknown architecture!
102 #endif
103 	if (form < 0)
104 		__hex_to_single(&dr, mr.rd, &x, &fs);
105 	else
106 		decimal_to_single(&x, &mr, &dr, &fs);
107 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
108 		errno = ERANGE;
109 	return (x);
110 }
111 
112 long double
113 wcstold(const wchar_t *cp, wchar_t **ptr)
114 {
115 	long double	x;
116 	decimal_mode	mr;
117 	decimal_record	dr;
118 	fp_exception_field_type fs;
119 	int		form;
120 
121 	wstring_to_decimal(&cp, 1, &dr, &form);
122 	if (ptr != NULL)
123 		*ptr = (wchar_t *)cp;
124 	if (form == 0)
125 		return (0.0L);
126 #if defined(__i386) || defined(__amd64)
127 	mr.rd = __xgetRD();
128 	if (form < 0)
129 		__hex_to_extended(&dr, mr.rd, (extended *)&x, &fs);
130 	else
131 		decimal_to_extended((extended *)&x, &mr, &dr, &fs);
132 #elif defined(__sparc)
133 	mr.rd = _QgetRD();
134 	if (form < 0)
135 		__hex_to_quadruple(&dr, mr.rd, &x, &fs);
136 	else
137 		decimal_to_quadruple(&x, &mr, &dr, &fs);
138 #else
139 #error Unknown architecture!
140 #endif
141 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
142 		errno = ERANGE;
143 	return (x);
144 }
145 
/*
 * wstod - alternate entry point that forwards its arguments to wcstod()
 * unchanged and returns wcstod()'s result.
 */
double
wstod(const wchar_t *cp, wchar_t **ptr)
{
	double	value;

	value = wcstod(cp, ptr);
	return (value);
}
151 
/*
 * Upper-case spellings that wstring_to_decimal matches (case-insensitively,
 * via UCASE) when it sees a leading 'i'/'I' or 'n'/'N'.  They are constant
 * arrays rather than pointers so that nothing about them can be modified
 * at run time.
 */
static const char infstring[] = "INFINITY";
static const char nanstring[] = "NAN";
154 
/*
 * The following macro is applied to wchar_t arguments solely for the
 * purpose of comparing the result with one of the characters in the
 * strings above.  It maps L'a'..L'z' to the value 32 lower (the
 * corresponding upper-case letter in ASCII) and yields every other
 * value unchanged.
 *
 * The argument is fully parenthesized so that any expression may be
 * passed, but it is evaluated more than once: do not pass an argument
 * with side effects.
 */
#define	UCASE(c)	(((L'a' <= (c)) && ((c) <= L'z')) ? (c) - 32 : (c))
161 
/*
 * The following macro yields an expression that is true whenever
 * the argument is a valid nonzero digit for the form being parsed:
 * L'1'..L'9' always, plus L'a'..L'f' and L'A'..L'F' when a hex fp
 * string is being parsed.
 *
 * The macro reads a variable named `form' from the scope in which it
 * is expanded (form < 0 means hex fp).  The argument is fully
 * parenthesized but is evaluated more than once: do not pass an
 * argument with side effects.
 */
#define	NZDIGIT(c)	((L'1' <= (c) && (c) <= L'9') || (form < 0 && \
			((L'a' <= (c) && (c) <= L'f') || \
			(L'A' <= (c) && (c) <= L'F'))))
168 
169 /*
170  * wstring_to_decimal is modelled on string_to_decimal, the majority
171  * of which can be found in the common file char_to_decimal.h.  The
172  * significant differences are:
173  *
174  * 1. This code recognizes only C99 (hex fp strings and restricted
175  *    characters in parentheses following "nan") vs. C90 modes, no
176  *    Fortran conventions.
177  *
178  * 2. *pform is an int rather than an enum decimal_string_form.  On
179  *    return, *pform == 0 if no valid token was found, *pform < 0
180  *    if a C99 hex fp string was found, and *pform > 0 if a decimal
181  *    string was found.
182  */
183 static void
184 wstring_to_decimal(const wchar_t **ppc, int c99, decimal_record *pd,
185     int *pform)
186 {
187 	const wchar_t	*cp = *ppc; /* last character seen */
188 	const wchar_t	*good = cp - 1;	/* last character accepted */
189 	wchar_t		current; /* always equal to *cp */
190 	int		sigfound;
191 	int		ids = 0;
192 	int		i, agree;
193 	int		nzbp = 0; /* number of zeros before point */
194 	int		nzap = 0; /* number of zeros after point */
195 	char		decpt;
196 	int		nfast, nfastlimit;
197 	char		*pfast;
198 	int		e, esign;
199 	int		expshift = 0;
200 	int		form;
201 
202 	/*
203 	 * This routine assumes that the radix point is a single
204 	 * ASCII character, so that following this assignment, the
205 	 * condition (current == decpt) will correctly detect it.
206 	 */
207 	decpt = *(localeconv()->decimal_point);
208 
209 	/* input is invalid until we find something */
210 	pd->fpclass = fp_signaling;
211 	pd->sign = 0;
212 	pd->exponent = 0;
213 	pd->ds[0] = '\0';
214 	pd->more = 0;
215 	pd->ndigits = 0;
216 	*pform = form = 0;
217 
218 	/* skip white space */
219 	current = *cp;
220 	while (iswspace((wint_t)current))
221 		current = *++cp;
222 
223 	/* look for optional leading sign */
224 	if (current == L'+') {
225 		current = *++cp;
226 	} else if (current == L'-') {
227 		pd->sign = 1;
228 		current = *++cp;
229 	}
230 
231 	sigfound = -1;		/* -1 = no digits found yet */
232 
233 	/*
234 	 * Admissible first non-white-space, non-sign characters are
235 	 * 0-9, i, I, n, N, or the radix point.
236 	 */
237 	if (L'1' <= current && current <= L'9') {
238 		pd->fpclass = fp_normal;
239 		form = 1;
240 		good = cp;
241 		sigfound = 1;	/* 1 = significant digits found */
242 		pd->ds[ids++] = (char)current;
243 		current = *++cp;
244 	} else {
245 		switch (current) {
246 		case L'0':
247 			/*
248 			 * Accept the leading zero and set pd->fpclass
249 			 * accordingly, but don't set sigfound until we
250 			 * determine that this isn't a "fake" hex string
251 			 * (i.e., 0x.p...).
252 			 */
253 			good = cp;
254 			pd->fpclass = fp_zero;
255 			if (c99) {
256 				/* look for a hex fp string */
257 				current = *++cp;
258 				if (current == L'X' || current == L'x') {
259 					/* assume hex fp form */
260 					form = -1;
261 					expshift = 2;
262 					current = *++cp;
263 					/*
264 					 * Only a digit or radix point can
265 					 * follow "0x".
266 					 */
267 					if (NZDIGIT(current)) {
268 						pd->fpclass = fp_normal;
269 						good = cp;
270 						sigfound = 1;
271 						pd->ds[ids++] = (char)current;
272 						current = *++cp;
273 						break;
274 					} else if (current == (wchar_t)decpt) {
275 						current = *++cp;
276 						goto afterpoint;
277 					} else if (current != L'0') {
278 						/* not hex fp after all */
279 						form = 1;
280 						expshift = 0;
281 						goto done;
282 					}
283 				} else {
284 					form = 1;
285 				}
286 			} else {
287 				form = 1;
288 			}
289 
290 			/* skip all leading zeros */
291 			while (current == L'0')
292 				current = *++cp;
293 			good = cp - 1;
294 			sigfound = 0;	/* 0 = only zeros found so far */
295 			break;
296 
297 		case L'i':
298 		case L'I':
299 			/* look for inf or infinity */
300 			current = *++cp;
301 			agree = 1;
302 			while (agree <= 7 &&
303 			    UCASE(current) == (wchar_t)infstring[agree]) {
304 				current = *++cp;
305 				agree++;
306 			}
307 			if (agree >= 3) {
308 				/* found valid infinity */
309 				pd->fpclass = fp_infinity;
310 				form = 1;
311 				good = (agree < 8)? cp + 2 - agree : cp - 1;
312 				__inf_read = 1;
313 			}
314 			goto done;
315 
316 		case L'n':
317 		case L'N':
318 			/* look for nan or nan(string) */
319 			current = *++cp;
320 			agree = 1;
321 			while (agree <= 2 &&
322 			    UCASE(current) == (wchar_t)nanstring[agree]) {
323 				current = *++cp;
324 				agree++;
325 			}
326 			if (agree == 3) {
327 				/* found valid NaN */
328 				pd->fpclass = fp_quiet;
329 				form = 1;
330 				good = cp - 1;
331 				__nan_read = 1;
332 				if (current == L'(') {
333 					/* accept parenthesized string */
334 					if (c99) {
335 						do {
336 							current = *++cp;
337 						} while (iswalnum(current) ||
338 						    current == L'_');
339 					} else {
340 						do {
341 							current = *++cp;
342 						} while (current &&
343 						    current != L')');
344 					}
345 					if (current == L')')
346 						good = cp;
347 				}
348 			}
349 			goto done;
350 
351 		default:
352 			if (current == (wchar_t)decpt) {
353 				/*
354 				 * Don't accept the radix point just yet;
355 				 * we need to see at least one digit.
356 				 */
357 				current = *++cp;
358 				goto afterpoint;
359 			}
360 			goto done;
361 		}
362 	}
363 
364 nextnumber:
365 	/*
366 	 * Admissible characters after the first digit are a valid
367 	 * digit, an exponent delimiter (E or e for decimal form,
368 	 * P or p for hex form), or the radix point.  (Note that we
369 	 * can't get here unless we've already found a digit.)
370 	 */
371 	if (NZDIGIT(current)) {
372 		/*
373 		 * Found another nonzero digit.  If there's enough room
374 		 * in pd->ds, store any intervening zeros we've found so far
375 		 * and then store this digit.  Otherwise, stop storing
376 		 * digits in pd->ds and set pd->more.
377 		 */
378 		if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
379 			for (i = 0; i < nzbp; i++)
380 				pd->ds[ids++] = '0';
381 			pd->ds[ids++] = (char)current;
382 		} else {
383 			pd->exponent += (nzbp + 1) << expshift;
384 			pd->more = 1;
385 			if (ids < DECIMAL_STRING_LENGTH) {
386 				pd->ds[ids] = '\0';
387 				pd->ndigits = ids;
388 				/* don't store any more digits */
389 				ids = DECIMAL_STRING_LENGTH;
390 			}
391 		}
392 		pd->fpclass = fp_normal;
393 		sigfound = 1;
394 		nzbp = 0;
395 		current = *++cp;
396 
397 		/*
398 		 * Use an optimized loop to grab a consecutive sequence
399 		 * of nonzero digits quickly.
400 		 */
401 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
402 		for (nfast = 0, pfast = &(pd->ds[ids]);
403 		    nfast < nfastlimit && NZDIGIT(current);
404 		    nfast++) {
405 			*pfast++ = (char)current;
406 			current = *++cp;
407 		}
408 		ids += nfast;
409 		if (current == L'0')
410 			goto nextnumberzero;	/* common case */
411 		/* advance good to the last accepted digit */
412 		good = cp - 1;
413 		goto nextnumber;
414 	} else {
415 		switch (current) {
416 		case L'0':
417 nextnumberzero:
418 			/*
419 			 * Count zeros before the radix point.  Later we
420 			 * will either put these zeros into pd->ds or add
421 			 * nzbp to pd->exponent to account for them.
422 			 */
423 			while (current == L'0') {
424 				nzbp++;
425 				current = *++cp;
426 			}
427 			good = cp - 1;
428 			goto nextnumber;
429 
430 		case L'E':
431 		case L'e':
432 			if (form < 0)
433 				goto done;
434 			goto exponent;
435 
436 		case L'P':
437 		case L'p':
438 			if (form > 0)
439 				goto done;
440 			goto exponent;
441 
442 		default:
443 			if (current == decpt) {
444 				/* accept the radix point */
445 				good = cp;
446 				current = *++cp;
447 				goto afterpoint;
448 			}
449 			goto done;
450 		}
451 	}
452 
453 afterpoint:
454 	/*
455 	 * Admissible characters after the radix point are a valid digit
456 	 * or an exponent delimiter.  (Note that it is possible to get
457 	 * here even though we haven't found any digits yet.)
458 	 */
459 	if (NZDIGIT(current)) {
460 		if (form == 0)
461 			form = 1;
462 		if (sigfound < 1) {
463 			/* no significant digits found until now */
464 			pd->fpclass = fp_normal;
465 			sigfound = 1;
466 			pd->ds[ids++] = (char)current;
467 			pd->exponent = (-(nzap + 1)) << expshift;
468 		} else {
469 			/* significant digits have been found */
470 			if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
471 				for (i = 0; i < nzbp + nzap; i++)
472 					pd->ds[ids++] = '0';
473 				pd->ds[ids++] = (char)current;
474 				pd->exponent -= (nzap + 1) << expshift;
475 			} else {
476 				pd->exponent += nzbp << expshift;
477 				pd->more = 1;
478 				if (ids < DECIMAL_STRING_LENGTH) {
479 					pd->ds[ids] = '\0';
480 					pd->ndigits = ids;
481 					/* don't store any more digits */
482 					ids = DECIMAL_STRING_LENGTH;
483 				}
484 			}
485 		}
486 		nzbp = 0;
487 		nzap = 0;
488 		current = *++cp;
489 
490 		/*
491 		 * Use an optimized loop to grab a consecutive sequence
492 		 * of nonzero digits quickly.
493 		 */
494 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
495 		for (nfast = 0, pfast = &(pd->ds[ids]);
496 		    nfast < nfastlimit && NZDIGIT(current);
497 		    nfast++) {
498 			*pfast++ = (char)current;
499 			current = *++cp;
500 		}
501 		ids += nfast;
502 		pd->exponent -= nfast << expshift;
503 		if (current == L'0')
504 			goto zeroafterpoint;
505 		/* advance good to the last accepted digit */
506 		good = cp - 1;
507 		goto afterpoint;
508 	} else {
509 		switch (current) {
510 		case L'0':
511 			if (form == 0)
512 				form = 1;
513 			if (sigfound == -1) {
514 				pd->fpclass = fp_zero;
515 				sigfound = 0;
516 			}
517 zeroafterpoint:
518 			/*
519 			 * Count zeros after the radix point.  If we find
520 			 * any more nonzero digits later, we will put these
521 			 * zeros into pd->ds and decrease pd->exponent by
522 			 * nzap.
523 			 */
524 			while (current == L'0') {
525 				nzap++;
526 				current = *++cp;
527 			}
528 			good = cp - 1;
529 			goto afterpoint;
530 
531 		case L'E':
532 		case L'e':
533 			/* don't accept exponent without preceding digits */
534 			if (sigfound == -1 || form < 0)
535 				goto done;
536 			break;
537 
538 		case L'P':
539 		case L'p':
540 			/* don't accept exponent without preceding digits */
541 			if (sigfound == -1 || form > 0)
542 				goto done;
543 			break;
544 
545 		default:
546 			goto done;
547 		}
548 	}
549 
550 exponent:
551 	e = 0;
552 	esign = 0;
553 
554 	/* look for optional exponent sign */
555 	current = *++cp;
556 	if (current == L'+') {
557 		current = *++cp;
558 	} else if (current == L'-') {
559 		esign = 1;
560 		current = *++cp;
561 	}
562 
563 	/*
564 	 * Accumulate explicit exponent.  Note that if we don't find at
565 	 * least one digit, good won't be updated and e will remain 0.
566 	 * Also, we keep e from getting too large so we don't overflow
567 	 * the range of int (but notice that the threshold is large
568 	 * enough that any larger e would cause the result to underflow
569 	 * or overflow anyway).
570 	 */
571 	while (L'0' <= current && current <= L'9') {
572 		good = cp;
573 		if (e <= 1000000)
574 			e = 10 * e + current - L'0';
575 		current = *++cp;
576 	}
577 	if (esign)
578 		pd->exponent -= e;
579 	else
580 		pd->exponent += e;
581 
582 done:
583 	/*
584 	 * If we found any zeros before the radix point that were not
585 	 * accounted for earlier, adjust the exponent.  (This is only
586 	 * relevant when pd->fpclass == fp_normal, but it's harmless
587 	 * in all other cases.)
588 	 */
589 	pd->exponent += nzbp << expshift;
590 
591 	/* terminate pd->ds if we haven't already */
592 	if (ids < DECIMAL_STRING_LENGTH) {
593 		pd->ds[ids] = '\0';
594 		pd->ndigits = ids;
595 	}
596 
597 	/*
598 	 * If we accepted any characters, advance *ppc to point to the
599 	 * first character we didn't accept; otherwise, pass back a
600 	 * signaling nan.
601 	 */
602 	if (good >= *ppc) {
603 		*ppc = good + 1;
604 	} else {
605 		pd->fpclass = fp_signaling;
606 		pd->sign = 0;
607 		form = 0;
608 	}
609 
610 	*pform = form;
611 }
612