xref: /illumos-gate/usr/src/lib/libc/port/i18n/wstod.c (revision 24da5b34f49324ed742a340010ed5bd3d4e06625)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*	Copyright (c) 1988 AT&T	*/
30 /*	  All Rights Reserved  	*/
31 
32 
33 /*
34  * This file is based on /usr/src/lib/libc/port/gen/strtod.c and
35  * /usr/src/lib/libc/sparc/fp/string_decim.c
36  */
37 
38 #pragma weak wcstod = _wcstod
39 #pragma weak wstod = _wstod
40 
41 #include "synonyms.h"
42 #include <errno.h>
43 #include <stdio.h>
44 #include <values.h>
45 #include <floatingpoint.h>
46 #include <stddef.h>
47 #include <wctype.h>
48 #include "base_conversion.h"	/* from usr/src/lib/libc/inc */
49 #include <locale.h>
50 #include "libc.h"
51 #include "xpg6.h"
52 
53 static void wstring_to_decimal(const wchar_t **, int, decimal_record *, int *);
54 
55 double
56 _wcstod(const wchar_t *cp, wchar_t **ptr)
57 {
58 	double		x;
59 	decimal_mode	mr;
60 	decimal_record	dr;
61 	fp_exception_field_type fs;
62 	int 		form;
63 
64 	wstring_to_decimal(&cp, __xpg6 & _C99SUSv3_recognize_hexfp, &dr, &form);
65 	if (ptr != NULL)
66 		*ptr = (wchar_t *)cp;
67 	if (form == 0)
68 		return (0.0);	/* Shameful kluge for SVID's sake. */
69 #if defined(__i386) || defined(__amd64)
70 	mr.rd = __xgetRD();
71 #elif defined(__sparc)
72 	mr.rd = _QgetRD();
73 #else
74 #error Unknown architecture!
75 #endif
76 	if (form < 0)
77 		__hex_to_double(&dr, mr.rd, &x, &fs);
78 	else
79 		decimal_to_double(&x, &mr, &dr, &fs);
80 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
81 		errno = ERANGE;
82 	return (x);
83 }
84 
85 float
86 wcstof(const wchar_t *cp, wchar_t **ptr)
87 {
88 	float		x;
89 	decimal_mode	mr;
90 	decimal_record	dr;
91 	fp_exception_field_type fs;
92 	int		form;
93 
94 	wstring_to_decimal(&cp, 1, &dr, &form);
95 	if (ptr != NULL)
96 		*ptr = (wchar_t *)cp;
97 	if (form == 0)
98 		return (0.0f);
99 #if defined(__i386) || defined(__amd64)
100 	mr.rd = __xgetRD();
101 #elif defined(__sparc)
102 	mr.rd = _QgetRD();
103 #else
104 #error Unknown architecture!
105 #endif
106 	if (form < 0)
107 		__hex_to_single(&dr, mr.rd, &x, &fs);
108 	else
109 		decimal_to_single(&x, &mr, &dr, &fs);
110 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
111 		errno = ERANGE;
112 	return (x);
113 }
114 
115 long double
116 wcstold(const wchar_t *cp, wchar_t **ptr)
117 {
118 	long double	x;
119 	decimal_mode	mr;
120 	decimal_record	dr;
121 	fp_exception_field_type fs;
122 	int		form;
123 
124 	wstring_to_decimal(&cp, 1, &dr, &form);
125 	if (ptr != NULL)
126 		*ptr = (wchar_t *)cp;
127 	if (form == 0)
128 		return (0.0L);
129 #if defined(__i386) || defined(__amd64)
130 	mr.rd = __xgetRD();
131 	if (form < 0)
132 		__hex_to_extended(&dr, mr.rd, (extended *)&x, &fs);
133 	else
134 		decimal_to_extended((extended *)&x, &mr, &dr, &fs);
135 #elif defined(__sparc)
136 	mr.rd = _QgetRD();
137 	if (form < 0)
138 		__hex_to_quadruple(&dr, mr.rd, &x, &fs);
139 	else
140 		decimal_to_quadruple(&x, &mr, &dr, &fs);
141 #else
142 #error Unknown architecture!
143 #endif
144 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
145 		errno = ERANGE;
146 	return (x);
147 }
148 
/*
 * Alternate entry point: forwards both arguments to _wcstod() and
 * returns its result unchanged.
 */
double
_wstod(const wchar_t *cp, wchar_t **ptr)
{
	double	value = _wcstod(cp, ptr);

	return (value);
}
154 
/*
 * Upper-case spellings of the special values; input characters are
 * folded with UCASE before being compared against these.
 */
static const char infstring[] = "INFINITY";
static const char nanstring[] = "NAN";

/*
 * The following macro is applied to wchar_t arguments solely for the
 * purpose of comparing the result with one of the characters in the
 * strings above.  It maps L'a'..L'z' to the value 32 lower and leaves
 * every other value unchanged.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	UCASE(c)	(((L'a' <= (c)) && ((c) <= L'z')) ? (c) - 32 : (c))

/*
 * The following macro yields an expression that is true whenever
 * the argument is a valid nonzero digit for the form being parsed.
 * It reads the variable "form" from the enclosing scope: hex digits
 * a-f and A-F are accepted only when form < 0.  The argument is
 * evaluated more than once, so it must not have side effects.
 */
#define	NZDIGIT(c)	((L'1' <= (c) && (c) <= L'9') || (form < 0 && \
			((L'a' <= (c) && (c) <= L'f') || \
			(L'A' <= (c) && (c) <= L'F'))))
171 
172 /*
173  * wstring_to_decimal is modelled on string_to_decimal, the majority
174  * of which can be found in the common file char_to_decimal.h.  The
175  * significant differences are:
176  *
177  * 1. This code recognizes only C99 (hex fp strings and restricted
178  *    characters in parentheses following "nan") vs. C90 modes, no
179  *    Fortran conventions.
180  *
181  * 2. *pform is an int rather than an enum decimal_string_form.  On
182  *    return, *pform == 0 if no valid token was found, *pform < 0
183  *    if a C99 hex fp string was found, and *pform > 0 if a decimal
184  *    string was found.
185  */
186 static void
187 wstring_to_decimal(const wchar_t **ppc, int c99, decimal_record *pd,
188     int *pform)
189 {
190 	const wchar_t	*cp = *ppc; /* last character seen */
191 	const wchar_t	*good = cp - 1;	/* last character accepted */
192 	wchar_t		current; /* always equal to *cp */
193 	int		sigfound;
194 	int		ids = 0;
195 	int		i, agree;
196 	int		nzbp = 0; /* number of zeros before point */
197 	int		nzap = 0; /* number of zeros after point */
198 	char		decpt;
199 	int		nfast, nfastlimit;
200 	char		*pfast;
201 	int		e, esign;
202 	int		expshift = 0;
203 	int		form;
204 
205 	/*
206 	 * This routine assumes that the radix point is a single
207 	 * ASCII character, so that following this assignment, the
208 	 * condition (current == decpt) will correctly detect it.
209 	 */
210 	decpt = *(localeconv()->decimal_point);
211 
212 	/* input is invalid until we find something */
213 	pd->fpclass = fp_signaling;
214 	pd->sign = 0;
215 	pd->exponent = 0;
216 	pd->ds[0] = '\0';
217 	pd->more = 0;
218 	pd->ndigits = 0;
219 	*pform = form = 0;
220 
221 	/* skip white space */
222 	current = *cp;
223 	while (iswspace((wint_t)current))
224 		current = *++cp;
225 
226 	/* look for optional leading sign */
227 	if (current == L'+') {
228 		current = *++cp;
229 	} else if (current == L'-') {
230 		pd->sign = 1;
231 		current = *++cp;
232 	}
233 
234 	sigfound = -1;		/* -1 = no digits found yet */
235 
236 	/*
237 	 * Admissible first non-white-space, non-sign characters are
238 	 * 0-9, i, I, n, N, or the radix point.
239 	 */
240 	if (L'1' <= current && current <= L'9') {
241 		pd->fpclass = fp_normal;
242 		form = 1;
243 		good = cp;
244 		sigfound = 1;	/* 1 = significant digits found */
245 		pd->ds[ids++] = (char)current;
246 		current = *++cp;
247 	} else {
248 		switch (current) {
249 		case L'0':
250 			/*
251 			 * Accept the leading zero and set pd->fpclass
252 			 * accordingly, but don't set sigfound until we
253 			 * determine that this isn't a "fake" hex string
254 			 * (i.e., 0x.p...).
255 			 */
256 			good = cp;
257 			pd->fpclass = fp_zero;
258 			if (c99) {
259 				/* look for a hex fp string */
260 				current = *++cp;
261 				if (current == L'X' || current == L'x') {
262 					/* assume hex fp form */
263 					form = -1;
264 					expshift = 2;
265 					current = *++cp;
266 					/*
267 					 * Only a digit or radix point can
268 					 * follow "0x".
269 					 */
270 					if (NZDIGIT(current)) {
271 						pd->fpclass = fp_normal;
272 						good = cp;
273 						sigfound = 1;
274 						pd->ds[ids++] = (char)current;
275 						current = *++cp;
276 						break;
277 					} else if (current == (wchar_t)decpt) {
278 						current = *++cp;
279 						goto afterpoint;
280 					} else if (current != L'0') {
281 						/* not hex fp after all */
282 						form = 1;
283 						expshift = 0;
284 						goto done;
285 					}
286 				} else {
287 					form = 1;
288 				}
289 			} else {
290 				form = 1;
291 			}
292 
293 			/* skip all leading zeros */
294 			while (current == L'0')
295 				current = *++cp;
296 			good = cp - 1;
297 			sigfound = 0;	/* 0 = only zeros found so far */
298 			break;
299 
300 		case L'i':
301 		case L'I':
302 			/* look for inf or infinity */
303 			current = *++cp;
304 			agree = 1;
305 			while (agree <= 7 &&
306 			    UCASE(current) == (wchar_t)infstring[agree]) {
307 				current = *++cp;
308 				agree++;
309 			}
310 			if (agree >= 3) {
311 				/* found valid infinity */
312 				pd->fpclass = fp_infinity;
313 				form = 1;
314 				good = (agree < 8)? cp + 2 - agree : cp - 1;
315 				__inf_read = 1;
316 			}
317 			goto done;
318 
319 		case L'n':
320 		case L'N':
321 			/* look for nan or nan(string) */
322 			current = *++cp;
323 			agree = 1;
324 			while (agree <= 2 &&
325 			    UCASE(current) == (wchar_t)nanstring[agree]) {
326 				current = *++cp;
327 				agree++;
328 			}
329 			if (agree == 3) {
330 				/* found valid NaN */
331 				pd->fpclass = fp_quiet;
332 				form = 1;
333 				good = cp - 1;
334 				__nan_read = 1;
335 				if (current == L'(') {
336 					/* accept parenthesized string */
337 					if (c99) {
338 						do {
339 							current = *++cp;
340 						} while (iswalnum(current) ||
341 						    current == L'_');
342 					} else {
343 						do {
344 							current = *++cp;
345 						} while (current &&
346 						    current != L')');
347 					}
348 					if (current == L')')
349 						good = cp;
350 				}
351 			}
352 			goto done;
353 
354 		default:
355 			if (current == (wchar_t)decpt) {
356 				/*
357 				 * Don't accept the radix point just yet;
358 				 * we need to see at least one digit.
359 				 */
360 				current = *++cp;
361 				goto afterpoint;
362 			}
363 			goto done;
364 		}
365 	}
366 
367 nextnumber:
368 	/*
369 	 * Admissible characters after the first digit are a valid
370 	 * digit, an exponent delimiter (E or e for decimal form,
371 	 * P or p for hex form), or the radix point.  (Note that we
372 	 * can't get here unless we've already found a digit.)
373 	 */
374 	if (NZDIGIT(current)) {
375 		/*
376 		 * Found another nonzero digit.  If there's enough room
377 		 * in pd->ds, store any intervening zeros we've found so far
378 		 * and then store this digit.  Otherwise, stop storing
379 		 * digits in pd->ds and set pd->more.
380 		 */
381 		if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
382 			for (i = 0; i < nzbp; i++)
383 				pd->ds[ids++] = '0';
384 			pd->ds[ids++] = (char)current;
385 		} else {
386 			pd->exponent += (nzbp + 1) << expshift;
387 			pd->more = 1;
388 			if (ids < DECIMAL_STRING_LENGTH) {
389 				pd->ds[ids] = '\0';
390 				pd->ndigits = ids;
391 				/* don't store any more digits */
392 				ids = DECIMAL_STRING_LENGTH;
393 			}
394 		}
395 		pd->fpclass = fp_normal;
396 		sigfound = 1;
397 		nzbp = 0;
398 		current = *++cp;
399 
400 		/*
401 		 * Use an optimized loop to grab a consecutive sequence
402 		 * of nonzero digits quickly.
403 		 */
404 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
405 		for (nfast = 0, pfast = &(pd->ds[ids]);
406 		    nfast < nfastlimit && NZDIGIT(current);
407 		    nfast++) {
408 			*pfast++ = (char)current;
409 			current = *++cp;
410 		}
411 		ids += nfast;
412 		if (current == L'0')
413 			goto nextnumberzero;	/* common case */
414 		/* advance good to the last accepted digit */
415 		good = cp - 1;
416 		goto nextnumber;
417 	} else {
418 		switch (current) {
419 		case L'0':
420 nextnumberzero:
421 			/*
422 			 * Count zeros before the radix point.  Later we
423 			 * will either put these zeros into pd->ds or add
424 			 * nzbp to pd->exponent to account for them.
425 			 */
426 			while (current == L'0') {
427 				nzbp++;
428 				current = *++cp;
429 			}
430 			good = cp - 1;
431 			goto nextnumber;
432 
433 		case L'E':
434 		case L'e':
435 			if (form < 0)
436 				goto done;
437 			goto exponent;
438 
439 		case L'P':
440 		case L'p':
441 			if (form > 0)
442 				goto done;
443 			goto exponent;
444 
445 		default:
446 			if (current == decpt) {
447 				/* accept the radix point */
448 				good = cp;
449 				current = *++cp;
450 				goto afterpoint;
451 			}
452 			goto done;
453 		}
454 	}
455 
456 afterpoint:
457 	/*
458 	 * Admissible characters after the radix point are a valid digit
459 	 * or an exponent delimiter.  (Note that it is possible to get
460 	 * here even though we haven't found any digits yet.)
461 	 */
462 	if (NZDIGIT(current)) {
463 		if (form == 0)
464 			form = 1;
465 		if (sigfound < 1) {
466 			/* no significant digits found until now */
467 			pd->fpclass = fp_normal;
468 			sigfound = 1;
469 			pd->ds[ids++] = (char)current;
470 			pd->exponent = (-(nzap + 1)) << expshift;
471 		} else {
472 			/* significant digits have been found */
473 			if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
474 				for (i = 0; i < nzbp + nzap; i++)
475 					pd->ds[ids++] = '0';
476 				pd->ds[ids++] = (char)current;
477 				pd->exponent -= (nzap + 1) << expshift;
478 			} else {
479 				pd->exponent += nzbp << expshift;
480 				pd->more = 1;
481 				if (ids < DECIMAL_STRING_LENGTH) {
482 					pd->ds[ids] = '\0';
483 					pd->ndigits = ids;
484 					/* don't store any more digits */
485 					ids = DECIMAL_STRING_LENGTH;
486 				}
487 			}
488 		}
489 		nzbp = 0;
490 		nzap = 0;
491 		current = *++cp;
492 
493 		/*
494 		 * Use an optimized loop to grab a consecutive sequence
495 		 * of nonzero digits quickly.
496 		 */
497 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
498 		for (nfast = 0, pfast = &(pd->ds[ids]);
499 		    nfast < nfastlimit && NZDIGIT(current);
500 		    nfast++) {
501 			*pfast++ = (char)current;
502 			current = *++cp;
503 		}
504 		ids += nfast;
505 		pd->exponent -= nfast << expshift;
506 		if (current == L'0')
507 			goto zeroafterpoint;
508 		/* advance good to the last accepted digit */
509 		good = cp - 1;
510 		goto afterpoint;
511 	} else {
512 		switch (current) {
513 		case L'0':
514 			if (form == 0)
515 				form = 1;
516 			if (sigfound == -1) {
517 				pd->fpclass = fp_zero;
518 				sigfound = 0;
519 			}
520 zeroafterpoint:
521 			/*
522 			 * Count zeros after the radix point.  If we find
523 			 * any more nonzero digits later, we will put these
524 			 * zeros into pd->ds and decrease pd->exponent by
525 			 * nzap.
526 			 */
527 			while (current == L'0') {
528 				nzap++;
529 				current = *++cp;
530 			}
531 			good = cp - 1;
532 			goto afterpoint;
533 
534 		case L'E':
535 		case L'e':
536 			/* don't accept exponent without preceding digits */
537 			if (sigfound == -1 || form < 0)
538 				goto done;
539 			break;
540 
541 		case L'P':
542 		case L'p':
543 			/* don't accept exponent without preceding digits */
544 			if (sigfound == -1 || form > 0)
545 				goto done;
546 			break;
547 
548 		default:
549 			goto done;
550 		}
551 	}
552 
553 exponent:
554 	e = 0;
555 	esign = 0;
556 
557 	/* look for optional exponent sign */
558 	current = *++cp;
559 	if (current == L'+') {
560 		current = *++cp;
561 	} else if (current == L'-') {
562 		esign = 1;
563 		current = *++cp;
564 	}
565 
566 	/*
567 	 * Accumulate explicit exponent.  Note that if we don't find at
568 	 * least one digit, good won't be updated and e will remain 0.
569 	 * Also, we keep e from getting too large so we don't overflow
570 	 * the range of int (but notice that the threshold is large
571 	 * enough that any larger e would cause the result to underflow
572 	 * or overflow anyway).
573 	 */
574 	while (L'0' <= current && current <= L'9') {
575 		good = cp;
576 		if (e <= 1000000)
577 			e = 10 * e + current - L'0';
578 		current = *++cp;
579 	}
580 	if (esign)
581 		pd->exponent -= e;
582 	else
583 		pd->exponent += e;
584 
585 done:
586 	/*
587 	 * If we found any zeros before the radix point that were not
588 	 * accounted for earlier, adjust the exponent.  (This is only
589 	 * relevant when pd->fpclass == fp_normal, but it's harmless
590 	 * in all other cases.)
591 	 */
592 	pd->exponent += nzbp << expshift;
593 
594 	/* terminate pd->ds if we haven't already */
595 	if (ids < DECIMAL_STRING_LENGTH) {
596 		pd->ds[ids] = '\0';
597 		pd->ndigits = ids;
598 	}
599 
600 	/*
601 	 * If we accepted any characters, advance *ppc to point to the
602 	 * first character we didn't accept; otherwise, pass back a
603 	 * signaling nan.
604 	 */
605 	if (good >= *ppc) {
606 		*ppc = good + 1;
607 	} else {
608 		pd->fpclass = fp_signaling;
609 		pd->sign = 0;
610 		form = 0;
611 	}
612 
613 	*pform = form;
614 }
615