/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
#ifndef _MATH_PRIVATE_H_
#define	_MATH_PRIVATE_H_

#include <sys/types.h>
#include <machine/endian.h>

/*
 * The original fdlibm code used statements like:
 *	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
 *	ix0 = *(n0+(int*)&x);			* high word of x *
 *	ix1 = *((1-n0)+(int*)&x);		* low word of x *
 * to dig two 32 bit words out of the 64 bit IEEE floating point
 * value.  That is non-ANSI, and, moreover, the gcc instruction
 * scheduler gets it wrong.  We instead use the following macros.
 * Unlike the original code, we determine the endianness at compile
 * time, not at run time; I don't see much benefit to selecting
 * endianness at run time.
 */

/*
 * A union which permits us to convert between a double and two 32 bit
 * ints.
 */

#ifdef __arm__
#if defined(__VFP_FP__) || defined(__ARM_EABI__)
#define	IEEE_WORD_ORDER	BYTE_ORDER
#else
#define	IEEE_WORD_ORDER	BIG_ENDIAN
#endif
#else /* __arm__ */
#define	IEEE_WORD_ORDER	BYTE_ORDER
#endif

/* A union which permits us to convert between a long double and
   four 32 bit ints.  */

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t mswhi;
    u_int32_t mswlo;
    u_int32_t lswhi;
    u_int32_t lswlo;
  } parts32;
  struct {
    u_int64_t msw;
    u_int64_t lsw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t lswlo;
    u_int32_t lswhi;
    u_int32_t mswlo;
    u_int32_t mswhi;
  } parts32;
  struct {
    u_int64_t lsw;
    u_int64_t msw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t msw;
    u_int32_t lsw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t lsw;
    u_int32_t msw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

/* Get two 32 bit ints from a double.  */

#define EXTRACT_WORDS(ix0,ix1,d)				\
do {								\
  ieee_double_shape_type ew_u;					\
  ew_u.value = (d);						\
  (ix0) = ew_u.parts.msw;					\
  (ix1) = ew_u.parts.lsw;					\
} while (0)

/* Get a 64-bit int from a double. */
#define EXTRACT_WORD64(ix,d)					\
do {								\
  ieee_double_shape_type ew_u;					\
  ew_u.value = (d);						\
  (ix) = ew_u.xparts.w;						\
} while (0)

/* Get the more significant 32 bit int from a double.  */

#define GET_HIGH_WORD(i,d)					\
do {								\
  ieee_double_shape_type gh_u;					\
  gh_u.value = (d);						\
  (i) = gh_u.parts.msw;						\
} while (0)

/* Get the less significant 32 bit int from a double.  */

#define GET_LOW_WORD(i,d)					\
do {								\
  ieee_double_shape_type gl_u;					\
  gl_u.value = (d);						\
  (i) = gl_u.parts.lsw;						\
} while (0)

/* Set a double from two 32 bit ints.  */

#define INSERT_WORDS(d,ix0,ix1)					\
do {								\
  ieee_double_shape_type iw_u;					\
  iw_u.parts.msw = (ix0);					\
  iw_u.parts.lsw = (ix1);					\
  (d) = iw_u.value;						\
} while (0)

/* Set a double from a 64-bit int. */
#define INSERT_WORD64(d,ix)					\
do {								\
  ieee_double_shape_type iw_u;					\
  iw_u.xparts.w = (ix);						\
  (d) = iw_u.value;						\
} while (0)

/* Set the more significant 32 bits of a double from an int.  */

#define SET_HIGH_WORD(d,v)					\
do {								\
  ieee_double_shape_type sh_u;					\
  sh_u.value = (d);						\
  sh_u.parts.msw = (v);						\
  (d) = sh_u.value;						\
} while (0)

/* Set the less significant 32 bits of a double from an int.  */

#define SET_LOW_WORD(d,v)					\
do {								\
  ieee_double_shape_type sl_u;					\
  sl_u.value = (d);						\
  sl_u.parts.lsw = (v);						\
} while (0)
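
/*
 * Illustrative sketch (not compiled): the word-access macros above are
 * the building blocks for bit-level operations on a double.  The
 * constants below assume IEEE 754 double format; e.g. -2.0 has high
 * word 0xc0000000 and a zero low word, and clearing the sign bit of
 * the high word computes fabs().
 */
#if 0
	double x = -2.0, y;
	u_int32_t hi, lo;

	EXTRACT_WORDS(hi, lo, x);		/* hi == 0xc0000000, lo == 0 */
	INSERT_WORDS(y, hi & 0x7fffffff, lo);	/* y == 2.0 */
	GET_HIGH_WORD(hi, y);			/* hi == 0x40000000 */
#endif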
1993a8617a8SJordan K. Hubbard 
200ef1ee63eSAlexey Zelkin /*
201ef1ee63eSAlexey Zelkin  * A union which permits us to convert between a float and a 32 bit
202ef1ee63eSAlexey Zelkin  * int.
203ef1ee63eSAlexey Zelkin  */
2043a8617a8SJordan K. Hubbard 
2053a8617a8SJordan K. Hubbard typedef union
2063a8617a8SJordan K. Hubbard {
2073a8617a8SJordan K. Hubbard   float value;
2083a8617a8SJordan K. Hubbard   /* FIXME: Assumes 32 bit int.  */
2093a8617a8SJordan K. Hubbard   unsigned int word;
2103a8617a8SJordan K. Hubbard } ieee_float_shape_type;
2113a8617a8SJordan K. Hubbard 
2123a8617a8SJordan K. Hubbard /* Get a 32 bit int from a float.  */
2133a8617a8SJordan K. Hubbard 
2143a8617a8SJordan K. Hubbard #define GET_FLOAT_WORD(i,d)					\
2153a8617a8SJordan K. Hubbard do {								\
2163a8617a8SJordan K. Hubbard   ieee_float_shape_type gf_u;					\
2173a8617a8SJordan K. Hubbard   gf_u.value = (d);						\
2183a8617a8SJordan K. Hubbard   (i) = gf_u.word;						\
2193a8617a8SJordan K. Hubbard } while (0)
2203a8617a8SJordan K. Hubbard 
2213a8617a8SJordan K. Hubbard /* Set a float from a 32 bit int.  */
2223a8617a8SJordan K. Hubbard 
2233a8617a8SJordan K. Hubbard #define SET_FLOAT_WORD(d,i)					\
2243a8617a8SJordan K. Hubbard do {								\
2253a8617a8SJordan K. Hubbard   ieee_float_shape_type sf_u;					\
2263a8617a8SJordan K. Hubbard   sf_u.word = (i);						\
2273a8617a8SJordan K. Hubbard   (d) = sf_u.value;						\
2283a8617a8SJordan K. Hubbard } while (0)
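
/*
 * Illustrative sketch (not compiled): GET_FLOAT_WORD/SET_FLOAT_WORD
 * give the same kind of access for floats.  Assuming IEEE 754 single
 * format, 1.0F is 0x3f800000, and adding 1 to the exponent field
 * doubles the value.
 */
#if 0
	float f = 1.0F;
	int32_t w;

	GET_FLOAT_WORD(w, f);			/* w == 0x3f800000 */
	SET_FLOAT_WORD(f, w + 0x00800000);	/* f == 2.0F */
#endif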

/*
 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
 * double.
 */

#define	EXTRACT_LDBL80_WORDS(ix0,ix1,d)				\
do {								\
  union IEEEl2bits ew_u;					\
  ew_u.e = (d);							\
  (ix0) = ew_u.xbits.expsign;					\
  (ix1) = ew_u.xbits.man;					\
} while (0)

/*
 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
 * long double.
 */

#define	EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)			\
do {								\
  union IEEEl2bits ew_u;					\
  ew_u.e = (d);							\
  (ix0) = ew_u.xbits.expsign;					\
  (ix1) = ew_u.xbits.manh;					\
  (ix2) = ew_u.xbits.manl;					\
} while (0)

/* Get expsign as a 16 bit int from a long double.  */

#define	GET_LDBL_EXPSIGN(i,d)					\
do {								\
  union IEEEl2bits ge_u;					\
  ge_u.e = (d);							\
  (i) = ge_u.xbits.expsign;					\
} while (0)

/*
 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
 * mantissa.
 */

#define	INSERT_LDBL80_WORDS(d,ix0,ix1)				\
do {								\
  union IEEEl2bits iw_u;					\
  iw_u.xbits.expsign = (ix0);					\
  iw_u.xbits.man = (ix1);					\
  (d) = iw_u.e;							\
} while (0)

/*
 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
 * comprising the mantissa.
 */

#define	INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)			\
do {								\
  union IEEEl2bits iw_u;					\
  iw_u.xbits.expsign = (ix0);					\
  iw_u.xbits.manh = (ix1);					\
  iw_u.xbits.manl = (ix2);					\
  (d) = iw_u.e;							\
} while (0)

/* Set expsign of a long double from a 16 bit int.  */

#define	SET_LDBL_EXPSIGN(d,v)					\
do {								\
  union IEEEl2bits se_u;					\
  se_u.e = (d);							\
  se_u.xbits.expsign = (v);					\
  (d) = se_u.e;							\
} while (0)
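
/*
 * Illustrative sketch (not compiled): the expsign macros rely on
 * union IEEEl2bits from the machine-dependent fpmath.h.  In the IEEE
 * long double formats, 1.0L has expsign 0x3fff, and setting bit 15 of
 * expsign flips the sign.
 */
#if 0
	long double x = 1.0L;
	uint16_t es;

	GET_LDBL_EXPSIGN(es, x);		/* es == 0x3fff */
	SET_LDBL_EXPSIGN(x, es | 0x8000);	/* x == -1.0L */
#endif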

#ifdef __i386__
/* Long double constants are broken on i386. */
#define	LD80C(m, ex, v) {						\
	.xbits.man = __CONCAT(m, ULL),					\
	.xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0),	\
}
#else
/* The above works on non-i386 too, but we use this to check v. */
#define	LD80C(m, ex, v)	{ .e = (v), }
#endif
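
/*
 * Illustrative sketch (not compiled): LD80C() initializes a
 * union IEEEl2bits from an explicit 64 bit mantissa (including the
 * explicit integer bit), an unbiased exponent, and a sign taken from
 * v.  For example, 1.0L is mantissa 0x8000000000000000 with unbiased
 * exponent 0:
 */
#if 0
static const union IEEEl2bits one = LD80C(0x8000000000000000, 0, 1.0L);
/* one.e == 1.0L on both the i386 and the non-i386 paths */
#endif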

#ifdef FLT_EVAL_METHOD
/*
 * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
 */
#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
#define	STRICT_ASSIGN(type, lval, rval)	((lval) = (rval))
#else
#define	STRICT_ASSIGN(type, lval, rval) do {	\
	volatile type __lval;			\
						\
	if (sizeof(type) >= sizeof(long double))	\
		(lval) = (rval);		\
	else {					\
		__lval = (rval);		\
		(lval) = __lval;		\
	}					\
} while (0)
#endif
#endif /* FLT_EVAL_METHOD */
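
/*
 * Illustrative sketch (not compiled): on i386, where FLT_EVAL_METHOD
 * is 2, a plain assignment may leave x * x in an 80 bit register.
 * STRICT_ASSIGN() forces the store through a volatile, so y holds a
 * value rounded to float precision as C99 requires for an assignment.
 */
#if 0
	float x, y;

	STRICT_ASSIGN(float, y, x * x);	/* y has only float precision */
#endif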

/* Support switching the mode to FP_PE if necessary. */
#if defined(__i386__) && !defined(NO_FPSETPREC)
#define	ENTERI() ENTERIT(long double)
#define	ENTERIT(returntype)			\
	returntype __retval;			\
	fp_prec_t __oprec;			\
						\
	if ((__oprec = fpgetprec()) != FP_PE)	\
		fpsetprec(FP_PE)
#define	RETURNI(x) do {				\
	__retval = (x);				\
	if (__oprec != FP_PE)			\
		fpsetprec(__oprec);		\
	RETURNF(__retval);			\
} while (0)
#define	ENTERV()				\
	fp_prec_t __oprec;			\
						\
	if ((__oprec = fpgetprec()) != FP_PE)	\
		fpsetprec(FP_PE)
#define	RETURNV() do {				\
	if (__oprec != FP_PE)			\
		fpsetprec(__oprec);		\
	return;					\
} while (0)
#else
#define	ENTERI()
#define	ENTERIT(x)
#define	RETURNI(x)	RETURNF(x)
#define	ENTERV()
#define	RETURNV()	return
#endif

/* Default return statement if hack*_t() is not used. */
#define	RETURNF(v)	return (v)
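
/*
 * Illustrative sketch (not compiled): a long double function brackets
 * its body with ENTERI()/RETURNI() so that on i386 the x87 rounding
 * precision is FP_PE while it runs and is restored on return.  The
 * function name here is hypothetical.
 */
#if 0
long double
dummy_funcl(long double x)
{
	ENTERI();
	/* ... long double computation ... */
	RETURNI(x);
}
#endif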
369b83ccea3SSteve Kargl 
3707cd4a832SDavid Schultz /*
37125a4d6bfSDavid Schultz  * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
37225a4d6bfSDavid Schultz  * a == 0, but is slower.
37325a4d6bfSDavid Schultz  */
37425a4d6bfSDavid Schultz #define	_2sum(a, b) do {	\
37525a4d6bfSDavid Schultz 	__typeof(a) __s, __w;	\
37625a4d6bfSDavid Schultz 				\
37725a4d6bfSDavid Schultz 	__w = (a) + (b);	\
37825a4d6bfSDavid Schultz 	__s = __w - (a);	\
37925a4d6bfSDavid Schultz 	(b) = ((a) - (__w - __s)) + ((b) - __s); \
38025a4d6bfSDavid Schultz 	(a) = __w;		\
38125a4d6bfSDavid Schultz } while (0)
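
/*
 * Illustrative sketch (not compiled): after _2sum(), a holds the
 * rounded sum and b holds the rounding error, so a + b is still the
 * exact sum.  With doubles, 1 + 0x1p-60 rounds to 1 and the error
 * survives in b:
 */
#if 0
	double a = 1.0, b = 0x1p-60;

	_2sum(a, b);	/* a == 1.0, b == 0x1p-60 */
#endif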

/*
 * 2sumF algorithm.
 *
 * "Normalize" the terms in the infinite-precision expression a + b for
 * the sum of 2 floating point values so that b is as small as possible
 * relative to 'a'.  (The resulting 'a' is the value of the expression in
 * the same precision as 'a' and the resulting b is the rounding error.)
 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
 * exponent overflow or underflow must not occur.  This uses a Theorem of
 * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum"
 * is apparently due to Shewchuk (1997).
 *
 * For this to always work, assignment of a + b to 'a' must not retain any
 * extra precision in a + b.  This is required by C standards but broken
 * in many compilers.  The brokenness cannot be worked around using
 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
 * algorithm would be destroyed by non-null strict assignments.  (The
 * compilers are correct to be broken -- the efficiency of all floating
 * point code calculations would be destroyed similarly if they forced the
 * conversions.)
 *
 * Fortunately, a case that works well can usually be arranged by building
 * any extra precision into the type of 'a' -- 'a' should have type float_t,
 * double_t or long double.  b's type should be no larger than 'a's type.
 * Callers should use these types with scopes as large as possible, to
 * reduce their own extra-precision and efficiency problems.  In
 * particular, they shouldn't convert back and forth just to call here.
 */
#ifdef DEBUG
#define	_2sumF(a, b) do {				\
	__typeof(a) __w;				\
	volatile __typeof(a) __ia, __ib, __r, __vw;	\
							\
	__ia = (a);					\
	__ib = (b);					\
	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib));	\
							\
	__w = (a) + (b);				\
	(b) = ((a) - __w) + (b);			\
	(a) = __w;					\
							\
	/* The next 2 assertions are weak if (a) is already long double. */ \
	assert((long double)__ia + __ib == (long double)(a) + (b));	\
	__vw = __ia + __ib;				\
	__r = __ia - __vw;				\
	__r += __ib;					\
	assert(__vw == (a) && __r == (b));		\
} while (0)
#else /* !DEBUG */
#define	_2sumF(a, b) do {	\
	__typeof(a) __w;	\
				\
	__w = (a) + (b);	\
	(b) = ((a) - __w) + (b); \
	(a) = __w;		\
} while (0)
#endif /* DEBUG */
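
/*
 * Illustrative sketch (not compiled): _2sumF() is the fast path and
 * requires |a| >= |b| (or a == 0) on entry.  Per the commentary above,
 * 'a' is given type double_t so that any extra precision is built into
 * its type:
 */
#if 0
	double_t a = 1.0, b = 0x1p-60;

	_2sumF(a, b);	/* a == 1.0, b == 0x1p-60 */
#endif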

/*
 * Set x += c, where x is represented in extra precision as a + b.
 * x must be sufficiently normalized and sufficiently larger than c,
 * and the result is then sufficiently normalized.
 *
 * The details of ordering are that |a| must be >= |c| (so that (a, c)
 * can be normalized without extra work to swap 'a' with c).  The details of
 * the normalization are that b must be small relative to the normalized 'a'.
 * Normalization of (a, c) makes the normalized c tiny relative to the
 * normalized a, so b remains small relative to 'a' in the result.  However,
 * b need not ever be tiny relative to 'a'.  For example, b might be about
 * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
 * That is usually enough, and adding c (which by normalization is about
 * 2**53 times smaller than a) cannot change b significantly.  However,
 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
 * significantly relative to b.  The caller must ensure that significant
 * cancellation doesn't occur, either by having c of the same sign as 'a',
 * or by having |c| a few percent smaller than |a|.  Pre-normalization of
 * (a, b) may help.
 *
 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
 * exercise 19).  We gain considerable efficiency by requiring the terms to
 * be sufficiently normalized and sufficiently increasing.
 */
#define	_3sumF(a, b, c) do {	\
	__typeof(a) __tmp;	\
				\
	__tmp = (c);		\
	_2sumF(__tmp, (a));	\
	(b) += (a);		\
	(a) = __tmp;		\
} while (0)

/*
 * Common routine to process the arguments to nan(), nanf(), and nanl().
 */
void _scan_nan(uint32_t *__words, int __num_words, const char *__s);

/*
 * Mix 0, 1 or 2 NaNs.  First add 0 to each arg.  This normally just turns
 * signaling NaNs into quiet NaNs by setting a quiet bit.  We do this
 * because we want to never return a signaling NaN, and also because we
 * don't want the quiet bit to affect the result.  Then mix the converted
 * args using the specified operation.
 *
 * When one arg is NaN, the result is typically that arg quieted.  When both
 * args are NaNs, the result is typically the quietening of the arg whose
 * mantissa is largest after quietening.  When neither arg is NaN, the
 * result may be NaN because it is indeterminate, or finite for subsequent
 * construction of a NaN as the indeterminate 0.0L/0.0L.
 *
 * Technical complications: the result in bits after rounding to the final
 * precision might depend on the runtime precision and/or on compiler
 * optimizations, especially when different register sets are used for
 * different precisions.  Try to make the result not depend on at least the
 * runtime precision by always doing the main mixing step in long double
 * precision.  Try to reduce dependencies on optimizations by adding the
 * 0's in different precisions (unless everything is in long double
 * precision).
 */
#define	nan_mix(x, y)		(nan_mix_op((x), (y), +))
#define	nan_mix_op(x, y, op)	(((x) + 0.0L) op ((y) + 0))
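
/*
 * Illustrative sketch (not compiled): a typical use is in a two-arg
 * function that must return a quiet NaN whenever either arg is a NaN.
 * The function shown is hypothetical; isnan() is from math.h.
 */
#if 0
double
dummy_func(double x, double y)
{
	if (isnan(x) || isnan(y))
		return (nan_mix(x, y));
	/* ... normal path ... */
}
#endif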

#ifdef _COMPLEX_H

/*
 * C99 specifies that complex numbers have the same representation as
 * an array of two elements, where the first element is the real part
 * and the second element is the imaginary part.
 */
typedef union {
	float complex f;
	float a[2];
} float_complex;
typedef union {
	double complex f;
	double a[2];
} double_complex;
typedef union {
	long double complex f;
	long double a[2];
} long_double_complex;
#define	REALPART(z)	((z).a[0])
#define	IMAGPART(z)	((z).a[1])

/*
 * Inline functions that can be used to construct complex values.
 *
 * The C99 standard intends x+I*y to be used for this, but x+I*y is
 * currently unusable in general since gcc introduces many overflow,
 * underflow, sign and efficiency bugs by rewriting I*y as
 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
 * to -0.0+I*0.0.
 *
 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
 * to construct complex values.  Compilers that conform to the C99
 * standard require the following functions to avoid the above issues.
 */

#ifndef CMPLXF
static __inline float complex
CMPLXF(float x, float y)
{
	float_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLX
static __inline double complex
CMPLX(double x, double y)
{
	double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLXL
static __inline long double complex
CMPLXL(long double x, long double y)
{
	long_double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif
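
/*
 * Illustrative sketch (not compiled): with these functions a complex
 * value can be built from parts without the corruption described
 * above.  INFINITY is from math.h.
 */
#if 0
	double complex z = CMPLX(0.0, INFINITY);	/* not NaN + I*Inf */
	double complex w = CMPLX(-0.0, 0.0);		/* real part keeps -0 */
#endif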

#endif /* _COMPLEX_H */

/*
 * The rnint() family rounds to the nearest integer for a restricted
 * range of args (up to about 2**MANT_DIG).  We assume that the current
 * rounding mode is FE_TONEAREST so that this can be done efficiently.
 * Extra precision causes more problems in practice, and we only centralize
 * this here to reduce those problems, and have not solved the efficiency
 * problems.  The exp2() family uses a more delicate version of this that
 * requires extracting bits from the intermediate value, so it is not
 * centralized here and should copy any solution of the efficiency problems.
 */

static inline double
rnint(__double_t x)
{
	/*
	 * This casts to double to kill any extra precision.  This depends
	 * on the cast being applied to a double_t to avoid compiler bugs
	 * (this is a cleaner version of STRICT_ASSIGN()).  This is
	 * inefficient if there actually is extra precision, but is hard
	 * to improve on.  We use double_t in the API to minimise conversions
	 * for just calling here.  Note that we cannot easily change the
	 * magic number to the one that works directly with double_t, since
	 * the rounding precision is variable at runtime on x86 so the
	 * magic number would need to be variable.  Assuming that the
	 * rounding precision is always the default is too fragile.  This
	 * and many other complications will move when the default is
	 * changed to FP_PE.
	 */
	return ((double)(x + 0x1.8p52) - 0x1.8p52);
}

static inline float
rnintf(__float_t x)
{
	/*
	 * As for rnint(), except we could just call that to handle the
	 * extra precision case, usually without losing efficiency.
	 */
	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
}
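
/*
 * Illustrative sketch (not compiled): in FE_TONEAREST mode the magic
 * constant 0x1.8p52 == 3*2**51 makes the addition round x to an
 * integer (ties to even), and the subtraction recovers it exactly:
 */
#if 0
	double r1 = rnint(2.5);		/* 2.0 (tie rounds to even) */
	double r2 = rnint(3.5);		/* 4.0 */
	float  r3 = rnintf(-1.5F);	/* -2.0F */
#endif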

#ifdef LDBL_MANT_DIG
/*
 * The complications for extra precision are smaller for rnintl() since it
 * can safely assume that the rounding precision has been increased from
 * its default to FP_PE on x86.  We don't exploit that here to get small
 * optimizations from limiting the range to double.  We just need it for
 * the magic number to work with long doubles.  ld128 callers should use
 * rnint() instead of this if possible.  ld80 callers should prefer
 * rnintl() since for amd64 this avoids swapping the register set, while
 * for i386 it makes no difference (assuming FP_PE), and for other arches
 * it makes little difference.
 */
static inline long double
rnintl(long double x)
{
	return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
	    __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
}
#endif /* LDBL_MANT_DIG */

/*
 * irint() and i64rint() give the same result as casting to their integer
 * return type provided their arg is a floating point integer.  They can
 * sometimes be more efficient because no rounding is required.
 */
#if defined(__amd64__) || defined(__i386__)
#define	irint(x)						\
    (sizeof(x) == sizeof(float) &&				\
    sizeof(__float_t) == sizeof(long double) ? irintf(x) :	\
    sizeof(x) == sizeof(double) &&				\
    sizeof(__double_t) == sizeof(long double) ? irintd(x) :	\
    sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
#else
#define	irint(x)	((int)(x))
#endif

#define	i64rint(x)	((int64_t)(x))	/* only needed for ld128 so not opt. */
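
/*
 * Illustrative sketch (not compiled): irint() is normally applied to a
 * value that is already a floating point integer, e.g. the result of
 * rnint() in an argument-reduction step.  Here x and invpio2 are
 * assumed given:
 */
#if 0
	double_t fn;
	int n;

	fn = rnint(x * invpio2);
	n = irint(fn);		/* same as (int)fn, sometimes faster */
#endif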

#if defined(__i386__)
static __inline int
irintf(float x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}

static __inline int
irintd(double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

#if defined(__amd64__) || defined(__i386__)
static __inline int
irintl(long double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

/*
 * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where
 * N is the precision of the type of x. These macros are used in the
 * half-cycle trigonometric functions (e.g., sinpi(x)).
 */
#define	FFLOORF(x, j0, ix) do {			\
	(j0) = (((ix) >> 23) & 0xff) - 0x7f;	\
	(ix) &= ~(0x007fffff >> (j0));		\
	SET_FLOAT_WORD((x), (ix));		\
} while (0)
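
/*
 * Illustrative sketch (not compiled): FFLOORF() computes floor() by
 * clearing the fraction bits below the binary point.  For x = 3.75F
 * (word 0x40700000) the unbiased exponent j0 is 1, and masking leaves
 * 3.0F:
 */
#if 0
	float x = 3.75F;
	int32_t ix, j0;

	GET_FLOAT_WORD(ix, x);
	FFLOORF(x, j0, ix);	/* x == 3.0F, j0 == 1 */
#endif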

#define	FFLOOR(x, j0, ix, lx) do {				\
	(j0) = (((ix) >> 20) & 0x7ff) - 0x3ff;			\
	if ((j0) < 20) {					\
		(ix) &= ~(0x000fffff >> (j0));			\
		(lx) = 0;					\
	} else {						\
		(lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20));	\
	}							\
	INSERT_WORDS((x), (ix), (lx));				\
} while (0)

#define	FFLOORL80(x, j0, ix, lx) do {			\
	j0 = ix - 0x3fff + 1;				\
	if ((j0) < 32) {				\
		(lx) = ((lx) >> 32) << 32;		\
		(lx) &= ~((((lx) << 32)-1) >> (j0));	\
	} else {					\
		uint64_t _m;				\
		_m = (uint64_t)-1 >> (j0);		\
		if ((lx) & _m) (lx) &= ~_m;		\
	}						\
	INSERT_LDBL80_WORDS((x), (ix), (lx));		\
} while (0)

#define FFLOORL128(x, ai, ar) do {			\
	union IEEEl2bits u;				\
	uint64_t m;					\
	int e;						\
	u.e = (x);					\
	e = u.bits.exp - 16383;				\
	if (e < 48) {					\
		m = ((1llu << 49) - 1) >> (e + 1);	\
		u.bits.manh &= ~m;			\
		u.bits.manl = 0;			\
	} else {					\
		m = (uint64_t)-1 >> (e - 48);		\
		u.bits.manl &= ~m;			\
	}						\
	(ai) = u.e;					\
	(ar) = (x) - (ai);				\
} while (0)

#ifdef DEBUG
#if defined(__amd64__) || defined(__i386__)
#define	breakpoint()	asm("int $3")
#else
#include <signal.h>

#define	breakpoint()	raise(SIGTRAP)
#endif
#endif

#ifdef STRUCT_RETURN
#define	RETURNSP(rp) do {		\
	if (!(rp)->lo_set)		\
		RETURNF((rp)->hi);	\
	RETURNF((rp)->hi + (rp)->lo);	\
} while (0)
#define	RETURNSPI(rp) do {		\
	if (!(rp)->lo_set)		\
		RETURNI((rp)->hi);	\
	RETURNI((rp)->hi + (rp)->lo);	\
} while (0)
#endif

#define	SUM2P(x, y) ({			\
	const __typeof (x) __x = (x);	\
	const __typeof (y) __y = (y);	\
	__x + __y;			\
})

/* fdlibm kernel function */
int	__kernel_rem_pio2(double*,double*,int,int,int);

/* double precision kernel functions */
#ifndef INLINE_REM_PIO2
int	__ieee754_rem_pio2(double,double*);
#endif
double	__kernel_sin(double,double,int);
double	__kernel_cos(double,double);
double	__kernel_tan(double,double,int);
double	__ldexp_exp(double,int);
#ifdef _COMPLEX_H
double complex __ldexp_cexp(double complex,int);
#endif

/* float precision kernel functions */
#ifndef INLINE_REM_PIO2F
int	__ieee754_rem_pio2f(float,double*);
#endif
#ifndef INLINE_KERNEL_SINDF
float	__kernel_sindf(double);
#endif
#ifndef INLINE_KERNEL_COSDF
float	__kernel_cosdf(double);
#endif
#ifndef INLINE_KERNEL_TANDF
float	__kernel_tandf(double,int);
#endif
float	__ldexp_expf(float,int);
#ifdef _COMPLEX_H
float complex __ldexp_cexpf(float complex,int);
#endif

/* long double precision kernel functions */
long double __kernel_sinl(long double, long double, int);
long double __kernel_cosl(long double, long double);
long double __kernel_tanl(long double, long double, int);

#endif /* !_MATH_PRIVATE_H_ */