xref: /freebsd/lib/libc/softfloat/bits64/softfloat.c (revision 559a218c9b257775fb249b67945fe4a05b7a6b9f)
/* $NetBSD: softfloat.c,v 1.8 2011/07/10 04:52:23 matt Exp $ */

/*
 * This version hacked for use with gcc -msoft-float by bjh21.
 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
 *  itself).
 */
815144b0fSOlivier Houchard 
/*
 * Things you may want to define:
 *
 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
 *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
 *   properly renamed.
 */
1615144b0fSOlivier Houchard 
/*
===============================================================================

This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/
4615144b0fSOlivier Houchard 
4715144b0fSOlivier Houchard #ifdef SOFTFLOAT_FOR_GCC
4815144b0fSOlivier Houchard #include "softfloat-for-gcc.h"
4915144b0fSOlivier Houchard #endif
5015144b0fSOlivier Houchard 
5115144b0fSOlivier Houchard #include "milieu.h"
5215144b0fSOlivier Houchard #include "softfloat.h"
5315144b0fSOlivier Houchard 
/*
 * Conversions between floats as stored in memory and floats as
 * SoftFloat uses them.
 *
 * FLOAT64_DEMANGLE(a) is applied to a float64 before its fields are
 * extracted; FLOAT64_MANGLE(a) is applied to a freshly packed value before
 * it is returned.  Both default to the identity mapping and are only
 * defined here if nothing earlier (e.g. an included header) has already
 * defined them.
 */
#ifndef FLOAT64_DEMANGLE
#define FLOAT64_DEMANGLE(a)	(a)
#endif
#ifndef FLOAT64_MANGLE
#define FLOAT64_MANGLE(a)	(a)
#endif
6415144b0fSOlivier Houchard 
6515144b0fSOlivier Houchard /*
6615144b0fSOlivier Houchard -------------------------------------------------------------------------------
6715144b0fSOlivier Houchard Floating-point rounding mode, extended double-precision rounding precision,
6815144b0fSOlivier Houchard and exception flags.
6915144b0fSOlivier Houchard -------------------------------------------------------------------------------
7015144b0fSOlivier Houchard */
71b1d04644SDavid Schultz int float_rounding_mode = float_round_nearest_even;
72b1d04644SDavid Schultz int float_exception_flags = 0;
7315144b0fSOlivier Houchard #ifdef FLOATX80
7415144b0fSOlivier Houchard int8 floatx80_rounding_precision = 80;
7515144b0fSOlivier Houchard #endif
7615144b0fSOlivier Houchard 
/*
-------------------------------------------------------------------------------
Primitive arithmetic functions, including multi-word arithmetic, and
division and square root approximations.  (Can be specialized to target if
desired.)
-------------------------------------------------------------------------------
*/
8415144b0fSOlivier Houchard #include "softfloat-macros"
8515144b0fSOlivier Houchard 
/*
-------------------------------------------------------------------------------
Functions and definitions to determine:  (1) whether tininess for underflow
is detected before or after rounding by default, (2) what (if anything)
happens when exceptions are raised, (3) how signaling NaNs are distinguished
from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
are propagated from function inputs to output.  These details are target-
specific.
-------------------------------------------------------------------------------
*/
9615144b0fSOlivier Houchard #include "softfloat-specialize"
9715144b0fSOlivier Houchard 
9815144b0fSOlivier Houchard #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
9915144b0fSOlivier Houchard /*
10015144b0fSOlivier Houchard -------------------------------------------------------------------------------
10115144b0fSOlivier Houchard Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
10215144b0fSOlivier Houchard and 7, and returns the properly rounded 32-bit integer corresponding to the
10315144b0fSOlivier Houchard input.  If `zSign' is 1, the input is negated before being converted to an
10415144b0fSOlivier Houchard integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
10515144b0fSOlivier Houchard is simply rounded to an integer, with the inexact exception raised if the
10615144b0fSOlivier Houchard input cannot be represented exactly as an integer.  However, if the fixed-
10715144b0fSOlivier Houchard point input is too large, the invalid exception is raised and the largest
10815144b0fSOlivier Houchard positive or negative integer is returned.
10915144b0fSOlivier Houchard -------------------------------------------------------------------------------
11015144b0fSOlivier Houchard */
roundAndPackInt32(flag zSign,bits64 absZ)11115144b0fSOlivier Houchard static int32 roundAndPackInt32( flag zSign, bits64 absZ )
11215144b0fSOlivier Houchard {
11315144b0fSOlivier Houchard     int8 roundingMode;
11415144b0fSOlivier Houchard     flag roundNearestEven;
11515144b0fSOlivier Houchard     int8 roundIncrement, roundBits;
11615144b0fSOlivier Houchard     int32 z;
11715144b0fSOlivier Houchard 
11815144b0fSOlivier Houchard     roundingMode = float_rounding_mode;
11915144b0fSOlivier Houchard     roundNearestEven = ( roundingMode == float_round_nearest_even );
12015144b0fSOlivier Houchard     roundIncrement = 0x40;
12115144b0fSOlivier Houchard     if ( ! roundNearestEven ) {
12215144b0fSOlivier Houchard         if ( roundingMode == float_round_to_zero ) {
12315144b0fSOlivier Houchard             roundIncrement = 0;
12415144b0fSOlivier Houchard         }
12515144b0fSOlivier Houchard         else {
12615144b0fSOlivier Houchard             roundIncrement = 0x7F;
12715144b0fSOlivier Houchard             if ( zSign ) {
12815144b0fSOlivier Houchard                 if ( roundingMode == float_round_up ) roundIncrement = 0;
12915144b0fSOlivier Houchard             }
13015144b0fSOlivier Houchard             else {
13115144b0fSOlivier Houchard                 if ( roundingMode == float_round_down ) roundIncrement = 0;
13215144b0fSOlivier Houchard             }
13315144b0fSOlivier Houchard         }
13415144b0fSOlivier Houchard     }
13515144b0fSOlivier Houchard     roundBits = absZ & 0x7F;
13615144b0fSOlivier Houchard     absZ = ( absZ + roundIncrement )>>7;
13715144b0fSOlivier Houchard     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
13815144b0fSOlivier Houchard     z = absZ;
13915144b0fSOlivier Houchard     if ( zSign ) z = - z;
14015144b0fSOlivier Houchard     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
14115144b0fSOlivier Houchard         float_raise( float_flag_invalid );
14215144b0fSOlivier Houchard         return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
14315144b0fSOlivier Houchard     }
14415144b0fSOlivier Houchard     if ( roundBits ) float_exception_flags |= float_flag_inexact;
14515144b0fSOlivier Houchard     return z;
14615144b0fSOlivier Houchard 
14715144b0fSOlivier Houchard }
14815144b0fSOlivier Houchard 
14915144b0fSOlivier Houchard /*
15015144b0fSOlivier Houchard -------------------------------------------------------------------------------
15115144b0fSOlivier Houchard Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
15215144b0fSOlivier Houchard `absZ1', with binary point between bits 63 and 64 (between the input words),
15315144b0fSOlivier Houchard and returns the properly rounded 64-bit integer corresponding to the input.
15415144b0fSOlivier Houchard If `zSign' is 1, the input is negated before being converted to an integer.
15515144b0fSOlivier Houchard Ordinarily, the fixed-point input is simply rounded to an integer, with
15615144b0fSOlivier Houchard the inexact exception raised if the input cannot be represented exactly as
15715144b0fSOlivier Houchard an integer.  However, if the fixed-point input is too large, the invalid
15815144b0fSOlivier Houchard exception is raised and the largest positive or negative integer is
15915144b0fSOlivier Houchard returned.
16015144b0fSOlivier Houchard -------------------------------------------------------------------------------
16115144b0fSOlivier Houchard */
roundAndPackInt64(flag zSign,bits64 absZ0,bits64 absZ1)16215144b0fSOlivier Houchard static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
16315144b0fSOlivier Houchard {
16415144b0fSOlivier Houchard     int8 roundingMode;
16515144b0fSOlivier Houchard     flag roundNearestEven, increment;
16615144b0fSOlivier Houchard     int64 z;
16715144b0fSOlivier Houchard 
16815144b0fSOlivier Houchard     roundingMode = float_rounding_mode;
16915144b0fSOlivier Houchard     roundNearestEven = ( roundingMode == float_round_nearest_even );
17015144b0fSOlivier Houchard     increment = ( (sbits64) absZ1 < 0 );
17115144b0fSOlivier Houchard     if ( ! roundNearestEven ) {
17215144b0fSOlivier Houchard         if ( roundingMode == float_round_to_zero ) {
17315144b0fSOlivier Houchard             increment = 0;
17415144b0fSOlivier Houchard         }
17515144b0fSOlivier Houchard         else {
17615144b0fSOlivier Houchard             if ( zSign ) {
17715144b0fSOlivier Houchard                 increment = ( roundingMode == float_round_down ) && absZ1;
17815144b0fSOlivier Houchard             }
17915144b0fSOlivier Houchard             else {
18015144b0fSOlivier Houchard                 increment = ( roundingMode == float_round_up ) && absZ1;
18115144b0fSOlivier Houchard             }
18215144b0fSOlivier Houchard         }
18315144b0fSOlivier Houchard     }
18415144b0fSOlivier Houchard     if ( increment ) {
18515144b0fSOlivier Houchard         ++absZ0;
18615144b0fSOlivier Houchard         if ( absZ0 == 0 ) goto overflow;
18715144b0fSOlivier Houchard         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
18815144b0fSOlivier Houchard     }
18915144b0fSOlivier Houchard     z = absZ0;
19015144b0fSOlivier Houchard     if ( zSign ) z = - z;
19115144b0fSOlivier Houchard     if ( z && ( ( z < 0 ) ^ zSign ) ) {
19215144b0fSOlivier Houchard  overflow:
19315144b0fSOlivier Houchard         float_raise( float_flag_invalid );
19415144b0fSOlivier Houchard         return
19515144b0fSOlivier Houchard               zSign ? (sbits64) LIT64( 0x8000000000000000 )
19615144b0fSOlivier Houchard             : LIT64( 0x7FFFFFFFFFFFFFFF );
19715144b0fSOlivier Houchard     }
19815144b0fSOlivier Houchard     if ( absZ1 ) float_exception_flags |= float_flag_inexact;
19915144b0fSOlivier Houchard     return z;
20015144b0fSOlivier Houchard 
20115144b0fSOlivier Houchard }
20215144b0fSOlivier Houchard #endif
20315144b0fSOlivier Houchard 
20415144b0fSOlivier Houchard /*
20515144b0fSOlivier Houchard -------------------------------------------------------------------------------
20615144b0fSOlivier Houchard Returns the fraction bits of the single-precision floating-point value `a'.
20715144b0fSOlivier Houchard -------------------------------------------------------------------------------
20815144b0fSOlivier Houchard */
extractFloat32Frac(float32 a)20915144b0fSOlivier Houchard INLINE bits32 extractFloat32Frac( float32 a )
21015144b0fSOlivier Houchard {
21115144b0fSOlivier Houchard 
21215144b0fSOlivier Houchard     return a & 0x007FFFFF;
21315144b0fSOlivier Houchard 
21415144b0fSOlivier Houchard }
21515144b0fSOlivier Houchard 
21615144b0fSOlivier Houchard /*
21715144b0fSOlivier Houchard -------------------------------------------------------------------------------
21815144b0fSOlivier Houchard Returns the exponent bits of the single-precision floating-point value `a'.
21915144b0fSOlivier Houchard -------------------------------------------------------------------------------
22015144b0fSOlivier Houchard */
extractFloat32Exp(float32 a)22115144b0fSOlivier Houchard INLINE int16 extractFloat32Exp( float32 a )
22215144b0fSOlivier Houchard {
22315144b0fSOlivier Houchard 
22415144b0fSOlivier Houchard     return ( a>>23 ) & 0xFF;
22515144b0fSOlivier Houchard 
22615144b0fSOlivier Houchard }
22715144b0fSOlivier Houchard 
22815144b0fSOlivier Houchard /*
22915144b0fSOlivier Houchard -------------------------------------------------------------------------------
23015144b0fSOlivier Houchard Returns the sign bit of the single-precision floating-point value `a'.
23115144b0fSOlivier Houchard -------------------------------------------------------------------------------
23215144b0fSOlivier Houchard */
extractFloat32Sign(float32 a)23315144b0fSOlivier Houchard INLINE flag extractFloat32Sign( float32 a )
23415144b0fSOlivier Houchard {
23515144b0fSOlivier Houchard 
23615144b0fSOlivier Houchard     return a>>31;
23715144b0fSOlivier Houchard 
23815144b0fSOlivier Houchard }
23915144b0fSOlivier Houchard 
24015144b0fSOlivier Houchard /*
24115144b0fSOlivier Houchard -------------------------------------------------------------------------------
24215144b0fSOlivier Houchard Normalizes the subnormal single-precision floating-point value represented
24315144b0fSOlivier Houchard by the denormalized significand `aSig'.  The normalized exponent and
24415144b0fSOlivier Houchard significand are stored at the locations pointed to by `zExpPtr' and
24515144b0fSOlivier Houchard `zSigPtr', respectively.
24615144b0fSOlivier Houchard -------------------------------------------------------------------------------
24715144b0fSOlivier Houchard */
24815144b0fSOlivier Houchard static void
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)24915144b0fSOlivier Houchard  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
25015144b0fSOlivier Houchard {
25115144b0fSOlivier Houchard     int8 shiftCount;
25215144b0fSOlivier Houchard 
25315144b0fSOlivier Houchard     shiftCount = countLeadingZeros32( aSig ) - 8;
25415144b0fSOlivier Houchard     *zSigPtr = aSig<<shiftCount;
25515144b0fSOlivier Houchard     *zExpPtr = 1 - shiftCount;
25615144b0fSOlivier Houchard 
25715144b0fSOlivier Houchard }
25815144b0fSOlivier Houchard 
25915144b0fSOlivier Houchard /*
26015144b0fSOlivier Houchard -------------------------------------------------------------------------------
26115144b0fSOlivier Houchard Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
26215144b0fSOlivier Houchard single-precision floating-point value, returning the result.  After being
26315144b0fSOlivier Houchard shifted into the proper positions, the three fields are simply added
26415144b0fSOlivier Houchard together to form the result.  This means that any integer portion of `zSig'
26515144b0fSOlivier Houchard will be added into the exponent.  Since a properly normalized significand
26615144b0fSOlivier Houchard will have an integer portion equal to 1, the `zExp' input should be 1 less
26715144b0fSOlivier Houchard than the desired result exponent whenever `zSig' is a complete, normalized
26815144b0fSOlivier Houchard significand.
26915144b0fSOlivier Houchard -------------------------------------------------------------------------------
27015144b0fSOlivier Houchard */
packFloat32(flag zSign,int16 zExp,bits32 zSig)27115144b0fSOlivier Houchard INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
27215144b0fSOlivier Houchard {
27315144b0fSOlivier Houchard 
27415144b0fSOlivier Houchard     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
27515144b0fSOlivier Houchard 
27615144b0fSOlivier Houchard }
27715144b0fSOlivier Houchard 
27815144b0fSOlivier Houchard /*
27915144b0fSOlivier Houchard -------------------------------------------------------------------------------
28015144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
28115144b0fSOlivier Houchard and significand `zSig', and returns the proper single-precision floating-
28215144b0fSOlivier Houchard point value corresponding to the abstract input.  Ordinarily, the abstract
28315144b0fSOlivier Houchard value is simply rounded and packed into the single-precision format, with
28415144b0fSOlivier Houchard the inexact exception raised if the abstract input cannot be represented
28515144b0fSOlivier Houchard exactly.  However, if the abstract value is too large, the overflow and
28615144b0fSOlivier Houchard inexact exceptions are raised and an infinity or maximal finite value is
28715144b0fSOlivier Houchard returned.  If the abstract value is too small, the input value is rounded to
28815144b0fSOlivier Houchard a subnormal number, and the underflow and inexact exceptions are raised if
28915144b0fSOlivier Houchard the abstract input cannot be represented exactly as a subnormal single-
29015144b0fSOlivier Houchard precision floating-point number.
29115144b0fSOlivier Houchard     The input significand `zSig' has its binary point between bits 30
29215144b0fSOlivier Houchard and 29, which is 7 bits to the left of the usual location.  This shifted
29315144b0fSOlivier Houchard significand must be normalized or smaller.  If `zSig' is not normalized,
29415144b0fSOlivier Houchard `zExp' must be 0; in that case, the result returned is a subnormal number,
29515144b0fSOlivier Houchard and it must not require rounding.  In the usual case that `zSig' is
29615144b0fSOlivier Houchard normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
29715144b0fSOlivier Houchard The handling of underflow and overflow follows the IEC/IEEE Standard for
29815144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
29915144b0fSOlivier Houchard -------------------------------------------------------------------------------
30015144b0fSOlivier Houchard */
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)30115144b0fSOlivier Houchard static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
30215144b0fSOlivier Houchard {
30315144b0fSOlivier Houchard     int8 roundingMode;
30415144b0fSOlivier Houchard     flag roundNearestEven;
30515144b0fSOlivier Houchard     int8 roundIncrement, roundBits;
30615144b0fSOlivier Houchard     flag isTiny;
30715144b0fSOlivier Houchard 
30815144b0fSOlivier Houchard     roundingMode = float_rounding_mode;
30915144b0fSOlivier Houchard     roundNearestEven = ( roundingMode == float_round_nearest_even );
31015144b0fSOlivier Houchard     roundIncrement = 0x40;
31115144b0fSOlivier Houchard     if ( ! roundNearestEven ) {
31215144b0fSOlivier Houchard         if ( roundingMode == float_round_to_zero ) {
31315144b0fSOlivier Houchard             roundIncrement = 0;
31415144b0fSOlivier Houchard         }
31515144b0fSOlivier Houchard         else {
31615144b0fSOlivier Houchard             roundIncrement = 0x7F;
31715144b0fSOlivier Houchard             if ( zSign ) {
31815144b0fSOlivier Houchard                 if ( roundingMode == float_round_up ) roundIncrement = 0;
31915144b0fSOlivier Houchard             }
32015144b0fSOlivier Houchard             else {
32115144b0fSOlivier Houchard                 if ( roundingMode == float_round_down ) roundIncrement = 0;
32215144b0fSOlivier Houchard             }
32315144b0fSOlivier Houchard         }
32415144b0fSOlivier Houchard     }
32515144b0fSOlivier Houchard     roundBits = zSig & 0x7F;
32615144b0fSOlivier Houchard     if ( 0xFD <= (bits16) zExp ) {
32715144b0fSOlivier Houchard         if (    ( 0xFD < zExp )
32815144b0fSOlivier Houchard              || (    ( zExp == 0xFD )
32915144b0fSOlivier Houchard                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
33015144b0fSOlivier Houchard            ) {
33115144b0fSOlivier Houchard             float_raise( float_flag_overflow | float_flag_inexact );
33215144b0fSOlivier Houchard             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
33315144b0fSOlivier Houchard         }
33415144b0fSOlivier Houchard         if ( zExp < 0 ) {
33515144b0fSOlivier Houchard             isTiny =
33615144b0fSOlivier Houchard                    ( float_detect_tininess == float_tininess_before_rounding )
33715144b0fSOlivier Houchard                 || ( zExp < -1 )
33815144b0fSOlivier Houchard                 || ( zSig + roundIncrement < 0x80000000 );
33915144b0fSOlivier Houchard             shift32RightJamming( zSig, - zExp, &zSig );
34015144b0fSOlivier Houchard             zExp = 0;
34115144b0fSOlivier Houchard             roundBits = zSig & 0x7F;
34215144b0fSOlivier Houchard             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
34315144b0fSOlivier Houchard         }
34415144b0fSOlivier Houchard     }
34515144b0fSOlivier Houchard     if ( roundBits ) float_exception_flags |= float_flag_inexact;
34615144b0fSOlivier Houchard     zSig = ( zSig + roundIncrement )>>7;
34715144b0fSOlivier Houchard     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
34815144b0fSOlivier Houchard     if ( zSig == 0 ) zExp = 0;
34915144b0fSOlivier Houchard     return packFloat32( zSign, zExp, zSig );
35015144b0fSOlivier Houchard 
35115144b0fSOlivier Houchard }
35215144b0fSOlivier Houchard 
35315144b0fSOlivier Houchard /*
35415144b0fSOlivier Houchard -------------------------------------------------------------------------------
35515144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
35615144b0fSOlivier Houchard and significand `zSig', and returns the proper single-precision floating-
35715144b0fSOlivier Houchard point value corresponding to the abstract input.  This routine is just like
35815144b0fSOlivier Houchard `roundAndPackFloat32' except that `zSig' does not have to be normalized.
35915144b0fSOlivier Houchard Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
36015144b0fSOlivier Houchard floating-point exponent.
36115144b0fSOlivier Houchard -------------------------------------------------------------------------------
36215144b0fSOlivier Houchard */
36315144b0fSOlivier Houchard static float32
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)36415144b0fSOlivier Houchard  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
36515144b0fSOlivier Houchard {
36615144b0fSOlivier Houchard     int8 shiftCount;
36715144b0fSOlivier Houchard 
36815144b0fSOlivier Houchard     shiftCount = countLeadingZeros32( zSig ) - 1;
36915144b0fSOlivier Houchard     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
37015144b0fSOlivier Houchard 
37115144b0fSOlivier Houchard }
37215144b0fSOlivier Houchard 
37315144b0fSOlivier Houchard /*
37415144b0fSOlivier Houchard -------------------------------------------------------------------------------
37515144b0fSOlivier Houchard Returns the fraction bits of the double-precision floating-point value `a'.
37615144b0fSOlivier Houchard -------------------------------------------------------------------------------
37715144b0fSOlivier Houchard */
extractFloat64Frac(float64 a)37815144b0fSOlivier Houchard INLINE bits64 extractFloat64Frac( float64 a )
37915144b0fSOlivier Houchard {
38015144b0fSOlivier Houchard 
38115144b0fSOlivier Houchard     return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
38215144b0fSOlivier Houchard 
38315144b0fSOlivier Houchard }
38415144b0fSOlivier Houchard 
38515144b0fSOlivier Houchard /*
38615144b0fSOlivier Houchard -------------------------------------------------------------------------------
38715144b0fSOlivier Houchard Returns the exponent bits of the double-precision floating-point value `a'.
38815144b0fSOlivier Houchard -------------------------------------------------------------------------------
38915144b0fSOlivier Houchard */
extractFloat64Exp(float64 a)39015144b0fSOlivier Houchard INLINE int16 extractFloat64Exp( float64 a )
39115144b0fSOlivier Houchard {
39215144b0fSOlivier Houchard 
39315144b0fSOlivier Houchard     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
39415144b0fSOlivier Houchard 
39515144b0fSOlivier Houchard }
39615144b0fSOlivier Houchard 
39715144b0fSOlivier Houchard /*
39815144b0fSOlivier Houchard -------------------------------------------------------------------------------
39915144b0fSOlivier Houchard Returns the sign bit of the double-precision floating-point value `a'.
40015144b0fSOlivier Houchard -------------------------------------------------------------------------------
40115144b0fSOlivier Houchard */
extractFloat64Sign(float64 a)40215144b0fSOlivier Houchard INLINE flag extractFloat64Sign( float64 a )
40315144b0fSOlivier Houchard {
40415144b0fSOlivier Houchard 
40515144b0fSOlivier Houchard     return FLOAT64_DEMANGLE(a)>>63;
40615144b0fSOlivier Houchard 
40715144b0fSOlivier Houchard }
40815144b0fSOlivier Houchard 
40915144b0fSOlivier Houchard /*
41015144b0fSOlivier Houchard -------------------------------------------------------------------------------
41115144b0fSOlivier Houchard Normalizes the subnormal double-precision floating-point value represented
41215144b0fSOlivier Houchard by the denormalized significand `aSig'.  The normalized exponent and
41315144b0fSOlivier Houchard significand are stored at the locations pointed to by `zExpPtr' and
41415144b0fSOlivier Houchard `zSigPtr', respectively.
41515144b0fSOlivier Houchard -------------------------------------------------------------------------------
41615144b0fSOlivier Houchard */
41715144b0fSOlivier Houchard static void
normalizeFloat64Subnormal(bits64 aSig,int16 * zExpPtr,bits64 * zSigPtr)41815144b0fSOlivier Houchard  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
41915144b0fSOlivier Houchard {
42015144b0fSOlivier Houchard     int8 shiftCount;
42115144b0fSOlivier Houchard 
42215144b0fSOlivier Houchard     shiftCount = countLeadingZeros64( aSig ) - 11;
42315144b0fSOlivier Houchard     *zSigPtr = aSig<<shiftCount;
42415144b0fSOlivier Houchard     *zExpPtr = 1 - shiftCount;
42515144b0fSOlivier Houchard 
42615144b0fSOlivier Houchard }
42715144b0fSOlivier Houchard 
42815144b0fSOlivier Houchard /*
42915144b0fSOlivier Houchard -------------------------------------------------------------------------------
43015144b0fSOlivier Houchard Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
43115144b0fSOlivier Houchard double-precision floating-point value, returning the result.  After being
43215144b0fSOlivier Houchard shifted into the proper positions, the three fields are simply added
43315144b0fSOlivier Houchard together to form the result.  This means that any integer portion of `zSig'
43415144b0fSOlivier Houchard will be added into the exponent.  Since a properly normalized significand
43515144b0fSOlivier Houchard will have an integer portion equal to 1, the `zExp' input should be 1 less
43615144b0fSOlivier Houchard than the desired result exponent whenever `zSig' is a complete, normalized
43715144b0fSOlivier Houchard significand.
43815144b0fSOlivier Houchard -------------------------------------------------------------------------------
43915144b0fSOlivier Houchard */
packFloat64(flag zSign,int16 zExp,bits64 zSig)44015144b0fSOlivier Houchard INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
44115144b0fSOlivier Houchard {
44215144b0fSOlivier Houchard 
44315144b0fSOlivier Houchard     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
44415144b0fSOlivier Houchard 			   ( ( (bits64) zExp )<<52 ) + zSig );
44515144b0fSOlivier Houchard 
44615144b0fSOlivier Houchard }
44715144b0fSOlivier Houchard 
/*
-------------------------------------------------------------------------------
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
and significand `zSig', and returns the proper double-precision floating-
point value corresponding to the abstract input.  Ordinarily, the abstract
value is simply rounded and packed into the double-precision format, with
the inexact exception raised if the abstract input cannot be represented
exactly.  However, if the abstract value is too large, the overflow and
inexact exceptions are raised and an infinity or maximal finite value is
returned.  If the abstract value is too small, the input value is rounded to
a subnormal number, and the underflow and inexact exceptions are raised if
the abstract input cannot be represented exactly as a subnormal double-
precision floating-point number.
    The input significand `zSig' has its binary point between bits 62
and 61, which is 10 bits to the left of the usual location.  This shifted
significand must be normalized or smaller.  If `zSig' is not normalized,
`zExp' must be 0; in that case, the result returned is a subnormal number,
and it must not require rounding.  In the usual case that `zSig' is
normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
The handling of underflow and overflow follows the IEC/IEEE Standard for
Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*/
roundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)47115144b0fSOlivier Houchard static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
47215144b0fSOlivier Houchard {
47315144b0fSOlivier Houchard     int8 roundingMode;
47415144b0fSOlivier Houchard     flag roundNearestEven;
47515144b0fSOlivier Houchard     int16 roundIncrement, roundBits;
47615144b0fSOlivier Houchard     flag isTiny;
47715144b0fSOlivier Houchard 
47815144b0fSOlivier Houchard     roundingMode = float_rounding_mode;
47915144b0fSOlivier Houchard     roundNearestEven = ( roundingMode == float_round_nearest_even );
48015144b0fSOlivier Houchard     roundIncrement = 0x200;
48115144b0fSOlivier Houchard     if ( ! roundNearestEven ) {
48215144b0fSOlivier Houchard         if ( roundingMode == float_round_to_zero ) {
48315144b0fSOlivier Houchard             roundIncrement = 0;
48415144b0fSOlivier Houchard         }
48515144b0fSOlivier Houchard         else {
48615144b0fSOlivier Houchard             roundIncrement = 0x3FF;
48715144b0fSOlivier Houchard             if ( zSign ) {
48815144b0fSOlivier Houchard                 if ( roundingMode == float_round_up ) roundIncrement = 0;
48915144b0fSOlivier Houchard             }
49015144b0fSOlivier Houchard             else {
49115144b0fSOlivier Houchard                 if ( roundingMode == float_round_down ) roundIncrement = 0;
49215144b0fSOlivier Houchard             }
49315144b0fSOlivier Houchard         }
49415144b0fSOlivier Houchard     }
49515144b0fSOlivier Houchard     roundBits = zSig & 0x3FF;
49615144b0fSOlivier Houchard     if ( 0x7FD <= (bits16) zExp ) {
49715144b0fSOlivier Houchard         if (    ( 0x7FD < zExp )
49815144b0fSOlivier Houchard              || (    ( zExp == 0x7FD )
49915144b0fSOlivier Houchard                   && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
50015144b0fSOlivier Houchard            ) {
50115144b0fSOlivier Houchard             float_raise( float_flag_overflow | float_flag_inexact );
50215144b0fSOlivier Houchard             return FLOAT64_MANGLE(
50315144b0fSOlivier Houchard 		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
50415144b0fSOlivier Houchard 		( roundIncrement == 0 ));
50515144b0fSOlivier Houchard         }
50615144b0fSOlivier Houchard         if ( zExp < 0 ) {
50715144b0fSOlivier Houchard             isTiny =
50815144b0fSOlivier Houchard                    ( float_detect_tininess == float_tininess_before_rounding )
50915144b0fSOlivier Houchard                 || ( zExp < -1 )
51015144b0fSOlivier Houchard                 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
51115144b0fSOlivier Houchard             shift64RightJamming( zSig, - zExp, &zSig );
51215144b0fSOlivier Houchard             zExp = 0;
51315144b0fSOlivier Houchard             roundBits = zSig & 0x3FF;
51415144b0fSOlivier Houchard             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
51515144b0fSOlivier Houchard         }
51615144b0fSOlivier Houchard     }
51715144b0fSOlivier Houchard     if ( roundBits ) float_exception_flags |= float_flag_inexact;
51815144b0fSOlivier Houchard     zSig = ( zSig + roundIncrement )>>10;
51915144b0fSOlivier Houchard     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
52015144b0fSOlivier Houchard     if ( zSig == 0 ) zExp = 0;
52115144b0fSOlivier Houchard     return packFloat64( zSign, zExp, zSig );
52215144b0fSOlivier Houchard 
52315144b0fSOlivier Houchard }
52415144b0fSOlivier Houchard 
52515144b0fSOlivier Houchard /*
52615144b0fSOlivier Houchard -------------------------------------------------------------------------------
52715144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
52815144b0fSOlivier Houchard and significand `zSig', and returns the proper double-precision floating-
52915144b0fSOlivier Houchard point value corresponding to the abstract input.  This routine is just like
53015144b0fSOlivier Houchard `roundAndPackFloat64' except that `zSig' does not have to be normalized.
53115144b0fSOlivier Houchard Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
53215144b0fSOlivier Houchard floating-point exponent.
53315144b0fSOlivier Houchard -------------------------------------------------------------------------------
53415144b0fSOlivier Houchard */
53515144b0fSOlivier Houchard static float64
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)53615144b0fSOlivier Houchard  normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
53715144b0fSOlivier Houchard {
53815144b0fSOlivier Houchard     int8 shiftCount;
53915144b0fSOlivier Houchard 
54015144b0fSOlivier Houchard     shiftCount = countLeadingZeros64( zSig ) - 1;
54115144b0fSOlivier Houchard     return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
54215144b0fSOlivier Houchard 
54315144b0fSOlivier Houchard }
54415144b0fSOlivier Houchard 
54515144b0fSOlivier Houchard #ifdef FLOATX80
54615144b0fSOlivier Houchard 
54715144b0fSOlivier Houchard /*
54815144b0fSOlivier Houchard -------------------------------------------------------------------------------
54915144b0fSOlivier Houchard Returns the fraction bits of the extended double-precision floating-point
55015144b0fSOlivier Houchard value `a'.
55115144b0fSOlivier Houchard -------------------------------------------------------------------------------
55215144b0fSOlivier Houchard */
extractFloatx80Frac(floatx80 a)55315144b0fSOlivier Houchard INLINE bits64 extractFloatx80Frac( floatx80 a )
55415144b0fSOlivier Houchard {
55515144b0fSOlivier Houchard 
55615144b0fSOlivier Houchard     return a.low;
55715144b0fSOlivier Houchard 
55815144b0fSOlivier Houchard }
55915144b0fSOlivier Houchard 
56015144b0fSOlivier Houchard /*
56115144b0fSOlivier Houchard -------------------------------------------------------------------------------
56215144b0fSOlivier Houchard Returns the exponent bits of the extended double-precision floating-point
56315144b0fSOlivier Houchard value `a'.
56415144b0fSOlivier Houchard -------------------------------------------------------------------------------
56515144b0fSOlivier Houchard */
extractFloatx80Exp(floatx80 a)56615144b0fSOlivier Houchard INLINE int32 extractFloatx80Exp( floatx80 a )
56715144b0fSOlivier Houchard {
56815144b0fSOlivier Houchard 
56915144b0fSOlivier Houchard     return a.high & 0x7FFF;
57015144b0fSOlivier Houchard 
57115144b0fSOlivier Houchard }
57215144b0fSOlivier Houchard 
57315144b0fSOlivier Houchard /*
57415144b0fSOlivier Houchard -------------------------------------------------------------------------------
57515144b0fSOlivier Houchard Returns the sign bit of the extended double-precision floating-point value
57615144b0fSOlivier Houchard `a'.
57715144b0fSOlivier Houchard -------------------------------------------------------------------------------
57815144b0fSOlivier Houchard */
extractFloatx80Sign(floatx80 a)57915144b0fSOlivier Houchard INLINE flag extractFloatx80Sign( floatx80 a )
58015144b0fSOlivier Houchard {
58115144b0fSOlivier Houchard 
58215144b0fSOlivier Houchard     return a.high>>15;
58315144b0fSOlivier Houchard 
58415144b0fSOlivier Houchard }
58515144b0fSOlivier Houchard 
58615144b0fSOlivier Houchard /*
58715144b0fSOlivier Houchard -------------------------------------------------------------------------------
58815144b0fSOlivier Houchard Normalizes the subnormal extended double-precision floating-point value
58915144b0fSOlivier Houchard represented by the denormalized significand `aSig'.  The normalized exponent
59015144b0fSOlivier Houchard and significand are stored at the locations pointed to by `zExpPtr' and
59115144b0fSOlivier Houchard `zSigPtr', respectively.
59215144b0fSOlivier Houchard -------------------------------------------------------------------------------
59315144b0fSOlivier Houchard */
59415144b0fSOlivier Houchard static void
normalizeFloatx80Subnormal(bits64 aSig,int32 * zExpPtr,bits64 * zSigPtr)59515144b0fSOlivier Houchard  normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
59615144b0fSOlivier Houchard {
59715144b0fSOlivier Houchard     int8 shiftCount;
59815144b0fSOlivier Houchard 
59915144b0fSOlivier Houchard     shiftCount = countLeadingZeros64( aSig );
60015144b0fSOlivier Houchard     *zSigPtr = aSig<<shiftCount;
60115144b0fSOlivier Houchard     *zExpPtr = 1 - shiftCount;
60215144b0fSOlivier Houchard 
60315144b0fSOlivier Houchard }
60415144b0fSOlivier Houchard 
60515144b0fSOlivier Houchard /*
60615144b0fSOlivier Houchard -------------------------------------------------------------------------------
60715144b0fSOlivier Houchard Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
60815144b0fSOlivier Houchard extended double-precision floating-point value, returning the result.
60915144b0fSOlivier Houchard -------------------------------------------------------------------------------
61015144b0fSOlivier Houchard */
packFloatx80(flag zSign,int32 zExp,bits64 zSig)61115144b0fSOlivier Houchard INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
61215144b0fSOlivier Houchard {
61315144b0fSOlivier Houchard     floatx80 z;
61415144b0fSOlivier Houchard 
61515144b0fSOlivier Houchard     z.low = zSig;
61615144b0fSOlivier Houchard     z.high = ( ( (bits16) zSign )<<15 ) + zExp;
61715144b0fSOlivier Houchard     return z;
61815144b0fSOlivier Houchard 
61915144b0fSOlivier Houchard }
62015144b0fSOlivier Houchard 
62115144b0fSOlivier Houchard /*
62215144b0fSOlivier Houchard -------------------------------------------------------------------------------
62315144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
62415144b0fSOlivier Houchard and extended significand formed by the concatenation of `zSig0' and `zSig1',
62515144b0fSOlivier Houchard and returns the proper extended double-precision floating-point value
62615144b0fSOlivier Houchard corresponding to the abstract input.  Ordinarily, the abstract value is
62715144b0fSOlivier Houchard rounded and packed into the extended double-precision format, with the
62815144b0fSOlivier Houchard inexact exception raised if the abstract input cannot be represented
62915144b0fSOlivier Houchard exactly.  However, if the abstract value is too large, the overflow and
63015144b0fSOlivier Houchard inexact exceptions are raised and an infinity or maximal finite value is
63115144b0fSOlivier Houchard returned.  If the abstract value is too small, the input value is rounded to
63215144b0fSOlivier Houchard a subnormal number, and the underflow and inexact exceptions are raised if
63315144b0fSOlivier Houchard the abstract input cannot be represented exactly as a subnormal extended
63415144b0fSOlivier Houchard double-precision floating-point number.
63515144b0fSOlivier Houchard     If `roundingPrecision' is 32 or 64, the result is rounded to the same
63615144b0fSOlivier Houchard number of bits as single or double precision, respectively.  Otherwise, the
63715144b0fSOlivier Houchard result is rounded to the full precision of the extended double-precision
63815144b0fSOlivier Houchard format.
63915144b0fSOlivier Houchard     The input significand must be normalized or smaller.  If the input
64015144b0fSOlivier Houchard significand is not normalized, `zExp' must be 0; in that case, the result
64115144b0fSOlivier Houchard returned is a subnormal number, and it must not require rounding.  The
64215144b0fSOlivier Houchard handling of underflow and overflow follows the IEC/IEEE Standard for Binary
64315144b0fSOlivier Houchard Floating-Point Arithmetic.
64415144b0fSOlivier Houchard -------------------------------------------------------------------------------
64515144b0fSOlivier Houchard */
64615144b0fSOlivier Houchard static floatx80
roundAndPackFloatx80(int8 roundingPrecision,flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)64715144b0fSOlivier Houchard  roundAndPackFloatx80(
64815144b0fSOlivier Houchard      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
64915144b0fSOlivier Houchard  )
65015144b0fSOlivier Houchard {
65115144b0fSOlivier Houchard     int8 roundingMode;
65215144b0fSOlivier Houchard     flag roundNearestEven, increment, isTiny;
65315144b0fSOlivier Houchard     int64 roundIncrement, roundMask, roundBits;
65415144b0fSOlivier Houchard 
65515144b0fSOlivier Houchard     roundingMode = float_rounding_mode;
65615144b0fSOlivier Houchard     roundNearestEven = ( roundingMode == float_round_nearest_even );
65715144b0fSOlivier Houchard     if ( roundingPrecision == 80 ) goto precision80;
65815144b0fSOlivier Houchard     if ( roundingPrecision == 64 ) {
65915144b0fSOlivier Houchard         roundIncrement = LIT64( 0x0000000000000400 );
66015144b0fSOlivier Houchard         roundMask = LIT64( 0x00000000000007FF );
66115144b0fSOlivier Houchard     }
66215144b0fSOlivier Houchard     else if ( roundingPrecision == 32 ) {
66315144b0fSOlivier Houchard         roundIncrement = LIT64( 0x0000008000000000 );
66415144b0fSOlivier Houchard         roundMask = LIT64( 0x000000FFFFFFFFFF );
66515144b0fSOlivier Houchard     }
66615144b0fSOlivier Houchard     else {
66715144b0fSOlivier Houchard         goto precision80;
66815144b0fSOlivier Houchard     }
66915144b0fSOlivier Houchard     zSig0 |= ( zSig1 != 0 );
67015144b0fSOlivier Houchard     if ( ! roundNearestEven ) {
67115144b0fSOlivier Houchard         if ( roundingMode == float_round_to_zero ) {
67215144b0fSOlivier Houchard             roundIncrement = 0;
67315144b0fSOlivier Houchard         }
67415144b0fSOlivier Houchard         else {
67515144b0fSOlivier Houchard             roundIncrement = roundMask;
67615144b0fSOlivier Houchard             if ( zSign ) {
67715144b0fSOlivier Houchard                 if ( roundingMode == float_round_up ) roundIncrement = 0;
67815144b0fSOlivier Houchard             }
67915144b0fSOlivier Houchard             else {
68015144b0fSOlivier Houchard                 if ( roundingMode == float_round_down ) roundIncrement = 0;
68115144b0fSOlivier Houchard             }
68215144b0fSOlivier Houchard         }
68315144b0fSOlivier Houchard     }
68415144b0fSOlivier Houchard     roundBits = zSig0 & roundMask;
68515144b0fSOlivier Houchard     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
68615144b0fSOlivier Houchard         if (    ( 0x7FFE < zExp )
68715144b0fSOlivier Houchard              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
68815144b0fSOlivier Houchard            ) {
68915144b0fSOlivier Houchard             goto overflow;
69015144b0fSOlivier Houchard         }
69115144b0fSOlivier Houchard         if ( zExp <= 0 ) {
69215144b0fSOlivier Houchard             isTiny =
69315144b0fSOlivier Houchard                    ( float_detect_tininess == float_tininess_before_rounding )
69415144b0fSOlivier Houchard                 || ( zExp < 0 )
69515144b0fSOlivier Houchard                 || ( zSig0 <= zSig0 + roundIncrement );
69615144b0fSOlivier Houchard             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
69715144b0fSOlivier Houchard             zExp = 0;
69815144b0fSOlivier Houchard             roundBits = zSig0 & roundMask;
69915144b0fSOlivier Houchard             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
70015144b0fSOlivier Houchard             if ( roundBits ) float_exception_flags |= float_flag_inexact;
70115144b0fSOlivier Houchard             zSig0 += roundIncrement;
70215144b0fSOlivier Houchard             if ( (sbits64) zSig0 < 0 ) zExp = 1;
70315144b0fSOlivier Houchard             roundIncrement = roundMask + 1;
70415144b0fSOlivier Houchard             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
70515144b0fSOlivier Houchard                 roundMask |= roundIncrement;
70615144b0fSOlivier Houchard             }
70715144b0fSOlivier Houchard             zSig0 &= ~ roundMask;
70815144b0fSOlivier Houchard             return packFloatx80( zSign, zExp, zSig0 );
70915144b0fSOlivier Houchard         }
71015144b0fSOlivier Houchard     }
71115144b0fSOlivier Houchard     if ( roundBits ) float_exception_flags |= float_flag_inexact;
71215144b0fSOlivier Houchard     zSig0 += roundIncrement;
71315144b0fSOlivier Houchard     if ( zSig0 < roundIncrement ) {
71415144b0fSOlivier Houchard         ++zExp;
71515144b0fSOlivier Houchard         zSig0 = LIT64( 0x8000000000000000 );
71615144b0fSOlivier Houchard     }
71715144b0fSOlivier Houchard     roundIncrement = roundMask + 1;
71815144b0fSOlivier Houchard     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
71915144b0fSOlivier Houchard         roundMask |= roundIncrement;
72015144b0fSOlivier Houchard     }
72115144b0fSOlivier Houchard     zSig0 &= ~ roundMask;
72215144b0fSOlivier Houchard     if ( zSig0 == 0 ) zExp = 0;
72315144b0fSOlivier Houchard     return packFloatx80( zSign, zExp, zSig0 );
72415144b0fSOlivier Houchard  precision80:
72515144b0fSOlivier Houchard     increment = ( (sbits64) zSig1 < 0 );
72615144b0fSOlivier Houchard     if ( ! roundNearestEven ) {
72715144b0fSOlivier Houchard         if ( roundingMode == float_round_to_zero ) {
72815144b0fSOlivier Houchard             increment = 0;
72915144b0fSOlivier Houchard         }
73015144b0fSOlivier Houchard         else {
73115144b0fSOlivier Houchard             if ( zSign ) {
73215144b0fSOlivier Houchard                 increment = ( roundingMode == float_round_down ) && zSig1;
73315144b0fSOlivier Houchard             }
73415144b0fSOlivier Houchard             else {
73515144b0fSOlivier Houchard                 increment = ( roundingMode == float_round_up ) && zSig1;
73615144b0fSOlivier Houchard             }
73715144b0fSOlivier Houchard         }
73815144b0fSOlivier Houchard     }
73915144b0fSOlivier Houchard     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
74015144b0fSOlivier Houchard         if (    ( 0x7FFE < zExp )
74115144b0fSOlivier Houchard              || (    ( zExp == 0x7FFE )
74215144b0fSOlivier Houchard                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
74315144b0fSOlivier Houchard                   && increment
74415144b0fSOlivier Houchard                 )
74515144b0fSOlivier Houchard            ) {
74615144b0fSOlivier Houchard             roundMask = 0;
74715144b0fSOlivier Houchard  overflow:
74815144b0fSOlivier Houchard             float_raise( float_flag_overflow | float_flag_inexact );
74915144b0fSOlivier Houchard             if (    ( roundingMode == float_round_to_zero )
75015144b0fSOlivier Houchard                  || ( zSign && ( roundingMode == float_round_up ) )
75115144b0fSOlivier Houchard                  || ( ! zSign && ( roundingMode == float_round_down ) )
75215144b0fSOlivier Houchard                ) {
75315144b0fSOlivier Houchard                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
75415144b0fSOlivier Houchard             }
75515144b0fSOlivier Houchard             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
75615144b0fSOlivier Houchard         }
75715144b0fSOlivier Houchard         if ( zExp <= 0 ) {
75815144b0fSOlivier Houchard             isTiny =
75915144b0fSOlivier Houchard                    ( float_detect_tininess == float_tininess_before_rounding )
76015144b0fSOlivier Houchard                 || ( zExp < 0 )
76115144b0fSOlivier Houchard                 || ! increment
76215144b0fSOlivier Houchard                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
76315144b0fSOlivier Houchard             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
76415144b0fSOlivier Houchard             zExp = 0;
76515144b0fSOlivier Houchard             if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
76615144b0fSOlivier Houchard             if ( zSig1 ) float_exception_flags |= float_flag_inexact;
76715144b0fSOlivier Houchard             if ( roundNearestEven ) {
76815144b0fSOlivier Houchard                 increment = ( (sbits64) zSig1 < 0 );
76915144b0fSOlivier Houchard             }
77015144b0fSOlivier Houchard             else {
77115144b0fSOlivier Houchard                 if ( zSign ) {
77215144b0fSOlivier Houchard                     increment = ( roundingMode == float_round_down ) && zSig1;
77315144b0fSOlivier Houchard                 }
77415144b0fSOlivier Houchard                 else {
77515144b0fSOlivier Houchard                     increment = ( roundingMode == float_round_up ) && zSig1;
77615144b0fSOlivier Houchard                 }
77715144b0fSOlivier Houchard             }
77815144b0fSOlivier Houchard             if ( increment ) {
77915144b0fSOlivier Houchard                 ++zSig0;
78015144b0fSOlivier Houchard                 zSig0 &=
78115144b0fSOlivier Houchard                     ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
78215144b0fSOlivier Houchard                 if ( (sbits64) zSig0 < 0 ) zExp = 1;
78315144b0fSOlivier Houchard             }
78415144b0fSOlivier Houchard             return packFloatx80( zSign, zExp, zSig0 );
78515144b0fSOlivier Houchard         }
78615144b0fSOlivier Houchard     }
78715144b0fSOlivier Houchard     if ( zSig1 ) float_exception_flags |= float_flag_inexact;
78815144b0fSOlivier Houchard     if ( increment ) {
78915144b0fSOlivier Houchard         ++zSig0;
79015144b0fSOlivier Houchard         if ( zSig0 == 0 ) {
79115144b0fSOlivier Houchard             ++zExp;
79215144b0fSOlivier Houchard             zSig0 = LIT64( 0x8000000000000000 );
79315144b0fSOlivier Houchard         }
79415144b0fSOlivier Houchard         else {
79515144b0fSOlivier Houchard             zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
79615144b0fSOlivier Houchard         }
79715144b0fSOlivier Houchard     }
79815144b0fSOlivier Houchard     else {
79915144b0fSOlivier Houchard         if ( zSig0 == 0 ) zExp = 0;
80015144b0fSOlivier Houchard     }
80115144b0fSOlivier Houchard     return packFloatx80( zSign, zExp, zSig0 );
80215144b0fSOlivier Houchard 
80315144b0fSOlivier Houchard }
80415144b0fSOlivier Houchard 
80515144b0fSOlivier Houchard /*
80615144b0fSOlivier Houchard -------------------------------------------------------------------------------
80715144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent
80815144b0fSOlivier Houchard `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
80915144b0fSOlivier Houchard and returns the proper extended double-precision floating-point value
81015144b0fSOlivier Houchard corresponding to the abstract input.  This routine is just like
81115144b0fSOlivier Houchard `roundAndPackFloatx80' except that the input significand does not have to be
81215144b0fSOlivier Houchard normalized.
81315144b0fSOlivier Houchard -------------------------------------------------------------------------------
81415144b0fSOlivier Houchard */
81515144b0fSOlivier Houchard static floatx80
normalizeRoundAndPackFloatx80(int8 roundingPrecision,flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)81615144b0fSOlivier Houchard  normalizeRoundAndPackFloatx80(
81715144b0fSOlivier Houchard      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
81815144b0fSOlivier Houchard  )
81915144b0fSOlivier Houchard {
82015144b0fSOlivier Houchard     int8 shiftCount;
82115144b0fSOlivier Houchard 
82215144b0fSOlivier Houchard     if ( zSig0 == 0 ) {
82315144b0fSOlivier Houchard         zSig0 = zSig1;
82415144b0fSOlivier Houchard         zSig1 = 0;
82515144b0fSOlivier Houchard         zExp -= 64;
82615144b0fSOlivier Houchard     }
82715144b0fSOlivier Houchard     shiftCount = countLeadingZeros64( zSig0 );
82815144b0fSOlivier Houchard     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
82915144b0fSOlivier Houchard     zExp -= shiftCount;
83015144b0fSOlivier Houchard     return
83115144b0fSOlivier Houchard         roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
83215144b0fSOlivier Houchard 
83315144b0fSOlivier Houchard }
83415144b0fSOlivier Houchard 
83515144b0fSOlivier Houchard #endif
83615144b0fSOlivier Houchard 
83715144b0fSOlivier Houchard #ifdef FLOAT128
83815144b0fSOlivier Houchard 
83915144b0fSOlivier Houchard /*
84015144b0fSOlivier Houchard -------------------------------------------------------------------------------
84115144b0fSOlivier Houchard Returns the least-significant 64 fraction bits of the quadruple-precision
84215144b0fSOlivier Houchard floating-point value `a'.
84315144b0fSOlivier Houchard -------------------------------------------------------------------------------
84415144b0fSOlivier Houchard */
extractFloat128Frac1(float128 a)84515144b0fSOlivier Houchard INLINE bits64 extractFloat128Frac1( float128 a )
84615144b0fSOlivier Houchard {
84715144b0fSOlivier Houchard 
84815144b0fSOlivier Houchard     return a.low;
84915144b0fSOlivier Houchard 
85015144b0fSOlivier Houchard }
85115144b0fSOlivier Houchard 
85215144b0fSOlivier Houchard /*
85315144b0fSOlivier Houchard -------------------------------------------------------------------------------
85415144b0fSOlivier Houchard Returns the most-significant 48 fraction bits of the quadruple-precision
85515144b0fSOlivier Houchard floating-point value `a'.
85615144b0fSOlivier Houchard -------------------------------------------------------------------------------
85715144b0fSOlivier Houchard */
extractFloat128Frac0(float128 a)85815144b0fSOlivier Houchard INLINE bits64 extractFloat128Frac0( float128 a )
85915144b0fSOlivier Houchard {
86015144b0fSOlivier Houchard 
86115144b0fSOlivier Houchard     return a.high & LIT64( 0x0000FFFFFFFFFFFF );
86215144b0fSOlivier Houchard 
86315144b0fSOlivier Houchard }
86415144b0fSOlivier Houchard 
86515144b0fSOlivier Houchard /*
86615144b0fSOlivier Houchard -------------------------------------------------------------------------------
86715144b0fSOlivier Houchard Returns the exponent bits of the quadruple-precision floating-point value
86815144b0fSOlivier Houchard `a'.
86915144b0fSOlivier Houchard -------------------------------------------------------------------------------
87015144b0fSOlivier Houchard */
extractFloat128Exp(float128 a)87115144b0fSOlivier Houchard INLINE int32 extractFloat128Exp( float128 a )
87215144b0fSOlivier Houchard {
87315144b0fSOlivier Houchard 
87415144b0fSOlivier Houchard     return ( a.high>>48 ) & 0x7FFF;
87515144b0fSOlivier Houchard 
87615144b0fSOlivier Houchard }
87715144b0fSOlivier Houchard 
87815144b0fSOlivier Houchard /*
87915144b0fSOlivier Houchard -------------------------------------------------------------------------------
88015144b0fSOlivier Houchard Returns the sign bit of the quadruple-precision floating-point value `a'.
88115144b0fSOlivier Houchard -------------------------------------------------------------------------------
88215144b0fSOlivier Houchard */
extractFloat128Sign(float128 a)88315144b0fSOlivier Houchard INLINE flag extractFloat128Sign( float128 a )
88415144b0fSOlivier Houchard {
88515144b0fSOlivier Houchard 
88615144b0fSOlivier Houchard     return a.high>>63;
88715144b0fSOlivier Houchard 
88815144b0fSOlivier Houchard }
88915144b0fSOlivier Houchard 
89015144b0fSOlivier Houchard /*
89115144b0fSOlivier Houchard -------------------------------------------------------------------------------
89215144b0fSOlivier Houchard Normalizes the subnormal quadruple-precision floating-point value
89315144b0fSOlivier Houchard represented by the denormalized significand formed by the concatenation of
89415144b0fSOlivier Houchard `aSig0' and `aSig1'.  The normalized exponent is stored at the location
89515144b0fSOlivier Houchard pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
89615144b0fSOlivier Houchard significand are stored at the location pointed to by `zSig0Ptr', and the
89715144b0fSOlivier Houchard least significant 64 bits of the normalized significand are stored at the
89815144b0fSOlivier Houchard location pointed to by `zSig1Ptr'.
89915144b0fSOlivier Houchard -------------------------------------------------------------------------------
90015144b0fSOlivier Houchard */
90115144b0fSOlivier Houchard static void
normalizeFloat128Subnormal(bits64 aSig0,bits64 aSig1,int32 * zExpPtr,bits64 * zSig0Ptr,bits64 * zSig1Ptr)90215144b0fSOlivier Houchard  normalizeFloat128Subnormal(
90315144b0fSOlivier Houchard      bits64 aSig0,
90415144b0fSOlivier Houchard      bits64 aSig1,
90515144b0fSOlivier Houchard      int32 *zExpPtr,
90615144b0fSOlivier Houchard      bits64 *zSig0Ptr,
90715144b0fSOlivier Houchard      bits64 *zSig1Ptr
90815144b0fSOlivier Houchard  )
90915144b0fSOlivier Houchard {
91015144b0fSOlivier Houchard     int8 shiftCount;
91115144b0fSOlivier Houchard 
91215144b0fSOlivier Houchard     if ( aSig0 == 0 ) {
91315144b0fSOlivier Houchard         shiftCount = countLeadingZeros64( aSig1 ) - 15;
91415144b0fSOlivier Houchard         if ( shiftCount < 0 ) {
91515144b0fSOlivier Houchard             *zSig0Ptr = aSig1>>( - shiftCount );
91615144b0fSOlivier Houchard             *zSig1Ptr = aSig1<<( shiftCount & 63 );
91715144b0fSOlivier Houchard         }
91815144b0fSOlivier Houchard         else {
91915144b0fSOlivier Houchard             *zSig0Ptr = aSig1<<shiftCount;
92015144b0fSOlivier Houchard             *zSig1Ptr = 0;
92115144b0fSOlivier Houchard         }
92215144b0fSOlivier Houchard         *zExpPtr = - shiftCount - 63;
92315144b0fSOlivier Houchard     }
92415144b0fSOlivier Houchard     else {
92515144b0fSOlivier Houchard         shiftCount = countLeadingZeros64( aSig0 ) - 15;
92615144b0fSOlivier Houchard         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
92715144b0fSOlivier Houchard         *zExpPtr = 1 - shiftCount;
92815144b0fSOlivier Houchard     }
92915144b0fSOlivier Houchard 
93015144b0fSOlivier Houchard }
93115144b0fSOlivier Houchard 
93215144b0fSOlivier Houchard /*
93315144b0fSOlivier Houchard -------------------------------------------------------------------------------
93415144b0fSOlivier Houchard Packs the sign `zSign', the exponent `zExp', and the significand formed
93515144b0fSOlivier Houchard by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
93615144b0fSOlivier Houchard floating-point value, returning the result.  After being shifted into the
93715144b0fSOlivier Houchard proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
93815144b0fSOlivier Houchard added together to form the most significant 32 bits of the result.  This
93915144b0fSOlivier Houchard means that any integer portion of `zSig0' will be added into the exponent.
94015144b0fSOlivier Houchard Since a properly normalized significand will have an integer portion equal
94115144b0fSOlivier Houchard to 1, the `zExp' input should be 1 less than the desired result exponent
94215144b0fSOlivier Houchard whenever `zSig0' and `zSig1' concatenated form a complete, normalized
94315144b0fSOlivier Houchard significand.
94415144b0fSOlivier Houchard -------------------------------------------------------------------------------
94515144b0fSOlivier Houchard */
94615144b0fSOlivier Houchard INLINE float128
packFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)94715144b0fSOlivier Houchard  packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
94815144b0fSOlivier Houchard {
94915144b0fSOlivier Houchard     float128 z;
95015144b0fSOlivier Houchard 
95115144b0fSOlivier Houchard     z.low = zSig1;
95215144b0fSOlivier Houchard     z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
95315144b0fSOlivier Houchard     return z;
95415144b0fSOlivier Houchard 
95515144b0fSOlivier Houchard }
95615144b0fSOlivier Houchard 
95715144b0fSOlivier Houchard /*
95815144b0fSOlivier Houchard -------------------------------------------------------------------------------
95915144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
96015144b0fSOlivier Houchard and extended significand formed by the concatenation of `zSig0', `zSig1',
96115144b0fSOlivier Houchard and `zSig2', and returns the proper quadruple-precision floating-point value
96215144b0fSOlivier Houchard corresponding to the abstract input.  Ordinarily, the abstract value is
96315144b0fSOlivier Houchard simply rounded and packed into the quadruple-precision format, with the
96415144b0fSOlivier Houchard inexact exception raised if the abstract input cannot be represented
96515144b0fSOlivier Houchard exactly.  However, if the abstract value is too large, the overflow and
96615144b0fSOlivier Houchard inexact exceptions are raised and an infinity or maximal finite value is
96715144b0fSOlivier Houchard returned.  If the abstract value is too small, the input value is rounded to
96815144b0fSOlivier Houchard a subnormal number, and the underflow and inexact exceptions are raised if
96915144b0fSOlivier Houchard the abstract input cannot be represented exactly as a subnormal quadruple-
97015144b0fSOlivier Houchard precision floating-point number.
97115144b0fSOlivier Houchard     The input significand must be normalized or smaller.  If the input
97215144b0fSOlivier Houchard significand is not normalized, `zExp' must be 0; in that case, the result
97315144b0fSOlivier Houchard returned is a subnormal number, and it must not require rounding.  In the
97415144b0fSOlivier Houchard usual case that the input significand is normalized, `zExp' must be 1 less
97515144b0fSOlivier Houchard than the ``true'' floating-point exponent.  The handling of underflow and
97615144b0fSOlivier Houchard overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
97715144b0fSOlivier Houchard -------------------------------------------------------------------------------
97815144b0fSOlivier Houchard */
97915144b0fSOlivier Houchard static float128
roundAndPackFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1,bits64 zSig2)98015144b0fSOlivier Houchard  roundAndPackFloat128(
98115144b0fSOlivier Houchard      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
98215144b0fSOlivier Houchard {
98315144b0fSOlivier Houchard     int8 roundingMode;
98415144b0fSOlivier Houchard     flag roundNearestEven, increment, isTiny;
98515144b0fSOlivier Houchard 
98615144b0fSOlivier Houchard     roundingMode = float_rounding_mode;
98715144b0fSOlivier Houchard     roundNearestEven = ( roundingMode == float_round_nearest_even );
98815144b0fSOlivier Houchard     increment = ( (sbits64) zSig2 < 0 );
98915144b0fSOlivier Houchard     if ( ! roundNearestEven ) {
99015144b0fSOlivier Houchard         if ( roundingMode == float_round_to_zero ) {
99115144b0fSOlivier Houchard             increment = 0;
99215144b0fSOlivier Houchard         }
99315144b0fSOlivier Houchard         else {
99415144b0fSOlivier Houchard             if ( zSign ) {
99515144b0fSOlivier Houchard                 increment = ( roundingMode == float_round_down ) && zSig2;
99615144b0fSOlivier Houchard             }
99715144b0fSOlivier Houchard             else {
99815144b0fSOlivier Houchard                 increment = ( roundingMode == float_round_up ) && zSig2;
99915144b0fSOlivier Houchard             }
100015144b0fSOlivier Houchard         }
100115144b0fSOlivier Houchard     }
100215144b0fSOlivier Houchard     if ( 0x7FFD <= (bits32) zExp ) {
100315144b0fSOlivier Houchard         if (    ( 0x7FFD < zExp )
100415144b0fSOlivier Houchard              || (    ( zExp == 0x7FFD )
100515144b0fSOlivier Houchard                   && eq128(
100615144b0fSOlivier Houchard                          LIT64( 0x0001FFFFFFFFFFFF ),
100715144b0fSOlivier Houchard                          LIT64( 0xFFFFFFFFFFFFFFFF ),
100815144b0fSOlivier Houchard                          zSig0,
100915144b0fSOlivier Houchard                          zSig1
101015144b0fSOlivier Houchard                      )
101115144b0fSOlivier Houchard                   && increment
101215144b0fSOlivier Houchard                 )
101315144b0fSOlivier Houchard            ) {
101415144b0fSOlivier Houchard             float_raise( float_flag_overflow | float_flag_inexact );
101515144b0fSOlivier Houchard             if (    ( roundingMode == float_round_to_zero )
101615144b0fSOlivier Houchard                  || ( zSign && ( roundingMode == float_round_up ) )
101715144b0fSOlivier Houchard                  || ( ! zSign && ( roundingMode == float_round_down ) )
101815144b0fSOlivier Houchard                ) {
101915144b0fSOlivier Houchard                 return
102015144b0fSOlivier Houchard                     packFloat128(
102115144b0fSOlivier Houchard                         zSign,
102215144b0fSOlivier Houchard                         0x7FFE,
102315144b0fSOlivier Houchard                         LIT64( 0x0000FFFFFFFFFFFF ),
102415144b0fSOlivier Houchard                         LIT64( 0xFFFFFFFFFFFFFFFF )
102515144b0fSOlivier Houchard                     );
102615144b0fSOlivier Houchard             }
102715144b0fSOlivier Houchard             return packFloat128( zSign, 0x7FFF, 0, 0 );
102815144b0fSOlivier Houchard         }
102915144b0fSOlivier Houchard         if ( zExp < 0 ) {
103015144b0fSOlivier Houchard             isTiny =
103115144b0fSOlivier Houchard                    ( float_detect_tininess == float_tininess_before_rounding )
103215144b0fSOlivier Houchard                 || ( zExp < -1 )
103315144b0fSOlivier Houchard                 || ! increment
103415144b0fSOlivier Houchard                 || lt128(
103515144b0fSOlivier Houchard                        zSig0,
103615144b0fSOlivier Houchard                        zSig1,
103715144b0fSOlivier Houchard                        LIT64( 0x0001FFFFFFFFFFFF ),
103815144b0fSOlivier Houchard                        LIT64( 0xFFFFFFFFFFFFFFFF )
103915144b0fSOlivier Houchard                    );
104015144b0fSOlivier Houchard             shift128ExtraRightJamming(
104115144b0fSOlivier Houchard                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
104215144b0fSOlivier Houchard             zExp = 0;
104315144b0fSOlivier Houchard             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
104415144b0fSOlivier Houchard             if ( roundNearestEven ) {
104515144b0fSOlivier Houchard                 increment = ( (sbits64) zSig2 < 0 );
104615144b0fSOlivier Houchard             }
104715144b0fSOlivier Houchard             else {
104815144b0fSOlivier Houchard                 if ( zSign ) {
104915144b0fSOlivier Houchard                     increment = ( roundingMode == float_round_down ) && zSig2;
105015144b0fSOlivier Houchard                 }
105115144b0fSOlivier Houchard                 else {
105215144b0fSOlivier Houchard                     increment = ( roundingMode == float_round_up ) && zSig2;
105315144b0fSOlivier Houchard                 }
105415144b0fSOlivier Houchard             }
105515144b0fSOlivier Houchard         }
105615144b0fSOlivier Houchard     }
105715144b0fSOlivier Houchard     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
105815144b0fSOlivier Houchard     if ( increment ) {
105915144b0fSOlivier Houchard         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
106015144b0fSOlivier Houchard         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
106115144b0fSOlivier Houchard     }
106215144b0fSOlivier Houchard     else {
106315144b0fSOlivier Houchard         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
106415144b0fSOlivier Houchard     }
106515144b0fSOlivier Houchard     return packFloat128( zSign, zExp, zSig0, zSig1 );
106615144b0fSOlivier Houchard 
106715144b0fSOlivier Houchard }
106815144b0fSOlivier Houchard 
106915144b0fSOlivier Houchard /*
107015144b0fSOlivier Houchard -------------------------------------------------------------------------------
107115144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
107215144b0fSOlivier Houchard and significand formed by the concatenation of `zSig0' and `zSig1', and
107315144b0fSOlivier Houchard returns the proper quadruple-precision floating-point value corresponding
107415144b0fSOlivier Houchard to the abstract input.  This routine is just like `roundAndPackFloat128'
107515144b0fSOlivier Houchard except that the input significand has fewer bits and does not have to be
107615144b0fSOlivier Houchard normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
107715144b0fSOlivier Houchard point exponent.
107815144b0fSOlivier Houchard -------------------------------------------------------------------------------
107915144b0fSOlivier Houchard */
108015144b0fSOlivier Houchard static float128
normalizeRoundAndPackFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)108115144b0fSOlivier Houchard  normalizeRoundAndPackFloat128(
108215144b0fSOlivier Houchard      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
108315144b0fSOlivier Houchard {
108415144b0fSOlivier Houchard     int8 shiftCount;
108515144b0fSOlivier Houchard     bits64 zSig2;
108615144b0fSOlivier Houchard 
108715144b0fSOlivier Houchard     if ( zSig0 == 0 ) {
108815144b0fSOlivier Houchard         zSig0 = zSig1;
108915144b0fSOlivier Houchard         zSig1 = 0;
109015144b0fSOlivier Houchard         zExp -= 64;
109115144b0fSOlivier Houchard     }
109215144b0fSOlivier Houchard     shiftCount = countLeadingZeros64( zSig0 ) - 15;
109315144b0fSOlivier Houchard     if ( 0 <= shiftCount ) {
109415144b0fSOlivier Houchard         zSig2 = 0;
109515144b0fSOlivier Houchard         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
109615144b0fSOlivier Houchard     }
109715144b0fSOlivier Houchard     else {
109815144b0fSOlivier Houchard         shift128ExtraRightJamming(
109915144b0fSOlivier Houchard             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
110015144b0fSOlivier Houchard     }
110115144b0fSOlivier Houchard     zExp -= shiftCount;
110215144b0fSOlivier Houchard     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
110315144b0fSOlivier Houchard 
110415144b0fSOlivier Houchard }
110515144b0fSOlivier Houchard 
110615144b0fSOlivier Houchard #endif
110715144b0fSOlivier Houchard 
110815144b0fSOlivier Houchard /*
110915144b0fSOlivier Houchard -------------------------------------------------------------------------------
111015144b0fSOlivier Houchard Returns the result of converting the 32-bit two's complement integer `a'
111115144b0fSOlivier Houchard to the single-precision floating-point format.  The conversion is performed
111215144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
111315144b0fSOlivier Houchard -------------------------------------------------------------------------------
111415144b0fSOlivier Houchard */
int32_to_float32(int32 a)111515144b0fSOlivier Houchard float32 int32_to_float32( int32 a )
111615144b0fSOlivier Houchard {
111715144b0fSOlivier Houchard     flag zSign;
111815144b0fSOlivier Houchard 
111915144b0fSOlivier Houchard     if ( a == 0 ) return 0;
112015144b0fSOlivier Houchard     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
112115144b0fSOlivier Houchard     zSign = ( a < 0 );
112215144b0fSOlivier Houchard     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
112315144b0fSOlivier Houchard 
112415144b0fSOlivier Houchard }
112515144b0fSOlivier Houchard 
1126*7ea324dfSDavid Schultz #ifndef SOFTFLOAT_FOR_GCC /* __floatunsisf is in libgcc */
uint32_to_float32(uint32 a)1127c36abe0dSDavid Schultz float32 uint32_to_float32( uint32 a )
1128c36abe0dSDavid Schultz {
1129c36abe0dSDavid Schultz     if ( a == 0 ) return 0;
1130c36abe0dSDavid Schultz     if ( a & (bits32) 0x80000000 )
1131c36abe0dSDavid Schultz 	return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
1132c36abe0dSDavid Schultz     return normalizeRoundAndPackFloat32( 0, 0x9C, a );
1133c36abe0dSDavid Schultz }
1134*7ea324dfSDavid Schultz #endif
1135c36abe0dSDavid Schultz 
1136c36abe0dSDavid Schultz 
113715144b0fSOlivier Houchard /*
113815144b0fSOlivier Houchard -------------------------------------------------------------------------------
113915144b0fSOlivier Houchard Returns the result of converting the 32-bit two's complement integer `a'
114015144b0fSOlivier Houchard to the double-precision floating-point format.  The conversion is performed
114115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
114215144b0fSOlivier Houchard -------------------------------------------------------------------------------
114315144b0fSOlivier Houchard */
int32_to_float64(int32 a)114415144b0fSOlivier Houchard float64 int32_to_float64( int32 a )
114515144b0fSOlivier Houchard {
114615144b0fSOlivier Houchard     flag zSign;
114715144b0fSOlivier Houchard     uint32 absA;
114815144b0fSOlivier Houchard     int8 shiftCount;
114915144b0fSOlivier Houchard     bits64 zSig;
115015144b0fSOlivier Houchard 
115115144b0fSOlivier Houchard     if ( a == 0 ) return 0;
115215144b0fSOlivier Houchard     zSign = ( a < 0 );
115315144b0fSOlivier Houchard     absA = zSign ? - a : a;
115415144b0fSOlivier Houchard     shiftCount = countLeadingZeros32( absA ) + 21;
115515144b0fSOlivier Houchard     zSig = absA;
115615144b0fSOlivier Houchard     return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
115715144b0fSOlivier Houchard 
115815144b0fSOlivier Houchard }
115915144b0fSOlivier Houchard 
1160*7ea324dfSDavid Schultz #ifndef SOFTFLOAT_FOR_GCC /* __floatunsidf is in libgcc */
uint32_to_float64(uint32 a)1161c36abe0dSDavid Schultz float64 uint32_to_float64( uint32 a )
1162c36abe0dSDavid Schultz {
1163c36abe0dSDavid Schultz     int8 shiftCount;
1164c36abe0dSDavid Schultz     bits64 zSig = a;
1165c36abe0dSDavid Schultz 
1166c36abe0dSDavid Schultz     if ( a == 0 ) return 0;
1167c36abe0dSDavid Schultz     shiftCount = countLeadingZeros32( a ) + 21;
1168c36abe0dSDavid Schultz     return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
1169c36abe0dSDavid Schultz 
1170c36abe0dSDavid Schultz }
1171*7ea324dfSDavid Schultz #endif
1172c36abe0dSDavid Schultz 
117315144b0fSOlivier Houchard #ifdef FLOATX80
117415144b0fSOlivier Houchard 
117515144b0fSOlivier Houchard /*
117615144b0fSOlivier Houchard -------------------------------------------------------------------------------
117715144b0fSOlivier Houchard Returns the result of converting the 32-bit two's complement integer `a'
117815144b0fSOlivier Houchard to the extended double-precision floating-point format.  The conversion
117915144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
118015144b0fSOlivier Houchard Arithmetic.
118115144b0fSOlivier Houchard -------------------------------------------------------------------------------
118215144b0fSOlivier Houchard */
int32_to_floatx80(int32 a)118315144b0fSOlivier Houchard floatx80 int32_to_floatx80( int32 a )
118415144b0fSOlivier Houchard {
118515144b0fSOlivier Houchard     flag zSign;
118615144b0fSOlivier Houchard     uint32 absA;
118715144b0fSOlivier Houchard     int8 shiftCount;
118815144b0fSOlivier Houchard     bits64 zSig;
118915144b0fSOlivier Houchard 
119015144b0fSOlivier Houchard     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
119115144b0fSOlivier Houchard     zSign = ( a < 0 );
119215144b0fSOlivier Houchard     absA = zSign ? - a : a;
119315144b0fSOlivier Houchard     shiftCount = countLeadingZeros32( absA ) + 32;
119415144b0fSOlivier Houchard     zSig = absA;
119515144b0fSOlivier Houchard     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
119615144b0fSOlivier Houchard 
119715144b0fSOlivier Houchard }
119815144b0fSOlivier Houchard 
uint32_to_floatx80(uint32 a)1199c36abe0dSDavid Schultz floatx80 uint32_to_floatx80( uint32 a )
1200c36abe0dSDavid Schultz {
1201c36abe0dSDavid Schultz     int8 shiftCount;
1202c36abe0dSDavid Schultz     bits64 zSig = a;
1203c36abe0dSDavid Schultz 
1204c36abe0dSDavid Schultz     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1205c36abe0dSDavid Schultz     shiftCount = countLeadingZeros32( a ) + 32;
1206c36abe0dSDavid Schultz     return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
1207c36abe0dSDavid Schultz 
1208c36abe0dSDavid Schultz }
1209c36abe0dSDavid Schultz 
121015144b0fSOlivier Houchard #endif
121115144b0fSOlivier Houchard 
121215144b0fSOlivier Houchard #ifdef FLOAT128
121315144b0fSOlivier Houchard 
121415144b0fSOlivier Houchard /*
121515144b0fSOlivier Houchard -------------------------------------------------------------------------------
121615144b0fSOlivier Houchard Returns the result of converting the 32-bit two's complement integer `a' to
121715144b0fSOlivier Houchard the quadruple-precision floating-point format.  The conversion is performed
121815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
121915144b0fSOlivier Houchard -------------------------------------------------------------------------------
122015144b0fSOlivier Houchard */
int32_to_float128(int32 a)122115144b0fSOlivier Houchard float128 int32_to_float128( int32 a )
122215144b0fSOlivier Houchard {
122315144b0fSOlivier Houchard     flag zSign;
122415144b0fSOlivier Houchard     uint32 absA;
122515144b0fSOlivier Houchard     int8 shiftCount;
122615144b0fSOlivier Houchard     bits64 zSig0;
122715144b0fSOlivier Houchard 
122815144b0fSOlivier Houchard     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
122915144b0fSOlivier Houchard     zSign = ( a < 0 );
123015144b0fSOlivier Houchard     absA = zSign ? - a : a;
123115144b0fSOlivier Houchard     shiftCount = countLeadingZeros32( absA ) + 17;
123215144b0fSOlivier Houchard     zSig0 = absA;
123315144b0fSOlivier Houchard     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
123415144b0fSOlivier Houchard 
123515144b0fSOlivier Houchard }
123615144b0fSOlivier Houchard 
uint32_to_float128(uint32 a)1237c36abe0dSDavid Schultz float128 uint32_to_float128( uint32 a )
1238c36abe0dSDavid Schultz {
1239c36abe0dSDavid Schultz     int8 shiftCount;
1240c36abe0dSDavid Schultz     bits64 zSig0 = a;
1241c36abe0dSDavid Schultz 
1242c36abe0dSDavid Schultz     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1243c36abe0dSDavid Schultz     shiftCount = countLeadingZeros32( a ) + 17;
1244c36abe0dSDavid Schultz     return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1245c36abe0dSDavid Schultz 
1246c36abe0dSDavid Schultz }
1247c36abe0dSDavid Schultz 
124815144b0fSOlivier Houchard #endif
124915144b0fSOlivier Houchard 
125015144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
125115144b0fSOlivier Houchard /*
125215144b0fSOlivier Houchard -------------------------------------------------------------------------------
125315144b0fSOlivier Houchard Returns the result of converting the 64-bit two's complement integer `a'
125415144b0fSOlivier Houchard to the single-precision floating-point format.  The conversion is performed
125515144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
125615144b0fSOlivier Houchard -------------------------------------------------------------------------------
125715144b0fSOlivier Houchard */
int64_to_float32(int64 a)125815144b0fSOlivier Houchard float32 int64_to_float32( int64 a )
125915144b0fSOlivier Houchard {
126015144b0fSOlivier Houchard     flag zSign;
126115144b0fSOlivier Houchard     uint64 absA;
126215144b0fSOlivier Houchard     int8 shiftCount;
126315144b0fSOlivier Houchard 
126415144b0fSOlivier Houchard     if ( a == 0 ) return 0;
126515144b0fSOlivier Houchard     zSign = ( a < 0 );
126615144b0fSOlivier Houchard     absA = zSign ? - a : a;
126715144b0fSOlivier Houchard     shiftCount = countLeadingZeros64( absA ) - 40;
126815144b0fSOlivier Houchard     if ( 0 <= shiftCount ) {
126915144b0fSOlivier Houchard         return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
127015144b0fSOlivier Houchard     }
127115144b0fSOlivier Houchard     else {
127215144b0fSOlivier Houchard         shiftCount += 7;
127315144b0fSOlivier Houchard         if ( shiftCount < 0 ) {
127415144b0fSOlivier Houchard             shift64RightJamming( absA, - shiftCount, &absA );
127515144b0fSOlivier Houchard         }
127615144b0fSOlivier Houchard         else {
127715144b0fSOlivier Houchard             absA <<= shiftCount;
127815144b0fSOlivier Houchard         }
127915144b0fSOlivier Houchard         return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
128015144b0fSOlivier Houchard     }
128115144b0fSOlivier Houchard 
128215144b0fSOlivier Houchard }
128315144b0fSOlivier Houchard 
128415144b0fSOlivier Houchard /*
128515144b0fSOlivier Houchard -------------------------------------------------------------------------------
128615144b0fSOlivier Houchard Returns the result of converting the 64-bit two's complement integer `a'
128715144b0fSOlivier Houchard to the double-precision floating-point format.  The conversion is performed
128815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
128915144b0fSOlivier Houchard -------------------------------------------------------------------------------
129015144b0fSOlivier Houchard */
int64_to_float64(int64 a)129115144b0fSOlivier Houchard float64 int64_to_float64( int64 a )
129215144b0fSOlivier Houchard {
129315144b0fSOlivier Houchard     flag zSign;
129415144b0fSOlivier Houchard 
129515144b0fSOlivier Houchard     if ( a == 0 ) return 0;
129615144b0fSOlivier Houchard     if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
129715144b0fSOlivier Houchard         return packFloat64( 1, 0x43E, 0 );
129815144b0fSOlivier Houchard     }
129915144b0fSOlivier Houchard     zSign = ( a < 0 );
130015144b0fSOlivier Houchard     return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
130115144b0fSOlivier Houchard 
130215144b0fSOlivier Houchard }
130315144b0fSOlivier Houchard 
130415144b0fSOlivier Houchard #ifdef FLOATX80
130515144b0fSOlivier Houchard 
130615144b0fSOlivier Houchard /*
130715144b0fSOlivier Houchard -------------------------------------------------------------------------------
130815144b0fSOlivier Houchard Returns the result of converting the 64-bit two's complement integer `a'
130915144b0fSOlivier Houchard to the extended double-precision floating-point format.  The conversion
131015144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
131115144b0fSOlivier Houchard Arithmetic.
131215144b0fSOlivier Houchard -------------------------------------------------------------------------------
131315144b0fSOlivier Houchard */
int64_to_floatx80(int64 a)131415144b0fSOlivier Houchard floatx80 int64_to_floatx80( int64 a )
131515144b0fSOlivier Houchard {
131615144b0fSOlivier Houchard     flag zSign;
131715144b0fSOlivier Houchard     uint64 absA;
131815144b0fSOlivier Houchard     int8 shiftCount;
131915144b0fSOlivier Houchard 
132015144b0fSOlivier Houchard     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
132115144b0fSOlivier Houchard     zSign = ( a < 0 );
132215144b0fSOlivier Houchard     absA = zSign ? - a : a;
132315144b0fSOlivier Houchard     shiftCount = countLeadingZeros64( absA );
132415144b0fSOlivier Houchard     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
132515144b0fSOlivier Houchard 
132615144b0fSOlivier Houchard }
132715144b0fSOlivier Houchard 
132815144b0fSOlivier Houchard #endif
132915144b0fSOlivier Houchard 
133015144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
133115144b0fSOlivier Houchard 
133215144b0fSOlivier Houchard #ifdef FLOAT128
133315144b0fSOlivier Houchard 
133415144b0fSOlivier Houchard /*
133515144b0fSOlivier Houchard -------------------------------------------------------------------------------
133615144b0fSOlivier Houchard Returns the result of converting the 64-bit two's complement integer `a' to
133715144b0fSOlivier Houchard the quadruple-precision floating-point format.  The conversion is performed
133815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
133915144b0fSOlivier Houchard -------------------------------------------------------------------------------
134015144b0fSOlivier Houchard */
int64_to_float128(int64 a)134115144b0fSOlivier Houchard float128 int64_to_float128( int64 a )
134215144b0fSOlivier Houchard {
134315144b0fSOlivier Houchard     flag zSign;
134415144b0fSOlivier Houchard     uint64 absA;
134515144b0fSOlivier Houchard     int8 shiftCount;
134615144b0fSOlivier Houchard     int32 zExp;
134715144b0fSOlivier Houchard     bits64 zSig0, zSig1;
134815144b0fSOlivier Houchard 
134915144b0fSOlivier Houchard     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
135015144b0fSOlivier Houchard     zSign = ( a < 0 );
135115144b0fSOlivier Houchard     absA = zSign ? - a : a;
135215144b0fSOlivier Houchard     shiftCount = countLeadingZeros64( absA ) + 49;
135315144b0fSOlivier Houchard     zExp = 0x406E - shiftCount;
135415144b0fSOlivier Houchard     if ( 64 <= shiftCount ) {
135515144b0fSOlivier Houchard         zSig1 = 0;
135615144b0fSOlivier Houchard         zSig0 = absA;
135715144b0fSOlivier Houchard         shiftCount -= 64;
135815144b0fSOlivier Houchard     }
135915144b0fSOlivier Houchard     else {
136015144b0fSOlivier Houchard         zSig1 = absA;
136115144b0fSOlivier Houchard         zSig0 = 0;
136215144b0fSOlivier Houchard     }
136315144b0fSOlivier Houchard     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
136415144b0fSOlivier Houchard     return packFloat128( zSign, zExp, zSig0, zSig1 );
136515144b0fSOlivier Houchard 
136615144b0fSOlivier Houchard }
136715144b0fSOlivier Houchard 
136815144b0fSOlivier Houchard #endif
136915144b0fSOlivier Houchard 
137015144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
137115144b0fSOlivier Houchard /*
137215144b0fSOlivier Houchard -------------------------------------------------------------------------------
137315144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
137415144b0fSOlivier Houchard `a' to the 32-bit two's complement integer format.  The conversion is
137515144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
137615144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
137715144b0fSOlivier Houchard according to the current rounding mode.  If `a' is a NaN, the largest
137815144b0fSOlivier Houchard positive integer is returned.  Otherwise, if the conversion overflows, the
137915144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
138015144b0fSOlivier Houchard -------------------------------------------------------------------------------
138115144b0fSOlivier Houchard */
float32_to_int32(float32 a)138215144b0fSOlivier Houchard int32 float32_to_int32( float32 a )
138315144b0fSOlivier Houchard {
138415144b0fSOlivier Houchard     flag aSign;
138515144b0fSOlivier Houchard     int16 aExp, shiftCount;
138615144b0fSOlivier Houchard     bits32 aSig;
138715144b0fSOlivier Houchard     bits64 aSig64;
138815144b0fSOlivier Houchard 
138915144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
139015144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
139115144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
139215144b0fSOlivier Houchard     if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
139315144b0fSOlivier Houchard     if ( aExp ) aSig |= 0x00800000;
139415144b0fSOlivier Houchard     shiftCount = 0xAF - aExp;
139515144b0fSOlivier Houchard     aSig64 = aSig;
139615144b0fSOlivier Houchard     aSig64 <<= 32;
139715144b0fSOlivier Houchard     if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
139815144b0fSOlivier Houchard     return roundAndPackInt32( aSign, aSig64 );
139915144b0fSOlivier Houchard 
140015144b0fSOlivier Houchard }
140115144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
140215144b0fSOlivier Houchard 
140315144b0fSOlivier Houchard /*
140415144b0fSOlivier Houchard -------------------------------------------------------------------------------
140515144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
140615144b0fSOlivier Houchard `a' to the 32-bit two's complement integer format.  The conversion is
140715144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
140815144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.
140915144b0fSOlivier Houchard If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
141015144b0fSOlivier Houchard the conversion overflows, the largest integer with the same sign as `a' is
141115144b0fSOlivier Houchard returned.
141215144b0fSOlivier Houchard -------------------------------------------------------------------------------
141315144b0fSOlivier Houchard */
float32_to_int32_round_to_zero(float32 a)141415144b0fSOlivier Houchard int32 float32_to_int32_round_to_zero( float32 a )
141515144b0fSOlivier Houchard {
141615144b0fSOlivier Houchard     flag aSign;
141715144b0fSOlivier Houchard     int16 aExp, shiftCount;
141815144b0fSOlivier Houchard     bits32 aSig;
141915144b0fSOlivier Houchard     int32 z;
142015144b0fSOlivier Houchard 
142115144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
142215144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
142315144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
142415144b0fSOlivier Houchard     shiftCount = aExp - 0x9E;
142515144b0fSOlivier Houchard     if ( 0 <= shiftCount ) {
142615144b0fSOlivier Houchard         if ( a != 0xCF000000 ) {
142715144b0fSOlivier Houchard             float_raise( float_flag_invalid );
142815144b0fSOlivier Houchard             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
142915144b0fSOlivier Houchard         }
143015144b0fSOlivier Houchard         return (sbits32) 0x80000000;
143115144b0fSOlivier Houchard     }
143215144b0fSOlivier Houchard     else if ( aExp <= 0x7E ) {
143315144b0fSOlivier Houchard         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
143415144b0fSOlivier Houchard         return 0;
143515144b0fSOlivier Houchard     }
143615144b0fSOlivier Houchard     aSig = ( aSig | 0x00800000 )<<8;
143715144b0fSOlivier Houchard     z = aSig>>( - shiftCount );
143815144b0fSOlivier Houchard     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
143915144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
144015144b0fSOlivier Houchard     }
144115144b0fSOlivier Houchard     if ( aSign ) z = - z;
144215144b0fSOlivier Houchard     return z;
144315144b0fSOlivier Houchard 
144415144b0fSOlivier Houchard }
144515144b0fSOlivier Houchard 
144615144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
144715144b0fSOlivier Houchard /*
144815144b0fSOlivier Houchard -------------------------------------------------------------------------------
144915144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
145015144b0fSOlivier Houchard `a' to the 64-bit two's complement integer format.  The conversion is
145115144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
145215144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
145315144b0fSOlivier Houchard according to the current rounding mode.  If `a' is a NaN, the largest
145415144b0fSOlivier Houchard positive integer is returned.  Otherwise, if the conversion overflows, the
145515144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
145615144b0fSOlivier Houchard -------------------------------------------------------------------------------
145715144b0fSOlivier Houchard */
float32_to_int64(float32 a)145815144b0fSOlivier Houchard int64 float32_to_int64( float32 a )
145915144b0fSOlivier Houchard {
146015144b0fSOlivier Houchard     flag aSign;
146115144b0fSOlivier Houchard     int16 aExp, shiftCount;
146215144b0fSOlivier Houchard     bits32 aSig;
146315144b0fSOlivier Houchard     bits64 aSig64, aSigExtra;
146415144b0fSOlivier Houchard 
146515144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
146615144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
146715144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
146815144b0fSOlivier Houchard     shiftCount = 0xBE - aExp;
146915144b0fSOlivier Houchard     if ( shiftCount < 0 ) {
147015144b0fSOlivier Houchard         float_raise( float_flag_invalid );
147115144b0fSOlivier Houchard         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
147215144b0fSOlivier Houchard             return LIT64( 0x7FFFFFFFFFFFFFFF );
147315144b0fSOlivier Houchard         }
147415144b0fSOlivier Houchard         return (sbits64) LIT64( 0x8000000000000000 );
147515144b0fSOlivier Houchard     }
147615144b0fSOlivier Houchard     if ( aExp ) aSig |= 0x00800000;
147715144b0fSOlivier Houchard     aSig64 = aSig;
147815144b0fSOlivier Houchard     aSig64 <<= 40;
147915144b0fSOlivier Houchard     shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
148015144b0fSOlivier Houchard     return roundAndPackInt64( aSign, aSig64, aSigExtra );
148115144b0fSOlivier Houchard 
148215144b0fSOlivier Houchard }
148315144b0fSOlivier Houchard 
148415144b0fSOlivier Houchard /*
148515144b0fSOlivier Houchard -------------------------------------------------------------------------------
148615144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
148715144b0fSOlivier Houchard `a' to the 64-bit two's complement integer format.  The conversion is
148815144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
148915144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.  If
149015144b0fSOlivier Houchard `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
149115144b0fSOlivier Houchard conversion overflows, the largest integer with the same sign as `a' is
149215144b0fSOlivier Houchard returned.
149315144b0fSOlivier Houchard -------------------------------------------------------------------------------
149415144b0fSOlivier Houchard */
float32_to_int64_round_to_zero(float32 a)149515144b0fSOlivier Houchard int64 float32_to_int64_round_to_zero( float32 a )
149615144b0fSOlivier Houchard {
149715144b0fSOlivier Houchard     flag aSign;
149815144b0fSOlivier Houchard     int16 aExp, shiftCount;
149915144b0fSOlivier Houchard     bits32 aSig;
150015144b0fSOlivier Houchard     bits64 aSig64;
150115144b0fSOlivier Houchard     int64 z;
150215144b0fSOlivier Houchard 
150315144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
150415144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
150515144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
150615144b0fSOlivier Houchard     shiftCount = aExp - 0xBE;
150715144b0fSOlivier Houchard     if ( 0 <= shiftCount ) {
150815144b0fSOlivier Houchard         if ( a != 0xDF000000 ) {
150915144b0fSOlivier Houchard             float_raise( float_flag_invalid );
151015144b0fSOlivier Houchard             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
151115144b0fSOlivier Houchard                 return LIT64( 0x7FFFFFFFFFFFFFFF );
151215144b0fSOlivier Houchard             }
151315144b0fSOlivier Houchard         }
151415144b0fSOlivier Houchard         return (sbits64) LIT64( 0x8000000000000000 );
151515144b0fSOlivier Houchard     }
151615144b0fSOlivier Houchard     else if ( aExp <= 0x7E ) {
151715144b0fSOlivier Houchard         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
151815144b0fSOlivier Houchard         return 0;
151915144b0fSOlivier Houchard     }
152015144b0fSOlivier Houchard     aSig64 = aSig | 0x00800000;
152115144b0fSOlivier Houchard     aSig64 <<= 40;
152215144b0fSOlivier Houchard     z = aSig64>>( - shiftCount );
152315144b0fSOlivier Houchard     if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
152415144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
152515144b0fSOlivier Houchard     }
152615144b0fSOlivier Houchard     if ( aSign ) z = - z;
152715144b0fSOlivier Houchard     return z;
152815144b0fSOlivier Houchard 
152915144b0fSOlivier Houchard }
153015144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
153115144b0fSOlivier Houchard 
153215144b0fSOlivier Houchard /*
153315144b0fSOlivier Houchard -------------------------------------------------------------------------------
153415144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
153515144b0fSOlivier Houchard `a' to the double-precision floating-point format.  The conversion is
153615144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
153715144b0fSOlivier Houchard Arithmetic.
153815144b0fSOlivier Houchard -------------------------------------------------------------------------------
153915144b0fSOlivier Houchard */
float32_to_float64(float32 a)154015144b0fSOlivier Houchard float64 float32_to_float64( float32 a )
154115144b0fSOlivier Houchard {
154215144b0fSOlivier Houchard     flag aSign;
154315144b0fSOlivier Houchard     int16 aExp;
154415144b0fSOlivier Houchard     bits32 aSig;
154515144b0fSOlivier Houchard 
154615144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
154715144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
154815144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
154915144b0fSOlivier Houchard     if ( aExp == 0xFF ) {
155015144b0fSOlivier Houchard         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
155115144b0fSOlivier Houchard         return packFloat64( aSign, 0x7FF, 0 );
155215144b0fSOlivier Houchard     }
155315144b0fSOlivier Houchard     if ( aExp == 0 ) {
155415144b0fSOlivier Houchard         if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
155515144b0fSOlivier Houchard         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
155615144b0fSOlivier Houchard         --aExp;
155715144b0fSOlivier Houchard     }
155815144b0fSOlivier Houchard     return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
155915144b0fSOlivier Houchard 
156015144b0fSOlivier Houchard }
156115144b0fSOlivier Houchard 
156215144b0fSOlivier Houchard #ifdef FLOATX80
156315144b0fSOlivier Houchard 
156415144b0fSOlivier Houchard /*
156515144b0fSOlivier Houchard -------------------------------------------------------------------------------
156615144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
156715144b0fSOlivier Houchard `a' to the extended double-precision floating-point format.  The conversion
156815144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
156915144b0fSOlivier Houchard Arithmetic.
157015144b0fSOlivier Houchard -------------------------------------------------------------------------------
157115144b0fSOlivier Houchard */
float32_to_floatx80(float32 a)157215144b0fSOlivier Houchard floatx80 float32_to_floatx80( float32 a )
157315144b0fSOlivier Houchard {
157415144b0fSOlivier Houchard     flag aSign;
157515144b0fSOlivier Houchard     int16 aExp;
157615144b0fSOlivier Houchard     bits32 aSig;
157715144b0fSOlivier Houchard 
157815144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
157915144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
158015144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
158115144b0fSOlivier Houchard     if ( aExp == 0xFF ) {
158215144b0fSOlivier Houchard         if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
158315144b0fSOlivier Houchard         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
158415144b0fSOlivier Houchard     }
158515144b0fSOlivier Houchard     if ( aExp == 0 ) {
158615144b0fSOlivier Houchard         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
158715144b0fSOlivier Houchard         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
158815144b0fSOlivier Houchard     }
158915144b0fSOlivier Houchard     aSig |= 0x00800000;
159015144b0fSOlivier Houchard     return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
159115144b0fSOlivier Houchard 
159215144b0fSOlivier Houchard }
159315144b0fSOlivier Houchard 
159415144b0fSOlivier Houchard #endif
159515144b0fSOlivier Houchard 
159615144b0fSOlivier Houchard #ifdef FLOAT128
159715144b0fSOlivier Houchard 
159815144b0fSOlivier Houchard /*
159915144b0fSOlivier Houchard -------------------------------------------------------------------------------
160015144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
160115144b0fSOlivier Houchard `a' to the double-precision floating-point format.  The conversion is
160215144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
160315144b0fSOlivier Houchard Arithmetic.
160415144b0fSOlivier Houchard -------------------------------------------------------------------------------
160515144b0fSOlivier Houchard */
float32_to_float128(float32 a)160615144b0fSOlivier Houchard float128 float32_to_float128( float32 a )
160715144b0fSOlivier Houchard {
160815144b0fSOlivier Houchard     flag aSign;
160915144b0fSOlivier Houchard     int16 aExp;
161015144b0fSOlivier Houchard     bits32 aSig;
161115144b0fSOlivier Houchard 
161215144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
161315144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
161415144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
161515144b0fSOlivier Houchard     if ( aExp == 0xFF ) {
161615144b0fSOlivier Houchard         if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
161715144b0fSOlivier Houchard         return packFloat128( aSign, 0x7FFF, 0, 0 );
161815144b0fSOlivier Houchard     }
161915144b0fSOlivier Houchard     if ( aExp == 0 ) {
162015144b0fSOlivier Houchard         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
162115144b0fSOlivier Houchard         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
162215144b0fSOlivier Houchard         --aExp;
162315144b0fSOlivier Houchard     }
162415144b0fSOlivier Houchard     return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
162515144b0fSOlivier Houchard 
162615144b0fSOlivier Houchard }
162715144b0fSOlivier Houchard 
162815144b0fSOlivier Houchard #endif
162915144b0fSOlivier Houchard 
163015144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
163115144b0fSOlivier Houchard /*
163215144b0fSOlivier Houchard -------------------------------------------------------------------------------
163315144b0fSOlivier Houchard Rounds the single-precision floating-point value `a' to an integer, and
163415144b0fSOlivier Houchard returns the result as a single-precision floating-point value.  The
163515144b0fSOlivier Houchard operation is performed according to the IEC/IEEE Standard for Binary
163615144b0fSOlivier Houchard Floating-Point Arithmetic.
163715144b0fSOlivier Houchard -------------------------------------------------------------------------------
163815144b0fSOlivier Houchard */
float32_round_to_int(float32 a)163915144b0fSOlivier Houchard float32 float32_round_to_int( float32 a )
164015144b0fSOlivier Houchard {
164115144b0fSOlivier Houchard     flag aSign;
164215144b0fSOlivier Houchard     int16 aExp;
164315144b0fSOlivier Houchard     bits32 lastBitMask, roundBitsMask;
164415144b0fSOlivier Houchard     int8 roundingMode;
164515144b0fSOlivier Houchard     float32 z;
164615144b0fSOlivier Houchard 
164715144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
164815144b0fSOlivier Houchard     if ( 0x96 <= aExp ) {
164915144b0fSOlivier Houchard         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
165015144b0fSOlivier Houchard             return propagateFloat32NaN( a, a );
165115144b0fSOlivier Houchard         }
165215144b0fSOlivier Houchard         return a;
165315144b0fSOlivier Houchard     }
165415144b0fSOlivier Houchard     if ( aExp <= 0x7E ) {
165515144b0fSOlivier Houchard         if ( (bits32) ( a<<1 ) == 0 ) return a;
165615144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
165715144b0fSOlivier Houchard         aSign = extractFloat32Sign( a );
165815144b0fSOlivier Houchard         switch ( float_rounding_mode ) {
165915144b0fSOlivier Houchard          case float_round_nearest_even:
166015144b0fSOlivier Houchard             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
166115144b0fSOlivier Houchard                 return packFloat32( aSign, 0x7F, 0 );
166215144b0fSOlivier Houchard             }
166315144b0fSOlivier Houchard             break;
166415144b0fSOlivier Houchard 	 case float_round_to_zero:
166515144b0fSOlivier Houchard 	    break;
166615144b0fSOlivier Houchard          case float_round_down:
166715144b0fSOlivier Houchard             return aSign ? 0xBF800000 : 0;
166815144b0fSOlivier Houchard          case float_round_up:
166915144b0fSOlivier Houchard             return aSign ? 0x80000000 : 0x3F800000;
167015144b0fSOlivier Houchard         }
167115144b0fSOlivier Houchard         return packFloat32( aSign, 0, 0 );
167215144b0fSOlivier Houchard     }
167315144b0fSOlivier Houchard     lastBitMask = 1;
167415144b0fSOlivier Houchard     lastBitMask <<= 0x96 - aExp;
167515144b0fSOlivier Houchard     roundBitsMask = lastBitMask - 1;
167615144b0fSOlivier Houchard     z = a;
167715144b0fSOlivier Houchard     roundingMode = float_rounding_mode;
167815144b0fSOlivier Houchard     if ( roundingMode == float_round_nearest_even ) {
167915144b0fSOlivier Houchard         z += lastBitMask>>1;
168015144b0fSOlivier Houchard         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
168115144b0fSOlivier Houchard     }
168215144b0fSOlivier Houchard     else if ( roundingMode != float_round_to_zero ) {
168315144b0fSOlivier Houchard         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
168415144b0fSOlivier Houchard             z += roundBitsMask;
168515144b0fSOlivier Houchard         }
168615144b0fSOlivier Houchard     }
168715144b0fSOlivier Houchard     z &= ~ roundBitsMask;
168815144b0fSOlivier Houchard     if ( z != a ) float_exception_flags |= float_flag_inexact;
168915144b0fSOlivier Houchard     return z;
169015144b0fSOlivier Houchard 
169115144b0fSOlivier Houchard }
169215144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
169315144b0fSOlivier Houchard 
169415144b0fSOlivier Houchard /*
169515144b0fSOlivier Houchard -------------------------------------------------------------------------------
169615144b0fSOlivier Houchard Returns the result of adding the absolute values of the single-precision
169715144b0fSOlivier Houchard floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
169815144b0fSOlivier Houchard before being returned.  `zSign' is ignored if the result is a NaN.
169915144b0fSOlivier Houchard The addition is performed according to the IEC/IEEE Standard for Binary
170015144b0fSOlivier Houchard Floating-Point Arithmetic.
170115144b0fSOlivier Houchard -------------------------------------------------------------------------------
170215144b0fSOlivier Houchard */
addFloat32Sigs(float32 a,float32 b,flag zSign)170315144b0fSOlivier Houchard static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
170415144b0fSOlivier Houchard {
170515144b0fSOlivier Houchard     int16 aExp, bExp, zExp;
170615144b0fSOlivier Houchard     bits32 aSig, bSig, zSig;
170715144b0fSOlivier Houchard     int16 expDiff;
170815144b0fSOlivier Houchard 
170915144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
171015144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
171115144b0fSOlivier Houchard     bSig = extractFloat32Frac( b );
171215144b0fSOlivier Houchard     bExp = extractFloat32Exp( b );
171315144b0fSOlivier Houchard     expDiff = aExp - bExp;
171415144b0fSOlivier Houchard     aSig <<= 6;
171515144b0fSOlivier Houchard     bSig <<= 6;
171615144b0fSOlivier Houchard     if ( 0 < expDiff ) {
171715144b0fSOlivier Houchard         if ( aExp == 0xFF ) {
171815144b0fSOlivier Houchard             if ( aSig ) return propagateFloat32NaN( a, b );
171915144b0fSOlivier Houchard             return a;
172015144b0fSOlivier Houchard         }
172115144b0fSOlivier Houchard         if ( bExp == 0 ) {
172215144b0fSOlivier Houchard             --expDiff;
172315144b0fSOlivier Houchard         }
172415144b0fSOlivier Houchard         else {
172515144b0fSOlivier Houchard             bSig |= 0x20000000;
172615144b0fSOlivier Houchard         }
172715144b0fSOlivier Houchard         shift32RightJamming( bSig, expDiff, &bSig );
172815144b0fSOlivier Houchard         zExp = aExp;
172915144b0fSOlivier Houchard     }
173015144b0fSOlivier Houchard     else if ( expDiff < 0 ) {
173115144b0fSOlivier Houchard         if ( bExp == 0xFF ) {
173215144b0fSOlivier Houchard             if ( bSig ) return propagateFloat32NaN( a, b );
173315144b0fSOlivier Houchard             return packFloat32( zSign, 0xFF, 0 );
173415144b0fSOlivier Houchard         }
173515144b0fSOlivier Houchard         if ( aExp == 0 ) {
173615144b0fSOlivier Houchard             ++expDiff;
173715144b0fSOlivier Houchard         }
173815144b0fSOlivier Houchard         else {
173915144b0fSOlivier Houchard             aSig |= 0x20000000;
174015144b0fSOlivier Houchard         }
174115144b0fSOlivier Houchard         shift32RightJamming( aSig, - expDiff, &aSig );
174215144b0fSOlivier Houchard         zExp = bExp;
174315144b0fSOlivier Houchard     }
174415144b0fSOlivier Houchard     else {
174515144b0fSOlivier Houchard         if ( aExp == 0xFF ) {
174615144b0fSOlivier Houchard             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
174715144b0fSOlivier Houchard             return a;
174815144b0fSOlivier Houchard         }
174915144b0fSOlivier Houchard         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
175015144b0fSOlivier Houchard         zSig = 0x40000000 + aSig + bSig;
175115144b0fSOlivier Houchard         zExp = aExp;
175215144b0fSOlivier Houchard         goto roundAndPack;
175315144b0fSOlivier Houchard     }
175415144b0fSOlivier Houchard     aSig |= 0x20000000;
175515144b0fSOlivier Houchard     zSig = ( aSig + bSig )<<1;
175615144b0fSOlivier Houchard     --zExp;
175715144b0fSOlivier Houchard     if ( (sbits32) zSig < 0 ) {
175815144b0fSOlivier Houchard         zSig = aSig + bSig;
175915144b0fSOlivier Houchard         ++zExp;
176015144b0fSOlivier Houchard     }
176115144b0fSOlivier Houchard  roundAndPack:
176215144b0fSOlivier Houchard     return roundAndPackFloat32( zSign, zExp, zSig );
176315144b0fSOlivier Houchard 
176415144b0fSOlivier Houchard }
176515144b0fSOlivier Houchard 
176615144b0fSOlivier Houchard /*
176715144b0fSOlivier Houchard -------------------------------------------------------------------------------
176815144b0fSOlivier Houchard Returns the result of subtracting the absolute values of the single-
176915144b0fSOlivier Houchard precision floating-point values `a' and `b'.  If `zSign' is 1, the
177015144b0fSOlivier Houchard difference is negated before being returned.  `zSign' is ignored if the
177115144b0fSOlivier Houchard result is a NaN.  The subtraction is performed according to the IEC/IEEE
177215144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
177315144b0fSOlivier Houchard -------------------------------------------------------------------------------
177415144b0fSOlivier Houchard */
subFloat32Sigs(float32 a,float32 b,flag zSign)177515144b0fSOlivier Houchard static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
177615144b0fSOlivier Houchard {
177715144b0fSOlivier Houchard     int16 aExp, bExp, zExp;
177815144b0fSOlivier Houchard     bits32 aSig, bSig, zSig;
177915144b0fSOlivier Houchard     int16 expDiff;
178015144b0fSOlivier Houchard 
178115144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
178215144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
178315144b0fSOlivier Houchard     bSig = extractFloat32Frac( b );
178415144b0fSOlivier Houchard     bExp = extractFloat32Exp( b );
178515144b0fSOlivier Houchard     expDiff = aExp - bExp;
178615144b0fSOlivier Houchard     aSig <<= 7;
178715144b0fSOlivier Houchard     bSig <<= 7;
178815144b0fSOlivier Houchard     if ( 0 < expDiff ) goto aExpBigger;
178915144b0fSOlivier Houchard     if ( expDiff < 0 ) goto bExpBigger;
179015144b0fSOlivier Houchard     if ( aExp == 0xFF ) {
179115144b0fSOlivier Houchard         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
179215144b0fSOlivier Houchard         float_raise( float_flag_invalid );
179315144b0fSOlivier Houchard         return float32_default_nan;
179415144b0fSOlivier Houchard     }
179515144b0fSOlivier Houchard     if ( aExp == 0 ) {
179615144b0fSOlivier Houchard         aExp = 1;
179715144b0fSOlivier Houchard         bExp = 1;
179815144b0fSOlivier Houchard     }
179915144b0fSOlivier Houchard     if ( bSig < aSig ) goto aBigger;
180015144b0fSOlivier Houchard     if ( aSig < bSig ) goto bBigger;
180115144b0fSOlivier Houchard     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
180215144b0fSOlivier Houchard  bExpBigger:
180315144b0fSOlivier Houchard     if ( bExp == 0xFF ) {
180415144b0fSOlivier Houchard         if ( bSig ) return propagateFloat32NaN( a, b );
180515144b0fSOlivier Houchard         return packFloat32( zSign ^ 1, 0xFF, 0 );
180615144b0fSOlivier Houchard     }
180715144b0fSOlivier Houchard     if ( aExp == 0 ) {
180815144b0fSOlivier Houchard         ++expDiff;
180915144b0fSOlivier Houchard     }
181015144b0fSOlivier Houchard     else {
181115144b0fSOlivier Houchard         aSig |= 0x40000000;
181215144b0fSOlivier Houchard     }
181315144b0fSOlivier Houchard     shift32RightJamming( aSig, - expDiff, &aSig );
181415144b0fSOlivier Houchard     bSig |= 0x40000000;
181515144b0fSOlivier Houchard  bBigger:
181615144b0fSOlivier Houchard     zSig = bSig - aSig;
181715144b0fSOlivier Houchard     zExp = bExp;
181815144b0fSOlivier Houchard     zSign ^= 1;
181915144b0fSOlivier Houchard     goto normalizeRoundAndPack;
182015144b0fSOlivier Houchard  aExpBigger:
182115144b0fSOlivier Houchard     if ( aExp == 0xFF ) {
182215144b0fSOlivier Houchard         if ( aSig ) return propagateFloat32NaN( a, b );
182315144b0fSOlivier Houchard         return a;
182415144b0fSOlivier Houchard     }
182515144b0fSOlivier Houchard     if ( bExp == 0 ) {
182615144b0fSOlivier Houchard         --expDiff;
182715144b0fSOlivier Houchard     }
182815144b0fSOlivier Houchard     else {
182915144b0fSOlivier Houchard         bSig |= 0x40000000;
183015144b0fSOlivier Houchard     }
183115144b0fSOlivier Houchard     shift32RightJamming( bSig, expDiff, &bSig );
183215144b0fSOlivier Houchard     aSig |= 0x40000000;
183315144b0fSOlivier Houchard  aBigger:
183415144b0fSOlivier Houchard     zSig = aSig - bSig;
183515144b0fSOlivier Houchard     zExp = aExp;
183615144b0fSOlivier Houchard  normalizeRoundAndPack:
183715144b0fSOlivier Houchard     --zExp;
183815144b0fSOlivier Houchard     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
183915144b0fSOlivier Houchard 
184015144b0fSOlivier Houchard }
184115144b0fSOlivier Houchard 
184215144b0fSOlivier Houchard /*
184315144b0fSOlivier Houchard -------------------------------------------------------------------------------
184415144b0fSOlivier Houchard Returns the result of adding the single-precision floating-point values `a'
184515144b0fSOlivier Houchard and `b'.  The operation is performed according to the IEC/IEEE Standard for
184615144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
184715144b0fSOlivier Houchard -------------------------------------------------------------------------------
184815144b0fSOlivier Houchard */
float32_add(float32 a,float32 b)184915144b0fSOlivier Houchard float32 float32_add( float32 a, float32 b )
185015144b0fSOlivier Houchard {
185115144b0fSOlivier Houchard     flag aSign, bSign;
185215144b0fSOlivier Houchard 
185315144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
185415144b0fSOlivier Houchard     bSign = extractFloat32Sign( b );
185515144b0fSOlivier Houchard     if ( aSign == bSign ) {
185615144b0fSOlivier Houchard         return addFloat32Sigs( a, b, aSign );
185715144b0fSOlivier Houchard     }
185815144b0fSOlivier Houchard     else {
185915144b0fSOlivier Houchard         return subFloat32Sigs( a, b, aSign );
186015144b0fSOlivier Houchard     }
186115144b0fSOlivier Houchard 
186215144b0fSOlivier Houchard }
186315144b0fSOlivier Houchard 
186415144b0fSOlivier Houchard /*
186515144b0fSOlivier Houchard -------------------------------------------------------------------------------
186615144b0fSOlivier Houchard Returns the result of subtracting the single-precision floating-point values
186715144b0fSOlivier Houchard `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
186815144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
186915144b0fSOlivier Houchard -------------------------------------------------------------------------------
187015144b0fSOlivier Houchard */
float32_sub(float32 a,float32 b)187115144b0fSOlivier Houchard float32 float32_sub( float32 a, float32 b )
187215144b0fSOlivier Houchard {
187315144b0fSOlivier Houchard     flag aSign, bSign;
187415144b0fSOlivier Houchard 
187515144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
187615144b0fSOlivier Houchard     bSign = extractFloat32Sign( b );
187715144b0fSOlivier Houchard     if ( aSign == bSign ) {
187815144b0fSOlivier Houchard         return subFloat32Sigs( a, b, aSign );
187915144b0fSOlivier Houchard     }
188015144b0fSOlivier Houchard     else {
188115144b0fSOlivier Houchard         return addFloat32Sigs( a, b, aSign );
188215144b0fSOlivier Houchard     }
188315144b0fSOlivier Houchard 
188415144b0fSOlivier Houchard }
188515144b0fSOlivier Houchard 
188615144b0fSOlivier Houchard /*
188715144b0fSOlivier Houchard -------------------------------------------------------------------------------
188815144b0fSOlivier Houchard Returns the result of multiplying the single-precision floating-point values
188915144b0fSOlivier Houchard `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
189015144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
189115144b0fSOlivier Houchard -------------------------------------------------------------------------------
189215144b0fSOlivier Houchard */
float32_mul(float32 a,float32 b)189315144b0fSOlivier Houchard float32 float32_mul( float32 a, float32 b )
189415144b0fSOlivier Houchard {
189515144b0fSOlivier Houchard     flag aSign, bSign, zSign;
189615144b0fSOlivier Houchard     int16 aExp, bExp, zExp;
189715144b0fSOlivier Houchard     bits32 aSig, bSig;
189815144b0fSOlivier Houchard     bits64 zSig64;
189915144b0fSOlivier Houchard     bits32 zSig;
190015144b0fSOlivier Houchard 
190115144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
190215144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
190315144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
190415144b0fSOlivier Houchard     bSig = extractFloat32Frac( b );
190515144b0fSOlivier Houchard     bExp = extractFloat32Exp( b );
190615144b0fSOlivier Houchard     bSign = extractFloat32Sign( b );
190715144b0fSOlivier Houchard     zSign = aSign ^ bSign;
190815144b0fSOlivier Houchard     if ( aExp == 0xFF ) {
190915144b0fSOlivier Houchard         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
191015144b0fSOlivier Houchard             return propagateFloat32NaN( a, b );
191115144b0fSOlivier Houchard         }
191215144b0fSOlivier Houchard         if ( ( bExp | bSig ) == 0 ) {
191315144b0fSOlivier Houchard             float_raise( float_flag_invalid );
191415144b0fSOlivier Houchard             return float32_default_nan;
191515144b0fSOlivier Houchard         }
191615144b0fSOlivier Houchard         return packFloat32( zSign, 0xFF, 0 );
191715144b0fSOlivier Houchard     }
191815144b0fSOlivier Houchard     if ( bExp == 0xFF ) {
191915144b0fSOlivier Houchard         if ( bSig ) return propagateFloat32NaN( a, b );
192015144b0fSOlivier Houchard         if ( ( aExp | aSig ) == 0 ) {
192115144b0fSOlivier Houchard             float_raise( float_flag_invalid );
192215144b0fSOlivier Houchard             return float32_default_nan;
192315144b0fSOlivier Houchard         }
192415144b0fSOlivier Houchard         return packFloat32( zSign, 0xFF, 0 );
192515144b0fSOlivier Houchard     }
192615144b0fSOlivier Houchard     if ( aExp == 0 ) {
192715144b0fSOlivier Houchard         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
192815144b0fSOlivier Houchard         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
192915144b0fSOlivier Houchard     }
193015144b0fSOlivier Houchard     if ( bExp == 0 ) {
193115144b0fSOlivier Houchard         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
193215144b0fSOlivier Houchard         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
193315144b0fSOlivier Houchard     }
193415144b0fSOlivier Houchard     zExp = aExp + bExp - 0x7F;
193515144b0fSOlivier Houchard     aSig = ( aSig | 0x00800000 )<<7;
193615144b0fSOlivier Houchard     bSig = ( bSig | 0x00800000 )<<8;
193715144b0fSOlivier Houchard     shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
193815144b0fSOlivier Houchard     zSig = zSig64;
193915144b0fSOlivier Houchard     if ( 0 <= (sbits32) ( zSig<<1 ) ) {
194015144b0fSOlivier Houchard         zSig <<= 1;
194115144b0fSOlivier Houchard         --zExp;
194215144b0fSOlivier Houchard     }
194315144b0fSOlivier Houchard     return roundAndPackFloat32( zSign, zExp, zSig );
194415144b0fSOlivier Houchard 
194515144b0fSOlivier Houchard }
194615144b0fSOlivier Houchard 
194715144b0fSOlivier Houchard /*
194815144b0fSOlivier Houchard -------------------------------------------------------------------------------
194915144b0fSOlivier Houchard Returns the result of dividing the single-precision floating-point value `a'
195015144b0fSOlivier Houchard by the corresponding value `b'.  The operation is performed according to the
195115144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
195215144b0fSOlivier Houchard -------------------------------------------------------------------------------
195315144b0fSOlivier Houchard */
float32 float32_div( float32 a, float32 b )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    bits32 fracA, fracB, quot;
    bits64 dividend;

    /* Split both operands into fraction, exponent and sign fields. */
    fracA = extractFloat32Frac( a );
    expA = extractFloat32Exp( a );
    signA = extractFloat32Sign( a );
    fracB = extractFloat32Frac( b );
    expB = extractFloat32Exp( b );
    signB = extractFloat32Sign( b );
    signZ = signA ^ signB;

    /* Dividend has the maximum exponent: NaN or infinity. */
    if ( expA == 0xFF ) {
        if ( fracA ) return propagateFloat32NaN( a, b );
        if ( expB != 0xFF ) return packFloat32( signZ, 0xFF, 0 );
        if ( fracB ) return propagateFloat32NaN( a, b );
        /* Both exponents are 0xFF with zero fractions: invalid. */
        float_raise( float_flag_invalid );
        return float32_default_nan;
    }
    /* Divisor has the maximum exponent: NaN, else a signed zero result. */
    if ( expB == 0xFF ) {
        if ( fracB ) return propagateFloat32NaN( a, b );
        return packFloat32( signZ, 0, 0 );
    }
    /* Divisor is zero or subnormal. */
    if ( expB == 0 ) {
        if ( fracB == 0 ) {
            if ( ( expA | fracA ) == 0 ) {
                /* Zero divided by zero. */
                float_raise( float_flag_invalid );
                return float32_default_nan;
            }
            float_raise( float_flag_divbyzero );
            return packFloat32( signZ, 0xFF, 0 );
        }
        normalizeFloat32Subnormal( fracB, &expB, &fracB );
    }
    /* Dividend is zero or subnormal. */
    if ( expA == 0 ) {
        if ( fracA == 0 ) return packFloat32( signZ, 0, 0 );
        normalizeFloat32Subnormal( fracA, &expA, &fracA );
    }

    expZ = expA - expB + 0x7D;
    /* Restore the implicit leading bit and left-align the significands. */
    fracA = ( fracA | 0x00800000 )<<7;
    fracB = ( fracB | 0x00800000 )<<8;
    if ( fracB <= ( fracA + fracA ) ) {
        fracA >>= 1;
        ++expZ;
    }
    dividend = ( (bits64) fracA )<<32;
    quot = dividend / fracB;
    /*
     * When the low six quotient bits are all zero, fold a sticky bit into
     * the quotient if the division left a remainder.
     */
    if ( ( quot & 0x3F ) == 0 ) {
        quot |= ( (bits64) fracB * quot != dividend );
    }
    return roundAndPackFloat32( signZ, expZ, quot );

}
200915144b0fSOlivier Houchard 
201015144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
201115144b0fSOlivier Houchard /*
201215144b0fSOlivier Houchard -------------------------------------------------------------------------------
201315144b0fSOlivier Houchard Returns the remainder of the single-precision floating-point value `a'
201415144b0fSOlivier Houchard with respect to the corresponding value `b'.  The operation is performed
201515144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
201615144b0fSOlivier Houchard -------------------------------------------------------------------------------
201715144b0fSOlivier Houchard */
float32_rem(float32 a,float32 b)201815144b0fSOlivier Houchard float32 float32_rem( float32 a, float32 b )
201915144b0fSOlivier Houchard {
202015144b0fSOlivier Houchard     flag aSign, bSign, zSign;
202115144b0fSOlivier Houchard     int16 aExp, bExp, expDiff;
202215144b0fSOlivier Houchard     bits32 aSig, bSig;
202315144b0fSOlivier Houchard     bits32 q;
202415144b0fSOlivier Houchard     bits64 aSig64, bSig64, q64;
202515144b0fSOlivier Houchard     bits32 alternateASig;
202615144b0fSOlivier Houchard     sbits32 sigMean;
202715144b0fSOlivier Houchard 
202815144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
202915144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
203015144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
203115144b0fSOlivier Houchard     bSig = extractFloat32Frac( b );
203215144b0fSOlivier Houchard     bExp = extractFloat32Exp( b );
203315144b0fSOlivier Houchard     bSign = extractFloat32Sign( b );
203415144b0fSOlivier Houchard     if ( aExp == 0xFF ) {
203515144b0fSOlivier Houchard         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
203615144b0fSOlivier Houchard             return propagateFloat32NaN( a, b );
203715144b0fSOlivier Houchard         }
203815144b0fSOlivier Houchard         float_raise( float_flag_invalid );
203915144b0fSOlivier Houchard         return float32_default_nan;
204015144b0fSOlivier Houchard     }
204115144b0fSOlivier Houchard     if ( bExp == 0xFF ) {
204215144b0fSOlivier Houchard         if ( bSig ) return propagateFloat32NaN( a, b );
204315144b0fSOlivier Houchard         return a;
204415144b0fSOlivier Houchard     }
204515144b0fSOlivier Houchard     if ( bExp == 0 ) {
204615144b0fSOlivier Houchard         if ( bSig == 0 ) {
204715144b0fSOlivier Houchard             float_raise( float_flag_invalid );
204815144b0fSOlivier Houchard             return float32_default_nan;
204915144b0fSOlivier Houchard         }
205015144b0fSOlivier Houchard         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
205115144b0fSOlivier Houchard     }
205215144b0fSOlivier Houchard     if ( aExp == 0 ) {
205315144b0fSOlivier Houchard         if ( aSig == 0 ) return a;
205415144b0fSOlivier Houchard         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
205515144b0fSOlivier Houchard     }
205615144b0fSOlivier Houchard     expDiff = aExp - bExp;
205715144b0fSOlivier Houchard     aSig |= 0x00800000;
205815144b0fSOlivier Houchard     bSig |= 0x00800000;
205915144b0fSOlivier Houchard     if ( expDiff < 32 ) {
206015144b0fSOlivier Houchard         aSig <<= 8;
206115144b0fSOlivier Houchard         bSig <<= 8;
206215144b0fSOlivier Houchard         if ( expDiff < 0 ) {
206315144b0fSOlivier Houchard             if ( expDiff < -1 ) return a;
206415144b0fSOlivier Houchard             aSig >>= 1;
206515144b0fSOlivier Houchard         }
206615144b0fSOlivier Houchard         q = ( bSig <= aSig );
206715144b0fSOlivier Houchard         if ( q ) aSig -= bSig;
206815144b0fSOlivier Houchard         if ( 0 < expDiff ) {
206915144b0fSOlivier Houchard             q = ( ( (bits64) aSig )<<32 ) / bSig;
207015144b0fSOlivier Houchard             q >>= 32 - expDiff;
207115144b0fSOlivier Houchard             bSig >>= 2;
207215144b0fSOlivier Houchard             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
207315144b0fSOlivier Houchard         }
207415144b0fSOlivier Houchard         else {
207515144b0fSOlivier Houchard             aSig >>= 2;
207615144b0fSOlivier Houchard             bSig >>= 2;
207715144b0fSOlivier Houchard         }
207815144b0fSOlivier Houchard     }
207915144b0fSOlivier Houchard     else {
208015144b0fSOlivier Houchard         if ( bSig <= aSig ) aSig -= bSig;
208115144b0fSOlivier Houchard         aSig64 = ( (bits64) aSig )<<40;
208215144b0fSOlivier Houchard         bSig64 = ( (bits64) bSig )<<40;
208315144b0fSOlivier Houchard         expDiff -= 64;
208415144b0fSOlivier Houchard         while ( 0 < expDiff ) {
208515144b0fSOlivier Houchard             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
208615144b0fSOlivier Houchard             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
208715144b0fSOlivier Houchard             aSig64 = - ( ( bSig * q64 )<<38 );
208815144b0fSOlivier Houchard             expDiff -= 62;
208915144b0fSOlivier Houchard         }
209015144b0fSOlivier Houchard         expDiff += 64;
209115144b0fSOlivier Houchard         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
209215144b0fSOlivier Houchard         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
209315144b0fSOlivier Houchard         q = q64>>( 64 - expDiff );
209415144b0fSOlivier Houchard         bSig <<= 6;
209515144b0fSOlivier Houchard         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
209615144b0fSOlivier Houchard     }
209715144b0fSOlivier Houchard     do {
209815144b0fSOlivier Houchard         alternateASig = aSig;
209915144b0fSOlivier Houchard         ++q;
210015144b0fSOlivier Houchard         aSig -= bSig;
210115144b0fSOlivier Houchard     } while ( 0 <= (sbits32) aSig );
210215144b0fSOlivier Houchard     sigMean = aSig + alternateASig;
210315144b0fSOlivier Houchard     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
210415144b0fSOlivier Houchard         aSig = alternateASig;
210515144b0fSOlivier Houchard     }
210615144b0fSOlivier Houchard     zSign = ( (sbits32) aSig < 0 );
210715144b0fSOlivier Houchard     if ( zSign ) aSig = - aSig;
210815144b0fSOlivier Houchard     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
210915144b0fSOlivier Houchard 
211015144b0fSOlivier Houchard }
211115144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
211215144b0fSOlivier Houchard 
211315144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
211415144b0fSOlivier Houchard /*
211515144b0fSOlivier Houchard -------------------------------------------------------------------------------
211615144b0fSOlivier Houchard Returns the square root of the single-precision floating-point value `a'.
211715144b0fSOlivier Houchard The operation is performed according to the IEC/IEEE Standard for Binary
211815144b0fSOlivier Houchard Floating-Point Arithmetic.
211915144b0fSOlivier Houchard -------------------------------------------------------------------------------
212015144b0fSOlivier Houchard */
float32_sqrt(float32 a)212115144b0fSOlivier Houchard float32 float32_sqrt( float32 a )
212215144b0fSOlivier Houchard {
212315144b0fSOlivier Houchard     flag aSign;
212415144b0fSOlivier Houchard     int16 aExp, zExp;
212515144b0fSOlivier Houchard     bits32 aSig, zSig;
212615144b0fSOlivier Houchard     bits64 rem, term;
212715144b0fSOlivier Houchard 
212815144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
212915144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
213015144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
213115144b0fSOlivier Houchard     if ( aExp == 0xFF ) {
213215144b0fSOlivier Houchard         if ( aSig ) return propagateFloat32NaN( a, 0 );
213315144b0fSOlivier Houchard         if ( ! aSign ) return a;
213415144b0fSOlivier Houchard         float_raise( float_flag_invalid );
213515144b0fSOlivier Houchard         return float32_default_nan;
213615144b0fSOlivier Houchard     }
213715144b0fSOlivier Houchard     if ( aSign ) {
213815144b0fSOlivier Houchard         if ( ( aExp | aSig ) == 0 ) return a;
213915144b0fSOlivier Houchard         float_raise( float_flag_invalid );
214015144b0fSOlivier Houchard         return float32_default_nan;
214115144b0fSOlivier Houchard     }
214215144b0fSOlivier Houchard     if ( aExp == 0 ) {
214315144b0fSOlivier Houchard         if ( aSig == 0 ) return 0;
214415144b0fSOlivier Houchard         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
214515144b0fSOlivier Houchard     }
214615144b0fSOlivier Houchard     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
214715144b0fSOlivier Houchard     aSig = ( aSig | 0x00800000 )<<8;
214815144b0fSOlivier Houchard     zSig = estimateSqrt32( aExp, aSig ) + 2;
214915144b0fSOlivier Houchard     if ( ( zSig & 0x7F ) <= 5 ) {
215015144b0fSOlivier Houchard         if ( zSig < 2 ) {
215115144b0fSOlivier Houchard             zSig = 0x7FFFFFFF;
215215144b0fSOlivier Houchard             goto roundAndPack;
215315144b0fSOlivier Houchard         }
215415144b0fSOlivier Houchard         aSig >>= aExp & 1;
215515144b0fSOlivier Houchard         term = ( (bits64) zSig ) * zSig;
215615144b0fSOlivier Houchard         rem = ( ( (bits64) aSig )<<32 ) - term;
215715144b0fSOlivier Houchard         while ( (sbits64) rem < 0 ) {
215815144b0fSOlivier Houchard             --zSig;
215915144b0fSOlivier Houchard             rem += ( ( (bits64) zSig )<<1 ) | 1;
216015144b0fSOlivier Houchard         }
216115144b0fSOlivier Houchard         zSig |= ( rem != 0 );
216215144b0fSOlivier Houchard     }
216315144b0fSOlivier Houchard     shift32RightJamming( zSig, 1, &zSig );
216415144b0fSOlivier Houchard  roundAndPack:
216515144b0fSOlivier Houchard     return roundAndPackFloat32( 0, zExp, zSig );
216615144b0fSOlivier Houchard 
216715144b0fSOlivier Houchard }
216815144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
216915144b0fSOlivier Houchard 
217015144b0fSOlivier Houchard /*
217115144b0fSOlivier Houchard -------------------------------------------------------------------------------
217215144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is equal to
217315144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  The comparison is performed
217415144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
217515144b0fSOlivier Houchard -------------------------------------------------------------------------------
217615144b0fSOlivier Houchard */
float32_eq(float32 a,float32 b)217715144b0fSOlivier Houchard flag float32_eq( float32 a, float32 b )
217815144b0fSOlivier Houchard {
217915144b0fSOlivier Houchard 
218015144b0fSOlivier Houchard     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
218115144b0fSOlivier Houchard          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
218215144b0fSOlivier Houchard        ) {
218315144b0fSOlivier Houchard         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
218415144b0fSOlivier Houchard             float_raise( float_flag_invalid );
218515144b0fSOlivier Houchard         }
218615144b0fSOlivier Houchard         return 0;
218715144b0fSOlivier Houchard     }
218815144b0fSOlivier Houchard     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
218915144b0fSOlivier Houchard 
219015144b0fSOlivier Houchard }
219115144b0fSOlivier Houchard 
219215144b0fSOlivier Houchard /*
219315144b0fSOlivier Houchard -------------------------------------------------------------------------------
219415144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is less than
219515144b0fSOlivier Houchard or equal to the corresponding value `b', and 0 otherwise.  The comparison
219615144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
219715144b0fSOlivier Houchard Arithmetic.
219815144b0fSOlivier Houchard -------------------------------------------------------------------------------
219915144b0fSOlivier Houchard */
float32_le(float32 a,float32 b)220015144b0fSOlivier Houchard flag float32_le( float32 a, float32 b )
220115144b0fSOlivier Houchard {
220215144b0fSOlivier Houchard     flag aSign, bSign;
220315144b0fSOlivier Houchard 
220415144b0fSOlivier Houchard     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
220515144b0fSOlivier Houchard          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
220615144b0fSOlivier Houchard        ) {
220715144b0fSOlivier Houchard         float_raise( float_flag_invalid );
220815144b0fSOlivier Houchard         return 0;
220915144b0fSOlivier Houchard     }
221015144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
221115144b0fSOlivier Houchard     bSign = extractFloat32Sign( b );
221215144b0fSOlivier Houchard     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
221315144b0fSOlivier Houchard     return ( a == b ) || ( aSign ^ ( a < b ) );
221415144b0fSOlivier Houchard 
221515144b0fSOlivier Houchard }
221615144b0fSOlivier Houchard 
221715144b0fSOlivier Houchard /*
221815144b0fSOlivier Houchard -------------------------------------------------------------------------------
221915144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is less than
222015144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  The comparison is performed
222115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
222215144b0fSOlivier Houchard -------------------------------------------------------------------------------
222315144b0fSOlivier Houchard */
float32_lt(float32 a,float32 b)222415144b0fSOlivier Houchard flag float32_lt( float32 a, float32 b )
222515144b0fSOlivier Houchard {
222615144b0fSOlivier Houchard     flag aSign, bSign;
222715144b0fSOlivier Houchard 
222815144b0fSOlivier Houchard     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
222915144b0fSOlivier Houchard          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
223015144b0fSOlivier Houchard        ) {
223115144b0fSOlivier Houchard         float_raise( float_flag_invalid );
223215144b0fSOlivier Houchard         return 0;
223315144b0fSOlivier Houchard     }
223415144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
223515144b0fSOlivier Houchard     bSign = extractFloat32Sign( b );
223615144b0fSOlivier Houchard     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
223715144b0fSOlivier Houchard     return ( a != b ) && ( aSign ^ ( a < b ) );
223815144b0fSOlivier Houchard 
223915144b0fSOlivier Houchard }
224015144b0fSOlivier Houchard 
224115144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
224215144b0fSOlivier Houchard /*
224315144b0fSOlivier Houchard -------------------------------------------------------------------------------
224415144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is equal to
224515144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  The invalid exception is
224615144b0fSOlivier Houchard raised if either operand is a NaN.  Otherwise, the comparison is performed
224715144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
224815144b0fSOlivier Houchard -------------------------------------------------------------------------------
224915144b0fSOlivier Houchard */
float32_eq_signaling(float32 a,float32 b)225015144b0fSOlivier Houchard flag float32_eq_signaling( float32 a, float32 b )
225115144b0fSOlivier Houchard {
225215144b0fSOlivier Houchard 
225315144b0fSOlivier Houchard     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
225415144b0fSOlivier Houchard          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
225515144b0fSOlivier Houchard        ) {
225615144b0fSOlivier Houchard         float_raise( float_flag_invalid );
225715144b0fSOlivier Houchard         return 0;
225815144b0fSOlivier Houchard     }
225915144b0fSOlivier Houchard     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
226015144b0fSOlivier Houchard 
226115144b0fSOlivier Houchard }
226215144b0fSOlivier Houchard 
226315144b0fSOlivier Houchard /*
226415144b0fSOlivier Houchard -------------------------------------------------------------------------------
226515144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is less than or
226615144b0fSOlivier Houchard equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
226715144b0fSOlivier Houchard cause an exception.  Otherwise, the comparison is performed according to the
226815144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
226915144b0fSOlivier Houchard -------------------------------------------------------------------------------
227015144b0fSOlivier Houchard */
float32_le_quiet(float32 a,float32 b)227115144b0fSOlivier Houchard flag float32_le_quiet( float32 a, float32 b )
227215144b0fSOlivier Houchard {
227315144b0fSOlivier Houchard     flag aSign, bSign;
227415144b0fSOlivier Houchard 
227515144b0fSOlivier Houchard     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
227615144b0fSOlivier Houchard          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
227715144b0fSOlivier Houchard        ) {
227815144b0fSOlivier Houchard         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
227915144b0fSOlivier Houchard             float_raise( float_flag_invalid );
228015144b0fSOlivier Houchard         }
228115144b0fSOlivier Houchard         return 0;
228215144b0fSOlivier Houchard     }
228315144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
228415144b0fSOlivier Houchard     bSign = extractFloat32Sign( b );
228515144b0fSOlivier Houchard     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
228615144b0fSOlivier Houchard     return ( a == b ) || ( aSign ^ ( a < b ) );
228715144b0fSOlivier Houchard 
228815144b0fSOlivier Houchard }
228915144b0fSOlivier Houchard 
229015144b0fSOlivier Houchard /*
229115144b0fSOlivier Houchard -------------------------------------------------------------------------------
229215144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is less than
229315144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
229415144b0fSOlivier Houchard exception.  Otherwise, the comparison is performed according to the IEC/IEEE
229515144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
229615144b0fSOlivier Houchard -------------------------------------------------------------------------------
229715144b0fSOlivier Houchard */
float32_lt_quiet(float32 a,float32 b)229815144b0fSOlivier Houchard flag float32_lt_quiet( float32 a, float32 b )
229915144b0fSOlivier Houchard {
230015144b0fSOlivier Houchard     flag aSign, bSign;
230115144b0fSOlivier Houchard 
230215144b0fSOlivier Houchard     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
230315144b0fSOlivier Houchard          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
230415144b0fSOlivier Houchard        ) {
230515144b0fSOlivier Houchard         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
230615144b0fSOlivier Houchard             float_raise( float_flag_invalid );
230715144b0fSOlivier Houchard         }
230815144b0fSOlivier Houchard         return 0;
230915144b0fSOlivier Houchard     }
231015144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
231115144b0fSOlivier Houchard     bSign = extractFloat32Sign( b );
231215144b0fSOlivier Houchard     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
231315144b0fSOlivier Houchard     return ( a != b ) && ( aSign ^ ( a < b ) );
231415144b0fSOlivier Houchard 
231515144b0fSOlivier Houchard }
231615144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
231715144b0fSOlivier Houchard 
231815144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
231915144b0fSOlivier Houchard /*
232015144b0fSOlivier Houchard -------------------------------------------------------------------------------
232115144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
232215144b0fSOlivier Houchard `a' to the 32-bit two's complement integer format.  The conversion is
232315144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
232415144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
232515144b0fSOlivier Houchard according to the current rounding mode.  If `a' is a NaN, the largest
232615144b0fSOlivier Houchard positive integer is returned.  Otherwise, if the conversion overflows, the
232715144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
232815144b0fSOlivier Houchard -------------------------------------------------------------------------------
232915144b0fSOlivier Houchard */
float64_to_int32(float64 a)233015144b0fSOlivier Houchard int32 float64_to_int32( float64 a )
233115144b0fSOlivier Houchard {
233215144b0fSOlivier Houchard     flag aSign;
233315144b0fSOlivier Houchard     int16 aExp, shiftCount;
233415144b0fSOlivier Houchard     bits64 aSig;
233515144b0fSOlivier Houchard 
233615144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
233715144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
233815144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
233915144b0fSOlivier Houchard     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
234015144b0fSOlivier Houchard     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
234115144b0fSOlivier Houchard     shiftCount = 0x42C - aExp;
234215144b0fSOlivier Houchard     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
234315144b0fSOlivier Houchard     return roundAndPackInt32( aSign, aSig );
234415144b0fSOlivier Houchard 
234515144b0fSOlivier Houchard }
234615144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
234715144b0fSOlivier Houchard 
234815144b0fSOlivier Houchard /*
234915144b0fSOlivier Houchard -------------------------------------------------------------------------------
235015144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
235115144b0fSOlivier Houchard `a' to the 32-bit two's complement integer format.  The conversion is
235215144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
235315144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.
235415144b0fSOlivier Houchard If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
235515144b0fSOlivier Houchard the conversion overflows, the largest integer with the same sign as `a' is
235615144b0fSOlivier Houchard returned.
235715144b0fSOlivier Houchard -------------------------------------------------------------------------------
235815144b0fSOlivier Houchard */
float64_to_int32_round_to_zero(float64 a)235915144b0fSOlivier Houchard int32 float64_to_int32_round_to_zero( float64 a )
236015144b0fSOlivier Houchard {
236115144b0fSOlivier Houchard     flag aSign;
236215144b0fSOlivier Houchard     int16 aExp, shiftCount;
236315144b0fSOlivier Houchard     bits64 aSig, savedASig;
236415144b0fSOlivier Houchard     int32 z;
236515144b0fSOlivier Houchard 
236615144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
236715144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
236815144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
236915144b0fSOlivier Houchard     if ( 0x41E < aExp ) {
237015144b0fSOlivier Houchard         if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
237115144b0fSOlivier Houchard         goto invalid;
237215144b0fSOlivier Houchard     }
237315144b0fSOlivier Houchard     else if ( aExp < 0x3FF ) {
237415144b0fSOlivier Houchard         if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
237515144b0fSOlivier Houchard         return 0;
237615144b0fSOlivier Houchard     }
237715144b0fSOlivier Houchard     aSig |= LIT64( 0x0010000000000000 );
237815144b0fSOlivier Houchard     shiftCount = 0x433 - aExp;
237915144b0fSOlivier Houchard     savedASig = aSig;
238015144b0fSOlivier Houchard     aSig >>= shiftCount;
238115144b0fSOlivier Houchard     z = aSig;
238215144b0fSOlivier Houchard     if ( aSign ) z = - z;
238315144b0fSOlivier Houchard     if ( ( z < 0 ) ^ aSign ) {
238415144b0fSOlivier Houchard  invalid:
238515144b0fSOlivier Houchard         float_raise( float_flag_invalid );
238615144b0fSOlivier Houchard         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
238715144b0fSOlivier Houchard     }
238815144b0fSOlivier Houchard     if ( ( aSig<<shiftCount ) != savedASig ) {
238915144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
239015144b0fSOlivier Houchard     }
239115144b0fSOlivier Houchard     return z;
239215144b0fSOlivier Houchard 
239315144b0fSOlivier Houchard }
239415144b0fSOlivier Houchard 
239515144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
239615144b0fSOlivier Houchard /*
239715144b0fSOlivier Houchard -------------------------------------------------------------------------------
239815144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
239915144b0fSOlivier Houchard `a' to the 64-bit two's complement integer format.  The conversion is
240015144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
240115144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
240215144b0fSOlivier Houchard according to the current rounding mode.  If `a' is a NaN, the largest
240315144b0fSOlivier Houchard positive integer is returned.  Otherwise, if the conversion overflows, the
240415144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
240515144b0fSOlivier Houchard -------------------------------------------------------------------------------
240615144b0fSOlivier Houchard */
float64_to_int64(float64 a)240715144b0fSOlivier Houchard int64 float64_to_int64( float64 a )
240815144b0fSOlivier Houchard {
240915144b0fSOlivier Houchard     flag aSign;
241015144b0fSOlivier Houchard     int16 aExp, shiftCount;
241115144b0fSOlivier Houchard     bits64 aSig, aSigExtra;
241215144b0fSOlivier Houchard 
241315144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
241415144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
241515144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
241615144b0fSOlivier Houchard     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
241715144b0fSOlivier Houchard     shiftCount = 0x433 - aExp;
241815144b0fSOlivier Houchard     if ( shiftCount <= 0 ) {
241915144b0fSOlivier Houchard         if ( 0x43E < aExp ) {
242015144b0fSOlivier Houchard             float_raise( float_flag_invalid );
242115144b0fSOlivier Houchard             if (    ! aSign
242215144b0fSOlivier Houchard                  || (    ( aExp == 0x7FF )
242315144b0fSOlivier Houchard                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
242415144b0fSOlivier Houchard                ) {
242515144b0fSOlivier Houchard                 return LIT64( 0x7FFFFFFFFFFFFFFF );
242615144b0fSOlivier Houchard             }
242715144b0fSOlivier Houchard             return (sbits64) LIT64( 0x8000000000000000 );
242815144b0fSOlivier Houchard         }
242915144b0fSOlivier Houchard         aSigExtra = 0;
243015144b0fSOlivier Houchard         aSig <<= - shiftCount;
243115144b0fSOlivier Houchard     }
243215144b0fSOlivier Houchard     else {
243315144b0fSOlivier Houchard         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
243415144b0fSOlivier Houchard     }
243515144b0fSOlivier Houchard     return roundAndPackInt64( aSign, aSig, aSigExtra );
243615144b0fSOlivier Houchard 
243715144b0fSOlivier Houchard }
243815144b0fSOlivier Houchard 
243915144b0fSOlivier Houchard /*
244015144b0fSOlivier Houchard -------------------------------------------------------------------------------
244115144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
244215144b0fSOlivier Houchard `a' to the 64-bit two's complement integer format.  The conversion is
244315144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
244415144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.
244515144b0fSOlivier Houchard If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
244615144b0fSOlivier Houchard the conversion overflows, the largest integer with the same sign as `a' is
244715144b0fSOlivier Houchard returned.
244815144b0fSOlivier Houchard -------------------------------------------------------------------------------
244915144b0fSOlivier Houchard */
float64_to_int64_round_to_zero(float64 a)245015144b0fSOlivier Houchard int64 float64_to_int64_round_to_zero( float64 a )
245115144b0fSOlivier Houchard {
245215144b0fSOlivier Houchard     flag aSign;
245315144b0fSOlivier Houchard     int16 aExp, shiftCount;
245415144b0fSOlivier Houchard     bits64 aSig;
245515144b0fSOlivier Houchard     int64 z;
245615144b0fSOlivier Houchard 
245715144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
245815144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
245915144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
246015144b0fSOlivier Houchard     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
246115144b0fSOlivier Houchard     shiftCount = aExp - 0x433;
246215144b0fSOlivier Houchard     if ( 0 <= shiftCount ) {
246315144b0fSOlivier Houchard         if ( 0x43E <= aExp ) {
246415144b0fSOlivier Houchard             if ( a != LIT64( 0xC3E0000000000000 ) ) {
246515144b0fSOlivier Houchard                 float_raise( float_flag_invalid );
246615144b0fSOlivier Houchard                 if (    ! aSign
246715144b0fSOlivier Houchard                      || (    ( aExp == 0x7FF )
246815144b0fSOlivier Houchard                           && ( aSig != LIT64( 0x0010000000000000 ) ) )
246915144b0fSOlivier Houchard                    ) {
247015144b0fSOlivier Houchard                     return LIT64( 0x7FFFFFFFFFFFFFFF );
247115144b0fSOlivier Houchard                 }
247215144b0fSOlivier Houchard             }
247315144b0fSOlivier Houchard             return (sbits64) LIT64( 0x8000000000000000 );
247415144b0fSOlivier Houchard         }
247515144b0fSOlivier Houchard         z = aSig<<shiftCount;
247615144b0fSOlivier Houchard     }
247715144b0fSOlivier Houchard     else {
247815144b0fSOlivier Houchard         if ( aExp < 0x3FE ) {
247915144b0fSOlivier Houchard             if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
248015144b0fSOlivier Houchard             return 0;
248115144b0fSOlivier Houchard         }
248215144b0fSOlivier Houchard         z = aSig>>( - shiftCount );
248315144b0fSOlivier Houchard         if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
248415144b0fSOlivier Houchard             float_exception_flags |= float_flag_inexact;
248515144b0fSOlivier Houchard         }
248615144b0fSOlivier Houchard     }
248715144b0fSOlivier Houchard     if ( aSign ) z = - z;
248815144b0fSOlivier Houchard     return z;
248915144b0fSOlivier Houchard 
249015144b0fSOlivier Houchard }
249115144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
249215144b0fSOlivier Houchard 
249315144b0fSOlivier Houchard /*
249415144b0fSOlivier Houchard -------------------------------------------------------------------------------
249515144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
249615144b0fSOlivier Houchard `a' to the single-precision floating-point format.  The conversion is
249715144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
249815144b0fSOlivier Houchard Arithmetic.
249915144b0fSOlivier Houchard -------------------------------------------------------------------------------
250015144b0fSOlivier Houchard */
float64_to_float32(float64 a)250115144b0fSOlivier Houchard float32 float64_to_float32( float64 a )
250215144b0fSOlivier Houchard {
250315144b0fSOlivier Houchard     flag aSign;
250415144b0fSOlivier Houchard     int16 aExp;
250515144b0fSOlivier Houchard     bits64 aSig;
250615144b0fSOlivier Houchard     bits32 zSig;
250715144b0fSOlivier Houchard 
250815144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
250915144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
251015144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
251115144b0fSOlivier Houchard     if ( aExp == 0x7FF ) {
251215144b0fSOlivier Houchard         if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
251315144b0fSOlivier Houchard         return packFloat32( aSign, 0xFF, 0 );
251415144b0fSOlivier Houchard     }
251515144b0fSOlivier Houchard     shift64RightJamming( aSig, 22, &aSig );
251615144b0fSOlivier Houchard     zSig = aSig;
251715144b0fSOlivier Houchard     if ( aExp || zSig ) {
251815144b0fSOlivier Houchard         zSig |= 0x40000000;
251915144b0fSOlivier Houchard         aExp -= 0x381;
252015144b0fSOlivier Houchard     }
252115144b0fSOlivier Houchard     return roundAndPackFloat32( aSign, aExp, zSig );
252215144b0fSOlivier Houchard 
252315144b0fSOlivier Houchard }
252415144b0fSOlivier Houchard 
252515144b0fSOlivier Houchard #ifdef FLOATX80
252615144b0fSOlivier Houchard 
252715144b0fSOlivier Houchard /*
252815144b0fSOlivier Houchard -------------------------------------------------------------------------------
252915144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
253015144b0fSOlivier Houchard `a' to the extended double-precision floating-point format.  The conversion
253115144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
253215144b0fSOlivier Houchard Arithmetic.
253315144b0fSOlivier Houchard -------------------------------------------------------------------------------
253415144b0fSOlivier Houchard */
float64_to_floatx80(float64 a)253515144b0fSOlivier Houchard floatx80 float64_to_floatx80( float64 a )
253615144b0fSOlivier Houchard {
253715144b0fSOlivier Houchard     flag aSign;
253815144b0fSOlivier Houchard     int16 aExp;
253915144b0fSOlivier Houchard     bits64 aSig;
254015144b0fSOlivier Houchard 
254115144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
254215144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
254315144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
254415144b0fSOlivier Houchard     if ( aExp == 0x7FF ) {
254515144b0fSOlivier Houchard         if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
254615144b0fSOlivier Houchard         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
254715144b0fSOlivier Houchard     }
254815144b0fSOlivier Houchard     if ( aExp == 0 ) {
254915144b0fSOlivier Houchard         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
255015144b0fSOlivier Houchard         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
255115144b0fSOlivier Houchard     }
255215144b0fSOlivier Houchard     return
255315144b0fSOlivier Houchard         packFloatx80(
255415144b0fSOlivier Houchard             aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
255515144b0fSOlivier Houchard 
255615144b0fSOlivier Houchard }
255715144b0fSOlivier Houchard 
255815144b0fSOlivier Houchard #endif
255915144b0fSOlivier Houchard 
256015144b0fSOlivier Houchard #ifdef FLOAT128
256115144b0fSOlivier Houchard 
256215144b0fSOlivier Houchard /*
256315144b0fSOlivier Houchard -------------------------------------------------------------------------------
256415144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
256515144b0fSOlivier Houchard `a' to the quadruple-precision floating-point format.  The conversion is
256615144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
256715144b0fSOlivier Houchard Arithmetic.
256815144b0fSOlivier Houchard -------------------------------------------------------------------------------
256915144b0fSOlivier Houchard */
float64_to_float128(float64 a)257015144b0fSOlivier Houchard float128 float64_to_float128( float64 a )
257115144b0fSOlivier Houchard {
257215144b0fSOlivier Houchard     flag aSign;
257315144b0fSOlivier Houchard     int16 aExp;
257415144b0fSOlivier Houchard     bits64 aSig, zSig0, zSig1;
257515144b0fSOlivier Houchard 
257615144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
257715144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
257815144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
257915144b0fSOlivier Houchard     if ( aExp == 0x7FF ) {
258015144b0fSOlivier Houchard         if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
258115144b0fSOlivier Houchard         return packFloat128( aSign, 0x7FFF, 0, 0 );
258215144b0fSOlivier Houchard     }
258315144b0fSOlivier Houchard     if ( aExp == 0 ) {
258415144b0fSOlivier Houchard         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
258515144b0fSOlivier Houchard         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
258615144b0fSOlivier Houchard         --aExp;
258715144b0fSOlivier Houchard     }
258815144b0fSOlivier Houchard     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
258915144b0fSOlivier Houchard     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
259015144b0fSOlivier Houchard 
259115144b0fSOlivier Houchard }
259215144b0fSOlivier Houchard 
259315144b0fSOlivier Houchard #endif
259415144b0fSOlivier Houchard 
259515144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC
259615144b0fSOlivier Houchard /*
259715144b0fSOlivier Houchard -------------------------------------------------------------------------------
259815144b0fSOlivier Houchard Rounds the double-precision floating-point value `a' to an integer, and
259915144b0fSOlivier Houchard returns the result as a double-precision floating-point value.  The
260015144b0fSOlivier Houchard operation is performed according to the IEC/IEEE Standard for Binary
260115144b0fSOlivier Houchard Floating-Point Arithmetic.
260215144b0fSOlivier Houchard -------------------------------------------------------------------------------
260315144b0fSOlivier Houchard */
float64 float64_round_to_int( float64 a )
{
    flag sign;
    int16 expField;
    bits64 unitBit, fractionMask;
    int8 mode;
    float64 rounded;

    expField = extractFloat64Exp( a );

    /*
     * Exponent field of 0x433 or more: the input is returned unchanged,
     * except that a NaN (field 0x7FF, non-zero fraction) is propagated.
     */
    if ( 0x433 <= expField ) {
        if ( ( expField == 0x7FF ) && extractFloat64Frac( a ) ) {
            return propagateFloat64NaN( a, a );
        }
        return a;
    }

    /*
     * Exponent field below 0x3FF: the result is one of a few fixed values,
     * chosen by sign and rounding mode.
     */
    if ( expField < 0x3FF ) {
        /* All bits other than the sign are zero: return the input as is. */
        if ( (bits64) ( a<<1 ) == 0 ) {
            return a;
        }
        float_exception_flags |= float_flag_inexact;
        sign = extractFloat64Sign( a );
        switch ( float_rounding_mode ) {
         case float_round_nearest_even:
            /* Field 0x3FE with a non-zero fraction rounds to exponent 0x3FF. */
            if ( ( expField == 0x3FE ) && extractFloat64Frac( a ) ) {
                return packFloat64( sign, 0x3FF, 0 );
            }
            break;
         case float_round_down:
            return sign ? LIT64( 0xBFF0000000000000 ) : 0;
         case float_round_up:
            return
                sign ? LIT64( 0x8000000000000000 )
                     : LIT64( 0x3FF0000000000000 );
         case float_round_to_zero:
            break;
         default:
            break;
        }
        return packFloat64( sign, 0, 0 );
    }

    /*
     * General case: unitBit is a single bit at position 0x433 - expField
     * and fractionMask covers every bit below it.
     */
    unitBit = (bits64) 1 << ( 0x433 - expField );
    fractionMask = unitBit - 1;
    rounded = a;
    mode = float_rounding_mode;
    if ( mode == float_round_to_zero ) {
        /* No adjustment: the masking below just drops the low bits. */
    }
    else if ( mode == float_round_nearest_even ) {
        /* Add half of unitBit; if the low bits are then zero, clear unitBit. */
        rounded += unitBit>>1;
        if ( ( rounded & fractionMask ) == 0 ) {
            rounded &= ~ unitBit;
        }
    }
    else {
        /* Other modes: add the mask when sign and "rounding up" differ. */
        if ( extractFloat64Sign( rounded ) ^ ( mode == float_round_up ) ) {
            rounded += fractionMask;
        }
    }
    rounded &= ~ fractionMask;
    if ( rounded != a ) {
        float_exception_flags |= float_flag_inexact;
    }
    return rounded;
}
265815144b0fSOlivier Houchard #endif
265915144b0fSOlivier Houchard 
266015144b0fSOlivier Houchard /*
266115144b0fSOlivier Houchard -------------------------------------------------------------------------------
266215144b0fSOlivier Houchard Returns the result of adding the absolute values of the double-precision
266315144b0fSOlivier Houchard floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
266415144b0fSOlivier Houchard before being returned.  `zSign' is ignored if the result is a NaN.
266515144b0fSOlivier Houchard The addition is performed according to the IEC/IEEE Standard for Binary
266615144b0fSOlivier Houchard Floating-Point Arithmetic.
266715144b0fSOlivier Houchard -------------------------------------------------------------------------------
266815144b0fSOlivier Houchard */
addFloat64Sigs(float64 a,float64 b,flag zSign)266915144b0fSOlivier Houchard static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
267015144b0fSOlivier Houchard {
267115144b0fSOlivier Houchard     int16 aExp, bExp, zExp;
267215144b0fSOlivier Houchard     bits64 aSig, bSig, zSig;
267315144b0fSOlivier Houchard     int16 expDiff;
267415144b0fSOlivier Houchard 
267515144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
267615144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
267715144b0fSOlivier Houchard     bSig = extractFloat64Frac( b );
267815144b0fSOlivier Houchard     bExp = extractFloat64Exp( b );
267915144b0fSOlivier Houchard     expDiff = aExp - bExp;
268015144b0fSOlivier Houchard     aSig <<= 9;
268115144b0fSOlivier Houchard     bSig <<= 9;
268215144b0fSOlivier Houchard     if ( 0 < expDiff ) {
268315144b0fSOlivier Houchard         if ( aExp == 0x7FF ) {
268415144b0fSOlivier Houchard             if ( aSig ) return propagateFloat64NaN( a, b );
268515144b0fSOlivier Houchard             return a;
268615144b0fSOlivier Houchard         }
268715144b0fSOlivier Houchard         if ( bExp == 0 ) {
268815144b0fSOlivier Houchard             --expDiff;
268915144b0fSOlivier Houchard         }
269015144b0fSOlivier Houchard         else {
269115144b0fSOlivier Houchard             bSig |= LIT64( 0x2000000000000000 );
269215144b0fSOlivier Houchard         }
269315144b0fSOlivier Houchard         shift64RightJamming( bSig, expDiff, &bSig );
269415144b0fSOlivier Houchard         zExp = aExp;
269515144b0fSOlivier Houchard     }
269615144b0fSOlivier Houchard     else if ( expDiff < 0 ) {
269715144b0fSOlivier Houchard         if ( bExp == 0x7FF ) {
269815144b0fSOlivier Houchard             if ( bSig ) return propagateFloat64NaN( a, b );
269915144b0fSOlivier Houchard             return packFloat64( zSign, 0x7FF, 0 );
270015144b0fSOlivier Houchard         }
270115144b0fSOlivier Houchard         if ( aExp == 0 ) {
270215144b0fSOlivier Houchard             ++expDiff;
270315144b0fSOlivier Houchard         }
270415144b0fSOlivier Houchard         else {
270515144b0fSOlivier Houchard             aSig |= LIT64( 0x2000000000000000 );
270615144b0fSOlivier Houchard         }
270715144b0fSOlivier Houchard         shift64RightJamming( aSig, - expDiff, &aSig );
270815144b0fSOlivier Houchard         zExp = bExp;
270915144b0fSOlivier Houchard     }
271015144b0fSOlivier Houchard     else {
271115144b0fSOlivier Houchard         if ( aExp == 0x7FF ) {
271215144b0fSOlivier Houchard             if ( aSig | bSig ) return propagateFloat64NaN( a, b );
271315144b0fSOlivier Houchard             return a;
271415144b0fSOlivier Houchard         }
271515144b0fSOlivier Houchard         if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
271615144b0fSOlivier Houchard         zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
271715144b0fSOlivier Houchard         zExp = aExp;
271815144b0fSOlivier Houchard         goto roundAndPack;
271915144b0fSOlivier Houchard     }
272015144b0fSOlivier Houchard     aSig |= LIT64( 0x2000000000000000 );
272115144b0fSOlivier Houchard     zSig = ( aSig + bSig )<<1;
272215144b0fSOlivier Houchard     --zExp;
272315144b0fSOlivier Houchard     if ( (sbits64) zSig < 0 ) {
272415144b0fSOlivier Houchard         zSig = aSig + bSig;
272515144b0fSOlivier Houchard         ++zExp;
272615144b0fSOlivier Houchard     }
272715144b0fSOlivier Houchard  roundAndPack:
272815144b0fSOlivier Houchard     return roundAndPackFloat64( zSign, zExp, zSig );
272915144b0fSOlivier Houchard 
273015144b0fSOlivier Houchard }
273115144b0fSOlivier Houchard 
273215144b0fSOlivier Houchard /*
273315144b0fSOlivier Houchard -------------------------------------------------------------------------------
273415144b0fSOlivier Houchard Returns the result of subtracting the absolute values of the double-
273515144b0fSOlivier Houchard precision floating-point values `a' and `b'.  If `zSign' is 1, the
273615144b0fSOlivier Houchard difference is negated before being returned.  `zSign' is ignored if the
273715144b0fSOlivier Houchard result is a NaN.  The subtraction is performed according to the IEC/IEEE
273815144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
273915144b0fSOlivier Houchard -------------------------------------------------------------------------------
274015144b0fSOlivier Houchard */
subFloat64Sigs(float64 a,float64 b,flag zSign)274115144b0fSOlivier Houchard static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
274215144b0fSOlivier Houchard {
274315144b0fSOlivier Houchard     int16 aExp, bExp, zExp;
274415144b0fSOlivier Houchard     bits64 aSig, bSig, zSig;
274515144b0fSOlivier Houchard     int16 expDiff;
274615144b0fSOlivier Houchard 
274715144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
274815144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
274915144b0fSOlivier Houchard     bSig = extractFloat64Frac( b );
275015144b0fSOlivier Houchard     bExp = extractFloat64Exp( b );
275115144b0fSOlivier Houchard     expDiff = aExp - bExp;
275215144b0fSOlivier Houchard     aSig <<= 10;
275315144b0fSOlivier Houchard     bSig <<= 10;
275415144b0fSOlivier Houchard     if ( 0 < expDiff ) goto aExpBigger;
275515144b0fSOlivier Houchard     if ( expDiff < 0 ) goto bExpBigger;
275615144b0fSOlivier Houchard     if ( aExp == 0x7FF ) {
275715144b0fSOlivier Houchard         if ( aSig | bSig ) return propagateFloat64NaN( a, b );
275815144b0fSOlivier Houchard         float_raise( float_flag_invalid );
275915144b0fSOlivier Houchard         return float64_default_nan;
276015144b0fSOlivier Houchard     }
276115144b0fSOlivier Houchard     if ( aExp == 0 ) {
276215144b0fSOlivier Houchard         aExp = 1;
276315144b0fSOlivier Houchard         bExp = 1;
276415144b0fSOlivier Houchard     }
276515144b0fSOlivier Houchard     if ( bSig < aSig ) goto aBigger;
276615144b0fSOlivier Houchard     if ( aSig < bSig ) goto bBigger;
276715144b0fSOlivier Houchard     return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
276815144b0fSOlivier Houchard  bExpBigger:
276915144b0fSOlivier Houchard     if ( bExp == 0x7FF ) {
277015144b0fSOlivier Houchard         if ( bSig ) return propagateFloat64NaN( a, b );
277115144b0fSOlivier Houchard         return packFloat64( zSign ^ 1, 0x7FF, 0 );
277215144b0fSOlivier Houchard     }
277315144b0fSOlivier Houchard     if ( aExp == 0 ) {
277415144b0fSOlivier Houchard         ++expDiff;
277515144b0fSOlivier Houchard     }
277615144b0fSOlivier Houchard     else {
277715144b0fSOlivier Houchard         aSig |= LIT64( 0x4000000000000000 );
277815144b0fSOlivier Houchard     }
277915144b0fSOlivier Houchard     shift64RightJamming( aSig, - expDiff, &aSig );
278015144b0fSOlivier Houchard     bSig |= LIT64( 0x4000000000000000 );
278115144b0fSOlivier Houchard  bBigger:
278215144b0fSOlivier Houchard     zSig = bSig - aSig;
278315144b0fSOlivier Houchard     zExp = bExp;
278415144b0fSOlivier Houchard     zSign ^= 1;
278515144b0fSOlivier Houchard     goto normalizeRoundAndPack;
278615144b0fSOlivier Houchard  aExpBigger:
278715144b0fSOlivier Houchard     if ( aExp == 0x7FF ) {
278815144b0fSOlivier Houchard         if ( aSig ) return propagateFloat64NaN( a, b );
278915144b0fSOlivier Houchard         return a;
279015144b0fSOlivier Houchard     }
279115144b0fSOlivier Houchard     if ( bExp == 0 ) {
279215144b0fSOlivier Houchard         --expDiff;
279315144b0fSOlivier Houchard     }
279415144b0fSOlivier Houchard     else {
279515144b0fSOlivier Houchard         bSig |= LIT64( 0x4000000000000000 );
279615144b0fSOlivier Houchard     }
279715144b0fSOlivier Houchard     shift64RightJamming( bSig, expDiff, &bSig );
279815144b0fSOlivier Houchard     aSig |= LIT64( 0x4000000000000000 );
279915144b0fSOlivier Houchard  aBigger:
280015144b0fSOlivier Houchard     zSig = aSig - bSig;
280115144b0fSOlivier Houchard     zExp = aExp;
280215144b0fSOlivier Houchard  normalizeRoundAndPack:
280315144b0fSOlivier Houchard     --zExp;
280415144b0fSOlivier Houchard     return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
280515144b0fSOlivier Houchard 
280615144b0fSOlivier Houchard }
280715144b0fSOlivier Houchard 
280815144b0fSOlivier Houchard /*
280915144b0fSOlivier Houchard -------------------------------------------------------------------------------
281015144b0fSOlivier Houchard Returns the result of adding the double-precision floating-point values `a'
281115144b0fSOlivier Houchard and `b'.  The operation is performed according to the IEC/IEEE Standard for
281215144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
281315144b0fSOlivier Houchard -------------------------------------------------------------------------------
281415144b0fSOlivier Houchard */
float64 float64_add( float64 a, float64 b )
{
    flag signA = extractFloat64Sign( a );
    flag signB = extractFloat64Sign( b );

    /* Unlike signs: the sum is formed by the subtraction helper. */
    if ( signA != signB ) {
        return subFloat64Sigs( a, b, signA );
    }
    /* Like signs: the addition helper is used, carrying a's sign. */
    return addFloat64Sigs( a, b, signA );
}
282915144b0fSOlivier Houchard 
283015144b0fSOlivier Houchard /*
283115144b0fSOlivier Houchard -------------------------------------------------------------------------------
283215144b0fSOlivier Houchard Returns the result of subtracting the double-precision floating-point values
283315144b0fSOlivier Houchard `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
283415144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
283515144b0fSOlivier Houchard -------------------------------------------------------------------------------
283615144b0fSOlivier Houchard */
float64 float64_sub( float64 a, float64 b )
{
    flag signA = extractFloat64Sign( a );
    flag signB = extractFloat64Sign( b );

    /* Unlike signs: the difference is formed by the addition helper. */
    if ( signA != signB ) {
        return addFloat64Sigs( a, b, signA );
    }
    /* Like signs: the subtraction helper is used, carrying a's sign. */
    return subFloat64Sigs( a, b, signA );
}
285115144b0fSOlivier Houchard 
285215144b0fSOlivier Houchard /*
285315144b0fSOlivier Houchard -------------------------------------------------------------------------------
285415144b0fSOlivier Houchard Returns the result of multiplying the double-precision floating-point values
285515144b0fSOlivier Houchard `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
285615144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
285715144b0fSOlivier Houchard -------------------------------------------------------------------------------
285815144b0fSOlivier Houchard */
float64 float64_mul( float64 a, float64 b )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    bits64 sigA, sigB, prodHigh, prodLow;

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    signA = extractFloat64Sign( a );
    sigB = extractFloat64Frac( b );
    expB = extractFloat64Exp( b );
    signB = extractFloat64Sign( b );
    signZ = signA ^ signB;

    if ( expA == 0x7FF ) {
        /* a is a NaN, or a is infinite and b is a NaN. */
        if ( sigA || ( ( expB == 0x7FF ) && sigB ) ) {
            return propagateFloat64NaN( a, b );
        }
        /* a infinite, b zero: raise invalid and return the default NaN. */
        if ( ( expB == 0 ) && ( sigB == 0 ) ) {
            float_raise( float_flag_invalid );
            return float64_default_nan;
        }
        return packFloat64( signZ, 0x7FF, 0 );
    }
    if ( expB == 0x7FF ) {
        if ( sigB ) {
            return propagateFloat64NaN( a, b );
        }
        /* a zero, b infinite: raise invalid and return the default NaN. */
        if ( ( expA == 0 ) && ( sigA == 0 ) ) {
            float_raise( float_flag_invalid );
            return float64_default_nan;
        }
        return packFloat64( signZ, 0x7FF, 0 );
    }

    /* A zero operand gives a signed zero; other exponent-0 inputs are normalized. */
    if ( expA == 0 ) {
        if ( sigA == 0 ) {
            return packFloat64( signZ, 0, 0 );
        }
        normalizeFloat64Subnormal( sigA, &expA, &sigA );
    }
    if ( expB == 0 ) {
        if ( sigB == 0 ) {
            return packFloat64( signZ, 0, 0 );
        }
        normalizeFloat64Subnormal( sigB, &expB, &sigB );
    }

    expZ = expA + expB - 0x3FF;
    /* Set bit 52 in each significand, then shift up by 10 and 11 bits. */
    sigA = ( sigA | LIT64( 0x0010000000000000 ) )<<10;
    sigB = ( sigB | LIT64( 0x0010000000000000 ) )<<11;
    mul64To128( sigA, sigB, &prodHigh, &prodLow );
    /* Fold any non-zero low word into bit 0 of the high word. */
    prodHigh |= ( prodLow != 0 );
    /* Bit 62 clear: shift up one place and lower the exponent to match. */
    if ( ( prodHigh & LIT64( 0x4000000000000000 ) ) == 0 ) {
        prodHigh <<= 1;
        --expZ;
    }
    return roundAndPackFloat64( signZ, expZ, prodHigh );
}
291015144b0fSOlivier Houchard 
291115144b0fSOlivier Houchard /*
291215144b0fSOlivier Houchard -------------------------------------------------------------------------------
291315144b0fSOlivier Houchard Returns the result of dividing the double-precision floating-point value `a'
291415144b0fSOlivier Houchard by the corresponding value `b'.  The operation is performed according to
291515144b0fSOlivier Houchard the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
291615144b0fSOlivier Houchard -------------------------------------------------------------------------------
291715144b0fSOlivier Houchard */
float64 float64_div( float64 a, float64 b )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    bits64 sigA, sigB, quot;
    bits64 remHigh, remLow;
    bits64 termHigh, termLow;

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    signA = extractFloat64Sign( a );
    sigB = extractFloat64Frac( b );
    expB = extractFloat64Exp( b );
    signB = extractFloat64Sign( b );
    signZ = signA ^ signB;

    if ( expA == 0x7FF ) {
        if ( sigA ) {
            return propagateFloat64NaN( a, b );
        }
        /* a is infinite from here on. */
        if ( expB != 0x7FF ) {
            return packFloat64( signZ, 0x7FF, 0 );
        }
        if ( sigB ) {
            return propagateFloat64NaN( a, b );
        }
        /* Infinity / infinity: raise invalid and return the default NaN. */
        float_raise( float_flag_invalid );
        return float64_default_nan;
    }
    if ( expB == 0x7FF ) {
        if ( sigB ) {
            return propagateFloat64NaN( a, b );
        }
        /* Divisor is infinite: signed zero. */
        return packFloat64( signZ, 0, 0 );
    }
    if ( expB == 0 ) {
        if ( sigB == 0 ) {
            /* Zero divisor: 0/0 raises invalid, anything else divbyzero. */
            if ( ( expA == 0 ) && ( sigA == 0 ) ) {
                float_raise( float_flag_invalid );
                return float64_default_nan;
            }
            float_raise( float_flag_divbyzero );
            return packFloat64( signZ, 0x7FF, 0 );
        }
        normalizeFloat64Subnormal( sigB, &expB, &sigB );
    }
    if ( expA == 0 ) {
        if ( sigA == 0 ) {
            return packFloat64( signZ, 0, 0 );
        }
        normalizeFloat64Subnormal( sigA, &expA, &sigA );
    }

    expZ = expA - expB + 0x3FD;
    /* Set bit 52 in each significand, then shift up by 10 and 11 bits. */
    sigA = ( sigA | LIT64( 0x0010000000000000 ) )<<10;
    sigB = ( sigB | LIT64( 0x0010000000000000 ) )<<11;
    /* Halve the dividend (and raise the exponent) when sigB <= 2 * sigA. */
    if ( sigB <= ( sigA + sigA ) ) {
        sigA >>= 1;
        ++expZ;
    }
    quot = estimateDiv128To64( sigA, 0, sigB );
    /*
     * When the low 9 bits of the estimate are 0, 1 or 2, compute the
     * remainder, step the quotient down while it is negative, and record
     * a non-zero low remainder word in bit 0.
     */
    if ( ( quot & 0x1FF ) <= 2 ) {
        mul64To128( sigB, quot, &termHigh, &termLow );
        sub128( sigA, 0, termHigh, termLow, &remHigh, &remLow );
        while ( (sbits64) remHigh < 0 ) {
            --quot;
            add128( remHigh, remLow, 0, sigB, &remHigh, &remLow );
        }
        quot |= ( remLow != 0 );
    }
    return roundAndPackFloat64( signZ, expZ, quot );
}
298115144b0fSOlivier Houchard 
298215144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC
298315144b0fSOlivier Houchard /*
298415144b0fSOlivier Houchard -------------------------------------------------------------------------------
298515144b0fSOlivier Houchard Returns the remainder of the double-precision floating-point value `a'
298615144b0fSOlivier Houchard with respect to the corresponding value `b'.  The operation is performed
298715144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
298815144b0fSOlivier Houchard -------------------------------------------------------------------------------
298915144b0fSOlivier Houchard */
float64_rem(float64 a,float64 b)299015144b0fSOlivier Houchard float64 float64_rem( float64 a, float64 b )
299115144b0fSOlivier Houchard {
299215144b0fSOlivier Houchard     flag aSign, bSign, zSign;
299315144b0fSOlivier Houchard     int16 aExp, bExp, expDiff;
299415144b0fSOlivier Houchard     bits64 aSig, bSig;
299515144b0fSOlivier Houchard     bits64 q, alternateASig;
299615144b0fSOlivier Houchard     sbits64 sigMean;
299715144b0fSOlivier Houchard 
299815144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
299915144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
300015144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
300115144b0fSOlivier Houchard     bSig = extractFloat64Frac( b );
300215144b0fSOlivier Houchard     bExp = extractFloat64Exp( b );
300315144b0fSOlivier Houchard     bSign = extractFloat64Sign( b );
300415144b0fSOlivier Houchard     if ( aExp == 0x7FF ) {
300515144b0fSOlivier Houchard         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
300615144b0fSOlivier Houchard             return propagateFloat64NaN( a, b );
300715144b0fSOlivier Houchard         }
300815144b0fSOlivier Houchard         float_raise( float_flag_invalid );
300915144b0fSOlivier Houchard         return float64_default_nan;
301015144b0fSOlivier Houchard     }
301115144b0fSOlivier Houchard     if ( bExp == 0x7FF ) {
301215144b0fSOlivier Houchard         if ( bSig ) return propagateFloat64NaN( a, b );
301315144b0fSOlivier Houchard         return a;
301415144b0fSOlivier Houchard     }
301515144b0fSOlivier Houchard     if ( bExp == 0 ) {
301615144b0fSOlivier Houchard         if ( bSig == 0 ) {
301715144b0fSOlivier Houchard             float_raise( float_flag_invalid );
301815144b0fSOlivier Houchard             return float64_default_nan;
301915144b0fSOlivier Houchard         }
302015144b0fSOlivier Houchard         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
302115144b0fSOlivier Houchard     }
302215144b0fSOlivier Houchard     if ( aExp == 0 ) {
302315144b0fSOlivier Houchard         if ( aSig == 0 ) return a;
302415144b0fSOlivier Houchard         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
302515144b0fSOlivier Houchard     }
302615144b0fSOlivier Houchard     expDiff = aExp - bExp;
302715144b0fSOlivier Houchard     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
302815144b0fSOlivier Houchard     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
302915144b0fSOlivier Houchard     if ( expDiff < 0 ) {
303015144b0fSOlivier Houchard         if ( expDiff < -1 ) return a;
303115144b0fSOlivier Houchard         aSig >>= 1;
303215144b0fSOlivier Houchard     }
303315144b0fSOlivier Houchard     q = ( bSig <= aSig );
303415144b0fSOlivier Houchard     if ( q ) aSig -= bSig;
303515144b0fSOlivier Houchard     expDiff -= 64;
303615144b0fSOlivier Houchard     while ( 0 < expDiff ) {
303715144b0fSOlivier Houchard         q = estimateDiv128To64( aSig, 0, bSig );
303815144b0fSOlivier Houchard         q = ( 2 < q ) ? q - 2 : 0;
303915144b0fSOlivier Houchard         aSig = - ( ( bSig>>2 ) * q );
304015144b0fSOlivier Houchard         expDiff -= 62;
304115144b0fSOlivier Houchard     }
304215144b0fSOlivier Houchard     expDiff += 64;
304315144b0fSOlivier Houchard     if ( 0 < expDiff ) {
304415144b0fSOlivier Houchard         q = estimateDiv128To64( aSig, 0, bSig );
304515144b0fSOlivier Houchard         q = ( 2 < q ) ? q - 2 : 0;
304615144b0fSOlivier Houchard         q >>= 64 - expDiff;
304715144b0fSOlivier Houchard         bSig >>= 2;
304815144b0fSOlivier Houchard         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
304915144b0fSOlivier Houchard     }
305015144b0fSOlivier Houchard     else {
305115144b0fSOlivier Houchard         aSig >>= 2;
305215144b0fSOlivier Houchard         bSig >>= 2;
305315144b0fSOlivier Houchard     }
305415144b0fSOlivier Houchard     do {
305515144b0fSOlivier Houchard         alternateASig = aSig;
305615144b0fSOlivier Houchard         ++q;
305715144b0fSOlivier Houchard         aSig -= bSig;
305815144b0fSOlivier Houchard     } while ( 0 <= (sbits64) aSig );
305915144b0fSOlivier Houchard     sigMean = aSig + alternateASig;
306015144b0fSOlivier Houchard     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
306115144b0fSOlivier Houchard         aSig = alternateASig;
306215144b0fSOlivier Houchard     }
306315144b0fSOlivier Houchard     zSign = ( (sbits64) aSig < 0 );
306415144b0fSOlivier Houchard     if ( zSign ) aSig = - aSig;
306515144b0fSOlivier Houchard     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
306615144b0fSOlivier Houchard 
306715144b0fSOlivier Houchard }
306815144b0fSOlivier Houchard 
306915144b0fSOlivier Houchard /*
307015144b0fSOlivier Houchard -------------------------------------------------------------------------------
307115144b0fSOlivier Houchard Returns the square root of the double-precision floating-point value `a'.
307215144b0fSOlivier Houchard The operation is performed according to the IEC/IEEE Standard for Binary
307315144b0fSOlivier Houchard Floating-Point Arithmetic.
307415144b0fSOlivier Houchard -------------------------------------------------------------------------------
307515144b0fSOlivier Houchard */
float64_sqrt(float64 a)307615144b0fSOlivier Houchard float64 float64_sqrt( float64 a )
307715144b0fSOlivier Houchard {
307815144b0fSOlivier Houchard     flag aSign;
307915144b0fSOlivier Houchard     int16 aExp, zExp;
308015144b0fSOlivier Houchard     bits64 aSig, zSig, doubleZSig;
308115144b0fSOlivier Houchard     bits64 rem0, rem1, term0, term1;
308215144b0fSOlivier Houchard 
308315144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
308415144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
308515144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
308615144b0fSOlivier Houchard     if ( aExp == 0x7FF ) {
308715144b0fSOlivier Houchard         if ( aSig ) return propagateFloat64NaN( a, a );
308815144b0fSOlivier Houchard         if ( ! aSign ) return a;
308915144b0fSOlivier Houchard         float_raise( float_flag_invalid );
309015144b0fSOlivier Houchard         return float64_default_nan;
309115144b0fSOlivier Houchard     }
309215144b0fSOlivier Houchard     if ( aSign ) {
309315144b0fSOlivier Houchard         if ( ( aExp | aSig ) == 0 ) return a;
309415144b0fSOlivier Houchard         float_raise( float_flag_invalid );
309515144b0fSOlivier Houchard         return float64_default_nan;
309615144b0fSOlivier Houchard     }
309715144b0fSOlivier Houchard     if ( aExp == 0 ) {
309815144b0fSOlivier Houchard         if ( aSig == 0 ) return 0;
309915144b0fSOlivier Houchard         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
310015144b0fSOlivier Houchard     }
310115144b0fSOlivier Houchard     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
310215144b0fSOlivier Houchard     aSig |= LIT64( 0x0010000000000000 );
310315144b0fSOlivier Houchard     zSig = estimateSqrt32( aExp, aSig>>21 );
310415144b0fSOlivier Houchard     aSig <<= 9 - ( aExp & 1 );
310515144b0fSOlivier Houchard     zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
310615144b0fSOlivier Houchard     if ( ( zSig & 0x1FF ) <= 5 ) {
310715144b0fSOlivier Houchard         doubleZSig = zSig<<1;
310815144b0fSOlivier Houchard         mul64To128( zSig, zSig, &term0, &term1 );
310915144b0fSOlivier Houchard         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
311015144b0fSOlivier Houchard         while ( (sbits64) rem0 < 0 ) {
311115144b0fSOlivier Houchard             --zSig;
311215144b0fSOlivier Houchard             doubleZSig -= 2;
311315144b0fSOlivier Houchard             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
311415144b0fSOlivier Houchard         }
311515144b0fSOlivier Houchard         zSig |= ( ( rem0 | rem1 ) != 0 );
311615144b0fSOlivier Houchard     }
311715144b0fSOlivier Houchard     return roundAndPackFloat64( 0, zExp, zSig );
311815144b0fSOlivier Houchard 
311915144b0fSOlivier Houchard }
312015144b0fSOlivier Houchard #endif
312115144b0fSOlivier Houchard 
312215144b0fSOlivier Houchard /*
312315144b0fSOlivier Houchard -------------------------------------------------------------------------------
312415144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is equal to the
312515144b0fSOlivier Houchard corresponding value `b', and 0 otherwise.  The comparison is performed
312615144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
312715144b0fSOlivier Houchard -------------------------------------------------------------------------------
312815144b0fSOlivier Houchard */
float64_eq(float64 a,float64 b)312915144b0fSOlivier Houchard flag float64_eq( float64 a, float64 b )
313015144b0fSOlivier Houchard {
313115144b0fSOlivier Houchard 
313215144b0fSOlivier Houchard     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
313315144b0fSOlivier Houchard          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
313415144b0fSOlivier Houchard        ) {
313515144b0fSOlivier Houchard         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
313615144b0fSOlivier Houchard             float_raise( float_flag_invalid );
313715144b0fSOlivier Houchard         }
313815144b0fSOlivier Houchard         return 0;
313915144b0fSOlivier Houchard     }
314015144b0fSOlivier Houchard     return ( a == b ) ||
314115144b0fSOlivier Houchard 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
314215144b0fSOlivier Houchard 
314315144b0fSOlivier Houchard }
314415144b0fSOlivier Houchard 
314515144b0fSOlivier Houchard /*
314615144b0fSOlivier Houchard -------------------------------------------------------------------------------
314715144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is less than or
314815144b0fSOlivier Houchard equal to the corresponding value `b', and 0 otherwise.  The comparison is
314915144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
315015144b0fSOlivier Houchard Arithmetic.
315115144b0fSOlivier Houchard -------------------------------------------------------------------------------
315215144b0fSOlivier Houchard */
float64_le(float64 a,float64 b)315315144b0fSOlivier Houchard flag float64_le( float64 a, float64 b )
315415144b0fSOlivier Houchard {
315515144b0fSOlivier Houchard     flag aSign, bSign;
315615144b0fSOlivier Houchard 
315715144b0fSOlivier Houchard     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
315815144b0fSOlivier Houchard          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
315915144b0fSOlivier Houchard        ) {
316015144b0fSOlivier Houchard         float_raise( float_flag_invalid );
316115144b0fSOlivier Houchard         return 0;
316215144b0fSOlivier Houchard     }
316315144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
316415144b0fSOlivier Houchard     bSign = extractFloat64Sign( b );
316515144b0fSOlivier Houchard     if ( aSign != bSign )
316615144b0fSOlivier Houchard 	return aSign ||
316715144b0fSOlivier Houchard 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
316815144b0fSOlivier Houchard 	      0 );
316915144b0fSOlivier Houchard     return ( a == b ) ||
317015144b0fSOlivier Houchard 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
317115144b0fSOlivier Houchard 
317215144b0fSOlivier Houchard }
317315144b0fSOlivier Houchard 
317415144b0fSOlivier Houchard /*
317515144b0fSOlivier Houchard -------------------------------------------------------------------------------
317615144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is less than
317715144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  The comparison is performed
317815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
317915144b0fSOlivier Houchard -------------------------------------------------------------------------------
318015144b0fSOlivier Houchard */
float64_lt(float64 a,float64 b)318115144b0fSOlivier Houchard flag float64_lt( float64 a, float64 b )
318215144b0fSOlivier Houchard {
318315144b0fSOlivier Houchard     flag aSign, bSign;
318415144b0fSOlivier Houchard 
318515144b0fSOlivier Houchard     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
318615144b0fSOlivier Houchard          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
318715144b0fSOlivier Houchard        ) {
318815144b0fSOlivier Houchard         float_raise( float_flag_invalid );
318915144b0fSOlivier Houchard         return 0;
319015144b0fSOlivier Houchard     }
319115144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
319215144b0fSOlivier Houchard     bSign = extractFloat64Sign( b );
319315144b0fSOlivier Houchard     if ( aSign != bSign )
319415144b0fSOlivier Houchard 	return aSign &&
319515144b0fSOlivier Houchard 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
319615144b0fSOlivier Houchard 	      0 );
319715144b0fSOlivier Houchard     return ( a != b ) &&
319815144b0fSOlivier Houchard 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
319915144b0fSOlivier Houchard 
320015144b0fSOlivier Houchard }
320115144b0fSOlivier Houchard 
320215144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC
320315144b0fSOlivier Houchard /*
320415144b0fSOlivier Houchard -------------------------------------------------------------------------------
320515144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is equal to the
320615144b0fSOlivier Houchard corresponding value `b', and 0 otherwise.  The invalid exception is raised
320715144b0fSOlivier Houchard if either operand is a NaN.  Otherwise, the comparison is performed
320815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
320915144b0fSOlivier Houchard -------------------------------------------------------------------------------
321015144b0fSOlivier Houchard */
float64_eq_signaling(float64 a,float64 b)321115144b0fSOlivier Houchard flag float64_eq_signaling( float64 a, float64 b )
321215144b0fSOlivier Houchard {
321315144b0fSOlivier Houchard 
321415144b0fSOlivier Houchard     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
321515144b0fSOlivier Houchard          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
321615144b0fSOlivier Houchard        ) {
321715144b0fSOlivier Houchard         float_raise( float_flag_invalid );
321815144b0fSOlivier Houchard         return 0;
321915144b0fSOlivier Houchard     }
322015144b0fSOlivier Houchard     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
322115144b0fSOlivier Houchard 
322215144b0fSOlivier Houchard }
322315144b0fSOlivier Houchard 
322415144b0fSOlivier Houchard /*
322515144b0fSOlivier Houchard -------------------------------------------------------------------------------
322615144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is less than or
322715144b0fSOlivier Houchard equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
322815144b0fSOlivier Houchard cause an exception.  Otherwise, the comparison is performed according to the
322915144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
323015144b0fSOlivier Houchard -------------------------------------------------------------------------------
323115144b0fSOlivier Houchard */
float64_le_quiet(float64 a,float64 b)323215144b0fSOlivier Houchard flag float64_le_quiet( float64 a, float64 b )
323315144b0fSOlivier Houchard {
323415144b0fSOlivier Houchard     flag aSign, bSign;
323515144b0fSOlivier Houchard 
323615144b0fSOlivier Houchard     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
323715144b0fSOlivier Houchard          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
323815144b0fSOlivier Houchard        ) {
323915144b0fSOlivier Houchard         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
324015144b0fSOlivier Houchard             float_raise( float_flag_invalid );
324115144b0fSOlivier Houchard         }
324215144b0fSOlivier Houchard         return 0;
324315144b0fSOlivier Houchard     }
324415144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
324515144b0fSOlivier Houchard     bSign = extractFloat64Sign( b );
324615144b0fSOlivier Houchard     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
324715144b0fSOlivier Houchard     return ( a == b ) || ( aSign ^ ( a < b ) );
324815144b0fSOlivier Houchard 
324915144b0fSOlivier Houchard }
325015144b0fSOlivier Houchard 
325115144b0fSOlivier Houchard /*
325215144b0fSOlivier Houchard -------------------------------------------------------------------------------
325315144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is less than
325415144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
325515144b0fSOlivier Houchard exception.  Otherwise, the comparison is performed according to the IEC/IEEE
325615144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
325715144b0fSOlivier Houchard -------------------------------------------------------------------------------
325815144b0fSOlivier Houchard */
float64_lt_quiet(float64 a,float64 b)325915144b0fSOlivier Houchard flag float64_lt_quiet( float64 a, float64 b )
326015144b0fSOlivier Houchard {
326115144b0fSOlivier Houchard     flag aSign, bSign;
326215144b0fSOlivier Houchard 
326315144b0fSOlivier Houchard     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
326415144b0fSOlivier Houchard          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
326515144b0fSOlivier Houchard        ) {
326615144b0fSOlivier Houchard         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
326715144b0fSOlivier Houchard             float_raise( float_flag_invalid );
326815144b0fSOlivier Houchard         }
326915144b0fSOlivier Houchard         return 0;
327015144b0fSOlivier Houchard     }
327115144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
327215144b0fSOlivier Houchard     bSign = extractFloat64Sign( b );
327315144b0fSOlivier Houchard     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
327415144b0fSOlivier Houchard     return ( a != b ) && ( aSign ^ ( a < b ) );
327515144b0fSOlivier Houchard 
327615144b0fSOlivier Houchard }
327715144b0fSOlivier Houchard #endif
327815144b0fSOlivier Houchard 
327915144b0fSOlivier Houchard #ifdef FLOATX80
328015144b0fSOlivier Houchard 
328115144b0fSOlivier Houchard /*
328215144b0fSOlivier Houchard -------------------------------------------------------------------------------
328315144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
328415144b0fSOlivier Houchard point value `a' to the 32-bit two's complement integer format.  The
328515144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
328615144b0fSOlivier Houchard Floating-Point Arithmetic---which means in particular that the conversion
328715144b0fSOlivier Houchard is rounded according to the current rounding mode.  If `a' is a NaN, the
328815144b0fSOlivier Houchard largest positive integer is returned.  Otherwise, if the conversion
328915144b0fSOlivier Houchard overflows, the largest integer with the same sign as `a' is returned.
329015144b0fSOlivier Houchard -------------------------------------------------------------------------------
329115144b0fSOlivier Houchard */
floatx80_to_int32(floatx80 a)329215144b0fSOlivier Houchard int32 floatx80_to_int32( floatx80 a )
329315144b0fSOlivier Houchard {
329415144b0fSOlivier Houchard     flag aSign;
329515144b0fSOlivier Houchard     int32 aExp, shiftCount;
329615144b0fSOlivier Houchard     bits64 aSig;
329715144b0fSOlivier Houchard 
329815144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
329915144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
330015144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
330115144b0fSOlivier Houchard     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
330215144b0fSOlivier Houchard     shiftCount = 0x4037 - aExp;
330315144b0fSOlivier Houchard     if ( shiftCount <= 0 ) shiftCount = 1;
330415144b0fSOlivier Houchard     shift64RightJamming( aSig, shiftCount, &aSig );
330515144b0fSOlivier Houchard     return roundAndPackInt32( aSign, aSig );
330615144b0fSOlivier Houchard 
330715144b0fSOlivier Houchard }
330815144b0fSOlivier Houchard 
330915144b0fSOlivier Houchard /*
331015144b0fSOlivier Houchard -------------------------------------------------------------------------------
331115144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
331215144b0fSOlivier Houchard point value `a' to the 32-bit two's complement integer format.  The
331315144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
331415144b0fSOlivier Houchard Floating-Point Arithmetic, except that the conversion is always rounded
331515144b0fSOlivier Houchard toward zero.  If `a' is a NaN, the largest positive integer is returned.
331615144b0fSOlivier Houchard Otherwise, if the conversion overflows, the largest integer with the same
331715144b0fSOlivier Houchard sign as `a' is returned.
331815144b0fSOlivier Houchard -------------------------------------------------------------------------------
331915144b0fSOlivier Houchard */
floatx80_to_int32_round_to_zero(floatx80 a)332015144b0fSOlivier Houchard int32 floatx80_to_int32_round_to_zero( floatx80 a )
332115144b0fSOlivier Houchard {
332215144b0fSOlivier Houchard     flag aSign;
332315144b0fSOlivier Houchard     int32 aExp, shiftCount;
332415144b0fSOlivier Houchard     bits64 aSig, savedASig;
332515144b0fSOlivier Houchard     int32 z;
332615144b0fSOlivier Houchard 
332715144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
332815144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
332915144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
333015144b0fSOlivier Houchard     if ( 0x401E < aExp ) {
333115144b0fSOlivier Houchard         if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
333215144b0fSOlivier Houchard         goto invalid;
333315144b0fSOlivier Houchard     }
333415144b0fSOlivier Houchard     else if ( aExp < 0x3FFF ) {
333515144b0fSOlivier Houchard         if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
333615144b0fSOlivier Houchard         return 0;
333715144b0fSOlivier Houchard     }
333815144b0fSOlivier Houchard     shiftCount = 0x403E - aExp;
333915144b0fSOlivier Houchard     savedASig = aSig;
334015144b0fSOlivier Houchard     aSig >>= shiftCount;
334115144b0fSOlivier Houchard     z = aSig;
334215144b0fSOlivier Houchard     if ( aSign ) z = - z;
334315144b0fSOlivier Houchard     if ( ( z < 0 ) ^ aSign ) {
334415144b0fSOlivier Houchard  invalid:
334515144b0fSOlivier Houchard         float_raise( float_flag_invalid );
334615144b0fSOlivier Houchard         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
334715144b0fSOlivier Houchard     }
334815144b0fSOlivier Houchard     if ( ( aSig<<shiftCount ) != savedASig ) {
334915144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
335015144b0fSOlivier Houchard     }
335115144b0fSOlivier Houchard     return z;
335215144b0fSOlivier Houchard 
335315144b0fSOlivier Houchard }
335415144b0fSOlivier Houchard 
335515144b0fSOlivier Houchard /*
335615144b0fSOlivier Houchard -------------------------------------------------------------------------------
335715144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
335815144b0fSOlivier Houchard point value `a' to the 64-bit two's complement integer format.  The
335915144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
336015144b0fSOlivier Houchard Floating-Point Arithmetic---which means in particular that the conversion
336115144b0fSOlivier Houchard is rounded according to the current rounding mode.  If `a' is a NaN,
336215144b0fSOlivier Houchard the largest positive integer is returned.  Otherwise, if the conversion
336315144b0fSOlivier Houchard overflows, the largest integer with the same sign as `a' is returned.
336415144b0fSOlivier Houchard -------------------------------------------------------------------------------
336515144b0fSOlivier Houchard */
floatx80_to_int64(floatx80 a)336615144b0fSOlivier Houchard int64 floatx80_to_int64( floatx80 a )
336715144b0fSOlivier Houchard {
336815144b0fSOlivier Houchard     flag aSign;
336915144b0fSOlivier Houchard     int32 aExp, shiftCount;
337015144b0fSOlivier Houchard     bits64 aSig, aSigExtra;
337115144b0fSOlivier Houchard 
337215144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
337315144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
337415144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
337515144b0fSOlivier Houchard     shiftCount = 0x403E - aExp;
337615144b0fSOlivier Houchard     if ( shiftCount <= 0 ) {
337715144b0fSOlivier Houchard         if ( shiftCount ) {
337815144b0fSOlivier Houchard             float_raise( float_flag_invalid );
337915144b0fSOlivier Houchard             if (    ! aSign
338015144b0fSOlivier Houchard                  || (    ( aExp == 0x7FFF )
338115144b0fSOlivier Houchard                       && ( aSig != LIT64( 0x8000000000000000 ) ) )
338215144b0fSOlivier Houchard                ) {
338315144b0fSOlivier Houchard                 return LIT64( 0x7FFFFFFFFFFFFFFF );
338415144b0fSOlivier Houchard             }
338515144b0fSOlivier Houchard             return (sbits64) LIT64( 0x8000000000000000 );
338615144b0fSOlivier Houchard         }
338715144b0fSOlivier Houchard         aSigExtra = 0;
338815144b0fSOlivier Houchard     }
338915144b0fSOlivier Houchard     else {
339015144b0fSOlivier Houchard         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
339115144b0fSOlivier Houchard     }
339215144b0fSOlivier Houchard     return roundAndPackInt64( aSign, aSig, aSigExtra );
339315144b0fSOlivier Houchard 
339415144b0fSOlivier Houchard }
339515144b0fSOlivier Houchard 
339615144b0fSOlivier Houchard /*
339715144b0fSOlivier Houchard -------------------------------------------------------------------------------
339815144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
339915144b0fSOlivier Houchard point value `a' to the 64-bit two's complement integer format.  The
340015144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
340115144b0fSOlivier Houchard Floating-Point Arithmetic, except that the conversion is always rounded
340215144b0fSOlivier Houchard toward zero.  If `a' is a NaN, the largest positive integer is returned.
340315144b0fSOlivier Houchard Otherwise, if the conversion overflows, the largest integer with the same
340415144b0fSOlivier Houchard sign as `a' is returned.
340515144b0fSOlivier Houchard -------------------------------------------------------------------------------
340615144b0fSOlivier Houchard */
floatx80_to_int64_round_to_zero(floatx80 a)340715144b0fSOlivier Houchard int64 floatx80_to_int64_round_to_zero( floatx80 a )
340815144b0fSOlivier Houchard {
340915144b0fSOlivier Houchard     flag aSign;
341015144b0fSOlivier Houchard     int32 aExp, shiftCount;
341115144b0fSOlivier Houchard     bits64 aSig;
341215144b0fSOlivier Houchard     int64 z;
341315144b0fSOlivier Houchard 
341415144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
341515144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
341615144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
341715144b0fSOlivier Houchard     shiftCount = aExp - 0x403E;
341815144b0fSOlivier Houchard     if ( 0 <= shiftCount ) {
341915144b0fSOlivier Houchard         aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
342015144b0fSOlivier Houchard         if ( ( a.high != 0xC03E ) || aSig ) {
342115144b0fSOlivier Houchard             float_raise( float_flag_invalid );
342215144b0fSOlivier Houchard             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
342315144b0fSOlivier Houchard                 return LIT64( 0x7FFFFFFFFFFFFFFF );
342415144b0fSOlivier Houchard             }
342515144b0fSOlivier Houchard         }
342615144b0fSOlivier Houchard         return (sbits64) LIT64( 0x8000000000000000 );
342715144b0fSOlivier Houchard     }
342815144b0fSOlivier Houchard     else if ( aExp < 0x3FFF ) {
342915144b0fSOlivier Houchard         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
343015144b0fSOlivier Houchard         return 0;
343115144b0fSOlivier Houchard     }
343215144b0fSOlivier Houchard     z = aSig>>( - shiftCount );
343315144b0fSOlivier Houchard     if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
343415144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
343515144b0fSOlivier Houchard     }
343615144b0fSOlivier Houchard     if ( aSign ) z = - z;
343715144b0fSOlivier Houchard     return z;
343815144b0fSOlivier Houchard 
343915144b0fSOlivier Houchard }
344015144b0fSOlivier Houchard 
344115144b0fSOlivier Houchard /*
344215144b0fSOlivier Houchard -------------------------------------------------------------------------------
344315144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
344415144b0fSOlivier Houchard point value `a' to the single-precision floating-point format.  The
344515144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
344615144b0fSOlivier Houchard Floating-Point Arithmetic.
344715144b0fSOlivier Houchard -------------------------------------------------------------------------------
344815144b0fSOlivier Houchard */
floatx80_to_float32(floatx80 a)344915144b0fSOlivier Houchard float32 floatx80_to_float32( floatx80 a )
345015144b0fSOlivier Houchard {
345115144b0fSOlivier Houchard     flag aSign;
345215144b0fSOlivier Houchard     int32 aExp;
345315144b0fSOlivier Houchard     bits64 aSig;
345415144b0fSOlivier Houchard 
345515144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
345615144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
345715144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
345815144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
345915144b0fSOlivier Houchard         if ( (bits64) ( aSig<<1 ) ) {
346015144b0fSOlivier Houchard             return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
346115144b0fSOlivier Houchard         }
346215144b0fSOlivier Houchard         return packFloat32( aSign, 0xFF, 0 );
346315144b0fSOlivier Houchard     }
346415144b0fSOlivier Houchard     shift64RightJamming( aSig, 33, &aSig );
346515144b0fSOlivier Houchard     if ( aExp || aSig ) aExp -= 0x3F81;
346615144b0fSOlivier Houchard     return roundAndPackFloat32( aSign, aExp, aSig );
346715144b0fSOlivier Houchard 
346815144b0fSOlivier Houchard }
346915144b0fSOlivier Houchard 
347015144b0fSOlivier Houchard /*
347115144b0fSOlivier Houchard -------------------------------------------------------------------------------
347215144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
347315144b0fSOlivier Houchard point value `a' to the double-precision floating-point format.  The
347415144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
347515144b0fSOlivier Houchard Floating-Point Arithmetic.
347615144b0fSOlivier Houchard -------------------------------------------------------------------------------
347715144b0fSOlivier Houchard */
floatx80_to_float64(floatx80 a)347815144b0fSOlivier Houchard float64 floatx80_to_float64( floatx80 a )
347915144b0fSOlivier Houchard {
348015144b0fSOlivier Houchard     flag aSign;
348115144b0fSOlivier Houchard     int32 aExp;
348215144b0fSOlivier Houchard     bits64 aSig, zSig;
348315144b0fSOlivier Houchard 
348415144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
348515144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
348615144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
348715144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
348815144b0fSOlivier Houchard         if ( (bits64) ( aSig<<1 ) ) {
348915144b0fSOlivier Houchard             return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
349015144b0fSOlivier Houchard         }
349115144b0fSOlivier Houchard         return packFloat64( aSign, 0x7FF, 0 );
349215144b0fSOlivier Houchard     }
349315144b0fSOlivier Houchard     shift64RightJamming( aSig, 1, &zSig );
349415144b0fSOlivier Houchard     if ( aExp || aSig ) aExp -= 0x3C01;
349515144b0fSOlivier Houchard     return roundAndPackFloat64( aSign, aExp, zSig );
349615144b0fSOlivier Houchard 
349715144b0fSOlivier Houchard }
349815144b0fSOlivier Houchard 
349915144b0fSOlivier Houchard #ifdef FLOAT128
350015144b0fSOlivier Houchard 
350115144b0fSOlivier Houchard /*
350215144b0fSOlivier Houchard -------------------------------------------------------------------------------
350315144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
350415144b0fSOlivier Houchard point value `a' to the quadruple-precision floating-point format.  The
350515144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
350615144b0fSOlivier Houchard Floating-Point Arithmetic.
350715144b0fSOlivier Houchard -------------------------------------------------------------------------------
350815144b0fSOlivier Houchard */
floatx80_to_float128(floatx80 a)350915144b0fSOlivier Houchard float128 floatx80_to_float128( floatx80 a )
351015144b0fSOlivier Houchard {
351115144b0fSOlivier Houchard     flag aSign;
351215144b0fSOlivier Houchard     int16 aExp;
351315144b0fSOlivier Houchard     bits64 aSig, zSig0, zSig1;
351415144b0fSOlivier Houchard 
351515144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
351615144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
351715144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
351815144b0fSOlivier Houchard     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
351915144b0fSOlivier Houchard         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
352015144b0fSOlivier Houchard     }
352115144b0fSOlivier Houchard     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
352215144b0fSOlivier Houchard     return packFloat128( aSign, aExp, zSig0, zSig1 );
352315144b0fSOlivier Houchard 
352415144b0fSOlivier Houchard }
352515144b0fSOlivier Houchard 
352615144b0fSOlivier Houchard #endif
352715144b0fSOlivier Houchard 
352815144b0fSOlivier Houchard /*
352915144b0fSOlivier Houchard -------------------------------------------------------------------------------
353015144b0fSOlivier Houchard Rounds the extended double-precision floating-point value `a' to an integer,
353115144b0fSOlivier Houchard and returns the result as an extended quadruple-precision floating-point
353215144b0fSOlivier Houchard value.  The operation is performed according to the IEC/IEEE Standard for
353315144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
353415144b0fSOlivier Houchard -------------------------------------------------------------------------------
353515144b0fSOlivier Houchard */
floatx80_round_to_int(floatx80 a)353615144b0fSOlivier Houchard floatx80 floatx80_round_to_int( floatx80 a )
353715144b0fSOlivier Houchard {
353815144b0fSOlivier Houchard     flag aSign;
353915144b0fSOlivier Houchard     int32 aExp;
354015144b0fSOlivier Houchard     bits64 lastBitMask, roundBitsMask;
354115144b0fSOlivier Houchard     int8 roundingMode;
354215144b0fSOlivier Houchard     floatx80 z;
354315144b0fSOlivier Houchard 
354415144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
354515144b0fSOlivier Houchard     if ( 0x403E <= aExp ) {
354615144b0fSOlivier Houchard         if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
354715144b0fSOlivier Houchard             return propagateFloatx80NaN( a, a );
354815144b0fSOlivier Houchard         }
354915144b0fSOlivier Houchard         return a;
355015144b0fSOlivier Houchard     }
355115144b0fSOlivier Houchard     if ( aExp < 0x3FFF ) {
355215144b0fSOlivier Houchard         if (    ( aExp == 0 )
355315144b0fSOlivier Houchard              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
355415144b0fSOlivier Houchard             return a;
355515144b0fSOlivier Houchard         }
355615144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
355715144b0fSOlivier Houchard         aSign = extractFloatx80Sign( a );
355815144b0fSOlivier Houchard         switch ( float_rounding_mode ) {
355915144b0fSOlivier Houchard          case float_round_nearest_even:
356015144b0fSOlivier Houchard             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
356115144b0fSOlivier Houchard                ) {
356215144b0fSOlivier Houchard                 return
356315144b0fSOlivier Houchard                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
356415144b0fSOlivier Houchard             }
356515144b0fSOlivier Houchard             break;
356615144b0fSOlivier Houchard 	 case float_round_to_zero:
356715144b0fSOlivier Houchard 	    break;
356815144b0fSOlivier Houchard          case float_round_down:
356915144b0fSOlivier Houchard             return
357015144b0fSOlivier Houchard                   aSign ?
357115144b0fSOlivier Houchard                       packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
357215144b0fSOlivier Houchard                 : packFloatx80( 0, 0, 0 );
357315144b0fSOlivier Houchard          case float_round_up:
357415144b0fSOlivier Houchard             return
357515144b0fSOlivier Houchard                   aSign ? packFloatx80( 1, 0, 0 )
357615144b0fSOlivier Houchard                 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
357715144b0fSOlivier Houchard         }
357815144b0fSOlivier Houchard         return packFloatx80( aSign, 0, 0 );
357915144b0fSOlivier Houchard     }
358015144b0fSOlivier Houchard     lastBitMask = 1;
358115144b0fSOlivier Houchard     lastBitMask <<= 0x403E - aExp;
358215144b0fSOlivier Houchard     roundBitsMask = lastBitMask - 1;
358315144b0fSOlivier Houchard     z = a;
358415144b0fSOlivier Houchard     roundingMode = float_rounding_mode;
358515144b0fSOlivier Houchard     if ( roundingMode == float_round_nearest_even ) {
358615144b0fSOlivier Houchard         z.low += lastBitMask>>1;
358715144b0fSOlivier Houchard         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
358815144b0fSOlivier Houchard     }
358915144b0fSOlivier Houchard     else if ( roundingMode != float_round_to_zero ) {
359015144b0fSOlivier Houchard         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
359115144b0fSOlivier Houchard             z.low += roundBitsMask;
359215144b0fSOlivier Houchard         }
359315144b0fSOlivier Houchard     }
359415144b0fSOlivier Houchard     z.low &= ~ roundBitsMask;
359515144b0fSOlivier Houchard     if ( z.low == 0 ) {
359615144b0fSOlivier Houchard         ++z.high;
359715144b0fSOlivier Houchard         z.low = LIT64( 0x8000000000000000 );
359815144b0fSOlivier Houchard     }
359915144b0fSOlivier Houchard     if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
360015144b0fSOlivier Houchard     return z;
360115144b0fSOlivier Houchard 
360215144b0fSOlivier Houchard }
360315144b0fSOlivier Houchard 
360415144b0fSOlivier Houchard /*
360515144b0fSOlivier Houchard -------------------------------------------------------------------------------
360615144b0fSOlivier Houchard Returns the result of adding the absolute values of the extended double-
360715144b0fSOlivier Houchard precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
360815144b0fSOlivier Houchard negated before being returned.  `zSign' is ignored if the result is a NaN.
360915144b0fSOlivier Houchard The addition is performed according to the IEC/IEEE Standard for Binary
361015144b0fSOlivier Houchard Floating-Point Arithmetic.
361115144b0fSOlivier Houchard -------------------------------------------------------------------------------
361215144b0fSOlivier Houchard */
addFloatx80Sigs(floatx80 a,floatx80 b,flag zSign)361315144b0fSOlivier Houchard static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
361415144b0fSOlivier Houchard {
361515144b0fSOlivier Houchard     int32 aExp, bExp, zExp;
361615144b0fSOlivier Houchard     bits64 aSig, bSig, zSig0, zSig1;
361715144b0fSOlivier Houchard     int32 expDiff;
361815144b0fSOlivier Houchard 
361915144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
362015144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
362115144b0fSOlivier Houchard     bSig = extractFloatx80Frac( b );
362215144b0fSOlivier Houchard     bExp = extractFloatx80Exp( b );
362315144b0fSOlivier Houchard     expDiff = aExp - bExp;
362415144b0fSOlivier Houchard     if ( 0 < expDiff ) {
362515144b0fSOlivier Houchard         if ( aExp == 0x7FFF ) {
362615144b0fSOlivier Houchard             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
362715144b0fSOlivier Houchard             return a;
362815144b0fSOlivier Houchard         }
362915144b0fSOlivier Houchard         if ( bExp == 0 ) --expDiff;
363015144b0fSOlivier Houchard         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
363115144b0fSOlivier Houchard         zExp = aExp;
363215144b0fSOlivier Houchard     }
363315144b0fSOlivier Houchard     else if ( expDiff < 0 ) {
363415144b0fSOlivier Houchard         if ( bExp == 0x7FFF ) {
363515144b0fSOlivier Houchard             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
363615144b0fSOlivier Houchard             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
363715144b0fSOlivier Houchard         }
363815144b0fSOlivier Houchard         if ( aExp == 0 ) ++expDiff;
363915144b0fSOlivier Houchard         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
364015144b0fSOlivier Houchard         zExp = bExp;
364115144b0fSOlivier Houchard     }
364215144b0fSOlivier Houchard     else {
364315144b0fSOlivier Houchard         if ( aExp == 0x7FFF ) {
364415144b0fSOlivier Houchard             if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
364515144b0fSOlivier Houchard                 return propagateFloatx80NaN( a, b );
364615144b0fSOlivier Houchard             }
364715144b0fSOlivier Houchard             return a;
364815144b0fSOlivier Houchard         }
364915144b0fSOlivier Houchard         zSig1 = 0;
365015144b0fSOlivier Houchard         zSig0 = aSig + bSig;
365115144b0fSOlivier Houchard         if ( aExp == 0 ) {
365215144b0fSOlivier Houchard             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
365315144b0fSOlivier Houchard             goto roundAndPack;
365415144b0fSOlivier Houchard         }
365515144b0fSOlivier Houchard         zExp = aExp;
365615144b0fSOlivier Houchard         goto shiftRight1;
365715144b0fSOlivier Houchard     }
365815144b0fSOlivier Houchard     zSig0 = aSig + bSig;
365915144b0fSOlivier Houchard     if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
366015144b0fSOlivier Houchard  shiftRight1:
366115144b0fSOlivier Houchard     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
366215144b0fSOlivier Houchard     zSig0 |= LIT64( 0x8000000000000000 );
366315144b0fSOlivier Houchard     ++zExp;
366415144b0fSOlivier Houchard  roundAndPack:
366515144b0fSOlivier Houchard     return
366615144b0fSOlivier Houchard         roundAndPackFloatx80(
366715144b0fSOlivier Houchard             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
366815144b0fSOlivier Houchard 
366915144b0fSOlivier Houchard }
367015144b0fSOlivier Houchard 
367115144b0fSOlivier Houchard /*
367215144b0fSOlivier Houchard -------------------------------------------------------------------------------
367315144b0fSOlivier Houchard Returns the result of subtracting the absolute values of the extended
367415144b0fSOlivier Houchard double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
367515144b0fSOlivier Houchard difference is negated before being returned.  `zSign' is ignored if the
367615144b0fSOlivier Houchard result is a NaN.  The subtraction is performed according to the IEC/IEEE
367715144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
367815144b0fSOlivier Houchard -------------------------------------------------------------------------------
367915144b0fSOlivier Houchard */
subFloatx80Sigs(floatx80 a,floatx80 b,flag zSign)368015144b0fSOlivier Houchard static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
368115144b0fSOlivier Houchard {
368215144b0fSOlivier Houchard     int32 aExp, bExp, zExp;
368315144b0fSOlivier Houchard     bits64 aSig, bSig, zSig0, zSig1;
368415144b0fSOlivier Houchard     int32 expDiff;
368515144b0fSOlivier Houchard     floatx80 z;
368615144b0fSOlivier Houchard 
368715144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
368815144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
368915144b0fSOlivier Houchard     bSig = extractFloatx80Frac( b );
369015144b0fSOlivier Houchard     bExp = extractFloatx80Exp( b );
369115144b0fSOlivier Houchard     expDiff = aExp - bExp;
369215144b0fSOlivier Houchard     if ( 0 < expDiff ) goto aExpBigger;
369315144b0fSOlivier Houchard     if ( expDiff < 0 ) goto bExpBigger;
369415144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
369515144b0fSOlivier Houchard         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
369615144b0fSOlivier Houchard             return propagateFloatx80NaN( a, b );
369715144b0fSOlivier Houchard         }
369815144b0fSOlivier Houchard         float_raise( float_flag_invalid );
369915144b0fSOlivier Houchard         z.low = floatx80_default_nan_low;
370015144b0fSOlivier Houchard         z.high = floatx80_default_nan_high;
370115144b0fSOlivier Houchard         return z;
370215144b0fSOlivier Houchard     }
370315144b0fSOlivier Houchard     if ( aExp == 0 ) {
370415144b0fSOlivier Houchard         aExp = 1;
370515144b0fSOlivier Houchard         bExp = 1;
370615144b0fSOlivier Houchard     }
370715144b0fSOlivier Houchard     zSig1 = 0;
370815144b0fSOlivier Houchard     if ( bSig < aSig ) goto aBigger;
370915144b0fSOlivier Houchard     if ( aSig < bSig ) goto bBigger;
371015144b0fSOlivier Houchard     return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
371115144b0fSOlivier Houchard  bExpBigger:
371215144b0fSOlivier Houchard     if ( bExp == 0x7FFF ) {
371315144b0fSOlivier Houchard         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
371415144b0fSOlivier Houchard         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
371515144b0fSOlivier Houchard     }
371615144b0fSOlivier Houchard     if ( aExp == 0 ) ++expDiff;
371715144b0fSOlivier Houchard     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
371815144b0fSOlivier Houchard  bBigger:
371915144b0fSOlivier Houchard     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
372015144b0fSOlivier Houchard     zExp = bExp;
372115144b0fSOlivier Houchard     zSign ^= 1;
372215144b0fSOlivier Houchard     goto normalizeRoundAndPack;
372315144b0fSOlivier Houchard  aExpBigger:
372415144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
372515144b0fSOlivier Houchard         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
372615144b0fSOlivier Houchard         return a;
372715144b0fSOlivier Houchard     }
372815144b0fSOlivier Houchard     if ( bExp == 0 ) --expDiff;
372915144b0fSOlivier Houchard     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
373015144b0fSOlivier Houchard  aBigger:
373115144b0fSOlivier Houchard     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
373215144b0fSOlivier Houchard     zExp = aExp;
373315144b0fSOlivier Houchard  normalizeRoundAndPack:
373415144b0fSOlivier Houchard     return
373515144b0fSOlivier Houchard         normalizeRoundAndPackFloatx80(
373615144b0fSOlivier Houchard             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
373715144b0fSOlivier Houchard 
373815144b0fSOlivier Houchard }
373915144b0fSOlivier Houchard 
374015144b0fSOlivier Houchard /*
374115144b0fSOlivier Houchard -------------------------------------------------------------------------------
374215144b0fSOlivier Houchard Returns the result of adding the extended double-precision floating-point
374315144b0fSOlivier Houchard values `a' and `b'.  The operation is performed according to the IEC/IEEE
374415144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
374515144b0fSOlivier Houchard -------------------------------------------------------------------------------
374615144b0fSOlivier Houchard */
floatx80_add(floatx80 a,floatx80 b)374715144b0fSOlivier Houchard floatx80 floatx80_add( floatx80 a, floatx80 b )
374815144b0fSOlivier Houchard {
374915144b0fSOlivier Houchard     flag aSign, bSign;
375015144b0fSOlivier Houchard 
375115144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
375215144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
375315144b0fSOlivier Houchard     if ( aSign == bSign ) {
375415144b0fSOlivier Houchard         return addFloatx80Sigs( a, b, aSign );
375515144b0fSOlivier Houchard     }
375615144b0fSOlivier Houchard     else {
375715144b0fSOlivier Houchard         return subFloatx80Sigs( a, b, aSign );
375815144b0fSOlivier Houchard     }
375915144b0fSOlivier Houchard 
376015144b0fSOlivier Houchard }
376115144b0fSOlivier Houchard 
376215144b0fSOlivier Houchard /*
376315144b0fSOlivier Houchard -------------------------------------------------------------------------------
376415144b0fSOlivier Houchard Returns the result of subtracting the extended double-precision floating-
376515144b0fSOlivier Houchard point values `a' and `b'.  The operation is performed according to the
376615144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
376715144b0fSOlivier Houchard -------------------------------------------------------------------------------
376815144b0fSOlivier Houchard */
floatx80_sub(floatx80 a,floatx80 b)376915144b0fSOlivier Houchard floatx80 floatx80_sub( floatx80 a, floatx80 b )
377015144b0fSOlivier Houchard {
377115144b0fSOlivier Houchard     flag aSign, bSign;
377215144b0fSOlivier Houchard 
377315144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
377415144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
377515144b0fSOlivier Houchard     if ( aSign == bSign ) {
377615144b0fSOlivier Houchard         return subFloatx80Sigs( a, b, aSign );
377715144b0fSOlivier Houchard     }
377815144b0fSOlivier Houchard     else {
377915144b0fSOlivier Houchard         return addFloatx80Sigs( a, b, aSign );
378015144b0fSOlivier Houchard     }
378115144b0fSOlivier Houchard 
378215144b0fSOlivier Houchard }
378315144b0fSOlivier Houchard 
378415144b0fSOlivier Houchard /*
378515144b0fSOlivier Houchard -------------------------------------------------------------------------------
378615144b0fSOlivier Houchard Returns the result of multiplying the extended double-precision floating-
378715144b0fSOlivier Houchard point values `a' and `b'.  The operation is performed according to the
378815144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
378915144b0fSOlivier Houchard -------------------------------------------------------------------------------
379015144b0fSOlivier Houchard */
floatx80_mul(floatx80 a,floatx80 b)379115144b0fSOlivier Houchard floatx80 floatx80_mul( floatx80 a, floatx80 b )
379215144b0fSOlivier Houchard {
379315144b0fSOlivier Houchard     flag aSign, bSign, zSign;
379415144b0fSOlivier Houchard     int32 aExp, bExp, zExp;
379515144b0fSOlivier Houchard     bits64 aSig, bSig, zSig0, zSig1;
379615144b0fSOlivier Houchard     floatx80 z;
379715144b0fSOlivier Houchard 
379815144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
379915144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
380015144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
380115144b0fSOlivier Houchard     bSig = extractFloatx80Frac( b );
380215144b0fSOlivier Houchard     bExp = extractFloatx80Exp( b );
380315144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
380415144b0fSOlivier Houchard     zSign = aSign ^ bSign;
380515144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
380615144b0fSOlivier Houchard         if (    (bits64) ( aSig<<1 )
380715144b0fSOlivier Houchard              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
380815144b0fSOlivier Houchard             return propagateFloatx80NaN( a, b );
380915144b0fSOlivier Houchard         }
381015144b0fSOlivier Houchard         if ( ( bExp | bSig ) == 0 ) goto invalid;
381115144b0fSOlivier Houchard         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
381215144b0fSOlivier Houchard     }
381315144b0fSOlivier Houchard     if ( bExp == 0x7FFF ) {
381415144b0fSOlivier Houchard         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
381515144b0fSOlivier Houchard         if ( ( aExp | aSig ) == 0 ) {
381615144b0fSOlivier Houchard  invalid:
381715144b0fSOlivier Houchard             float_raise( float_flag_invalid );
381815144b0fSOlivier Houchard             z.low = floatx80_default_nan_low;
381915144b0fSOlivier Houchard             z.high = floatx80_default_nan_high;
382015144b0fSOlivier Houchard             return z;
382115144b0fSOlivier Houchard         }
382215144b0fSOlivier Houchard         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
382315144b0fSOlivier Houchard     }
382415144b0fSOlivier Houchard     if ( aExp == 0 ) {
382515144b0fSOlivier Houchard         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
382615144b0fSOlivier Houchard         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
382715144b0fSOlivier Houchard     }
382815144b0fSOlivier Houchard     if ( bExp == 0 ) {
382915144b0fSOlivier Houchard         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
383015144b0fSOlivier Houchard         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
383115144b0fSOlivier Houchard     }
383215144b0fSOlivier Houchard     zExp = aExp + bExp - 0x3FFE;
383315144b0fSOlivier Houchard     mul64To128( aSig, bSig, &zSig0, &zSig1 );
383415144b0fSOlivier Houchard     if ( 0 < (sbits64) zSig0 ) {
383515144b0fSOlivier Houchard         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
383615144b0fSOlivier Houchard         --zExp;
383715144b0fSOlivier Houchard     }
383815144b0fSOlivier Houchard     return
383915144b0fSOlivier Houchard         roundAndPackFloatx80(
384015144b0fSOlivier Houchard             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
384115144b0fSOlivier Houchard 
384215144b0fSOlivier Houchard }
384315144b0fSOlivier Houchard 
384415144b0fSOlivier Houchard /*
384515144b0fSOlivier Houchard -------------------------------------------------------------------------------
384615144b0fSOlivier Houchard Returns the result of dividing the extended double-precision floating-point
384715144b0fSOlivier Houchard value `a' by the corresponding value `b'.  The operation is performed
384815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
384915144b0fSOlivier Houchard -------------------------------------------------------------------------------
385015144b0fSOlivier Houchard */
floatx80_div(floatx80 a,floatx80 b)385115144b0fSOlivier Houchard floatx80 floatx80_div( floatx80 a, floatx80 b )
385215144b0fSOlivier Houchard {
385315144b0fSOlivier Houchard     flag aSign, bSign, zSign;
385415144b0fSOlivier Houchard     int32 aExp, bExp, zExp;
385515144b0fSOlivier Houchard     bits64 aSig, bSig, zSig0, zSig1;
385615144b0fSOlivier Houchard     bits64 rem0, rem1, rem2, term0, term1, term2;
385715144b0fSOlivier Houchard     floatx80 z;
385815144b0fSOlivier Houchard 
385915144b0fSOlivier Houchard     aSig = extractFloatx80Frac( a );
386015144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
386115144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
386215144b0fSOlivier Houchard     bSig = extractFloatx80Frac( b );
386315144b0fSOlivier Houchard     bExp = extractFloatx80Exp( b );
386415144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
386515144b0fSOlivier Houchard     zSign = aSign ^ bSign;
386615144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
386715144b0fSOlivier Houchard         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
386815144b0fSOlivier Houchard         if ( bExp == 0x7FFF ) {
386915144b0fSOlivier Houchard             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
387015144b0fSOlivier Houchard             goto invalid;
387115144b0fSOlivier Houchard         }
387215144b0fSOlivier Houchard         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
387315144b0fSOlivier Houchard     }
387415144b0fSOlivier Houchard     if ( bExp == 0x7FFF ) {
387515144b0fSOlivier Houchard         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
387615144b0fSOlivier Houchard         return packFloatx80( zSign, 0, 0 );
387715144b0fSOlivier Houchard     }
387815144b0fSOlivier Houchard     if ( bExp == 0 ) {
387915144b0fSOlivier Houchard         if ( bSig == 0 ) {
388015144b0fSOlivier Houchard             if ( ( aExp | aSig ) == 0 ) {
388115144b0fSOlivier Houchard  invalid:
388215144b0fSOlivier Houchard                 float_raise( float_flag_invalid );
388315144b0fSOlivier Houchard                 z.low = floatx80_default_nan_low;
388415144b0fSOlivier Houchard                 z.high = floatx80_default_nan_high;
388515144b0fSOlivier Houchard                 return z;
388615144b0fSOlivier Houchard             }
388715144b0fSOlivier Houchard             float_raise( float_flag_divbyzero );
388815144b0fSOlivier Houchard             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
388915144b0fSOlivier Houchard         }
389015144b0fSOlivier Houchard         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
389115144b0fSOlivier Houchard     }
389215144b0fSOlivier Houchard     if ( aExp == 0 ) {
389315144b0fSOlivier Houchard         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
389415144b0fSOlivier Houchard         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
389515144b0fSOlivier Houchard     }
389615144b0fSOlivier Houchard     zExp = aExp - bExp + 0x3FFE;
389715144b0fSOlivier Houchard     rem1 = 0;
389815144b0fSOlivier Houchard     if ( bSig <= aSig ) {
389915144b0fSOlivier Houchard         shift128Right( aSig, 0, 1, &aSig, &rem1 );
390015144b0fSOlivier Houchard         ++zExp;
390115144b0fSOlivier Houchard     }
390215144b0fSOlivier Houchard     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
390315144b0fSOlivier Houchard     mul64To128( bSig, zSig0, &term0, &term1 );
390415144b0fSOlivier Houchard     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
390515144b0fSOlivier Houchard     while ( (sbits64) rem0 < 0 ) {
390615144b0fSOlivier Houchard         --zSig0;
390715144b0fSOlivier Houchard         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
390815144b0fSOlivier Houchard     }
390915144b0fSOlivier Houchard     zSig1 = estimateDiv128To64( rem1, 0, bSig );
391015144b0fSOlivier Houchard     if ( (bits64) ( zSig1<<1 ) <= 8 ) {
391115144b0fSOlivier Houchard         mul64To128( bSig, zSig1, &term1, &term2 );
391215144b0fSOlivier Houchard         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
391315144b0fSOlivier Houchard         while ( (sbits64) rem1 < 0 ) {
391415144b0fSOlivier Houchard             --zSig1;
391515144b0fSOlivier Houchard             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
391615144b0fSOlivier Houchard         }
391715144b0fSOlivier Houchard         zSig1 |= ( ( rem1 | rem2 ) != 0 );
391815144b0fSOlivier Houchard     }
391915144b0fSOlivier Houchard     return
392015144b0fSOlivier Houchard         roundAndPackFloatx80(
392115144b0fSOlivier Houchard             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
392215144b0fSOlivier Houchard 
392315144b0fSOlivier Houchard }
392415144b0fSOlivier Houchard 
392515144b0fSOlivier Houchard /*
392615144b0fSOlivier Houchard -------------------------------------------------------------------------------
392715144b0fSOlivier Houchard Returns the remainder of the extended double-precision floating-point value
392815144b0fSOlivier Houchard `a' with respect to the corresponding value `b'.  The operation is performed
392915144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
393015144b0fSOlivier Houchard -------------------------------------------------------------------------------
393115144b0fSOlivier Houchard */
floatx80_rem(floatx80 a,floatx80 b)393215144b0fSOlivier Houchard floatx80 floatx80_rem( floatx80 a, floatx80 b )
393315144b0fSOlivier Houchard {
393415144b0fSOlivier Houchard     flag aSign, bSign, zSign;
393515144b0fSOlivier Houchard     int32 aExp, bExp, expDiff;
393615144b0fSOlivier Houchard     bits64 aSig0, aSig1, bSig;
393715144b0fSOlivier Houchard     bits64 q, term0, term1, alternateASig0, alternateASig1;
393815144b0fSOlivier Houchard     floatx80 z;
393915144b0fSOlivier Houchard 
394015144b0fSOlivier Houchard     aSig0 = extractFloatx80Frac( a );
394115144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
394215144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
394315144b0fSOlivier Houchard     bSig = extractFloatx80Frac( b );
394415144b0fSOlivier Houchard     bExp = extractFloatx80Exp( b );
394515144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
394615144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
394715144b0fSOlivier Houchard         if (    (bits64) ( aSig0<<1 )
394815144b0fSOlivier Houchard              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
394915144b0fSOlivier Houchard             return propagateFloatx80NaN( a, b );
395015144b0fSOlivier Houchard         }
395115144b0fSOlivier Houchard         goto invalid;
395215144b0fSOlivier Houchard     }
395315144b0fSOlivier Houchard     if ( bExp == 0x7FFF ) {
395415144b0fSOlivier Houchard         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
395515144b0fSOlivier Houchard         return a;
395615144b0fSOlivier Houchard     }
395715144b0fSOlivier Houchard     if ( bExp == 0 ) {
395815144b0fSOlivier Houchard         if ( bSig == 0 ) {
395915144b0fSOlivier Houchard  invalid:
396015144b0fSOlivier Houchard             float_raise( float_flag_invalid );
396115144b0fSOlivier Houchard             z.low = floatx80_default_nan_low;
396215144b0fSOlivier Houchard             z.high = floatx80_default_nan_high;
396315144b0fSOlivier Houchard             return z;
396415144b0fSOlivier Houchard         }
396515144b0fSOlivier Houchard         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
396615144b0fSOlivier Houchard     }
396715144b0fSOlivier Houchard     if ( aExp == 0 ) {
396815144b0fSOlivier Houchard         if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
396915144b0fSOlivier Houchard         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
397015144b0fSOlivier Houchard     }
397115144b0fSOlivier Houchard     bSig |= LIT64( 0x8000000000000000 );
397215144b0fSOlivier Houchard     zSign = aSign;
397315144b0fSOlivier Houchard     expDiff = aExp - bExp;
397415144b0fSOlivier Houchard     aSig1 = 0;
397515144b0fSOlivier Houchard     if ( expDiff < 0 ) {
397615144b0fSOlivier Houchard         if ( expDiff < -1 ) return a;
397715144b0fSOlivier Houchard         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
397815144b0fSOlivier Houchard         expDiff = 0;
397915144b0fSOlivier Houchard     }
398015144b0fSOlivier Houchard     q = ( bSig <= aSig0 );
398115144b0fSOlivier Houchard     if ( q ) aSig0 -= bSig;
398215144b0fSOlivier Houchard     expDiff -= 64;
398315144b0fSOlivier Houchard     while ( 0 < expDiff ) {
398415144b0fSOlivier Houchard         q = estimateDiv128To64( aSig0, aSig1, bSig );
398515144b0fSOlivier Houchard         q = ( 2 < q ) ? q - 2 : 0;
398615144b0fSOlivier Houchard         mul64To128( bSig, q, &term0, &term1 );
398715144b0fSOlivier Houchard         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
398815144b0fSOlivier Houchard         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
398915144b0fSOlivier Houchard         expDiff -= 62;
399015144b0fSOlivier Houchard     }
399115144b0fSOlivier Houchard     expDiff += 64;
399215144b0fSOlivier Houchard     if ( 0 < expDiff ) {
399315144b0fSOlivier Houchard         q = estimateDiv128To64( aSig0, aSig1, bSig );
399415144b0fSOlivier Houchard         q = ( 2 < q ) ? q - 2 : 0;
399515144b0fSOlivier Houchard         q >>= 64 - expDiff;
399615144b0fSOlivier Houchard         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
399715144b0fSOlivier Houchard         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
399815144b0fSOlivier Houchard         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
399915144b0fSOlivier Houchard         while ( le128( term0, term1, aSig0, aSig1 ) ) {
400015144b0fSOlivier Houchard             ++q;
400115144b0fSOlivier Houchard             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
400215144b0fSOlivier Houchard         }
400315144b0fSOlivier Houchard     }
400415144b0fSOlivier Houchard     else {
400515144b0fSOlivier Houchard         term1 = 0;
400615144b0fSOlivier Houchard         term0 = bSig;
400715144b0fSOlivier Houchard     }
400815144b0fSOlivier Houchard     sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
400915144b0fSOlivier Houchard     if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
401015144b0fSOlivier Houchard          || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
401115144b0fSOlivier Houchard               && ( q & 1 ) )
401215144b0fSOlivier Houchard        ) {
401315144b0fSOlivier Houchard         aSig0 = alternateASig0;
401415144b0fSOlivier Houchard         aSig1 = alternateASig1;
401515144b0fSOlivier Houchard         zSign = ! zSign;
401615144b0fSOlivier Houchard     }
401715144b0fSOlivier Houchard     return
401815144b0fSOlivier Houchard         normalizeRoundAndPackFloatx80(
401915144b0fSOlivier Houchard             80, zSign, bExp + expDiff, aSig0, aSig1 );
402015144b0fSOlivier Houchard 
402115144b0fSOlivier Houchard }
402215144b0fSOlivier Houchard 
402315144b0fSOlivier Houchard /*
402415144b0fSOlivier Houchard -------------------------------------------------------------------------------
402515144b0fSOlivier Houchard Returns the square root of the extended double-precision floating-point
402615144b0fSOlivier Houchard value `a'.  The operation is performed according to the IEC/IEEE Standard
402715144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
402815144b0fSOlivier Houchard -------------------------------------------------------------------------------
402915144b0fSOlivier Houchard */
floatx80_sqrt(floatx80 a)403015144b0fSOlivier Houchard floatx80 floatx80_sqrt( floatx80 a )
403115144b0fSOlivier Houchard {
403215144b0fSOlivier Houchard     flag aSign;
403315144b0fSOlivier Houchard     int32 aExp, zExp;
403415144b0fSOlivier Houchard     bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
403515144b0fSOlivier Houchard     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
403615144b0fSOlivier Houchard     floatx80 z;
403715144b0fSOlivier Houchard 
403815144b0fSOlivier Houchard     aSig0 = extractFloatx80Frac( a );
403915144b0fSOlivier Houchard     aExp = extractFloatx80Exp( a );
404015144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
404115144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
404215144b0fSOlivier Houchard         if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
404315144b0fSOlivier Houchard         if ( ! aSign ) return a;
404415144b0fSOlivier Houchard         goto invalid;
404515144b0fSOlivier Houchard     }
404615144b0fSOlivier Houchard     if ( aSign ) {
404715144b0fSOlivier Houchard         if ( ( aExp | aSig0 ) == 0 ) return a;
404815144b0fSOlivier Houchard  invalid:
404915144b0fSOlivier Houchard         float_raise( float_flag_invalid );
405015144b0fSOlivier Houchard         z.low = floatx80_default_nan_low;
405115144b0fSOlivier Houchard         z.high = floatx80_default_nan_high;
405215144b0fSOlivier Houchard         return z;
405315144b0fSOlivier Houchard     }
405415144b0fSOlivier Houchard     if ( aExp == 0 ) {
405515144b0fSOlivier Houchard         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
405615144b0fSOlivier Houchard         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
405715144b0fSOlivier Houchard     }
405815144b0fSOlivier Houchard     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
405915144b0fSOlivier Houchard     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
406015144b0fSOlivier Houchard     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
406115144b0fSOlivier Houchard     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
406215144b0fSOlivier Houchard     doubleZSig0 = zSig0<<1;
406315144b0fSOlivier Houchard     mul64To128( zSig0, zSig0, &term0, &term1 );
406415144b0fSOlivier Houchard     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
406515144b0fSOlivier Houchard     while ( (sbits64) rem0 < 0 ) {
406615144b0fSOlivier Houchard         --zSig0;
406715144b0fSOlivier Houchard         doubleZSig0 -= 2;
406815144b0fSOlivier Houchard         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
406915144b0fSOlivier Houchard     }
407015144b0fSOlivier Houchard     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
407115144b0fSOlivier Houchard     if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
407215144b0fSOlivier Houchard         if ( zSig1 == 0 ) zSig1 = 1;
407315144b0fSOlivier Houchard         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
407415144b0fSOlivier Houchard         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
407515144b0fSOlivier Houchard         mul64To128( zSig1, zSig1, &term2, &term3 );
407615144b0fSOlivier Houchard         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
407715144b0fSOlivier Houchard         while ( (sbits64) rem1 < 0 ) {
407815144b0fSOlivier Houchard             --zSig1;
407915144b0fSOlivier Houchard             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
408015144b0fSOlivier Houchard             term3 |= 1;
408115144b0fSOlivier Houchard             term2 |= doubleZSig0;
408215144b0fSOlivier Houchard             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
408315144b0fSOlivier Houchard         }
408415144b0fSOlivier Houchard         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
408515144b0fSOlivier Houchard     }
408615144b0fSOlivier Houchard     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
408715144b0fSOlivier Houchard     zSig0 |= doubleZSig0;
408815144b0fSOlivier Houchard     return
408915144b0fSOlivier Houchard         roundAndPackFloatx80(
409015144b0fSOlivier Houchard             floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
409115144b0fSOlivier Houchard 
409215144b0fSOlivier Houchard }
409315144b0fSOlivier Houchard 
409415144b0fSOlivier Houchard /*
409515144b0fSOlivier Houchard -------------------------------------------------------------------------------
409615144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is
409715144b0fSOlivier Houchard equal to the corresponding value `b', and 0 otherwise.  The comparison is
409815144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
409915144b0fSOlivier Houchard Arithmetic.
410015144b0fSOlivier Houchard -------------------------------------------------------------------------------
410115144b0fSOlivier Houchard */
floatx80_eq(floatx80 a,floatx80 b)410215144b0fSOlivier Houchard flag floatx80_eq( floatx80 a, floatx80 b )
410315144b0fSOlivier Houchard {
410415144b0fSOlivier Houchard 
410515144b0fSOlivier Houchard     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
410615144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
410715144b0fSOlivier Houchard          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
410815144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
410915144b0fSOlivier Houchard        ) {
411015144b0fSOlivier Houchard         if (    floatx80_is_signaling_nan( a )
411115144b0fSOlivier Houchard              || floatx80_is_signaling_nan( b ) ) {
411215144b0fSOlivier Houchard             float_raise( float_flag_invalid );
411315144b0fSOlivier Houchard         }
411415144b0fSOlivier Houchard         return 0;
411515144b0fSOlivier Houchard     }
411615144b0fSOlivier Houchard     return
411715144b0fSOlivier Houchard            ( a.low == b.low )
411815144b0fSOlivier Houchard         && (    ( a.high == b.high )
411915144b0fSOlivier Houchard              || (    ( a.low == 0 )
412015144b0fSOlivier Houchard                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
412115144b0fSOlivier Houchard            );
412215144b0fSOlivier Houchard 
412315144b0fSOlivier Houchard }
412415144b0fSOlivier Houchard 
412515144b0fSOlivier Houchard /*
412615144b0fSOlivier Houchard -------------------------------------------------------------------------------
412715144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is
412815144b0fSOlivier Houchard less than or equal to the corresponding value `b', and 0 otherwise.  The
412915144b0fSOlivier Houchard comparison is performed according to the IEC/IEEE Standard for Binary
413015144b0fSOlivier Houchard Floating-Point Arithmetic.
413115144b0fSOlivier Houchard -------------------------------------------------------------------------------
413215144b0fSOlivier Houchard */
floatx80_le(floatx80 a,floatx80 b)413315144b0fSOlivier Houchard flag floatx80_le( floatx80 a, floatx80 b )
413415144b0fSOlivier Houchard {
413515144b0fSOlivier Houchard     flag aSign, bSign;
413615144b0fSOlivier Houchard 
413715144b0fSOlivier Houchard     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
413815144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
413915144b0fSOlivier Houchard          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
414015144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
414115144b0fSOlivier Houchard        ) {
414215144b0fSOlivier Houchard         float_raise( float_flag_invalid );
414315144b0fSOlivier Houchard         return 0;
414415144b0fSOlivier Houchard     }
414515144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
414615144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
414715144b0fSOlivier Houchard     if ( aSign != bSign ) {
414815144b0fSOlivier Houchard         return
414915144b0fSOlivier Houchard                aSign
415015144b0fSOlivier Houchard             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
415115144b0fSOlivier Houchard                  == 0 );
415215144b0fSOlivier Houchard     }
415315144b0fSOlivier Houchard     return
415415144b0fSOlivier Houchard           aSign ? le128( b.high, b.low, a.high, a.low )
415515144b0fSOlivier Houchard         : le128( a.high, a.low, b.high, b.low );
415615144b0fSOlivier Houchard 
415715144b0fSOlivier Houchard }
415815144b0fSOlivier Houchard 
415915144b0fSOlivier Houchard /*
416015144b0fSOlivier Houchard -------------------------------------------------------------------------------
416115144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is
416215144b0fSOlivier Houchard less than the corresponding value `b', and 0 otherwise.  The comparison
416315144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
416415144b0fSOlivier Houchard Arithmetic.
416515144b0fSOlivier Houchard -------------------------------------------------------------------------------
416615144b0fSOlivier Houchard */
floatx80_lt(floatx80 a,floatx80 b)416715144b0fSOlivier Houchard flag floatx80_lt( floatx80 a, floatx80 b )
416815144b0fSOlivier Houchard {
416915144b0fSOlivier Houchard     flag aSign, bSign;
417015144b0fSOlivier Houchard 
417115144b0fSOlivier Houchard     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
417215144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
417315144b0fSOlivier Houchard          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
417415144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
417515144b0fSOlivier Houchard        ) {
417615144b0fSOlivier Houchard         float_raise( float_flag_invalid );
417715144b0fSOlivier Houchard         return 0;
417815144b0fSOlivier Houchard     }
417915144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
418015144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
418115144b0fSOlivier Houchard     if ( aSign != bSign ) {
418215144b0fSOlivier Houchard         return
418315144b0fSOlivier Houchard                aSign
418415144b0fSOlivier Houchard             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
418515144b0fSOlivier Houchard                  != 0 );
418615144b0fSOlivier Houchard     }
418715144b0fSOlivier Houchard     return
418815144b0fSOlivier Houchard           aSign ? lt128( b.high, b.low, a.high, a.low )
418915144b0fSOlivier Houchard         : lt128( a.high, a.low, b.high, b.low );
419015144b0fSOlivier Houchard 
419115144b0fSOlivier Houchard }
419215144b0fSOlivier Houchard 
419315144b0fSOlivier Houchard /*
419415144b0fSOlivier Houchard -------------------------------------------------------------------------------
419515144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is equal
419615144b0fSOlivier Houchard to the corresponding value `b', and 0 otherwise.  The invalid exception is
419715144b0fSOlivier Houchard raised if either operand is a NaN.  Otherwise, the comparison is performed
419815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
419915144b0fSOlivier Houchard -------------------------------------------------------------------------------
420015144b0fSOlivier Houchard */
floatx80_eq_signaling(floatx80 a,floatx80 b)420115144b0fSOlivier Houchard flag floatx80_eq_signaling( floatx80 a, floatx80 b )
420215144b0fSOlivier Houchard {
420315144b0fSOlivier Houchard 
420415144b0fSOlivier Houchard     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
420515144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
420615144b0fSOlivier Houchard          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
420715144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
420815144b0fSOlivier Houchard        ) {
420915144b0fSOlivier Houchard         float_raise( float_flag_invalid );
421015144b0fSOlivier Houchard         return 0;
421115144b0fSOlivier Houchard     }
421215144b0fSOlivier Houchard     return
421315144b0fSOlivier Houchard            ( a.low == b.low )
421415144b0fSOlivier Houchard         && (    ( a.high == b.high )
421515144b0fSOlivier Houchard              || (    ( a.low == 0 )
421615144b0fSOlivier Houchard                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
421715144b0fSOlivier Houchard            );
421815144b0fSOlivier Houchard 
421915144b0fSOlivier Houchard }
422015144b0fSOlivier Houchard 
422115144b0fSOlivier Houchard /*
422215144b0fSOlivier Houchard -------------------------------------------------------------------------------
422315144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is less
422415144b0fSOlivier Houchard than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
422515144b0fSOlivier Houchard do not cause an exception.  Otherwise, the comparison is performed according
422615144b0fSOlivier Houchard to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
422715144b0fSOlivier Houchard -------------------------------------------------------------------------------
422815144b0fSOlivier Houchard */
floatx80_le_quiet(floatx80 a,floatx80 b)422915144b0fSOlivier Houchard flag floatx80_le_quiet( floatx80 a, floatx80 b )
423015144b0fSOlivier Houchard {
423115144b0fSOlivier Houchard     flag aSign, bSign;
423215144b0fSOlivier Houchard 
423315144b0fSOlivier Houchard     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
423415144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
423515144b0fSOlivier Houchard          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
423615144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
423715144b0fSOlivier Houchard        ) {
423815144b0fSOlivier Houchard         if (    floatx80_is_signaling_nan( a )
423915144b0fSOlivier Houchard              || floatx80_is_signaling_nan( b ) ) {
424015144b0fSOlivier Houchard             float_raise( float_flag_invalid );
424115144b0fSOlivier Houchard         }
424215144b0fSOlivier Houchard         return 0;
424315144b0fSOlivier Houchard     }
424415144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
424515144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
424615144b0fSOlivier Houchard     if ( aSign != bSign ) {
424715144b0fSOlivier Houchard         return
424815144b0fSOlivier Houchard                aSign
424915144b0fSOlivier Houchard             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
425015144b0fSOlivier Houchard                  == 0 );
425115144b0fSOlivier Houchard     }
425215144b0fSOlivier Houchard     return
425315144b0fSOlivier Houchard           aSign ? le128( b.high, b.low, a.high, a.low )
425415144b0fSOlivier Houchard         : le128( a.high, a.low, b.high, b.low );
425515144b0fSOlivier Houchard 
425615144b0fSOlivier Houchard }
425715144b0fSOlivier Houchard 
425815144b0fSOlivier Houchard /*
425915144b0fSOlivier Houchard -------------------------------------------------------------------------------
426015144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is less
426115144b0fSOlivier Houchard than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
426215144b0fSOlivier Houchard an exception.  Otherwise, the comparison is performed according to the
426315144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
426415144b0fSOlivier Houchard -------------------------------------------------------------------------------
426515144b0fSOlivier Houchard */
floatx80_lt_quiet(floatx80 a,floatx80 b)426615144b0fSOlivier Houchard flag floatx80_lt_quiet( floatx80 a, floatx80 b )
426715144b0fSOlivier Houchard {
426815144b0fSOlivier Houchard     flag aSign, bSign;
426915144b0fSOlivier Houchard 
427015144b0fSOlivier Houchard     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
427115144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
427215144b0fSOlivier Houchard          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
427315144b0fSOlivier Houchard               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
427415144b0fSOlivier Houchard        ) {
427515144b0fSOlivier Houchard         if (    floatx80_is_signaling_nan( a )
427615144b0fSOlivier Houchard              || floatx80_is_signaling_nan( b ) ) {
427715144b0fSOlivier Houchard             float_raise( float_flag_invalid );
427815144b0fSOlivier Houchard         }
427915144b0fSOlivier Houchard         return 0;
428015144b0fSOlivier Houchard     }
428115144b0fSOlivier Houchard     aSign = extractFloatx80Sign( a );
428215144b0fSOlivier Houchard     bSign = extractFloatx80Sign( b );
428315144b0fSOlivier Houchard     if ( aSign != bSign ) {
428415144b0fSOlivier Houchard         return
428515144b0fSOlivier Houchard                aSign
428615144b0fSOlivier Houchard             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
428715144b0fSOlivier Houchard                  != 0 );
428815144b0fSOlivier Houchard     }
428915144b0fSOlivier Houchard     return
429015144b0fSOlivier Houchard           aSign ? lt128( b.high, b.low, a.high, a.low )
429115144b0fSOlivier Houchard         : lt128( a.high, a.low, b.high, b.low );
429215144b0fSOlivier Houchard 
429315144b0fSOlivier Houchard }
429415144b0fSOlivier Houchard 
429515144b0fSOlivier Houchard #endif
429615144b0fSOlivier Houchard 
429715144b0fSOlivier Houchard #ifdef FLOAT128
429815144b0fSOlivier Houchard 
429915144b0fSOlivier Houchard /*
430015144b0fSOlivier Houchard -------------------------------------------------------------------------------
430115144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
430215144b0fSOlivier Houchard value `a' to the 32-bit two's complement integer format.  The conversion
430315144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
430415144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
430515144b0fSOlivier Houchard according to the current rounding mode.  If `a' is a NaN, the largest
430615144b0fSOlivier Houchard positive integer is returned.  Otherwise, if the conversion overflows, the
430715144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
430815144b0fSOlivier Houchard -------------------------------------------------------------------------------
430915144b0fSOlivier Houchard */
float128_to_int32(float128 a)431015144b0fSOlivier Houchard int32 float128_to_int32( float128 a )
431115144b0fSOlivier Houchard {
431215144b0fSOlivier Houchard     flag aSign;
431315144b0fSOlivier Houchard     int32 aExp, shiftCount;
431415144b0fSOlivier Houchard     bits64 aSig0, aSig1;
431515144b0fSOlivier Houchard 
431615144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
431715144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
431815144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
431915144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
432015144b0fSOlivier Houchard     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
432115144b0fSOlivier Houchard     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
432215144b0fSOlivier Houchard     aSig0 |= ( aSig1 != 0 );
432315144b0fSOlivier Houchard     shiftCount = 0x4028 - aExp;
432415144b0fSOlivier Houchard     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
432515144b0fSOlivier Houchard     return roundAndPackInt32( aSign, aSig0 );
432615144b0fSOlivier Houchard 
432715144b0fSOlivier Houchard }
432815144b0fSOlivier Houchard 
432915144b0fSOlivier Houchard /*
433015144b0fSOlivier Houchard -------------------------------------------------------------------------------
433115144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
433215144b0fSOlivier Houchard value `a' to the 32-bit two's complement integer format.  The conversion
433315144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
433415144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.  If
433515144b0fSOlivier Houchard `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
433615144b0fSOlivier Houchard conversion overflows, the largest integer with the same sign as `a' is
433715144b0fSOlivier Houchard returned.
433815144b0fSOlivier Houchard -------------------------------------------------------------------------------
433915144b0fSOlivier Houchard */
float128_to_int32_round_to_zero(float128 a)434015144b0fSOlivier Houchard int32 float128_to_int32_round_to_zero( float128 a )
434115144b0fSOlivier Houchard {
434215144b0fSOlivier Houchard     flag aSign;
434315144b0fSOlivier Houchard     int32 aExp, shiftCount;
434415144b0fSOlivier Houchard     bits64 aSig0, aSig1, savedASig;
434515144b0fSOlivier Houchard     int32 z;
434615144b0fSOlivier Houchard 
434715144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
434815144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
434915144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
435015144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
435115144b0fSOlivier Houchard     aSig0 |= ( aSig1 != 0 );
435215144b0fSOlivier Houchard     if ( 0x401E < aExp ) {
435315144b0fSOlivier Houchard         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
435415144b0fSOlivier Houchard         goto invalid;
435515144b0fSOlivier Houchard     }
435615144b0fSOlivier Houchard     else if ( aExp < 0x3FFF ) {
435715144b0fSOlivier Houchard         if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
435815144b0fSOlivier Houchard         return 0;
435915144b0fSOlivier Houchard     }
436015144b0fSOlivier Houchard     aSig0 |= LIT64( 0x0001000000000000 );
436115144b0fSOlivier Houchard     shiftCount = 0x402F - aExp;
436215144b0fSOlivier Houchard     savedASig = aSig0;
436315144b0fSOlivier Houchard     aSig0 >>= shiftCount;
436415144b0fSOlivier Houchard     z = aSig0;
436515144b0fSOlivier Houchard     if ( aSign ) z = - z;
436615144b0fSOlivier Houchard     if ( ( z < 0 ) ^ aSign ) {
436715144b0fSOlivier Houchard  invalid:
436815144b0fSOlivier Houchard         float_raise( float_flag_invalid );
436915144b0fSOlivier Houchard         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
437015144b0fSOlivier Houchard     }
437115144b0fSOlivier Houchard     if ( ( aSig0<<shiftCount ) != savedASig ) {
437215144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
437315144b0fSOlivier Houchard     }
437415144b0fSOlivier Houchard     return z;
437515144b0fSOlivier Houchard 
437615144b0fSOlivier Houchard }
437715144b0fSOlivier Houchard 
437815144b0fSOlivier Houchard /*
437915144b0fSOlivier Houchard -------------------------------------------------------------------------------
438015144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
438115144b0fSOlivier Houchard value `a' to the 64-bit two's complement integer format.  The conversion
438215144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
438315144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
438415144b0fSOlivier Houchard according to the current rounding mode.  If `a' is a NaN, the largest
438515144b0fSOlivier Houchard positive integer is returned.  Otherwise, if the conversion overflows, the
438615144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
438715144b0fSOlivier Houchard -------------------------------------------------------------------------------
438815144b0fSOlivier Houchard */
float128_to_int64(float128 a)438915144b0fSOlivier Houchard int64 float128_to_int64( float128 a )
439015144b0fSOlivier Houchard {
439115144b0fSOlivier Houchard     flag aSign;
439215144b0fSOlivier Houchard     int32 aExp, shiftCount;
439315144b0fSOlivier Houchard     bits64 aSig0, aSig1;
439415144b0fSOlivier Houchard 
439515144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
439615144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
439715144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
439815144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
439915144b0fSOlivier Houchard     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
440015144b0fSOlivier Houchard     shiftCount = 0x402F - aExp;
440115144b0fSOlivier Houchard     if ( shiftCount <= 0 ) {
440215144b0fSOlivier Houchard         if ( 0x403E < aExp ) {
440315144b0fSOlivier Houchard             float_raise( float_flag_invalid );
440415144b0fSOlivier Houchard             if (    ! aSign
440515144b0fSOlivier Houchard                  || (    ( aExp == 0x7FFF )
440615144b0fSOlivier Houchard                       && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
440715144b0fSOlivier Houchard                     )
440815144b0fSOlivier Houchard                ) {
440915144b0fSOlivier Houchard                 return LIT64( 0x7FFFFFFFFFFFFFFF );
441015144b0fSOlivier Houchard             }
441115144b0fSOlivier Houchard             return (sbits64) LIT64( 0x8000000000000000 );
441215144b0fSOlivier Houchard         }
441315144b0fSOlivier Houchard         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
441415144b0fSOlivier Houchard     }
441515144b0fSOlivier Houchard     else {
441615144b0fSOlivier Houchard         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
441715144b0fSOlivier Houchard     }
441815144b0fSOlivier Houchard     return roundAndPackInt64( aSign, aSig0, aSig1 );
441915144b0fSOlivier Houchard 
442015144b0fSOlivier Houchard }
442115144b0fSOlivier Houchard 
442215144b0fSOlivier Houchard /*
442315144b0fSOlivier Houchard -------------------------------------------------------------------------------
442415144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
442515144b0fSOlivier Houchard value `a' to the 64-bit two's complement integer format.  The conversion
442615144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
442715144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.
442815144b0fSOlivier Houchard If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
442915144b0fSOlivier Houchard the conversion overflows, the largest integer with the same sign as `a' is
443015144b0fSOlivier Houchard returned.
443115144b0fSOlivier Houchard -------------------------------------------------------------------------------
443215144b0fSOlivier Houchard */
float128_to_int64_round_to_zero(float128 a)443315144b0fSOlivier Houchard int64 float128_to_int64_round_to_zero( float128 a )
443415144b0fSOlivier Houchard {
443515144b0fSOlivier Houchard     flag aSign;
443615144b0fSOlivier Houchard     int32 aExp, shiftCount;
443715144b0fSOlivier Houchard     bits64 aSig0, aSig1;
443815144b0fSOlivier Houchard     int64 z;
443915144b0fSOlivier Houchard 
444015144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
444115144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
444215144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
444315144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
444415144b0fSOlivier Houchard     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
444515144b0fSOlivier Houchard     shiftCount = aExp - 0x402F;
444615144b0fSOlivier Houchard     if ( 0 < shiftCount ) {
444715144b0fSOlivier Houchard         if ( 0x403E <= aExp ) {
444815144b0fSOlivier Houchard             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
444915144b0fSOlivier Houchard             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
445015144b0fSOlivier Houchard                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
445115144b0fSOlivier Houchard                 if ( aSig1 ) float_exception_flags |= float_flag_inexact;
445215144b0fSOlivier Houchard             }
445315144b0fSOlivier Houchard             else {
445415144b0fSOlivier Houchard                 float_raise( float_flag_invalid );
445515144b0fSOlivier Houchard                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
445615144b0fSOlivier Houchard                     return LIT64( 0x7FFFFFFFFFFFFFFF );
445715144b0fSOlivier Houchard                 }
445815144b0fSOlivier Houchard             }
445915144b0fSOlivier Houchard             return (sbits64) LIT64( 0x8000000000000000 );
446015144b0fSOlivier Houchard         }
446115144b0fSOlivier Houchard         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
446215144b0fSOlivier Houchard         if ( (bits64) ( aSig1<<shiftCount ) ) {
446315144b0fSOlivier Houchard             float_exception_flags |= float_flag_inexact;
446415144b0fSOlivier Houchard         }
446515144b0fSOlivier Houchard     }
446615144b0fSOlivier Houchard     else {
446715144b0fSOlivier Houchard         if ( aExp < 0x3FFF ) {
446815144b0fSOlivier Houchard             if ( aExp | aSig0 | aSig1 ) {
446915144b0fSOlivier Houchard                 float_exception_flags |= float_flag_inexact;
447015144b0fSOlivier Houchard             }
447115144b0fSOlivier Houchard             return 0;
447215144b0fSOlivier Houchard         }
447315144b0fSOlivier Houchard         z = aSig0>>( - shiftCount );
447415144b0fSOlivier Houchard         if (    aSig1
447515144b0fSOlivier Houchard              || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
447615144b0fSOlivier Houchard             float_exception_flags |= float_flag_inexact;
447715144b0fSOlivier Houchard         }
447815144b0fSOlivier Houchard     }
447915144b0fSOlivier Houchard     if ( aSign ) z = - z;
448015144b0fSOlivier Houchard     return z;
448115144b0fSOlivier Houchard 
448215144b0fSOlivier Houchard }
448315144b0fSOlivier Houchard 
#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
    && defined(SOFTFLOAT_NEED_FIXUNS)
/*
-------------------------------------------------------------------------------
Returns the result of converting the quadruple-precision floating-point
value `a' to the 64-bit unsigned integer format, rounding toward zero.
Signed overflow is not diagnosed: a negative input whose exponent is below
0x403F is converted by magnitude and then negated in unsigned arithmetic.
If the exponent of `a' is 0x403F or larger (which includes infinities and
NaNs), the invalid exception is raised and 0xFFFFFFFFFFFFFFFF is returned.
-------------------------------------------------------------------------------
*/
uint64 float128_to_uint64_round_to_zero( float128 a )
{
    flag aSign;
    int32 aExp, shiftCount;
    bits64 aSig0, aSig1;
    uint64 z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    /* A non-zero exponent field means the implicit leading 1 is present. */
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
    shiftCount = aExp - 0x402F;
    if ( 0 < shiftCount ) {
        if ( 0x403F <= aExp ) {
            /*
             * The magnitude does not fit in 64 bits.  An earlier revision
             * also tested `a.high == 0xC03E000000000000' here, but that
             * requires an exponent of 0x403E and so could never be true
             * inside this branch; the invalid exception was always raised.
             */
            float_raise( float_flag_invalid );
            return LIT64( 0xFFFFFFFFFFFFFFFF );
        }
        /* The integer part straddles both halves of the significand. */
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
        if ( (bits64) ( aSig1<<shiftCount ) ) {
            float_exception_flags |= float_flag_inexact;
        }
    }
    else {
        if ( aExp < 0x3FFF ) {
            /* Magnitude below one truncates to zero; inexact unless a is 0. */
            if ( aExp | aSig0 | aSig1 ) {
                float_exception_flags |= float_flag_inexact;
            }
            return 0;
        }
        /* The integer part lies entirely within `aSig0'. */
        z = aSig0>>( - shiftCount );
        if (    aSig1
             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
            float_exception_flags |= float_flag_inexact;
        }
    }
    /* Unsigned negation: wraps modulo 2^64 for negative inputs. */
    if ( aSign ) z = - z;
    return z;

}
#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
4536c36abe0dSDavid Schultz 
453715144b0fSOlivier Houchard /*
453815144b0fSOlivier Houchard -------------------------------------------------------------------------------
453915144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
454015144b0fSOlivier Houchard value `a' to the single-precision floating-point format.  The conversion
454115144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
454215144b0fSOlivier Houchard Arithmetic.
454315144b0fSOlivier Houchard -------------------------------------------------------------------------------
454415144b0fSOlivier Houchard */
float128_to_float32(float128 a)454515144b0fSOlivier Houchard float32 float128_to_float32( float128 a )
454615144b0fSOlivier Houchard {
454715144b0fSOlivier Houchard     flag aSign;
454815144b0fSOlivier Houchard     int32 aExp;
454915144b0fSOlivier Houchard     bits64 aSig0, aSig1;
455015144b0fSOlivier Houchard     bits32 zSig;
455115144b0fSOlivier Houchard 
455215144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
455315144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
455415144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
455515144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
455615144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
455715144b0fSOlivier Houchard         if ( aSig0 | aSig1 ) {
455815144b0fSOlivier Houchard             return commonNaNToFloat32( float128ToCommonNaN( a ) );
455915144b0fSOlivier Houchard         }
456015144b0fSOlivier Houchard         return packFloat32( aSign, 0xFF, 0 );
456115144b0fSOlivier Houchard     }
456215144b0fSOlivier Houchard     aSig0 |= ( aSig1 != 0 );
456315144b0fSOlivier Houchard     shift64RightJamming( aSig0, 18, &aSig0 );
456415144b0fSOlivier Houchard     zSig = aSig0;
456515144b0fSOlivier Houchard     if ( aExp || zSig ) {
456615144b0fSOlivier Houchard         zSig |= 0x40000000;
456715144b0fSOlivier Houchard         aExp -= 0x3F81;
456815144b0fSOlivier Houchard     }
456915144b0fSOlivier Houchard     return roundAndPackFloat32( aSign, aExp, zSig );
457015144b0fSOlivier Houchard 
457115144b0fSOlivier Houchard }
457215144b0fSOlivier Houchard 
457315144b0fSOlivier Houchard /*
457415144b0fSOlivier Houchard -------------------------------------------------------------------------------
457515144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
457615144b0fSOlivier Houchard value `a' to the double-precision floating-point format.  The conversion
457715144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
457815144b0fSOlivier Houchard Arithmetic.
457915144b0fSOlivier Houchard -------------------------------------------------------------------------------
458015144b0fSOlivier Houchard */
float128_to_float64(float128 a)458115144b0fSOlivier Houchard float64 float128_to_float64( float128 a )
458215144b0fSOlivier Houchard {
458315144b0fSOlivier Houchard     flag aSign;
458415144b0fSOlivier Houchard     int32 aExp;
458515144b0fSOlivier Houchard     bits64 aSig0, aSig1;
458615144b0fSOlivier Houchard 
458715144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
458815144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
458915144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
459015144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
459115144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
459215144b0fSOlivier Houchard         if ( aSig0 | aSig1 ) {
459315144b0fSOlivier Houchard             return commonNaNToFloat64( float128ToCommonNaN( a ) );
459415144b0fSOlivier Houchard         }
459515144b0fSOlivier Houchard         return packFloat64( aSign, 0x7FF, 0 );
459615144b0fSOlivier Houchard     }
459715144b0fSOlivier Houchard     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
459815144b0fSOlivier Houchard     aSig0 |= ( aSig1 != 0 );
459915144b0fSOlivier Houchard     if ( aExp || aSig0 ) {
460015144b0fSOlivier Houchard         aSig0 |= LIT64( 0x4000000000000000 );
460115144b0fSOlivier Houchard         aExp -= 0x3C01;
460215144b0fSOlivier Houchard     }
460315144b0fSOlivier Houchard     return roundAndPackFloat64( aSign, aExp, aSig0 );
460415144b0fSOlivier Houchard 
460515144b0fSOlivier Houchard }
460615144b0fSOlivier Houchard 
#ifdef FLOATX80

/*
-------------------------------------------------------------------------------
Converts the quadruple-precision floating-point value `a' to the extended
double-precision floating-point format and returns the result.  NaNs are
passed through the common NaN representation, infinities and zeros keep
their sign, subnormal inputs are normalized first, and the significand is
then rounded by `roundAndPackFloatx80' with a rounding precision of 80.  The
conversion is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*/
floatx80 float128_to_floatx80( float128 a )
{
    bits64 fracLow = extractFloat128Frac1( a );
    bits64 fracHigh = extractFloat128Frac0( a );
    int32 exponent = extractFloat128Exp( a );
    const flag sign = extractFloat128Sign( a );

    if ( exponent == 0x7FFF ) {
        if ( ( fracHigh | fracLow ) != 0 ) {
            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
        }
        return packFloatx80( sign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }

    if ( exponent != 0 ) {
        /* Normal number: make the implicit leading 1 explicit. */
        fracHigh |= LIT64( 0x0001000000000000 );
    }
    else {
        if ( ( fracHigh | fracLow ) == 0 ) return packFloatx80( sign, 0, 0 );
        normalizeFloat128Subnormal(
            fracHigh, fracLow, &exponent, &fracHigh, &fracLow );
    }

    /* Move the leading 1 from bit 48 up to bit 63 of the high word. */
    shortShift128Left( fracHigh, fracLow, 15, &fracHigh, &fracLow );
    return roundAndPackFloatx80( 80, sign, exponent, fracHigh, fracLow );

}

#endif
464615144b0fSOlivier Houchard 
464715144b0fSOlivier Houchard /*
464815144b0fSOlivier Houchard -------------------------------------------------------------------------------
464915144b0fSOlivier Houchard Rounds the quadruple-precision floating-point value `a' to an integer, and
465015144b0fSOlivier Houchard returns the result as a quadruple-precision floating-point value.  The
465115144b0fSOlivier Houchard operation is performed according to the IEC/IEEE Standard for Binary
465215144b0fSOlivier Houchard Floating-Point Arithmetic.
465315144b0fSOlivier Houchard -------------------------------------------------------------------------------
465415144b0fSOlivier Houchard */
float128_round_to_int(float128 a)465515144b0fSOlivier Houchard float128 float128_round_to_int( float128 a )
465615144b0fSOlivier Houchard {
465715144b0fSOlivier Houchard     flag aSign;
465815144b0fSOlivier Houchard     int32 aExp;
465915144b0fSOlivier Houchard     bits64 lastBitMask, roundBitsMask;
466015144b0fSOlivier Houchard     int8 roundingMode;
466115144b0fSOlivier Houchard     float128 z;
466215144b0fSOlivier Houchard 
466315144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
466415144b0fSOlivier Houchard     if ( 0x402F <= aExp ) {
466515144b0fSOlivier Houchard         if ( 0x406F <= aExp ) {
466615144b0fSOlivier Houchard             if (    ( aExp == 0x7FFF )
466715144b0fSOlivier Houchard                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
466815144b0fSOlivier Houchard                ) {
466915144b0fSOlivier Houchard                 return propagateFloat128NaN( a, a );
467015144b0fSOlivier Houchard             }
467115144b0fSOlivier Houchard             return a;
467215144b0fSOlivier Houchard         }
467315144b0fSOlivier Houchard         lastBitMask = 1;
467415144b0fSOlivier Houchard         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
467515144b0fSOlivier Houchard         roundBitsMask = lastBitMask - 1;
467615144b0fSOlivier Houchard         z = a;
467715144b0fSOlivier Houchard         roundingMode = float_rounding_mode;
467815144b0fSOlivier Houchard         if ( roundingMode == float_round_nearest_even ) {
467915144b0fSOlivier Houchard             if ( lastBitMask ) {
468015144b0fSOlivier Houchard                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
468115144b0fSOlivier Houchard                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
468215144b0fSOlivier Houchard             }
468315144b0fSOlivier Houchard             else {
468415144b0fSOlivier Houchard                 if ( (sbits64) z.low < 0 ) {
468515144b0fSOlivier Houchard                     ++z.high;
468615144b0fSOlivier Houchard                     if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
468715144b0fSOlivier Houchard                 }
468815144b0fSOlivier Houchard             }
468915144b0fSOlivier Houchard         }
469015144b0fSOlivier Houchard         else if ( roundingMode != float_round_to_zero ) {
469115144b0fSOlivier Houchard             if (   extractFloat128Sign( z )
469215144b0fSOlivier Houchard                  ^ ( roundingMode == float_round_up ) ) {
469315144b0fSOlivier Houchard                 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
469415144b0fSOlivier Houchard             }
469515144b0fSOlivier Houchard         }
469615144b0fSOlivier Houchard         z.low &= ~ roundBitsMask;
469715144b0fSOlivier Houchard     }
469815144b0fSOlivier Houchard     else {
469915144b0fSOlivier Houchard         if ( aExp < 0x3FFF ) {
470015144b0fSOlivier Houchard             if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
470115144b0fSOlivier Houchard             float_exception_flags |= float_flag_inexact;
470215144b0fSOlivier Houchard             aSign = extractFloat128Sign( a );
470315144b0fSOlivier Houchard             switch ( float_rounding_mode ) {
470415144b0fSOlivier Houchard              case float_round_nearest_even:
470515144b0fSOlivier Houchard                 if (    ( aExp == 0x3FFE )
470615144b0fSOlivier Houchard                      && (   extractFloat128Frac0( a )
470715144b0fSOlivier Houchard                           | extractFloat128Frac1( a ) )
470815144b0fSOlivier Houchard                    ) {
470915144b0fSOlivier Houchard                     return packFloat128( aSign, 0x3FFF, 0, 0 );
471015144b0fSOlivier Houchard                 }
471115144b0fSOlivier Houchard                 break;
471215144b0fSOlivier Houchard 	     case float_round_to_zero:
471315144b0fSOlivier Houchard 		break;
471415144b0fSOlivier Houchard              case float_round_down:
471515144b0fSOlivier Houchard                 return
471615144b0fSOlivier Houchard                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
471715144b0fSOlivier Houchard                     : packFloat128( 0, 0, 0, 0 );
471815144b0fSOlivier Houchard              case float_round_up:
471915144b0fSOlivier Houchard                 return
472015144b0fSOlivier Houchard                       aSign ? packFloat128( 1, 0, 0, 0 )
472115144b0fSOlivier Houchard                     : packFloat128( 0, 0x3FFF, 0, 0 );
472215144b0fSOlivier Houchard             }
472315144b0fSOlivier Houchard             return packFloat128( aSign, 0, 0, 0 );
472415144b0fSOlivier Houchard         }
472515144b0fSOlivier Houchard         lastBitMask = 1;
472615144b0fSOlivier Houchard         lastBitMask <<= 0x402F - aExp;
472715144b0fSOlivier Houchard         roundBitsMask = lastBitMask - 1;
472815144b0fSOlivier Houchard         z.low = 0;
472915144b0fSOlivier Houchard         z.high = a.high;
473015144b0fSOlivier Houchard         roundingMode = float_rounding_mode;
473115144b0fSOlivier Houchard         if ( roundingMode == float_round_nearest_even ) {
473215144b0fSOlivier Houchard             z.high += lastBitMask>>1;
473315144b0fSOlivier Houchard             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
473415144b0fSOlivier Houchard                 z.high &= ~ lastBitMask;
473515144b0fSOlivier Houchard             }
473615144b0fSOlivier Houchard         }
473715144b0fSOlivier Houchard         else if ( roundingMode != float_round_to_zero ) {
473815144b0fSOlivier Houchard             if (   extractFloat128Sign( z )
473915144b0fSOlivier Houchard                  ^ ( roundingMode == float_round_up ) ) {
474015144b0fSOlivier Houchard                 z.high |= ( a.low != 0 );
474115144b0fSOlivier Houchard                 z.high += roundBitsMask;
474215144b0fSOlivier Houchard             }
474315144b0fSOlivier Houchard         }
474415144b0fSOlivier Houchard         z.high &= ~ roundBitsMask;
474515144b0fSOlivier Houchard     }
474615144b0fSOlivier Houchard     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
474715144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
474815144b0fSOlivier Houchard     }
474915144b0fSOlivier Houchard     return z;
475015144b0fSOlivier Houchard 
475115144b0fSOlivier Houchard }
475215144b0fSOlivier Houchard 
475315144b0fSOlivier Houchard /*
475415144b0fSOlivier Houchard -------------------------------------------------------------------------------
475515144b0fSOlivier Houchard Returns the result of adding the absolute values of the quadruple-precision
475615144b0fSOlivier Houchard floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
475715144b0fSOlivier Houchard before being returned.  `zSign' is ignored if the result is a NaN.
475815144b0fSOlivier Houchard The addition is performed according to the IEC/IEEE Standard for Binary
475915144b0fSOlivier Houchard Floating-Point Arithmetic.
476015144b0fSOlivier Houchard -------------------------------------------------------------------------------
476115144b0fSOlivier Houchard */
addFloat128Sigs(float128 a,float128 b,flag zSign)476215144b0fSOlivier Houchard static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
476315144b0fSOlivier Houchard {
476415144b0fSOlivier Houchard     int32 aExp, bExp, zExp;
476515144b0fSOlivier Houchard     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
476615144b0fSOlivier Houchard     int32 expDiff;
476715144b0fSOlivier Houchard 
476815144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
476915144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
477015144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
477115144b0fSOlivier Houchard     bSig1 = extractFloat128Frac1( b );
477215144b0fSOlivier Houchard     bSig0 = extractFloat128Frac0( b );
477315144b0fSOlivier Houchard     bExp = extractFloat128Exp( b );
477415144b0fSOlivier Houchard     expDiff = aExp - bExp;
477515144b0fSOlivier Houchard     if ( 0 < expDiff ) {
477615144b0fSOlivier Houchard         if ( aExp == 0x7FFF ) {
477715144b0fSOlivier Houchard             if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
477815144b0fSOlivier Houchard             return a;
477915144b0fSOlivier Houchard         }
478015144b0fSOlivier Houchard         if ( bExp == 0 ) {
478115144b0fSOlivier Houchard             --expDiff;
478215144b0fSOlivier Houchard         }
478315144b0fSOlivier Houchard         else {
478415144b0fSOlivier Houchard             bSig0 |= LIT64( 0x0001000000000000 );
478515144b0fSOlivier Houchard         }
478615144b0fSOlivier Houchard         shift128ExtraRightJamming(
478715144b0fSOlivier Houchard             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
478815144b0fSOlivier Houchard         zExp = aExp;
478915144b0fSOlivier Houchard     }
479015144b0fSOlivier Houchard     else if ( expDiff < 0 ) {
479115144b0fSOlivier Houchard         if ( bExp == 0x7FFF ) {
479215144b0fSOlivier Houchard             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
479315144b0fSOlivier Houchard             return packFloat128( zSign, 0x7FFF, 0, 0 );
479415144b0fSOlivier Houchard         }
479515144b0fSOlivier Houchard         if ( aExp == 0 ) {
479615144b0fSOlivier Houchard             ++expDiff;
479715144b0fSOlivier Houchard         }
479815144b0fSOlivier Houchard         else {
479915144b0fSOlivier Houchard             aSig0 |= LIT64( 0x0001000000000000 );
480015144b0fSOlivier Houchard         }
480115144b0fSOlivier Houchard         shift128ExtraRightJamming(
480215144b0fSOlivier Houchard             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
480315144b0fSOlivier Houchard         zExp = bExp;
480415144b0fSOlivier Houchard     }
480515144b0fSOlivier Houchard     else {
480615144b0fSOlivier Houchard         if ( aExp == 0x7FFF ) {
480715144b0fSOlivier Houchard             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
480815144b0fSOlivier Houchard                 return propagateFloat128NaN( a, b );
480915144b0fSOlivier Houchard             }
481015144b0fSOlivier Houchard             return a;
481115144b0fSOlivier Houchard         }
481215144b0fSOlivier Houchard         add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
481315144b0fSOlivier Houchard         if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
481415144b0fSOlivier Houchard         zSig2 = 0;
481515144b0fSOlivier Houchard         zSig0 |= LIT64( 0x0002000000000000 );
481615144b0fSOlivier Houchard         zExp = aExp;
481715144b0fSOlivier Houchard         goto shiftRight1;
481815144b0fSOlivier Houchard     }
481915144b0fSOlivier Houchard     aSig0 |= LIT64( 0x0001000000000000 );
482015144b0fSOlivier Houchard     add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
482115144b0fSOlivier Houchard     --zExp;
482215144b0fSOlivier Houchard     if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
482315144b0fSOlivier Houchard     ++zExp;
482415144b0fSOlivier Houchard  shiftRight1:
482515144b0fSOlivier Houchard     shift128ExtraRightJamming(
482615144b0fSOlivier Houchard         zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
482715144b0fSOlivier Houchard  roundAndPack:
482815144b0fSOlivier Houchard     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
482915144b0fSOlivier Houchard 
483015144b0fSOlivier Houchard }
483115144b0fSOlivier Houchard 
483215144b0fSOlivier Houchard /*
483315144b0fSOlivier Houchard -------------------------------------------------------------------------------
483415144b0fSOlivier Houchard Returns the result of subtracting the absolute values of the quadruple-
483515144b0fSOlivier Houchard precision floating-point values `a' and `b'.  If `zSign' is 1, the
483615144b0fSOlivier Houchard difference is negated before being returned.  `zSign' is ignored if the
483715144b0fSOlivier Houchard result is a NaN.  The subtraction is performed according to the IEC/IEEE
483815144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
483915144b0fSOlivier Houchard -------------------------------------------------------------------------------
484015144b0fSOlivier Houchard */
subFloat128Sigs(float128 a,float128 b,flag zSign)484115144b0fSOlivier Houchard static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
484215144b0fSOlivier Houchard {
484315144b0fSOlivier Houchard     int32 aExp, bExp, zExp;
484415144b0fSOlivier Houchard     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
484515144b0fSOlivier Houchard     int32 expDiff;
484615144b0fSOlivier Houchard     float128 z;
484715144b0fSOlivier Houchard 
484815144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
484915144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
485015144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
485115144b0fSOlivier Houchard     bSig1 = extractFloat128Frac1( b );
485215144b0fSOlivier Houchard     bSig0 = extractFloat128Frac0( b );
485315144b0fSOlivier Houchard     bExp = extractFloat128Exp( b );
485415144b0fSOlivier Houchard     expDiff = aExp - bExp;
485515144b0fSOlivier Houchard     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
485615144b0fSOlivier Houchard     shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
485715144b0fSOlivier Houchard     if ( 0 < expDiff ) goto aExpBigger;
485815144b0fSOlivier Houchard     if ( expDiff < 0 ) goto bExpBigger;
485915144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
486015144b0fSOlivier Houchard         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
486115144b0fSOlivier Houchard             return propagateFloat128NaN( a, b );
486215144b0fSOlivier Houchard         }
486315144b0fSOlivier Houchard         float_raise( float_flag_invalid );
486415144b0fSOlivier Houchard         z.low = float128_default_nan_low;
486515144b0fSOlivier Houchard         z.high = float128_default_nan_high;
486615144b0fSOlivier Houchard         return z;
486715144b0fSOlivier Houchard     }
486815144b0fSOlivier Houchard     if ( aExp == 0 ) {
486915144b0fSOlivier Houchard         aExp = 1;
487015144b0fSOlivier Houchard         bExp = 1;
487115144b0fSOlivier Houchard     }
487215144b0fSOlivier Houchard     if ( bSig0 < aSig0 ) goto aBigger;
487315144b0fSOlivier Houchard     if ( aSig0 < bSig0 ) goto bBigger;
487415144b0fSOlivier Houchard     if ( bSig1 < aSig1 ) goto aBigger;
487515144b0fSOlivier Houchard     if ( aSig1 < bSig1 ) goto bBigger;
487615144b0fSOlivier Houchard     return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
487715144b0fSOlivier Houchard  bExpBigger:
487815144b0fSOlivier Houchard     if ( bExp == 0x7FFF ) {
487915144b0fSOlivier Houchard         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
488015144b0fSOlivier Houchard         return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
488115144b0fSOlivier Houchard     }
488215144b0fSOlivier Houchard     if ( aExp == 0 ) {
488315144b0fSOlivier Houchard         ++expDiff;
488415144b0fSOlivier Houchard     }
488515144b0fSOlivier Houchard     else {
488615144b0fSOlivier Houchard         aSig0 |= LIT64( 0x4000000000000000 );
488715144b0fSOlivier Houchard     }
488815144b0fSOlivier Houchard     shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
488915144b0fSOlivier Houchard     bSig0 |= LIT64( 0x4000000000000000 );
489015144b0fSOlivier Houchard  bBigger:
489115144b0fSOlivier Houchard     sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
489215144b0fSOlivier Houchard     zExp = bExp;
489315144b0fSOlivier Houchard     zSign ^= 1;
489415144b0fSOlivier Houchard     goto normalizeRoundAndPack;
489515144b0fSOlivier Houchard  aExpBigger:
489615144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
489715144b0fSOlivier Houchard         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
489815144b0fSOlivier Houchard         return a;
489915144b0fSOlivier Houchard     }
490015144b0fSOlivier Houchard     if ( bExp == 0 ) {
490115144b0fSOlivier Houchard         --expDiff;
490215144b0fSOlivier Houchard     }
490315144b0fSOlivier Houchard     else {
490415144b0fSOlivier Houchard         bSig0 |= LIT64( 0x4000000000000000 );
490515144b0fSOlivier Houchard     }
490615144b0fSOlivier Houchard     shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
490715144b0fSOlivier Houchard     aSig0 |= LIT64( 0x4000000000000000 );
490815144b0fSOlivier Houchard  aBigger:
490915144b0fSOlivier Houchard     sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
491015144b0fSOlivier Houchard     zExp = aExp;
491115144b0fSOlivier Houchard  normalizeRoundAndPack:
491215144b0fSOlivier Houchard     --zExp;
491315144b0fSOlivier Houchard     return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
491415144b0fSOlivier Houchard 
491515144b0fSOlivier Houchard }
491615144b0fSOlivier Houchard 
491715144b0fSOlivier Houchard /*
491815144b0fSOlivier Houchard -------------------------------------------------------------------------------
491915144b0fSOlivier Houchard Returns the result of adding the quadruple-precision floating-point values
492015144b0fSOlivier Houchard `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
492115144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
492215144b0fSOlivier Houchard -------------------------------------------------------------------------------
492315144b0fSOlivier Houchard */
float128_add(float128 a,float128 b)492415144b0fSOlivier Houchard float128 float128_add( float128 a, float128 b )
492515144b0fSOlivier Houchard {
492615144b0fSOlivier Houchard     flag aSign, bSign;
492715144b0fSOlivier Houchard 
492815144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
492915144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
493015144b0fSOlivier Houchard     if ( aSign == bSign ) {
493115144b0fSOlivier Houchard         return addFloat128Sigs( a, b, aSign );
493215144b0fSOlivier Houchard     }
493315144b0fSOlivier Houchard     else {
493415144b0fSOlivier Houchard         return subFloat128Sigs( a, b, aSign );
493515144b0fSOlivier Houchard     }
493615144b0fSOlivier Houchard 
493715144b0fSOlivier Houchard }
493815144b0fSOlivier Houchard 
493915144b0fSOlivier Houchard /*
494015144b0fSOlivier Houchard -------------------------------------------------------------------------------
494115144b0fSOlivier Houchard Returns the result of subtracting the quadruple-precision floating-point
494215144b0fSOlivier Houchard values `a' and `b'.  The operation is performed according to the IEC/IEEE
494315144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
494415144b0fSOlivier Houchard -------------------------------------------------------------------------------
494515144b0fSOlivier Houchard */
float128_sub(float128 a,float128 b)494615144b0fSOlivier Houchard float128 float128_sub( float128 a, float128 b )
494715144b0fSOlivier Houchard {
494815144b0fSOlivier Houchard     flag aSign, bSign;
494915144b0fSOlivier Houchard 
495015144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
495115144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
495215144b0fSOlivier Houchard     if ( aSign == bSign ) {
495315144b0fSOlivier Houchard         return subFloat128Sigs( a, b, aSign );
495415144b0fSOlivier Houchard     }
495515144b0fSOlivier Houchard     else {
495615144b0fSOlivier Houchard         return addFloat128Sigs( a, b, aSign );
495715144b0fSOlivier Houchard     }
495815144b0fSOlivier Houchard 
495915144b0fSOlivier Houchard }
496015144b0fSOlivier Houchard 
496115144b0fSOlivier Houchard /*
496215144b0fSOlivier Houchard -------------------------------------------------------------------------------
496315144b0fSOlivier Houchard Returns the result of multiplying the quadruple-precision floating-point
496415144b0fSOlivier Houchard values `a' and `b'.  The operation is performed according to the IEC/IEEE
496515144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
496615144b0fSOlivier Houchard -------------------------------------------------------------------------------
496715144b0fSOlivier Houchard */
float128_mul(float128 a,float128 b)496815144b0fSOlivier Houchard float128 float128_mul( float128 a, float128 b )
496915144b0fSOlivier Houchard {
497015144b0fSOlivier Houchard     flag aSign, bSign, zSign;
497115144b0fSOlivier Houchard     int32 aExp, bExp, zExp;
497215144b0fSOlivier Houchard     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
497315144b0fSOlivier Houchard     float128 z;
497415144b0fSOlivier Houchard 
497515144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
497615144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
497715144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
497815144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
497915144b0fSOlivier Houchard     bSig1 = extractFloat128Frac1( b );
498015144b0fSOlivier Houchard     bSig0 = extractFloat128Frac0( b );
498115144b0fSOlivier Houchard     bExp = extractFloat128Exp( b );
498215144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
498315144b0fSOlivier Houchard     zSign = aSign ^ bSign;
498415144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
498515144b0fSOlivier Houchard         if (    ( aSig0 | aSig1 )
498615144b0fSOlivier Houchard              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
498715144b0fSOlivier Houchard             return propagateFloat128NaN( a, b );
498815144b0fSOlivier Houchard         }
498915144b0fSOlivier Houchard         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
499015144b0fSOlivier Houchard         return packFloat128( zSign, 0x7FFF, 0, 0 );
499115144b0fSOlivier Houchard     }
499215144b0fSOlivier Houchard     if ( bExp == 0x7FFF ) {
499315144b0fSOlivier Houchard         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
499415144b0fSOlivier Houchard         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
499515144b0fSOlivier Houchard  invalid:
499615144b0fSOlivier Houchard             float_raise( float_flag_invalid );
499715144b0fSOlivier Houchard             z.low = float128_default_nan_low;
499815144b0fSOlivier Houchard             z.high = float128_default_nan_high;
499915144b0fSOlivier Houchard             return z;
500015144b0fSOlivier Houchard         }
500115144b0fSOlivier Houchard         return packFloat128( zSign, 0x7FFF, 0, 0 );
500215144b0fSOlivier Houchard     }
500315144b0fSOlivier Houchard     if ( aExp == 0 ) {
500415144b0fSOlivier Houchard         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
500515144b0fSOlivier Houchard         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
500615144b0fSOlivier Houchard     }
500715144b0fSOlivier Houchard     if ( bExp == 0 ) {
500815144b0fSOlivier Houchard         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
500915144b0fSOlivier Houchard         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
501015144b0fSOlivier Houchard     }
501115144b0fSOlivier Houchard     zExp = aExp + bExp - 0x4000;
501215144b0fSOlivier Houchard     aSig0 |= LIT64( 0x0001000000000000 );
501315144b0fSOlivier Houchard     shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
501415144b0fSOlivier Houchard     mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
501515144b0fSOlivier Houchard     add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
501615144b0fSOlivier Houchard     zSig2 |= ( zSig3 != 0 );
501715144b0fSOlivier Houchard     if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
501815144b0fSOlivier Houchard         shift128ExtraRightJamming(
501915144b0fSOlivier Houchard             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
502015144b0fSOlivier Houchard         ++zExp;
502115144b0fSOlivier Houchard     }
502215144b0fSOlivier Houchard     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
502315144b0fSOlivier Houchard 
502415144b0fSOlivier Houchard }
502515144b0fSOlivier Houchard 
502615144b0fSOlivier Houchard /*
502715144b0fSOlivier Houchard -------------------------------------------------------------------------------
502815144b0fSOlivier Houchard Returns the result of dividing the quadruple-precision floating-point value
502915144b0fSOlivier Houchard `a' by the corresponding value `b'.  The operation is performed according to
503015144b0fSOlivier Houchard the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
503115144b0fSOlivier Houchard -------------------------------------------------------------------------------
503215144b0fSOlivier Houchard */
float128_div(float128 a,float128 b)503315144b0fSOlivier Houchard float128 float128_div( float128 a, float128 b )
503415144b0fSOlivier Houchard {
503515144b0fSOlivier Houchard     flag aSign, bSign, zSign;
503615144b0fSOlivier Houchard     int32 aExp, bExp, zExp;
503715144b0fSOlivier Houchard     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
503815144b0fSOlivier Houchard     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
503915144b0fSOlivier Houchard     float128 z;
504015144b0fSOlivier Houchard 
504115144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
504215144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
504315144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
504415144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
504515144b0fSOlivier Houchard     bSig1 = extractFloat128Frac1( b );
504615144b0fSOlivier Houchard     bSig0 = extractFloat128Frac0( b );
504715144b0fSOlivier Houchard     bExp = extractFloat128Exp( b );
504815144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
504915144b0fSOlivier Houchard     zSign = aSign ^ bSign;
505015144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
505115144b0fSOlivier Houchard         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
505215144b0fSOlivier Houchard         if ( bExp == 0x7FFF ) {
505315144b0fSOlivier Houchard             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
505415144b0fSOlivier Houchard             goto invalid;
505515144b0fSOlivier Houchard         }
505615144b0fSOlivier Houchard         return packFloat128( zSign, 0x7FFF, 0, 0 );
505715144b0fSOlivier Houchard     }
505815144b0fSOlivier Houchard     if ( bExp == 0x7FFF ) {
505915144b0fSOlivier Houchard         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
506015144b0fSOlivier Houchard         return packFloat128( zSign, 0, 0, 0 );
506115144b0fSOlivier Houchard     }
506215144b0fSOlivier Houchard     if ( bExp == 0 ) {
506315144b0fSOlivier Houchard         if ( ( bSig0 | bSig1 ) == 0 ) {
506415144b0fSOlivier Houchard             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
506515144b0fSOlivier Houchard  invalid:
506615144b0fSOlivier Houchard                 float_raise( float_flag_invalid );
506715144b0fSOlivier Houchard                 z.low = float128_default_nan_low;
506815144b0fSOlivier Houchard                 z.high = float128_default_nan_high;
506915144b0fSOlivier Houchard                 return z;
507015144b0fSOlivier Houchard             }
507115144b0fSOlivier Houchard             float_raise( float_flag_divbyzero );
507215144b0fSOlivier Houchard             return packFloat128( zSign, 0x7FFF, 0, 0 );
507315144b0fSOlivier Houchard         }
507415144b0fSOlivier Houchard         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
507515144b0fSOlivier Houchard     }
507615144b0fSOlivier Houchard     if ( aExp == 0 ) {
507715144b0fSOlivier Houchard         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
507815144b0fSOlivier Houchard         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
507915144b0fSOlivier Houchard     }
508015144b0fSOlivier Houchard     zExp = aExp - bExp + 0x3FFD;
508115144b0fSOlivier Houchard     shortShift128Left(
508215144b0fSOlivier Houchard         aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
508315144b0fSOlivier Houchard     shortShift128Left(
508415144b0fSOlivier Houchard         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
508515144b0fSOlivier Houchard     if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
508615144b0fSOlivier Houchard         shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
508715144b0fSOlivier Houchard         ++zExp;
508815144b0fSOlivier Houchard     }
508915144b0fSOlivier Houchard     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
509015144b0fSOlivier Houchard     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
509115144b0fSOlivier Houchard     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
509215144b0fSOlivier Houchard     while ( (sbits64) rem0 < 0 ) {
509315144b0fSOlivier Houchard         --zSig0;
509415144b0fSOlivier Houchard         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
509515144b0fSOlivier Houchard     }
509615144b0fSOlivier Houchard     zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
509715144b0fSOlivier Houchard     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
509815144b0fSOlivier Houchard         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
509915144b0fSOlivier Houchard         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
510015144b0fSOlivier Houchard         while ( (sbits64) rem1 < 0 ) {
510115144b0fSOlivier Houchard             --zSig1;
510215144b0fSOlivier Houchard             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
510315144b0fSOlivier Houchard         }
510415144b0fSOlivier Houchard         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
510515144b0fSOlivier Houchard     }
510615144b0fSOlivier Houchard     shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
510715144b0fSOlivier Houchard     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
510815144b0fSOlivier Houchard 
510915144b0fSOlivier Houchard }
511015144b0fSOlivier Houchard 
511115144b0fSOlivier Houchard /*
511215144b0fSOlivier Houchard -------------------------------------------------------------------------------
511315144b0fSOlivier Houchard Returns the remainder of the quadruple-precision floating-point value `a'
511415144b0fSOlivier Houchard with respect to the corresponding value `b'.  The operation is performed
511515144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
511615144b0fSOlivier Houchard -------------------------------------------------------------------------------
511715144b0fSOlivier Houchard */
float128_rem(float128 a,float128 b)511815144b0fSOlivier Houchard float128 float128_rem( float128 a, float128 b )
511915144b0fSOlivier Houchard {
512015144b0fSOlivier Houchard     flag aSign, bSign, zSign;
512115144b0fSOlivier Houchard     int32 aExp, bExp, expDiff;
512215144b0fSOlivier Houchard     bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
512315144b0fSOlivier Houchard     bits64 allZero, alternateASig0, alternateASig1, sigMean1;
512415144b0fSOlivier Houchard     sbits64 sigMean0;
512515144b0fSOlivier Houchard     float128 z;
512615144b0fSOlivier Houchard 
512715144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
512815144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
512915144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
513015144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
513115144b0fSOlivier Houchard     bSig1 = extractFloat128Frac1( b );
513215144b0fSOlivier Houchard     bSig0 = extractFloat128Frac0( b );
513315144b0fSOlivier Houchard     bExp = extractFloat128Exp( b );
513415144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
513515144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
513615144b0fSOlivier Houchard         if (    ( aSig0 | aSig1 )
513715144b0fSOlivier Houchard              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
513815144b0fSOlivier Houchard             return propagateFloat128NaN( a, b );
513915144b0fSOlivier Houchard         }
514015144b0fSOlivier Houchard         goto invalid;
514115144b0fSOlivier Houchard     }
514215144b0fSOlivier Houchard     if ( bExp == 0x7FFF ) {
514315144b0fSOlivier Houchard         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
514415144b0fSOlivier Houchard         return a;
514515144b0fSOlivier Houchard     }
514615144b0fSOlivier Houchard     if ( bExp == 0 ) {
514715144b0fSOlivier Houchard         if ( ( bSig0 | bSig1 ) == 0 ) {
514815144b0fSOlivier Houchard  invalid:
514915144b0fSOlivier Houchard             float_raise( float_flag_invalid );
515015144b0fSOlivier Houchard             z.low = float128_default_nan_low;
515115144b0fSOlivier Houchard             z.high = float128_default_nan_high;
515215144b0fSOlivier Houchard             return z;
515315144b0fSOlivier Houchard         }
515415144b0fSOlivier Houchard         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
515515144b0fSOlivier Houchard     }
515615144b0fSOlivier Houchard     if ( aExp == 0 ) {
515715144b0fSOlivier Houchard         if ( ( aSig0 | aSig1 ) == 0 ) return a;
515815144b0fSOlivier Houchard         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
515915144b0fSOlivier Houchard     }
516015144b0fSOlivier Houchard     expDiff = aExp - bExp;
516115144b0fSOlivier Houchard     if ( expDiff < -1 ) return a;
516215144b0fSOlivier Houchard     shortShift128Left(
516315144b0fSOlivier Houchard         aSig0 | LIT64( 0x0001000000000000 ),
516415144b0fSOlivier Houchard         aSig1,
516515144b0fSOlivier Houchard         15 - ( expDiff < 0 ),
516615144b0fSOlivier Houchard         &aSig0,
516715144b0fSOlivier Houchard         &aSig1
516815144b0fSOlivier Houchard     );
516915144b0fSOlivier Houchard     shortShift128Left(
517015144b0fSOlivier Houchard         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
517115144b0fSOlivier Houchard     q = le128( bSig0, bSig1, aSig0, aSig1 );
517215144b0fSOlivier Houchard     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
517315144b0fSOlivier Houchard     expDiff -= 64;
517415144b0fSOlivier Houchard     while ( 0 < expDiff ) {
517515144b0fSOlivier Houchard         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
517615144b0fSOlivier Houchard         q = ( 4 < q ) ? q - 4 : 0;
517715144b0fSOlivier Houchard         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
517815144b0fSOlivier Houchard         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
517915144b0fSOlivier Houchard         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
518015144b0fSOlivier Houchard         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
518115144b0fSOlivier Houchard         expDiff -= 61;
518215144b0fSOlivier Houchard     }
518315144b0fSOlivier Houchard     if ( -64 < expDiff ) {
518415144b0fSOlivier Houchard         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
518515144b0fSOlivier Houchard         q = ( 4 < q ) ? q - 4 : 0;
518615144b0fSOlivier Houchard         q >>= - expDiff;
518715144b0fSOlivier Houchard         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
518815144b0fSOlivier Houchard         expDiff += 52;
518915144b0fSOlivier Houchard         if ( expDiff < 0 ) {
519015144b0fSOlivier Houchard             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
519115144b0fSOlivier Houchard         }
519215144b0fSOlivier Houchard         else {
519315144b0fSOlivier Houchard             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
519415144b0fSOlivier Houchard         }
519515144b0fSOlivier Houchard         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
519615144b0fSOlivier Houchard         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
519715144b0fSOlivier Houchard     }
519815144b0fSOlivier Houchard     else {
519915144b0fSOlivier Houchard         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
520015144b0fSOlivier Houchard         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
520115144b0fSOlivier Houchard     }
520215144b0fSOlivier Houchard     do {
520315144b0fSOlivier Houchard         alternateASig0 = aSig0;
520415144b0fSOlivier Houchard         alternateASig1 = aSig1;
520515144b0fSOlivier Houchard         ++q;
520615144b0fSOlivier Houchard         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
520715144b0fSOlivier Houchard     } while ( 0 <= (sbits64) aSig0 );
520815144b0fSOlivier Houchard     add128(
5209c36abe0dSDavid Schultz         aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
521015144b0fSOlivier Houchard     if (    ( sigMean0 < 0 )
521115144b0fSOlivier Houchard          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
521215144b0fSOlivier Houchard         aSig0 = alternateASig0;
521315144b0fSOlivier Houchard         aSig1 = alternateASig1;
521415144b0fSOlivier Houchard     }
521515144b0fSOlivier Houchard     zSign = ( (sbits64) aSig0 < 0 );
521615144b0fSOlivier Houchard     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
521715144b0fSOlivier Houchard     return
521815144b0fSOlivier Houchard         normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
521915144b0fSOlivier Houchard 
522015144b0fSOlivier Houchard }
522115144b0fSOlivier Houchard 
522215144b0fSOlivier Houchard /*
522315144b0fSOlivier Houchard -------------------------------------------------------------------------------
522415144b0fSOlivier Houchard Returns the square root of the quadruple-precision floating-point value `a'.
522515144b0fSOlivier Houchard The operation is performed according to the IEC/IEEE Standard for Binary
522615144b0fSOlivier Houchard Floating-Point Arithmetic.
522715144b0fSOlivier Houchard -------------------------------------------------------------------------------
522815144b0fSOlivier Houchard */
float128_sqrt(float128 a)522915144b0fSOlivier Houchard float128 float128_sqrt( float128 a )
523015144b0fSOlivier Houchard {
523115144b0fSOlivier Houchard     flag aSign;
523215144b0fSOlivier Houchard     int32 aExp, zExp;
523315144b0fSOlivier Houchard     bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
523415144b0fSOlivier Houchard     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
523515144b0fSOlivier Houchard     float128 z;
523615144b0fSOlivier Houchard 
523715144b0fSOlivier Houchard     aSig1 = extractFloat128Frac1( a );
523815144b0fSOlivier Houchard     aSig0 = extractFloat128Frac0( a );
523915144b0fSOlivier Houchard     aExp = extractFloat128Exp( a );
524015144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
524115144b0fSOlivier Houchard     if ( aExp == 0x7FFF ) {
524215144b0fSOlivier Houchard         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
524315144b0fSOlivier Houchard         if ( ! aSign ) return a;
524415144b0fSOlivier Houchard         goto invalid;
524515144b0fSOlivier Houchard     }
524615144b0fSOlivier Houchard     if ( aSign ) {
524715144b0fSOlivier Houchard         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
524815144b0fSOlivier Houchard  invalid:
524915144b0fSOlivier Houchard         float_raise( float_flag_invalid );
525015144b0fSOlivier Houchard         z.low = float128_default_nan_low;
525115144b0fSOlivier Houchard         z.high = float128_default_nan_high;
525215144b0fSOlivier Houchard         return z;
525315144b0fSOlivier Houchard     }
525415144b0fSOlivier Houchard     if ( aExp == 0 ) {
525515144b0fSOlivier Houchard         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
525615144b0fSOlivier Houchard         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
525715144b0fSOlivier Houchard     }
525815144b0fSOlivier Houchard     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
525915144b0fSOlivier Houchard     aSig0 |= LIT64( 0x0001000000000000 );
526015144b0fSOlivier Houchard     zSig0 = estimateSqrt32( aExp, aSig0>>17 );
526115144b0fSOlivier Houchard     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
526215144b0fSOlivier Houchard     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
526315144b0fSOlivier Houchard     doubleZSig0 = zSig0<<1;
526415144b0fSOlivier Houchard     mul64To128( zSig0, zSig0, &term0, &term1 );
526515144b0fSOlivier Houchard     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
526615144b0fSOlivier Houchard     while ( (sbits64) rem0 < 0 ) {
526715144b0fSOlivier Houchard         --zSig0;
526815144b0fSOlivier Houchard         doubleZSig0 -= 2;
526915144b0fSOlivier Houchard         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
527015144b0fSOlivier Houchard     }
527115144b0fSOlivier Houchard     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
527215144b0fSOlivier Houchard     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
527315144b0fSOlivier Houchard         if ( zSig1 == 0 ) zSig1 = 1;
527415144b0fSOlivier Houchard         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
527515144b0fSOlivier Houchard         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
527615144b0fSOlivier Houchard         mul64To128( zSig1, zSig1, &term2, &term3 );
527715144b0fSOlivier Houchard         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
527815144b0fSOlivier Houchard         while ( (sbits64) rem1 < 0 ) {
527915144b0fSOlivier Houchard             --zSig1;
528015144b0fSOlivier Houchard             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
528115144b0fSOlivier Houchard             term3 |= 1;
528215144b0fSOlivier Houchard             term2 |= doubleZSig0;
528315144b0fSOlivier Houchard             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
528415144b0fSOlivier Houchard         }
528515144b0fSOlivier Houchard         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
528615144b0fSOlivier Houchard     }
528715144b0fSOlivier Houchard     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
528815144b0fSOlivier Houchard     return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
528915144b0fSOlivier Houchard 
529015144b0fSOlivier Houchard }
529115144b0fSOlivier Houchard 
529215144b0fSOlivier Houchard /*
529315144b0fSOlivier Houchard -------------------------------------------------------------------------------
529415144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is equal to
529515144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  The comparison is performed
529615144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
529715144b0fSOlivier Houchard -------------------------------------------------------------------------------
529815144b0fSOlivier Houchard */
float128_eq(float128 a,float128 b)529915144b0fSOlivier Houchard flag float128_eq( float128 a, float128 b )
530015144b0fSOlivier Houchard {
530115144b0fSOlivier Houchard 
530215144b0fSOlivier Houchard     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
530315144b0fSOlivier Houchard               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
530415144b0fSOlivier Houchard          || (    ( extractFloat128Exp( b ) == 0x7FFF )
530515144b0fSOlivier Houchard               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
530615144b0fSOlivier Houchard        ) {
530715144b0fSOlivier Houchard         if (    float128_is_signaling_nan( a )
530815144b0fSOlivier Houchard              || float128_is_signaling_nan( b ) ) {
530915144b0fSOlivier Houchard             float_raise( float_flag_invalid );
531015144b0fSOlivier Houchard         }
531115144b0fSOlivier Houchard         return 0;
531215144b0fSOlivier Houchard     }
531315144b0fSOlivier Houchard     return
531415144b0fSOlivier Houchard            ( a.low == b.low )
531515144b0fSOlivier Houchard         && (    ( a.high == b.high )
531615144b0fSOlivier Houchard              || (    ( a.low == 0 )
531715144b0fSOlivier Houchard                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
531815144b0fSOlivier Houchard            );
531915144b0fSOlivier Houchard 
532015144b0fSOlivier Houchard }
532115144b0fSOlivier Houchard 
532215144b0fSOlivier Houchard /*
532315144b0fSOlivier Houchard -------------------------------------------------------------------------------
532415144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is less than
532515144b0fSOlivier Houchard or equal to the corresponding value `b', and 0 otherwise.  The comparison
532615144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
532715144b0fSOlivier Houchard Arithmetic.
532815144b0fSOlivier Houchard -------------------------------------------------------------------------------
532915144b0fSOlivier Houchard */
float128_le(float128 a,float128 b)533015144b0fSOlivier Houchard flag float128_le( float128 a, float128 b )
533115144b0fSOlivier Houchard {
533215144b0fSOlivier Houchard     flag aSign, bSign;
533315144b0fSOlivier Houchard 
533415144b0fSOlivier Houchard     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
533515144b0fSOlivier Houchard               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
533615144b0fSOlivier Houchard          || (    ( extractFloat128Exp( b ) == 0x7FFF )
533715144b0fSOlivier Houchard               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
533815144b0fSOlivier Houchard        ) {
533915144b0fSOlivier Houchard         float_raise( float_flag_invalid );
534015144b0fSOlivier Houchard         return 0;
534115144b0fSOlivier Houchard     }
534215144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
534315144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
534415144b0fSOlivier Houchard     if ( aSign != bSign ) {
534515144b0fSOlivier Houchard         return
534615144b0fSOlivier Houchard                aSign
534715144b0fSOlivier Houchard             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
534815144b0fSOlivier Houchard                  == 0 );
534915144b0fSOlivier Houchard     }
535015144b0fSOlivier Houchard     return
535115144b0fSOlivier Houchard           aSign ? le128( b.high, b.low, a.high, a.low )
535215144b0fSOlivier Houchard         : le128( a.high, a.low, b.high, b.low );
535315144b0fSOlivier Houchard 
535415144b0fSOlivier Houchard }
535515144b0fSOlivier Houchard 
535615144b0fSOlivier Houchard /*
535715144b0fSOlivier Houchard -------------------------------------------------------------------------------
535815144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is less than
535915144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  The comparison is performed
536015144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
536115144b0fSOlivier Houchard -------------------------------------------------------------------------------
536215144b0fSOlivier Houchard */
float128_lt(float128 a,float128 b)536315144b0fSOlivier Houchard flag float128_lt( float128 a, float128 b )
536415144b0fSOlivier Houchard {
536515144b0fSOlivier Houchard     flag aSign, bSign;
536615144b0fSOlivier Houchard 
536715144b0fSOlivier Houchard     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
536815144b0fSOlivier Houchard               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
536915144b0fSOlivier Houchard          || (    ( extractFloat128Exp( b ) == 0x7FFF )
537015144b0fSOlivier Houchard               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
537115144b0fSOlivier Houchard        ) {
537215144b0fSOlivier Houchard         float_raise( float_flag_invalid );
537315144b0fSOlivier Houchard         return 0;
537415144b0fSOlivier Houchard     }
537515144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
537615144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
537715144b0fSOlivier Houchard     if ( aSign != bSign ) {
537815144b0fSOlivier Houchard         return
537915144b0fSOlivier Houchard                aSign
538015144b0fSOlivier Houchard             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
538115144b0fSOlivier Houchard                  != 0 );
538215144b0fSOlivier Houchard     }
538315144b0fSOlivier Houchard     return
538415144b0fSOlivier Houchard           aSign ? lt128( b.high, b.low, a.high, a.low )
538515144b0fSOlivier Houchard         : lt128( a.high, a.low, b.high, b.low );
538615144b0fSOlivier Houchard 
538715144b0fSOlivier Houchard }
538815144b0fSOlivier Houchard 
538915144b0fSOlivier Houchard /*
539015144b0fSOlivier Houchard -------------------------------------------------------------------------------
539115144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is equal to
539215144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  The invalid exception is
539315144b0fSOlivier Houchard raised if either operand is a NaN.  Otherwise, the comparison is performed
539415144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
539515144b0fSOlivier Houchard -------------------------------------------------------------------------------
539615144b0fSOlivier Houchard */
float128_eq_signaling(float128 a,float128 b)539715144b0fSOlivier Houchard flag float128_eq_signaling( float128 a, float128 b )
539815144b0fSOlivier Houchard {
539915144b0fSOlivier Houchard 
540015144b0fSOlivier Houchard     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
540115144b0fSOlivier Houchard               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
540215144b0fSOlivier Houchard          || (    ( extractFloat128Exp( b ) == 0x7FFF )
540315144b0fSOlivier Houchard               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
540415144b0fSOlivier Houchard        ) {
540515144b0fSOlivier Houchard         float_raise( float_flag_invalid );
540615144b0fSOlivier Houchard         return 0;
540715144b0fSOlivier Houchard     }
540815144b0fSOlivier Houchard     return
540915144b0fSOlivier Houchard            ( a.low == b.low )
541015144b0fSOlivier Houchard         && (    ( a.high == b.high )
541115144b0fSOlivier Houchard              || (    ( a.low == 0 )
541215144b0fSOlivier Houchard                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
541315144b0fSOlivier Houchard            );
541415144b0fSOlivier Houchard 
541515144b0fSOlivier Houchard }
541615144b0fSOlivier Houchard 
541715144b0fSOlivier Houchard /*
541815144b0fSOlivier Houchard -------------------------------------------------------------------------------
541915144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is less than
542015144b0fSOlivier Houchard or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
542115144b0fSOlivier Houchard cause an exception.  Otherwise, the comparison is performed according to the
542215144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
542315144b0fSOlivier Houchard -------------------------------------------------------------------------------
542415144b0fSOlivier Houchard */
float128_le_quiet(float128 a,float128 b)542515144b0fSOlivier Houchard flag float128_le_quiet( float128 a, float128 b )
542615144b0fSOlivier Houchard {
542715144b0fSOlivier Houchard     flag aSign, bSign;
542815144b0fSOlivier Houchard 
542915144b0fSOlivier Houchard     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
543015144b0fSOlivier Houchard               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
543115144b0fSOlivier Houchard          || (    ( extractFloat128Exp( b ) == 0x7FFF )
543215144b0fSOlivier Houchard               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
543315144b0fSOlivier Houchard        ) {
543415144b0fSOlivier Houchard         if (    float128_is_signaling_nan( a )
543515144b0fSOlivier Houchard              || float128_is_signaling_nan( b ) ) {
543615144b0fSOlivier Houchard             float_raise( float_flag_invalid );
543715144b0fSOlivier Houchard         }
543815144b0fSOlivier Houchard         return 0;
543915144b0fSOlivier Houchard     }
544015144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
544115144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
544215144b0fSOlivier Houchard     if ( aSign != bSign ) {
544315144b0fSOlivier Houchard         return
544415144b0fSOlivier Houchard                aSign
544515144b0fSOlivier Houchard             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
544615144b0fSOlivier Houchard                  == 0 );
544715144b0fSOlivier Houchard     }
544815144b0fSOlivier Houchard     return
544915144b0fSOlivier Houchard           aSign ? le128( b.high, b.low, a.high, a.low )
545015144b0fSOlivier Houchard         : le128( a.high, a.low, b.high, b.low );
545115144b0fSOlivier Houchard 
545215144b0fSOlivier Houchard }
545315144b0fSOlivier Houchard 
545415144b0fSOlivier Houchard /*
545515144b0fSOlivier Houchard -------------------------------------------------------------------------------
545615144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is less than
545715144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
545815144b0fSOlivier Houchard exception.  Otherwise, the comparison is performed according to the IEC/IEEE
545915144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
546015144b0fSOlivier Houchard -------------------------------------------------------------------------------
546115144b0fSOlivier Houchard */
float128_lt_quiet(float128 a,float128 b)546215144b0fSOlivier Houchard flag float128_lt_quiet( float128 a, float128 b )
546315144b0fSOlivier Houchard {
546415144b0fSOlivier Houchard     flag aSign, bSign;
546515144b0fSOlivier Houchard 
546615144b0fSOlivier Houchard     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
546715144b0fSOlivier Houchard               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
546815144b0fSOlivier Houchard          || (    ( extractFloat128Exp( b ) == 0x7FFF )
546915144b0fSOlivier Houchard               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
547015144b0fSOlivier Houchard        ) {
547115144b0fSOlivier Houchard         if (    float128_is_signaling_nan( a )
547215144b0fSOlivier Houchard              || float128_is_signaling_nan( b ) ) {
547315144b0fSOlivier Houchard             float_raise( float_flag_invalid );
547415144b0fSOlivier Houchard         }
547515144b0fSOlivier Houchard         return 0;
547615144b0fSOlivier Houchard     }
547715144b0fSOlivier Houchard     aSign = extractFloat128Sign( a );
547815144b0fSOlivier Houchard     bSign = extractFloat128Sign( b );
547915144b0fSOlivier Houchard     if ( aSign != bSign ) {
548015144b0fSOlivier Houchard         return
548115144b0fSOlivier Houchard                aSign
548215144b0fSOlivier Houchard             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
548315144b0fSOlivier Houchard                  != 0 );
548415144b0fSOlivier Houchard     }
548515144b0fSOlivier Houchard     return
548615144b0fSOlivier Houchard           aSign ? lt128( b.high, b.low, a.high, a.low )
548715144b0fSOlivier Houchard         : lt128( a.high, a.low, b.high, b.low );
548815144b0fSOlivier Houchard 
548915144b0fSOlivier Houchard }
549015144b0fSOlivier Houchard 
549115144b0fSOlivier Houchard #endif
549215144b0fSOlivier Houchard 
549315144b0fSOlivier Houchard 
549415144b0fSOlivier Houchard #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
549515144b0fSOlivier Houchard 
549615144b0fSOlivier Houchard /*
549715144b0fSOlivier Houchard  * These two routines are not part of the original softfloat distribution.
549815144b0fSOlivier Houchard  *
549915144b0fSOlivier Houchard  * They are based on the corresponding conversions to integer but return
550015144b0fSOlivier Houchard  * unsigned numbers instead since these functions are required by GCC.
550115144b0fSOlivier Houchard  *
550215144b0fSOlivier Houchard  * Added by Mark Brinicombe <mark@NetBSD.org>	27/09/97
550315144b0fSOlivier Houchard  *
550415144b0fSOlivier Houchard  * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
550515144b0fSOlivier Houchard  */
550615144b0fSOlivier Houchard 
550715144b0fSOlivier Houchard /*
550815144b0fSOlivier Houchard -------------------------------------------------------------------------------
550915144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
551015144b0fSOlivier Houchard `a' to the 32-bit unsigned integer format.  The conversion is
551115144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-point
551215144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.  If
551315144b0fSOlivier Houchard `a' is a NaN, the largest positive integer is returned.  If the conversion
551415144b0fSOlivier Houchard overflows, the largest integer positive is returned.
551515144b0fSOlivier Houchard -------------------------------------------------------------------------------
551615144b0fSOlivier Houchard */
float64_to_uint32_round_to_zero(float64 a)551715144b0fSOlivier Houchard uint32 float64_to_uint32_round_to_zero( float64 a )
551815144b0fSOlivier Houchard {
551915144b0fSOlivier Houchard     flag aSign;
552015144b0fSOlivier Houchard     int16 aExp, shiftCount;
552115144b0fSOlivier Houchard     bits64 aSig, savedASig;
552215144b0fSOlivier Houchard     uint32 z;
552315144b0fSOlivier Houchard 
552415144b0fSOlivier Houchard     aSig = extractFloat64Frac( a );
552515144b0fSOlivier Houchard     aExp = extractFloat64Exp( a );
552615144b0fSOlivier Houchard     aSign = extractFloat64Sign( a );
552715144b0fSOlivier Houchard 
552815144b0fSOlivier Houchard     if (aSign) {
552915144b0fSOlivier Houchard         float_raise( float_flag_invalid );
553015144b0fSOlivier Houchard     	return(0);
553115144b0fSOlivier Houchard     }
553215144b0fSOlivier Houchard 
553315144b0fSOlivier Houchard     if ( 0x41E < aExp ) {
553415144b0fSOlivier Houchard         float_raise( float_flag_invalid );
553515144b0fSOlivier Houchard         return 0xffffffff;
553615144b0fSOlivier Houchard     }
553715144b0fSOlivier Houchard     else if ( aExp < 0x3FF ) {
553815144b0fSOlivier Houchard         if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
553915144b0fSOlivier Houchard         return 0;
554015144b0fSOlivier Houchard     }
554115144b0fSOlivier Houchard     aSig |= LIT64( 0x0010000000000000 );
554215144b0fSOlivier Houchard     shiftCount = 0x433 - aExp;
554315144b0fSOlivier Houchard     savedASig = aSig;
554415144b0fSOlivier Houchard     aSig >>= shiftCount;
554515144b0fSOlivier Houchard     z = aSig;
554615144b0fSOlivier Houchard     if ( ( aSig<<shiftCount ) != savedASig ) {
554715144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
554815144b0fSOlivier Houchard     }
554915144b0fSOlivier Houchard     return z;
555015144b0fSOlivier Houchard 
555115144b0fSOlivier Houchard }
555215144b0fSOlivier Houchard 
555315144b0fSOlivier Houchard /*
555415144b0fSOlivier Houchard -------------------------------------------------------------------------------
555515144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
555615144b0fSOlivier Houchard `a' to the 32-bit unsigned integer format.  The conversion is
555715144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-point
555815144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.  If
555915144b0fSOlivier Houchard `a' is a NaN, the largest positive integer is returned.  If the conversion
556015144b0fSOlivier Houchard overflows, the largest positive integer is returned.
556115144b0fSOlivier Houchard -------------------------------------------------------------------------------
556215144b0fSOlivier Houchard */
float32_to_uint32_round_to_zero(float32 a)556315144b0fSOlivier Houchard uint32 float32_to_uint32_round_to_zero( float32 a )
556415144b0fSOlivier Houchard {
556515144b0fSOlivier Houchard     flag aSign;
556615144b0fSOlivier Houchard     int16 aExp, shiftCount;
556715144b0fSOlivier Houchard     bits32 aSig;
556815144b0fSOlivier Houchard     uint32 z;
556915144b0fSOlivier Houchard 
557015144b0fSOlivier Houchard     aSig = extractFloat32Frac( a );
557115144b0fSOlivier Houchard     aExp = extractFloat32Exp( a );
557215144b0fSOlivier Houchard     aSign = extractFloat32Sign( a );
557315144b0fSOlivier Houchard     shiftCount = aExp - 0x9E;
557415144b0fSOlivier Houchard 
557515144b0fSOlivier Houchard     if (aSign) {
557615144b0fSOlivier Houchard         float_raise( float_flag_invalid );
557715144b0fSOlivier Houchard     	return(0);
557815144b0fSOlivier Houchard     }
557915144b0fSOlivier Houchard     if ( 0 < shiftCount ) {
558015144b0fSOlivier Houchard         float_raise( float_flag_invalid );
558115144b0fSOlivier Houchard         return 0xFFFFFFFF;
558215144b0fSOlivier Houchard     }
558315144b0fSOlivier Houchard     else if ( aExp <= 0x7E ) {
558415144b0fSOlivier Houchard         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
558515144b0fSOlivier Houchard         return 0;
558615144b0fSOlivier Houchard     }
558715144b0fSOlivier Houchard     aSig = ( aSig | 0x800000 )<<8;
558815144b0fSOlivier Houchard     z = aSig>>( - shiftCount );
558915144b0fSOlivier Houchard     if ( aSig<<( shiftCount & 31 ) ) {
559015144b0fSOlivier Houchard         float_exception_flags |= float_flag_inexact;
559115144b0fSOlivier Houchard     }
559215144b0fSOlivier Houchard     return z;
559315144b0fSOlivier Houchard 
559415144b0fSOlivier Houchard }
559515144b0fSOlivier Houchard 
559615144b0fSOlivier Houchard #endif
5597