xref: /freebsd/lib/libc/softfloat/bits32/softfloat.c (revision 559a218c9b257775fb249b67945fe4a05b7a6b9f)
1 /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
2 
3 /*
4  * This version hacked for use with gcc -msoft-float by bjh21.
5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6  *  itself).
7  */
8 
9 /*
10  * Things you may want to define:
11  *
12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14  *   properly renamed.
15  */
16 
17 /*
18  * This differs from the standard bits32/softfloat.c in that float64
19  * is defined to be a 64-bit integer rather than a structure.  The
20  * structure is float64s, with translation between the two going via
21  * float64u.
22  */
23 
24 /*
25 ===============================================================================
26 
27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
28 Arithmetic Package, Release 2a.
29 
30 Written by John R. Hauser.  This work was made possible in part by the
31 International Computer Science Institute, located at Suite 600, 1947 Center
32 Street, Berkeley, California 94704.  Funding was partially provided by the
33 National Science Foundation under grant MIP-9311980.  The original version
34 of this code was written as part of a project to build a fixed-point vector
35 processor in collaboration with the University of California at Berkeley,
36 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
38 arithmetic/SoftFloat.html'.
39 
40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
42 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
45 
46 Derivative works are acceptable, even for commercial purposes, so long as
47 (1) they include prominent notice that the work is derivative, and (2) they
48 include prominent notice akin to these four paragraphs for those parts of
49 this code that are retained.
50 
51 ===============================================================================
52 */
53 
54 #ifdef SOFTFLOAT_FOR_GCC
55 #include "softfloat-for-gcc.h"
56 #endif
57 
58 #include "milieu.h"
59 #include "softfloat.h"
60 
61 /*
62  * Conversions between floats as stored in memory and floats as
63  * SoftFloat uses them
64  */
65 #ifndef FLOAT64_DEMANGLE
66 #define FLOAT64_DEMANGLE(a)	(a)
67 #endif
68 #ifndef FLOAT64_MANGLE
69 #define FLOAT64_MANGLE(a)	(a)
70 #endif
71 
72 /*
73 -------------------------------------------------------------------------------
74 Floating-point rounding mode and exception flags.
75 -------------------------------------------------------------------------------
76 */
77 int float_rounding_mode = float_round_nearest_even;
78 int float_exception_flags = 0;
79 
80 /*
81 -------------------------------------------------------------------------------
82 Primitive arithmetic functions, including multi-word arithmetic, and
83 division and square root approximations.  (Can be specialized to target if
84 desired.)
85 -------------------------------------------------------------------------------
86 */
87 #include "softfloat-macros"
88 
89 /*
90 -------------------------------------------------------------------------------
91 Functions and definitions to determine:  (1) whether tininess for underflow
92 is detected before or after rounding by default, (2) what (if anything)
93 happens when exceptions are raised, (3) how signaling NaNs are distinguished
94 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
95 are propagated from function inputs to output.  These details are target-
96 specific.
97 -------------------------------------------------------------------------------
98 */
99 #include "softfloat-specialize"
100 
101 /*
102 -------------------------------------------------------------------------------
103 Returns the fraction bits of the single-precision floating-point value `a'.
104 -------------------------------------------------------------------------------
105 */
extractFloat32Frac(float32 a)106 INLINE bits32 extractFloat32Frac( float32 a )
107 {
108 
109     return a & 0x007FFFFF;
110 
111 }
112 
113 /*
114 -------------------------------------------------------------------------------
115 Returns the exponent bits of the single-precision floating-point value `a'.
116 -------------------------------------------------------------------------------
117 */
extractFloat32Exp(float32 a)118 INLINE int16 extractFloat32Exp( float32 a )
119 {
120 
121     return ( a>>23 ) & 0xFF;
122 
123 }
124 
125 /*
126 -------------------------------------------------------------------------------
127 Returns the sign bit of the single-precision floating-point value `a'.
128 -------------------------------------------------------------------------------
129 */
extractFloat32Sign(float32 a)130 INLINE flag extractFloat32Sign( float32 a )
131 {
132 
133     return a>>31;
134 
135 }
136 
137 /*
138 -------------------------------------------------------------------------------
139 Normalizes the subnormal single-precision floating-point value represented
140 by the denormalized significand `aSig'.  The normalized exponent and
141 significand are stored at the locations pointed to by `zExpPtr' and
142 `zSigPtr', respectively.
143 -------------------------------------------------------------------------------
144 */
145 static void
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)146  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
147 {
148     int8 shiftCount;
149 
150     shiftCount = countLeadingZeros32( aSig ) - 8;
151     *zSigPtr = aSig<<shiftCount;
152     *zExpPtr = 1 - shiftCount;
153 
154 }
155 
156 /*
157 -------------------------------------------------------------------------------
158 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
159 single-precision floating-point value, returning the result.  After being
160 shifted into the proper positions, the three fields are simply added
161 together to form the result.  This means that any integer portion of `zSig'
162 will be added into the exponent.  Since a properly normalized significand
163 will have an integer portion equal to 1, the `zExp' input should be 1 less
164 than the desired result exponent whenever `zSig' is a complete, normalized
165 significand.
166 -------------------------------------------------------------------------------
167 */
packFloat32(flag zSign,int16 zExp,bits32 zSig)168 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
169 {
170 
171     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
172 
173 }
174 
175 /*
176 -------------------------------------------------------------------------------
177 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
178 and significand `zSig', and returns the proper single-precision floating-
179 point value corresponding to the abstract input.  Ordinarily, the abstract
180 value is simply rounded and packed into the single-precision format, with
181 the inexact exception raised if the abstract input cannot be represented
182 exactly.  However, if the abstract value is too large, the overflow and
183 inexact exceptions are raised and an infinity or maximal finite value is
184 returned.  If the abstract value is too small, the input value is rounded to
185 a subnormal number, and the underflow and inexact exceptions are raised if
186 the abstract input cannot be represented exactly as a subnormal single-
187 precision floating-point number.
188     The input significand `zSig' has its binary point between bits 30
189 and 29, which is 7 bits to the left of the usual location.  This shifted
190 significand must be normalized or smaller.  If `zSig' is not normalized,
191 `zExp' must be 0; in that case, the result returned is a subnormal number,
192 and it must not require rounding.  In the usual case that `zSig' is
193 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
194 The handling of underflow and overflow follows the IEC/IEEE Standard for
195 Binary Floating-Point Arithmetic.
196 -------------------------------------------------------------------------------
197 */
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)198 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
199 {
200     int8 roundingMode;
201     flag roundNearestEven;
202     int8 roundIncrement, roundBits;
203     flag isTiny;
204 
205     roundingMode = float_rounding_mode;
206     roundNearestEven = roundingMode == float_round_nearest_even;
207     roundIncrement = 0x40;
208     if ( ! roundNearestEven ) {
209         if ( roundingMode == float_round_to_zero ) {
210             roundIncrement = 0;
211         }
212         else {
213             roundIncrement = 0x7F;
214             if ( zSign ) {
215                 if ( roundingMode == float_round_up ) roundIncrement = 0;
216             }
217             else {
218                 if ( roundingMode == float_round_down ) roundIncrement = 0;
219             }
220         }
221     }
222     roundBits = zSig & 0x7F;
223     if ( 0xFD <= (bits16) zExp ) {
224         if (    ( 0xFD < zExp )
225              || (    ( zExp == 0xFD )
226                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
227            ) {
228             float_raise( float_flag_overflow | float_flag_inexact );
229             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
230         }
231         if ( zExp < 0 ) {
232             isTiny =
233                    ( float_detect_tininess == float_tininess_before_rounding )
234                 || ( zExp < -1 )
235                 || ( zSig + roundIncrement < 0x80000000 );
236             shift32RightJamming( zSig, - zExp, &zSig );
237             zExp = 0;
238             roundBits = zSig & 0x7F;
239             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
240         }
241     }
242     if ( roundBits ) float_exception_flags |= float_flag_inexact;
243     zSig = ( zSig + roundIncrement )>>7;
244     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
245     if ( zSig == 0 ) zExp = 0;
246     return packFloat32( zSign, zExp, zSig );
247 
248 }
249 
250 /*
251 -------------------------------------------------------------------------------
252 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
253 and significand `zSig', and returns the proper single-precision floating-
254 point value corresponding to the abstract input.  This routine is just like
255 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
256 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
257 floating-point exponent.
258 -------------------------------------------------------------------------------
259 */
260 static float32
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)261  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
262 {
263     int8 shiftCount;
264 
265     shiftCount = countLeadingZeros32( zSig ) - 1;
266     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
267 
268 }
269 
270 /*
271 -------------------------------------------------------------------------------
272 Returns the least-significant 32 fraction bits of the double-precision
273 floating-point value `a'.
274 -------------------------------------------------------------------------------
275 */
extractFloat64Frac1(float64 a)276 INLINE bits32 extractFloat64Frac1( float64 a )
277 {
278 
279     return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
280 
281 }
282 
283 /*
284 -------------------------------------------------------------------------------
285 Returns the most-significant 20 fraction bits of the double-precision
286 floating-point value `a'.
287 -------------------------------------------------------------------------------
288 */
extractFloat64Frac0(float64 a)289 INLINE bits32 extractFloat64Frac0( float64 a )
290 {
291 
292     return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
293 
294 }
295 
296 /*
297 -------------------------------------------------------------------------------
298 Returns the exponent bits of the double-precision floating-point value `a'.
299 -------------------------------------------------------------------------------
300 */
extractFloat64Exp(float64 a)301 INLINE int16 extractFloat64Exp( float64 a )
302 {
303 
304     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
305 
306 }
307 
308 /*
309 -------------------------------------------------------------------------------
310 Returns the sign bit of the double-precision floating-point value `a'.
311 -------------------------------------------------------------------------------
312 */
extractFloat64Sign(float64 a)313 INLINE flag extractFloat64Sign( float64 a )
314 {
315 
316     return FLOAT64_DEMANGLE(a)>>63;
317 
318 }
319 
320 /*
321 -------------------------------------------------------------------------------
322 Normalizes the subnormal double-precision floating-point value represented
323 by the denormalized significand formed by the concatenation of `aSig0' and
324 `aSig1'.  The normalized exponent is stored at the location pointed to by
325 `zExpPtr'.  The most significant 21 bits of the normalized significand are
326 stored at the location pointed to by `zSig0Ptr', and the least significant
327 32 bits of the normalized significand are stored at the location pointed to
328 by `zSig1Ptr'.
329 -------------------------------------------------------------------------------
330 */
331 static void
normalizeFloat64Subnormal(bits32 aSig0,bits32 aSig1,int16 * zExpPtr,bits32 * zSig0Ptr,bits32 * zSig1Ptr)332  normalizeFloat64Subnormal(
333      bits32 aSig0,
334      bits32 aSig1,
335      int16 *zExpPtr,
336      bits32 *zSig0Ptr,
337      bits32 *zSig1Ptr
338  )
339 {
340     int8 shiftCount;
341 
342     if ( aSig0 == 0 ) {
343         shiftCount = countLeadingZeros32( aSig1 ) - 11;
344         if ( shiftCount < 0 ) {
345             *zSig0Ptr = aSig1>>( - shiftCount );
346             *zSig1Ptr = aSig1<<( shiftCount & 31 );
347         }
348         else {
349             *zSig0Ptr = aSig1<<shiftCount;
350             *zSig1Ptr = 0;
351         }
352         *zExpPtr = - shiftCount - 31;
353     }
354     else {
355         shiftCount = countLeadingZeros32( aSig0 ) - 11;
356         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
357         *zExpPtr = 1 - shiftCount;
358     }
359 
360 }
361 
362 /*
363 -------------------------------------------------------------------------------
364 Packs the sign `zSign', the exponent `zExp', and the significand formed by
365 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
366 point value, returning the result.  After being shifted into the proper
367 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
368 together to form the most significant 32 bits of the result.  This means
369 that any integer portion of `zSig0' will be added into the exponent.  Since
370 a properly normalized significand will have an integer portion equal to 1,
371 the `zExp' input should be 1 less than the desired result exponent whenever
372 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
373 -------------------------------------------------------------------------------
374 */
375 INLINE float64
packFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1)376  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
377 {
378 
379     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
380 			   ( ( (bits64) zExp )<<52 ) +
381 			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
382 
383 
384 }
385 
386 /*
387 -------------------------------------------------------------------------------
388 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
389 and extended significand formed by the concatenation of `zSig0', `zSig1',
390 and `zSig2', and returns the proper double-precision floating-point value
391 corresponding to the abstract input.  Ordinarily, the abstract value is
392 simply rounded and packed into the double-precision format, with the inexact
393 exception raised if the abstract input cannot be represented exactly.
394 However, if the abstract value is too large, the overflow and inexact
395 exceptions are raised and an infinity or maximal finite value is returned.
396 If the abstract value is too small, the input value is rounded to a
397 subnormal number, and the underflow and inexact exceptions are raised if the
398 abstract input cannot be represented exactly as a subnormal double-precision
399 floating-point number.
400     The input significand must be normalized or smaller.  If the input
401 significand is not normalized, `zExp' must be 0; in that case, the result
402 returned is a subnormal number, and it must not require rounding.  In the
403 usual case that the input significand is normalized, `zExp' must be 1 less
404 than the ``true'' floating-point exponent.  The handling of underflow and
405 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
406 -------------------------------------------------------------------------------
407 */
408 static float64
roundAndPackFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1,bits32 zSig2)409  roundAndPackFloat64(
410      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
411 {
412     int8 roundingMode;
413     flag roundNearestEven, increment, isTiny;
414 
415     roundingMode = float_rounding_mode;
416     roundNearestEven = ( roundingMode == float_round_nearest_even );
417     increment = ( (sbits32) zSig2 < 0 );
418     if ( ! roundNearestEven ) {
419         if ( roundingMode == float_round_to_zero ) {
420             increment = 0;
421         }
422         else {
423             if ( zSign ) {
424                 increment = ( roundingMode == float_round_down ) && zSig2;
425             }
426             else {
427                 increment = ( roundingMode == float_round_up ) && zSig2;
428             }
429         }
430     }
431     if ( 0x7FD <= (bits16) zExp ) {
432         if (    ( 0x7FD < zExp )
433              || (    ( zExp == 0x7FD )
434                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
435                   && increment
436                 )
437            ) {
438             float_raise( float_flag_overflow | float_flag_inexact );
439             if (    ( roundingMode == float_round_to_zero )
440                  || ( zSign && ( roundingMode == float_round_up ) )
441                  || ( ! zSign && ( roundingMode == float_round_down ) )
442                ) {
443                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
444             }
445             return packFloat64( zSign, 0x7FF, 0, 0 );
446         }
447         if ( zExp < 0 ) {
448             isTiny =
449                    ( float_detect_tininess == float_tininess_before_rounding )
450                 || ( zExp < -1 )
451                 || ! increment
452                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
453             shift64ExtraRightJamming(
454                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
455             zExp = 0;
456             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
457             if ( roundNearestEven ) {
458                 increment = ( (sbits32) zSig2 < 0 );
459             }
460             else {
461                 if ( zSign ) {
462                     increment = ( roundingMode == float_round_down ) && zSig2;
463                 }
464                 else {
465                     increment = ( roundingMode == float_round_up ) && zSig2;
466                 }
467             }
468         }
469     }
470     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
471     if ( increment ) {
472         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
473         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
474     }
475     else {
476         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
477     }
478     return packFloat64( zSign, zExp, zSig0, zSig1 );
479 
480 }
481 
482 /*
483 -------------------------------------------------------------------------------
484 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
485 and significand formed by the concatenation of `zSig0' and `zSig1', and
486 returns the proper double-precision floating-point value corresponding
487 to the abstract input.  This routine is just like `roundAndPackFloat64'
488 except that the input significand has fewer bits and does not have to be
489 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
490 point exponent.
491 -------------------------------------------------------------------------------
492 */
493 static float64
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1)494  normalizeRoundAndPackFloat64(
495      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
496 {
497     int8 shiftCount;
498     bits32 zSig2;
499 
500     if ( zSig0 == 0 ) {
501         zSig0 = zSig1;
502         zSig1 = 0;
503         zExp -= 32;
504     }
505     shiftCount = countLeadingZeros32( zSig0 ) - 11;
506     if ( 0 <= shiftCount ) {
507         zSig2 = 0;
508         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
509     }
510     else {
511         shift64ExtraRightJamming(
512             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
513     }
514     zExp -= shiftCount;
515     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
516 
517 }
518 
519 /*
520 -------------------------------------------------------------------------------
521 Returns the result of converting the 32-bit two's complement integer `a' to
522 the single-precision floating-point format.  The conversion is performed
523 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
524 -------------------------------------------------------------------------------
525 */
int32_to_float32(int32 a)526 float32 int32_to_float32( int32 a )
527 {
528     flag zSign;
529 
530     if ( a == 0 ) return 0;
531     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
532     zSign = ( a < 0 );
533     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
534 
535 }
536 
537 /*
538 -------------------------------------------------------------------------------
539 Returns the result of converting the 32-bit two's complement integer `a' to
540 the double-precision floating-point format.  The conversion is performed
541 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
542 -------------------------------------------------------------------------------
543 */
int32_to_float64(int32 a)544 float64 int32_to_float64( int32 a )
545 {
546     flag zSign;
547     bits32 absA;
548     int8 shiftCount;
549     bits32 zSig0, zSig1;
550 
551     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
552     zSign = ( a < 0 );
553     absA = zSign ? - a : a;
554     shiftCount = countLeadingZeros32( absA ) - 11;
555     if ( 0 <= shiftCount ) {
556         zSig0 = absA<<shiftCount;
557         zSig1 = 0;
558     }
559     else {
560         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
561     }
562     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
563 
564 }
565 
566 #ifndef SOFTFLOAT_FOR_GCC
567 /*
568 -------------------------------------------------------------------------------
569 Returns the result of converting the single-precision floating-point value
570 `a' to the 32-bit two's complement integer format.  The conversion is
571 performed according to the IEC/IEEE Standard for Binary Floating-Point
572 Arithmetic---which means in particular that the conversion is rounded
573 according to the current rounding mode.  If `a' is a NaN, the largest
574 positive integer is returned.  Otherwise, if the conversion overflows, the
575 largest integer with the same sign as `a' is returned.
576 -------------------------------------------------------------------------------
577 */
float32_to_int32(float32 a)578 int32 float32_to_int32( float32 a )
579 {
580     flag aSign;
581     int16 aExp, shiftCount;
582     bits32 aSig, aSigExtra;
583     int32 z;
584     int8 roundingMode;
585 
586     aSig = extractFloat32Frac( a );
587     aExp = extractFloat32Exp( a );
588     aSign = extractFloat32Sign( a );
589     shiftCount = aExp - 0x96;
590     if ( 0 <= shiftCount ) {
591         if ( 0x9E <= aExp ) {
592             if ( a != 0xCF000000 ) {
593                 float_raise( float_flag_invalid );
594                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
595                     return 0x7FFFFFFF;
596                 }
597             }
598             return (sbits32) 0x80000000;
599         }
600         z = ( aSig | 0x00800000 )<<shiftCount;
601         if ( aSign ) z = - z;
602     }
603     else {
604         if ( aExp < 0x7E ) {
605             aSigExtra = aExp | aSig;
606             z = 0;
607         }
608         else {
609             aSig |= 0x00800000;
610             aSigExtra = aSig<<( shiftCount & 31 );
611             z = aSig>>( - shiftCount );
612         }
613         if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
614         roundingMode = float_rounding_mode;
615         if ( roundingMode == float_round_nearest_even ) {
616             if ( (sbits32) aSigExtra < 0 ) {
617                 ++z;
618                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
619             }
620             if ( aSign ) z = - z;
621         }
622         else {
623             aSigExtra = ( aSigExtra != 0 );
624             if ( aSign ) {
625                 z += ( roundingMode == float_round_down ) & aSigExtra;
626                 z = - z;
627             }
628             else {
629                 z += ( roundingMode == float_round_up ) & aSigExtra;
630             }
631         }
632     }
633     return z;
634 
635 }
636 #endif
637 
638 /*
639 -------------------------------------------------------------------------------
640 Returns the result of converting the single-precision floating-point value
641 `a' to the 32-bit two's complement integer format.  The conversion is
642 performed according to the IEC/IEEE Standard for Binary Floating-Point
643 Arithmetic, except that the conversion is always rounded toward zero.
644 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
645 the conversion overflows, the largest integer with the same sign as `a' is
646 returned.
647 -------------------------------------------------------------------------------
648 */
float32_to_int32_round_to_zero(float32 a)649 int32 float32_to_int32_round_to_zero( float32 a )
650 {
651     flag aSign;
652     int16 aExp, shiftCount;
653     bits32 aSig;
654     int32 z;
655 
656     aSig = extractFloat32Frac( a );
657     aExp = extractFloat32Exp( a );
658     aSign = extractFloat32Sign( a );
659     shiftCount = aExp - 0x9E;
660     if ( 0 <= shiftCount ) {
661         if ( a != 0xCF000000 ) {
662             float_raise( float_flag_invalid );
663             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
664         }
665         return (sbits32) 0x80000000;
666     }
667     else if ( aExp <= 0x7E ) {
668         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
669         return 0;
670     }
671     aSig = ( aSig | 0x00800000 )<<8;
672     z = aSig>>( - shiftCount );
673     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
674         float_exception_flags |= float_flag_inexact;
675     }
676     if ( aSign ) z = - z;
677     return z;
678 
679 }
680 
681 /*
682 -------------------------------------------------------------------------------
683 Returns the result of converting the single-precision floating-point value
684 `a' to the double-precision floating-point format.  The conversion is
685 performed according to the IEC/IEEE Standard for Binary Floating-Point
686 Arithmetic.
687 -------------------------------------------------------------------------------
688 */
float32_to_float64(float32 a)689 float64 float32_to_float64( float32 a )
690 {
691     flag aSign;
692     int16 aExp;
693     bits32 aSig, zSig0, zSig1;
694 
695     aSig = extractFloat32Frac( a );
696     aExp = extractFloat32Exp( a );
697     aSign = extractFloat32Sign( a );
698     if ( aExp == 0xFF ) {
699         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
700         return packFloat64( aSign, 0x7FF, 0, 0 );
701     }
702     if ( aExp == 0 ) {
703         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
704         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
705         --aExp;
706     }
707     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
708     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
709 
710 }
711 
712 #ifndef SOFTFLOAT_FOR_GCC
713 /*
714 -------------------------------------------------------------------------------
715 Rounds the single-precision floating-point value `a' to an integer,
716 and returns the result as a single-precision floating-point value.  The
717 operation is performed according to the IEC/IEEE Standard for Binary
718 Floating-Point Arithmetic.
719 -------------------------------------------------------------------------------
720 */
float32_round_to_int(float32 a)721 float32 float32_round_to_int( float32 a )
722 {
723     flag aSign;
724     int16 aExp;
725     bits32 lastBitMask, roundBitsMask;
726     int8 roundingMode;
727     float32 z;
728 
729     aExp = extractFloat32Exp( a );
730     if ( 0x96 <= aExp ) {
731         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
732             return propagateFloat32NaN( a, a );
733         }
734         return a;
735     }
736     if ( aExp <= 0x7E ) {
737         if ( (bits32) ( a<<1 ) == 0 ) return a;
738         float_exception_flags |= float_flag_inexact;
739         aSign = extractFloat32Sign( a );
740         switch ( float_rounding_mode ) {
741          case float_round_nearest_even:
742             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
743                 return packFloat32( aSign, 0x7F, 0 );
744             }
745             break;
746 	 case float_round_to_zero:
747 	    break;
748          case float_round_down:
749             return aSign ? 0xBF800000 : 0;
750          case float_round_up:
751             return aSign ? 0x80000000 : 0x3F800000;
752         }
753         return packFloat32( aSign, 0, 0 );
754     }
755     lastBitMask = 1;
756     lastBitMask <<= 0x96 - aExp;
757     roundBitsMask = lastBitMask - 1;
758     z = a;
759     roundingMode = float_rounding_mode;
760     if ( roundingMode == float_round_nearest_even ) {
761         z += lastBitMask>>1;
762         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
763     }
764     else if ( roundingMode != float_round_to_zero ) {
765         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
766             z += roundBitsMask;
767         }
768     }
769     z &= ~ roundBitsMask;
770     if ( z != a ) float_exception_flags |= float_flag_inexact;
771     return z;
772 
773 }
774 #endif
775 
776 /*
777 -------------------------------------------------------------------------------
778 Returns the result of adding the absolute values of the single-precision
779 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
780 before being returned.  `zSign' is ignored if the result is a NaN.
781 The addition is performed according to the IEC/IEEE Standard for Binary
782 Floating-Point Arithmetic.
783 -------------------------------------------------------------------------------
784 */
addFloat32Sigs(float32 a,float32 b,flag zSign)785 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
786 {
787     int16 aExp, bExp, zExp;
788     bits32 aSig, bSig, zSig;
789     int16 expDiff;
790 
791     aSig = extractFloat32Frac( a );
792     aExp = extractFloat32Exp( a );
793     bSig = extractFloat32Frac( b );
794     bExp = extractFloat32Exp( b );
795     expDiff = aExp - bExp;
796     aSig <<= 6;
797     bSig <<= 6;
798     if ( 0 < expDiff ) {
799         if ( aExp == 0xFF ) {
800             if ( aSig ) return propagateFloat32NaN( a, b );
801             return a;
802         }
803         if ( bExp == 0 ) {
804             --expDiff;
805         }
806         else {
807             bSig |= 0x20000000;
808         }
809         shift32RightJamming( bSig, expDiff, &bSig );
810         zExp = aExp;
811     }
812     else if ( expDiff < 0 ) {
813         if ( bExp == 0xFF ) {
814             if ( bSig ) return propagateFloat32NaN( a, b );
815             return packFloat32( zSign, 0xFF, 0 );
816         }
817         if ( aExp == 0 ) {
818             ++expDiff;
819         }
820         else {
821             aSig |= 0x20000000;
822         }
823         shift32RightJamming( aSig, - expDiff, &aSig );
824         zExp = bExp;
825     }
826     else {
827         if ( aExp == 0xFF ) {
828             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
829             return a;
830         }
831         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
832         zSig = 0x40000000 + aSig + bSig;
833         zExp = aExp;
834         goto roundAndPack;
835     }
836     aSig |= 0x20000000;
837     zSig = ( aSig + bSig )<<1;
838     --zExp;
839     if ( (sbits32) zSig < 0 ) {
840         zSig = aSig + bSig;
841         ++zExp;
842     }
843  roundAndPack:
844     return roundAndPackFloat32( zSign, zExp, zSig );
845 
846 }
847 
848 /*
849 -------------------------------------------------------------------------------
850 Returns the result of subtracting the absolute values of the single-
851 precision floating-point values `a' and `b'.  If `zSign' is 1, the
852 difference is negated before being returned.  `zSign' is ignored if the
853 result is a NaN.  The subtraction is performed according to the IEC/IEEE
854 Standard for Binary Floating-Point Arithmetic.
855 -------------------------------------------------------------------------------
856 */
subFloat32Sigs(float32 a,float32 b,flag zSign)857 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
858 {
859     int16 aExp, bExp, zExp;
860     bits32 aSig, bSig, zSig;
861     int16 expDiff;
862 
863     aSig = extractFloat32Frac( a );
864     aExp = extractFloat32Exp( a );
865     bSig = extractFloat32Frac( b );
866     bExp = extractFloat32Exp( b );
867     expDiff = aExp - bExp;
868     aSig <<= 7;
869     bSig <<= 7;
870     if ( 0 < expDiff ) goto aExpBigger;
871     if ( expDiff < 0 ) goto bExpBigger;
872     if ( aExp == 0xFF ) {
873         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
874         float_raise( float_flag_invalid );
875         return float32_default_nan;
876     }
877     if ( aExp == 0 ) {
878         aExp = 1;
879         bExp = 1;
880     }
881     if ( bSig < aSig ) goto aBigger;
882     if ( aSig < bSig ) goto bBigger;
883     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
884  bExpBigger:
885     if ( bExp == 0xFF ) {
886         if ( bSig ) return propagateFloat32NaN( a, b );
887         return packFloat32( zSign ^ 1, 0xFF, 0 );
888     }
889     if ( aExp == 0 ) {
890         ++expDiff;
891     }
892     else {
893         aSig |= 0x40000000;
894     }
895     shift32RightJamming( aSig, - expDiff, &aSig );
896     bSig |= 0x40000000;
897  bBigger:
898     zSig = bSig - aSig;
899     zExp = bExp;
900     zSign ^= 1;
901     goto normalizeRoundAndPack;
902  aExpBigger:
903     if ( aExp == 0xFF ) {
904         if ( aSig ) return propagateFloat32NaN( a, b );
905         return a;
906     }
907     if ( bExp == 0 ) {
908         --expDiff;
909     }
910     else {
911         bSig |= 0x40000000;
912     }
913     shift32RightJamming( bSig, expDiff, &bSig );
914     aSig |= 0x40000000;
915  aBigger:
916     zSig = aSig - bSig;
917     zExp = aExp;
918  normalizeRoundAndPack:
919     --zExp;
920     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
921 
922 }
923 
924 /*
925 -------------------------------------------------------------------------------
926 Returns the result of adding the single-precision floating-point values `a'
927 and `b'.  The operation is performed according to the IEC/IEEE Standard for
928 Binary Floating-Point Arithmetic.
929 -------------------------------------------------------------------------------
930 */
float32_add(float32 a,float32 b)931 float32 float32_add( float32 a, float32 b )
932 {
933     flag aSign, bSign;
934 
935     aSign = extractFloat32Sign( a );
936     bSign = extractFloat32Sign( b );
937     if ( aSign == bSign ) {
938         return addFloat32Sigs( a, b, aSign );
939     }
940     else {
941         return subFloat32Sigs( a, b, aSign );
942     }
943 
944 }
945 
946 /*
947 -------------------------------------------------------------------------------
948 Returns the result of subtracting the single-precision floating-point values
949 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
950 for Binary Floating-Point Arithmetic.
951 -------------------------------------------------------------------------------
952 */
float32_sub(float32 a,float32 b)953 float32 float32_sub( float32 a, float32 b )
954 {
955     flag aSign, bSign;
956 
957     aSign = extractFloat32Sign( a );
958     bSign = extractFloat32Sign( b );
959     if ( aSign == bSign ) {
960         return subFloat32Sigs( a, b, aSign );
961     }
962     else {
963         return addFloat32Sigs( a, b, aSign );
964     }
965 
966 }
967 
968 /*
969 -------------------------------------------------------------------------------
970 Returns the result of multiplying the single-precision floating-point values
971 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
972 for Binary Floating-Point Arithmetic.
973 -------------------------------------------------------------------------------
974 */
float32_mul(float32 a,float32 b)975 float32 float32_mul( float32 a, float32 b )
976 {
977     flag aSign, bSign, zSign;
978     int16 aExp, bExp, zExp;
979     bits32 aSig, bSig, zSig0, zSig1;
980 
981     aSig = extractFloat32Frac( a );
982     aExp = extractFloat32Exp( a );
983     aSign = extractFloat32Sign( a );
984     bSig = extractFloat32Frac( b );
985     bExp = extractFloat32Exp( b );
986     bSign = extractFloat32Sign( b );
987     zSign = aSign ^ bSign;
988     if ( aExp == 0xFF ) {
989         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
990             return propagateFloat32NaN( a, b );
991         }
992         if ( ( bExp | bSig ) == 0 ) {
993             float_raise( float_flag_invalid );
994             return float32_default_nan;
995         }
996         return packFloat32( zSign, 0xFF, 0 );
997     }
998     if ( bExp == 0xFF ) {
999         if ( bSig ) return propagateFloat32NaN( a, b );
1000         if ( ( aExp | aSig ) == 0 ) {
1001             float_raise( float_flag_invalid );
1002             return float32_default_nan;
1003         }
1004         return packFloat32( zSign, 0xFF, 0 );
1005     }
1006     if ( aExp == 0 ) {
1007         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1008         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1009     }
1010     if ( bExp == 0 ) {
1011         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1012         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1013     }
1014     zExp = aExp + bExp - 0x7F;
1015     aSig = ( aSig | 0x00800000 )<<7;
1016     bSig = ( bSig | 0x00800000 )<<8;
1017     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1018     zSig0 |= ( zSig1 != 0 );
1019     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1020         zSig0 <<= 1;
1021         --zExp;
1022     }
1023     return roundAndPackFloat32( zSign, zExp, zSig0 );
1024 
1025 }
1026 
1027 /*
1028 -------------------------------------------------------------------------------
1029 Returns the result of dividing the single-precision floating-point value `a'
1030 by the corresponding value `b'.  The operation is performed according to the
1031 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1032 -------------------------------------------------------------------------------
1033 */
float32_div(float32 a,float32 b)1034 float32 float32_div( float32 a, float32 b )
1035 {
1036     flag aSign, bSign, zSign;
1037     int16 aExp, bExp, zExp;
1038     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1039 
1040     aSig = extractFloat32Frac( a );
1041     aExp = extractFloat32Exp( a );
1042     aSign = extractFloat32Sign( a );
1043     bSig = extractFloat32Frac( b );
1044     bExp = extractFloat32Exp( b );
1045     bSign = extractFloat32Sign( b );
1046     zSign = aSign ^ bSign;
1047     if ( aExp == 0xFF ) {
1048         if ( aSig ) return propagateFloat32NaN( a, b );
1049         if ( bExp == 0xFF ) {
1050             if ( bSig ) return propagateFloat32NaN( a, b );
1051             float_raise( float_flag_invalid );
1052             return float32_default_nan;
1053         }
1054         return packFloat32( zSign, 0xFF, 0 );
1055     }
1056     if ( bExp == 0xFF ) {
1057         if ( bSig ) return propagateFloat32NaN( a, b );
1058         return packFloat32( zSign, 0, 0 );
1059     }
1060     if ( bExp == 0 ) {
1061         if ( bSig == 0 ) {
1062             if ( ( aExp | aSig ) == 0 ) {
1063                 float_raise( float_flag_invalid );
1064                 return float32_default_nan;
1065             }
1066             float_raise( float_flag_divbyzero );
1067             return packFloat32( zSign, 0xFF, 0 );
1068         }
1069         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1070     }
1071     if ( aExp == 0 ) {
1072         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1073         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1074     }
1075     zExp = aExp - bExp + 0x7D;
1076     aSig = ( aSig | 0x00800000 )<<7;
1077     bSig = ( bSig | 0x00800000 )<<8;
1078     if ( bSig <= ( aSig + aSig ) ) {
1079         aSig >>= 1;
1080         ++zExp;
1081     }
1082     zSig = estimateDiv64To32( aSig, 0, bSig );
1083     if ( ( zSig & 0x3F ) <= 2 ) {
1084         mul32To64( bSig, zSig, &term0, &term1 );
1085         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1086         while ( (sbits32) rem0 < 0 ) {
1087             --zSig;
1088             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1089         }
1090         zSig |= ( rem1 != 0 );
1091     }
1092     return roundAndPackFloat32( zSign, zExp, zSig );
1093 
1094 }
1095 
1096 #ifndef SOFTFLOAT_FOR_GCC
1097 /*
1098 -------------------------------------------------------------------------------
1099 Returns the remainder of the single-precision floating-point value `a'
1100 with respect to the corresponding value `b'.  The operation is performed
1101 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1102 -------------------------------------------------------------------------------
1103 */
float32_rem(float32 a,float32 b)1104 float32 float32_rem( float32 a, float32 b )
1105 {
1106     flag aSign, bSign, zSign;
1107     int16 aExp, bExp, expDiff;
1108     bits32 aSig, bSig, q, allZero, alternateASig;
1109     sbits32 sigMean;
1110 
1111     aSig = extractFloat32Frac( a );
1112     aExp = extractFloat32Exp( a );
1113     aSign = extractFloat32Sign( a );
1114     bSig = extractFloat32Frac( b );
1115     bExp = extractFloat32Exp( b );
1116     bSign = extractFloat32Sign( b );
1117     if ( aExp == 0xFF ) {
1118         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1119             return propagateFloat32NaN( a, b );
1120         }
1121         float_raise( float_flag_invalid );
1122         return float32_default_nan;
1123     }
1124     if ( bExp == 0xFF ) {
1125         if ( bSig ) return propagateFloat32NaN( a, b );
1126         return a;
1127     }
1128     if ( bExp == 0 ) {
1129         if ( bSig == 0 ) {
1130             float_raise( float_flag_invalid );
1131             return float32_default_nan;
1132         }
1133         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1134     }
1135     if ( aExp == 0 ) {
1136         if ( aSig == 0 ) return a;
1137         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1138     }
1139     expDiff = aExp - bExp;
1140     aSig = ( aSig | 0x00800000 )<<8;
1141     bSig = ( bSig | 0x00800000 )<<8;
1142     if ( expDiff < 0 ) {
1143         if ( expDiff < -1 ) return a;
1144         aSig >>= 1;
1145     }
1146     q = ( bSig <= aSig );
1147     if ( q ) aSig -= bSig;
1148     expDiff -= 32;
1149     while ( 0 < expDiff ) {
1150         q = estimateDiv64To32( aSig, 0, bSig );
1151         q = ( 2 < q ) ? q - 2 : 0;
1152         aSig = - ( ( bSig>>2 ) * q );
1153         expDiff -= 30;
1154     }
1155     expDiff += 32;
1156     if ( 0 < expDiff ) {
1157         q = estimateDiv64To32( aSig, 0, bSig );
1158         q = ( 2 < q ) ? q - 2 : 0;
1159         q >>= 32 - expDiff;
1160         bSig >>= 2;
1161         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1162     }
1163     else {
1164         aSig >>= 2;
1165         bSig >>= 2;
1166     }
1167     do {
1168         alternateASig = aSig;
1169         ++q;
1170         aSig -= bSig;
1171     } while ( 0 <= (sbits32) aSig );
1172     sigMean = aSig + alternateASig;
1173     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1174         aSig = alternateASig;
1175     }
1176     zSign = ( (sbits32) aSig < 0 );
1177     if ( zSign ) aSig = - aSig;
1178     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1179 
1180 }
1181 #endif
1182 
1183 #ifndef SOFTFLOAT_FOR_GCC
1184 /*
1185 -------------------------------------------------------------------------------
1186 Returns the square root of the single-precision floating-point value `a'.
1187 The operation is performed according to the IEC/IEEE Standard for Binary
1188 Floating-Point Arithmetic.
1189 -------------------------------------------------------------------------------
1190 */
float32_sqrt(float32 a)1191 float32 float32_sqrt( float32 a )
1192 {
1193     flag aSign;
1194     int16 aExp, zExp;
1195     bits32 aSig, zSig, rem0, rem1, term0, term1;
1196 
1197     aSig = extractFloat32Frac( a );
1198     aExp = extractFloat32Exp( a );
1199     aSign = extractFloat32Sign( a );
1200     if ( aExp == 0xFF ) {
1201         if ( aSig ) return propagateFloat32NaN( a, 0 );
1202         if ( ! aSign ) return a;
1203         float_raise( float_flag_invalid );
1204         return float32_default_nan;
1205     }
1206     if ( aSign ) {
1207         if ( ( aExp | aSig ) == 0 ) return a;
1208         float_raise( float_flag_invalid );
1209         return float32_default_nan;
1210     }
1211     if ( aExp == 0 ) {
1212         if ( aSig == 0 ) return 0;
1213         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1214     }
1215     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1216     aSig = ( aSig | 0x00800000 )<<8;
1217     zSig = estimateSqrt32( aExp, aSig ) + 2;
1218     if ( ( zSig & 0x7F ) <= 5 ) {
1219         if ( zSig < 2 ) {
1220             zSig = 0x7FFFFFFF;
1221             goto roundAndPack;
1222         }
1223         else {
1224             aSig >>= aExp & 1;
1225             mul32To64( zSig, zSig, &term0, &term1 );
1226             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1227             while ( (sbits32) rem0 < 0 ) {
1228                 --zSig;
1229                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1230                 term1 |= 1;
1231                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1232             }
1233             zSig |= ( ( rem0 | rem1 ) != 0 );
1234         }
1235     }
1236     shift32RightJamming( zSig, 1, &zSig );
1237  roundAndPack:
1238     return roundAndPackFloat32( 0, zExp, zSig );
1239 
1240 }
1241 #endif
1242 
1243 /*
1244 -------------------------------------------------------------------------------
1245 Returns 1 if the single-precision floating-point value `a' is equal to
1246 the corresponding value `b', and 0 otherwise.  The comparison is performed
1247 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1248 -------------------------------------------------------------------------------
1249 */
float32_eq(float32 a,float32 b)1250 flag float32_eq( float32 a, float32 b )
1251 {
1252 
1253     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1254          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1255        ) {
1256         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1257             float_raise( float_flag_invalid );
1258         }
1259         return 0;
1260     }
1261     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1262 
1263 }
1264 
1265 /*
1266 -------------------------------------------------------------------------------
1267 Returns 1 if the single-precision floating-point value `a' is less than
1268 or equal to the corresponding value `b', and 0 otherwise.  The comparison
1269 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1270 Arithmetic.
1271 -------------------------------------------------------------------------------
1272 */
float32_le(float32 a,float32 b)1273 flag float32_le( float32 a, float32 b )
1274 {
1275     flag aSign, bSign;
1276 
1277     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1278          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1279        ) {
1280         float_raise( float_flag_invalid );
1281         return 0;
1282     }
1283     aSign = extractFloat32Sign( a );
1284     bSign = extractFloat32Sign( b );
1285     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1286     return ( a == b ) || ( aSign ^ ( a < b ) );
1287 
1288 }
1289 
1290 /*
1291 -------------------------------------------------------------------------------
1292 Returns 1 if the single-precision floating-point value `a' is less than
1293 the corresponding value `b', and 0 otherwise.  The comparison is performed
1294 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1295 -------------------------------------------------------------------------------
1296 */
float32_lt(float32 a,float32 b)1297 flag float32_lt( float32 a, float32 b )
1298 {
1299     flag aSign, bSign;
1300 
1301     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1302          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1303        ) {
1304         float_raise( float_flag_invalid );
1305         return 0;
1306     }
1307     aSign = extractFloat32Sign( a );
1308     bSign = extractFloat32Sign( b );
1309     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1310     return ( a != b ) && ( aSign ^ ( a < b ) );
1311 
1312 }
1313 
1314 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1315 /*
1316 -------------------------------------------------------------------------------
1317 Returns 1 if the single-precision floating-point value `a' is equal to
1318 the corresponding value `b', and 0 otherwise.  The invalid exception is
1319 raised if either operand is a NaN.  Otherwise, the comparison is performed
1320 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1321 -------------------------------------------------------------------------------
1322 */
float32_eq_signaling(float32 a,float32 b)1323 flag float32_eq_signaling( float32 a, float32 b )
1324 {
1325 
1326     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1327          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1328        ) {
1329         float_raise( float_flag_invalid );
1330         return 0;
1331     }
1332     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1333 
1334 }
1335 
1336 /*
1337 -------------------------------------------------------------------------------
1338 Returns 1 if the single-precision floating-point value `a' is less than or
1339 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1340 cause an exception.  Otherwise, the comparison is performed according to the
1341 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1342 -------------------------------------------------------------------------------
1343 */
float32_le_quiet(float32 a,float32 b)1344 flag float32_le_quiet( float32 a, float32 b )
1345 {
1346     flag aSign, bSign;
1347     int16 aExp, bExp;
1348 
1349     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1350          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1351        ) {
1352         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1353             float_raise( float_flag_invalid );
1354         }
1355         return 0;
1356     }
1357     aSign = extractFloat32Sign( a );
1358     bSign = extractFloat32Sign( b );
1359     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1360     return ( a == b ) || ( aSign ^ ( a < b ) );
1361 
1362 }
1363 
1364 /*
1365 -------------------------------------------------------------------------------
1366 Returns 1 if the single-precision floating-point value `a' is less than
1367 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1368 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1369 Standard for Binary Floating-Point Arithmetic.
1370 -------------------------------------------------------------------------------
1371 */
float32_lt_quiet(float32 a,float32 b)1372 flag float32_lt_quiet( float32 a, float32 b )
1373 {
1374     flag aSign, bSign;
1375 
1376     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1377          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1378        ) {
1379         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1380             float_raise( float_flag_invalid );
1381         }
1382         return 0;
1383     }
1384     aSign = extractFloat32Sign( a );
1385     bSign = extractFloat32Sign( b );
1386     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1387     return ( a != b ) && ( aSign ^ ( a < b ) );
1388 
1389 }
1390 #endif /* !SOFTFLOAT_FOR_GCC */
1391 
1392 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1393 /*
1394 -------------------------------------------------------------------------------
1395 Returns the result of converting the double-precision floating-point value
1396 `a' to the 32-bit two's complement integer format.  The conversion is
1397 performed according to the IEC/IEEE Standard for Binary Floating-Point
1398 Arithmetic---which means in particular that the conversion is rounded
1399 according to the current rounding mode.  If `a' is a NaN, the largest
1400 positive integer is returned.  Otherwise, if the conversion overflows, the
1401 largest integer with the same sign as `a' is returned.
1402 -------------------------------------------------------------------------------
1403 */
float64_to_int32(float64 a)1404 int32 float64_to_int32( float64 a )
1405 {
1406     flag aSign;
1407     int16 aExp, shiftCount;
1408     bits32 aSig0, aSig1, absZ, aSigExtra;
1409     int32 z;
1410     int8 roundingMode;
1411 
1412     aSig1 = extractFloat64Frac1( a );
1413     aSig0 = extractFloat64Frac0( a );
1414     aExp = extractFloat64Exp( a );
1415     aSign = extractFloat64Sign( a );
1416     shiftCount = aExp - 0x413;
1417     if ( 0 <= shiftCount ) {
1418         if ( 0x41E < aExp ) {
1419             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1420             goto invalid;
1421         }
1422         shortShift64Left(
1423             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1424         if ( 0x80000000 < absZ ) goto invalid;
1425     }
1426     else {
1427         aSig1 = ( aSig1 != 0 );
1428         if ( aExp < 0x3FE ) {
1429             aSigExtra = aExp | aSig0 | aSig1;
1430             absZ = 0;
1431         }
1432         else {
1433             aSig0 |= 0x00100000;
1434             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1435             absZ = aSig0>>( - shiftCount );
1436         }
1437     }
1438     roundingMode = float_rounding_mode;
1439     if ( roundingMode == float_round_nearest_even ) {
1440         if ( (sbits32) aSigExtra < 0 ) {
1441             ++absZ;
1442             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1443         }
1444         z = aSign ? - absZ : absZ;
1445     }
1446     else {
1447         aSigExtra = ( aSigExtra != 0 );
1448         if ( aSign ) {
1449             z = - (   absZ
1450                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1451         }
1452         else {
1453             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1454         }
1455     }
1456     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1457  invalid:
1458         float_raise( float_flag_invalid );
1459         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1460     }
1461     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1462     return z;
1463 
1464 }
1465 #endif /* !SOFTFLOAT_FOR_GCC */
1466 
1467 /*
1468 -------------------------------------------------------------------------------
1469 Returns the result of converting the double-precision floating-point value
1470 `a' to the 32-bit two's complement integer format.  The conversion is
1471 performed according to the IEC/IEEE Standard for Binary Floating-Point
1472 Arithmetic, except that the conversion is always rounded toward zero.
1473 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1474 the conversion overflows, the largest integer with the same sign as `a' is
1475 returned.
1476 -------------------------------------------------------------------------------
1477 */
float64_to_int32_round_to_zero(float64 a)1478 int32 float64_to_int32_round_to_zero( float64 a )
1479 {
1480     flag aSign;
1481     int16 aExp, shiftCount;
1482     bits32 aSig0, aSig1, absZ, aSigExtra;
1483     int32 z;
1484 
1485     aSig1 = extractFloat64Frac1( a );
1486     aSig0 = extractFloat64Frac0( a );
1487     aExp = extractFloat64Exp( a );
1488     aSign = extractFloat64Sign( a );
1489     shiftCount = aExp - 0x413;
1490     if ( 0 <= shiftCount ) {
1491         if ( 0x41E < aExp ) {
1492             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1493             goto invalid;
1494         }
1495         shortShift64Left(
1496             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1497     }
1498     else {
1499         if ( aExp < 0x3FF ) {
1500             if ( aExp | aSig0 | aSig1 ) {
1501                 float_exception_flags |= float_flag_inexact;
1502             }
1503             return 0;
1504         }
1505         aSig0 |= 0x00100000;
1506         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1507         absZ = aSig0>>( - shiftCount );
1508     }
1509     z = aSign ? - absZ : absZ;
1510     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1511  invalid:
1512         float_raise( float_flag_invalid );
1513         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1514     }
1515     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1516     return z;
1517 
1518 }
1519 
1520 /*
1521 -------------------------------------------------------------------------------
1522 Returns the result of converting the double-precision floating-point value
1523 `a' to the single-precision floating-point format.  The conversion is
1524 performed according to the IEC/IEEE Standard for Binary Floating-Point
1525 Arithmetic.
1526 -------------------------------------------------------------------------------
1527 */
float64_to_float32(float64 a)1528 float32 float64_to_float32( float64 a )
1529 {
1530     flag aSign;
1531     int16 aExp;
1532     bits32 aSig0, aSig1, zSig;
1533     bits32 allZero;
1534 
1535     aSig1 = extractFloat64Frac1( a );
1536     aSig0 = extractFloat64Frac0( a );
1537     aExp = extractFloat64Exp( a );
1538     aSign = extractFloat64Sign( a );
1539     if ( aExp == 0x7FF ) {
1540         if ( aSig0 | aSig1 ) {
1541             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1542         }
1543         return packFloat32( aSign, 0xFF, 0 );
1544     }
1545     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1546     if ( aExp ) zSig |= 0x40000000;
1547     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1548 
1549 }
1550 
1551 #ifndef SOFTFLOAT_FOR_GCC
1552 /*
1553 -------------------------------------------------------------------------------
1554 Rounds the double-precision floating-point value `a' to an integer,
1555 and returns the result as a double-precision floating-point value.  The
1556 operation is performed according to the IEC/IEEE Standard for Binary
1557 Floating-Point Arithmetic.
1558 -------------------------------------------------------------------------------
1559 */
float64_round_to_int(float64 a)1560 float64 float64_round_to_int( float64 a )
1561 {
1562     flag aSign;
1563     int16 aExp;
1564     bits32 lastBitMask, roundBitsMask;
1565     int8 roundingMode;
1566     float64 z;
1567 
1568     aExp = extractFloat64Exp( a );
1569     if ( 0x413 <= aExp ) {
1570         if ( 0x433 <= aExp ) {
1571             if (    ( aExp == 0x7FF )
1572                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1573                 return propagateFloat64NaN( a, a );
1574             }
1575             return a;
1576         }
1577         lastBitMask = 1;
1578         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1579         roundBitsMask = lastBitMask - 1;
1580         z = a;
1581         roundingMode = float_rounding_mode;
1582         if ( roundingMode == float_round_nearest_even ) {
1583             if ( lastBitMask ) {
1584                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1585                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1586             }
1587             else {
1588                 if ( (sbits32) z.low < 0 ) {
1589                     ++z.high;
1590                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1591                 }
1592             }
1593         }
1594         else if ( roundingMode != float_round_to_zero ) {
1595             if (   extractFloat64Sign( z )
1596                  ^ ( roundingMode == float_round_up ) ) {
1597                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1598             }
1599         }
1600         z.low &= ~ roundBitsMask;
1601     }
1602     else {
1603         if ( aExp <= 0x3FE ) {
1604             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1605             float_exception_flags |= float_flag_inexact;
1606             aSign = extractFloat64Sign( a );
1607             switch ( float_rounding_mode ) {
1608              case float_round_nearest_even:
1609                 if (    ( aExp == 0x3FE )
1610                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1611                    ) {
1612                     return packFloat64( aSign, 0x3FF, 0, 0 );
1613                 }
1614                 break;
1615              case float_round_down:
1616                 return
1617                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1618                     : packFloat64( 0, 0, 0, 0 );
1619              case float_round_up:
1620                 return
1621                       aSign ? packFloat64( 1, 0, 0, 0 )
1622                     : packFloat64( 0, 0x3FF, 0, 0 );
1623             }
1624             return packFloat64( aSign, 0, 0, 0 );
1625         }
1626         lastBitMask = 1;
1627         lastBitMask <<= 0x413 - aExp;
1628         roundBitsMask = lastBitMask - 1;
1629         z.low = 0;
1630         z.high = a.high;
1631         roundingMode = float_rounding_mode;
1632         if ( roundingMode == float_round_nearest_even ) {
1633             z.high += lastBitMask>>1;
1634             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1635                 z.high &= ~ lastBitMask;
1636             }
1637         }
1638         else if ( roundingMode != float_round_to_zero ) {
1639             if (   extractFloat64Sign( z )
1640                  ^ ( roundingMode == float_round_up ) ) {
1641                 z.high |= ( a.low != 0 );
1642                 z.high += roundBitsMask;
1643             }
1644         }
1645         z.high &= ~ roundBitsMask;
1646     }
1647     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1648         float_exception_flags |= float_flag_inexact;
1649     }
1650     return z;
1651 
1652 }
1653 #endif
1654 
1655 /*
1656 -------------------------------------------------------------------------------
1657 Returns the result of adding the absolute values of the double-precision
1658 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1659 before being returned.  `zSign' is ignored if the result is a NaN.
1660 The addition is performed according to the IEC/IEEE Standard for Binary
1661 Floating-Point Arithmetic.
1662 -------------------------------------------------------------------------------
1663 */
addFloat64Sigs(float64 a,float64 b,flag zSign)1664 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1665 {
1666     int16 aExp, bExp, zExp;
1667     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1668     int16 expDiff;
1669 
1670     aSig1 = extractFloat64Frac1( a );
1671     aSig0 = extractFloat64Frac0( a );
1672     aExp = extractFloat64Exp( a );
1673     bSig1 = extractFloat64Frac1( b );
1674     bSig0 = extractFloat64Frac0( b );
1675     bExp = extractFloat64Exp( b );
1676     expDiff = aExp - bExp;
1677     if ( 0 < expDiff ) {
1678         if ( aExp == 0x7FF ) {
1679             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1680             return a;
1681         }
1682         if ( bExp == 0 ) {
1683             --expDiff;
1684         }
1685         else {
1686             bSig0 |= 0x00100000;
1687         }
1688         shift64ExtraRightJamming(
1689             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1690         zExp = aExp;
1691     }
1692     else if ( expDiff < 0 ) {
1693         if ( bExp == 0x7FF ) {
1694             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1695             return packFloat64( zSign, 0x7FF, 0, 0 );
1696         }
1697         if ( aExp == 0 ) {
1698             ++expDiff;
1699         }
1700         else {
1701             aSig0 |= 0x00100000;
1702         }
1703         shift64ExtraRightJamming(
1704             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1705         zExp = bExp;
1706     }
1707     else {
1708         if ( aExp == 0x7FF ) {
1709             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1710                 return propagateFloat64NaN( a, b );
1711             }
1712             return a;
1713         }
1714         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1715         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1716         zSig2 = 0;
1717         zSig0 |= 0x00200000;
1718         zExp = aExp;
1719         goto shiftRight1;
1720     }
1721     aSig0 |= 0x00100000;
1722     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1723     --zExp;
1724     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1725     ++zExp;
1726  shiftRight1:
1727     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1728  roundAndPack:
1729     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1730 
1731 }
1732 
1733 /*
1734 -------------------------------------------------------------------------------
1735 Returns the result of subtracting the absolute values of the double-
1736 precision floating-point values `a' and `b'.  If `zSign' is 1, the
1737 difference is negated before being returned.  `zSign' is ignored if the
1738 result is a NaN.  The subtraction is performed according to the IEC/IEEE
1739 Standard for Binary Floating-Point Arithmetic.
1740 -------------------------------------------------------------------------------
1741 */
subFloat64Sigs(float64 a,float64 b,flag zSign)1742 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1743 {
1744     int16 aExp, bExp, zExp;
1745     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1746     int16 expDiff;
1747 
1748     aSig1 = extractFloat64Frac1( a );
1749     aSig0 = extractFloat64Frac0( a );
1750     aExp = extractFloat64Exp( a );
1751     bSig1 = extractFloat64Frac1( b );
1752     bSig0 = extractFloat64Frac0( b );
1753     bExp = extractFloat64Exp( b );
1754     expDiff = aExp - bExp;
1755     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1756     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1757     if ( 0 < expDiff ) goto aExpBigger;
1758     if ( expDiff < 0 ) goto bExpBigger;
1759     if ( aExp == 0x7FF ) {
1760         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1761             return propagateFloat64NaN( a, b );
1762         }
1763         float_raise( float_flag_invalid );
1764         return float64_default_nan;
1765     }
1766     if ( aExp == 0 ) {
1767         aExp = 1;
1768         bExp = 1;
1769     }
1770     if ( bSig0 < aSig0 ) goto aBigger;
1771     if ( aSig0 < bSig0 ) goto bBigger;
1772     if ( bSig1 < aSig1 ) goto aBigger;
1773     if ( aSig1 < bSig1 ) goto bBigger;
1774     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1775  bExpBigger:
1776     if ( bExp == 0x7FF ) {
1777         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1778         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1779     }
1780     if ( aExp == 0 ) {
1781         ++expDiff;
1782     }
1783     else {
1784         aSig0 |= 0x40000000;
1785     }
1786     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1787     bSig0 |= 0x40000000;
1788  bBigger:
1789     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1790     zExp = bExp;
1791     zSign ^= 1;
1792     goto normalizeRoundAndPack;
1793  aExpBigger:
1794     if ( aExp == 0x7FF ) {
1795         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1796         return a;
1797     }
1798     if ( bExp == 0 ) {
1799         --expDiff;
1800     }
1801     else {
1802         bSig0 |= 0x40000000;
1803     }
1804     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1805     aSig0 |= 0x40000000;
1806  aBigger:
1807     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1808     zExp = aExp;
1809  normalizeRoundAndPack:
1810     --zExp;
1811     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1812 
1813 }
1814 
1815 /*
1816 -------------------------------------------------------------------------------
1817 Returns the result of adding the double-precision floating-point values `a'
1818 and `b'.  The operation is performed according to the IEC/IEEE Standard for
1819 Binary Floating-Point Arithmetic.
1820 -------------------------------------------------------------------------------
1821 */
float64_add(float64 a,float64 b)1822 float64 float64_add( float64 a, float64 b )
1823 {
1824     flag aSign, bSign;
1825 
1826     aSign = extractFloat64Sign( a );
1827     bSign = extractFloat64Sign( b );
1828     if ( aSign == bSign ) {
1829         return addFloat64Sigs( a, b, aSign );
1830     }
1831     else {
1832         return subFloat64Sigs( a, b, aSign );
1833     }
1834 
1835 }
1836 
1837 /*
1838 -------------------------------------------------------------------------------
1839 Returns the result of subtracting the double-precision floating-point values
1840 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1841 for Binary Floating-Point Arithmetic.
1842 -------------------------------------------------------------------------------
1843 */
float64_sub(float64 a,float64 b)1844 float64 float64_sub( float64 a, float64 b )
1845 {
1846     flag aSign, bSign;
1847 
1848     aSign = extractFloat64Sign( a );
1849     bSign = extractFloat64Sign( b );
1850     if ( aSign == bSign ) {
1851         return subFloat64Sigs( a, b, aSign );
1852     }
1853     else {
1854         return addFloat64Sigs( a, b, aSign );
1855     }
1856 
1857 }
1858 
1859 /*
1860 -------------------------------------------------------------------------------
1861 Returns the result of multiplying the double-precision floating-point values
1862 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1863 for Binary Floating-Point Arithmetic.
1864 -------------------------------------------------------------------------------
1865 */
float64_mul(float64 a,float64 b)1866 float64 float64_mul( float64 a, float64 b )
1867 {
1868     flag aSign, bSign, zSign;
1869     int16 aExp, bExp, zExp;
1870     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1871 
1872     aSig1 = extractFloat64Frac1( a );
1873     aSig0 = extractFloat64Frac0( a );
1874     aExp = extractFloat64Exp( a );
1875     aSign = extractFloat64Sign( a );
1876     bSig1 = extractFloat64Frac1( b );
1877     bSig0 = extractFloat64Frac0( b );
1878     bExp = extractFloat64Exp( b );
1879     bSign = extractFloat64Sign( b );
1880     zSign = aSign ^ bSign;
1881     if ( aExp == 0x7FF ) {
1882         if (    ( aSig0 | aSig1 )
1883              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1884             return propagateFloat64NaN( a, b );
1885         }
1886         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1887         return packFloat64( zSign, 0x7FF, 0, 0 );
1888     }
1889     if ( bExp == 0x7FF ) {
1890         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1891         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1892  invalid:
1893             float_raise( float_flag_invalid );
1894             return float64_default_nan;
1895         }
1896         return packFloat64( zSign, 0x7FF, 0, 0 );
1897     }
1898     if ( aExp == 0 ) {
1899         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1900         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1901     }
1902     if ( bExp == 0 ) {
1903         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1904         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1905     }
1906     zExp = aExp + bExp - 0x400;
1907     aSig0 |= 0x00100000;
1908     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1909     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1910     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1911     zSig2 |= ( zSig3 != 0 );
1912     if ( 0x00200000 <= zSig0 ) {
1913         shift64ExtraRightJamming(
1914             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1915         ++zExp;
1916     }
1917     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1918 
1919 }
1920 
1921 /*
1922 -------------------------------------------------------------------------------
1923 Returns the result of dividing the double-precision floating-point value `a'
1924 by the corresponding value `b'.  The operation is performed according to the
1925 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1926 -------------------------------------------------------------------------------
1927 */
float64_div(float64 a,float64 b)1928 float64 float64_div( float64 a, float64 b )
1929 {
1930     flag aSign, bSign, zSign;
1931     int16 aExp, bExp, zExp;
1932     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1933     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1934 
1935     aSig1 = extractFloat64Frac1( a );
1936     aSig0 = extractFloat64Frac0( a );
1937     aExp = extractFloat64Exp( a );
1938     aSign = extractFloat64Sign( a );
1939     bSig1 = extractFloat64Frac1( b );
1940     bSig0 = extractFloat64Frac0( b );
1941     bExp = extractFloat64Exp( b );
1942     bSign = extractFloat64Sign( b );
1943     zSign = aSign ^ bSign;
1944     if ( aExp == 0x7FF ) {
1945         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1946         if ( bExp == 0x7FF ) {
1947             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1948             goto invalid;
1949         }
1950         return packFloat64( zSign, 0x7FF, 0, 0 );
1951     }
1952     if ( bExp == 0x7FF ) {
1953         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1954         return packFloat64( zSign, 0, 0, 0 );
1955     }
1956     if ( bExp == 0 ) {
1957         if ( ( bSig0 | bSig1 ) == 0 ) {
1958             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1959  invalid:
1960                 float_raise( float_flag_invalid );
1961                 return float64_default_nan;
1962             }
1963             float_raise( float_flag_divbyzero );
1964             return packFloat64( zSign, 0x7FF, 0, 0 );
1965         }
1966         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1967     }
1968     if ( aExp == 0 ) {
1969         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1970         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1971     }
1972     zExp = aExp - bExp + 0x3FD;
1973     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1974     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1975     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1976         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1977         ++zExp;
1978     }
1979     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1980     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1981     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1982     while ( (sbits32) rem0 < 0 ) {
1983         --zSig0;
1984         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1985     }
1986     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1987     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1988         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1989         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1990         while ( (sbits32) rem1 < 0 ) {
1991             --zSig1;
1992             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1993         }
1994         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
1995     }
1996     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
1997     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1998 
1999 }
2000 
2001 #ifndef SOFTFLOAT_FOR_GCC
2002 /*
2003 -------------------------------------------------------------------------------
2004 Returns the remainder of the double-precision floating-point value `a'
2005 with respect to the corresponding value `b'.  The operation is performed
2006 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2007 -------------------------------------------------------------------------------
2008 */
float64_rem(float64 a,float64 b)2009 float64 float64_rem( float64 a, float64 b )
2010 {
2011     flag aSign, bSign, zSign;
2012     int16 aExp, bExp, expDiff;
2013     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2014     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2015     sbits32 sigMean0;
2016     float64 z;
2017 
2018     aSig1 = extractFloat64Frac1( a );
2019     aSig0 = extractFloat64Frac0( a );
2020     aExp = extractFloat64Exp( a );
2021     aSign = extractFloat64Sign( a );
2022     bSig1 = extractFloat64Frac1( b );
2023     bSig0 = extractFloat64Frac0( b );
2024     bExp = extractFloat64Exp( b );
2025     bSign = extractFloat64Sign( b );
2026     if ( aExp == 0x7FF ) {
2027         if (    ( aSig0 | aSig1 )
2028              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2029             return propagateFloat64NaN( a, b );
2030         }
2031         goto invalid;
2032     }
2033     if ( bExp == 0x7FF ) {
2034         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2035         return a;
2036     }
2037     if ( bExp == 0 ) {
2038         if ( ( bSig0 | bSig1 ) == 0 ) {
2039  invalid:
2040             float_raise( float_flag_invalid );
2041             return float64_default_nan;
2042         }
2043         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2044     }
2045     if ( aExp == 0 ) {
2046         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2047         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2048     }
2049     expDiff = aExp - bExp;
2050     if ( expDiff < -1 ) return a;
2051     shortShift64Left(
2052         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2053     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2054     q = le64( bSig0, bSig1, aSig0, aSig1 );
2055     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2056     expDiff -= 32;
2057     while ( 0 < expDiff ) {
2058         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2059         q = ( 4 < q ) ? q - 4 : 0;
2060         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2061         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2062         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2063         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2064         expDiff -= 29;
2065     }
2066     if ( -32 < expDiff ) {
2067         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2068         q = ( 4 < q ) ? q - 4 : 0;
2069         q >>= - expDiff;
2070         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2071         expDiff += 24;
2072         if ( expDiff < 0 ) {
2073             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2074         }
2075         else {
2076             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2077         }
2078         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2079         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2080     }
2081     else {
2082         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2083         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2084     }
2085     do {
2086         alternateASig0 = aSig0;
2087         alternateASig1 = aSig1;
2088         ++q;
2089         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2090     } while ( 0 <= (sbits32) aSig0 );
2091     add64(
2092         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2093     if (    ( sigMean0 < 0 )
2094          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2095         aSig0 = alternateASig0;
2096         aSig1 = alternateASig1;
2097     }
2098     zSign = ( (sbits32) aSig0 < 0 );
2099     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2100     return
2101         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2102 
2103 }
2104 #endif
2105 
2106 #ifndef SOFTFLOAT_FOR_GCC
2107 /*
2108 -------------------------------------------------------------------------------
2109 Returns the square root of the double-precision floating-point value `a'.
2110 The operation is performed according to the IEC/IEEE Standard for Binary
2111 Floating-Point Arithmetic.
2112 -------------------------------------------------------------------------------
2113 */
float64_sqrt(float64 a)2114 float64 float64_sqrt( float64 a )
2115 {
2116     flag aSign;
2117     int16 aExp, zExp;
2118     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2119     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2120     float64 z;
2121 
2122     aSig1 = extractFloat64Frac1( a );
2123     aSig0 = extractFloat64Frac0( a );
2124     aExp = extractFloat64Exp( a );
2125     aSign = extractFloat64Sign( a );
2126     if ( aExp == 0x7FF ) {
2127         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2128         if ( ! aSign ) return a;
2129         goto invalid;
2130     }
2131     if ( aSign ) {
2132         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2133  invalid:
2134         float_raise( float_flag_invalid );
2135         return float64_default_nan;
2136     }
2137     if ( aExp == 0 ) {
2138         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2139         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2140     }
2141     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2142     aSig0 |= 0x00100000;
2143     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2144     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2145     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2146     doubleZSig0 = zSig0 + zSig0;
2147     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2148     mul32To64( zSig0, zSig0, &term0, &term1 );
2149     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2150     while ( (sbits32) rem0 < 0 ) {
2151         --zSig0;
2152         doubleZSig0 -= 2;
2153         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2154     }
2155     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2156     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2157         if ( zSig1 == 0 ) zSig1 = 1;
2158         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2159         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2160         mul32To64( zSig1, zSig1, &term2, &term3 );
2161         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2162         while ( (sbits32) rem1 < 0 ) {
2163             --zSig1;
2164             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2165             term3 |= 1;
2166             term2 |= doubleZSig0;
2167             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2168         }
2169         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2170     }
2171     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2172     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2173 
2174 }
2175 #endif
2176 
2177 /*
2178 -------------------------------------------------------------------------------
2179 Returns 1 if the double-precision floating-point value `a' is equal to
2180 the corresponding value `b', and 0 otherwise.  The comparison is performed
2181 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2182 -------------------------------------------------------------------------------
2183 */
float64_eq(float64 a,float64 b)2184 flag float64_eq( float64 a, float64 b )
2185 {
2186 
2187     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2188               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2189          || (    ( extractFloat64Exp( b ) == 0x7FF )
2190               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2191        ) {
2192         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2193             float_raise( float_flag_invalid );
2194         }
2195         return 0;
2196     }
2197     return ( a == b ) ||
2198 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2199 
2200 }
2201 
2202 /*
2203 -------------------------------------------------------------------------------
2204 Returns 1 if the double-precision floating-point value `a' is less than
2205 or equal to the corresponding value `b', and 0 otherwise.  The comparison
2206 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2207 Arithmetic.
2208 -------------------------------------------------------------------------------
2209 */
float64_le(float64 a,float64 b)2210 flag float64_le( float64 a, float64 b )
2211 {
2212     flag aSign, bSign;
2213 
2214     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2215               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2216          || (    ( extractFloat64Exp( b ) == 0x7FF )
2217               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2218        ) {
2219         float_raise( float_flag_invalid );
2220         return 0;
2221     }
2222     aSign = extractFloat64Sign( a );
2223     bSign = extractFloat64Sign( b );
2224     if ( aSign != bSign )
2225 	return aSign ||
2226 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2227 	      0 );
2228     return ( a == b ) ||
2229 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2230 }
2231 
2232 /*
2233 -------------------------------------------------------------------------------
2234 Returns 1 if the double-precision floating-point value `a' is less than
2235 the corresponding value `b', and 0 otherwise.  The comparison is performed
2236 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2237 -------------------------------------------------------------------------------
2238 */
float64_lt(float64 a,float64 b)2239 flag float64_lt( float64 a, float64 b )
2240 {
2241     flag aSign, bSign;
2242 
2243     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2244               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2245          || (    ( extractFloat64Exp( b ) == 0x7FF )
2246               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2247        ) {
2248         float_raise( float_flag_invalid );
2249         return 0;
2250     }
2251     aSign = extractFloat64Sign( a );
2252     bSign = extractFloat64Sign( b );
2253     if ( aSign != bSign )
2254 	return aSign &&
2255 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2256 	      0 );
2257     return ( a != b ) &&
2258 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2259 
2260 }
2261 
2262 #ifndef SOFTFLOAT_FOR_GCC
2263 /*
2264 -------------------------------------------------------------------------------
2265 Returns 1 if the double-precision floating-point value `a' is equal to
2266 the corresponding value `b', and 0 otherwise.  The invalid exception is
2267 raised if either operand is a NaN.  Otherwise, the comparison is performed
2268 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2269 -------------------------------------------------------------------------------
2270 */
float64_eq_signaling(float64 a,float64 b)2271 flag float64_eq_signaling( float64 a, float64 b )
2272 {
2273 
2274     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2275               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2276          || (    ( extractFloat64Exp( b ) == 0x7FF )
2277               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2278        ) {
2279         float_raise( float_flag_invalid );
2280         return 0;
2281     }
2282     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2283 
2284 }
2285 
2286 /*
2287 -------------------------------------------------------------------------------
2288 Returns 1 if the double-precision floating-point value `a' is less than or
2289 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2290 cause an exception.  Otherwise, the comparison is performed according to the
2291 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2292 -------------------------------------------------------------------------------
2293 */
float64_le_quiet(float64 a,float64 b)2294 flag float64_le_quiet( float64 a, float64 b )
2295 {
2296     flag aSign, bSign;
2297 
2298     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2299               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2300          || (    ( extractFloat64Exp( b ) == 0x7FF )
2301               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2302        ) {
2303         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2304             float_raise( float_flag_invalid );
2305         }
2306         return 0;
2307     }
2308     aSign = extractFloat64Sign( a );
2309     bSign = extractFloat64Sign( b );
2310     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2311     return ( a == b ) || ( aSign ^ ( a < b ) );
2312 
2313 }
2314 
2315 /*
2316 -------------------------------------------------------------------------------
2317 Returns 1 if the double-precision floating-point value `a' is less than
2318 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2319 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2320 Standard for Binary Floating-Point Arithmetic.
2321 -------------------------------------------------------------------------------
2322 */
float64_lt_quiet(float64 a,float64 b)2323 flag float64_lt_quiet( float64 a, float64 b )
2324 {
2325     flag aSign, bSign;
2326 
2327     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2328               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2329          || (    ( extractFloat64Exp( b ) == 0x7FF )
2330               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2331        ) {
2332         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2333             float_raise( float_flag_invalid );
2334         }
2335         return 0;
2336     }
2337     aSign = extractFloat64Sign( a );
2338     bSign = extractFloat64Sign( b );
2339     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2340     return ( a != b ) && ( aSign ^ ( a < b ) );
2341 
2342 }
2343 
2344 #endif
2345