xref: /freebsd/lib/libc/softfloat/bits32/softfloat.c (revision 4fbb9c43aa44d9145151bb5f77d302ba01fb7551)
1 /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
2 
3 /*
4  * This version hacked for use with gcc -msoft-float by bjh21.
5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6  *  itself).
7  */
8 
9 /*
10  * Things you may want to define:
11  *
12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14  *   properly renamed.
15  */
16 
17 /*
18  * This differs from the standard bits32/softfloat.c in that float64
19  * is defined to be a 64-bit integer rather than a structure.  The
20  * structure is float64s, with translation between the two going via
21  * float64u.
22  */
23 
24 /*
25 ===============================================================================
26 
27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
28 Arithmetic Package, Release 2a.
29 
30 Written by John R. Hauser.  This work was made possible in part by the
31 International Computer Science Institute, located at Suite 600, 1947 Center
32 Street, Berkeley, California 94704.  Funding was partially provided by the
33 National Science Foundation under grant MIP-9311980.  The original version
34 of this code was written as part of a project to build a fixed-point vector
35 processor in collaboration with the University of California at Berkeley,
36 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
38 arithmetic/SoftFloat.html'.
39 
40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
42 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
45 
46 Derivative works are acceptable, even for commercial purposes, so long as
47 (1) they include prominent notice that the work is derivative, and (2) they
48 include prominent notice akin to these four paragraphs for those parts of
49 this code that are retained.
50 
51 ===============================================================================
52 */
53 
54 #include <sys/cdefs.h>
55 #ifdef SOFTFLOAT_FOR_GCC
56 #include "softfloat-for-gcc.h"
57 #endif
58 
59 #include "milieu.h"
60 #include "softfloat.h"
61 
62 /*
63  * Conversions between floats as stored in memory and floats as
64  * SoftFloat uses them
65  */
66 #ifndef FLOAT64_DEMANGLE
67 #define FLOAT64_DEMANGLE(a)	(a)
68 #endif
69 #ifndef FLOAT64_MANGLE
70 #define FLOAT64_MANGLE(a)	(a)
71 #endif
72 
73 /*
74 -------------------------------------------------------------------------------
75 Floating-point rounding mode and exception flags.
76 -------------------------------------------------------------------------------
77 */
78 int float_rounding_mode = float_round_nearest_even;
79 int float_exception_flags = 0;
80 
81 /*
82 -------------------------------------------------------------------------------
83 Primitive arithmetic functions, including multi-word arithmetic, and
84 division and square root approximations.  (Can be specialized to target if
85 desired.)
86 -------------------------------------------------------------------------------
87 */
88 #include "softfloat-macros"
89 
90 /*
91 -------------------------------------------------------------------------------
92 Functions and definitions to determine:  (1) whether tininess for underflow
93 is detected before or after rounding by default, (2) what (if anything)
94 happens when exceptions are raised, (3) how signaling NaNs are distinguished
95 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
96 are propagated from function inputs to output.  These details are target-
97 specific.
98 -------------------------------------------------------------------------------
99 */
100 #include "softfloat-specialize"
101 
102 /*
103 -------------------------------------------------------------------------------
104 Returns the fraction bits of the single-precision floating-point value `a'.
105 -------------------------------------------------------------------------------
106 */
107 INLINE bits32 extractFloat32Frac( float32 a )
108 {
109 
110     return a & 0x007FFFFF;
111 
112 }
113 
114 /*
115 -------------------------------------------------------------------------------
116 Returns the exponent bits of the single-precision floating-point value `a'.
117 -------------------------------------------------------------------------------
118 */
119 INLINE int16 extractFloat32Exp( float32 a )
120 {
121 
122     return ( a>>23 ) & 0xFF;
123 
124 }
125 
126 /*
127 -------------------------------------------------------------------------------
128 Returns the sign bit of the single-precision floating-point value `a'.
129 -------------------------------------------------------------------------------
130 */
131 INLINE flag extractFloat32Sign( float32 a )
132 {
133 
134     return a>>31;
135 
136 }
137 
138 /*
139 -------------------------------------------------------------------------------
140 Normalizes the subnormal single-precision floating-point value represented
141 by the denormalized significand `aSig'.  The normalized exponent and
142 significand are stored at the locations pointed to by `zExpPtr' and
143 `zSigPtr', respectively.
144 -------------------------------------------------------------------------------
145 */
146 static void
147  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
148 {
149     int8 shiftCount;
150 
151     shiftCount = countLeadingZeros32( aSig ) - 8;
152     *zSigPtr = aSig<<shiftCount;
153     *zExpPtr = 1 - shiftCount;
154 
155 }
156 
157 /*
158 -------------------------------------------------------------------------------
159 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
160 single-precision floating-point value, returning the result.  After being
161 shifted into the proper positions, the three fields are simply added
162 together to form the result.  This means that any integer portion of `zSig'
163 will be added into the exponent.  Since a properly normalized significand
164 will have an integer portion equal to 1, the `zExp' input should be 1 less
165 than the desired result exponent whenever `zSig' is a complete, normalized
166 significand.
167 -------------------------------------------------------------------------------
168 */
169 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
170 {
171 
172     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
173 
174 }
175 
176 /*
177 -------------------------------------------------------------------------------
178 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
179 and significand `zSig', and returns the proper single-precision floating-
180 point value corresponding to the abstract input.  Ordinarily, the abstract
181 value is simply rounded and packed into the single-precision format, with
182 the inexact exception raised if the abstract input cannot be represented
183 exactly.  However, if the abstract value is too large, the overflow and
184 inexact exceptions are raised and an infinity or maximal finite value is
185 returned.  If the abstract value is too small, the input value is rounded to
186 a subnormal number, and the underflow and inexact exceptions are raised if
187 the abstract input cannot be represented exactly as a subnormal single-
188 precision floating-point number.
189     The input significand `zSig' has its binary point between bits 30
190 and 29, which is 7 bits to the left of the usual location.  This shifted
191 significand must be normalized or smaller.  If `zSig' is not normalized,
192 `zExp' must be 0; in that case, the result returned is a subnormal number,
193 and it must not require rounding.  In the usual case that `zSig' is
194 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
195 The handling of underflow and overflow follows the IEC/IEEE Standard for
196 Binary Floating-Point Arithmetic.
197 -------------------------------------------------------------------------------
198 */
199 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
200 {
201     int8 roundingMode;
202     flag roundNearestEven;
203     int8 roundIncrement, roundBits;
204     flag isTiny;
205 
206     roundingMode = float_rounding_mode;
207     roundNearestEven = roundingMode == float_round_nearest_even;
208     roundIncrement = 0x40;
209     if ( ! roundNearestEven ) {
210         if ( roundingMode == float_round_to_zero ) {
211             roundIncrement = 0;
212         }
213         else {
214             roundIncrement = 0x7F;
215             if ( zSign ) {
216                 if ( roundingMode == float_round_up ) roundIncrement = 0;
217             }
218             else {
219                 if ( roundingMode == float_round_down ) roundIncrement = 0;
220             }
221         }
222     }
223     roundBits = zSig & 0x7F;
224     if ( 0xFD <= (bits16) zExp ) {
225         if (    ( 0xFD < zExp )
226              || (    ( zExp == 0xFD )
227                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
228            ) {
229             float_raise( float_flag_overflow | float_flag_inexact );
230             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
231         }
232         if ( zExp < 0 ) {
233             isTiny =
234                    ( float_detect_tininess == float_tininess_before_rounding )
235                 || ( zExp < -1 )
236                 || ( zSig + roundIncrement < 0x80000000 );
237             shift32RightJamming( zSig, - zExp, &zSig );
238             zExp = 0;
239             roundBits = zSig & 0x7F;
240             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
241         }
242     }
243     if ( roundBits ) float_exception_flags |= float_flag_inexact;
244     zSig = ( zSig + roundIncrement )>>7;
245     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
246     if ( zSig == 0 ) zExp = 0;
247     return packFloat32( zSign, zExp, zSig );
248 
249 }
250 
251 /*
252 -------------------------------------------------------------------------------
253 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
254 and significand `zSig', and returns the proper single-precision floating-
255 point value corresponding to the abstract input.  This routine is just like
256 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
257 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
258 floating-point exponent.
259 -------------------------------------------------------------------------------
260 */
261 static float32
262  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
263 {
264     int8 shiftCount;
265 
266     shiftCount = countLeadingZeros32( zSig ) - 1;
267     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
268 
269 }
270 
271 /*
272 -------------------------------------------------------------------------------
273 Returns the least-significant 32 fraction bits of the double-precision
274 floating-point value `a'.
275 -------------------------------------------------------------------------------
276 */
277 INLINE bits32 extractFloat64Frac1( float64 a )
278 {
279 
280     return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
281 
282 }
283 
284 /*
285 -------------------------------------------------------------------------------
286 Returns the most-significant 20 fraction bits of the double-precision
287 floating-point value `a'.
288 -------------------------------------------------------------------------------
289 */
290 INLINE bits32 extractFloat64Frac0( float64 a )
291 {
292 
293     return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
294 
295 }
296 
297 /*
298 -------------------------------------------------------------------------------
299 Returns the exponent bits of the double-precision floating-point value `a'.
300 -------------------------------------------------------------------------------
301 */
302 INLINE int16 extractFloat64Exp( float64 a )
303 {
304 
305     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
306 
307 }
308 
309 /*
310 -------------------------------------------------------------------------------
311 Returns the sign bit of the double-precision floating-point value `a'.
312 -------------------------------------------------------------------------------
313 */
314 INLINE flag extractFloat64Sign( float64 a )
315 {
316 
317     return FLOAT64_DEMANGLE(a)>>63;
318 
319 }
320 
321 /*
322 -------------------------------------------------------------------------------
323 Normalizes the subnormal double-precision floating-point value represented
324 by the denormalized significand formed by the concatenation of `aSig0' and
325 `aSig1'.  The normalized exponent is stored at the location pointed to by
326 `zExpPtr'.  The most significant 21 bits of the normalized significand are
327 stored at the location pointed to by `zSig0Ptr', and the least significant
328 32 bits of the normalized significand are stored at the location pointed to
329 by `zSig1Ptr'.
330 -------------------------------------------------------------------------------
331 */
332 static void
333  normalizeFloat64Subnormal(
334      bits32 aSig0,
335      bits32 aSig1,
336      int16 *zExpPtr,
337      bits32 *zSig0Ptr,
338      bits32 *zSig1Ptr
339  )
340 {
341     int8 shiftCount;
342 
343     if ( aSig0 == 0 ) {
344         shiftCount = countLeadingZeros32( aSig1 ) - 11;
345         if ( shiftCount < 0 ) {
346             *zSig0Ptr = aSig1>>( - shiftCount );
347             *zSig1Ptr = aSig1<<( shiftCount & 31 );
348         }
349         else {
350             *zSig0Ptr = aSig1<<shiftCount;
351             *zSig1Ptr = 0;
352         }
353         *zExpPtr = - shiftCount - 31;
354     }
355     else {
356         shiftCount = countLeadingZeros32( aSig0 ) - 11;
357         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
358         *zExpPtr = 1 - shiftCount;
359     }
360 
361 }
362 
363 /*
364 -------------------------------------------------------------------------------
365 Packs the sign `zSign', the exponent `zExp', and the significand formed by
366 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
367 point value, returning the result.  After being shifted into the proper
368 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
369 together to form the most significant 32 bits of the result.  This means
370 that any integer portion of `zSig0' will be added into the exponent.  Since
371 a properly normalized significand will have an integer portion equal to 1,
372 the `zExp' input should be 1 less than the desired result exponent whenever
373 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
374 -------------------------------------------------------------------------------
375 */
376 INLINE float64
377  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
378 {
379 
380     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
381 			   ( ( (bits64) zExp )<<52 ) +
382 			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
383 
384 
385 }
386 
387 /*
388 -------------------------------------------------------------------------------
389 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
390 and extended significand formed by the concatenation of `zSig0', `zSig1',
391 and `zSig2', and returns the proper double-precision floating-point value
392 corresponding to the abstract input.  Ordinarily, the abstract value is
393 simply rounded and packed into the double-precision format, with the inexact
394 exception raised if the abstract input cannot be represented exactly.
395 However, if the abstract value is too large, the overflow and inexact
396 exceptions are raised and an infinity or maximal finite value is returned.
397 If the abstract value is too small, the input value is rounded to a
398 subnormal number, and the underflow and inexact exceptions are raised if the
399 abstract input cannot be represented exactly as a subnormal double-precision
400 floating-point number.
401     The input significand must be normalized or smaller.  If the input
402 significand is not normalized, `zExp' must be 0; in that case, the result
403 returned is a subnormal number, and it must not require rounding.  In the
404 usual case that the input significand is normalized, `zExp' must be 1 less
405 than the ``true'' floating-point exponent.  The handling of underflow and
406 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
407 -------------------------------------------------------------------------------
408 */
409 static float64
410  roundAndPackFloat64(
411      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
412 {
413     int8 roundingMode;
414     flag roundNearestEven, increment, isTiny;
415 
416     roundingMode = float_rounding_mode;
417     roundNearestEven = ( roundingMode == float_round_nearest_even );
418     increment = ( (sbits32) zSig2 < 0 );
419     if ( ! roundNearestEven ) {
420         if ( roundingMode == float_round_to_zero ) {
421             increment = 0;
422         }
423         else {
424             if ( zSign ) {
425                 increment = ( roundingMode == float_round_down ) && zSig2;
426             }
427             else {
428                 increment = ( roundingMode == float_round_up ) && zSig2;
429             }
430         }
431     }
432     if ( 0x7FD <= (bits16) zExp ) {
433         if (    ( 0x7FD < zExp )
434              || (    ( zExp == 0x7FD )
435                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
436                   && increment
437                 )
438            ) {
439             float_raise( float_flag_overflow | float_flag_inexact );
440             if (    ( roundingMode == float_round_to_zero )
441                  || ( zSign && ( roundingMode == float_round_up ) )
442                  || ( ! zSign && ( roundingMode == float_round_down ) )
443                ) {
444                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
445             }
446             return packFloat64( zSign, 0x7FF, 0, 0 );
447         }
448         if ( zExp < 0 ) {
449             isTiny =
450                    ( float_detect_tininess == float_tininess_before_rounding )
451                 || ( zExp < -1 )
452                 || ! increment
453                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
454             shift64ExtraRightJamming(
455                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
456             zExp = 0;
457             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
458             if ( roundNearestEven ) {
459                 increment = ( (sbits32) zSig2 < 0 );
460             }
461             else {
462                 if ( zSign ) {
463                     increment = ( roundingMode == float_round_down ) && zSig2;
464                 }
465                 else {
466                     increment = ( roundingMode == float_round_up ) && zSig2;
467                 }
468             }
469         }
470     }
471     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
472     if ( increment ) {
473         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
474         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
475     }
476     else {
477         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
478     }
479     return packFloat64( zSign, zExp, zSig0, zSig1 );
480 
481 }
482 
483 /*
484 -------------------------------------------------------------------------------
485 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
486 and significand formed by the concatenation of `zSig0' and `zSig1', and
487 returns the proper double-precision floating-point value corresponding
488 to the abstract input.  This routine is just like `roundAndPackFloat64'
489 except that the input significand has fewer bits and does not have to be
490 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
491 point exponent.
492 -------------------------------------------------------------------------------
493 */
494 static float64
495  normalizeRoundAndPackFloat64(
496      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
497 {
498     int8 shiftCount;
499     bits32 zSig2;
500 
501     if ( zSig0 == 0 ) {
502         zSig0 = zSig1;
503         zSig1 = 0;
504         zExp -= 32;
505     }
506     shiftCount = countLeadingZeros32( zSig0 ) - 11;
507     if ( 0 <= shiftCount ) {
508         zSig2 = 0;
509         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
510     }
511     else {
512         shift64ExtraRightJamming(
513             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
514     }
515     zExp -= shiftCount;
516     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
517 
518 }
519 
520 /*
521 -------------------------------------------------------------------------------
522 Returns the result of converting the 32-bit two's complement integer `a' to
523 the single-precision floating-point format.  The conversion is performed
524 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
525 -------------------------------------------------------------------------------
526 */
527 float32 int32_to_float32( int32 a )
528 {
529     flag zSign;
530 
531     if ( a == 0 ) return 0;
532     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
533     zSign = ( a < 0 );
534     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
535 
536 }
537 
538 /*
539 -------------------------------------------------------------------------------
540 Returns the result of converting the 32-bit two's complement integer `a' to
541 the double-precision floating-point format.  The conversion is performed
542 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
543 -------------------------------------------------------------------------------
544 */
545 float64 int32_to_float64( int32 a )
546 {
547     flag zSign;
548     bits32 absA;
549     int8 shiftCount;
550     bits32 zSig0, zSig1;
551 
552     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
553     zSign = ( a < 0 );
554     absA = zSign ? - a : a;
555     shiftCount = countLeadingZeros32( absA ) - 11;
556     if ( 0 <= shiftCount ) {
557         zSig0 = absA<<shiftCount;
558         zSig1 = 0;
559     }
560     else {
561         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
562     }
563     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
564 
565 }
566 
567 #ifndef SOFTFLOAT_FOR_GCC
568 /*
569 -------------------------------------------------------------------------------
570 Returns the result of converting the single-precision floating-point value
571 `a' to the 32-bit two's complement integer format.  The conversion is
572 performed according to the IEC/IEEE Standard for Binary Floating-Point
573 Arithmetic---which means in particular that the conversion is rounded
574 according to the current rounding mode.  If `a' is a NaN, the largest
575 positive integer is returned.  Otherwise, if the conversion overflows, the
576 largest integer with the same sign as `a' is returned.
577 -------------------------------------------------------------------------------
578 */
579 int32 float32_to_int32( float32 a )
580 {
581     flag aSign;
582     int16 aExp, shiftCount;
583     bits32 aSig, aSigExtra;
584     int32 z;
585     int8 roundingMode;
586 
587     aSig = extractFloat32Frac( a );
588     aExp = extractFloat32Exp( a );
589     aSign = extractFloat32Sign( a );
590     shiftCount = aExp - 0x96;
591     if ( 0 <= shiftCount ) {
592         if ( 0x9E <= aExp ) {
593             if ( a != 0xCF000000 ) {
594                 float_raise( float_flag_invalid );
595                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
596                     return 0x7FFFFFFF;
597                 }
598             }
599             return (sbits32) 0x80000000;
600         }
601         z = ( aSig | 0x00800000 )<<shiftCount;
602         if ( aSign ) z = - z;
603     }
604     else {
605         if ( aExp < 0x7E ) {
606             aSigExtra = aExp | aSig;
607             z = 0;
608         }
609         else {
610             aSig |= 0x00800000;
611             aSigExtra = aSig<<( shiftCount & 31 );
612             z = aSig>>( - shiftCount );
613         }
614         if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
615         roundingMode = float_rounding_mode;
616         if ( roundingMode == float_round_nearest_even ) {
617             if ( (sbits32) aSigExtra < 0 ) {
618                 ++z;
619                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
620             }
621             if ( aSign ) z = - z;
622         }
623         else {
624             aSigExtra = ( aSigExtra != 0 );
625             if ( aSign ) {
626                 z += ( roundingMode == float_round_down ) & aSigExtra;
627                 z = - z;
628             }
629             else {
630                 z += ( roundingMode == float_round_up ) & aSigExtra;
631             }
632         }
633     }
634     return z;
635 
636 }
637 #endif
638 
639 /*
640 -------------------------------------------------------------------------------
641 Returns the result of converting the single-precision floating-point value
642 `a' to the 32-bit two's complement integer format.  The conversion is
643 performed according to the IEC/IEEE Standard for Binary Floating-Point
644 Arithmetic, except that the conversion is always rounded toward zero.
645 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
646 the conversion overflows, the largest integer with the same sign as `a' is
647 returned.
648 -------------------------------------------------------------------------------
649 */
650 int32 float32_to_int32_round_to_zero( float32 a )
651 {
652     flag aSign;
653     int16 aExp, shiftCount;
654     bits32 aSig;
655     int32 z;
656 
657     aSig = extractFloat32Frac( a );
658     aExp = extractFloat32Exp( a );
659     aSign = extractFloat32Sign( a );
660     shiftCount = aExp - 0x9E;
661     if ( 0 <= shiftCount ) {
662         if ( a != 0xCF000000 ) {
663             float_raise( float_flag_invalid );
664             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
665         }
666         return (sbits32) 0x80000000;
667     }
668     else if ( aExp <= 0x7E ) {
669         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
670         return 0;
671     }
672     aSig = ( aSig | 0x00800000 )<<8;
673     z = aSig>>( - shiftCount );
674     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
675         float_exception_flags |= float_flag_inexact;
676     }
677     if ( aSign ) z = - z;
678     return z;
679 
680 }
681 
682 /*
683 -------------------------------------------------------------------------------
684 Returns the result of converting the single-precision floating-point value
685 `a' to the double-precision floating-point format.  The conversion is
686 performed according to the IEC/IEEE Standard for Binary Floating-Point
687 Arithmetic.
688 -------------------------------------------------------------------------------
689 */
690 float64 float32_to_float64( float32 a )
691 {
692     flag aSign;
693     int16 aExp;
694     bits32 aSig, zSig0, zSig1;
695 
696     aSig = extractFloat32Frac( a );
697     aExp = extractFloat32Exp( a );
698     aSign = extractFloat32Sign( a );
699     if ( aExp == 0xFF ) {
700         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
701         return packFloat64( aSign, 0x7FF, 0, 0 );
702     }
703     if ( aExp == 0 ) {
704         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
705         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
706         --aExp;
707     }
708     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
709     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
710 
711 }
712 
713 #ifndef SOFTFLOAT_FOR_GCC
714 /*
715 -------------------------------------------------------------------------------
716 Rounds the single-precision floating-point value `a' to an integer,
717 and returns the result as a single-precision floating-point value.  The
718 operation is performed according to the IEC/IEEE Standard for Binary
719 Floating-Point Arithmetic.
720 -------------------------------------------------------------------------------
721 */
722 float32 float32_round_to_int( float32 a )
723 {
724     flag aSign;
725     int16 aExp;
726     bits32 lastBitMask, roundBitsMask;
727     int8 roundingMode;
728     float32 z;
729 
730     aExp = extractFloat32Exp( a );
731     if ( 0x96 <= aExp ) {
732         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
733             return propagateFloat32NaN( a, a );
734         }
735         return a;
736     }
737     if ( aExp <= 0x7E ) {
738         if ( (bits32) ( a<<1 ) == 0 ) return a;
739         float_exception_flags |= float_flag_inexact;
740         aSign = extractFloat32Sign( a );
741         switch ( float_rounding_mode ) {
742          case float_round_nearest_even:
743             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
744                 return packFloat32( aSign, 0x7F, 0 );
745             }
746             break;
747 	 case float_round_to_zero:
748 	    break;
749          case float_round_down:
750             return aSign ? 0xBF800000 : 0;
751          case float_round_up:
752             return aSign ? 0x80000000 : 0x3F800000;
753         }
754         return packFloat32( aSign, 0, 0 );
755     }
756     lastBitMask = 1;
757     lastBitMask <<= 0x96 - aExp;
758     roundBitsMask = lastBitMask - 1;
759     z = a;
760     roundingMode = float_rounding_mode;
761     if ( roundingMode == float_round_nearest_even ) {
762         z += lastBitMask>>1;
763         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
764     }
765     else if ( roundingMode != float_round_to_zero ) {
766         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
767             z += roundBitsMask;
768         }
769     }
770     z &= ~ roundBitsMask;
771     if ( z != a ) float_exception_flags |= float_flag_inexact;
772     return z;
773 
774 }
775 #endif
776 
777 /*
778 -------------------------------------------------------------------------------
779 Returns the result of adding the absolute values of the single-precision
780 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
781 before being returned.  `zSign' is ignored if the result is a NaN.
782 The addition is performed according to the IEC/IEEE Standard for Binary
783 Floating-Point Arithmetic.
784 -------------------------------------------------------------------------------
785 */
786 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
787 {
788     int16 aExp, bExp, zExp;
789     bits32 aSig, bSig, zSig;
790     int16 expDiff;
791 
792     aSig = extractFloat32Frac( a );
793     aExp = extractFloat32Exp( a );
794     bSig = extractFloat32Frac( b );
795     bExp = extractFloat32Exp( b );
796     expDiff = aExp - bExp;
797     aSig <<= 6;
798     bSig <<= 6;
799     if ( 0 < expDiff ) {
800         if ( aExp == 0xFF ) {
801             if ( aSig ) return propagateFloat32NaN( a, b );
802             return a;
803         }
804         if ( bExp == 0 ) {
805             --expDiff;
806         }
807         else {
808             bSig |= 0x20000000;
809         }
810         shift32RightJamming( bSig, expDiff, &bSig );
811         zExp = aExp;
812     }
813     else if ( expDiff < 0 ) {
814         if ( bExp == 0xFF ) {
815             if ( bSig ) return propagateFloat32NaN( a, b );
816             return packFloat32( zSign, 0xFF, 0 );
817         }
818         if ( aExp == 0 ) {
819             ++expDiff;
820         }
821         else {
822             aSig |= 0x20000000;
823         }
824         shift32RightJamming( aSig, - expDiff, &aSig );
825         zExp = bExp;
826     }
827     else {
828         if ( aExp == 0xFF ) {
829             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
830             return a;
831         }
832         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
833         zSig = 0x40000000 + aSig + bSig;
834         zExp = aExp;
835         goto roundAndPack;
836     }
837     aSig |= 0x20000000;
838     zSig = ( aSig + bSig )<<1;
839     --zExp;
840     if ( (sbits32) zSig < 0 ) {
841         zSig = aSig + bSig;
842         ++zExp;
843     }
844  roundAndPack:
845     return roundAndPackFloat32( zSign, zExp, zSig );
846 
847 }
848 
849 /*
850 -------------------------------------------------------------------------------
851 Returns the result of subtracting the absolute values of the single-
852 precision floating-point values `a' and `b'.  If `zSign' is 1, the
853 difference is negated before being returned.  `zSign' is ignored if the
854 result is a NaN.  The subtraction is performed according to the IEC/IEEE
855 Standard for Binary Floating-Point Arithmetic.
856 -------------------------------------------------------------------------------
857 */
858 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
859 {
860     int16 aExp, bExp, zExp;
861     bits32 aSig, bSig, zSig;
862     int16 expDiff;
863 
864     aSig = extractFloat32Frac( a );
865     aExp = extractFloat32Exp( a );
866     bSig = extractFloat32Frac( b );
867     bExp = extractFloat32Exp( b );
868     expDiff = aExp - bExp;
869     aSig <<= 7;
870     bSig <<= 7;
871     if ( 0 < expDiff ) goto aExpBigger;
872     if ( expDiff < 0 ) goto bExpBigger;
873     if ( aExp == 0xFF ) {
874         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
875         float_raise( float_flag_invalid );
876         return float32_default_nan;
877     }
878     if ( aExp == 0 ) {
879         aExp = 1;
880         bExp = 1;
881     }
882     if ( bSig < aSig ) goto aBigger;
883     if ( aSig < bSig ) goto bBigger;
884     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
885  bExpBigger:
886     if ( bExp == 0xFF ) {
887         if ( bSig ) return propagateFloat32NaN( a, b );
888         return packFloat32( zSign ^ 1, 0xFF, 0 );
889     }
890     if ( aExp == 0 ) {
891         ++expDiff;
892     }
893     else {
894         aSig |= 0x40000000;
895     }
896     shift32RightJamming( aSig, - expDiff, &aSig );
897     bSig |= 0x40000000;
898  bBigger:
899     zSig = bSig - aSig;
900     zExp = bExp;
901     zSign ^= 1;
902     goto normalizeRoundAndPack;
903  aExpBigger:
904     if ( aExp == 0xFF ) {
905         if ( aSig ) return propagateFloat32NaN( a, b );
906         return a;
907     }
908     if ( bExp == 0 ) {
909         --expDiff;
910     }
911     else {
912         bSig |= 0x40000000;
913     }
914     shift32RightJamming( bSig, expDiff, &bSig );
915     aSig |= 0x40000000;
916  aBigger:
917     zSig = aSig - bSig;
918     zExp = aExp;
919  normalizeRoundAndPack:
920     --zExp;
921     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
922 
923 }
924 
925 /*
926 -------------------------------------------------------------------------------
927 Returns the result of adding the single-precision floating-point values `a'
928 and `b'.  The operation is performed according to the IEC/IEEE Standard for
929 Binary Floating-Point Arithmetic.
930 -------------------------------------------------------------------------------
931 */
932 float32 float32_add( float32 a, float32 b )
933 {
934     flag aSign, bSign;
935 
936     aSign = extractFloat32Sign( a );
937     bSign = extractFloat32Sign( b );
938     if ( aSign == bSign ) {
939         return addFloat32Sigs( a, b, aSign );
940     }
941     else {
942         return subFloat32Sigs( a, b, aSign );
943     }
944 
945 }
946 
947 /*
948 -------------------------------------------------------------------------------
949 Returns the result of subtracting the single-precision floating-point values
950 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
951 for Binary Floating-Point Arithmetic.
952 -------------------------------------------------------------------------------
953 */
954 float32 float32_sub( float32 a, float32 b )
955 {
956     flag aSign, bSign;
957 
958     aSign = extractFloat32Sign( a );
959     bSign = extractFloat32Sign( b );
960     if ( aSign == bSign ) {
961         return subFloat32Sigs( a, b, aSign );
962     }
963     else {
964         return addFloat32Sigs( a, b, aSign );
965     }
966 
967 }
968 
969 /*
970 -------------------------------------------------------------------------------
971 Returns the result of multiplying the single-precision floating-point values
972 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
973 for Binary Floating-Point Arithmetic.
974 -------------------------------------------------------------------------------
975 */
976 float32 float32_mul( float32 a, float32 b )
977 {
978     flag aSign, bSign, zSign;
979     int16 aExp, bExp, zExp;
980     bits32 aSig, bSig, zSig0, zSig1;
981 
982     aSig = extractFloat32Frac( a );
983     aExp = extractFloat32Exp( a );
984     aSign = extractFloat32Sign( a );
985     bSig = extractFloat32Frac( b );
986     bExp = extractFloat32Exp( b );
987     bSign = extractFloat32Sign( b );
988     zSign = aSign ^ bSign;
989     if ( aExp == 0xFF ) {
990         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
991             return propagateFloat32NaN( a, b );
992         }
993         if ( ( bExp | bSig ) == 0 ) {
994             float_raise( float_flag_invalid );
995             return float32_default_nan;
996         }
997         return packFloat32( zSign, 0xFF, 0 );
998     }
999     if ( bExp == 0xFF ) {
1000         if ( bSig ) return propagateFloat32NaN( a, b );
1001         if ( ( aExp | aSig ) == 0 ) {
1002             float_raise( float_flag_invalid );
1003             return float32_default_nan;
1004         }
1005         return packFloat32( zSign, 0xFF, 0 );
1006     }
1007     if ( aExp == 0 ) {
1008         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1009         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1010     }
1011     if ( bExp == 0 ) {
1012         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1013         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1014     }
1015     zExp = aExp + bExp - 0x7F;
1016     aSig = ( aSig | 0x00800000 )<<7;
1017     bSig = ( bSig | 0x00800000 )<<8;
1018     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1019     zSig0 |= ( zSig1 != 0 );
1020     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1021         zSig0 <<= 1;
1022         --zExp;
1023     }
1024     return roundAndPackFloat32( zSign, zExp, zSig0 );
1025 
1026 }
1027 
1028 /*
1029 -------------------------------------------------------------------------------
1030 Returns the result of dividing the single-precision floating-point value `a'
1031 by the corresponding value `b'.  The operation is performed according to the
1032 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1033 -------------------------------------------------------------------------------
1034 */
1035 float32 float32_div( float32 a, float32 b )
1036 {
1037     flag aSign, bSign, zSign;
1038     int16 aExp, bExp, zExp;
1039     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1040 
1041     aSig = extractFloat32Frac( a );
1042     aExp = extractFloat32Exp( a );
1043     aSign = extractFloat32Sign( a );
1044     bSig = extractFloat32Frac( b );
1045     bExp = extractFloat32Exp( b );
1046     bSign = extractFloat32Sign( b );
1047     zSign = aSign ^ bSign;
1048     if ( aExp == 0xFF ) {
1049         if ( aSig ) return propagateFloat32NaN( a, b );
1050         if ( bExp == 0xFF ) {
1051             if ( bSig ) return propagateFloat32NaN( a, b );
1052             float_raise( float_flag_invalid );
1053             return float32_default_nan;
1054         }
1055         return packFloat32( zSign, 0xFF, 0 );
1056     }
1057     if ( bExp == 0xFF ) {
1058         if ( bSig ) return propagateFloat32NaN( a, b );
1059         return packFloat32( zSign, 0, 0 );
1060     }
1061     if ( bExp == 0 ) {
1062         if ( bSig == 0 ) {
1063             if ( ( aExp | aSig ) == 0 ) {
1064                 float_raise( float_flag_invalid );
1065                 return float32_default_nan;
1066             }
1067             float_raise( float_flag_divbyzero );
1068             return packFloat32( zSign, 0xFF, 0 );
1069         }
1070         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1071     }
1072     if ( aExp == 0 ) {
1073         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1074         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1075     }
1076     zExp = aExp - bExp + 0x7D;
1077     aSig = ( aSig | 0x00800000 )<<7;
1078     bSig = ( bSig | 0x00800000 )<<8;
1079     if ( bSig <= ( aSig + aSig ) ) {
1080         aSig >>= 1;
1081         ++zExp;
1082     }
1083     zSig = estimateDiv64To32( aSig, 0, bSig );
1084     if ( ( zSig & 0x3F ) <= 2 ) {
1085         mul32To64( bSig, zSig, &term0, &term1 );
1086         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1087         while ( (sbits32) rem0 < 0 ) {
1088             --zSig;
1089             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1090         }
1091         zSig |= ( rem1 != 0 );
1092     }
1093     return roundAndPackFloat32( zSign, zExp, zSig );
1094 
1095 }
1096 
1097 #ifndef SOFTFLOAT_FOR_GCC
1098 /*
1099 -------------------------------------------------------------------------------
1100 Returns the remainder of the single-precision floating-point value `a'
1101 with respect to the corresponding value `b'.  The operation is performed
1102 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1103 -------------------------------------------------------------------------------
1104 */
1105 float32 float32_rem( float32 a, float32 b )
1106 {
1107     flag aSign, bSign, zSign;
1108     int16 aExp, bExp, expDiff;
1109     bits32 aSig, bSig, q, allZero, alternateASig;
1110     sbits32 sigMean;
1111 
1112     aSig = extractFloat32Frac( a );
1113     aExp = extractFloat32Exp( a );
1114     aSign = extractFloat32Sign( a );
1115     bSig = extractFloat32Frac( b );
1116     bExp = extractFloat32Exp( b );
1117     bSign = extractFloat32Sign( b );
1118     if ( aExp == 0xFF ) {
1119         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1120             return propagateFloat32NaN( a, b );
1121         }
1122         float_raise( float_flag_invalid );
1123         return float32_default_nan;
1124     }
1125     if ( bExp == 0xFF ) {
1126         if ( bSig ) return propagateFloat32NaN( a, b );
1127         return a;
1128     }
1129     if ( bExp == 0 ) {
1130         if ( bSig == 0 ) {
1131             float_raise( float_flag_invalid );
1132             return float32_default_nan;
1133         }
1134         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1135     }
1136     if ( aExp == 0 ) {
1137         if ( aSig == 0 ) return a;
1138         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1139     }
1140     expDiff = aExp - bExp;
1141     aSig = ( aSig | 0x00800000 )<<8;
1142     bSig = ( bSig | 0x00800000 )<<8;
1143     if ( expDiff < 0 ) {
1144         if ( expDiff < -1 ) return a;
1145         aSig >>= 1;
1146     }
1147     q = ( bSig <= aSig );
1148     if ( q ) aSig -= bSig;
1149     expDiff -= 32;
1150     while ( 0 < expDiff ) {
1151         q = estimateDiv64To32( aSig, 0, bSig );
1152         q = ( 2 < q ) ? q - 2 : 0;
1153         aSig = - ( ( bSig>>2 ) * q );
1154         expDiff -= 30;
1155     }
1156     expDiff += 32;
1157     if ( 0 < expDiff ) {
1158         q = estimateDiv64To32( aSig, 0, bSig );
1159         q = ( 2 < q ) ? q - 2 : 0;
1160         q >>= 32 - expDiff;
1161         bSig >>= 2;
1162         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1163     }
1164     else {
1165         aSig >>= 2;
1166         bSig >>= 2;
1167     }
1168     do {
1169         alternateASig = aSig;
1170         ++q;
1171         aSig -= bSig;
1172     } while ( 0 <= (sbits32) aSig );
1173     sigMean = aSig + alternateASig;
1174     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1175         aSig = alternateASig;
1176     }
1177     zSign = ( (sbits32) aSig < 0 );
1178     if ( zSign ) aSig = - aSig;
1179     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1180 
1181 }
1182 #endif
1183 
1184 #ifndef SOFTFLOAT_FOR_GCC
1185 /*
1186 -------------------------------------------------------------------------------
1187 Returns the square root of the single-precision floating-point value `a'.
1188 The operation is performed according to the IEC/IEEE Standard for Binary
1189 Floating-Point Arithmetic.
1190 -------------------------------------------------------------------------------
1191 */
1192 float32 float32_sqrt( float32 a )
1193 {
1194     flag aSign;
1195     int16 aExp, zExp;
1196     bits32 aSig, zSig, rem0, rem1, term0, term1;
1197 
1198     aSig = extractFloat32Frac( a );
1199     aExp = extractFloat32Exp( a );
1200     aSign = extractFloat32Sign( a );
1201     if ( aExp == 0xFF ) {
1202         if ( aSig ) return propagateFloat32NaN( a, 0 );
1203         if ( ! aSign ) return a;
1204         float_raise( float_flag_invalid );
1205         return float32_default_nan;
1206     }
1207     if ( aSign ) {
1208         if ( ( aExp | aSig ) == 0 ) return a;
1209         float_raise( float_flag_invalid );
1210         return float32_default_nan;
1211     }
1212     if ( aExp == 0 ) {
1213         if ( aSig == 0 ) return 0;
1214         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1215     }
1216     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1217     aSig = ( aSig | 0x00800000 )<<8;
1218     zSig = estimateSqrt32( aExp, aSig ) + 2;
1219     if ( ( zSig & 0x7F ) <= 5 ) {
1220         if ( zSig < 2 ) {
1221             zSig = 0x7FFFFFFF;
1222             goto roundAndPack;
1223         }
1224         else {
1225             aSig >>= aExp & 1;
1226             mul32To64( zSig, zSig, &term0, &term1 );
1227             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1228             while ( (sbits32) rem0 < 0 ) {
1229                 --zSig;
1230                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1231                 term1 |= 1;
1232                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1233             }
1234             zSig |= ( ( rem0 | rem1 ) != 0 );
1235         }
1236     }
1237     shift32RightJamming( zSig, 1, &zSig );
1238  roundAndPack:
1239     return roundAndPackFloat32( 0, zExp, zSig );
1240 
1241 }
1242 #endif
1243 
1244 /*
1245 -------------------------------------------------------------------------------
1246 Returns 1 if the single-precision floating-point value `a' is equal to
1247 the corresponding value `b', and 0 otherwise.  The comparison is performed
1248 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1249 -------------------------------------------------------------------------------
1250 */
1251 flag float32_eq( float32 a, float32 b )
1252 {
1253 
1254     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1255          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1256        ) {
1257         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1258             float_raise( float_flag_invalid );
1259         }
1260         return 0;
1261     }
1262     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1263 
1264 }
1265 
1266 /*
1267 -------------------------------------------------------------------------------
1268 Returns 1 if the single-precision floating-point value `a' is less than
1269 or equal to the corresponding value `b', and 0 otherwise.  The comparison
1270 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1271 Arithmetic.
1272 -------------------------------------------------------------------------------
1273 */
1274 flag float32_le( float32 a, float32 b )
1275 {
1276     flag aSign, bSign;
1277 
1278     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1279          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1280        ) {
1281         float_raise( float_flag_invalid );
1282         return 0;
1283     }
1284     aSign = extractFloat32Sign( a );
1285     bSign = extractFloat32Sign( b );
1286     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1287     return ( a == b ) || ( aSign ^ ( a < b ) );
1288 
1289 }
1290 
1291 /*
1292 -------------------------------------------------------------------------------
1293 Returns 1 if the single-precision floating-point value `a' is less than
1294 the corresponding value `b', and 0 otherwise.  The comparison is performed
1295 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1296 -------------------------------------------------------------------------------
1297 */
1298 flag float32_lt( float32 a, float32 b )
1299 {
1300     flag aSign, bSign;
1301 
1302     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1303          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1304        ) {
1305         float_raise( float_flag_invalid );
1306         return 0;
1307     }
1308     aSign = extractFloat32Sign( a );
1309     bSign = extractFloat32Sign( b );
1310     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1311     return ( a != b ) && ( aSign ^ ( a < b ) );
1312 
1313 }
1314 
1315 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1316 /*
1317 -------------------------------------------------------------------------------
1318 Returns 1 if the single-precision floating-point value `a' is equal to
1319 the corresponding value `b', and 0 otherwise.  The invalid exception is
1320 raised if either operand is a NaN.  Otherwise, the comparison is performed
1321 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1322 -------------------------------------------------------------------------------
1323 */
1324 flag float32_eq_signaling( float32 a, float32 b )
1325 {
1326 
1327     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1328          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1329        ) {
1330         float_raise( float_flag_invalid );
1331         return 0;
1332     }
1333     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1334 
1335 }
1336 
1337 /*
1338 -------------------------------------------------------------------------------
1339 Returns 1 if the single-precision floating-point value `a' is less than or
1340 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1341 cause an exception.  Otherwise, the comparison is performed according to the
1342 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1343 -------------------------------------------------------------------------------
1344 */
1345 flag float32_le_quiet( float32 a, float32 b )
1346 {
1347     flag aSign, bSign;
1348     int16 aExp, bExp;
1349 
1350     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1351          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1352        ) {
1353         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1354             float_raise( float_flag_invalid );
1355         }
1356         return 0;
1357     }
1358     aSign = extractFloat32Sign( a );
1359     bSign = extractFloat32Sign( b );
1360     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1361     return ( a == b ) || ( aSign ^ ( a < b ) );
1362 
1363 }
1364 
1365 /*
1366 -------------------------------------------------------------------------------
1367 Returns 1 if the single-precision floating-point value `a' is less than
1368 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1369 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1370 Standard for Binary Floating-Point Arithmetic.
1371 -------------------------------------------------------------------------------
1372 */
1373 flag float32_lt_quiet( float32 a, float32 b )
1374 {
1375     flag aSign, bSign;
1376 
1377     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1378          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1379        ) {
1380         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1381             float_raise( float_flag_invalid );
1382         }
1383         return 0;
1384     }
1385     aSign = extractFloat32Sign( a );
1386     bSign = extractFloat32Sign( b );
1387     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1388     return ( a != b ) && ( aSign ^ ( a < b ) );
1389 
1390 }
1391 #endif /* !SOFTFLOAT_FOR_GCC */
1392 
1393 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1394 /*
1395 -------------------------------------------------------------------------------
1396 Returns the result of converting the double-precision floating-point value
1397 `a' to the 32-bit two's complement integer format.  The conversion is
1398 performed according to the IEC/IEEE Standard for Binary Floating-Point
1399 Arithmetic---which means in particular that the conversion is rounded
1400 according to the current rounding mode.  If `a' is a NaN, the largest
1401 positive integer is returned.  Otherwise, if the conversion overflows, the
1402 largest integer with the same sign as `a' is returned.
1403 -------------------------------------------------------------------------------
1404 */
1405 int32 float64_to_int32( float64 a )
1406 {
1407     flag aSign;
1408     int16 aExp, shiftCount;
1409     bits32 aSig0, aSig1, absZ, aSigExtra;
1410     int32 z;
1411     int8 roundingMode;
1412 
1413     aSig1 = extractFloat64Frac1( a );
1414     aSig0 = extractFloat64Frac0( a );
1415     aExp = extractFloat64Exp( a );
1416     aSign = extractFloat64Sign( a );
1417     shiftCount = aExp - 0x413;
1418     if ( 0 <= shiftCount ) {
1419         if ( 0x41E < aExp ) {
1420             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1421             goto invalid;
1422         }
1423         shortShift64Left(
1424             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1425         if ( 0x80000000 < absZ ) goto invalid;
1426     }
1427     else {
1428         aSig1 = ( aSig1 != 0 );
1429         if ( aExp < 0x3FE ) {
1430             aSigExtra = aExp | aSig0 | aSig1;
1431             absZ = 0;
1432         }
1433         else {
1434             aSig0 |= 0x00100000;
1435             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1436             absZ = aSig0>>( - shiftCount );
1437         }
1438     }
1439     roundingMode = float_rounding_mode;
1440     if ( roundingMode == float_round_nearest_even ) {
1441         if ( (sbits32) aSigExtra < 0 ) {
1442             ++absZ;
1443             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1444         }
1445         z = aSign ? - absZ : absZ;
1446     }
1447     else {
1448         aSigExtra = ( aSigExtra != 0 );
1449         if ( aSign ) {
1450             z = - (   absZ
1451                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1452         }
1453         else {
1454             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1455         }
1456     }
1457     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1458  invalid:
1459         float_raise( float_flag_invalid );
1460         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1461     }
1462     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1463     return z;
1464 
1465 }
1466 #endif /* !SOFTFLOAT_FOR_GCC */
1467 
1468 /*
1469 -------------------------------------------------------------------------------
1470 Returns the result of converting the double-precision floating-point value
1471 `a' to the 32-bit two's complement integer format.  The conversion is
1472 performed according to the IEC/IEEE Standard for Binary Floating-Point
1473 Arithmetic, except that the conversion is always rounded toward zero.
1474 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1475 the conversion overflows, the largest integer with the same sign as `a' is
1476 returned.
1477 -------------------------------------------------------------------------------
1478 */
1479 int32 float64_to_int32_round_to_zero( float64 a )
1480 {
1481     flag aSign;
1482     int16 aExp, shiftCount;
1483     bits32 aSig0, aSig1, absZ, aSigExtra;
1484     int32 z;
1485 
1486     aSig1 = extractFloat64Frac1( a );
1487     aSig0 = extractFloat64Frac0( a );
1488     aExp = extractFloat64Exp( a );
1489     aSign = extractFloat64Sign( a );
1490     shiftCount = aExp - 0x413;
1491     if ( 0 <= shiftCount ) {
1492         if ( 0x41E < aExp ) {
1493             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1494             goto invalid;
1495         }
1496         shortShift64Left(
1497             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1498     }
1499     else {
1500         if ( aExp < 0x3FF ) {
1501             if ( aExp | aSig0 | aSig1 ) {
1502                 float_exception_flags |= float_flag_inexact;
1503             }
1504             return 0;
1505         }
1506         aSig0 |= 0x00100000;
1507         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1508         absZ = aSig0>>( - shiftCount );
1509     }
1510     z = aSign ? - absZ : absZ;
1511     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1512  invalid:
1513         float_raise( float_flag_invalid );
1514         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1515     }
1516     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1517     return z;
1518 
1519 }
1520 
1521 /*
1522 -------------------------------------------------------------------------------
1523 Returns the result of converting the double-precision floating-point value
1524 `a' to the single-precision floating-point format.  The conversion is
1525 performed according to the IEC/IEEE Standard for Binary Floating-Point
1526 Arithmetic.
1527 -------------------------------------------------------------------------------
1528 */
1529 float32 float64_to_float32( float64 a )
1530 {
1531     flag aSign;
1532     int16 aExp;
1533     bits32 aSig0, aSig1, zSig;
1534     bits32 allZero;
1535 
1536     aSig1 = extractFloat64Frac1( a );
1537     aSig0 = extractFloat64Frac0( a );
1538     aExp = extractFloat64Exp( a );
1539     aSign = extractFloat64Sign( a );
1540     if ( aExp == 0x7FF ) {
1541         if ( aSig0 | aSig1 ) {
1542             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1543         }
1544         return packFloat32( aSign, 0xFF, 0 );
1545     }
1546     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1547     if ( aExp ) zSig |= 0x40000000;
1548     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1549 
1550 }
1551 
1552 #ifndef SOFTFLOAT_FOR_GCC
1553 /*
1554 -------------------------------------------------------------------------------
1555 Rounds the double-precision floating-point value `a' to an integer,
1556 and returns the result as a double-precision floating-point value.  The
1557 operation is performed according to the IEC/IEEE Standard for Binary
1558 Floating-Point Arithmetic.
1559 -------------------------------------------------------------------------------
1560 */
1561 float64 float64_round_to_int( float64 a )
1562 {
1563     flag aSign;
1564     int16 aExp;
1565     bits32 lastBitMask, roundBitsMask;
1566     int8 roundingMode;
1567     float64 z;
1568 
1569     aExp = extractFloat64Exp( a );
1570     if ( 0x413 <= aExp ) {
1571         if ( 0x433 <= aExp ) {
1572             if (    ( aExp == 0x7FF )
1573                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1574                 return propagateFloat64NaN( a, a );
1575             }
1576             return a;
1577         }
1578         lastBitMask = 1;
1579         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1580         roundBitsMask = lastBitMask - 1;
1581         z = a;
1582         roundingMode = float_rounding_mode;
1583         if ( roundingMode == float_round_nearest_even ) {
1584             if ( lastBitMask ) {
1585                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1586                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1587             }
1588             else {
1589                 if ( (sbits32) z.low < 0 ) {
1590                     ++z.high;
1591                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1592                 }
1593             }
1594         }
1595         else if ( roundingMode != float_round_to_zero ) {
1596             if (   extractFloat64Sign( z )
1597                  ^ ( roundingMode == float_round_up ) ) {
1598                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1599             }
1600         }
1601         z.low &= ~ roundBitsMask;
1602     }
1603     else {
1604         if ( aExp <= 0x3FE ) {
1605             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1606             float_exception_flags |= float_flag_inexact;
1607             aSign = extractFloat64Sign( a );
1608             switch ( float_rounding_mode ) {
1609              case float_round_nearest_even:
1610                 if (    ( aExp == 0x3FE )
1611                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1612                    ) {
1613                     return packFloat64( aSign, 0x3FF, 0, 0 );
1614                 }
1615                 break;
1616              case float_round_down:
1617                 return
1618                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1619                     : packFloat64( 0, 0, 0, 0 );
1620              case float_round_up:
1621                 return
1622                       aSign ? packFloat64( 1, 0, 0, 0 )
1623                     : packFloat64( 0, 0x3FF, 0, 0 );
1624             }
1625             return packFloat64( aSign, 0, 0, 0 );
1626         }
1627         lastBitMask = 1;
1628         lastBitMask <<= 0x413 - aExp;
1629         roundBitsMask = lastBitMask - 1;
1630         z.low = 0;
1631         z.high = a.high;
1632         roundingMode = float_rounding_mode;
1633         if ( roundingMode == float_round_nearest_even ) {
1634             z.high += lastBitMask>>1;
1635             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1636                 z.high &= ~ lastBitMask;
1637             }
1638         }
1639         else if ( roundingMode != float_round_to_zero ) {
1640             if (   extractFloat64Sign( z )
1641                  ^ ( roundingMode == float_round_up ) ) {
1642                 z.high |= ( a.low != 0 );
1643                 z.high += roundBitsMask;
1644             }
1645         }
1646         z.high &= ~ roundBitsMask;
1647     }
1648     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1649         float_exception_flags |= float_flag_inexact;
1650     }
1651     return z;
1652 
1653 }
1654 #endif
1655 
1656 /*
1657 -------------------------------------------------------------------------------
1658 Returns the result of adding the absolute values of the double-precision
1659 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1660 before being returned.  `zSign' is ignored if the result is a NaN.
1661 The addition is performed according to the IEC/IEEE Standard for Binary
1662 Floating-Point Arithmetic.
1663 -------------------------------------------------------------------------------
1664 */
1665 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1666 {
1667     int16 aExp, bExp, zExp;
1668     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1669     int16 expDiff;
1670 
1671     aSig1 = extractFloat64Frac1( a );
1672     aSig0 = extractFloat64Frac0( a );
1673     aExp = extractFloat64Exp( a );
1674     bSig1 = extractFloat64Frac1( b );
1675     bSig0 = extractFloat64Frac0( b );
1676     bExp = extractFloat64Exp( b );
1677     expDiff = aExp - bExp;
1678     if ( 0 < expDiff ) {
1679         if ( aExp == 0x7FF ) {
1680             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1681             return a;
1682         }
1683         if ( bExp == 0 ) {
1684             --expDiff;
1685         }
1686         else {
1687             bSig0 |= 0x00100000;
1688         }
1689         shift64ExtraRightJamming(
1690             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1691         zExp = aExp;
1692     }
1693     else if ( expDiff < 0 ) {
1694         if ( bExp == 0x7FF ) {
1695             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1696             return packFloat64( zSign, 0x7FF, 0, 0 );
1697         }
1698         if ( aExp == 0 ) {
1699             ++expDiff;
1700         }
1701         else {
1702             aSig0 |= 0x00100000;
1703         }
1704         shift64ExtraRightJamming(
1705             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1706         zExp = bExp;
1707     }
1708     else {
1709         if ( aExp == 0x7FF ) {
1710             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1711                 return propagateFloat64NaN( a, b );
1712             }
1713             return a;
1714         }
1715         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1716         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1717         zSig2 = 0;
1718         zSig0 |= 0x00200000;
1719         zExp = aExp;
1720         goto shiftRight1;
1721     }
1722     aSig0 |= 0x00100000;
1723     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1724     --zExp;
1725     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1726     ++zExp;
1727  shiftRight1:
1728     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1729  roundAndPack:
1730     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1731 
1732 }
1733 
1734 /*
1735 -------------------------------------------------------------------------------
1736 Returns the result of subtracting the absolute values of the double-
1737 precision floating-point values `a' and `b'.  If `zSign' is 1, the
1738 difference is negated before being returned.  `zSign' is ignored if the
1739 result is a NaN.  The subtraction is performed according to the IEC/IEEE
1740 Standard for Binary Floating-Point Arithmetic.
1741 -------------------------------------------------------------------------------
1742 */
1743 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1744 {
1745     int16 aExp, bExp, zExp;
1746     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1747     int16 expDiff;
1748 
1749     aSig1 = extractFloat64Frac1( a );
1750     aSig0 = extractFloat64Frac0( a );
1751     aExp = extractFloat64Exp( a );
1752     bSig1 = extractFloat64Frac1( b );
1753     bSig0 = extractFloat64Frac0( b );
1754     bExp = extractFloat64Exp( b );
1755     expDiff = aExp - bExp;
1756     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1757     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1758     if ( 0 < expDiff ) goto aExpBigger;
1759     if ( expDiff < 0 ) goto bExpBigger;
1760     if ( aExp == 0x7FF ) {
1761         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1762             return propagateFloat64NaN( a, b );
1763         }
1764         float_raise( float_flag_invalid );
1765         return float64_default_nan;
1766     }
1767     if ( aExp == 0 ) {
1768         aExp = 1;
1769         bExp = 1;
1770     }
1771     if ( bSig0 < aSig0 ) goto aBigger;
1772     if ( aSig0 < bSig0 ) goto bBigger;
1773     if ( bSig1 < aSig1 ) goto aBigger;
1774     if ( aSig1 < bSig1 ) goto bBigger;
1775     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1776  bExpBigger:
1777     if ( bExp == 0x7FF ) {
1778         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1779         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1780     }
1781     if ( aExp == 0 ) {
1782         ++expDiff;
1783     }
1784     else {
1785         aSig0 |= 0x40000000;
1786     }
1787     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1788     bSig0 |= 0x40000000;
1789  bBigger:
1790     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1791     zExp = bExp;
1792     zSign ^= 1;
1793     goto normalizeRoundAndPack;
1794  aExpBigger:
1795     if ( aExp == 0x7FF ) {
1796         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1797         return a;
1798     }
1799     if ( bExp == 0 ) {
1800         --expDiff;
1801     }
1802     else {
1803         bSig0 |= 0x40000000;
1804     }
1805     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1806     aSig0 |= 0x40000000;
1807  aBigger:
1808     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1809     zExp = aExp;
1810  normalizeRoundAndPack:
1811     --zExp;
1812     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1813 
1814 }
1815 
1816 /*
1817 -------------------------------------------------------------------------------
1818 Returns the result of adding the double-precision floating-point values `a'
1819 and `b'.  The operation is performed according to the IEC/IEEE Standard for
1820 Binary Floating-Point Arithmetic.
1821 -------------------------------------------------------------------------------
1822 */
1823 float64 float64_add( float64 a, float64 b )
1824 {
1825     flag aSign, bSign;
1826 
1827     aSign = extractFloat64Sign( a );
1828     bSign = extractFloat64Sign( b );
1829     if ( aSign == bSign ) {
1830         return addFloat64Sigs( a, b, aSign );
1831     }
1832     else {
1833         return subFloat64Sigs( a, b, aSign );
1834     }
1835 
1836 }
1837 
1838 /*
1839 -------------------------------------------------------------------------------
1840 Returns the result of subtracting the double-precision floating-point values
1841 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1842 for Binary Floating-Point Arithmetic.
1843 -------------------------------------------------------------------------------
1844 */
1845 float64 float64_sub( float64 a, float64 b )
1846 {
1847     flag aSign, bSign;
1848 
1849     aSign = extractFloat64Sign( a );
1850     bSign = extractFloat64Sign( b );
1851     if ( aSign == bSign ) {
1852         return subFloat64Sigs( a, b, aSign );
1853     }
1854     else {
1855         return addFloat64Sigs( a, b, aSign );
1856     }
1857 
1858 }
1859 
1860 /*
1861 -------------------------------------------------------------------------------
1862 Returns the result of multiplying the double-precision floating-point values
1863 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1864 for Binary Floating-Point Arithmetic.
1865 -------------------------------------------------------------------------------
1866 */
1867 float64 float64_mul( float64 a, float64 b )
1868 {
1869     flag aSign, bSign, zSign;
1870     int16 aExp, bExp, zExp;
1871     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1872 
1873     aSig1 = extractFloat64Frac1( a );
1874     aSig0 = extractFloat64Frac0( a );
1875     aExp = extractFloat64Exp( a );
1876     aSign = extractFloat64Sign( a );
1877     bSig1 = extractFloat64Frac1( b );
1878     bSig0 = extractFloat64Frac0( b );
1879     bExp = extractFloat64Exp( b );
1880     bSign = extractFloat64Sign( b );
1881     zSign = aSign ^ bSign;
1882     if ( aExp == 0x7FF ) {
1883         if (    ( aSig0 | aSig1 )
1884              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1885             return propagateFloat64NaN( a, b );
1886         }
1887         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1888         return packFloat64( zSign, 0x7FF, 0, 0 );
1889     }
1890     if ( bExp == 0x7FF ) {
1891         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1892         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1893  invalid:
1894             float_raise( float_flag_invalid );
1895             return float64_default_nan;
1896         }
1897         return packFloat64( zSign, 0x7FF, 0, 0 );
1898     }
1899     if ( aExp == 0 ) {
1900         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1901         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1902     }
1903     if ( bExp == 0 ) {
1904         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1905         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1906     }
1907     zExp = aExp + bExp - 0x400;
1908     aSig0 |= 0x00100000;
1909     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1910     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1911     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1912     zSig2 |= ( zSig3 != 0 );
1913     if ( 0x00200000 <= zSig0 ) {
1914         shift64ExtraRightJamming(
1915             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1916         ++zExp;
1917     }
1918     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1919 
1920 }
1921 
1922 /*
1923 -------------------------------------------------------------------------------
1924 Returns the result of dividing the double-precision floating-point value `a'
1925 by the corresponding value `b'.  The operation is performed according to the
1926 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1927 -------------------------------------------------------------------------------
1928 */
1929 float64 float64_div( float64 a, float64 b )
1930 {
1931     flag aSign, bSign, zSign;
1932     int16 aExp, bExp, zExp;
1933     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1934     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1935 
1936     aSig1 = extractFloat64Frac1( a );
1937     aSig0 = extractFloat64Frac0( a );
1938     aExp = extractFloat64Exp( a );
1939     aSign = extractFloat64Sign( a );
1940     bSig1 = extractFloat64Frac1( b );
1941     bSig0 = extractFloat64Frac0( b );
1942     bExp = extractFloat64Exp( b );
1943     bSign = extractFloat64Sign( b );
1944     zSign = aSign ^ bSign;
1945     if ( aExp == 0x7FF ) {
1946         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1947         if ( bExp == 0x7FF ) {
1948             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1949             goto invalid;
1950         }
1951         return packFloat64( zSign, 0x7FF, 0, 0 );
1952     }
1953     if ( bExp == 0x7FF ) {
1954         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1955         return packFloat64( zSign, 0, 0, 0 );
1956     }
1957     if ( bExp == 0 ) {
1958         if ( ( bSig0 | bSig1 ) == 0 ) {
1959             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1960  invalid:
1961                 float_raise( float_flag_invalid );
1962                 return float64_default_nan;
1963             }
1964             float_raise( float_flag_divbyzero );
1965             return packFloat64( zSign, 0x7FF, 0, 0 );
1966         }
1967         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1968     }
1969     if ( aExp == 0 ) {
1970         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1971         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1972     }
1973     zExp = aExp - bExp + 0x3FD;
1974     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1975     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1976     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1977         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1978         ++zExp;
1979     }
1980     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1981     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1982     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1983     while ( (sbits32) rem0 < 0 ) {
1984         --zSig0;
1985         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1986     }
1987     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1988     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1989         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1990         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1991         while ( (sbits32) rem1 < 0 ) {
1992             --zSig1;
1993             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1994         }
1995         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
1996     }
1997     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
1998     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1999 
2000 }
2001 
2002 #ifndef SOFTFLOAT_FOR_GCC
2003 /*
2004 -------------------------------------------------------------------------------
2005 Returns the remainder of the double-precision floating-point value `a'
2006 with respect to the corresponding value `b'.  The operation is performed
2007 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2008 -------------------------------------------------------------------------------
2009 */
2010 float64 float64_rem( float64 a, float64 b )
2011 {
2012     flag aSign, bSign, zSign;
2013     int16 aExp, bExp, expDiff;
2014     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2015     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2016     sbits32 sigMean0;
2017     float64 z;
2018 
2019     aSig1 = extractFloat64Frac1( a );
2020     aSig0 = extractFloat64Frac0( a );
2021     aExp = extractFloat64Exp( a );
2022     aSign = extractFloat64Sign( a );
2023     bSig1 = extractFloat64Frac1( b );
2024     bSig0 = extractFloat64Frac0( b );
2025     bExp = extractFloat64Exp( b );
2026     bSign = extractFloat64Sign( b );
2027     if ( aExp == 0x7FF ) {
2028         if (    ( aSig0 | aSig1 )
2029              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2030             return propagateFloat64NaN( a, b );
2031         }
2032         goto invalid;
2033     }
2034     if ( bExp == 0x7FF ) {
2035         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2036         return a;
2037     }
2038     if ( bExp == 0 ) {
2039         if ( ( bSig0 | bSig1 ) == 0 ) {
2040  invalid:
2041             float_raise( float_flag_invalid );
2042             return float64_default_nan;
2043         }
2044         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2045     }
2046     if ( aExp == 0 ) {
2047         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2048         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2049     }
2050     expDiff = aExp - bExp;
2051     if ( expDiff < -1 ) return a;
2052     shortShift64Left(
2053         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2054     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2055     q = le64( bSig0, bSig1, aSig0, aSig1 );
2056     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2057     expDiff -= 32;
2058     while ( 0 < expDiff ) {
2059         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2060         q = ( 4 < q ) ? q - 4 : 0;
2061         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2062         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2063         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2064         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2065         expDiff -= 29;
2066     }
2067     if ( -32 < expDiff ) {
2068         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2069         q = ( 4 < q ) ? q - 4 : 0;
2070         q >>= - expDiff;
2071         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2072         expDiff += 24;
2073         if ( expDiff < 0 ) {
2074             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2075         }
2076         else {
2077             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2078         }
2079         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2080         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2081     }
2082     else {
2083         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2084         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2085     }
2086     do {
2087         alternateASig0 = aSig0;
2088         alternateASig1 = aSig1;
2089         ++q;
2090         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2091     } while ( 0 <= (sbits32) aSig0 );
2092     add64(
2093         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2094     if (    ( sigMean0 < 0 )
2095          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2096         aSig0 = alternateASig0;
2097         aSig1 = alternateASig1;
2098     }
2099     zSign = ( (sbits32) aSig0 < 0 );
2100     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2101     return
2102         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2103 
2104 }
2105 #endif
2106 
2107 #ifndef SOFTFLOAT_FOR_GCC
2108 /*
2109 -------------------------------------------------------------------------------
2110 Returns the square root of the double-precision floating-point value `a'.
2111 The operation is performed according to the IEC/IEEE Standard for Binary
2112 Floating-Point Arithmetic.
2113 -------------------------------------------------------------------------------
2114 */
2115 float64 float64_sqrt( float64 a )
2116 {
2117     flag aSign;
2118     int16 aExp, zExp;
2119     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2120     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2121     float64 z;
2122 
2123     aSig1 = extractFloat64Frac1( a );
2124     aSig0 = extractFloat64Frac0( a );
2125     aExp = extractFloat64Exp( a );
2126     aSign = extractFloat64Sign( a );
2127     if ( aExp == 0x7FF ) {
2128         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2129         if ( ! aSign ) return a;
2130         goto invalid;
2131     }
2132     if ( aSign ) {
2133         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2134  invalid:
2135         float_raise( float_flag_invalid );
2136         return float64_default_nan;
2137     }
2138     if ( aExp == 0 ) {
2139         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2140         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2141     }
2142     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2143     aSig0 |= 0x00100000;
2144     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2145     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2146     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2147     doubleZSig0 = zSig0 + zSig0;
2148     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2149     mul32To64( zSig0, zSig0, &term0, &term1 );
2150     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2151     while ( (sbits32) rem0 < 0 ) {
2152         --zSig0;
2153         doubleZSig0 -= 2;
2154         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2155     }
2156     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2157     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2158         if ( zSig1 == 0 ) zSig1 = 1;
2159         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2160         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2161         mul32To64( zSig1, zSig1, &term2, &term3 );
2162         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2163         while ( (sbits32) rem1 < 0 ) {
2164             --zSig1;
2165             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2166             term3 |= 1;
2167             term2 |= doubleZSig0;
2168             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2169         }
2170         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2171     }
2172     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2173     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2174 
2175 }
2176 #endif
2177 
2178 /*
2179 -------------------------------------------------------------------------------
2180 Returns 1 if the double-precision floating-point value `a' is equal to
2181 the corresponding value `b', and 0 otherwise.  The comparison is performed
2182 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2183 -------------------------------------------------------------------------------
2184 */
2185 flag float64_eq( float64 a, float64 b )
2186 {
2187 
2188     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2189               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2190          || (    ( extractFloat64Exp( b ) == 0x7FF )
2191               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2192        ) {
2193         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2194             float_raise( float_flag_invalid );
2195         }
2196         return 0;
2197     }
2198     return ( a == b ) ||
2199 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2200 
2201 }
2202 
2203 /*
2204 -------------------------------------------------------------------------------
2205 Returns 1 if the double-precision floating-point value `a' is less than
2206 or equal to the corresponding value `b', and 0 otherwise.  The comparison
2207 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2208 Arithmetic.
2209 -------------------------------------------------------------------------------
2210 */
2211 flag float64_le( float64 a, float64 b )
2212 {
2213     flag aSign, bSign;
2214 
2215     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2216               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2217          || (    ( extractFloat64Exp( b ) == 0x7FF )
2218               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2219        ) {
2220         float_raise( float_flag_invalid );
2221         return 0;
2222     }
2223     aSign = extractFloat64Sign( a );
2224     bSign = extractFloat64Sign( b );
2225     if ( aSign != bSign )
2226 	return aSign ||
2227 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2228 	      0 );
2229     return ( a == b ) ||
2230 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2231 }
2232 
2233 /*
2234 -------------------------------------------------------------------------------
2235 Returns 1 if the double-precision floating-point value `a' is less than
2236 the corresponding value `b', and 0 otherwise.  The comparison is performed
2237 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2238 -------------------------------------------------------------------------------
2239 */
2240 flag float64_lt( float64 a, float64 b )
2241 {
2242     flag aSign, bSign;
2243 
2244     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2245               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2246          || (    ( extractFloat64Exp( b ) == 0x7FF )
2247               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2248        ) {
2249         float_raise( float_flag_invalid );
2250         return 0;
2251     }
2252     aSign = extractFloat64Sign( a );
2253     bSign = extractFloat64Sign( b );
2254     if ( aSign != bSign )
2255 	return aSign &&
2256 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2257 	      0 );
2258     return ( a != b ) &&
2259 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2260 
2261 }
2262 
2263 #ifndef SOFTFLOAT_FOR_GCC
2264 /*
2265 -------------------------------------------------------------------------------
2266 Returns 1 if the double-precision floating-point value `a' is equal to
2267 the corresponding value `b', and 0 otherwise.  The invalid exception is
2268 raised if either operand is a NaN.  Otherwise, the comparison is performed
2269 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2270 -------------------------------------------------------------------------------
2271 */
2272 flag float64_eq_signaling( float64 a, float64 b )
2273 {
2274 
2275     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2276               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2277          || (    ( extractFloat64Exp( b ) == 0x7FF )
2278               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2279        ) {
2280         float_raise( float_flag_invalid );
2281         return 0;
2282     }
2283     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2284 
2285 }
2286 
2287 /*
2288 -------------------------------------------------------------------------------
2289 Returns 1 if the double-precision floating-point value `a' is less than or
2290 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2291 cause an exception.  Otherwise, the comparison is performed according to the
2292 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2293 -------------------------------------------------------------------------------
2294 */
2295 flag float64_le_quiet( float64 a, float64 b )
2296 {
2297     flag aSign, bSign;
2298 
2299     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2300               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2301          || (    ( extractFloat64Exp( b ) == 0x7FF )
2302               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2303        ) {
2304         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2305             float_raise( float_flag_invalid );
2306         }
2307         return 0;
2308     }
2309     aSign = extractFloat64Sign( a );
2310     bSign = extractFloat64Sign( b );
2311     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2312     return ( a == b ) || ( aSign ^ ( a < b ) );
2313 
2314 }
2315 
2316 /*
2317 -------------------------------------------------------------------------------
2318 Returns 1 if the double-precision floating-point value `a' is less than
2319 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2320 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2321 Standard for Binary Floating-Point Arithmetic.
2322 -------------------------------------------------------------------------------
2323 */
2324 flag float64_lt_quiet( float64 a, float64 b )
2325 {
2326     flag aSign, bSign;
2327 
2328     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2329               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2330          || (    ( extractFloat64Exp( b ) == 0x7FF )
2331               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2332        ) {
2333         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2334             float_raise( float_flag_invalid );
2335         }
2336         return 0;
2337     }
2338     aSign = extractFloat64Sign( a );
2339     bSign = extractFloat64Sign( b );
2340     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2341     return ( a != b ) && ( aSign ^ ( a < b ) );
2342 
2343 }
2344 
2345 #endif
2346