xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/divtf3.c (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 //===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements quad-precision soft-float division
10 // with the IEEE-754 default rounding (to nearest, ties to even).
11 //
12 // For simplicity, this implementation currently flushes denormals to zero.
13 // It should be a fairly straightforward exercise to implement gradual
14 // underflow with correct rounding.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #define QUAD_PRECISION
19 #include "fp_lib.h"
20 
21 #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
22 COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
23 
24   const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
25   const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
26   const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
27 
28   rep_t aSignificand = toRep(a) & significandMask;
29   rep_t bSignificand = toRep(b) & significandMask;
30   int scale = 0;
31 
32   // Detect if a or b is zero, denormal, infinity, or NaN.
33   if (aExponent - 1U >= maxExponent - 1U ||
34       bExponent - 1U >= maxExponent - 1U) {
35 
36     const rep_t aAbs = toRep(a) & absMask;
37     const rep_t bAbs = toRep(b) & absMask;
38 
39     // NaN / anything = qNaN
40     if (aAbs > infRep)
41       return fromRep(toRep(a) | quietBit);
42     // anything / NaN = qNaN
43     if (bAbs > infRep)
44       return fromRep(toRep(b) | quietBit);
45 
46     if (aAbs == infRep) {
47       // infinity / infinity = NaN
48       if (bAbs == infRep)
49         return fromRep(qnanRep);
50       // infinity / anything else = +/- infinity
51       else
52         return fromRep(aAbs | quotientSign);
53     }
54 
55     // anything else / infinity = +/- 0
56     if (bAbs == infRep)
57       return fromRep(quotientSign);
58 
59     if (!aAbs) {
60       // zero / zero = NaN
61       if (!bAbs)
62         return fromRep(qnanRep);
63       // zero / anything else = +/- zero
64       else
65         return fromRep(quotientSign);
66     }
67     // anything else / zero = +/- infinity
68     if (!bAbs)
69       return fromRep(infRep | quotientSign);
70 
71     // One or both of a or b is denormal.  The other (if applicable) is a
72     // normal number.  Renormalize one or both of a and b, and set scale to
73     // include the necessary exponent adjustment.
74     if (aAbs < implicitBit)
75       scale += normalize(&aSignificand);
76     if (bAbs < implicitBit)
77       scale -= normalize(&bSignificand);
78   }
79 
80   // Set the implicit significand bit.  If we fell through from the
81   // denormal path it was already set by normalize( ), but setting it twice
82   // won't hurt anything.
83   aSignificand |= implicitBit;
84   bSignificand |= implicitBit;
85   int quotientExponent = aExponent - bExponent + scale;
86 
87   // Align the significand of b as a Q63 fixed-point number in the range
88   // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
89   // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This
90   // is accurate to about 3.5 binary digits.
91   const uint64_t q63b = bSignificand >> 49;
92   uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b;
93   // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
94 
95   // Now refine the reciprocal estimate using a Newton-Raphson iteration:
96   //
97   //     x1 = x0 * (2 - x0 * b)
98   //
99   // This doubles the number of correct binary digits in the approximation
100   // with each iteration.
101   uint64_t correction64;
102   correction64 = -((rep_t)recip64 * q63b >> 64);
103   recip64 = (rep_t)recip64 * correction64 >> 63;
104   correction64 = -((rep_t)recip64 * q63b >> 64);
105   recip64 = (rep_t)recip64 * correction64 >> 63;
106   correction64 = -((rep_t)recip64 * q63b >> 64);
107   recip64 = (rep_t)recip64 * correction64 >> 63;
108   correction64 = -((rep_t)recip64 * q63b >> 64);
109   recip64 = (rep_t)recip64 * correction64 >> 63;
110   correction64 = -((rep_t)recip64 * q63b >> 64);
111   recip64 = (rep_t)recip64 * correction64 >> 63;
112 
113   // The reciprocal may have overflowed to zero if the upper half of b is
114   // exactly 1.0.  This would sabatoge the full-width final stage of the
115   // computation that follows, so we adjust the reciprocal down by one bit.
116   recip64--;
117 
118   // We need to perform one more iteration to get us to 112 binary digits;
119   // The last iteration needs to happen with extra precision.
120   const uint64_t q127blo = bSignificand << 15;
121   rep_t correction, reciprocal;
122 
123   // NOTE: This operation is equivalent to __multi3, which is not implemented
124   //       in some architechure
125   rep_t r64q63, r64q127, r64cH, r64cL, dummy;
126   wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63);
127   wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127);
128 
129   correction = -(r64q63 + (r64q127 >> 64));
130 
131   uint64_t cHi = correction >> 64;
132   uint64_t cLo = correction;
133 
134   wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH);
135   wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL);
136 
137   reciprocal = r64cH + (r64cL >> 64);
138 
139   // Adjust the final 128-bit reciprocal estimate downward to ensure that it
140   // is strictly smaller than the infinitely precise exact reciprocal. Because
141   // the computation of the Newton-Raphson step is truncating at every step,
142   // this adjustment is small; most of the work is already done.
143   reciprocal -= 2;
144 
145   // The numerical reciprocal is accurate to within 2^-112, lies in the
146   // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
147   // of b.  Multiplying a by this reciprocal thus gives a numerical q = a/b
148   // in Q127 with the following properties:
149   //
150   //    1. q < a/b
151   //    2. q is in the interval [0.5, 2.0)
152   //    3. The error in q is bounded away from 2^-113 (actually, we have a
153   //       couple of bits to spare, but this is all we need).
154 
155   // We need a 128 x 128 multiply high to compute q, which isn't a basic
156   // operation in C, so we need to be a little bit fussy.
157   rep_t quotient, quotientLo;
158   wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
159 
160   // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
161   // In either case, we are going to compute a residual of the form
162   //
163   //     r = a - q*b
164   //
165   // We know from the construction of q that r satisfies:
166   //
167   //     0 <= r < ulp(q)*b
168   //
169   // If r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we
170   // already have the correct result.  The exact halfway case cannot occur.
171   // We also take this time to right shift quotient if it falls in the [1,2)
172   // range and adjust the exponent accordingly.
173   rep_t residual;
174   rep_t qb;
175 
176   if (quotient < (implicitBit << 1)) {
177     wideMultiply(quotient, bSignificand, &dummy, &qb);
178     residual = (aSignificand << 113) - qb;
179     quotientExponent--;
180   } else {
181     quotient >>= 1;
182     wideMultiply(quotient, bSignificand, &dummy, &qb);
183     residual = (aSignificand << 112) - qb;
184   }
185 
186   const int writtenExponent = quotientExponent + exponentBias;
187 
188   if (writtenExponent >= maxExponent) {
189     // If we have overflowed the exponent, return infinity.
190     return fromRep(infRep | quotientSign);
191   } else if (writtenExponent < 1) {
192     if (writtenExponent == 0) {
193       // Check whether the rounded result is normal.
194       const bool round = (residual << 1) > bSignificand;
195       // Clear the implicit bit.
196       rep_t absResult = quotient & significandMask;
197       // Round.
198       absResult += round;
199       if (absResult & ~significandMask) {
200         // The rounded result is normal; return it.
201         return fromRep(absResult | quotientSign);
202       }
203     }
204     // Flush denormals to zero.  In the future, it would be nice to add
205     // code to round them correctly.
206     return fromRep(quotientSign);
207   } else {
208     const bool round = (residual << 1) >= bSignificand;
209     // Clear the implicit bit.
210     rep_t absResult = quotient & significandMask;
211     // Insert the exponent.
212     absResult |= (rep_t)writtenExponent << significandBits;
213     // Round.
214     absResult += round;
215     // Insert the sign and return.
216     const fp_t result = fromRep(absResult | quotientSign);
217     return result;
218   }
219 }
220 
221 #endif
222