xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/experimental/log1p_2u.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
/*
 * Double-precision log(1+x) function.
 *
 * Copyright (c) 2022-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
7*f3087befSAndrew Turner 
8*f3087befSAndrew Turner #include "poly_scalar_f64.h"
9*f3087befSAndrew Turner #include "math_config.h"
10*f3087befSAndrew Turner #include "test_sig.h"
11*f3087befSAndrew Turner #include "test_defs.h"
12*f3087befSAndrew Turner 
/* ln(2) split into a high part and a low correction: Ln2Hi + Ln2Lo ~ ln(2).  */
#define Ln2Hi 0x1.62e42fefa3800p-1
#define Ln2Lo 0x1.ef35793c76730p-45
#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)).  */
#define OneMHfRt2Top                                                          \
  0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)).  */
#define OneTop12 0x3ff		  /* Top 12 bits of asuint64(1).  */
#define BottomMask 0xffffffff	  /* Selects the low 32 bits of a uint64_t.  */
#define OneMHfRt2 0x3fd2bec333018866 /* asuint64(1 - sqrt(2)/2).  */
#define Rt2MOne 0x3fda827999fcef32   /* asuint64(sqrt(2) - 1).  */
#define AbsMask 0x7fffffffffffffff   /* Clears the sign bit of a double.  */
#define ExpM63 0x3c00		  /* Top 16 bits of asuint64(0x1p-63).  */
24*f3087befSAndrew Turner 
25*f3087befSAndrew Turner static inline double
eval_poly(double f)26*f3087befSAndrew Turner eval_poly (double f)
27*f3087befSAndrew Turner {
28*f3087befSAndrew Turner   double f2 = f * f;
29*f3087befSAndrew Turner   double f4 = f2 * f2;
30*f3087befSAndrew Turner   double f8 = f4 * f4;
31*f3087befSAndrew Turner   return estrin_18_f64 (f, f2, f4, f8, f8 * f8, __log1p_data.coeffs);
32*f3087befSAndrew Turner }
33*f3087befSAndrew Turner 
34*f3087befSAndrew Turner /* log1p approximation using polynomial on reduced interval. Largest
35*f3087befSAndrew Turner    observed errors are near the lower boundary of the region where k
36*f3087befSAndrew Turner    is 0.
37*f3087befSAndrew Turner    Maximum measured error: 1.75ULP.
38*f3087befSAndrew Turner    log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-2
39*f3087befSAndrew Turner 			       want -0x1.65fb8659a2f92p-2.  */
40*f3087befSAndrew Turner double
log1p(double x)41*f3087befSAndrew Turner log1p (double x)
42*f3087befSAndrew Turner {
43*f3087befSAndrew Turner   uint64_t ix = asuint64 (x);
44*f3087befSAndrew Turner   uint64_t ia = ix & AbsMask;
45*f3087befSAndrew Turner   uint32_t ia16 = ia >> 48;
46*f3087befSAndrew Turner 
47*f3087befSAndrew Turner   /* Handle special cases first.  */
48*f3087befSAndrew Turner   if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000
49*f3087befSAndrew Turner 		|| ix == 0x8000000000000000))
50*f3087befSAndrew Turner     {
51*f3087befSAndrew Turner       if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000)
52*f3087befSAndrew Turner 	{
53*f3087befSAndrew Turner 	  /* x ==  -0 => log1p(x) =  -0.
54*f3087befSAndrew Turner 	     x == Inf => log1p(x) = Inf.  */
55*f3087befSAndrew Turner 	  return x;
56*f3087befSAndrew Turner 	}
57*f3087befSAndrew Turner       if (ix == 0xbff0000000000000)
58*f3087befSAndrew Turner 	{
59*f3087befSAndrew Turner 	  /* x == -1 => log1p(x) = -Inf.  */
60*f3087befSAndrew Turner 	  return __math_divzero (-1);
61*f3087befSAndrew Turner 	  ;
62*f3087befSAndrew Turner 	}
63*f3087befSAndrew Turner       if (ia16 >= 0x7ff0)
64*f3087befSAndrew Turner 	{
65*f3087befSAndrew Turner 	  /* x == +/-NaN => log1p(x) = NaN.  */
66*f3087befSAndrew Turner 	  return __math_invalid (asdouble (ia));
67*f3087befSAndrew Turner 	}
68*f3087befSAndrew Turner       /* x  <      -1 => log1p(x) =  NaN.
69*f3087befSAndrew Turner 	 x ==    -Inf => log1p(x) =  NaN.  */
70*f3087befSAndrew Turner       return __math_invalid (x);
71*f3087befSAndrew Turner     }
72*f3087befSAndrew Turner 
73*f3087befSAndrew Turner   /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
74*f3087befSAndrew Turner 			   is in [sqrt(2)/2, sqrt(2)]):
75*f3087befSAndrew Turner      log1p(x) = k*log(2) + log1p(f).
76*f3087befSAndrew Turner 
77*f3087befSAndrew Turner      f may not be representable exactly, so we need a correction term:
78*f3087befSAndrew Turner      let m = round(1 + x), c = (1 + x) - m.
79*f3087befSAndrew Turner      c << m: at very small x, log1p(x) ~ x, hence:
80*f3087befSAndrew Turner      log(1+x) - log(m) ~ c/m.
81*f3087befSAndrew Turner 
82*f3087befSAndrew Turner      We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m.  */
83*f3087befSAndrew Turner 
84*f3087befSAndrew Turner   uint64_t sign = ix & ~AbsMask;
85*f3087befSAndrew Turner   if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne))
86*f3087befSAndrew Turner     {
87*f3087befSAndrew Turner       if (unlikely (ia16 <= ExpM63))
88*f3087befSAndrew Turner 	{
89*f3087befSAndrew Turner 	  /* If exponent of x <= -63 then shortcut the polynomial and avoid
90*f3087befSAndrew Turner 	     underflow by just returning x, which is exactly rounded in this
91*f3087befSAndrew Turner 	     region.  */
92*f3087befSAndrew Turner 	  return x;
93*f3087befSAndrew Turner 	}
94*f3087befSAndrew Turner       /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the
95*f3087befSAndrew Turner 	 logic below, as k = 0 and f = x and therefore representable exactly.
96*f3087befSAndrew Turner 	 All we need is to return the polynomial.  */
97*f3087befSAndrew Turner       return fma (x, eval_poly (x) * x, x);
98*f3087befSAndrew Turner     }
99*f3087befSAndrew Turner 
100*f3087befSAndrew Turner   /* Obtain correctly scaled k by manipulation in the exponent.  */
101*f3087befSAndrew Turner   double m = x + 1;
102*f3087befSAndrew Turner   uint64_t mi = asuint64 (m);
103*f3087befSAndrew Turner   uint32_t u = (mi >> 32) + OneMHfRt2Top;
104*f3087befSAndrew Turner   int32_t k = (int32_t) (u >> 20) - OneTop12;
105*f3087befSAndrew Turner 
106*f3087befSAndrew Turner   /* Correction term c/m.  */
107*f3087befSAndrew Turner   double cm = (x - (m - 1)) / m;
108*f3087befSAndrew Turner 
109*f3087befSAndrew Turner   /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
110*f3087befSAndrew Turner   uint32_t utop = (u & 0x000fffff) + HfRt2Top;
111*f3087befSAndrew Turner   uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);
112*f3087befSAndrew Turner   double f = asdouble (u_red) - 1;
113*f3087befSAndrew Turner 
114*f3087befSAndrew Turner   /* Approximate log1p(x) on the reduced input using a polynomial. Because
115*f3087befSAndrew Turner      log1p(0)=0 we choose an approximation of the form:
116*f3087befSAndrew Turner 	x + C0*x^2 + C1*x^3 + C2x^4 + ...
117*f3087befSAndrew Turner      Hence approximation has the form f + f^2 * P(f)
118*f3087befSAndrew Turner 	where P(x) = C0 + C1*x + C2x^2 + ...  */
119*f3087befSAndrew Turner   double p = fma (f, eval_poly (f) * f, f);
120*f3087befSAndrew Turner 
121*f3087befSAndrew Turner   double kd = k;
122*f3087befSAndrew Turner   double y = fma (Ln2Lo, kd, cm);
123*f3087befSAndrew Turner   return y + fma (Ln2Hi, kd, p);
124*f3087befSAndrew Turner }
125*f3087befSAndrew Turner 
126*f3087befSAndrew Turner TEST_SIG (S, D, 1, log1p, -0.9, 10.0)
127*f3087befSAndrew Turner TEST_ULP (log1p, 1.26)
128*f3087befSAndrew Turner TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000)
129*f3087befSAndrew Turner TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000)
130*f3087befSAndrew Turner TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000)
131*f3087befSAndrew Turner TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000)
132