xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/experimental/atan_common.h (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Double-precision polynomial evaluation function for scalar
3*f3087befSAndrew Turner  * atan(x) and atan2(y,x).
4*f3087befSAndrew Turner  *
5*f3087befSAndrew Turner  * Copyright (c) 2021-2024, Arm Limited.
6*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
7*f3087befSAndrew Turner  */
8*f3087befSAndrew Turner 
9*f3087befSAndrew Turner #include "math_config.h"
10*f3087befSAndrew Turner #include "poly_scalar_f64.h"
11*f3087befSAndrew Turner 
12*f3087befSAndrew Turner /* Polynomial used in fast atan(x) and atan2(y,x) implementations
13*f3087befSAndrew Turner    The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
14*f3087befSAndrew Turner static inline double
eval_poly(double z,double az,double shift)15*f3087befSAndrew Turner eval_poly (double z, double az, double shift)
16*f3087befSAndrew Turner {
17*f3087befSAndrew Turner   /* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
18*f3087befSAndrew Turner      full scheme to avoid underflow in x^16.  */
19*f3087befSAndrew Turner   double z2 = z * z;
20*f3087befSAndrew Turner   double x2 = z2 * z2;
21*f3087befSAndrew Turner   double x4 = x2 * x2;
22*f3087befSAndrew Turner   double x8 = x4 * x4;
23*f3087befSAndrew Turner   double y = fma (estrin_11_f64 (z2, x2, x4, x8, __atan_poly_data.poly + 8),
24*f3087befSAndrew Turner 		  x8, estrin_7_f64 (z2, x2, x4, __atan_poly_data.poly));
25*f3087befSAndrew Turner 
26*f3087befSAndrew Turner   /* Finalize. y = shift + z + z^3 * P(z^2).  */
27*f3087befSAndrew Turner   y = fma (y, z2 * az, az);
28*f3087befSAndrew Turner   y = y + shift;
29*f3087befSAndrew Turner 
30*f3087befSAndrew Turner   return y;
31*f3087befSAndrew Turner }
32*f3087befSAndrew Turner 
33*f3087befSAndrew Turner #undef P
34