xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/sinpi_3u5.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
/*
 * Double-precision scalar sinpi function.
 *
 * Copyright (c) 2023-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
7*f3087befSAndrew Turner 
8*f3087befSAndrew Turner #define _GNU_SOURCE
9*f3087befSAndrew Turner #include <math.h>
10*f3087befSAndrew Turner #include "mathlib.h"
11*f3087befSAndrew Turner #include "math_config.h"
12*f3087befSAndrew Turner #include "test_sig.h"
13*f3087befSAndrew Turner #include "test_defs.h"
14*f3087befSAndrew Turner #include "poly_scalar_f64.h"
15*f3087befSAndrew Turner 
/* Taylor series coefficients for sin(pi * x).
   C2 coefficient (originally ~=5.16771278) has been split into two parts:
   C2_hi = 4, C2_lo = C2 - C2_hi (~=1.16771278)
   This change in magnitude reduces floating point rounding errors.
   C2_hi is then reintroduced after the polynomial approximation.
   The table stores the terms with their signs, so C2_lo appears as the
   negative second entry.  */
static const double poly[]
    = { 0x1.921fb54442d184p1,  -0x1.2aef39896f94bp0,   0x1.466bc6775ab16p1,
	-0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4,   -0x1.e30750a28c88ep-8,
	0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16, 0x1.af86ae521260bp-21,
	-0x1.012a9870eeb7dp-25 };
26*f3087befSAndrew Turner 
/* 1.5 * 2^52.  arm_math_sinpi adds and then subtracts this value to round
   |x| to an integer.  */
#define Shift 0x1.8p+52
/* Double-precision value of pi, used for the tiny-input path pi * x.
   TODO Store constant in structure for more efficient load.  */
#define Pi 0x1.921fb54442d18p+1
30*f3087befSAndrew Turner 
31*f3087befSAndrew Turner /* Approximation for scalar double-precision sinpi(x).
32*f3087befSAndrew Turner    Maximum error: 3.03 ULP:
33*f3087befSAndrew Turner    sinpi(0x1.a90da2818f8b5p+7) got 0x1.fe358f255a4b3p-1
34*f3087befSAndrew Turner 			      want 0x1.fe358f255a4b6p-1.  */
35*f3087befSAndrew Turner double
arm_math_sinpi(double x)36*f3087befSAndrew Turner arm_math_sinpi (double x)
37*f3087befSAndrew Turner {
38*f3087befSAndrew Turner   if (isinf (x) || isnan (x))
39*f3087befSAndrew Turner     return __math_invalid (x);
40*f3087befSAndrew Turner 
41*f3087befSAndrew Turner   double r = asdouble (asuint64 (x) & ~0x8000000000000000);
42*f3087befSAndrew Turner   uint64_t sign = asuint64 (x) & 0x8000000000000000;
43*f3087befSAndrew Turner 
44*f3087befSAndrew Turner   /* Edge cases for when sinpif should be exactly 0. (Integers)
45*f3087befSAndrew Turner      0x1p53 is the limit for single precision to store any decimal places.  */
46*f3087befSAndrew Turner   if (r >= 0x1p53)
47*f3087befSAndrew Turner     return asdouble (sign);
48*f3087befSAndrew Turner 
49*f3087befSAndrew Turner   /* If x is an integer, return 0.  */
50*f3087befSAndrew Turner   uint64_t m = (uint64_t) r;
51*f3087befSAndrew Turner   if (r == m)
52*f3087befSAndrew Turner     return asdouble (sign);
53*f3087befSAndrew Turner 
54*f3087befSAndrew Turner   /* For very small inputs, squaring r causes underflow.
55*f3087befSAndrew Turner      Values below this threshold can be approximated via sinpi(x) ≈ pi*x.  */
56*f3087befSAndrew Turner   if (r < 0x1p-63)
57*f3087befSAndrew Turner     return Pi * x;
58*f3087befSAndrew Turner 
59*f3087befSAndrew Turner   /* Any non-integer values >= 0x1x51 will be int + 0.5.
60*f3087befSAndrew Turner      These values should return exactly 1 or -1.  */
61*f3087befSAndrew Turner   if (r >= 0x1p51)
62*f3087befSAndrew Turner     {
63*f3087befSAndrew Turner       uint64_t iy = ((m & 1) << 63) ^ asuint64 (1.0);
64*f3087befSAndrew Turner       return asdouble (sign ^ iy);
65*f3087befSAndrew Turner     }
66*f3087befSAndrew Turner 
67*f3087befSAndrew Turner   /* n = rint(|x|).  */
68*f3087befSAndrew Turner   double n = r + Shift;
69*f3087befSAndrew Turner   sign ^= (asuint64 (n) << 63);
70*f3087befSAndrew Turner   n = n - Shift;
71*f3087befSAndrew Turner 
72*f3087befSAndrew Turner   /* r = |x| - n (range reduction into -1/2 .. 1/2).  */
73*f3087befSAndrew Turner   r = r - n;
74*f3087befSAndrew Turner 
75*f3087befSAndrew Turner   /* y = sin(r).  */
76*f3087befSAndrew Turner   double r2 = r * r;
77*f3087befSAndrew Turner   double y = horner_9_f64 (r2, poly);
78*f3087befSAndrew Turner   y = y * r;
79*f3087befSAndrew Turner 
80*f3087befSAndrew Turner   /* Reintroduce C2_hi.  */
81*f3087befSAndrew Turner   y = fma (-4 * r2, r, y);
82*f3087befSAndrew Turner 
83*f3087befSAndrew Turner   /* Copy sign of x to sin(|x|).  */
84*f3087befSAndrew Turner   return asdouble (asuint64 (y) ^ sign);
85*f3087befSAndrew Turner }
86*f3087befSAndrew Turner 
#if WANT_EXPERIMENTAL_MATH
/* Expose the routine under the plain name sinpi when experimental math is
   enabled; forwards directly to arm_math_sinpi.  */
double
sinpi (double x)
{
  return arm_math_sinpi (x);
}
#endif
94*f3087befSAndrew Turner 
#if WANT_TRIGPI_TESTS
/* Test registration for arm_math_sinpi.  The interval boundaries match the
   branch thresholds in the implementation (0x1p-63 and 0x1p51).
   NOTE(review): the bound 2.53 is 0.5 below the 3.03 ULP maximum error
   quoted for the function - presumably a harness convention; confirm.  */
TEST_ULP (arm_math_sinpi, 2.53)
TEST_SYM_INTERVAL (arm_math_sinpi, 0, 0x1p-63, 5000)
TEST_SYM_INTERVAL (arm_math_sinpi, 0x1p-63, 0.5, 10000)
TEST_SYM_INTERVAL (arm_math_sinpi, 0.5, 0x1p51, 10000)
TEST_SYM_INTERVAL (arm_math_sinpi, 0x1p51, inf, 10000)
#endif
102