xref: /freebsd/contrib/arm-optimized-routines/pl/math/sv_sincos_common.h (revision 5ca8c28cd8c725b81781201cfdb5f9969396f934)
1 /*
2  * Core approximation for double-precision vector sincos
3  *
4  * Copyright (c) 2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #include "sv_math.h"
9 #include "poly_sve_f64.h"
10 
11 static const struct sv_sincos_data
12 {
13   double sin_poly[7], cos_poly[6], pio2[3];
14   double inv_pio2, shift, range_val;
15 } sv_sincos_data = {
16   .inv_pio2 = 0x1.45f306dc9c882p-1,
17   .pio2 = { 0x1.921fb50000000p+0, 0x1.110b460000000p-26,
18 	    0x1.1a62633145c07p-54 },
19   .shift = 0x1.8p52,
20   .sin_poly = { /* Computed using Remez in [-pi/2, pi/2].  */
21 	        -0x1.555555555547bp-3, 0x1.1111111108a4dp-7,
22 		-0x1.a01a019936f27p-13, 0x1.71de37a97d93ep-19,
23 		-0x1.ae633919987c6p-26, 0x1.60e277ae07cecp-33,
24 		-0x1.9e9540300a1p-41 },
25   .cos_poly = { /* Computed using Remez in [-pi/4, pi/4].  */
26 	        0x1.555555555554cp-5, -0x1.6c16c16c1521fp-10,
27 		0x1.a01a019cbf62ap-16, -0x1.27e4f812b681ep-22,
28 		0x1.1ee9f152a57cdp-29, -0x1.8fb131098404bp-37 },
29   .range_val = 0x1p23, };
30 
31 static inline svbool_t
32 check_ge_rangeval (svbool_t pg, svfloat64_t x, const struct sv_sincos_data *d)
33 {
34   svbool_t in_bounds = svaclt (pg, x, d->range_val);
35   return svnot_z (pg, in_bounds);
36 }
37 
38 /* Double-precision vector function allowing calculation of both sin and cos in
39    one function call, using shared argument reduction and separate polynomials.
40    Largest observed error is for sin, 3.22 ULP:
41    v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
42 				       want -0x1.ffe9537d5dbb4p-3.  */
43 static inline svfloat64x2_t
44 sv_sincos_inline (svbool_t pg, svfloat64_t x, const struct sv_sincos_data *d)
45 {
46   /* q = nearest integer to 2 * x / pi.  */
47   svfloat64_t q = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_pio2),
48 			   d->shift);
49   svint64_t n = svcvt_s64_x (pg, q);
50 
51   /* Reduce x such that r is in [ -pi/4, pi/4 ].  */
52   svfloat64_t r = x;
53   r = svmls_x (pg, r, q, d->pio2[0]);
54   r = svmls_x (pg, r, q, d->pio2[1]);
55   r = svmls_x (pg, r, q, d->pio2[2]);
56 
57   svfloat64_t r2 = svmul_x (pg, r, r), r3 = svmul_x (pg, r2, r),
58 	      r4 = svmul_x (pg, r2, r2);
59 
60   /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2).  */
61   svfloat64_t s = sv_pw_horner_6_f64_x (pg, r2, r4, d->sin_poly);
62   s = svmla_x (pg, r, r3, s);
63 
64   /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2).  */
65   svfloat64_t c = sv_pw_horner_5_f64_x (pg, r2, r4, d->cos_poly);
66   c = svmad_x (pg, c, r2, -0.5);
67   c = svmad_x (pg, c, r2, 1);
68 
69   svuint64_t un = svreinterpret_u64 (n);
70   /* If odd quadrant, swap cos and sin.  */
71   svbool_t swap = svcmpeq (pg, svlsl_x (pg, un, 63), 0);
72   svfloat64_t ss = svsel (swap, s, c);
73   svfloat64_t cc = svsel (swap, c, s);
74 
75   /* Fix signs according to quadrant.
76      ss = asdouble(asuint64(ss) ^ ((n       & 2) << 62))
77      cc = asdouble(asuint64(cc) & (((n + 1) & 2) << 62)).  */
78   svuint64_t sin_sign = svlsl_x (pg, svand_x (pg, un, 2), 62);
79   svuint64_t cos_sign = svlsl_x (
80       pg, svand_x (pg, svreinterpret_u64 (svadd_x (pg, n, 1)), 2), 62);
81   ss = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ss), sin_sign));
82   cc = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (cc), cos_sign));
83 
84   return svcreate2 (ss, cc);
85 }
86