xref: /freebsd/contrib/arm-optimized-routines/pl/math/sv_sincos_common.h (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*
2  * Core approximation for double-precision vector sincos
3  *
4  * Copyright (c) 2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #include "sv_math.h"
9 #include "poly_sve_f64.h"
10 
11 static const struct sv_sincos_data
12 {
13   double sin_poly[7], cos_poly[6], pio2[3];
14   double inv_pio2, shift, range_val;
15 } sv_sincos_data = {
16   .inv_pio2 = 0x1.45f306dc9c882p-1,
17   .pio2 = { 0x1.921fb50000000p+0, 0x1.110b460000000p-26,
18 	    0x1.1a62633145c07p-54 },
19   .shift = 0x1.8p52,
20   .sin_poly = { /* Computed using Remez in [-pi/2, pi/2].  */
21 	        -0x1.555555555547bp-3, 0x1.1111111108a4dp-7,
22 		-0x1.a01a019936f27p-13, 0x1.71de37a97d93ep-19,
23 		-0x1.ae633919987c6p-26, 0x1.60e277ae07cecp-33,
24 		-0x1.9e9540300a1p-41 },
25   .cos_poly = { /* Computed using Remez in [-pi/4, pi/4].  */
26 	        0x1.555555555554cp-5, -0x1.6c16c16c1521fp-10,
27 		0x1.a01a019cbf62ap-16, -0x1.27e4f812b681ep-22,
28 		0x1.1ee9f152a57cdp-29, -0x1.8fb131098404bp-37 },
29   .range_val = 0x1p23, };
30 
31 static inline svbool_t
32 check_ge_rangeval (svbool_t pg, svfloat64_t x, const struct sv_sincos_data *d)
33 {
34   svbool_t in_bounds = svaclt (pg, x, d->range_val);
35   return svnot_z (pg, in_bounds);
36 }
37 
38 /* Double-precision vector function allowing calculation of both sin and cos in
39    one function call, using shared argument reduction and separate polynomials.
40    Largest observed error is for sin, 3.22 ULP:
41    v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
42 				       want -0x1.ffe9537d5dbb4p-3.  */
43 static inline svfloat64x2_t
44 sv_sincos_inline (svbool_t pg, svfloat64_t x, const struct sv_sincos_data *d)
45 {
46   /* q = nearest integer to 2 * x / pi.  */
47   svfloat64_t q = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_pio2),
48 			   d->shift);
49   svint64_t n = svcvt_s64_x (pg, q);
50 
51   /* Reduce x such that r is in [ -pi/4, pi/4 ].  */
52   svfloat64_t r = x;
53   r = svmls_x (pg, r, q, d->pio2[0]);
54   r = svmls_x (pg, r, q, d->pio2[1]);
55   r = svmls_x (pg, r, q, d->pio2[2]);
56 
57   svfloat64_t r2 = svmul_x (pg, r, r), r3 = svmul_x (pg, r2, r),
58 	      r4 = svmul_x (pg, r2, r2);
59 
60   /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2).  */
61   svfloat64_t s = sv_pw_horner_6_f64_x (pg, r2, r4, d->sin_poly);
62   s = svmla_x (pg, r, r3, s);
63 
64   /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2).  */
65   svfloat64_t c = sv_pw_horner_5_f64_x (pg, r2, r4, d->cos_poly);
66   c = svmad_x (pg, c, r2, -0.5);
67   c = svmad_x (pg, c, r2, 1);
68 
69   svuint64_t un = svreinterpret_u64 (n);
70   /* If odd quadrant, swap cos and sin.  */
71   svbool_t swap = svcmpeq (pg, svlsl_x (pg, un, 63), 0);
72   svfloat64_t ss = svsel (swap, s, c);
73   svfloat64_t cc = svsel (swap, c, s);
74 
75   /* Fix signs according to quadrant.
76      ss = asdouble(asuint64(ss) ^ ((n       & 2) << 62))
77      cc = asdouble(asuint64(cc) & (((n + 1) & 2) << 62)).  */
78   svuint64_t sin_sign = svlsl_x (pg, svand_x (pg, un, 2), 62);
79   svuint64_t cos_sign = svlsl_x (
80       pg, svand_x (pg, svreinterpret_u64 (svadd_x (pg, n, 1)), 2), 62);
81   ss = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ss), sin_sign));
82   cc = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (cc), cos_sign));
83 
84   return svcreate2 (ss, cc);
85 }
86