1 /* 2 * Core approximation for single-precision vector sincos 3 * 4 * Copyright (c) 2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "sv_math.h" 9 10 const static struct sv_sincosf_data 11 { 12 float poly_sin[3], poly_cos[3], pio2[3], inv_pio2, shift, range_val; 13 } sv_sincosf_data = { 14 .poly_sin = { /* Generated using Remez, odd coeffs only, in [-pi/4, pi/4]. */ 15 -0x1.555546p-3, 0x1.11076p-7, -0x1.994eb4p-13 }, 16 .poly_cos = { /* Generated using Remez, even coeffs only, in [-pi/4, pi/4]. */ 17 0x1.55554ap-5, -0x1.6c0c1ap-10, 0x1.99e0eep-16 }, 18 .pio2 = { 0x1.921fb6p+0f, -0x1.777a5cp-25f, -0x1.ee59dap-50f }, 19 .inv_pio2 = 0x1.45f306p-1f, 20 .shift = 0x1.8p23, 21 .range_val = 0x1p20 22 }; 23 24 static inline svbool_t 25 check_ge_rangeval (svbool_t pg, svfloat32_t x, const struct sv_sincosf_data *d) 26 { 27 svbool_t in_bounds = svaclt (pg, x, d->range_val); 28 return svnot_z (pg, in_bounds); 29 } 30 31 /* Single-precision vector function allowing calculation of both sin and cos in 32 one function call, using shared argument reduction and separate low-order 33 polynomials. 34 Worst-case error for sin is 1.67 ULP: 35 sv_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 36 Worst-case error for cos is 1.81 ULP: 37 sv_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */ 38 static inline svfloat32x2_t 39 sv_sincosf_inline (svbool_t pg, svfloat32_t x, const struct sv_sincosf_data *d) 40 { 41 /* n = rint ( x / (pi/2) ). */ 42 svfloat32_t q = svmla_x (pg, sv_f32 (d->shift), x, d->inv_pio2); 43 q = svsub_x (pg, q, d->shift); 44 svint32_t n = svcvt_s32_x (pg, q); 45 46 /* Reduce x such that r is in [ -pi/4, pi/4 ]. */ 47 svfloat32_t r = x; 48 r = svmls_x (pg, r, q, d->pio2[0]); 49 r = svmls_x (pg, r, q, d->pio2[1]); 50 r = svmls_x (pg, r, q, d->pio2[2]); 51 52 /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */ 53 svfloat32_t r2 = svmul_x (pg, r, r), r3 = svmul_x (pg, r, r2); 54 svfloat32_t s = svmla_x (pg, sv_f32 (d->poly_sin[1]), r2, d->poly_sin[2]); 55 s = svmad_x (pg, r2, s, d->poly_sin[0]); 56 s = svmla_x (pg, r, r3, s); 57 58 /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */ 59 svfloat32_t r4 = svmul_x (pg, r2, r2); 60 svfloat32_t p = svmla_x (pg, sv_f32 (d->poly_cos[1]), r2, d->poly_cos[2]); 61 svfloat32_t c = svmad_x (pg, sv_f32 (d->poly_cos[0]), r2, -0.5); 62 c = svmla_x (pg, c, r4, p); 63 c = svmad_x (pg, r2, c, 1); 64 65 svuint32_t un = svreinterpret_u32 (n); 66 /* If odd quadrant, swap cos and sin. */ 67 svbool_t swap = svcmpeq (pg, svlsl_x (pg, un, 31), 0); 68 svfloat32_t ss = svsel (swap, s, c); 69 svfloat32_t cc = svsel (swap, c, s); 70 71 /* Fix signs according to quadrant. 72 ss = asfloat(asuint(ss) ^ ((n & 2) << 30)) 73 cc = asfloat(asuint(cc) & (((n + 1) & 2) << 30)). */ 74 svuint32_t sin_sign = svlsl_x (pg, svand_x (pg, un, 2), 30); 75 svuint32_t cos_sign = svlsl_x ( 76 pg, svand_x (pg, svreinterpret_u32 (svadd_x (pg, n, 1)), 2), 30); 77 ss = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ss), sin_sign)); 78 cc = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (cc), cos_sign)); 79 80 return svcreate2 (ss, cc); 81 } 82