1 /* 2 * Vector math abstractions. 3 * 4 * Copyright (c) 2019-2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #ifndef _V_MATH_H 9 #define _V_MATH_H 10 11 #ifndef WANT_VMATH 12 /* Enable the build of vector math code. */ 13 # define WANT_VMATH 1 14 #endif 15 16 #if WANT_VMATH 17 18 # if __aarch64__ 19 # define VPCS_ATTR __attribute__ ((aarch64_vector_pcs)) 20 # else 21 # error "Cannot build without AArch64" 22 # endif 23 24 # include <stdint.h> 25 # include "math_config.h" 26 # if __aarch64__ 27 28 # include <arm_neon.h> 29 30 /* Shorthand helpers for declaring constants. */ 31 # define V2(X) { X, X } 32 # define V4(X) { X, X, X, X } 33 # define V8(X) { X, X, X, X, X, X, X, X } 34 35 static inline int 36 v_any_u16h (uint16x4_t x) 37 { 38 return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0; 39 } 40 41 static inline float32x4_t 42 v_f32 (float x) 43 { 44 return (float32x4_t) V4 (x); 45 } 46 static inline uint32x4_t 47 v_u32 (uint32_t x) 48 { 49 return (uint32x4_t) V4 (x); 50 } 51 static inline int32x4_t 52 v_s32 (int32_t x) 53 { 54 return (int32x4_t) V4 (x); 55 } 56 57 /* true if any elements of a vector compare result is non-zero. */ 58 static inline int 59 v_any_u32 (uint32x4_t x) 60 { 61 /* assume elements in x are either 0 or -1u. */ 62 return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0; 63 } 64 static inline int 65 v_any_u32h (uint32x2_t x) 66 { 67 return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0; 68 } 69 static inline float32x4_t 70 v_lookup_f32 (const float *tab, uint32x4_t idx) 71 { 72 return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] }; 73 } 74 static inline uint32x4_t 75 v_lookup_u32 (const uint32_t *tab, uint32x4_t idx) 76 { 77 return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] }; 78 } 79 static inline float32x4_t 80 v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p) 81 { 82 return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1], 83 p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] }; 84 } 85 static inline float32x4_t 86 v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2, 87 float32x4_t y, uint32x4_t p) 88 { 89 return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0], 90 p[1] ? f (x1[1], x2[1]) : y[1], 91 p[2] ? f (x1[2], x2[2]) : y[2], 92 p[3] ? f (x1[3], x2[3]) : y[3] }; 93 } 94 static inline float32x4_t 95 v_zerofy_f32 (float32x4_t x, uint32x4_t mask) 96 { 97 return vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), mask)); 98 } 99 100 static inline float64x2_t 101 v_f64 (double x) 102 { 103 return (float64x2_t) V2 (x); 104 } 105 static inline uint64x2_t 106 v_u64 (uint64_t x) 107 { 108 return (uint64x2_t) V2 (x); 109 } 110 static inline int64x2_t 111 v_s64 (int64_t x) 112 { 113 return (int64x2_t) V2 (x); 114 } 115 116 /* true if any elements of a vector compare result is non-zero. */ 117 static inline int 118 v_any_u64 (uint64x2_t x) 119 { 120 /* assume elements in x are either 0 or -1u. */ 121 return vpaddd_u64 (x) != 0; 122 } 123 /* true if all elements of a vector compare result is 1. */ 124 static inline int 125 v_all_u64 (uint64x2_t x) 126 { 127 /* assume elements in x are either 0 or -1u. */ 128 return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2; 129 } 130 static inline float64x2_t 131 v_lookup_f64 (const double *tab, uint64x2_t idx) 132 { 133 return (float64x2_t){ tab[idx[0]], tab[idx[1]] }; 134 } 135 static inline uint64x2_t 136 v_lookup_u64 (const uint64_t *tab, uint64x2_t idx) 137 { 138 return (uint64x2_t){ tab[idx[0]], tab[idx[1]] }; 139 } 140 141 static inline float64x2_t 142 v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p) 143 { 144 double p1 = p[1]; 145 double x1 = x[1]; 146 if (likely (p[0])) 147 y[0] = f (x[0]); 148 if (likely (p1)) 149 y[1] = f (x1); 150 return y; 151 } 152 153 static inline float64x2_t 154 v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2, 155 float64x2_t y, uint64x2_t p) 156 { 157 double p1 = p[1]; 158 double x1h = x1[1]; 159 double x2h = x2[1]; 160 if (likely (p[0])) 161 y[0] = f (x1[0], x2[0]); 162 if (likely (p1)) 163 y[1] = f (x1h, x2h); 164 return y; 165 } 166 static inline float64x2_t 167 v_zerofy_f64 (float64x2_t x, uint64x2_t mask) 168 { 169 return vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), mask)); 170 } 171 172 # endif 173 #endif 174 175 #endif 176