1 /* 2 * Vector math abstractions. 3 * 4 * Copyright (c) 2019-2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #ifndef _V_MATH_H 9 #define _V_MATH_H 10 11 #if !__aarch64__ 12 # error "Cannot build without AArch64" 13 #endif 14 15 #define VPCS_ATTR __attribute__ ((aarch64_vector_pcs)) 16 17 #define V_NAME_F1(fun) _ZGVnN4v_##fun##f 18 #define V_NAME_D1(fun) _ZGVnN2v_##fun 19 #define V_NAME_F2(fun) _ZGVnN4vv_##fun##f 20 #define V_NAME_D2(fun) _ZGVnN2vv_##fun 21 22 #include <stdint.h> 23 #include "../math_config.h" 24 #include <arm_neon.h> 25 26 /* Shorthand helpers for declaring constants. */ 27 # define V2(X) { X, X } 28 # define V4(X) { X, X, X, X } 29 # define V8(X) { X, X, X, X, X, X, X, X } 30 31 static inline int 32 v_any_u16h (uint16x4_t x) 33 { 34 return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0; 35 } 36 37 static inline int 38 v_lanes32 (void) 39 { 40 return 4; 41 } 42 43 static inline float32x4_t 44 v_f32 (float x) 45 { 46 return (float32x4_t) V4 (x); 47 } 48 static inline uint32x4_t 49 v_u32 (uint32_t x) 50 { 51 return (uint32x4_t) V4 (x); 52 } 53 /* true if any elements of a v_cond result is non-zero. */ 54 static inline int 55 v_any_u32 (uint32x4_t x) 56 { 57 /* assume elements in x are either 0 or -1u. */ 58 return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0; 59 } 60 static inline int 61 v_any_u32h (uint32x2_t x) 62 { 63 return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0; 64 } 65 static inline float32x4_t 66 v_lookup_f32 (const float *tab, uint32x4_t idx) 67 { 68 return (float32x4_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; 69 } 70 static inline uint32x4_t 71 v_lookup_u32 (const uint32_t *tab, uint32x4_t idx) 72 { 73 return (uint32x4_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; 74 } 75 static inline float32x4_t 76 v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p) 77 { 78 return (float32x4_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1], 79 p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]}; 80 } 81 static inline float32x4_t 82 v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2, 83 float32x4_t y, uint32x4_t p) 84 { 85 return (float32x4_t){p[0] ? f (x1[0], x2[0]) : y[0], 86 p[1] ? f (x1[1], x2[1]) : y[1], 87 p[2] ? f (x1[2], x2[2]) : y[2], 88 p[3] ? f (x1[3], x2[3]) : y[3]}; 89 } 90 91 static inline int 92 v_lanes64 (void) 93 { 94 return 2; 95 } 96 static inline float64x2_t 97 v_f64 (double x) 98 { 99 return (float64x2_t) V2 (x); 100 } 101 static inline uint64x2_t 102 v_u64 (uint64_t x) 103 { 104 return (uint64x2_t) V2 (x); 105 } 106 /* true if any elements of a v_cond result is non-zero. */ 107 static inline int 108 v_any_u64 (uint64x2_t x) 109 { 110 /* assume elements in x are either 0 or -1u. */ 111 return vpaddd_u64 (x) != 0; 112 } 113 static inline float64x2_t 114 v_lookup_f64 (const double *tab, uint64x2_t idx) 115 { 116 return (float64x2_t){tab[idx[0]], tab[idx[1]]}; 117 } 118 static inline uint64x2_t 119 v_lookup_u64 (const uint64_t *tab, uint64x2_t idx) 120 { 121 return (uint64x2_t){tab[idx[0]], tab[idx[1]]}; 122 } 123 static inline float64x2_t 124 v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p) 125 { 126 double p1 = p[1]; 127 double x1 = x[1]; 128 if (likely (p[0])) 129 y[0] = f (x[0]); 130 if (likely (p1)) 131 y[1] = f (x1); 132 return y; 133 } 134 135 #endif 136