/*
 * Generic helpers for evaluating polynomials with various schemes.
 *
 * Copyright (c) 2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* This file is a template.  The includer must supply three macros:
     VTYPE - the value type every helper takes and returns,
     VWRAP - applied to each helper's base name to form the identifier
             that is actually defined,
     FMA   - the three-operand primitive all schemes are built from.
   The descriptions below assume FMA (a, b, c) yields a * b + c and that
   the caller passes x2 = x^2, x4 = x^4, x8 = x^8 and x16 = x^16; neither
   assumption is checked here.  In every helper poly[i] is used as the
   coefficient of x^i, and a helper whose name ends in _N reads
   poly[0] .. poly[N].  */

#ifndef VTYPE
# error Cannot use poly_generic without defining VTYPE
#endif
#ifndef VWRAP
# error Cannot use poly_generic without defining VWRAP
#endif
#ifndef FMA
# error Cannot use poly_generic without defining FMA
#endif

/* Order-3 building block shared by the Estrin and pairwise Horner
   helpers: two independent linear terms joined through x2.  */
static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2,
                                             const VTYPE *poly)
{
  /* At order 3, Estrin and Pairwise Horner are identical.  */
  VTYPE lo = FMA (poly[1], x, poly[0]);
  VTYPE hi = FMA (poly[3], x, poly[2]);
  return FMA (hi, x2, lo);
}

/* Estrin scheme, orders 4 to 7: the order-3 block over poly[0..3] is
   combined through x4 with whatever is built from poly[4] upwards.  */

static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4,
                                      const VTYPE *poly)
{
  return FMA (poly[4], x4, VWRAP (pairwise_poly_3) (x, x2, poly));
}

static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4,
                                      const VTYPE *poly)
{
  VTYPE hi = FMA (poly[5], x, poly[4]);
  VTYPE lo = VWRAP (pairwise_poly_3) (x, x2, poly);
  return FMA (hi, x4, lo);
}

static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4,
                                      const VTYPE *poly)
{
  VTYPE hi_linear = FMA (poly[5], x, poly[4]);
  VTYPE hi = FMA (poly[6], x2, hi_linear);
  VTYPE lo = VWRAP (pairwise_poly_3) (x, x2, poly);
  return FMA (hi, x4, lo);
}

static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4,
                                      const VTYPE *poly)
{
  VTYPE hi = VWRAP (pairwise_poly_3) (x, x2, poly + 4);
  VTYPE lo = VWRAP (pairwise_poly_3) (x, x2, poly);
  return FMA (hi, x4, lo);
}

/* Estrin scheme, orders 8 to 15: estrin_7 over poly[0..7] is combined
   through x8 with a smaller evaluation that starts at poly[8].  */

static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                      const VTYPE *poly)
{
  VTYPE lo = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (poly[8], x8, lo);
}

static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                      const VTYPE *poly)
{
  VTYPE hi = FMA (poly[9], x, poly[8]);
  VTYPE lo = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (hi, x8, lo);
}

static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE hi_linear = FMA (poly[9], x, poly[8]);
  VTYPE hi = FMA (poly[10], x2, hi_linear);
  VTYPE lo = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (hi, x8, lo);
}

static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE hi = VWRAP (pairwise_poly_3) (x, x2, poly + 8);
  VTYPE lo = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (hi, x8, lo);
}

static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE hi = VWRAP (estrin_4) (x, x2, x4, poly + 8);
  VTYPE lo = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (hi, x8, lo);
}

static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE hi = VWRAP (estrin_5) (x, x2, x4, poly + 8);
  VTYPE lo = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (hi, x8, lo);
}

static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE hi = VWRAP (estrin_6) (x, x2, x4, poly + 8);
  VTYPE lo = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (hi, x8, lo);
}

static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE hi = VWRAP (estrin_7) (x, x2, x4, poly + 8);
  VTYPE lo = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (hi, x8, lo);
}

/* Estrin scheme, orders 16 to 19: estrin_15 over poly[0..15] is combined
   through x16 with a short evaluation that starts at poly[16].  */

static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       VTYPE x16, const VTYPE *poly)
{
  VTYPE lo = VWRAP (estrin_15) (x, x2, x4, x8, poly);
  return FMA (poly[16], x16, lo);
}

static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       VTYPE x16, const VTYPE *poly)
{
  VTYPE hi = FMA (poly[17], x, poly[16]);
  VTYPE lo = VWRAP (estrin_15) (x, x2, x4, x8, poly);
  return FMA (hi, x16, lo);
}

static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       VTYPE x16, const VTYPE *poly)
{
  VTYPE hi_linear = FMA (poly[17], x, poly[16]);
  VTYPE hi = FMA (poly[18], x2, hi_linear);
  VTYPE lo = VWRAP (estrin_15) (x, x2, x4, x8, poly);
  return FMA (hi, x16, lo);
}

static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       VTYPE x16, const VTYPE *poly)
{
  VTYPE hi = VWRAP (pairwise_poly_3) (x, x2, poly + 16);
  VTYPE lo = VWRAP (estrin_15) (x, x2, x4, x8, poly);
  return FMA (hi, x16, lo);
}

/* Horner scheme: a single dependent chain of FMA operations running from
   the highest coefficient down to poly[0].  Orders 2 to 4 are written
   out; each higher order applies one more step on top of the previous
   order evaluated at poly + 1.  */

static inline VTYPE VWRAP (horner_2) (VTYPE x, const VTYPE *poly)
{
  VTYPE acc = FMA (poly[2], x, poly[1]);
  acc = FMA (x, acc, poly[0]);
  return acc;
}

static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly)
{
  VTYPE from2 = FMA (poly[3], x, poly[2]);
  VTYPE from1 = FMA (x, from2, poly[1]);
  return FMA (x, from1, poly[0]);
}

static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly)
{
  VTYPE from3 = FMA (poly[4], x, poly[3]);
  VTYPE from2 = FMA (x, from3, poly[2]);
  VTYPE from1 = FMA (x, from2, poly[1]);
  return FMA (x, from1, poly[0]);
}

static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_4) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_5) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_6) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_7) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_8) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_9) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_10) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_11) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

/* Pairwise Horner scheme: neighbouring coefficients are first folded
   into linear terms in x, and those terms are then chained together
   Horner-style through x2.  Orders 4 and 5 are written out; each higher
   order prepends the poly[0], poly[1] pair to the helper two orders
   lower evaluated at poly + 2.  */

static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  VTYPE pair1 = FMA (poly[3], x, poly[2]);
  VTYPE upper = FMA (x2, poly[4], pair1);
  return FMA (x2, upper, pair0);
}

static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  VTYPE pair1 = FMA (poly[3], x, poly[2]);
  VTYPE pair2 = FMA (poly[5], x, poly[4]);
  VTYPE upper = FMA (x2, pair2, pair1);
  return FMA (x2, upper, pair0);
}

static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_4) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_5) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_6) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_7) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_8) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_9) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_10) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_11) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_12) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_13) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_14) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_15) (x, x2, poly + 2), pair0);
}

static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  return FMA (x2, VWRAP (pw_horner_16) (x, x2, poly + 2), pair0);
}