1 /*
2 * Generic helpers for evaluating polynomials with various schemes.
3 *
4 * Copyright (c) 2023-2024, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
/* This file is a template.  Three macros must already be defined at the
   point where it is processed; compilation stops with an error otherwise.  */

/* VTYPE: the type used for the argument, its powers, the coefficients and
   the return value of every helper in this file.  */
#ifndef VTYPE
# error Cannot use poly_generic without defining VTYPE
#endif
/* VWRAP (name): used in place of the function name in every definition and
   call below, so it decides the identifier each helper ends up with.  */
#ifndef VWRAP
# error Cannot use poly_generic without defining VWRAP
#endif
/* FMA (a, b, c): the only arithmetic primitive the helpers use.  Presumably
   a fused multiply-add computing a * b + c -- confirm against the definition
   supplied by the includer.  */
#ifndef FMA
# error Cannot use poly_generic without defining FMA
#endif
17
VWRAP(pairwise_poly_3)18 static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2,
19 const VTYPE *poly)
20 {
21 /* At order 3, Estrin and Pairwise Horner are identical. */
22 VTYPE p01 = FMA (poly[1], x, poly[0]);
23 VTYPE p23 = FMA (poly[3], x, poly[2]);
24 return FMA (p23, x2, p01);
25 }
26
VWRAP(estrin_4)27 static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4,
28 const VTYPE *poly)
29 {
30 VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
31 return FMA (poly[4], x4, p03);
32 }
VWRAP(estrin_5)33 static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4,
34 const VTYPE *poly)
35 {
36 VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
37 VTYPE p45 = FMA (poly[5], x, poly[4]);
38 return FMA (p45, x4, p03);
39 }
VWRAP(estrin_6)40 static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4,
41 const VTYPE *poly)
42 {
43 VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
44 VTYPE p45 = FMA (poly[5], x, poly[4]);
45 VTYPE p46 = FMA (poly[6], x2, p45);
46 return FMA (p46, x4, p03);
47 }
VWRAP(estrin_7)48 static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4,
49 const VTYPE *poly)
50 {
51 VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
52 VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4);
53 return FMA (p47, x4, p03);
54 }
VWRAP(estrin_8)55 static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
56 const VTYPE *poly)
57 {
58 return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly));
59 }
VWRAP(estrin_9)60 static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
61 const VTYPE *poly)
62 {
63 VTYPE p89 = FMA (poly[9], x, poly[8]);
64 return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly));
65 }
VWRAP(estrin_10)66 static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
67 const VTYPE *poly)
68 {
69 VTYPE p89 = FMA (poly[9], x, poly[8]);
70 VTYPE p8_10 = FMA (poly[10], x2, p89);
71 return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly));
72 }
VWRAP(estrin_11)73 static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
74 const VTYPE *poly)
75 {
76 VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8);
77 return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly));
78 }
VWRAP(estrin_12)79 static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
80 const VTYPE *poly)
81 {
82 return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8,
83 VWRAP (estrin_7) (x, x2, x4, poly));
84 }
VWRAP(estrin_13)85 static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
86 const VTYPE *poly)
87 {
88 return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8,
89 VWRAP (estrin_7) (x, x2, x4, poly));
90 }
VWRAP(estrin_14)91 static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
92 const VTYPE *poly)
93 {
94 return FMA (VWRAP (estrin_6) (x, x2, x4, poly + 8), x8,
95 VWRAP (estrin_7) (x, x2, x4, poly));
96 }
VWRAP(estrin_15)97 static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
98 const VTYPE *poly)
99 {
100 return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8,
101 VWRAP (estrin_7) (x, x2, x4, poly));
102 }
VWRAP(estrin_16)103 static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
104 VTYPE x16, const VTYPE *poly)
105 {
106 return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
107 }
VWRAP(estrin_17)108 static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
109 VTYPE x16, const VTYPE *poly)
110 {
111 VTYPE p16_17 = FMA (poly[17], x, poly[16]);
112 return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
113 }
VWRAP(estrin_18)114 static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
115 VTYPE x16, const VTYPE *poly)
116 {
117 VTYPE p16_17 = FMA (poly[17], x, poly[16]);
118 VTYPE p16_18 = FMA (poly[18], x2, p16_17);
119 return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
120 }
VWRAP(estrin_19)121 static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
122 VTYPE x16, const VTYPE *poly)
123 {
124 VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16);
125 return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
126 }
127
VWRAP(horner_2)128 static inline VTYPE VWRAP (horner_2) (VTYPE x, const VTYPE *poly)
129 {
130 VTYPE p = FMA (poly[2], x, poly[1]);
131 return FMA (x, p, poly[0]);
132 }
VWRAP(horner_3)133 static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly)
134 {
135 VTYPE p = FMA (poly[3], x, poly[2]);
136 p = FMA (x, p, poly[1]);
137 p = FMA (x, p, poly[0]);
138 return p;
139 }
VWRAP(horner_4)140 static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly)
141 {
142 VTYPE p = FMA (poly[4], x, poly[3]);
143 p = FMA (x, p, poly[2]);
144 p = FMA (x, p, poly[1]);
145 p = FMA (x, p, poly[0]);
146 return p;
147 }
VWRAP(horner_5)148 static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly)
149 {
150 return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]);
151 }
VWRAP(horner_6)152 static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly)
153 {
154 return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]);
155 }
VWRAP(horner_7)156 static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly)
157 {
158 return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]);
159 }
VWRAP(horner_8)160 static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly)
161 {
162 return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]);
163 }
VWRAP(horner_9)164 static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly)
165 {
166 return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]);
167 }
VWRAP(horner_10)168 static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly)
169 {
170 return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]);
171 }
VWRAP(horner_11)172 static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly)
173 {
174 return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]);
175 }
VWRAP(horner_12)176 static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly)
177 {
178 return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]);
179 }
180
VWRAP(pw_horner_4)181 static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly)
182 {
183 VTYPE p01 = FMA (poly[1], x, poly[0]);
184 VTYPE p23 = FMA (poly[3], x, poly[2]);
185 VTYPE p;
186 p = FMA (x2, poly[4], p23);
187 p = FMA (x2, p, p01);
188 return p;
189 }
VWRAP(pw_horner_5)190 static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly)
191 {
192 VTYPE p01 = FMA (poly[1], x, poly[0]);
193 VTYPE p23 = FMA (poly[3], x, poly[2]);
194 VTYPE p45 = FMA (poly[5], x, poly[4]);
195 VTYPE p;
196 p = FMA (x2, p45, p23);
197 p = FMA (x2, p, p01);
198 return p;
199 }
VWRAP(pw_horner_6)200 static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly)
201 {
202 VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2);
203 VTYPE p01 = FMA (poly[1], x, poly[0]);
204 return FMA (x2, p26, p01);
205 }
VWRAP(pw_horner_7)206 static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly)
207 {
208 VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2);
209 VTYPE p01 = FMA (poly[1], x, poly[0]);
210 return FMA (x2, p27, p01);
211 }
VWRAP(pw_horner_8)212 static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly)
213 {
214 VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2);
215 VTYPE p01 = FMA (poly[1], x, poly[0]);
216 return FMA (x2, p28, p01);
217 }
VWRAP(pw_horner_9)218 static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly)
219 {
220 VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2);
221 VTYPE p01 = FMA (poly[1], x, poly[0]);
222 return FMA (x2, p29, p01);
223 }
VWRAP(pw_horner_10)224 static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly)
225 {
226 VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2);
227 VTYPE p01 = FMA (poly[1], x, poly[0]);
228 return FMA (x2, p2_10, p01);
229 }
VWRAP(pw_horner_11)230 static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly)
231 {
232 VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2);
233 VTYPE p01 = FMA (poly[1], x, poly[0]);
234 return FMA (x2, p2_11, p01);
235 }
VWRAP(pw_horner_12)236 static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly)
237 {
238 VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2);
239 VTYPE p01 = FMA (poly[1], x, poly[0]);
240 return FMA (x2, p2_12, p01);
241 }
VWRAP(pw_horner_13)242 static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly)
243 {
244 VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2);
245 VTYPE p01 = FMA (poly[1], x, poly[0]);
246 return FMA (x2, p2_13, p01);
247 }
VWRAP(pw_horner_14)248 static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly)
249 {
250 VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2);
251 VTYPE p01 = FMA (poly[1], x, poly[0]);
252 return FMA (x2, p2_14, p01);
253 }
VWRAP(pw_horner_15)254 static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly)
255 {
256 VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2);
257 VTYPE p01 = FMA (poly[1], x, poly[0]);
258 return FMA (x2, p2_15, p01);
259 }
VWRAP(pw_horner_16)260 static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly)
261 {
262 VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2);
263 VTYPE p01 = FMA (poly[1], x, poly[0]);
264 return FMA (x2, p2_16, p01);
265 }
VWRAP(pw_horner_17)266 static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly)
267 {
268 VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2);
269 VTYPE p01 = FMA (poly[1], x, poly[0]);
270 return FMA (x2, p2_17, p01);
271 }
VWRAP(pw_horner_18)272 static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly)
273 {
274 VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2);
275 VTYPE p01 = FMA (poly[1], x, poly[0]);
276 return FMA (x2, p2_18, p01);
277 }
278