xref: /freebsd/contrib/arm-optimized-routines/pl/math/poly_generic.h (revision e1e636193db45630c7881246d25902e57c43d24e)
1 /*
2  * Generic helpers for evaluating polynomials with various schemes.
3  *
4  * Copyright (c) 2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #ifndef VTYPE
9 # error Cannot use poly_generic without defining VTYPE
10 #endif
11 #ifndef VWRAP
12 # error Cannot use poly_generic without defining VWRAP
13 #endif
14 #ifndef FMA
15 # error Cannot use poly_generic without defining FMA
16 #endif
17 
18 static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2,
19 					     const VTYPE *poly)
20 {
21   /* At order 3, Estrin and Pairwise Horner are identical.  */
22   VTYPE p01 = FMA (poly[1], x, poly[0]);
23   VTYPE p23 = FMA (poly[3], x, poly[2]);
24   return FMA (p23, x2, p01);
25 }
26 
27 static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4,
28 				      const VTYPE *poly)
29 {
30   VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
31   return FMA (poly[4], x4, p03);
32 }
33 static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4,
34 				      const VTYPE *poly)
35 {
36   VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
37   VTYPE p45 = FMA (poly[5], x, poly[4]);
38   return FMA (p45, x4, p03);
39 }
40 static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4,
41 				      const VTYPE *poly)
42 {
43   VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
44   VTYPE p45 = FMA (poly[5], x, poly[4]);
45   VTYPE p46 = FMA (poly[6], x2, p45);
46   return FMA (p46, x4, p03);
47 }
48 static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4,
49 				      const VTYPE *poly)
50 {
51   VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
52   VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4);
53   return FMA (p47, x4, p03);
54 }
55 static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
56 				      const VTYPE *poly)
57 {
58   return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly));
59 }
60 static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
61 				      const VTYPE *poly)
62 {
63   VTYPE p89 = FMA (poly[9], x, poly[8]);
64   return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly));
65 }
66 static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
67 				       const VTYPE *poly)
68 {
69   VTYPE p89 = FMA (poly[9], x, poly[8]);
70   VTYPE p8_10 = FMA (poly[10], x2, p89);
71   return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly));
72 }
73 static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
74 				       const VTYPE *poly)
75 {
76   VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8);
77   return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly));
78 }
79 static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
80 				       const VTYPE *poly)
81 {
82   return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8,
83 	      VWRAP (estrin_7) (x, x2, x4, poly));
84 }
85 static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
86 				       const VTYPE *poly)
87 {
88   return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8,
89 	      VWRAP (estrin_7) (x, x2, x4, poly));
90 }
91 static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
92 				       const VTYPE *poly)
93 {
94   return FMA (VWRAP (estrin_6) (x, x2, x4, poly + 8), x8,
95 	      VWRAP (estrin_7) (x, x2, x4, poly));
96 }
97 static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
98 				       const VTYPE *poly)
99 {
100   return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8,
101 	      VWRAP (estrin_7) (x, x2, x4, poly));
102 }
103 static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
104 				       VTYPE x16, const VTYPE *poly)
105 {
106   return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
107 }
108 static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
109 				       VTYPE x16, const VTYPE *poly)
110 {
111   VTYPE p16_17 = FMA (poly[17], x, poly[16]);
112   return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
113 }
114 static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
115 				       VTYPE x16, const VTYPE *poly)
116 {
117   VTYPE p16_17 = FMA (poly[17], x, poly[16]);
118   VTYPE p16_18 = FMA (poly[18], x2, p16_17);
119   return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
120 }
121 static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
122 				       VTYPE x16, const VTYPE *poly)
123 {
124   VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16);
125   return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
126 }
127 
128 static inline VTYPE VWRAP (horner_2) (VTYPE x, const VTYPE *poly)
129 {
130   VTYPE p = FMA (poly[2], x, poly[1]);
131   return FMA (x, p, poly[0]);
132 }
133 static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly)
134 {
135   VTYPE p = FMA (poly[3], x, poly[2]);
136   p = FMA (x, p, poly[1]);
137   p = FMA (x, p, poly[0]);
138   return p;
139 }
140 static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly)
141 {
142   VTYPE p = FMA (poly[4], x, poly[3]);
143   p = FMA (x, p, poly[2]);
144   p = FMA (x, p, poly[1]);
145   p = FMA (x, p, poly[0]);
146   return p;
147 }
148 static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly)
149 {
150   return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]);
151 }
152 static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly)
153 {
154   return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]);
155 }
156 static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly)
157 {
158   return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]);
159 }
160 static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly)
161 {
162   return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]);
163 }
164 static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly)
165 {
166   return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]);
167 }
168 static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly)
169 {
170   return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]);
171 }
172 static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly)
173 {
174   return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]);
175 }
176 static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly)
177 {
178   return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]);
179 }
180 
181 static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly)
182 {
183   VTYPE p01 = FMA (poly[1], x, poly[0]);
184   VTYPE p23 = FMA (poly[3], x, poly[2]);
185   VTYPE p;
186   p = FMA (x2, poly[4], p23);
187   p = FMA (x2, p, p01);
188   return p;
189 }
190 static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly)
191 {
192   VTYPE p01 = FMA (poly[1], x, poly[0]);
193   VTYPE p23 = FMA (poly[3], x, poly[2]);
194   VTYPE p45 = FMA (poly[5], x, poly[4]);
195   VTYPE p;
196   p = FMA (x2, p45, p23);
197   p = FMA (x2, p, p01);
198   return p;
199 }
200 static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly)
201 {
202   VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2);
203   VTYPE p01 = FMA (poly[1], x, poly[0]);
204   return FMA (x2, p26, p01);
205 }
206 static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly)
207 {
208   VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2);
209   VTYPE p01 = FMA (poly[1], x, poly[0]);
210   return FMA (x2, p27, p01);
211 }
212 static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly)
213 {
214   VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2);
215   VTYPE p01 = FMA (poly[1], x, poly[0]);
216   return FMA (x2, p28, p01);
217 }
218 static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly)
219 {
220   VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2);
221   VTYPE p01 = FMA (poly[1], x, poly[0]);
222   return FMA (x2, p29, p01);
223 }
224 static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly)
225 {
226   VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2);
227   VTYPE p01 = FMA (poly[1], x, poly[0]);
228   return FMA (x2, p2_10, p01);
229 }
230 static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly)
231 {
232   VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2);
233   VTYPE p01 = FMA (poly[1], x, poly[0]);
234   return FMA (x2, p2_11, p01);
235 }
236 static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly)
237 {
238   VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2);
239   VTYPE p01 = FMA (poly[1], x, poly[0]);
240   return FMA (x2, p2_12, p01);
241 }
242 static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly)
243 {
244   VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2);
245   VTYPE p01 = FMA (poly[1], x, poly[0]);
246   return FMA (x2, p2_13, p01);
247 }
248 static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly)
249 {
250   VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2);
251   VTYPE p01 = FMA (poly[1], x, poly[0]);
252   return FMA (x2, p2_14, p01);
253 }
254 static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly)
255 {
256   VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2);
257   VTYPE p01 = FMA (poly[1], x, poly[0]);
258   return FMA (x2, p2_15, p01);
259 }
260 static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly)
261 {
262   VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2);
263   VTYPE p01 = FMA (poly[1], x, poly[0]);
264   return FMA (x2, p2_16, p01);
265 }
266 static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly)
267 {
268   VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2);
269   VTYPE p01 = FMA (poly[1], x, poly[0]);
270   return FMA (x2, p2_17, p01);
271 }
272 static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly)
273 {
274   VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2);
275   VTYPE p01 = FMA (poly[1], x, poly[0]);
276   return FMA (x2, p2_18, p01);
277 }
278