xref: /freebsd/contrib/arm-optimized-routines/math/math_config.h (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1 /*
2  * Configuration for math routines.
3  *
4  * Copyright (c) 2017-2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #ifndef _MATH_CONFIG_H
9 #define _MATH_CONFIG_H
10 
11 #include <math.h>
12 #include <stdint.h>
13 
14 #ifndef WANT_ROUNDING
15 /* If defined to 1, return correct results for special cases in non-nearest
16    rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
17    This may be set to 0 if there is no fenv support or if math functions only
18    get called in round to nearest mode.  */
19 # define WANT_ROUNDING 1
20 #endif
21 #ifndef WANT_ERRNO
22 /* If defined to 1, set errno in math functions according to ISO C.  Many math
23    libraries do not set errno, so this is 0 by default.  It may need to be
24    set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
25 # define WANT_ERRNO 0
26 #endif
27 #ifndef WANT_ERRNO_UFLOW
28 /* Set errno to ERANGE if result underflows to 0 (in all rounding modes).  */
29 # define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
30 #endif
31 
32 /* Compiler can inline round as a single instruction.  */
33 #ifndef HAVE_FAST_ROUND
34 # if __aarch64__
35 #   define HAVE_FAST_ROUND 1
36 # else
37 #   define HAVE_FAST_ROUND 0
38 # endif
39 #endif
40 
41 /* Compiler can inline lround, but not (long)round(x).  */
42 #ifndef HAVE_FAST_LROUND
43 # if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
44 #   define HAVE_FAST_LROUND 1
45 # else
46 #   define HAVE_FAST_LROUND 0
47 # endif
48 #endif
49 
50 /* Compiler can inline fma as a single instruction.  */
51 #ifndef HAVE_FAST_FMA
52 # if defined FP_FAST_FMA || __aarch64__
53 #   define HAVE_FAST_FMA 1
54 # else
55 #   define HAVE_FAST_FMA 0
56 # endif
57 #endif
58 
59 /* Provide *_finite symbols and some of the glibc hidden symbols
60    so libmathlib can be used with binaries compiled against glibc
61    to interpose math functions with both static and dynamic linking.  */
62 #ifndef USE_GLIBC_ABI
63 # if __GNUC__
64 #   define USE_GLIBC_ABI 1
65 # else
66 #   define USE_GLIBC_ABI 0
67 # endif
68 #endif
69 
70 /* Optionally used extensions.  */
71 #ifdef __GNUC__
72 # define HIDDEN __attribute__ ((__visibility__ ("hidden")))
73 # define NOINLINE __attribute__ ((noinline))
74 # define UNUSED __attribute__ ((unused))
75 # define likely(x) __builtin_expect (!!(x), 1)
76 # define unlikely(x) __builtin_expect (x, 0)
77 # if __GNUC__ >= 9
78 #   define attribute_copy(f) __attribute__ ((copy (f)))
79 # else
80 #   define attribute_copy(f)
81 # endif
82 # define strong_alias(f, a) \
83   extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
84 # define hidden_alias(f, a) \
85   extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
86   attribute_copy (f);
87 #else
88 # define HIDDEN
89 # define NOINLINE
90 # define UNUSED
91 # define likely(x) (x)
92 # define unlikely(x) (x)
93 #endif
94 
95 /* Return ptr but hide its value from the compiler so accesses through it
96    cannot be optimized based on the contents.  */
97 #define ptr_barrier(ptr)                                                      \
98   ({                                                                          \
99     __typeof (ptr) __ptr = (ptr);                                             \
100     __asm("" : "+r"(__ptr));                                                  \
101     __ptr;                                                                    \
102   })
103 
104 /* Symbol renames to avoid libc conflicts.  */
105 #define __math_oflowf arm_math_oflowf
106 #define __math_uflowf arm_math_uflowf
107 #define __math_may_uflowf arm_math_may_uflowf
108 #define __math_divzerof arm_math_divzerof
109 #define __math_oflow arm_math_oflow
110 #define __math_uflow arm_math_uflow
111 #define __math_may_uflow arm_math_may_uflow
112 #define __math_divzero arm_math_divzero
113 #define __math_invalidf arm_math_invalidf
114 #define __math_invalid arm_math_invalid
115 #define __math_check_oflow arm_math_check_oflow
116 #define __math_check_uflow arm_math_check_uflow
117 #define __math_check_oflowf arm_math_check_oflowf
118 #define __math_check_uflowf arm_math_check_uflowf
119 
120 #define __sincosf_table arm_math_sincosf_table
121 #define __inv_pio4 arm_math_inv_pio4
122 #define __exp2f_data arm_math_exp2f_data
123 #define __logf_data arm_math_logf_data
124 #define __log2f_data arm_math_log2f_data
125 #define __powf_log2_data arm_math_powf_log2_data
126 #define __exp_data arm_math_exp_data
127 #define __log_data arm_math_log_data
128 #define __log2_data arm_math_log2_data
129 #define __pow_log_data arm_math_pow_log_data
130 #define __erff_data arm_math_erff_data
131 #define __erf_data arm_math_erf_data
132 #define __v_exp_data arm_math_v_exp_data
133 #define __v_log_data arm_math_v_log_data
134 
135 #if HAVE_FAST_ROUND
136 /* When set, the roundtoint and converttoint functions are provided with
137    the semantics documented below.  */
138 # define TOINT_INTRINSICS 1
139 
140 /* Round x to nearest int in all rounding modes, ties have to be rounded
141    consistently with converttoint so the results match.  If the result
142    would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
143 static inline double_t
144 roundtoint (double_t x)
145 {
146   return round (x);
147 }
148 
149 /* Convert x to nearest int in all rounding modes, ties have to be rounded
150    consistently with roundtoint.  If the result is not representible in an
151    int32_t then the semantics is unspecified.  */
152 static inline int32_t
153 converttoint (double_t x)
154 {
155 # if HAVE_FAST_LROUND
156   return lround (x);
157 # else
158   return (long) round (x);
159 # endif
160 }
161 #endif
162 
163 static inline uint32_t
164 asuint (float f)
165 {
166   union
167   {
168     float f;
169     uint32_t i;
170   } u = {f};
171   return u.i;
172 }
173 
174 static inline float
175 asfloat (uint32_t i)
176 {
177   union
178   {
179     uint32_t i;
180     float f;
181   } u = {i};
182   return u.f;
183 }
184 
185 static inline uint64_t
186 asuint64 (double f)
187 {
188   union
189   {
190     double f;
191     uint64_t i;
192   } u = {f};
193   return u.i;
194 }
195 
196 static inline double
197 asdouble (uint64_t i)
198 {
199   union
200   {
201     uint64_t i;
202     double f;
203   } u = {i};
204   return u.f;
205 }
206 
207 #ifndef IEEE_754_2008_SNAN
208 # define IEEE_754_2008_SNAN 1
209 #endif
210 static inline int
211 issignalingf_inline (float x)
212 {
213   uint32_t ix = asuint (x);
214   if (!IEEE_754_2008_SNAN)
215     return (ix & 0x7fc00000) == 0x7fc00000;
216   return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
217 }
218 
219 static inline int
220 issignaling_inline (double x)
221 {
222   uint64_t ix = asuint64 (x);
223   if (!IEEE_754_2008_SNAN)
224     return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
225   return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
226 }
227 
228 #if __aarch64__ && __GNUC__
229 /* Prevent the optimization of a floating-point expression.  */
230 static inline float
231 opt_barrier_float (float x)
232 {
233   __asm__ __volatile__ ("" : "+w" (x));
234   return x;
235 }
236 static inline double
237 opt_barrier_double (double x)
238 {
239   __asm__ __volatile__ ("" : "+w" (x));
240   return x;
241 }
242 /* Force the evaluation of a floating-point expression for its side-effect.  */
243 static inline void
244 force_eval_float (float x)
245 {
246   __asm__ __volatile__ ("" : "+w" (x));
247 }
248 static inline void
249 force_eval_double (double x)
250 {
251   __asm__ __volatile__ ("" : "+w" (x));
252 }
253 #else
254 static inline float
255 opt_barrier_float (float x)
256 {
257   volatile float y = x;
258   return y;
259 }
260 static inline double
261 opt_barrier_double (double x)
262 {
263   volatile double y = x;
264   return y;
265 }
266 static inline void
267 force_eval_float (float x)
268 {
269   volatile float y UNUSED = x;
270 }
271 static inline void
272 force_eval_double (double x)
273 {
274   volatile double y UNUSED = x;
275 }
276 #endif
277 
278 /* Evaluate an expression as the specified type, normally a type
279    cast should be enough, but compilers implement non-standard
280    excess-precision handling, so when FLT_EVAL_METHOD != 0 then
281    these functions may need to be customized.  */
282 static inline float
283 eval_as_float (float x)
284 {
285   return x;
286 }
287 static inline double
288 eval_as_double (double x)
289 {
290   return x;
291 }
292 
293 /* Error handling tail calls for special cases, with a sign argument.
294    The sign of the return value is set if the argument is non-zero.  */
295 
296 /* The result overflows.  */
297 HIDDEN float __math_oflowf (uint32_t);
298 /* The result underflows to 0 in nearest rounding mode.  */
299 HIDDEN float __math_uflowf (uint32_t);
300 /* The result underflows to 0 in some directed rounding mode only.  */
301 HIDDEN float __math_may_uflowf (uint32_t);
302 /* Division by zero.  */
303 HIDDEN float __math_divzerof (uint32_t);
304 /* The result overflows.  */
305 HIDDEN double __math_oflow (uint32_t);
306 /* The result underflows to 0 in nearest rounding mode.  */
307 HIDDEN double __math_uflow (uint32_t);
308 /* The result underflows to 0 in some directed rounding mode only.  */
309 HIDDEN double __math_may_uflow (uint32_t);
310 /* Division by zero.  */
311 HIDDEN double __math_divzero (uint32_t);
312 
313 /* Error handling using input checking.  */
314 
315 /* Invalid input unless it is a quiet NaN.  */
316 HIDDEN float __math_invalidf (float);
317 /* Invalid input unless it is a quiet NaN.  */
318 HIDDEN double __math_invalid (double);
319 
320 /* Error handling using output checking, only for errno setting.  */
321 
322 /* Check if the result overflowed to infinity.  */
323 HIDDEN double __math_check_oflow (double);
324 /* Check if the result underflowed to 0.  */
325 HIDDEN double __math_check_uflow (double);
326 
327 /* Check if the result overflowed to infinity.  */
328 static inline double
329 check_oflow (double x)
330 {
331   return WANT_ERRNO ? __math_check_oflow (x) : x;
332 }
333 
334 /* Check if the result underflowed to 0.  */
335 static inline double
336 check_uflow (double x)
337 {
338   return WANT_ERRNO ? __math_check_uflow (x) : x;
339 }
340 
341 /* Check if the result overflowed to infinity.  */
342 HIDDEN float __math_check_oflowf (float);
343 /* Check if the result underflowed to 0.  */
344 HIDDEN float __math_check_uflowf (float);
345 
346 /* Check if the result overflowed to infinity.  */
347 static inline float
348 check_oflowf (float x)
349 {
350   return WANT_ERRNO ? __math_check_oflowf (x) : x;
351 }
352 
353 /* Check if the result underflowed to 0.  */
354 static inline float
355 check_uflowf (float x)
356 {
357   return WANT_ERRNO ? __math_check_uflowf (x) : x;
358 }
359 
360 /* Shared between expf, exp2f and powf.  */
361 #define EXP2F_TABLE_BITS 5
362 #define EXP2F_POLY_ORDER 3
363 extern const struct exp2f_data
364 {
365   uint64_t tab[1 << EXP2F_TABLE_BITS];
366   double shift_scaled;
367   double poly[EXP2F_POLY_ORDER];
368   double shift;
369   double invln2_scaled;
370   double poly_scaled[EXP2F_POLY_ORDER];
371 } __exp2f_data HIDDEN;
372 
373 #define LOGF_TABLE_BITS 4
374 #define LOGF_POLY_ORDER 4
375 extern const struct logf_data
376 {
377   struct
378   {
379     double invc, logc;
380   } tab[1 << LOGF_TABLE_BITS];
381   double ln2;
382   double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
383 } __logf_data HIDDEN;
384 
385 #define LOG2F_TABLE_BITS 4
386 #define LOG2F_POLY_ORDER 4
387 extern const struct log2f_data
388 {
389   struct
390   {
391     double invc, logc;
392   } tab[1 << LOG2F_TABLE_BITS];
393   double poly[LOG2F_POLY_ORDER];
394 } __log2f_data HIDDEN;
395 
396 #define POWF_LOG2_TABLE_BITS 4
397 #define POWF_LOG2_POLY_ORDER 5
398 #if TOINT_INTRINSICS
399 # define POWF_SCALE_BITS EXP2F_TABLE_BITS
400 #else
401 # define POWF_SCALE_BITS 0
402 #endif
403 #define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
404 extern const struct powf_log2_data
405 {
406   struct
407   {
408     double invc, logc;
409   } tab[1 << POWF_LOG2_TABLE_BITS];
410   double poly[POWF_LOG2_POLY_ORDER];
411 } __powf_log2_data HIDDEN;
412 
413 
414 #define EXP_TABLE_BITS 7
415 #define EXP_POLY_ORDER 5
416 /* Use polynomial that is optimized for a wider input range.  This may be
417    needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
418 #define EXP_POLY_WIDE 0
419 /* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
420    needed for good precision in non-nearest rouning and !EXP_POLY_WIDE.  */
421 #define EXP_USE_TOINT_NARROW 0
422 #define EXP2_POLY_ORDER 5
423 #define EXP2_POLY_WIDE 0
424 /* Wider exp10 polynomial necessary for good precision in non-nearest rounding
425    and !TOINT_INTRINSICS.  */
426 #define EXP10_POLY_WIDE 0
427 extern const struct exp_data
428 {
429   double invln2N;
430   double invlog10_2N;
431   double shift;
432   double negln2hiN;
433   double negln2loN;
434   double neglog10_2hiN;
435   double neglog10_2loN;
436   double poly[4]; /* Last four coefficients.  */
437   double exp2_shift;
438   double exp2_poly[EXP2_POLY_ORDER];
439   double exp10_poly[5];
440   uint64_t tab[2*(1 << EXP_TABLE_BITS)];
441 } __exp_data HIDDEN;
442 
443 #define LOG_TABLE_BITS 7
444 #define LOG_POLY_ORDER 6
445 #define LOG_POLY1_ORDER 12
446 extern const struct log_data
447 {
448   double ln2hi;
449   double ln2lo;
450   double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
451   double poly1[LOG_POLY1_ORDER - 1];
452   struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
453 #if !HAVE_FAST_FMA
454   struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
455 #endif
456 } __log_data HIDDEN;
457 
458 #define LOG2_TABLE_BITS 6
459 #define LOG2_POLY_ORDER 7
460 #define LOG2_POLY1_ORDER 11
461 extern const struct log2_data
462 {
463   double invln2hi;
464   double invln2lo;
465   double poly[LOG2_POLY_ORDER - 1];
466   double poly1[LOG2_POLY1_ORDER - 1];
467   struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
468 #if !HAVE_FAST_FMA
469   struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
470 #endif
471 } __log2_data HIDDEN;
472 
473 #define POW_LOG_TABLE_BITS 7
474 #define POW_LOG_POLY_ORDER 8
475 extern const struct pow_log_data
476 {
477   double ln2hi;
478   double ln2lo;
479   double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
480   /* Note: the pad field is unused, but allows slightly faster indexing.  */
481   struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
482 } __pow_log_data HIDDEN;
483 
484 extern const struct erff_data
485 {
486   float erff_poly_A[6];
487   float erff_poly_B[7];
488 } __erff_data HIDDEN;
489 
490 #define ERF_POLY_A_ORDER 19
491 #define ERF_POLY_A_NCOEFFS 10
492 #define ERFC_POLY_C_NCOEFFS 16
493 #define ERFC_POLY_D_NCOEFFS 18
494 #define ERFC_POLY_E_NCOEFFS 14
495 #define ERFC_POLY_F_NCOEFFS 17
496 extern const struct erf_data
497 {
498   double erf_poly_A[ERF_POLY_A_NCOEFFS];
499   double erf_ratio_N_A[5];
500   double erf_ratio_D_A[5];
501   double erf_ratio_N_B[7];
502   double erf_ratio_D_B[6];
503   double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
504   double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
505   double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
506   double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
507 } __erf_data HIDDEN;
508 
509 #define V_EXP_TABLE_BITS 7
510 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
511 
512 #define V_LOG_TABLE_BITS 7
513 extern const struct v_log_data
514 {
515   struct
516   {
517     double invc, logc;
518   } table[1 << V_LOG_TABLE_BITS];
519 } __v_log_data HIDDEN;
520 
521 #endif
522