xref: /freebsd/contrib/arm-optimized-routines/pl/math/math_config.h (revision 5a02ffc32e777041dd2dad4e651ed2a0865a0a5d)
1 /*
2  * Configuration for math routines.
3  *
4  * Copyright (c) 2017-2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #ifndef _MATH_CONFIG_H
9 #define _MATH_CONFIG_H
10 
11 #include <math.h>
12 #include <stdint.h>
13 
14 #ifndef WANT_ROUNDING
15 /* If defined to 1, return correct results for special cases in non-nearest
16    rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
17    -0.0f). This may be set to 0 if there is no fenv support or if math
18    functions only get called in round to nearest mode.  */
19 # define WANT_ROUNDING 1
20 #endif
21 #ifndef WANT_ERRNO
22 /* If defined to 1, set errno in math functions according to ISO C.  Many math
23    libraries do not set errno, so this is 0 by default.  It may need to be
24    set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
25 # define WANT_ERRNO 0
26 #endif
27 #ifndef WANT_SIMD_EXCEPT
28 /* If defined to 1, trigger fp exceptions in vector routines, consistently with
29    behaviour expected from the corresponding scalar routine.  */
30 # define WANT_SIMD_EXCEPT 0
31 #endif
32 
33 /* Compiler can inline round as a single instruction.  */
34 #ifndef HAVE_FAST_ROUND
35 # if __aarch64__
36 #  define HAVE_FAST_ROUND 1
37 # else
38 #  define HAVE_FAST_ROUND 0
39 # endif
40 #endif
41 
42 /* Compiler can inline lround, but not (long)round(x).  */
43 #ifndef HAVE_FAST_LROUND
44 # if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408                 \
45       && __NO_MATH_ERRNO__
46 #  define HAVE_FAST_LROUND 1
47 # else
48 #  define HAVE_FAST_LROUND 0
49 # endif
50 #endif
51 
52 /* Compiler can inline fma as a single instruction.  */
53 #ifndef HAVE_FAST_FMA
54 # if defined FP_FAST_FMA || __aarch64__
55 #  define HAVE_FAST_FMA 1
56 # else
57 #  define HAVE_FAST_FMA 0
58 # endif
59 #endif
60 
61 /* Provide *_finite symbols and some of the glibc hidden symbols
62    so libmathlib can be used with binaries compiled against glibc
63    to interpose math functions with both static and dynamic linking.  */
64 #ifndef USE_GLIBC_ABI
65 # if __GNUC__
66 #  define USE_GLIBC_ABI 1
67 # else
68 #  define USE_GLIBC_ABI 0
69 # endif
70 #endif
71 
72 /* Optionally used extensions.  */
73 #ifdef __GNUC__
74 # define HIDDEN __attribute__ ((__visibility__ ("hidden")))
75 # define NOINLINE __attribute__ ((noinline))
76 # define UNUSED __attribute__ ((unused))
77 # define likely(x) __builtin_expect (!!(x), 1)
78 # define unlikely(x) __builtin_expect (x, 0)
79 # if __GNUC__ >= 9
80 #  define attribute_copy(f) __attribute__ ((copy (f)))
81 # else
82 #  define attribute_copy(f)
83 # endif
84 # define strong_alias(f, a)                                                   \
85     extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
86 # define hidden_alias(f, a)                                                   \
87     extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
88 	attribute_copy (f);
89 #else
90 # define HIDDEN
91 # define NOINLINE
92 # define UNUSED
93 # define likely(x) (x)
94 # define unlikely(x) (x)
95 #endif
96 
97 /* Return ptr but hide its value from the compiler so accesses through it
98    cannot be optimized based on the contents.  */
99 #define ptr_barrier(ptr)                                                      \
100   ({                                                                          \
101     __typeof (ptr) __ptr = (ptr);                                             \
102     __asm("" : "+r"(__ptr));                                                  \
103     __ptr;                                                                    \
104   })
105 
106 /* Symbol renames to avoid libc conflicts.  */
107 #define __math_oflowf arm_math_oflowf
108 #define __math_uflowf arm_math_uflowf
109 #define __math_may_uflowf arm_math_may_uflowf
110 #define __math_divzerof arm_math_divzerof
111 #define __math_oflow arm_math_oflow
112 #define __math_uflow arm_math_uflow
113 #define __math_may_uflow arm_math_may_uflow
114 #define __math_divzero arm_math_divzero
115 #define __math_invalidf arm_math_invalidf
116 #define __math_invalid arm_math_invalid
117 #define __math_check_oflow arm_math_check_oflow
118 #define __math_check_uflow arm_math_check_uflow
119 #define __math_check_oflowf arm_math_check_oflowf
120 #define __math_check_uflowf arm_math_check_uflowf
121 
122 #if HAVE_FAST_ROUND
123 /* When set, the roundtoint and converttoint functions are provided with
124    the semantics documented below.  */
125 # define TOINT_INTRINSICS 1
126 
127 /* Round x to nearest int in all rounding modes, ties have to be rounded
128    consistently with converttoint so the results match.  If the result
129    would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
130 static inline double_t
roundtoint(double_t x)131 roundtoint (double_t x)
132 {
133   return round (x);
134 }
135 
136 /* Convert x to nearest int in all rounding modes, ties have to be rounded
137    consistently with roundtoint.  If the result is not representible in an
138    int32_t then the semantics is unspecified.  */
139 static inline int32_t
converttoint(double_t x)140 converttoint (double_t x)
141 {
142 # if HAVE_FAST_LROUND
143   return lround (x);
144 # else
145   return (long) round (x);
146 # endif
147 }
148 #endif
149 
150 static inline uint32_t
asuint(float f)151 asuint (float f)
152 {
153   union
154   {
155     float f;
156     uint32_t i;
157   } u = { f };
158   return u.i;
159 }
160 
161 static inline float
asfloat(uint32_t i)162 asfloat (uint32_t i)
163 {
164   union
165   {
166     uint32_t i;
167     float f;
168   } u = { i };
169   return u.f;
170 }
171 
172 static inline uint64_t
asuint64(double f)173 asuint64 (double f)
174 {
175   union
176   {
177     double f;
178     uint64_t i;
179   } u = { f };
180   return u.i;
181 }
182 
183 static inline double
asdouble(uint64_t i)184 asdouble (uint64_t i)
185 {
186   union
187   {
188     uint64_t i;
189     double f;
190   } u = { i };
191   return u.f;
192 }
193 
194 #ifndef IEEE_754_2008_SNAN
195 # define IEEE_754_2008_SNAN 1
196 #endif
197 static inline int
issignalingf_inline(float x)198 issignalingf_inline (float x)
199 {
200   uint32_t ix = asuint (x);
201   if (!IEEE_754_2008_SNAN)
202     return (ix & 0x7fc00000) == 0x7fc00000;
203   return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
204 }
205 
206 static inline int
issignaling_inline(double x)207 issignaling_inline (double x)
208 {
209   uint64_t ix = asuint64 (x);
210   if (!IEEE_754_2008_SNAN)
211     return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
212   return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
213 }
214 
215 #if __aarch64__ && __GNUC__
216 /* Prevent the optimization of a floating-point expression.  */
217 static inline float
opt_barrier_float(float x)218 opt_barrier_float (float x)
219 {
220   __asm__ __volatile__ ("" : "+w" (x));
221   return x;
222 }
223 static inline double
opt_barrier_double(double x)224 opt_barrier_double (double x)
225 {
226   __asm__ __volatile__ ("" : "+w" (x));
227   return x;
228 }
229 /* Force the evaluation of a floating-point expression for its side-effect.  */
230 static inline void
force_eval_float(float x)231 force_eval_float (float x)
232 {
233   __asm__ __volatile__ ("" : "+w" (x));
234 }
235 static inline void
force_eval_double(double x)236 force_eval_double (double x)
237 {
238   __asm__ __volatile__ ("" : "+w" (x));
239 }
240 #else
241 static inline float
opt_barrier_float(float x)242 opt_barrier_float (float x)
243 {
244   volatile float y = x;
245   return y;
246 }
247 static inline double
opt_barrier_double(double x)248 opt_barrier_double (double x)
249 {
250   volatile double y = x;
251   return y;
252 }
253 static inline void
force_eval_float(float x)254 force_eval_float (float x)
255 {
256   volatile float y UNUSED = x;
257 }
258 static inline void
force_eval_double(double x)259 force_eval_double (double x)
260 {
261   volatile double y UNUSED = x;
262 }
263 #endif
264 
265 /* Evaluate an expression as the specified type, normally a type
266    cast should be enough, but compilers implement non-standard
267    excess-precision handling, so when FLT_EVAL_METHOD != 0 then
268    these functions may need to be customized.  */
269 static inline float
eval_as_float(float x)270 eval_as_float (float x)
271 {
272   return x;
273 }
274 static inline double
eval_as_double(double x)275 eval_as_double (double x)
276 {
277   return x;
278 }
279 
280 /* Error handling tail calls for special cases, with a sign argument.
281    The sign of the return value is set if the argument is non-zero.  */
282 
283 /* The result overflows.  */
284 HIDDEN float __math_oflowf (uint32_t);
285 /* The result underflows to 0 in nearest rounding mode.  */
286 HIDDEN float __math_uflowf (uint32_t);
287 /* The result underflows to 0 in some directed rounding mode only.  */
288 HIDDEN float __math_may_uflowf (uint32_t);
289 /* Division by zero.  */
290 HIDDEN float __math_divzerof (uint32_t);
291 /* The result overflows.  */
292 HIDDEN double __math_oflow (uint32_t);
293 /* The result underflows to 0 in nearest rounding mode.  */
294 HIDDEN double __math_uflow (uint32_t);
295 /* The result underflows to 0 in some directed rounding mode only.  */
296 HIDDEN double __math_may_uflow (uint32_t);
297 /* Division by zero.  */
298 HIDDEN double __math_divzero (uint32_t);
299 
300 /* Error handling using input checking.  */
301 
302 /* Invalid input unless it is a quiet NaN.  */
303 HIDDEN float __math_invalidf (float);
304 /* Invalid input unless it is a quiet NaN.  */
305 HIDDEN double __math_invalid (double);
306 
307 /* Error handling using output checking, only for errno setting.  */
308 
309 /* Check if the result overflowed to infinity.  */
310 HIDDEN double __math_check_oflow (double);
311 /* Check if the result underflowed to 0.  */
312 HIDDEN double __math_check_uflow (double);
313 
314 /* Check if the result overflowed to infinity.  */
315 static inline double
check_oflow(double x)316 check_oflow (double x)
317 {
318   return WANT_ERRNO ? __math_check_oflow (x) : x;
319 }
320 
321 /* Check if the result underflowed to 0.  */
322 static inline double
check_uflow(double x)323 check_uflow (double x)
324 {
325   return WANT_ERRNO ? __math_check_uflow (x) : x;
326 }
327 
328 /* Check if the result overflowed to infinity.  */
329 HIDDEN float __math_check_oflowf (float);
330 /* Check if the result underflowed to 0.  */
331 HIDDEN float __math_check_uflowf (float);
332 
333 /* Check if the result overflowed to infinity.  */
334 static inline float
check_oflowf(float x)335 check_oflowf (float x)
336 {
337   return WANT_ERRNO ? __math_check_oflowf (x) : x;
338 }
339 
340 /* Check if the result underflowed to 0.  */
341 static inline float
check_uflowf(float x)342 check_uflowf (float x)
343 {
344   return WANT_ERRNO ? __math_check_uflowf (x) : x;
345 }
346 
347 extern const struct erff_data
348 {
349   struct
350   {
351     float erf, scale;
352   } tab[513];
353 } __erff_data HIDDEN;
354 
355 extern const struct sv_erff_data
356 {
357   float erf[513];
358   float scale[513];
359 } __sv_erff_data HIDDEN;
360 
361 extern const struct erfcf_data
362 {
363   struct
364   {
365     float erfc, scale;
366   } tab[645];
367 } __erfcf_data HIDDEN;
368 
369 /* Data for logf and log10f.  */
370 #define LOGF_TABLE_BITS 4
371 #define LOGF_POLY_ORDER 4
372 extern const struct logf_data
373 {
374   struct
375   {
376     double invc, logc;
377   } tab[1 << LOGF_TABLE_BITS];
378   double ln2;
379   double invln10;
380   double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
381 } __logf_data HIDDEN;
382 
383 /* Data for low accuracy log10 (with 1/ln(10) included in coefficients).  */
384 #define LOG10_TABLE_BITS 7
385 #define LOG10_POLY_ORDER 6
386 #define LOG10_POLY1_ORDER 12
387 extern const struct log10_data
388 {
389   double ln2hi;
390   double ln2lo;
391   double invln10;
392   double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10).  */
393   double poly1[LOG10_POLY1_ORDER - 1];
394   struct
395   {
396     double invc, logc;
397   } tab[1 << LOG10_TABLE_BITS];
398 #if !HAVE_FAST_FMA
399   struct
400   {
401     double chi, clo;
402   } tab2[1 << LOG10_TABLE_BITS];
403 #endif
404 } __log10_data HIDDEN;
405 
406 #define EXP_TABLE_BITS 7
407 #define EXP_POLY_ORDER 5
408 /* Use polynomial that is optimized for a wider input range.  This may be
409    needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
410 #define EXP_POLY_WIDE 0
411 /* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
412    needed for good precision in non-nearest rouning and !EXP_POLY_WIDE.  */
413 #define EXP_USE_TOINT_NARROW 0
414 #define EXP2_POLY_ORDER 5
415 #define EXP2_POLY_WIDE 0
416 extern const struct exp_data
417 {
418   double invln2N;
419   double shift;
420   double negln2hiN;
421   double negln2loN;
422   double poly[4]; /* Last four coefficients.  */
423   double exp2_shift;
424   double exp2_poly[EXP2_POLY_ORDER];
425   uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
426 } __exp_data HIDDEN;
427 
428 /* Copied from math/v_exp.h for use in vector exp_tail.  */
429 #define V_EXP_TAIL_TABLE_BITS 8
430 extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
431 
432 /* Copied from math/v_exp.h for use in vector exp2.  */
433 #define V_EXP_TABLE_BITS 7
434 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
435 
436 extern const struct erf_data
437 {
438   struct
439   {
440     double erf, scale;
441   } tab[769];
442 } __erf_data HIDDEN;
443 
444 extern const struct sv_erf_data
445 {
446   double erf[769];
447   double scale[769];
448 } __sv_erf_data HIDDEN;
449 
450 extern const struct erfc_data
451 {
452   struct
453   {
454     double erfc, scale;
455   } tab[3488];
456 } __erfc_data HIDDEN;
457 
458 #define ATAN_POLY_NCOEFFS 20
459 extern const struct atan_poly_data
460 {
461   double poly[ATAN_POLY_NCOEFFS];
462 } __atan_poly_data HIDDEN;
463 
464 #define ATANF_POLY_NCOEFFS 8
465 extern const struct atanf_poly_data
466 {
467   float poly[ATANF_POLY_NCOEFFS];
468 } __atanf_poly_data HIDDEN;
469 
470 #define ASINHF_NCOEFFS 8
471 extern const struct asinhf_data
472 {
473   float coeffs[ASINHF_NCOEFFS];
474 } __asinhf_data HIDDEN;
475 
476 #define LOG_TABLE_BITS 7
477 #define LOG_POLY_ORDER 6
478 #define LOG_POLY1_ORDER 12
479 extern const struct log_data
480 {
481   double ln2hi;
482   double ln2lo;
483   double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
484   double poly1[LOG_POLY1_ORDER - 1];
485   struct
486   {
487     double invc, logc;
488   } tab[1 << LOG_TABLE_BITS];
489 #if !HAVE_FAST_FMA
490   struct
491   {
492     double chi, clo;
493   } tab2[1 << LOG_TABLE_BITS];
494 #endif
495 } __log_data HIDDEN;
496 
497 #define ASINH_NCOEFFS 18
498 extern const struct asinh_data
499 {
500   double poly[ASINH_NCOEFFS];
501 } __asinh_data HIDDEN;
502 
503 #define LOG1P_NCOEFFS 19
504 extern const struct log1p_data
505 {
506   double coeffs[LOG1P_NCOEFFS];
507 } __log1p_data HIDDEN;
508 
509 #define LOG1PF_2U5
510 #define LOG1PF_NCOEFFS 9
511 extern const struct log1pf_data
512 {
513   float coeffs[LOG1PF_NCOEFFS];
514 } __log1pf_data HIDDEN;
515 
516 #define TANF_P_POLY_NCOEFFS 6
517 /* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps.  */
518 #define TANF_Q_POLY_NCOEFFS 4
519 extern const struct tanf_poly_data
520 {
521   float poly_tan[TANF_P_POLY_NCOEFFS];
522   float poly_cotan[TANF_Q_POLY_NCOEFFS];
523 } __tanf_poly_data HIDDEN;
524 
525 #define V_LOG2_TABLE_BITS 7
526 extern const struct v_log2_data
527 {
528   double poly[5];
529   double invln2;
530   struct
531   {
532     double invc, log2c;
533   } table[1 << V_LOG2_TABLE_BITS];
534 } __v_log2_data HIDDEN;
535 
536 #define V_LOG10_TABLE_BITS 7
537 extern const struct v_log10_data
538 {
539   double poly[5];
540   double invln10, log10_2;
541   struct
542   {
543     double invc, log10c;
544   } table[1 << V_LOG10_TABLE_BITS];
545 } __v_log10_data HIDDEN;
546 
547 /* Some data for SVE powf's internal exp and log.  */
548 #define V_POWF_EXP2_TABLE_BITS 5
549 #define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
550 #define V_POWF_LOG2_TABLE_BITS 5
551 #define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
552 extern const struct v_powf_data
553 {
554   double invc[V_POWF_LOG2_N];
555   double logc[V_POWF_LOG2_N];
556   uint64_t scale[V_POWF_EXP2_N];
557 } __v_powf_data HIDDEN;
558 
559 #define V_LOG_POLY_ORDER 6
560 #define V_LOG_TABLE_BITS 7
561 extern const struct v_log_data
562 {
563   /* Shared data for vector log and log-derived routines (e.g. asinh).  */
564   double poly[V_LOG_POLY_ORDER - 1];
565   double ln2;
566   struct
567   {
568     double invc, logc;
569   } table[1 << V_LOG_TABLE_BITS];
570 } __v_log_data HIDDEN;
571 
572 #define EXPM1F_POLY_ORDER 5
573 extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
574 
575 #define EXPF_TABLE_BITS 5
576 #define EXPF_POLY_ORDER 3
577 extern const struct expf_data
578 {
579   uint64_t tab[1 << EXPF_TABLE_BITS];
580   double invln2_scaled;
581   double poly_scaled[EXPF_POLY_ORDER];
582 } __expf_data HIDDEN;
583 
584 #define EXPM1_POLY_ORDER 11
585 extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
586 
587 extern const struct cbrtf_data
588 {
589   float poly[4];
590   float table[5];
591 } __cbrtf_data HIDDEN;
592 
593 extern const struct cbrt_data
594 {
595   double poly[4];
596   double table[5];
597 } __cbrt_data HIDDEN;
598 
599 #define ASINF_POLY_ORDER 4
600 extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
601 
602 #define ASIN_POLY_ORDER 11
603 extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
604 
605 /* Some data for AdvSIMD and SVE pow's internal exp and log.  */
606 #define V_POW_EXP_TABLE_BITS 8
607 extern const struct v_pow_exp_data
608 {
609   double poly[3];
610   double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
611   uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
612 } __v_pow_exp_data HIDDEN;
613 
614 #define V_POW_LOG_TABLE_BITS 7
615 extern const struct v_pow_log_data
616 {
617   double poly[7]; /* First coefficient is 1.  */
618   double ln2_hi, ln2_lo;
619   double invc[1 << V_POW_LOG_TABLE_BITS];
620   double logc[1 << V_POW_LOG_TABLE_BITS];
621   double logctail[1 << V_POW_LOG_TABLE_BITS];
622 } __v_pow_log_data HIDDEN;
623 
624 #endif
625