1 /*
2 * Configuration for math routines.
3 *
4 * Copyright (c) 2017-2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8 #ifndef _MATH_CONFIG_H
9 #define _MATH_CONFIG_H
10
11 #include <math.h>
12 #include <stdint.h>
13
14 #ifndef WANT_ROUNDING
15 /* If defined to 1, return correct results for special cases in non-nearest
16 rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
17 -0.0f). This may be set to 0 if there is no fenv support or if math
18 functions only get called in round to nearest mode. */
19 # define WANT_ROUNDING 1
20 #endif
21 #ifndef WANT_ERRNO
22 /* If defined to 1, set errno in math functions according to ISO C. Many math
23 libraries do not set errno, so this is 0 by default. It may need to be
24 set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
25 # define WANT_ERRNO 0
26 #endif
27 #ifndef WANT_SIMD_EXCEPT
28 /* If defined to 1, trigger fp exceptions in vector routines, consistently with
29 behaviour expected from the corresponding scalar routine. */
30 # define WANT_SIMD_EXCEPT 0
31 #endif
32
33 /* Compiler can inline round as a single instruction. */
34 #ifndef HAVE_FAST_ROUND
35 # if __aarch64__
36 # define HAVE_FAST_ROUND 1
37 # else
38 # define HAVE_FAST_ROUND 0
39 # endif
40 #endif
41
42 /* Compiler can inline lround, but not (long)round(x). */
43 #ifndef HAVE_FAST_LROUND
44 # if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408 \
45 && __NO_MATH_ERRNO__
46 # define HAVE_FAST_LROUND 1
47 # else
48 # define HAVE_FAST_LROUND 0
49 # endif
50 #endif
51
52 /* Compiler can inline fma as a single instruction. */
53 #ifndef HAVE_FAST_FMA
54 # if defined FP_FAST_FMA || __aarch64__
55 # define HAVE_FAST_FMA 1
56 # else
57 # define HAVE_FAST_FMA 0
58 # endif
59 #endif
60
61 /* Provide *_finite symbols and some of the glibc hidden symbols
62 so libmathlib can be used with binaries compiled against glibc
63 to interpose math functions with both static and dynamic linking. */
64 #ifndef USE_GLIBC_ABI
65 # if __GNUC__
66 # define USE_GLIBC_ABI 1
67 # else
68 # define USE_GLIBC_ABI 0
69 # endif
70 #endif
71
72 /* Optionally used extensions. */
73 #ifdef __GNUC__
74 # define HIDDEN __attribute__ ((__visibility__ ("hidden")))
75 # define NOINLINE __attribute__ ((noinline))
76 # define UNUSED __attribute__ ((unused))
77 # define likely(x) __builtin_expect (!!(x), 1)
78 # define unlikely(x) __builtin_expect (x, 0)
79 # if __GNUC__ >= 9
80 # define attribute_copy(f) __attribute__ ((copy (f)))
81 # else
82 # define attribute_copy(f)
83 # endif
84 # define strong_alias(f, a) \
85 extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
86 # define hidden_alias(f, a) \
87 extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
88 attribute_copy (f);
89 #else
90 # define HIDDEN
91 # define NOINLINE
92 # define UNUSED
93 # define likely(x) (x)
94 # define unlikely(x) (x)
95 #endif
96
97 /* Return ptr but hide its value from the compiler so accesses through it
98 cannot be optimized based on the contents. */
99 #define ptr_barrier(ptr) \
100 ({ \
101 __typeof (ptr) __ptr = (ptr); \
102 __asm("" : "+r"(__ptr)); \
103 __ptr; \
104 })
105
106 /* Symbol renames to avoid libc conflicts. */
107 #define __math_oflowf arm_math_oflowf
108 #define __math_uflowf arm_math_uflowf
109 #define __math_may_uflowf arm_math_may_uflowf
110 #define __math_divzerof arm_math_divzerof
111 #define __math_oflow arm_math_oflow
112 #define __math_uflow arm_math_uflow
113 #define __math_may_uflow arm_math_may_uflow
114 #define __math_divzero arm_math_divzero
115 #define __math_invalidf arm_math_invalidf
116 #define __math_invalid arm_math_invalid
117 #define __math_check_oflow arm_math_check_oflow
118 #define __math_check_uflow arm_math_check_uflow
119 #define __math_check_oflowf arm_math_check_oflowf
120 #define __math_check_uflowf arm_math_check_uflowf
121
122 #if HAVE_FAST_ROUND
123 /* When set, the roundtoint and converttoint functions are provided with
124 the semantics documented below. */
125 # define TOINT_INTRINSICS 1
126
127 /* Round x to nearest int in all rounding modes, ties have to be rounded
128 consistently with converttoint so the results match. If the result
129 would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
130 static inline double_t
roundtoint(double_t x)131 roundtoint (double_t x)
132 {
133 return round (x);
134 }
135
136 /* Convert x to nearest int in all rounding modes, ties have to be rounded
137 consistently with roundtoint. If the result is not representible in an
138 int32_t then the semantics is unspecified. */
139 static inline int32_t
converttoint(double_t x)140 converttoint (double_t x)
141 {
142 # if HAVE_FAST_LROUND
143 return lround (x);
144 # else
145 return (long) round (x);
146 # endif
147 }
148 #endif
149
150 static inline uint32_t
asuint(float f)151 asuint (float f)
152 {
153 union
154 {
155 float f;
156 uint32_t i;
157 } u = { f };
158 return u.i;
159 }
160
161 static inline float
asfloat(uint32_t i)162 asfloat (uint32_t i)
163 {
164 union
165 {
166 uint32_t i;
167 float f;
168 } u = { i };
169 return u.f;
170 }
171
172 static inline uint64_t
asuint64(double f)173 asuint64 (double f)
174 {
175 union
176 {
177 double f;
178 uint64_t i;
179 } u = { f };
180 return u.i;
181 }
182
183 static inline double
asdouble(uint64_t i)184 asdouble (uint64_t i)
185 {
186 union
187 {
188 uint64_t i;
189 double f;
190 } u = { i };
191 return u.f;
192 }
193
194 #ifndef IEEE_754_2008_SNAN
195 # define IEEE_754_2008_SNAN 1
196 #endif
197 static inline int
issignalingf_inline(float x)198 issignalingf_inline (float x)
199 {
200 uint32_t ix = asuint (x);
201 if (!IEEE_754_2008_SNAN)
202 return (ix & 0x7fc00000) == 0x7fc00000;
203 return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
204 }
205
206 static inline int
issignaling_inline(double x)207 issignaling_inline (double x)
208 {
209 uint64_t ix = asuint64 (x);
210 if (!IEEE_754_2008_SNAN)
211 return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
212 return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
213 }
214
215 #if __aarch64__ && __GNUC__
216 /* Prevent the optimization of a floating-point expression. */
217 static inline float
opt_barrier_float(float x)218 opt_barrier_float (float x)
219 {
220 __asm__ __volatile__ ("" : "+w" (x));
221 return x;
222 }
223 static inline double
opt_barrier_double(double x)224 opt_barrier_double (double x)
225 {
226 __asm__ __volatile__ ("" : "+w" (x));
227 return x;
228 }
229 /* Force the evaluation of a floating-point expression for its side-effect. */
230 static inline void
force_eval_float(float x)231 force_eval_float (float x)
232 {
233 __asm__ __volatile__ ("" : "+w" (x));
234 }
235 static inline void
force_eval_double(double x)236 force_eval_double (double x)
237 {
238 __asm__ __volatile__ ("" : "+w" (x));
239 }
240 #else
241 static inline float
opt_barrier_float(float x)242 opt_barrier_float (float x)
243 {
244 volatile float y = x;
245 return y;
246 }
247 static inline double
opt_barrier_double(double x)248 opt_barrier_double (double x)
249 {
250 volatile double y = x;
251 return y;
252 }
253 static inline void
force_eval_float(float x)254 force_eval_float (float x)
255 {
256 volatile float y UNUSED = x;
257 }
258 static inline void
force_eval_double(double x)259 force_eval_double (double x)
260 {
261 volatile double y UNUSED = x;
262 }
263 #endif
264
265 /* Evaluate an expression as the specified type, normally a type
266 cast should be enough, but compilers implement non-standard
267 excess-precision handling, so when FLT_EVAL_METHOD != 0 then
268 these functions may need to be customized. */
269 static inline float
eval_as_float(float x)270 eval_as_float (float x)
271 {
272 return x;
273 }
274 static inline double
eval_as_double(double x)275 eval_as_double (double x)
276 {
277 return x;
278 }
279
280 /* Error handling tail calls for special cases, with a sign argument.
281 The sign of the return value is set if the argument is non-zero. */
282
283 /* The result overflows. */
284 HIDDEN float __math_oflowf (uint32_t);
285 /* The result underflows to 0 in nearest rounding mode. */
286 HIDDEN float __math_uflowf (uint32_t);
287 /* The result underflows to 0 in some directed rounding mode only. */
288 HIDDEN float __math_may_uflowf (uint32_t);
289 /* Division by zero. */
290 HIDDEN float __math_divzerof (uint32_t);
291 /* The result overflows. */
292 HIDDEN double __math_oflow (uint32_t);
293 /* The result underflows to 0 in nearest rounding mode. */
294 HIDDEN double __math_uflow (uint32_t);
295 /* The result underflows to 0 in some directed rounding mode only. */
296 HIDDEN double __math_may_uflow (uint32_t);
297 /* Division by zero. */
298 HIDDEN double __math_divzero (uint32_t);
299
300 /* Error handling using input checking. */
301
302 /* Invalid input unless it is a quiet NaN. */
303 HIDDEN float __math_invalidf (float);
304 /* Invalid input unless it is a quiet NaN. */
305 HIDDEN double __math_invalid (double);
306
307 /* Error handling using output checking, only for errno setting. */
308
309 /* Check if the result overflowed to infinity. */
310 HIDDEN double __math_check_oflow (double);
311 /* Check if the result underflowed to 0. */
312 HIDDEN double __math_check_uflow (double);
313
314 /* Check if the result overflowed to infinity. */
315 static inline double
check_oflow(double x)316 check_oflow (double x)
317 {
318 return WANT_ERRNO ? __math_check_oflow (x) : x;
319 }
320
321 /* Check if the result underflowed to 0. */
322 static inline double
check_uflow(double x)323 check_uflow (double x)
324 {
325 return WANT_ERRNO ? __math_check_uflow (x) : x;
326 }
327
328 /* Check if the result overflowed to infinity. */
329 HIDDEN float __math_check_oflowf (float);
330 /* Check if the result underflowed to 0. */
331 HIDDEN float __math_check_uflowf (float);
332
333 /* Check if the result overflowed to infinity. */
334 static inline float
check_oflowf(float x)335 check_oflowf (float x)
336 {
337 return WANT_ERRNO ? __math_check_oflowf (x) : x;
338 }
339
340 /* Check if the result underflowed to 0. */
341 static inline float
check_uflowf(float x)342 check_uflowf (float x)
343 {
344 return WANT_ERRNO ? __math_check_uflowf (x) : x;
345 }
346
347 extern const struct erff_data
348 {
349 struct
350 {
351 float erf, scale;
352 } tab[513];
353 } __erff_data HIDDEN;
354
355 extern const struct sv_erff_data
356 {
357 float erf[513];
358 float scale[513];
359 } __sv_erff_data HIDDEN;
360
361 extern const struct erfcf_data
362 {
363 struct
364 {
365 float erfc, scale;
366 } tab[645];
367 } __erfcf_data HIDDEN;
368
369 /* Data for logf and log10f. */
370 #define LOGF_TABLE_BITS 4
371 #define LOGF_POLY_ORDER 4
372 extern const struct logf_data
373 {
374 struct
375 {
376 double invc, logc;
377 } tab[1 << LOGF_TABLE_BITS];
378 double ln2;
379 double invln10;
380 double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
381 } __logf_data HIDDEN;
382
383 /* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */
384 #define LOG10_TABLE_BITS 7
385 #define LOG10_POLY_ORDER 6
386 #define LOG10_POLY1_ORDER 12
387 extern const struct log10_data
388 {
389 double ln2hi;
390 double ln2lo;
391 double invln10;
392 double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */
393 double poly1[LOG10_POLY1_ORDER - 1];
394 struct
395 {
396 double invc, logc;
397 } tab[1 << LOG10_TABLE_BITS];
398 #if !HAVE_FAST_FMA
399 struct
400 {
401 double chi, clo;
402 } tab2[1 << LOG10_TABLE_BITS];
403 #endif
404 } __log10_data HIDDEN;
405
406 #define EXP_TABLE_BITS 7
407 #define EXP_POLY_ORDER 5
408 /* Use polynomial that is optimized for a wider input range. This may be
409 needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
410 #define EXP_POLY_WIDE 0
411 /* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
412 needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */
413 #define EXP_USE_TOINT_NARROW 0
414 #define EXP2_POLY_ORDER 5
415 #define EXP2_POLY_WIDE 0
416 extern const struct exp_data
417 {
418 double invln2N;
419 double shift;
420 double negln2hiN;
421 double negln2loN;
422 double poly[4]; /* Last four coefficients. */
423 double exp2_shift;
424 double exp2_poly[EXP2_POLY_ORDER];
425 uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
426 } __exp_data HIDDEN;
427
428 /* Copied from math/v_exp.h for use in vector exp_tail. */
429 #define V_EXP_TAIL_TABLE_BITS 8
430 extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
431
432 /* Copied from math/v_exp.h for use in vector exp2. */
433 #define V_EXP_TABLE_BITS 7
434 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
435
436 extern const struct erf_data
437 {
438 struct
439 {
440 double erf, scale;
441 } tab[769];
442 } __erf_data HIDDEN;
443
444 extern const struct sv_erf_data
445 {
446 double erf[769];
447 double scale[769];
448 } __sv_erf_data HIDDEN;
449
450 extern const struct erfc_data
451 {
452 struct
453 {
454 double erfc, scale;
455 } tab[3488];
456 } __erfc_data HIDDEN;
457
458 #define ATAN_POLY_NCOEFFS 20
459 extern const struct atan_poly_data
460 {
461 double poly[ATAN_POLY_NCOEFFS];
462 } __atan_poly_data HIDDEN;
463
464 #define ATANF_POLY_NCOEFFS 8
465 extern const struct atanf_poly_data
466 {
467 float poly[ATANF_POLY_NCOEFFS];
468 } __atanf_poly_data HIDDEN;
469
470 #define ASINHF_NCOEFFS 8
471 extern const struct asinhf_data
472 {
473 float coeffs[ASINHF_NCOEFFS];
474 } __asinhf_data HIDDEN;
475
476 #define LOG_TABLE_BITS 7
477 #define LOG_POLY_ORDER 6
478 #define LOG_POLY1_ORDER 12
479 extern const struct log_data
480 {
481 double ln2hi;
482 double ln2lo;
483 double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
484 double poly1[LOG_POLY1_ORDER - 1];
485 struct
486 {
487 double invc, logc;
488 } tab[1 << LOG_TABLE_BITS];
489 #if !HAVE_FAST_FMA
490 struct
491 {
492 double chi, clo;
493 } tab2[1 << LOG_TABLE_BITS];
494 #endif
495 } __log_data HIDDEN;
496
497 #define ASINH_NCOEFFS 18
498 extern const struct asinh_data
499 {
500 double poly[ASINH_NCOEFFS];
501 } __asinh_data HIDDEN;
502
503 #define LOG1P_NCOEFFS 19
504 extern const struct log1p_data
505 {
506 double coeffs[LOG1P_NCOEFFS];
507 } __log1p_data HIDDEN;
508
509 #define LOG1PF_2U5
510 #define LOG1PF_NCOEFFS 9
511 extern const struct log1pf_data
512 {
513 float coeffs[LOG1PF_NCOEFFS];
514 } __log1pf_data HIDDEN;
515
516 #define TANF_P_POLY_NCOEFFS 6
517 /* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps. */
518 #define TANF_Q_POLY_NCOEFFS 4
519 extern const struct tanf_poly_data
520 {
521 float poly_tan[TANF_P_POLY_NCOEFFS];
522 float poly_cotan[TANF_Q_POLY_NCOEFFS];
523 } __tanf_poly_data HIDDEN;
524
525 #define V_LOG2_TABLE_BITS 7
526 extern const struct v_log2_data
527 {
528 double poly[5];
529 double invln2;
530 struct
531 {
532 double invc, log2c;
533 } table[1 << V_LOG2_TABLE_BITS];
534 } __v_log2_data HIDDEN;
535
536 #define V_LOG10_TABLE_BITS 7
537 extern const struct v_log10_data
538 {
539 double poly[5];
540 double invln10, log10_2;
541 struct
542 {
543 double invc, log10c;
544 } table[1 << V_LOG10_TABLE_BITS];
545 } __v_log10_data HIDDEN;
546
547 /* Some data for SVE powf's internal exp and log. */
548 #define V_POWF_EXP2_TABLE_BITS 5
549 #define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
550 #define V_POWF_LOG2_TABLE_BITS 5
551 #define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
552 extern const struct v_powf_data
553 {
554 double invc[V_POWF_LOG2_N];
555 double logc[V_POWF_LOG2_N];
556 uint64_t scale[V_POWF_EXP2_N];
557 } __v_powf_data HIDDEN;
558
559 #define V_LOG_POLY_ORDER 6
560 #define V_LOG_TABLE_BITS 7
561 extern const struct v_log_data
562 {
563 /* Shared data for vector log and log-derived routines (e.g. asinh). */
564 double poly[V_LOG_POLY_ORDER - 1];
565 double ln2;
566 struct
567 {
568 double invc, logc;
569 } table[1 << V_LOG_TABLE_BITS];
570 } __v_log_data HIDDEN;
571
572 #define EXPM1F_POLY_ORDER 5
573 extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
574
575 #define EXPF_TABLE_BITS 5
576 #define EXPF_POLY_ORDER 3
577 extern const struct expf_data
578 {
579 uint64_t tab[1 << EXPF_TABLE_BITS];
580 double invln2_scaled;
581 double poly_scaled[EXPF_POLY_ORDER];
582 } __expf_data HIDDEN;
583
584 #define EXPM1_POLY_ORDER 11
585 extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
586
587 extern const struct cbrtf_data
588 {
589 float poly[4];
590 float table[5];
591 } __cbrtf_data HIDDEN;
592
593 extern const struct cbrt_data
594 {
595 double poly[4];
596 double table[5];
597 } __cbrt_data HIDDEN;
598
599 #define ASINF_POLY_ORDER 4
600 extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
601
602 #define ASIN_POLY_ORDER 11
603 extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
604
605 /* Some data for AdvSIMD and SVE pow's internal exp and log. */
606 #define V_POW_EXP_TABLE_BITS 8
607 extern const struct v_pow_exp_data
608 {
609 double poly[3];
610 double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
611 uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
612 } __v_pow_exp_data HIDDEN;
613
614 #define V_POW_LOG_TABLE_BITS 7
615 extern const struct v_pow_log_data
616 {
617 double poly[7]; /* First coefficient is 1. */
618 double ln2_hi, ln2_lo;
619 double invc[1 << V_POW_LOG_TABLE_BITS];
620 double logc[1 << V_POW_LOG_TABLE_BITS];
621 double logctail[1 << V_POW_LOG_TABLE_BITS];
622 } __v_pow_log_data HIDDEN;
623
624 #endif
625