1 /*
2 * Configuration for math routines.
3 *
4 * Copyright (c) 2017-2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8 #ifndef _MATH_CONFIG_H
9 #define _MATH_CONFIG_H
10
11 #include <math.h>
12 #include <stdint.h>
13
14 #ifndef WANT_ROUNDING
15 /* If defined to 1, return correct results for special cases in non-nearest
16 rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
17 This may be set to 0 if there is no fenv support or if math functions only
18 get called in round to nearest mode. */
19 # define WANT_ROUNDING 1
20 #endif
21 #ifndef WANT_ERRNO
22 /* If defined to 1, set errno in math functions according to ISO C. Many math
23 libraries do not set errno, so this is 0 by default. It may need to be
24 set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
25 # define WANT_ERRNO 0
26 #endif
27 #ifndef WANT_ERRNO_UFLOW
28 /* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */
29 # define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
30 #endif
31
32 /* Compiler can inline round as a single instruction. */
33 #ifndef HAVE_FAST_ROUND
34 # if __aarch64__
35 # define HAVE_FAST_ROUND 1
36 # else
37 # define HAVE_FAST_ROUND 0
38 # endif
39 #endif
40
41 /* Compiler can inline lround, but not (long)round(x). */
42 #ifndef HAVE_FAST_LROUND
43 # if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
44 # define HAVE_FAST_LROUND 1
45 # else
46 # define HAVE_FAST_LROUND 0
47 # endif
48 #endif
49
50 /* Compiler can inline fma as a single instruction. */
51 #ifndef HAVE_FAST_FMA
52 # if defined FP_FAST_FMA || __aarch64__
53 # define HAVE_FAST_FMA 1
54 # else
55 # define HAVE_FAST_FMA 0
56 # endif
57 #endif
58
59 /* Provide *_finite symbols and some of the glibc hidden symbols
60 so libmathlib can be used with binaries compiled against glibc
61 to interpose math functions with both static and dynamic linking. */
62 #ifndef USE_GLIBC_ABI
63 # if __GNUC__
64 # define USE_GLIBC_ABI 1
65 # else
66 # define USE_GLIBC_ABI 0
67 # endif
68 #endif
69
70 /* Optionally used extensions. */
71 #ifdef __GNUC__
72 # define HIDDEN __attribute__ ((__visibility__ ("hidden")))
73 # define NOINLINE __attribute__ ((noinline))
74 # define UNUSED __attribute__ ((unused))
75 # define likely(x) __builtin_expect (!!(x), 1)
76 # define unlikely(x) __builtin_expect (x, 0)
77 # if __GNUC__ >= 9
78 # define attribute_copy(f) __attribute__ ((copy (f)))
79 # else
80 # define attribute_copy(f)
81 # endif
82 # define strong_alias(f, a) \
83 extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
84 # define hidden_alias(f, a) \
85 extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
86 attribute_copy (f);
87 #else
88 # define HIDDEN
89 # define NOINLINE
90 # define UNUSED
91 # define likely(x) (x)
92 # define unlikely(x) (x)
93 #endif
94
95 /* Return ptr but hide its value from the compiler so accesses through it
96 cannot be optimized based on the contents. */
97 #define ptr_barrier(ptr) \
98 ({ \
99 __typeof (ptr) __ptr = (ptr); \
100 __asm("" : "+r"(__ptr)); \
101 __ptr; \
102 })
103
104 /* Symbol renames to avoid libc conflicts. */
105 #define __math_oflowf arm_math_oflowf
106 #define __math_uflowf arm_math_uflowf
107 #define __math_may_uflowf arm_math_may_uflowf
108 #define __math_divzerof arm_math_divzerof
109 #define __math_oflow arm_math_oflow
110 #define __math_uflow arm_math_uflow
111 #define __math_may_uflow arm_math_may_uflow
112 #define __math_divzero arm_math_divzero
113 #define __math_invalidf arm_math_invalidf
114 #define __math_invalid arm_math_invalid
115 #define __math_check_oflow arm_math_check_oflow
116 #define __math_check_uflow arm_math_check_uflow
117 #define __math_check_oflowf arm_math_check_oflowf
118 #define __math_check_uflowf arm_math_check_uflowf
119
120 #define __sincosf_table arm_math_sincosf_table
121 #define __inv_pio4 arm_math_inv_pio4
122 #define __exp2f_data arm_math_exp2f_data
123 #define __logf_data arm_math_logf_data
124 #define __log2f_data arm_math_log2f_data
125 #define __powf_log2_data arm_math_powf_log2_data
126 #define __exp_data arm_math_exp_data
127 #define __log_data arm_math_log_data
128 #define __log2_data arm_math_log2_data
129 #define __pow_log_data arm_math_pow_log_data
130 #define __erff_data arm_math_erff_data
131 #define __erf_data arm_math_erf_data
132 #define __v_exp_data arm_math_v_exp_data
133 #define __v_log_data arm_math_v_log_data
134
135 #if HAVE_FAST_ROUND
136 /* When set, the roundtoint and converttoint functions are provided with
137 the semantics documented below. */
138 # define TOINT_INTRINSICS 1
139
140 /* Round x to nearest int in all rounding modes, ties have to be rounded
141 consistently with converttoint so the results match. If the result
142 would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
143 static inline double_t
roundtoint(double_t x)144 roundtoint (double_t x)
145 {
146 return round (x);
147 }
148
149 /* Convert x to nearest int in all rounding modes, ties have to be rounded
150 consistently with roundtoint. If the result is not representible in an
151 int32_t then the semantics is unspecified. */
152 static inline int32_t
converttoint(double_t x)153 converttoint (double_t x)
154 {
155 # if HAVE_FAST_LROUND
156 return lround (x);
157 # else
158 return (long) round (x);
159 # endif
160 }
161 #endif
162
163 static inline uint32_t
asuint(float f)164 asuint (float f)
165 {
166 union
167 {
168 float f;
169 uint32_t i;
170 } u = {f};
171 return u.i;
172 }
173
174 static inline float
asfloat(uint32_t i)175 asfloat (uint32_t i)
176 {
177 union
178 {
179 uint32_t i;
180 float f;
181 } u = {i};
182 return u.f;
183 }
184
185 static inline uint64_t
asuint64(double f)186 asuint64 (double f)
187 {
188 union
189 {
190 double f;
191 uint64_t i;
192 } u = {f};
193 return u.i;
194 }
195
196 static inline double
asdouble(uint64_t i)197 asdouble (uint64_t i)
198 {
199 union
200 {
201 uint64_t i;
202 double f;
203 } u = {i};
204 return u.f;
205 }
206
207 #ifndef IEEE_754_2008_SNAN
208 # define IEEE_754_2008_SNAN 1
209 #endif
210 static inline int
issignalingf_inline(float x)211 issignalingf_inline (float x)
212 {
213 uint32_t ix = asuint (x);
214 if (!IEEE_754_2008_SNAN)
215 return (ix & 0x7fc00000) == 0x7fc00000;
216 return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
217 }
218
219 static inline int
issignaling_inline(double x)220 issignaling_inline (double x)
221 {
222 uint64_t ix = asuint64 (x);
223 if (!IEEE_754_2008_SNAN)
224 return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
225 return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
226 }
227
228 #if __aarch64__ && __GNUC__
229 /* Prevent the optimization of a floating-point expression. */
230 static inline float
opt_barrier_float(float x)231 opt_barrier_float (float x)
232 {
233 __asm__ __volatile__ ("" : "+w" (x));
234 return x;
235 }
236 static inline double
opt_barrier_double(double x)237 opt_barrier_double (double x)
238 {
239 __asm__ __volatile__ ("" : "+w" (x));
240 return x;
241 }
242 /* Force the evaluation of a floating-point expression for its side-effect. */
243 static inline void
force_eval_float(float x)244 force_eval_float (float x)
245 {
246 __asm__ __volatile__ ("" : "+w" (x));
247 }
248 static inline void
force_eval_double(double x)249 force_eval_double (double x)
250 {
251 __asm__ __volatile__ ("" : "+w" (x));
252 }
253 #else
254 static inline float
opt_barrier_float(float x)255 opt_barrier_float (float x)
256 {
257 volatile float y = x;
258 return y;
259 }
260 static inline double
opt_barrier_double(double x)261 opt_barrier_double (double x)
262 {
263 volatile double y = x;
264 return y;
265 }
266 static inline void
force_eval_float(float x)267 force_eval_float (float x)
268 {
269 volatile float y UNUSED = x;
270 }
271 static inline void
force_eval_double(double x)272 force_eval_double (double x)
273 {
274 volatile double y UNUSED = x;
275 }
276 #endif
277
278 /* Evaluate an expression as the specified type, normally a type
279 cast should be enough, but compilers implement non-standard
280 excess-precision handling, so when FLT_EVAL_METHOD != 0 then
281 these functions may need to be customized. */
282 static inline float
eval_as_float(float x)283 eval_as_float (float x)
284 {
285 return x;
286 }
287 static inline double
eval_as_double(double x)288 eval_as_double (double x)
289 {
290 return x;
291 }
292
293 /* Error handling tail calls for special cases, with a sign argument.
294 The sign of the return value is set if the argument is non-zero. */
295
296 /* The result overflows. */
297 HIDDEN float __math_oflowf (uint32_t);
298 /* The result underflows to 0 in nearest rounding mode. */
299 HIDDEN float __math_uflowf (uint32_t);
300 /* The result underflows to 0 in some directed rounding mode only. */
301 HIDDEN float __math_may_uflowf (uint32_t);
302 /* Division by zero. */
303 HIDDEN float __math_divzerof (uint32_t);
304 /* The result overflows. */
305 HIDDEN double __math_oflow (uint32_t);
306 /* The result underflows to 0 in nearest rounding mode. */
307 HIDDEN double __math_uflow (uint32_t);
308 /* The result underflows to 0 in some directed rounding mode only. */
309 HIDDEN double __math_may_uflow (uint32_t);
310 /* Division by zero. */
311 HIDDEN double __math_divzero (uint32_t);
312
313 /* Error handling using input checking. */
314
315 /* Invalid input unless it is a quiet NaN. */
316 HIDDEN float __math_invalidf (float);
317 /* Invalid input unless it is a quiet NaN. */
318 HIDDEN double __math_invalid (double);
319
320 /* Error handling using output checking, only for errno setting. */
321
322 /* Check if the result overflowed to infinity. */
323 HIDDEN double __math_check_oflow (double);
324 /* Check if the result underflowed to 0. */
325 HIDDEN double __math_check_uflow (double);
326
327 /* Check if the result overflowed to infinity. */
328 static inline double
check_oflow(double x)329 check_oflow (double x)
330 {
331 return WANT_ERRNO ? __math_check_oflow (x) : x;
332 }
333
334 /* Check if the result underflowed to 0. */
335 static inline double
check_uflow(double x)336 check_uflow (double x)
337 {
338 return WANT_ERRNO ? __math_check_uflow (x) : x;
339 }
340
341 /* Check if the result overflowed to infinity. */
342 HIDDEN float __math_check_oflowf (float);
343 /* Check if the result underflowed to 0. */
344 HIDDEN float __math_check_uflowf (float);
345
346 /* Check if the result overflowed to infinity. */
347 static inline float
check_oflowf(float x)348 check_oflowf (float x)
349 {
350 return WANT_ERRNO ? __math_check_oflowf (x) : x;
351 }
352
353 /* Check if the result underflowed to 0. */
354 static inline float
check_uflowf(float x)355 check_uflowf (float x)
356 {
357 return WANT_ERRNO ? __math_check_uflowf (x) : x;
358 }
359
360 /* Shared between expf, exp2f and powf. */
361 #define EXP2F_TABLE_BITS 5
362 #define EXP2F_POLY_ORDER 3
363 extern const struct exp2f_data
364 {
365 uint64_t tab[1 << EXP2F_TABLE_BITS];
366 double shift_scaled;
367 double poly[EXP2F_POLY_ORDER];
368 double shift;
369 double invln2_scaled;
370 double poly_scaled[EXP2F_POLY_ORDER];
371 } __exp2f_data HIDDEN;
372
373 #define LOGF_TABLE_BITS 4
374 #define LOGF_POLY_ORDER 4
375 extern const struct logf_data
376 {
377 struct
378 {
379 double invc, logc;
380 } tab[1 << LOGF_TABLE_BITS];
381 double ln2;
382 double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
383 } __logf_data HIDDEN;
384
385 #define LOG2F_TABLE_BITS 4
386 #define LOG2F_POLY_ORDER 4
387 extern const struct log2f_data
388 {
389 struct
390 {
391 double invc, logc;
392 } tab[1 << LOG2F_TABLE_BITS];
393 double poly[LOG2F_POLY_ORDER];
394 } __log2f_data HIDDEN;
395
396 #define POWF_LOG2_TABLE_BITS 4
397 #define POWF_LOG2_POLY_ORDER 5
398 #if TOINT_INTRINSICS
399 # define POWF_SCALE_BITS EXP2F_TABLE_BITS
400 #else
401 # define POWF_SCALE_BITS 0
402 #endif
403 #define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
404 extern const struct powf_log2_data
405 {
406 struct
407 {
408 double invc, logc;
409 } tab[1 << POWF_LOG2_TABLE_BITS];
410 double poly[POWF_LOG2_POLY_ORDER];
411 } __powf_log2_data HIDDEN;
412
413
414 #define EXP_TABLE_BITS 7
415 #define EXP_POLY_ORDER 5
416 /* Use polynomial that is optimized for a wider input range. This may be
417 needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
418 #define EXP_POLY_WIDE 0
419 /* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
420 needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */
421 #define EXP_USE_TOINT_NARROW 0
422 #define EXP2_POLY_ORDER 5
423 #define EXP2_POLY_WIDE 0
424 /* Wider exp10 polynomial necessary for good precision in non-nearest rounding
425 and !TOINT_INTRINSICS. */
426 #define EXP10_POLY_WIDE 0
427 extern const struct exp_data
428 {
429 double invln2N;
430 double invlog10_2N;
431 double shift;
432 double negln2hiN;
433 double negln2loN;
434 double neglog10_2hiN;
435 double neglog10_2loN;
436 double poly[4]; /* Last four coefficients. */
437 double exp2_shift;
438 double exp2_poly[EXP2_POLY_ORDER];
439 double exp10_poly[5];
440 uint64_t tab[2*(1 << EXP_TABLE_BITS)];
441 } __exp_data HIDDEN;
442
443 #define LOG_TABLE_BITS 7
444 #define LOG_POLY_ORDER 6
445 #define LOG_POLY1_ORDER 12
446 extern const struct log_data
447 {
448 double ln2hi;
449 double ln2lo;
450 double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
451 double poly1[LOG_POLY1_ORDER - 1];
452 struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
453 #if !HAVE_FAST_FMA
454 struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
455 #endif
456 } __log_data HIDDEN;
457
458 #define LOG2_TABLE_BITS 6
459 #define LOG2_POLY_ORDER 7
460 #define LOG2_POLY1_ORDER 11
461 extern const struct log2_data
462 {
463 double invln2hi;
464 double invln2lo;
465 double poly[LOG2_POLY_ORDER - 1];
466 double poly1[LOG2_POLY1_ORDER - 1];
467 struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
468 #if !HAVE_FAST_FMA
469 struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
470 #endif
471 } __log2_data HIDDEN;
472
473 #define POW_LOG_TABLE_BITS 7
474 #define POW_LOG_POLY_ORDER 8
475 extern const struct pow_log_data
476 {
477 double ln2hi;
478 double ln2lo;
479 double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
480 /* Note: the pad field is unused, but allows slightly faster indexing. */
481 struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
482 } __pow_log_data HIDDEN;
483
484 extern const struct erff_data
485 {
486 float erff_poly_A[6];
487 float erff_poly_B[7];
488 } __erff_data HIDDEN;
489
490 #define ERF_POLY_A_ORDER 19
491 #define ERF_POLY_A_NCOEFFS 10
492 #define ERFC_POLY_C_NCOEFFS 16
493 #define ERFC_POLY_D_NCOEFFS 18
494 #define ERFC_POLY_E_NCOEFFS 14
495 #define ERFC_POLY_F_NCOEFFS 17
496 extern const struct erf_data
497 {
498 double erf_poly_A[ERF_POLY_A_NCOEFFS];
499 double erf_ratio_N_A[5];
500 double erf_ratio_D_A[5];
501 double erf_ratio_N_B[7];
502 double erf_ratio_D_B[6];
503 double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
504 double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
505 double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
506 double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
507 } __erf_data HIDDEN;
508
509 #define V_EXP_TABLE_BITS 7
510 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
511
512 #define V_LOG_TABLE_BITS 7
513 extern const struct v_log_data
514 {
515 struct
516 {
517 double invc, logc;
518 } table[1 << V_LOG_TABLE_BITS];
519 } __v_log_data HIDDEN;
520
521 #endif
522