1 /* 2 * Configuration for math routines. 3 * 4 * Copyright (c) 2017-2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #ifndef _MATH_CONFIG_H 9 #define _MATH_CONFIG_H 10 11 #include <math.h> 12 #include <stdint.h> 13 14 #ifndef WANT_ROUNDING 15 /* If defined to 1, return correct results for special cases in non-nearest 16 rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than 17 -0.0f). This may be set to 0 if there is no fenv support or if math 18 functions only get called in round to nearest mode. */ 19 # define WANT_ROUNDING 1 20 #endif 21 #ifndef WANT_ERRNO 22 /* If defined to 1, set errno in math functions according to ISO C. Many math 23 libraries do not set errno, so this is 0 by default. It may need to be 24 set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */ 25 # define WANT_ERRNO 0 26 #endif 27 #ifndef WANT_SIMD_EXCEPT 28 /* If defined to 1, trigger fp exceptions in vector routines, consistently with 29 behaviour expected from the corresponding scalar routine. */ 30 # define WANT_SIMD_EXCEPT 0 31 #endif 32 33 /* Compiler can inline round as a single instruction. */ 34 #ifndef HAVE_FAST_ROUND 35 # if __aarch64__ 36 # define HAVE_FAST_ROUND 1 37 # else 38 # define HAVE_FAST_ROUND 0 39 # endif 40 #endif 41 42 /* Compiler can inline lround, but not (long)round(x). */ 43 #ifndef HAVE_FAST_LROUND 44 # if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408 \ 45 && __NO_MATH_ERRNO__ 46 # define HAVE_FAST_LROUND 1 47 # else 48 # define HAVE_FAST_LROUND 0 49 # endif 50 #endif 51 52 /* Compiler can inline fma as a single instruction. */ 53 #ifndef HAVE_FAST_FMA 54 # if defined FP_FAST_FMA || __aarch64__ 55 # define HAVE_FAST_FMA 1 56 # else 57 # define HAVE_FAST_FMA 0 58 # endif 59 #endif 60 61 /* Provide *_finite symbols and some of the glibc hidden symbols 62 so libmathlib can be used with binaries compiled against glibc 63 to interpose math functions with both static and dynamic linking. */ 64 #ifndef USE_GLIBC_ABI 65 # if __GNUC__ 66 # define USE_GLIBC_ABI 1 67 # else 68 # define USE_GLIBC_ABI 0 69 # endif 70 #endif 71 72 /* Optionally used extensions. */ 73 #ifdef __GNUC__ 74 # define HIDDEN __attribute__ ((__visibility__ ("hidden"))) 75 # define NOINLINE __attribute__ ((noinline)) 76 # define UNUSED __attribute__ ((unused)) 77 # define likely(x) __builtin_expect (!!(x), 1) 78 # define unlikely(x) __builtin_expect (x, 0) 79 # if __GNUC__ >= 9 80 # define attribute_copy(f) __attribute__ ((copy (f))) 81 # else 82 # define attribute_copy(f) 83 # endif 84 # define strong_alias(f, a) \ 85 extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f); 86 # define hidden_alias(f, a) \ 87 extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \ 88 attribute_copy (f); 89 #else 90 # define HIDDEN 91 # define NOINLINE 92 # define UNUSED 93 # define likely(x) (x) 94 # define unlikely(x) (x) 95 #endif 96 97 /* Return ptr but hide its value from the compiler so accesses through it 98 cannot be optimized based on the contents. */ 99 #define ptr_barrier(ptr) \ 100 ({ \ 101 __typeof (ptr) __ptr = (ptr); \ 102 __asm("" : "+r"(__ptr)); \ 103 __ptr; \ 104 }) 105 106 /* Symbol renames to avoid libc conflicts. */ 107 #define __math_oflowf arm_math_oflowf 108 #define __math_uflowf arm_math_uflowf 109 #define __math_may_uflowf arm_math_may_uflowf 110 #define __math_divzerof arm_math_divzerof 111 #define __math_oflow arm_math_oflow 112 #define __math_uflow arm_math_uflow 113 #define __math_may_uflow arm_math_may_uflow 114 #define __math_divzero arm_math_divzero 115 #define __math_invalidf arm_math_invalidf 116 #define __math_invalid arm_math_invalid 117 #define __math_check_oflow arm_math_check_oflow 118 #define __math_check_uflow arm_math_check_uflow 119 #define __math_check_oflowf arm_math_check_oflowf 120 #define __math_check_uflowf arm_math_check_uflowf 121 122 #if HAVE_FAST_ROUND 123 /* When set, the roundtoint and converttoint functions are provided with 124 the semantics documented below. */ 125 # define TOINT_INTRINSICS 1 126 127 /* Round x to nearest int in all rounding modes, ties have to be rounded 128 consistently with converttoint so the results match. If the result 129 would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ 130 static inline double_t 131 roundtoint (double_t x) 132 { 133 return round (x); 134 } 135 136 /* Convert x to nearest int in all rounding modes, ties have to be rounded 137 consistently with roundtoint. If the result is not representible in an 138 int32_t then the semantics is unspecified. */ 139 static inline int32_t 140 converttoint (double_t x) 141 { 142 # if HAVE_FAST_LROUND 143 return lround (x); 144 # else 145 return (long) round (x); 146 # endif 147 } 148 #endif 149 150 static inline uint32_t 151 asuint (float f) 152 { 153 union 154 { 155 float f; 156 uint32_t i; 157 } u = { f }; 158 return u.i; 159 } 160 161 static inline float 162 asfloat (uint32_t i) 163 { 164 union 165 { 166 uint32_t i; 167 float f; 168 } u = { i }; 169 return u.f; 170 } 171 172 static inline uint64_t 173 asuint64 (double f) 174 { 175 union 176 { 177 double f; 178 uint64_t i; 179 } u = { f }; 180 return u.i; 181 } 182 183 static inline double 184 asdouble (uint64_t i) 185 { 186 union 187 { 188 uint64_t i; 189 double f; 190 } u = { i }; 191 return u.f; 192 } 193 194 #ifndef IEEE_754_2008_SNAN 195 # define IEEE_754_2008_SNAN 1 196 #endif 197 static inline int 198 issignalingf_inline (float x) 199 { 200 uint32_t ix = asuint (x); 201 if (!IEEE_754_2008_SNAN) 202 return (ix & 0x7fc00000) == 0x7fc00000; 203 return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; 204 } 205 206 static inline int 207 issignaling_inline (double x) 208 { 209 uint64_t ix = asuint64 (x); 210 if (!IEEE_754_2008_SNAN) 211 return (ix & 0x7ff8000000000000) == 0x7ff8000000000000; 212 return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL; 213 } 214 215 #if __aarch64__ && __GNUC__ 216 /* Prevent the optimization of a floating-point expression. */ 217 static inline float 218 opt_barrier_float (float x) 219 { 220 __asm__ __volatile__ ("" : "+w" (x)); 221 return x; 222 } 223 static inline double 224 opt_barrier_double (double x) 225 { 226 __asm__ __volatile__ ("" : "+w" (x)); 227 return x; 228 } 229 /* Force the evaluation of a floating-point expression for its side-effect. */ 230 static inline void 231 force_eval_float (float x) 232 { 233 __asm__ __volatile__ ("" : "+w" (x)); 234 } 235 static inline void 236 force_eval_double (double x) 237 { 238 __asm__ __volatile__ ("" : "+w" (x)); 239 } 240 #else 241 static inline float 242 opt_barrier_float (float x) 243 { 244 volatile float y = x; 245 return y; 246 } 247 static inline double 248 opt_barrier_double (double x) 249 { 250 volatile double y = x; 251 return y; 252 } 253 static inline void 254 force_eval_float (float x) 255 { 256 volatile float y UNUSED = x; 257 } 258 static inline void 259 force_eval_double (double x) 260 { 261 volatile double y UNUSED = x; 262 } 263 #endif 264 265 /* Evaluate an expression as the specified type, normally a type 266 cast should be enough, but compilers implement non-standard 267 excess-precision handling, so when FLT_EVAL_METHOD != 0 then 268 these functions may need to be customized. */ 269 static inline float 270 eval_as_float (float x) 271 { 272 return x; 273 } 274 static inline double 275 eval_as_double (double x) 276 { 277 return x; 278 } 279 280 /* Error handling tail calls for special cases, with a sign argument. 281 The sign of the return value is set if the argument is non-zero. */ 282 283 /* The result overflows. */ 284 HIDDEN float __math_oflowf (uint32_t); 285 /* The result underflows to 0 in nearest rounding mode. */ 286 HIDDEN float __math_uflowf (uint32_t); 287 /* The result underflows to 0 in some directed rounding mode only. */ 288 HIDDEN float __math_may_uflowf (uint32_t); 289 /* Division by zero. */ 290 HIDDEN float __math_divzerof (uint32_t); 291 /* The result overflows. */ 292 HIDDEN double __math_oflow (uint32_t); 293 /* The result underflows to 0 in nearest rounding mode. */ 294 HIDDEN double __math_uflow (uint32_t); 295 /* The result underflows to 0 in some directed rounding mode only. */ 296 HIDDEN double __math_may_uflow (uint32_t); 297 /* Division by zero. */ 298 HIDDEN double __math_divzero (uint32_t); 299 300 /* Error handling using input checking. */ 301 302 /* Invalid input unless it is a quiet NaN. */ 303 HIDDEN float __math_invalidf (float); 304 /* Invalid input unless it is a quiet NaN. */ 305 HIDDEN double __math_invalid (double); 306 307 /* Error handling using output checking, only for errno setting. */ 308 309 /* Check if the result overflowed to infinity. */ 310 HIDDEN double __math_check_oflow (double); 311 /* Check if the result underflowed to 0. */ 312 HIDDEN double __math_check_uflow (double); 313 314 /* Check if the result overflowed to infinity. */ 315 static inline double 316 check_oflow (double x) 317 { 318 return WANT_ERRNO ? __math_check_oflow (x) : x; 319 } 320 321 /* Check if the result underflowed to 0. */ 322 static inline double 323 check_uflow (double x) 324 { 325 return WANT_ERRNO ? __math_check_uflow (x) : x; 326 } 327 328 /* Check if the result overflowed to infinity. */ 329 HIDDEN float __math_check_oflowf (float); 330 /* Check if the result underflowed to 0. */ 331 HIDDEN float __math_check_uflowf (float); 332 333 /* Check if the result overflowed to infinity. */ 334 static inline float 335 check_oflowf (float x) 336 { 337 return WANT_ERRNO ? __math_check_oflowf (x) : x; 338 } 339 340 /* Check if the result underflowed to 0. */ 341 static inline float 342 check_uflowf (float x) 343 { 344 return WANT_ERRNO ? __math_check_uflowf (x) : x; 345 } 346 347 extern const struct erff_data 348 { 349 struct 350 { 351 float erf, scale; 352 } tab[513]; 353 } __erff_data HIDDEN; 354 355 extern const struct sv_erff_data 356 { 357 float erf[513]; 358 float scale[513]; 359 } __sv_erff_data HIDDEN; 360 361 extern const struct erfcf_data 362 { 363 struct 364 { 365 float erfc, scale; 366 } tab[645]; 367 } __erfcf_data HIDDEN; 368 369 /* Data for logf and log10f. */ 370 #define LOGF_TABLE_BITS 4 371 #define LOGF_POLY_ORDER 4 372 extern const struct logf_data 373 { 374 struct 375 { 376 double invc, logc; 377 } tab[1 << LOGF_TABLE_BITS]; 378 double ln2; 379 double invln10; 380 double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */ 381 } __logf_data HIDDEN; 382 383 /* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */ 384 #define LOG10_TABLE_BITS 7 385 #define LOG10_POLY_ORDER 6 386 #define LOG10_POLY1_ORDER 12 387 extern const struct log10_data 388 { 389 double ln2hi; 390 double ln2lo; 391 double invln10; 392 double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */ 393 double poly1[LOG10_POLY1_ORDER - 1]; 394 struct 395 { 396 double invc, logc; 397 } tab[1 << LOG10_TABLE_BITS]; 398 #if !HAVE_FAST_FMA 399 struct 400 { 401 double chi, clo; 402 } tab2[1 << LOG10_TABLE_BITS]; 403 #endif 404 } __log10_data HIDDEN; 405 406 #define EXP_TABLE_BITS 7 407 #define EXP_POLY_ORDER 5 408 /* Use polynomial that is optimized for a wider input range. This may be 409 needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */ 410 #define EXP_POLY_WIDE 0 411 /* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be 412 needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */ 413 #define EXP_USE_TOINT_NARROW 0 414 #define EXP2_POLY_ORDER 5 415 #define EXP2_POLY_WIDE 0 416 extern const struct exp_data 417 { 418 double invln2N; 419 double shift; 420 double negln2hiN; 421 double negln2loN; 422 double poly[4]; /* Last four coefficients. */ 423 double exp2_shift; 424 double exp2_poly[EXP2_POLY_ORDER]; 425 uint64_t tab[2 * (1 << EXP_TABLE_BITS)]; 426 } __exp_data HIDDEN; 427 428 /* Copied from math/v_exp.h for use in vector exp_tail. */ 429 #define V_EXP_TAIL_TABLE_BITS 8 430 extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN; 431 432 /* Copied from math/v_exp.h for use in vector exp2. */ 433 #define V_EXP_TABLE_BITS 7 434 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; 435 436 extern const struct erf_data 437 { 438 struct 439 { 440 double erf, scale; 441 } tab[769]; 442 } __erf_data HIDDEN; 443 444 extern const struct sv_erf_data 445 { 446 double erf[769]; 447 double scale[769]; 448 } __sv_erf_data HIDDEN; 449 450 extern const struct erfc_data 451 { 452 struct 453 { 454 double erfc, scale; 455 } tab[3488]; 456 } __erfc_data HIDDEN; 457 458 #define ATAN_POLY_NCOEFFS 20 459 extern const struct atan_poly_data 460 { 461 double poly[ATAN_POLY_NCOEFFS]; 462 } __atan_poly_data HIDDEN; 463 464 #define ATANF_POLY_NCOEFFS 8 465 extern const struct atanf_poly_data 466 { 467 float poly[ATANF_POLY_NCOEFFS]; 468 } __atanf_poly_data HIDDEN; 469 470 #define ASINHF_NCOEFFS 8 471 extern const struct asinhf_data 472 { 473 float coeffs[ASINHF_NCOEFFS]; 474 } __asinhf_data HIDDEN; 475 476 #define LOG_TABLE_BITS 7 477 #define LOG_POLY_ORDER 6 478 #define LOG_POLY1_ORDER 12 479 extern const struct log_data 480 { 481 double ln2hi; 482 double ln2lo; 483 double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ 484 double poly1[LOG_POLY1_ORDER - 1]; 485 struct 486 { 487 double invc, logc; 488 } tab[1 << LOG_TABLE_BITS]; 489 #if !HAVE_FAST_FMA 490 struct 491 { 492 double chi, clo; 493 } tab2[1 << LOG_TABLE_BITS]; 494 #endif 495 } __log_data HIDDEN; 496 497 #define ASINH_NCOEFFS 18 498 extern const struct asinh_data 499 { 500 double poly[ASINH_NCOEFFS]; 501 } __asinh_data HIDDEN; 502 503 #define LOG1P_NCOEFFS 19 504 extern const struct log1p_data 505 { 506 double coeffs[LOG1P_NCOEFFS]; 507 } __log1p_data HIDDEN; 508 509 #define LOG1PF_2U5 510 #define LOG1PF_NCOEFFS 9 511 extern const struct log1pf_data 512 { 513 float coeffs[LOG1PF_NCOEFFS]; 514 } __log1pf_data HIDDEN; 515 516 #define TANF_P_POLY_NCOEFFS 6 517 /* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps. */ 518 #define TANF_Q_POLY_NCOEFFS 4 519 extern const struct tanf_poly_data 520 { 521 float poly_tan[TANF_P_POLY_NCOEFFS]; 522 float poly_cotan[TANF_Q_POLY_NCOEFFS]; 523 } __tanf_poly_data HIDDEN; 524 525 #define V_LOG2_TABLE_BITS 7 526 extern const struct v_log2_data 527 { 528 double poly[5]; 529 double invln2; 530 struct 531 { 532 double invc, log2c; 533 } table[1 << V_LOG2_TABLE_BITS]; 534 } __v_log2_data HIDDEN; 535 536 #define V_LOG10_TABLE_BITS 7 537 extern const struct v_log10_data 538 { 539 double poly[5]; 540 double invln10, log10_2; 541 struct 542 { 543 double invc, log10c; 544 } table[1 << V_LOG10_TABLE_BITS]; 545 } __v_log10_data HIDDEN; 546 547 /* Some data for SVE powf's internal exp and log. */ 548 #define V_POWF_EXP2_TABLE_BITS 5 549 #define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS) 550 #define V_POWF_LOG2_TABLE_BITS 5 551 #define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS) 552 extern const struct v_powf_data 553 { 554 double invc[V_POWF_LOG2_N]; 555 double logc[V_POWF_LOG2_N]; 556 uint64_t scale[V_POWF_EXP2_N]; 557 } __v_powf_data HIDDEN; 558 559 #define V_LOG_POLY_ORDER 6 560 #define V_LOG_TABLE_BITS 7 561 extern const struct v_log_data 562 { 563 /* Shared data for vector log and log-derived routines (e.g. asinh). */ 564 double poly[V_LOG_POLY_ORDER - 1]; 565 double ln2; 566 struct 567 { 568 double invc, logc; 569 } table[1 << V_LOG_TABLE_BITS]; 570 } __v_log_data HIDDEN; 571 572 #define EXPM1F_POLY_ORDER 5 573 extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN; 574 575 #define EXPF_TABLE_BITS 5 576 #define EXPF_POLY_ORDER 3 577 extern const struct expf_data 578 { 579 uint64_t tab[1 << EXPF_TABLE_BITS]; 580 double invln2_scaled; 581 double poly_scaled[EXPF_POLY_ORDER]; 582 } __expf_data HIDDEN; 583 584 #define EXPM1_POLY_ORDER 11 585 extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN; 586 587 extern const struct cbrtf_data 588 { 589 float poly[4]; 590 float table[5]; 591 } __cbrtf_data HIDDEN; 592 593 extern const struct cbrt_data 594 { 595 double poly[4]; 596 double table[5]; 597 } __cbrt_data HIDDEN; 598 599 #define ASINF_POLY_ORDER 4 600 extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN; 601 602 #define ASIN_POLY_ORDER 11 603 extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN; 604 605 /* Some data for AdvSIMD and SVE pow's internal exp and log. */ 606 #define V_POW_EXP_TABLE_BITS 8 607 extern const struct v_pow_exp_data 608 { 609 double poly[3]; 610 double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift; 611 uint64_t sbits[1 << V_POW_EXP_TABLE_BITS]; 612 } __v_pow_exp_data HIDDEN; 613 614 #define V_POW_LOG_TABLE_BITS 7 615 extern const struct v_pow_log_data 616 { 617 double poly[7]; /* First coefficient is 1. */ 618 double ln2_hi, ln2_lo; 619 double invc[1 << V_POW_LOG_TABLE_BITS]; 620 double logc[1 << V_POW_LOG_TABLE_BITS]; 621 double logctail[1 << V_POW_LOG_TABLE_BITS]; 622 } __v_pow_log_data HIDDEN; 623 624 #endif 625