xref: /freebsd/contrib/arm-optimized-routines/pl/math/v_log1pf_inline.h (revision f81cdf24ba5436367377f7c8e8f51f6df2a75ca7)
1 /*
2  * Helper for single-precision routines which calculate log(1 + x) and do not
3  * need special-case handling
4  *
5  * Copyright (c) 2022-2023, Arm Limited.
6  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
7  */
8 
9 #ifndef PL_MATH_V_LOG1PF_INLINE_H
10 #define PL_MATH_V_LOG1PF_INLINE_H
11 
12 #include "v_math.h"
13 #include "math_config.h"
14 
15 #define Four 0x40800000 /* IEEE-754 binary32 bit pattern of 4.0f; used as an integer in log1pf_inline.  */
16 #define Ln2 v_f32 (0x1.62e43p-1f) /* ln(2) ~= 0.693147, wrapped by v_f32 (presumably a broadcast - confirm in v_math.h).  */
17 
18 #define C(i) v_f32 (__log1pf_data.coeffs[i]) /* i-th polynomial coefficient from __log1pf_data, wrapped by v_f32.  */
19 
20 static inline v_f32_t
21 eval_poly (v_f32_t m)
22 {
23   /* Approximate log(1+m) on [-0.25, 0.5] using Estrin scheme.  */
24   v_f32_t p_12 = v_fma_f32 (m, C (1), C (0));
25   v_f32_t p_34 = v_fma_f32 (m, C (3), C (2));
26   v_f32_t p_56 = v_fma_f32 (m, C (5), C (4));
27   v_f32_t p_78 = v_fma_f32 (m, C (7), C (6));
28 
29   v_f32_t m2 = m * m;
30   v_f32_t p_02 = v_fma_f32 (m2, p_12, m);
31   v_f32_t p_36 = v_fma_f32 (m2, p_56, p_34);
32   v_f32_t p_79 = v_fma_f32 (m2, C (8), p_78);
33 
34   v_f32_t m4 = m2 * m2;
35   v_f32_t p_06 = v_fma_f32 (m4, p_36, p_02);
36 
37   return v_fma_f32 (m4, m4 * p_79, p_06);
38 }
39 
40 static inline v_f32_t
41 log1pf_inline (v_f32_t x)
42 {
43   /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
44      special-case handling. See that file for details of the algorithm.  */
45   v_f32_t m = x + 1.0f;
46   v_u32_t k = (v_as_u32_f32 (m) - 0x3f400000) & 0xff800000;
47   v_f32_t s = v_as_f32_u32 (v_u32 (Four) - k);
48   v_f32_t m_scale = v_as_f32_u32 (v_as_u32_f32 (x) - k)
49 		    + v_fma_f32 (v_f32 (0.25f), s, v_f32 (-1.0f));
50   v_f32_t p = eval_poly (m_scale);
51   v_f32_t scale_back = v_to_f32_u32 (k) * 0x1.0p-23f;
52   return v_fma_f32 (scale_back, Ln2, p);
53 }
54 
55 #endif //  PL_MATH_V_LOG1PF_INLINE_H
56