1072a4ba8SAndrew Turner /*
2072a4ba8SAndrew Turner * Double-precision cbrt(x) function.
3072a4ba8SAndrew Turner *
4072a4ba8SAndrew Turner * Copyright (c) 2022-2023, Arm Limited.
5072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6072a4ba8SAndrew Turner */
7072a4ba8SAndrew Turner
8072a4ba8SAndrew Turner #include "math_config.h"
9072a4ba8SAndrew Turner #include "pl_sig.h"
10072a4ba8SAndrew Turner #include "pl_test.h"
11072a4ba8SAndrew Turner
12072a4ba8SAndrew Turner PL_SIG (S, D, 1, cbrt, -10.0, 10.0)
13072a4ba8SAndrew Turner
14072a4ba8SAndrew Turner #define AbsMask 0x7fffffffffffffff
15072a4ba8SAndrew Turner #define TwoThirds 0x1.5555555555555p-1
16072a4ba8SAndrew Turner
17072a4ba8SAndrew Turner #define C(i) __cbrt_data.poly[i]
18072a4ba8SAndrew Turner #define T(i) __cbrt_data.table[i]
19072a4ba8SAndrew Turner
20072a4ba8SAndrew Turner /* Approximation for double-precision cbrt(x), using low-order polynomial and
21072a4ba8SAndrew Turner two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat
22072a4ba8SAndrew Turner according to the exponent, for instance an error observed for double value
23072a4ba8SAndrew Turner m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an
24072a4ba8SAndrew Turner integer.
25072a4ba8SAndrew Turner cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
26072a4ba8SAndrew Turner want 0x1.965fe72821e99p+0. */
27072a4ba8SAndrew Turner double
cbrt(double x)28072a4ba8SAndrew Turner cbrt (double x)
29072a4ba8SAndrew Turner {
30072a4ba8SAndrew Turner uint64_t ix = asuint64 (x);
31072a4ba8SAndrew Turner uint64_t iax = ix & AbsMask;
32072a4ba8SAndrew Turner uint64_t sign = ix & ~AbsMask;
33072a4ba8SAndrew Turner
34*5a02ffc3SAndrew Turner if (unlikely (iax == 0 || iax == 0x7ff0000000000000))
35072a4ba8SAndrew Turner return x;
36072a4ba8SAndrew Turner
37072a4ba8SAndrew Turner /* |x| = m * 2^e, where m is in [0.5, 1.0].
38072a4ba8SAndrew Turner We can easily decompose x into m and e using frexp. */
39072a4ba8SAndrew Turner int e;
40072a4ba8SAndrew Turner double m = frexp (asdouble (iax), &e);
41072a4ba8SAndrew Turner
42072a4ba8SAndrew Turner /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for
43072a4ba8SAndrew Turner Newton iterations. */
44072a4ba8SAndrew Turner double p_01 = fma (C (1), m, C (0));
45072a4ba8SAndrew Turner double p_23 = fma (C (3), m, C (2));
46072a4ba8SAndrew Turner double p = fma (p_23, m * m, p_01);
47072a4ba8SAndrew Turner
48072a4ba8SAndrew Turner /* Two iterations of Newton's method for iteratively approximating cbrt. */
49072a4ba8SAndrew Turner double m_by_3 = m / 3;
50072a4ba8SAndrew Turner double a = fma (TwoThirds, p, m_by_3 / (p * p));
51072a4ba8SAndrew Turner a = fma (TwoThirds, a, m_by_3 / (a * a));
52072a4ba8SAndrew Turner
53072a4ba8SAndrew Turner /* Assemble the result by the following:
54072a4ba8SAndrew Turner
55072a4ba8SAndrew Turner cbrt(x) = cbrt(m) * 2 ^ (e / 3).
56072a4ba8SAndrew Turner
57072a4ba8SAndrew Turner Let t = (2 ^ (e / 3)) / (2 ^ round(e / 3)).
58072a4ba8SAndrew Turner
59072a4ba8SAndrew Turner Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3.
60072a4ba8SAndrew Turner i is an integer in [-2, 2], so t can be looked up in the table T.
61072a4ba8SAndrew Turner Hence the result is assembled as:
62072a4ba8SAndrew Turner
63072a4ba8SAndrew Turner cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign.
64072a4ba8SAndrew Turner Which can be done easily using ldexp. */
65072a4ba8SAndrew Turner return asdouble (asuint64 (ldexp (a * T (2 + e % 3), e / 3)) | sign);
66072a4ba8SAndrew Turner }
67072a4ba8SAndrew Turner
68072a4ba8SAndrew Turner PL_TEST_ULP (cbrt, 1.30)
69*5a02ffc3SAndrew Turner PL_TEST_SYM_INTERVAL (cbrt, 0, inf, 1000000)
70