msun/src/s_rsqrtl.c

*3085fc9dSSteve Kargl/*-
*3085fc9dSSteve Kargl * Copyright (c) 2026 Steven G. Kargl
*3085fc9dSSteve Kargl * All rights reserved.
*3085fc9dSSteve Kargl *
*3085fc9dSSteve Kargl * Redistribution and use in source and binary forms, with or without
*3085fc9dSSteve Kargl * modification, are permitted provided that the following conditions
*3085fc9dSSteve Kargl * are met:
*3085fc9dSSteve Kargl * 1. Redistributions of source code must retain the above copyright
*3085fc9dSSteve Kargl *    notice unmodified, this list of conditions, and the following
*3085fc9dSSteve Kargl *    disclaimer.
*3085fc9dSSteve Kargl * 2. Redistributions in binary form must reproduce the above copyright
*3085fc9dSSteve Kargl *    notice, this list of conditions and the following disclaimer in the
*3085fc9dSSteve Kargl *    documentation and/or other materials provided with the distribution.
*3085fc9dSSteve Kargl *
*3085fc9dSSteve Kargl * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
*3085fc9dSSteve Kargl * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
*3085fc9dSSteve Kargl * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
*3085fc9dSSteve Kargl * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
*3085fc9dSSteve Kargl * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
*3085fc9dSSteve Kargl * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
*3085fc9dSSteve Kargl * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
*3085fc9dSSteve Kargl * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
*3085fc9dSSteve Kargl * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
*3085fc9dSSteve Kargl * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*3085fc9dSSteve Kargl */
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl/**
*3085fc9dSSteve Kargl * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).
*3085fc9dSSteve Kargl *
*3085fc9dSSteve Kargl * First, filter out special cases:
*3085fc9dSSteve Kargl *
*3085fc9dSSteve Kargl *   1. rsqrt(+-0) = +-inf, and raises FE_DIVBYZERO.
*3085fc9dSSteve Kargl *   2. rsqrt(nan) = NaN.
*3085fc9dSSteve Kargl *   3. rsqrt(+inf) = +0.
*3085fc9dSSteve Kargl *   4. rsqrt(x<0) = NaN, and raises FE_INVALID.
*3085fc9dSSteve Kargl *
*3085fc9dSSteve Kargl * If x is a subnormal, scale x into the normal range by x*0x1pN; while
*3085fc9dSSteve Kargl * recording the exponent of the scale factor N.  Split the possibly
*3085fc9dSSteve Kargl * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).
*3085fc9dSSteve Kargl * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is
*3085fc9dSSteve Kargl * in [0.25,1) with n even.
*3085fc9dSSteve Kargl *
*3085fc9dSSteve Kargl * An initial estimate of y = rsqrt[f](x) is 1 / sqrt[f](x).  Exhaustive
*3085fc9dSSteve Kargl * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in
*3085fc9dSSteve Kargl * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then
*3085fc9dSSteve Kargl * used with one iteration of Goldschmidt's algorithm:
*3085fc9dSSteve Kargl *
*3085fc9dSSteve Kargl *	z = x * y
*3085fc9dSSteve Kargl *	h = y / 2
*3085fc9dSSteve Kargl *	r = 0.5 - h * z
*3085fc9dSSteve Kargl *	y = h * r + h
*3085fc9dSSteve Kargl *
*3085fc9dSSteve Kargl * A factor of 2 appears missing in the above, but it is included in the
*3085fc9dSSteve Kargl * exponent m.
*3085fc9dSSteve Kargl */
*3085fc9dSSteve Kargl#include <fenv.h>
*3085fc9dSSteve Kargl#include <float.h>
*3085fc9dSSteve Kargl#include "math.h"
*3085fc9dSSteve Kargl#include "math_private.h"
*3085fc9dSSteve Kargl#include "fpmath.h"
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl#pragma STDC FENV_ACCESS ON
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl#if LDBL_MANT_DIG == 64
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl#ifdef _CC
*3085fc9dSSteve Kargl#undef _CC
*3085fc9dSSteve Kargl#endif
*3085fc9dSSteve Kargl#define _CC (0x1p32L + 1)
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargllong double
*3085fc9dSSteve Karglrsqrtl(long double x)
*3085fc9dSSteve Kargl{
*3085fc9dSSteve Kargl	volatile static const double vzero = 0;
*3085fc9dSSteve Kargl	static const double half = 0.5;
*3085fc9dSSteve Kargl	uint32_t ux;
*3085fc9dSSteve Kargl	int m, rnd;
*3085fc9dSSteve Kargl	long double h, ph, pl, rh, rl, y, zh, zl;
*3085fc9dSSteve Kargl	union IEEEl2bits u;
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	u.e = x;
*3085fc9dSSteve Kargl	ux = (u.bits.manl | u.bits.manh);
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	/* x = +-0.  Raise exception. */
*3085fc9dSSteve Kargl	if ((u.bits.exp | ux) == 0)
*3085fc9dSSteve Kargl	    return (1 / x);
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	/* x is NaN or x is +-inf. */
*3085fc9dSSteve Kargl	if (u.bits.exp == 0x7fff)
*3085fc9dSSteve Kargl	    return (ux ? (x + x) : (u.bits.sign ? vzero / vzero : 0));
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	/* x < 0.  Raise exception. */
*3085fc9dSSteve Kargl	if (u.bits.sign)
*3085fc9dSSteve Kargl	    return (vzero / vzero);
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	/*
*3085fc9dSSteve Kargl	 * If x is subnormal, then scale it into the normal range.
*3085fc9dSSteve Kargl	 * Split x into significand and exponent, x = f * 2^m, with
*3085fc9dSSteve Kargl	 * f in [0.5,1) and m a biased exponent.
*3085fc9dSSteve Kargl	 */
*3085fc9dSSteve Kargl	ENTERI();
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	if (u.bits.exp == 0) {		/* Subnormal */
*3085fc9dSSteve Kargl	    u.e *= 0x1p512;
*3085fc9dSSteve Kargl	    m = u.bits.exp - 0x41fe;
*3085fc9dSSteve Kargl	} else {
*3085fc9dSSteve Kargl	    m = u.bits.exp - 0x3ffe;
*3085fc9dSSteve Kargl	}
*3085fc9dSSteve Kargl	u.bits.exp = 0x3ffe;
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	/* m is odd.  Put x into [0.25,5) and increase m. */
*3085fc9dSSteve Kargl	if (m & 1) {
*3085fc9dSSteve Kargl	    u.e /= 2;
*3085fc9dSSteve Kargl	    m += 1;
*3085fc9dSSteve Kargl	}
*3085fc9dSSteve Kargl	m = -(m >> 1);			/* Prepare for 2^(-m/2). */
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	y = 1 / sqrt((double)u.e);	/* ~52-bit estimate. */
*3085fc9dSSteve Kargl	y -= y * (u.e * y * y - 1) / 2;	/* ~63-bit estimate. */
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	h = y / 2;
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	_MUL(u.e, y, zh, zl);
*3085fc9dSSteve Kargl	_XMUL(zh, zl, h, 0, ph, pl);
*3085fc9dSSteve Kargl	_XADD(-ph, -pl, half, 0, rh, rl);
*3085fc9dSSteve Kargl	y = rh * h + h;
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl	u.e = 1;
*3085fc9dSSteve Kargl	u.xbits.expsign = 0x3fff + m + 1;
*3085fc9dSSteve Kargl	RETURNI(y * u.e);
*3085fc9dSSteve Kargl}
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl#else
*3085fc9dSSteve Kargl
*3085fc9dSSteve Kargl#ifdef _CC
*3085fc9dSSteve Kargl#undef _CC
*3085fc9dSSteve Kargl#endif
*3085fc9dSSteve Kargl#define _CC (0x1p57L + 1)
*3085fc9dSSteve Kargl
/*
 * rsqrtl(x) = 1 / sqrt(x) for long double formats other than the
 * 64-bit-significand one.
 *
 * Special cases, handled before any arithmetic on the general path:
 *   rsqrtl(+-0)  = +-inf, raising FE_DIVBYZERO (through 1 / x).
 *   rsqrtl(NaN)  = NaN (through x + x).
 *   rsqrtl(+inf) = +0.
 *   rsqrtl(x<0)  = NaN, raising FE_INVALID (through 0 / 0); this includes
 *                  -inf.
 *
 * Otherwise x is reduced to f * 2^m with f in [0.25,1) and m even, an
 * estimate of 1 / sqrt(f) is refined, and the result is scaled by
 * 2^(-m/2) with ldexpl().
 */
long double
rsqrtl(long double x)
{
	volatile static const double vzero = 0;
	static const double half = 0.5;
	int hx, m, rnd;
	long double h, ph, pl, rh, rl, y, zh, zl;

	/* x = +-0.  Raise exception. */
	if (x == 0)
	    return (1 / x);

	/* x is NaN. */
	if (isnan(x))
	    return (x + x);

	/* x is +-inf.  +inf gives +0; -inf is invalid and gives NaN. */
	if (isinf(x))
	    return (x > 0 ? 0 : vzero / vzero);

	/* x < 0.  Raise exception. */
	if (x < 0)
	    return (vzero / vzero);

	/*
	 * If x is subnormal, then scale it into the normal range.
	 * Split x into significand and exponent, x = f * 2^m, with
	 * f in [0.5,1) and m the unbiased exponent.
	 */
	m = 0;
	if (!isnormal(x)) {
	    x *= 0x1p114L;
	    m = -114;			/* Undo the scale factor in m. */
	}
	x = frexpl(x, &hx);
	m += hx;

	/* m is odd.  Put x into [0.25,0.5) and increase m. */
	if (m & 1) {
	    x /= 2;
	    m += 1;
	}
	/*
	 * Prepare for 2^(-m/2).  m is even here, so the division is exact
	 * and avoids right-shifting a negative int.
	 */
	m = -(m / 2);

	y = 1 / sqrt((double)x);	/* ~52-bit estimate. */
	y -= y * (x * y * y - 1) / 2;	/* ~104-bit estimate. */

	/*
	 * One Goldschmidt step, with z = x * y and r = 1/2 - h * z carried
	 * as high/low pairs:
	 *
	 *	h = y / 2
	 *	y = h * r + h
	 *
	 * The two products are formed while rounding toward zero; the
	 * caller's rounding mode is restored before the final sum.
	 */
	h = y / 2;

	rnd = fegetround();
	fesetround(FE_TOWARDZERO);
	_MUL(x, y, zh, zl);
	_XMUL(zh, zl, -h, 0, ph, pl);
	fesetround(rnd);

	_XADD(ph, pl, half, 0, rh, rl);
	y = rh * h + h;
	m++;				/* Restore the factor of 2 dropped by h. */

	RETURNI(ldexpl(y, m));
}
*3085fc9dSSteve Kargl#endif