1*25c28e83SPiotr Jasiukajtis /* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis 22*25c28e83SPiotr Jasiukajtis /* 23*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24*25c28e83SPiotr Jasiukajtis */ 25*25c28e83SPiotr Jasiukajtis /* 26*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 28*25c28e83SPiotr Jasiukajtis */ 29*25c28e83SPiotr Jasiukajtis 30*25c28e83SPiotr Jasiukajtis #include <sys/isa_defs.h> 31*25c28e83SPiotr Jasiukajtis #include "libm_inlines.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN 34*25c28e83SPiotr Jasiukajtis #define HI(x) *(1+(int*)x) 35*25c28e83SPiotr Jasiukajtis #define LO(x) *(unsigned*)x 36*25c28e83SPiotr Jasiukajtis #else 37*25c28e83SPiotr Jasiukajtis #define HI(x) *(int*)x 38*25c28e83SPiotr Jasiukajtis #define LO(x) *(1+(unsigned*)x) 39*25c28e83SPiotr Jasiukajtis #endif 40*25c28e83SPiotr Jasiukajtis 41*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT 42*25c28e83SPiotr Jasiukajtis #define restrict _Restrict 43*25c28e83SPiotr Jasiukajtis #else 44*25c28e83SPiotr Jasiukajtis #define restrict 45*25c28e83SPiotr Jasiukajtis #endif 46*25c28e83SPiotr Jasiukajtis 47*25c28e83SPiotr Jasiukajtis /* double hypot(double x, double y) 48*25c28e83SPiotr Jasiukajtis * 49*25c28e83SPiotr Jasiukajtis * Method : 50*25c28e83SPiotr Jasiukajtis * 1. Special cases: 51*25c28e83SPiotr Jasiukajtis * x or y is +Inf or -Inf => +Inf 52*25c28e83SPiotr Jasiukajtis * x or y is NaN => QNaN 53*25c28e83SPiotr Jasiukajtis * 2. Computes hypot(x,y): 54*25c28e83SPiotr Jasiukajtis * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm) 55*25c28e83SPiotr Jasiukajtis * Where: 56*25c28e83SPiotr Jasiukajtis * m = max(|x|,|y|) 57*25c28e83SPiotr Jasiukajtis * xnm = x * (1/m) 58*25c28e83SPiotr Jasiukajtis * ynm = y * (1/m) 59*25c28e83SPiotr Jasiukajtis * 60*25c28e83SPiotr Jasiukajtis * Compute xnm * xnm + ynm * ynm by simulating 61*25c28e83SPiotr Jasiukajtis * muti-precision arithmetic. 62*25c28e83SPiotr Jasiukajtis * 63*25c28e83SPiotr Jasiukajtis * Accuracy: 64*25c28e83SPiotr Jasiukajtis * Maximum error observed: less than 0.872 ulp after 16.777.216.000 65*25c28e83SPiotr Jasiukajtis * results. 66*25c28e83SPiotr Jasiukajtis */ 67*25c28e83SPiotr Jasiukajtis 68*25c28e83SPiotr Jasiukajtis extern double sqrt(double); 69*25c28e83SPiotr Jasiukajtis extern double fabs(double); 70*25c28e83SPiotr Jasiukajtis 71*25c28e83SPiotr Jasiukajtis static const unsigned long long LCONST[] = { 72*25c28e83SPiotr Jasiukajtis 0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */ 73*25c28e83SPiotr Jasiukajtis 0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */ 74*25c28e83SPiotr Jasiukajtis 0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */ 75*25c28e83SPiotr Jasiukajtis }; 76*25c28e83SPiotr Jasiukajtis 77*25c28e83SPiotr Jasiukajtis static void 78*25c28e83SPiotr Jasiukajtis __vhypot_n(int n, double * restrict px, int stridex, double * restrict py, 79*25c28e83SPiotr Jasiukajtis int stridey, double * restrict pz, int stridez); 80*25c28e83SPiotr Jasiukajtis 81*25c28e83SPiotr Jasiukajtis #pragma no_inline(__vhypot_n) 82*25c28e83SPiotr Jasiukajtis 83*25c28e83SPiotr Jasiukajtis #define RETURN(ret) \ 84*25c28e83SPiotr Jasiukajtis { \ 85*25c28e83SPiotr Jasiukajtis *pz = (ret); \ 86*25c28e83SPiotr Jasiukajtis py += stridey; \ 87*25c28e83SPiotr Jasiukajtis pz += stridez; \ 88*25c28e83SPiotr Jasiukajtis if (n_n == 0) \ 89*25c28e83SPiotr Jasiukajtis { \ 90*25c28e83SPiotr Jasiukajtis hx0 = HI(px); \ 91*25c28e83SPiotr Jasiukajtis hy0 = HI(py); \ 92*25c28e83SPiotr Jasiukajtis spx = px; spy = py; spz = pz; \ 93*25c28e83SPiotr Jasiukajtis continue; \ 94*25c28e83SPiotr Jasiukajtis } \ 95*25c28e83SPiotr Jasiukajtis n--; \ 96*25c28e83SPiotr Jasiukajtis break; \ 97*25c28e83SPiotr Jasiukajtis } 98*25c28e83SPiotr Jasiukajtis 99*25c28e83SPiotr Jasiukajtis void 100*25c28e83SPiotr Jasiukajtis __vhypot(int n, double * restrict px, int stridex, double * restrict py, 101*25c28e83SPiotr Jasiukajtis int stridey, double * restrict pz, int stridez) 102*25c28e83SPiotr Jasiukajtis { 103*25c28e83SPiotr Jasiukajtis int hx0, hx1, hy0, j0, diff; 104*25c28e83SPiotr Jasiukajtis double x_hi, x_lo, y_hi, y_lo; 105*25c28e83SPiotr Jasiukajtis double scl = 0; 106*25c28e83SPiotr Jasiukajtis double x, y, res; 107*25c28e83SPiotr Jasiukajtis double *spx, *spy, *spz; 108*25c28e83SPiotr Jasiukajtis int n_n; 109*25c28e83SPiotr Jasiukajtis double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ 110*25c28e83SPiotr Jasiukajtis double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */ 111*25c28e83SPiotr Jasiukajtis double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */ 112*25c28e83SPiotr Jasiukajtis 113*25c28e83SPiotr Jasiukajtis while (n > 1) 114*25c28e83SPiotr Jasiukajtis { 115*25c28e83SPiotr Jasiukajtis n_n = 0; 116*25c28e83SPiotr Jasiukajtis spx = px; 117*25c28e83SPiotr Jasiukajtis spy = py; 118*25c28e83SPiotr Jasiukajtis spz = pz; 119*25c28e83SPiotr Jasiukajtis hx0 = HI(px); 120*25c28e83SPiotr Jasiukajtis hy0 = HI(py); 121*25c28e83SPiotr Jasiukajtis for (; n > 1 ; n--) 122*25c28e83SPiotr Jasiukajtis { 123*25c28e83SPiotr Jasiukajtis px += stridex; 124*25c28e83SPiotr Jasiukajtis hx0 &= 0x7fffffff; 125*25c28e83SPiotr Jasiukajtis hy0 &= 0x7fffffff; 126*25c28e83SPiotr Jasiukajtis 127*25c28e83SPiotr Jasiukajtis if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */ 128*25c28e83SPiotr Jasiukajtis { 129*25c28e83SPiotr Jasiukajtis diff = hy0 - hx0; 130*25c28e83SPiotr Jasiukajtis j0 = diff >> 31; 131*25c28e83SPiotr Jasiukajtis j0 = hy0 - (diff & j0); 132*25c28e83SPiotr Jasiukajtis j0 &= 0x7ff00000; 133*25c28e83SPiotr Jasiukajtis x = *(px - stridex); 134*25c28e83SPiotr Jasiukajtis y = *py; 135*25c28e83SPiotr Jasiukajtis x = fabs(x); 136*25c28e83SPiotr Jasiukajtis y = fabs(y); 137*25c28e83SPiotr Jasiukajtis if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 138*25c28e83SPiotr Jasiukajtis { 139*25c28e83SPiotr Jasiukajtis int lx = LO((px - stridex)); 140*25c28e83SPiotr Jasiukajtis int ly = LO(py); 141*25c28e83SPiotr Jasiukajtis if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 142*25c28e83SPiotr Jasiukajtis else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 143*25c28e83SPiotr Jasiukajtis else res = x + y; 144*25c28e83SPiotr Jasiukajtis RETURN (res) 145*25c28e83SPiotr Jasiukajtis } 146*25c28e83SPiotr Jasiukajtis else 147*25c28e83SPiotr Jasiukajtis { 148*25c28e83SPiotr Jasiukajtis j0 = diff >> 31; 149*25c28e83SPiotr Jasiukajtis if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 150*25c28e83SPiotr Jasiukajtis { 151*25c28e83SPiotr Jasiukajtis x *= D2ONM1022; 152*25c28e83SPiotr Jasiukajtis y *= D2ONM1022; 153*25c28e83SPiotr Jasiukajtis 154*25c28e83SPiotr Jasiukajtis x_hi = (x + D2ON28) - D2ON28; 155*25c28e83SPiotr Jasiukajtis x_lo = x - x_hi; 156*25c28e83SPiotr Jasiukajtis y_hi = (y + D2ON28) - D2ON28; 157*25c28e83SPiotr Jasiukajtis y_lo = y - y_hi; 158*25c28e83SPiotr Jasiukajtis res = (x_hi * x_hi + y_hi * y_hi); 159*25c28e83SPiotr Jasiukajtis res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 160*25c28e83SPiotr Jasiukajtis 161*25c28e83SPiotr Jasiukajtis res = sqrt (res); 162*25c28e83SPiotr Jasiukajtis 163*25c28e83SPiotr Jasiukajtis res = D2ONP1022 * res; 164*25c28e83SPiotr Jasiukajtis RETURN (res) 165*25c28e83SPiotr Jasiukajtis } 166*25c28e83SPiotr Jasiukajtis else RETURN (x + y) 167*25c28e83SPiotr Jasiukajtis } 168*25c28e83SPiotr Jasiukajtis } 169*25c28e83SPiotr Jasiukajtis if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */ 170*25c28e83SPiotr Jasiukajtis { 171*25c28e83SPiotr Jasiukajtis diff = hy0 - hx0; 172*25c28e83SPiotr Jasiukajtis j0 = diff >> 31; 173*25c28e83SPiotr Jasiukajtis j0 = hy0 - (diff & j0); 174*25c28e83SPiotr Jasiukajtis j0 &= 0x7ff00000; 175*25c28e83SPiotr Jasiukajtis x = *(px - stridex); 176*25c28e83SPiotr Jasiukajtis y = *py; 177*25c28e83SPiotr Jasiukajtis x = fabs(x); 178*25c28e83SPiotr Jasiukajtis y = fabs(y); 179*25c28e83SPiotr Jasiukajtis if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 180*25c28e83SPiotr Jasiukajtis { 181*25c28e83SPiotr Jasiukajtis int lx = LO((px - stridex)); 182*25c28e83SPiotr Jasiukajtis int ly = LO(py); 183*25c28e83SPiotr Jasiukajtis if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 184*25c28e83SPiotr Jasiukajtis else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 185*25c28e83SPiotr Jasiukajtis else res = x + y; 186*25c28e83SPiotr Jasiukajtis RETURN (res) 187*25c28e83SPiotr Jasiukajtis } 188*25c28e83SPiotr Jasiukajtis else 189*25c28e83SPiotr Jasiukajtis { 190*25c28e83SPiotr Jasiukajtis j0 = diff >> 31; 191*25c28e83SPiotr Jasiukajtis if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 192*25c28e83SPiotr Jasiukajtis { 193*25c28e83SPiotr Jasiukajtis x *= D2ONM1022; 194*25c28e83SPiotr Jasiukajtis y *= D2ONM1022; 195*25c28e83SPiotr Jasiukajtis 196*25c28e83SPiotr Jasiukajtis x_hi = (x + D2ON28) - D2ON28; 197*25c28e83SPiotr Jasiukajtis x_lo = x - x_hi; 198*25c28e83SPiotr Jasiukajtis y_hi = (y + D2ON28) - D2ON28; 199*25c28e83SPiotr Jasiukajtis y_lo = y - y_hi; 200*25c28e83SPiotr Jasiukajtis res = (x_hi * x_hi + y_hi * y_hi); 201*25c28e83SPiotr Jasiukajtis res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 202*25c28e83SPiotr Jasiukajtis 203*25c28e83SPiotr Jasiukajtis res = sqrt (res); 204*25c28e83SPiotr Jasiukajtis 205*25c28e83SPiotr Jasiukajtis res = D2ONP1022 * res; 206*25c28e83SPiotr Jasiukajtis RETURN (res) 207*25c28e83SPiotr Jasiukajtis } 208*25c28e83SPiotr Jasiukajtis else RETURN (x + y) 209*25c28e83SPiotr Jasiukajtis } 210*25c28e83SPiotr Jasiukajtis } 211*25c28e83SPiotr Jasiukajtis 212*25c28e83SPiotr Jasiukajtis hx1 = HI(px); 213*25c28e83SPiotr Jasiukajtis 214*25c28e83SPiotr Jasiukajtis if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */ 215*25c28e83SPiotr Jasiukajtis { 216*25c28e83SPiotr Jasiukajtis x = *(px - stridex); 217*25c28e83SPiotr Jasiukajtis y = *py; 218*25c28e83SPiotr Jasiukajtis 219*25c28e83SPiotr Jasiukajtis x *= D2ONP1022; 220*25c28e83SPiotr Jasiukajtis y *= D2ONP1022; 221*25c28e83SPiotr Jasiukajtis 222*25c28e83SPiotr Jasiukajtis x_hi = (x + D2ON28) - D2ON28; 223*25c28e83SPiotr Jasiukajtis x_lo = x - x_hi; 224*25c28e83SPiotr Jasiukajtis y_hi = (y + D2ON28) - D2ON28; 225*25c28e83SPiotr Jasiukajtis y_lo = y - y_hi; 226*25c28e83SPiotr Jasiukajtis res = (x_hi * x_hi + y_hi * y_hi); 227*25c28e83SPiotr Jasiukajtis res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 228*25c28e83SPiotr Jasiukajtis 229*25c28e83SPiotr Jasiukajtis res = sqrt(res); 230*25c28e83SPiotr Jasiukajtis 231*25c28e83SPiotr Jasiukajtis res = D2ONM1022 * res; 232*25c28e83SPiotr Jasiukajtis RETURN (res) 233*25c28e83SPiotr Jasiukajtis } 234*25c28e83SPiotr Jasiukajtis 235*25c28e83SPiotr Jasiukajtis hx0 = hx1; 236*25c28e83SPiotr Jasiukajtis py += stridey; 237*25c28e83SPiotr Jasiukajtis pz += stridez; 238*25c28e83SPiotr Jasiukajtis n_n++; 239*25c28e83SPiotr Jasiukajtis hy0 = HI(py); 240*25c28e83SPiotr Jasiukajtis } 241*25c28e83SPiotr Jasiukajtis if (n_n > 0) 242*25c28e83SPiotr Jasiukajtis __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez); 243*25c28e83SPiotr Jasiukajtis } 244*25c28e83SPiotr Jasiukajtis 245*25c28e83SPiotr Jasiukajtis if (n > 0) 246*25c28e83SPiotr Jasiukajtis { 247*25c28e83SPiotr Jasiukajtis x = *px; 248*25c28e83SPiotr Jasiukajtis y = *py; 249*25c28e83SPiotr Jasiukajtis hx0 = HI(px); 250*25c28e83SPiotr Jasiukajtis hy0 = HI(py); 251*25c28e83SPiotr Jasiukajtis 252*25c28e83SPiotr Jasiukajtis hx0 &= 0x7fffffff; 253*25c28e83SPiotr Jasiukajtis hy0 &= 0x7fffffff; 254*25c28e83SPiotr Jasiukajtis 255*25c28e83SPiotr Jasiukajtis diff = hy0 - hx0; 256*25c28e83SPiotr Jasiukajtis j0 = diff >> 31; 257*25c28e83SPiotr Jasiukajtis j0 = hy0 - (diff & j0); 258*25c28e83SPiotr Jasiukajtis j0 &= 0x7ff00000; 259*25c28e83SPiotr Jasiukajtis 260*25c28e83SPiotr Jasiukajtis if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */ 261*25c28e83SPiotr Jasiukajtis { 262*25c28e83SPiotr Jasiukajtis x = fabs(x); 263*25c28e83SPiotr Jasiukajtis y = fabs(y); 264*25c28e83SPiotr Jasiukajtis if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 265*25c28e83SPiotr Jasiukajtis { 266*25c28e83SPiotr Jasiukajtis int lx = LO(px); 267*25c28e83SPiotr Jasiukajtis int ly = LO(py); 268*25c28e83SPiotr Jasiukajtis if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 269*25c28e83SPiotr Jasiukajtis else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 270*25c28e83SPiotr Jasiukajtis else res = x + y; 271*25c28e83SPiotr Jasiukajtis *pz = res; 272*25c28e83SPiotr Jasiukajtis return; 273*25c28e83SPiotr Jasiukajtis } 274*25c28e83SPiotr Jasiukajtis else 275*25c28e83SPiotr Jasiukajtis { 276*25c28e83SPiotr Jasiukajtis j0 = diff >> 31; 277*25c28e83SPiotr Jasiukajtis if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 278*25c28e83SPiotr Jasiukajtis { 279*25c28e83SPiotr Jasiukajtis x *= D2ONM1022; 280*25c28e83SPiotr Jasiukajtis y *= D2ONM1022; 281*25c28e83SPiotr Jasiukajtis 282*25c28e83SPiotr Jasiukajtis x_hi = (x + D2ON28) - D2ON28; 283*25c28e83SPiotr Jasiukajtis x_lo = x - x_hi; 284*25c28e83SPiotr Jasiukajtis y_hi = (y + D2ON28) - D2ON28; 285*25c28e83SPiotr Jasiukajtis y_lo = y - y_hi; 286*25c28e83SPiotr Jasiukajtis res = (x_hi * x_hi + y_hi * y_hi); 287*25c28e83SPiotr Jasiukajtis res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 288*25c28e83SPiotr Jasiukajtis 289*25c28e83SPiotr Jasiukajtis res = sqrt (res); 290*25c28e83SPiotr Jasiukajtis 291*25c28e83SPiotr Jasiukajtis res = D2ONP1022 * res; 292*25c28e83SPiotr Jasiukajtis *pz = res; 293*25c28e83SPiotr Jasiukajtis return; 294*25c28e83SPiotr Jasiukajtis } 295*25c28e83SPiotr Jasiukajtis else 296*25c28e83SPiotr Jasiukajtis { 297*25c28e83SPiotr Jasiukajtis *pz = x + y; 298*25c28e83SPiotr Jasiukajtis return; 299*25c28e83SPiotr Jasiukajtis } 300*25c28e83SPiotr Jasiukajtis } 301*25c28e83SPiotr Jasiukajtis } 302*25c28e83SPiotr Jasiukajtis 303*25c28e83SPiotr Jasiukajtis if (j0 < 0x00100000) /* X and Y are subnormal */ 304*25c28e83SPiotr Jasiukajtis { 305*25c28e83SPiotr Jasiukajtis x *= D2ONP1022; 306*25c28e83SPiotr Jasiukajtis y *= D2ONP1022; 307*25c28e83SPiotr Jasiukajtis 308*25c28e83SPiotr Jasiukajtis x_hi = (x + D2ON28) - D2ON28; 309*25c28e83SPiotr Jasiukajtis x_lo = x - x_hi; 310*25c28e83SPiotr Jasiukajtis y_hi = (y + D2ON28) - D2ON28; 311*25c28e83SPiotr Jasiukajtis y_lo = y - y_hi; 312*25c28e83SPiotr Jasiukajtis res = (x_hi * x_hi + y_hi * y_hi); 313*25c28e83SPiotr Jasiukajtis res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 314*25c28e83SPiotr Jasiukajtis 315*25c28e83SPiotr Jasiukajtis res = sqrt(res); 316*25c28e83SPiotr Jasiukajtis 317*25c28e83SPiotr Jasiukajtis res = D2ONM1022 * res; 318*25c28e83SPiotr Jasiukajtis *pz = res; 319*25c28e83SPiotr Jasiukajtis return; 320*25c28e83SPiotr Jasiukajtis } 321*25c28e83SPiotr Jasiukajtis 322*25c28e83SPiotr Jasiukajtis HI(&scl) = (0x7fe00000 - j0); 323*25c28e83SPiotr Jasiukajtis 324*25c28e83SPiotr Jasiukajtis x *= scl; 325*25c28e83SPiotr Jasiukajtis y *= scl; 326*25c28e83SPiotr Jasiukajtis 327*25c28e83SPiotr Jasiukajtis x_hi = (x + D2ON28) - D2ON28; 328*25c28e83SPiotr Jasiukajtis y_hi = (y + D2ON28) - D2ON28; 329*25c28e83SPiotr Jasiukajtis x_lo = x - x_hi; 330*25c28e83SPiotr Jasiukajtis y_lo = y - y_hi; 331*25c28e83SPiotr Jasiukajtis 332*25c28e83SPiotr Jasiukajtis res = (x_hi * x_hi + y_hi * y_hi); 333*25c28e83SPiotr Jasiukajtis res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 334*25c28e83SPiotr Jasiukajtis 335*25c28e83SPiotr Jasiukajtis res = sqrt(res); 336*25c28e83SPiotr Jasiukajtis 337*25c28e83SPiotr Jasiukajtis HI(&scl) = j0; 338*25c28e83SPiotr Jasiukajtis 339*25c28e83SPiotr Jasiukajtis res = scl * res; 340*25c28e83SPiotr Jasiukajtis *pz = res; 341*25c28e83SPiotr Jasiukajtis } 342*25c28e83SPiotr Jasiukajtis } 343*25c28e83SPiotr Jasiukajtis 344*25c28e83SPiotr Jasiukajtis static void 345*25c28e83SPiotr Jasiukajtis __vhypot_n(int n, double * restrict px, int stridex, double * restrict py, 346*25c28e83SPiotr Jasiukajtis int stridey, double * restrict pz, int stridez) 347*25c28e83SPiotr Jasiukajtis { 348*25c28e83SPiotr Jasiukajtis int hx0, hy0, j0, diff0; 349*25c28e83SPiotr Jasiukajtis double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; 350*25c28e83SPiotr Jasiukajtis double x0, y0, res0; 351*25c28e83SPiotr Jasiukajtis double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ 352*25c28e83SPiotr Jasiukajtis 353*25c28e83SPiotr Jasiukajtis for(; n > 0 ; n--) 354*25c28e83SPiotr Jasiukajtis { 355*25c28e83SPiotr Jasiukajtis x0 = *px; 356*25c28e83SPiotr Jasiukajtis y0 = *py; 357*25c28e83SPiotr Jasiukajtis hx0 = HI(px); 358*25c28e83SPiotr Jasiukajtis hy0 = HI(py); 359*25c28e83SPiotr Jasiukajtis 360*25c28e83SPiotr Jasiukajtis hx0 &= 0x7fffffff; 361*25c28e83SPiotr Jasiukajtis hy0 &= 0x7fffffff; 362*25c28e83SPiotr Jasiukajtis 363*25c28e83SPiotr Jasiukajtis diff0 = hy0 - hx0; 364*25c28e83SPiotr Jasiukajtis j0 = diff0 >> 31; 365*25c28e83SPiotr Jasiukajtis j0 = hy0 - (diff0 & j0); 366*25c28e83SPiotr Jasiukajtis j0 &= 0x7ff00000; 367*25c28e83SPiotr Jasiukajtis 368*25c28e83SPiotr Jasiukajtis px += stridex; 369*25c28e83SPiotr Jasiukajtis py += stridey; 370*25c28e83SPiotr Jasiukajtis 371*25c28e83SPiotr Jasiukajtis HI(&scl0) = (0x7fe00000 - j0); 372*25c28e83SPiotr Jasiukajtis 373*25c28e83SPiotr Jasiukajtis x0 *= scl0; 374*25c28e83SPiotr Jasiukajtis y0 *= scl0; 375*25c28e83SPiotr Jasiukajtis 376*25c28e83SPiotr Jasiukajtis x_hi0 = (x0 + D2ON28) - D2ON28; 377*25c28e83SPiotr Jasiukajtis y_hi0 = (y0 + D2ON28) - D2ON28; 378*25c28e83SPiotr Jasiukajtis x_lo0 = x0 - x_hi0; 379*25c28e83SPiotr Jasiukajtis y_lo0 = y0 - y_hi0; 380*25c28e83SPiotr Jasiukajtis 381*25c28e83SPiotr Jasiukajtis res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0); 382*25c28e83SPiotr Jasiukajtis res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); 383*25c28e83SPiotr Jasiukajtis 384*25c28e83SPiotr Jasiukajtis res0 = sqrt(res0); 385*25c28e83SPiotr Jasiukajtis 386*25c28e83SPiotr Jasiukajtis HI(&scl0) = j0; 387*25c28e83SPiotr Jasiukajtis 388*25c28e83SPiotr Jasiukajtis res0 = scl0 * res0; 389*25c28e83SPiotr Jasiukajtis *pz = res0; 390*25c28e83SPiotr Jasiukajtis 391*25c28e83SPiotr Jasiukajtis pz += stridez; 392*25c28e83SPiotr Jasiukajtis } 393*25c28e83SPiotr Jasiukajtis } 394