1*25c28e83SPiotr Jasiukajtis /* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis 22*25c28e83SPiotr Jasiukajtis /* 23*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24*25c28e83SPiotr Jasiukajtis */ 25*25c28e83SPiotr Jasiukajtis /* 26*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 28*25c28e83SPiotr Jasiukajtis */ 29*25c28e83SPiotr Jasiukajtis 30*25c28e83SPiotr Jasiukajtis #include <sys/isa_defs.h> 31*25c28e83SPiotr Jasiukajtis #include "libm_inlines.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN 34*25c28e83SPiotr Jasiukajtis #define HI(x) *(1+(int*)x) 35*25c28e83SPiotr Jasiukajtis #define LO(x) *(unsigned*)x 36*25c28e83SPiotr Jasiukajtis #else 37*25c28e83SPiotr Jasiukajtis #define HI(x) *(int*)x 38*25c28e83SPiotr Jasiukajtis #define LO(x) *(1+(unsigned*)x) 39*25c28e83SPiotr Jasiukajtis #endif 40*25c28e83SPiotr Jasiukajtis 41*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT 42*25c28e83SPiotr Jasiukajtis #define restrict _Restrict 43*25c28e83SPiotr Jasiukajtis #else 44*25c28e83SPiotr Jasiukajtis #define restrict 45*25c28e83SPiotr Jasiukajtis #endif 46*25c28e83SPiotr Jasiukajtis 47*25c28e83SPiotr Jasiukajtis /* double rhypot(double x, double y) 48*25c28e83SPiotr Jasiukajtis * 49*25c28e83SPiotr Jasiukajtis * Method : 50*25c28e83SPiotr Jasiukajtis * 1. Special cases: 51*25c28e83SPiotr Jasiukajtis * x or y = Inf => 0 52*25c28e83SPiotr Jasiukajtis * x or y = NaN => QNaN 53*25c28e83SPiotr Jasiukajtis * x and y = 0 => Inf + divide-by-zero 54*25c28e83SPiotr Jasiukajtis * 2. Computes rhypot(x,y): 55*25c28e83SPiotr Jasiukajtis * rhypot(x,y) = m * sqrt(1/(xnm * xnm + ynm * ynm)) 56*25c28e83SPiotr Jasiukajtis * Where: 57*25c28e83SPiotr Jasiukajtis * m = 1/max(|x|,|y|) 58*25c28e83SPiotr Jasiukajtis * xnm = x * m 59*25c28e83SPiotr Jasiukajtis * ynm = y * m 60*25c28e83SPiotr Jasiukajtis * 61*25c28e83SPiotr Jasiukajtis * Compute 1/(xnm * xnm + ynm * ynm) by simulating 62*25c28e83SPiotr Jasiukajtis * muti-precision arithmetic. 63*25c28e83SPiotr Jasiukajtis * 64*25c28e83SPiotr Jasiukajtis * Accuracy: 65*25c28e83SPiotr Jasiukajtis * Maximum error observed: less than 0.869 ulp after 1.000.000.000 66*25c28e83SPiotr Jasiukajtis * results. 67*25c28e83SPiotr Jasiukajtis */ 68*25c28e83SPiotr Jasiukajtis 69*25c28e83SPiotr Jasiukajtis extern double sqrt(double); 70*25c28e83SPiotr Jasiukajtis extern double fabs(double); 71*25c28e83SPiotr Jasiukajtis 72*25c28e83SPiotr Jasiukajtis static const int __vlibm_TBL_rhypot[] = { 73*25c28e83SPiotr Jasiukajtis /* i = [0,127] 74*25c28e83SPiotr Jasiukajtis * TBL[i] = 0x3ff00000 + *(int*)&(1.0 / *(double*)&(0x3ff0000000000000ULL + (i << 45))); */ 75*25c28e83SPiotr Jasiukajtis 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465, 76*25c28e83SPiotr Jasiukajtis 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a, 77*25c28e83SPiotr Jasiukajtis 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6, 78*25c28e83SPiotr Jasiukajtis 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3, 79*25c28e83SPiotr Jasiukajtis 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b, 80*25c28e83SPiotr Jasiukajtis 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036, 81*25c28e83SPiotr Jasiukajtis 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01, 82*25c28e83SPiotr Jasiukajtis 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1, 83*25c28e83SPiotr Jasiukajtis 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb, 84*25c28e83SPiotr Jasiukajtis 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5, 85*25c28e83SPiotr Jasiukajtis 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405, 86*25c28e83SPiotr Jasiukajtis 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc, 87*25c28e83SPiotr Jasiukajtis 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7, 88*25c28e83SPiotr Jasiukajtis 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec, 89*25c28e83SPiotr Jasiukajtis 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b, 90*25c28e83SPiotr Jasiukajtis 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed, 91*25c28e83SPiotr Jasiukajtis 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150, 92*25c28e83SPiotr Jasiukajtis 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539, 93*25c28e83SPiotr Jasiukajtis 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66, 94*25c28e83SPiotr Jasiukajtis 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995, 95*25c28e83SPiotr Jasiukajtis 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d, 96*25c28e83SPiotr Jasiukajtis 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19, 97*25c28e83SPiotr Jasiukajtis 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404, 98*25c28e83SPiotr Jasiukajtis 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22, 99*25c28e83SPiotr Jasiukajtis 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47, 100*25c28e83SPiotr Jasiukajtis 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a, 101*25c28e83SPiotr Jasiukajtis 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06, 102*25c28e83SPiotr Jasiukajtis 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358, 103*25c28e83SPiotr Jasiukajtis 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20, 104*25c28e83SPiotr Jasiukajtis 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f, 105*25c28e83SPiotr Jasiukajtis 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197, 106*25c28e83SPiotr Jasiukajtis 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010, 107*25c28e83SPiotr Jasiukajtis }; 108*25c28e83SPiotr Jasiukajtis 109*25c28e83SPiotr Jasiukajtis static const unsigned long long LCONST[] = { 110*25c28e83SPiotr Jasiukajtis 0x3ff0000000000000ULL, /* DONE = 1.0 */ 111*25c28e83SPiotr Jasiukajtis 0x4000000000000000ULL, /* DTWO = 2.0 */ 112*25c28e83SPiotr Jasiukajtis 0x4230000000000000ULL, /* D2ON36 = 2**36 */ 113*25c28e83SPiotr Jasiukajtis 0x7fd0000000000000ULL, /* D2ON1022 = 2**1022 */ 114*25c28e83SPiotr Jasiukajtis 0x3cb0000000000000ULL, /* D2ONM52 = 2**-52 */ 115*25c28e83SPiotr Jasiukajtis }; 116*25c28e83SPiotr Jasiukajtis 117*25c28e83SPiotr Jasiukajtis #define RET_SC(I) \ 118*25c28e83SPiotr Jasiukajtis px += stridex; \ 119*25c28e83SPiotr Jasiukajtis py += stridey; \ 120*25c28e83SPiotr Jasiukajtis pz += stridez; \ 121*25c28e83SPiotr Jasiukajtis if (--n <= 0) \ 122*25c28e83SPiotr Jasiukajtis break; \ 123*25c28e83SPiotr Jasiukajtis goto start##I; 124*25c28e83SPiotr Jasiukajtis 125*25c28e83SPiotr Jasiukajtis #define RETURN(I, ret) \ 126*25c28e83SPiotr Jasiukajtis { \ 127*25c28e83SPiotr Jasiukajtis pz[0] = (ret); \ 128*25c28e83SPiotr Jasiukajtis RET_SC(I) \ 129*25c28e83SPiotr Jasiukajtis } 130*25c28e83SPiotr Jasiukajtis 131*25c28e83SPiotr Jasiukajtis #define PREP(I) \ 132*25c28e83SPiotr Jasiukajtis hx##I = HI(px); \ 133*25c28e83SPiotr Jasiukajtis hy##I = HI(py); \ 134*25c28e83SPiotr Jasiukajtis hx##I &= 0x7fffffff; \ 135*25c28e83SPiotr Jasiukajtis hy##I &= 0x7fffffff; \ 136*25c28e83SPiotr Jasiukajtis pz##I = pz; \ 137*25c28e83SPiotr Jasiukajtis if (hx##I >= 0x7ff00000 || hy##I >= 0x7ff00000) /* |X| or |Y| = Inf,NaN */ \ 138*25c28e83SPiotr Jasiukajtis { \ 139*25c28e83SPiotr Jasiukajtis lx = LO(px); \ 140*25c28e83SPiotr Jasiukajtis ly = LO(py); \ 141*25c28e83SPiotr Jasiukajtis x = *px; \ 142*25c28e83SPiotr Jasiukajtis y = *py; \ 143*25c28e83SPiotr Jasiukajtis if (hx##I == 0x7ff00000 && lx == 0) res0 = 0.0; /* |X| = Inf */ \ 144*25c28e83SPiotr Jasiukajtis else if (hy##I == 0x7ff00000 && ly == 0) res0 = 0.0; /* |Y| = Inf */ \ 145*25c28e83SPiotr Jasiukajtis else res0 = fabs(x) + fabs(y); \ 146*25c28e83SPiotr Jasiukajtis \ 147*25c28e83SPiotr Jasiukajtis RETURN (I, res0) \ 148*25c28e83SPiotr Jasiukajtis } \ 149*25c28e83SPiotr Jasiukajtis x##I = *px; \ 150*25c28e83SPiotr Jasiukajtis y##I = *py; \ 151*25c28e83SPiotr Jasiukajtis diff0 = hy##I - hx##I; \ 152*25c28e83SPiotr Jasiukajtis j0 = diff0 >> 31; \ 153*25c28e83SPiotr Jasiukajtis if (hx##I < 0x00100000 && hy##I < 0x00100000) /* |X| and |Y| = subnormal or zero */ \ 154*25c28e83SPiotr Jasiukajtis { \ 155*25c28e83SPiotr Jasiukajtis lx = LO(px); \ 156*25c28e83SPiotr Jasiukajtis ly = LO(py); \ 157*25c28e83SPiotr Jasiukajtis x = x##I; \ 158*25c28e83SPiotr Jasiukajtis y = y##I; \ 159*25c28e83SPiotr Jasiukajtis \ 160*25c28e83SPiotr Jasiukajtis if ((hx##I | hy##I | lx | ly) == 0) /* |X| and |Y| = 0 */ \ 161*25c28e83SPiotr Jasiukajtis RETURN (I, DONE / 0.0) \ 162*25c28e83SPiotr Jasiukajtis \ 163*25c28e83SPiotr Jasiukajtis x = fabs(x); \ 164*25c28e83SPiotr Jasiukajtis y = fabs(y); \ 165*25c28e83SPiotr Jasiukajtis \ 166*25c28e83SPiotr Jasiukajtis x = *(long long*)&x; \ 167*25c28e83SPiotr Jasiukajtis y = *(long long*)&y; \ 168*25c28e83SPiotr Jasiukajtis \ 169*25c28e83SPiotr Jasiukajtis x *= D2ONM52; \ 170*25c28e83SPiotr Jasiukajtis y *= D2ONM52; \ 171*25c28e83SPiotr Jasiukajtis \ 172*25c28e83SPiotr Jasiukajtis x_hi0 = (x + D2ON36) - D2ON36; \ 173*25c28e83SPiotr Jasiukajtis y_hi0 = (y + D2ON36) - D2ON36; \ 174*25c28e83SPiotr Jasiukajtis x_lo0 = x - x_hi0; \ 175*25c28e83SPiotr Jasiukajtis y_lo0 = y - y_hi0; \ 176*25c28e83SPiotr Jasiukajtis res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); \ 177*25c28e83SPiotr Jasiukajtis res0_lo = ((x + x_hi0) * x_lo0 + (y + y_hi0) * y_lo0); \ 178*25c28e83SPiotr Jasiukajtis \ 179*25c28e83SPiotr Jasiukajtis dres0 = res0_hi + res0_lo; \ 180*25c28e83SPiotr Jasiukajtis \ 181*25c28e83SPiotr Jasiukajtis iarr0 = HI(&dres0); \ 182*25c28e83SPiotr Jasiukajtis iexp0 = iarr0 & 0xfff00000; \ 183*25c28e83SPiotr Jasiukajtis \ 184*25c28e83SPiotr Jasiukajtis iarr0 = (iarr0 >> 11) & 0x1fc; \ 185*25c28e83SPiotr Jasiukajtis itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; \ 186*25c28e83SPiotr Jasiukajtis itbl0 -= iexp0; \ 187*25c28e83SPiotr Jasiukajtis HI(&dd0) = itbl0; \ 188*25c28e83SPiotr Jasiukajtis LO(&dd0) = 0; \ 189*25c28e83SPiotr Jasiukajtis \ 190*25c28e83SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0); \ 191*25c28e83SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0); \ 192*25c28e83SPiotr Jasiukajtis dres0 = dd0 * (DTWO - dd0 * dres0); \ 193*25c28e83SPiotr Jasiukajtis \ 194*25c28e83SPiotr Jasiukajtis HI(&res0) = HI(&dres0) & 0xffffff00; \ 195*25c28e83SPiotr Jasiukajtis LO(&res0) = 0; \ 196*25c28e83SPiotr Jasiukajtis res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; \ 197*25c28e83SPiotr Jasiukajtis res0 = sqrt (res0); \ 198*25c28e83SPiotr Jasiukajtis \ 199*25c28e83SPiotr Jasiukajtis res0 = D2ON1022 * res0; \ 200*25c28e83SPiotr Jasiukajtis RETURN (I, res0) \ 201*25c28e83SPiotr Jasiukajtis } \ 202*25c28e83SPiotr Jasiukajtis j0 = hy##I - (diff0 & j0); \ 203*25c28e83SPiotr Jasiukajtis j0 &= 0x7ff00000; \ 204*25c28e83SPiotr Jasiukajtis HI(&scl##I) = 0x7ff00000 - j0; 205*25c28e83SPiotr Jasiukajtis 206*25c28e83SPiotr Jasiukajtis void 207*25c28e83SPiotr Jasiukajtis __vrhypot(int n, double * restrict px, int stridex, double * restrict py, 208*25c28e83SPiotr Jasiukajtis int stridey, double * restrict pz, int stridez) 209*25c28e83SPiotr Jasiukajtis { 210*25c28e83SPiotr Jasiukajtis int i = 0; 211*25c28e83SPiotr Jasiukajtis double x, y; 212*25c28e83SPiotr Jasiukajtis double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; 213*25c28e83SPiotr Jasiukajtis double x0, y0, res0, dd0; 214*25c28e83SPiotr Jasiukajtis double res0_hi,res0_lo, dres0; 215*25c28e83SPiotr Jasiukajtis double x_hi1, x_lo1, y_hi1, y_lo1, scl1 = 0; 216*25c28e83SPiotr Jasiukajtis double x1 = 0.0L, y1 = 0.0L, res1, dd1; 217*25c28e83SPiotr Jasiukajtis double res1_hi,res1_lo, dres1; 218*25c28e83SPiotr Jasiukajtis double x_hi2, x_lo2, y_hi2, y_lo2, scl2 = 0; 219*25c28e83SPiotr Jasiukajtis double x2, y2, res2, dd2; 220*25c28e83SPiotr Jasiukajtis double res2_hi,res2_lo, dres2; 221*25c28e83SPiotr Jasiukajtis 222*25c28e83SPiotr Jasiukajtis int hx0, hy0, j0, diff0; 223*25c28e83SPiotr Jasiukajtis int iarr0, iexp0, itbl0; 224*25c28e83SPiotr Jasiukajtis int hx1, hy1; 225*25c28e83SPiotr Jasiukajtis int iarr1, iexp1, itbl1; 226*25c28e83SPiotr Jasiukajtis int hx2, hy2; 227*25c28e83SPiotr Jasiukajtis int iarr2, iexp2, itbl2; 228*25c28e83SPiotr Jasiukajtis 229*25c28e83SPiotr Jasiukajtis int lx, ly; 230*25c28e83SPiotr Jasiukajtis 231*25c28e83SPiotr Jasiukajtis double DONE = ((double*)LCONST)[0]; 232*25c28e83SPiotr Jasiukajtis double DTWO = ((double*)LCONST)[1]; 233*25c28e83SPiotr Jasiukajtis double D2ON36 = ((double*)LCONST)[2]; 234*25c28e83SPiotr Jasiukajtis double D2ON1022 = ((double*)LCONST)[3]; 235*25c28e83SPiotr Jasiukajtis double D2ONM52 = ((double*)LCONST)[4]; 236*25c28e83SPiotr Jasiukajtis 237*25c28e83SPiotr Jasiukajtis double *pz0, *pz1 = 0, *pz2; 238*25c28e83SPiotr Jasiukajtis 239*25c28e83SPiotr Jasiukajtis do 240*25c28e83SPiotr Jasiukajtis { 241*25c28e83SPiotr Jasiukajtis start0: 242*25c28e83SPiotr Jasiukajtis PREP(0) 243*25c28e83SPiotr Jasiukajtis px += stridex; 244*25c28e83SPiotr Jasiukajtis py += stridey; 245*25c28e83SPiotr Jasiukajtis pz += stridez; 246*25c28e83SPiotr Jasiukajtis i = 1; 247*25c28e83SPiotr Jasiukajtis if (--n <= 0) 248*25c28e83SPiotr Jasiukajtis break; 249*25c28e83SPiotr Jasiukajtis 250*25c28e83SPiotr Jasiukajtis start1: 251*25c28e83SPiotr Jasiukajtis PREP(1) 252*25c28e83SPiotr Jasiukajtis px += stridex; 253*25c28e83SPiotr Jasiukajtis py += stridey; 254*25c28e83SPiotr Jasiukajtis pz += stridez; 255*25c28e83SPiotr Jasiukajtis i = 2; 256*25c28e83SPiotr Jasiukajtis if (--n <= 0) 257*25c28e83SPiotr Jasiukajtis break; 258*25c28e83SPiotr Jasiukajtis 259*25c28e83SPiotr Jasiukajtis start2: 260*25c28e83SPiotr Jasiukajtis PREP(2) 261*25c28e83SPiotr Jasiukajtis 262*25c28e83SPiotr Jasiukajtis x0 *= scl0; 263*25c28e83SPiotr Jasiukajtis y0 *= scl0; 264*25c28e83SPiotr Jasiukajtis x1 *= scl1; 265*25c28e83SPiotr Jasiukajtis y1 *= scl1; 266*25c28e83SPiotr Jasiukajtis x2 *= scl2; 267*25c28e83SPiotr Jasiukajtis y2 *= scl2; 268*25c28e83SPiotr Jasiukajtis 269*25c28e83SPiotr Jasiukajtis x_hi0 = (x0 + D2ON36) - D2ON36; 270*25c28e83SPiotr Jasiukajtis y_hi0 = (y0 + D2ON36) - D2ON36; 271*25c28e83SPiotr Jasiukajtis x_hi1 = (x1 + D2ON36) - D2ON36; 272*25c28e83SPiotr Jasiukajtis y_hi1 = (y1 + D2ON36) - D2ON36; 273*25c28e83SPiotr Jasiukajtis x_hi2 = (x2 + D2ON36) - D2ON36; 274*25c28e83SPiotr Jasiukajtis y_hi2 = (y2 + D2ON36) - D2ON36; 275*25c28e83SPiotr Jasiukajtis x_lo0 = x0 - x_hi0; 276*25c28e83SPiotr Jasiukajtis y_lo0 = y0 - y_hi0; 277*25c28e83SPiotr Jasiukajtis x_lo1 = x1 - x_hi1; 278*25c28e83SPiotr Jasiukajtis y_lo1 = y1 - y_hi1; 279*25c28e83SPiotr Jasiukajtis x_lo2 = x2 - x_hi2; 280*25c28e83SPiotr Jasiukajtis y_lo2 = y2 - y_hi2; 281*25c28e83SPiotr Jasiukajtis res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); 282*25c28e83SPiotr Jasiukajtis res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1); 283*25c28e83SPiotr Jasiukajtis res2_hi = (x_hi2 * x_hi2 + y_hi2 * y_hi2); 284*25c28e83SPiotr Jasiukajtis res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); 285*25c28e83SPiotr Jasiukajtis res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1); 286*25c28e83SPiotr Jasiukajtis res2_lo = ((x2 + x_hi2) * x_lo2 + (y2 + y_hi2) * y_lo2); 287*25c28e83SPiotr Jasiukajtis 288*25c28e83SPiotr Jasiukajtis dres0 = res0_hi + res0_lo; 289*25c28e83SPiotr Jasiukajtis dres1 = res1_hi + res1_lo; 290*25c28e83SPiotr Jasiukajtis dres2 = res2_hi + res2_lo; 291*25c28e83SPiotr Jasiukajtis 292*25c28e83SPiotr Jasiukajtis iarr0 = HI(&dres0); 293*25c28e83SPiotr Jasiukajtis iarr1 = HI(&dres1); 294*25c28e83SPiotr Jasiukajtis iarr2 = HI(&dres2); 295*25c28e83SPiotr Jasiukajtis iexp0 = iarr0 & 0xfff00000; 296*25c28e83SPiotr Jasiukajtis iexp1 = iarr1 & 0xfff00000; 297*25c28e83SPiotr Jasiukajtis iexp2 = iarr2 & 0xfff00000; 298*25c28e83SPiotr Jasiukajtis 299*25c28e83SPiotr Jasiukajtis iarr0 = (iarr0 >> 11) & 0x1fc; 300*25c28e83SPiotr Jasiukajtis iarr1 = (iarr1 >> 11) & 0x1fc; 301*25c28e83SPiotr Jasiukajtis iarr2 = (iarr2 >> 11) & 0x1fc; 302*25c28e83SPiotr Jasiukajtis itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; 303*25c28e83SPiotr Jasiukajtis itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0]; 304*25c28e83SPiotr Jasiukajtis itbl2 = ((int*)((char*)__vlibm_TBL_rhypot + iarr2))[0]; 305*25c28e83SPiotr Jasiukajtis itbl0 -= iexp0; 306*25c28e83SPiotr Jasiukajtis itbl1 -= iexp1; 307*25c28e83SPiotr Jasiukajtis itbl2 -= iexp2; 308*25c28e83SPiotr Jasiukajtis HI(&dd0) = itbl0; 309*25c28e83SPiotr Jasiukajtis HI(&dd1) = itbl1; 310*25c28e83SPiotr Jasiukajtis HI(&dd2) = itbl2; 311*25c28e83SPiotr Jasiukajtis LO(&dd0) = 0; 312*25c28e83SPiotr Jasiukajtis LO(&dd1) = 0; 313*25c28e83SPiotr Jasiukajtis LO(&dd2) = 0; 314*25c28e83SPiotr Jasiukajtis 315*25c28e83SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0); 316*25c28e83SPiotr Jasiukajtis dd1 = dd1 * (DTWO - dd1 * dres1); 317*25c28e83SPiotr Jasiukajtis dd2 = dd2 * (DTWO - dd2 * dres2); 318*25c28e83SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0); 319*25c28e83SPiotr Jasiukajtis dd1 = dd1 * (DTWO - dd1 * dres1); 320*25c28e83SPiotr Jasiukajtis dd2 = dd2 * (DTWO - dd2 * dres2); 321*25c28e83SPiotr Jasiukajtis dres0 = dd0 * (DTWO - dd0 * dres0); 322*25c28e83SPiotr Jasiukajtis dres1 = dd1 * (DTWO - dd1 * dres1); 323*25c28e83SPiotr Jasiukajtis dres2 = dd2 * (DTWO - dd2 * dres2); 324*25c28e83SPiotr Jasiukajtis 325*25c28e83SPiotr Jasiukajtis HI(&res0) = HI(&dres0) & 0xffffff00; 326*25c28e83SPiotr Jasiukajtis HI(&res1) = HI(&dres1) & 0xffffff00; 327*25c28e83SPiotr Jasiukajtis HI(&res2) = HI(&dres2) & 0xffffff00; 328*25c28e83SPiotr Jasiukajtis LO(&res0) = 0; 329*25c28e83SPiotr Jasiukajtis LO(&res1) = 0; 330*25c28e83SPiotr Jasiukajtis LO(&res2) = 0; 331*25c28e83SPiotr Jasiukajtis res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; 332*25c28e83SPiotr Jasiukajtis res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1; 333*25c28e83SPiotr Jasiukajtis res2 += (DONE - res2_hi * res2 - res2_lo * res2) * dres2; 334*25c28e83SPiotr Jasiukajtis res0 = sqrt (res0); 335*25c28e83SPiotr Jasiukajtis res1 = sqrt (res1); 336*25c28e83SPiotr Jasiukajtis res2 = sqrt (res2); 337*25c28e83SPiotr Jasiukajtis 338*25c28e83SPiotr Jasiukajtis res0 = scl0 * res0; 339*25c28e83SPiotr Jasiukajtis res1 = scl1 * res1; 340*25c28e83SPiotr Jasiukajtis res2 = scl2 * res2; 341*25c28e83SPiotr Jasiukajtis 342*25c28e83SPiotr Jasiukajtis *pz0 = res0; 343*25c28e83SPiotr Jasiukajtis *pz1 = res1; 344*25c28e83SPiotr Jasiukajtis *pz2 = res2; 345*25c28e83SPiotr Jasiukajtis 346*25c28e83SPiotr Jasiukajtis px += stridex; 347*25c28e83SPiotr Jasiukajtis py += stridey; 348*25c28e83SPiotr Jasiukajtis pz += stridez; 349*25c28e83SPiotr Jasiukajtis i = 0; 350*25c28e83SPiotr Jasiukajtis 351*25c28e83SPiotr Jasiukajtis } while (--n > 0); 352*25c28e83SPiotr Jasiukajtis 353*25c28e83SPiotr Jasiukajtis if (i > 0) 354*25c28e83SPiotr Jasiukajtis { 355*25c28e83SPiotr Jasiukajtis x0 *= scl0; 356*25c28e83SPiotr Jasiukajtis y0 *= scl0; 357*25c28e83SPiotr Jasiukajtis 358*25c28e83SPiotr Jasiukajtis x_hi0 = (x0 + D2ON36) - D2ON36; 359*25c28e83SPiotr Jasiukajtis y_hi0 = (y0 + D2ON36) - D2ON36; 360*25c28e83SPiotr Jasiukajtis x_lo0 = x0 - x_hi0; 361*25c28e83SPiotr Jasiukajtis y_lo0 = y0 - y_hi0; 362*25c28e83SPiotr Jasiukajtis res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); 363*25c28e83SPiotr Jasiukajtis res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); 364*25c28e83SPiotr Jasiukajtis 365*25c28e83SPiotr Jasiukajtis dres0 = res0_hi + res0_lo; 366*25c28e83SPiotr Jasiukajtis 367*25c28e83SPiotr Jasiukajtis iarr0 = HI(&dres0); 368*25c28e83SPiotr Jasiukajtis iexp0 = iarr0 & 0xfff00000; 369*25c28e83SPiotr Jasiukajtis 370*25c28e83SPiotr Jasiukajtis iarr0 = (iarr0 >> 11) & 0x1fc; 371*25c28e83SPiotr Jasiukajtis itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; 372*25c28e83SPiotr Jasiukajtis itbl0 -= iexp0; 373*25c28e83SPiotr Jasiukajtis HI(&dd0) = itbl0; 374*25c28e83SPiotr Jasiukajtis LO(&dd0) = 0; 375*25c28e83SPiotr Jasiukajtis 376*25c28e83SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0); 377*25c28e83SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0); 378*25c28e83SPiotr Jasiukajtis dres0 = dd0 * (DTWO - dd0 * dres0); 379*25c28e83SPiotr Jasiukajtis 380*25c28e83SPiotr Jasiukajtis HI(&res0) = HI(&dres0) & 0xffffff00; 381*25c28e83SPiotr Jasiukajtis LO(&res0) = 0; 382*25c28e83SPiotr Jasiukajtis res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; 383*25c28e83SPiotr Jasiukajtis res0 = sqrt (res0); 384*25c28e83SPiotr Jasiukajtis 385*25c28e83SPiotr Jasiukajtis res0 = scl0 * res0; 386*25c28e83SPiotr Jasiukajtis 387*25c28e83SPiotr Jasiukajtis *pz0 = res0; 388*25c28e83SPiotr Jasiukajtis 389*25c28e83SPiotr Jasiukajtis if (i > 1) 390*25c28e83SPiotr Jasiukajtis { 391*25c28e83SPiotr Jasiukajtis x1 *= scl1; 392*25c28e83SPiotr Jasiukajtis y1 *= scl1; 393*25c28e83SPiotr Jasiukajtis 394*25c28e83SPiotr Jasiukajtis x_hi1 = (x1 + D2ON36) - D2ON36; 395*25c28e83SPiotr Jasiukajtis y_hi1 = (y1 + D2ON36) - D2ON36; 396*25c28e83SPiotr Jasiukajtis x_lo1 = x1 - x_hi1; 397*25c28e83SPiotr Jasiukajtis y_lo1 = y1 - y_hi1; 398*25c28e83SPiotr Jasiukajtis res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1); 399*25c28e83SPiotr Jasiukajtis res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1); 400*25c28e83SPiotr Jasiukajtis 401*25c28e83SPiotr Jasiukajtis dres1 = res1_hi + res1_lo; 402*25c28e83SPiotr Jasiukajtis 403*25c28e83SPiotr Jasiukajtis iarr1 = HI(&dres1); 404*25c28e83SPiotr Jasiukajtis iexp1 = iarr1 & 0xfff00000; 405*25c28e83SPiotr Jasiukajtis 406*25c28e83SPiotr Jasiukajtis iarr1 = (iarr1 >> 11) & 0x1fc; 407*25c28e83SPiotr Jasiukajtis itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0]; 408*25c28e83SPiotr Jasiukajtis itbl1 -= iexp1; 409*25c28e83SPiotr Jasiukajtis HI(&dd1) = itbl1; 410*25c28e83SPiotr Jasiukajtis LO(&dd1) = 0; 411*25c28e83SPiotr Jasiukajtis 412*25c28e83SPiotr Jasiukajtis dd1 = dd1 * (DTWO - dd1 * dres1); 413*25c28e83SPiotr Jasiukajtis dd1 = dd1 * (DTWO - dd1 * dres1); 414*25c28e83SPiotr Jasiukajtis dres1 = dd1 * (DTWO - dd1 * dres1); 415*25c28e83SPiotr Jasiukajtis 416*25c28e83SPiotr Jasiukajtis HI(&res1) = HI(&dres1) & 0xffffff00; 417*25c28e83SPiotr Jasiukajtis LO(&res1) = 0; 418*25c28e83SPiotr Jasiukajtis res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1; 419*25c28e83SPiotr Jasiukajtis res1 = sqrt (res1); 420*25c28e83SPiotr Jasiukajtis 421*25c28e83SPiotr Jasiukajtis res1 = scl1 * res1; 422*25c28e83SPiotr Jasiukajtis 423*25c28e83SPiotr Jasiukajtis *pz1 = res1; 424*25c28e83SPiotr Jasiukajtis } 425*25c28e83SPiotr Jasiukajtis } 426*25c28e83SPiotr Jasiukajtis } 427