1*5b2ba9d3SPiotr Jasiukajtis /*
2*5b2ba9d3SPiotr Jasiukajtis * CDDL HEADER START
3*5b2ba9d3SPiotr Jasiukajtis *
4*5b2ba9d3SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*5b2ba9d3SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*5b2ba9d3SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*5b2ba9d3SPiotr Jasiukajtis *
8*5b2ba9d3SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5b2ba9d3SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*5b2ba9d3SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*5b2ba9d3SPiotr Jasiukajtis * and limitations under the License.
12*5b2ba9d3SPiotr Jasiukajtis *
13*5b2ba9d3SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*5b2ba9d3SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5b2ba9d3SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*5b2ba9d3SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*5b2ba9d3SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*5b2ba9d3SPiotr Jasiukajtis *
19*5b2ba9d3SPiotr Jasiukajtis * CDDL HEADER END
20*5b2ba9d3SPiotr Jasiukajtis */
21*5b2ba9d3SPiotr Jasiukajtis
22*5b2ba9d3SPiotr Jasiukajtis /*
23*5b2ba9d3SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24*5b2ba9d3SPiotr Jasiukajtis */
25*5b2ba9d3SPiotr Jasiukajtis /*
26*5b2ba9d3SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27*5b2ba9d3SPiotr Jasiukajtis * Use is subject to license terms.
28*5b2ba9d3SPiotr Jasiukajtis */
29*5b2ba9d3SPiotr Jasiukajtis
30*5b2ba9d3SPiotr Jasiukajtis #include <sys/isa_defs.h>
31*5b2ba9d3SPiotr Jasiukajtis #include "libm_inlines.h"
32*5b2ba9d3SPiotr Jasiukajtis
33*5b2ba9d3SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN
34*5b2ba9d3SPiotr Jasiukajtis #define HI(x) *(1+(int*)x)
35*5b2ba9d3SPiotr Jasiukajtis #define LO(x) *(unsigned*)x
36*5b2ba9d3SPiotr Jasiukajtis #else
37*5b2ba9d3SPiotr Jasiukajtis #define HI(x) *(int*)x
38*5b2ba9d3SPiotr Jasiukajtis #define LO(x) *(1+(unsigned*)x)
39*5b2ba9d3SPiotr Jasiukajtis #endif
40*5b2ba9d3SPiotr Jasiukajtis
41*5b2ba9d3SPiotr Jasiukajtis #ifdef __RESTRICT
42*5b2ba9d3SPiotr Jasiukajtis #define restrict _Restrict
43*5b2ba9d3SPiotr Jasiukajtis #else
44*5b2ba9d3SPiotr Jasiukajtis #define restrict
45*5b2ba9d3SPiotr Jasiukajtis #endif
46*5b2ba9d3SPiotr Jasiukajtis
47*5b2ba9d3SPiotr Jasiukajtis /* double rhypot(double x, double y)
48*5b2ba9d3SPiotr Jasiukajtis *
49*5b2ba9d3SPiotr Jasiukajtis * Method :
50*5b2ba9d3SPiotr Jasiukajtis * 1. Special cases:
51*5b2ba9d3SPiotr Jasiukajtis * x or y = Inf => 0
52*5b2ba9d3SPiotr Jasiukajtis * x or y = NaN => QNaN
53*5b2ba9d3SPiotr Jasiukajtis * x and y = 0 => Inf + divide-by-zero
54*5b2ba9d3SPiotr Jasiukajtis * 2. Computes rhypot(x,y):
55*5b2ba9d3SPiotr Jasiukajtis * rhypot(x,y) = m * sqrt(1/(xnm * xnm + ynm * ynm))
56*5b2ba9d3SPiotr Jasiukajtis * Where:
57*5b2ba9d3SPiotr Jasiukajtis * m = 1/max(|x|,|y|)
58*5b2ba9d3SPiotr Jasiukajtis * xnm = x * m
59*5b2ba9d3SPiotr Jasiukajtis * ynm = y * m
60*5b2ba9d3SPiotr Jasiukajtis *
61*5b2ba9d3SPiotr Jasiukajtis * Compute 1/(xnm * xnm + ynm * ynm) by simulating
62*5b2ba9d3SPiotr Jasiukajtis * muti-precision arithmetic.
63*5b2ba9d3SPiotr Jasiukajtis *
64*5b2ba9d3SPiotr Jasiukajtis * Accuracy:
65*5b2ba9d3SPiotr Jasiukajtis * Maximum error observed: less than 0.869 ulp after 1.000.000.000
66*5b2ba9d3SPiotr Jasiukajtis * results.
67*5b2ba9d3SPiotr Jasiukajtis */
68*5b2ba9d3SPiotr Jasiukajtis
69*5b2ba9d3SPiotr Jasiukajtis extern double sqrt(double);
70*5b2ba9d3SPiotr Jasiukajtis extern double fabs(double);
71*5b2ba9d3SPiotr Jasiukajtis
72*5b2ba9d3SPiotr Jasiukajtis static const int __vlibm_TBL_rhypot[] = {
73*5b2ba9d3SPiotr Jasiukajtis /* i = [0,127]
74*5b2ba9d3SPiotr Jasiukajtis * TBL[i] = 0x3ff00000 + *(int*)&(1.0 / *(double*)&(0x3ff0000000000000ULL + (i << 45))); */
75*5b2ba9d3SPiotr Jasiukajtis 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
76*5b2ba9d3SPiotr Jasiukajtis 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
77*5b2ba9d3SPiotr Jasiukajtis 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
78*5b2ba9d3SPiotr Jasiukajtis 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
79*5b2ba9d3SPiotr Jasiukajtis 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
80*5b2ba9d3SPiotr Jasiukajtis 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
81*5b2ba9d3SPiotr Jasiukajtis 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
82*5b2ba9d3SPiotr Jasiukajtis 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
83*5b2ba9d3SPiotr Jasiukajtis 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
84*5b2ba9d3SPiotr Jasiukajtis 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
85*5b2ba9d3SPiotr Jasiukajtis 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
86*5b2ba9d3SPiotr Jasiukajtis 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
87*5b2ba9d3SPiotr Jasiukajtis 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
88*5b2ba9d3SPiotr Jasiukajtis 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
89*5b2ba9d3SPiotr Jasiukajtis 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
90*5b2ba9d3SPiotr Jasiukajtis 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
91*5b2ba9d3SPiotr Jasiukajtis 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
92*5b2ba9d3SPiotr Jasiukajtis 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
93*5b2ba9d3SPiotr Jasiukajtis 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
94*5b2ba9d3SPiotr Jasiukajtis 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
95*5b2ba9d3SPiotr Jasiukajtis 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
96*5b2ba9d3SPiotr Jasiukajtis 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
97*5b2ba9d3SPiotr Jasiukajtis 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
98*5b2ba9d3SPiotr Jasiukajtis 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
99*5b2ba9d3SPiotr Jasiukajtis 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
100*5b2ba9d3SPiotr Jasiukajtis 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
101*5b2ba9d3SPiotr Jasiukajtis 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
102*5b2ba9d3SPiotr Jasiukajtis 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
103*5b2ba9d3SPiotr Jasiukajtis 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
104*5b2ba9d3SPiotr Jasiukajtis 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
105*5b2ba9d3SPiotr Jasiukajtis 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
106*5b2ba9d3SPiotr Jasiukajtis 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
107*5b2ba9d3SPiotr Jasiukajtis };
108*5b2ba9d3SPiotr Jasiukajtis
109*5b2ba9d3SPiotr Jasiukajtis static const unsigned long long LCONST[] = {
110*5b2ba9d3SPiotr Jasiukajtis 0x3ff0000000000000ULL, /* DONE = 1.0 */
111*5b2ba9d3SPiotr Jasiukajtis 0x4000000000000000ULL, /* DTWO = 2.0 */
112*5b2ba9d3SPiotr Jasiukajtis 0x4230000000000000ULL, /* D2ON36 = 2**36 */
113*5b2ba9d3SPiotr Jasiukajtis 0x7fd0000000000000ULL, /* D2ON1022 = 2**1022 */
114*5b2ba9d3SPiotr Jasiukajtis 0x3cb0000000000000ULL, /* D2ONM52 = 2**-52 */
115*5b2ba9d3SPiotr Jasiukajtis };
116*5b2ba9d3SPiotr Jasiukajtis
117*5b2ba9d3SPiotr Jasiukajtis #define RET_SC(I) \
118*5b2ba9d3SPiotr Jasiukajtis px += stridex; \
119*5b2ba9d3SPiotr Jasiukajtis py += stridey; \
120*5b2ba9d3SPiotr Jasiukajtis pz += stridez; \
121*5b2ba9d3SPiotr Jasiukajtis if (--n <= 0) \
122*5b2ba9d3SPiotr Jasiukajtis break; \
123*5b2ba9d3SPiotr Jasiukajtis goto start##I;
124*5b2ba9d3SPiotr Jasiukajtis
125*5b2ba9d3SPiotr Jasiukajtis #define RETURN(I, ret) \
126*5b2ba9d3SPiotr Jasiukajtis { \
127*5b2ba9d3SPiotr Jasiukajtis pz[0] = (ret); \
128*5b2ba9d3SPiotr Jasiukajtis RET_SC(I) \
129*5b2ba9d3SPiotr Jasiukajtis }
130*5b2ba9d3SPiotr Jasiukajtis
131*5b2ba9d3SPiotr Jasiukajtis #define PREP(I) \
132*5b2ba9d3SPiotr Jasiukajtis hx##I = HI(px); \
133*5b2ba9d3SPiotr Jasiukajtis hy##I = HI(py); \
134*5b2ba9d3SPiotr Jasiukajtis hx##I &= 0x7fffffff; \
135*5b2ba9d3SPiotr Jasiukajtis hy##I &= 0x7fffffff; \
136*5b2ba9d3SPiotr Jasiukajtis pz##I = pz; \
137*5b2ba9d3SPiotr Jasiukajtis if (hx##I >= 0x7ff00000 || hy##I >= 0x7ff00000) /* |X| or |Y| = Inf,NaN */ \
138*5b2ba9d3SPiotr Jasiukajtis { \
139*5b2ba9d3SPiotr Jasiukajtis lx = LO(px); \
140*5b2ba9d3SPiotr Jasiukajtis ly = LO(py); \
141*5b2ba9d3SPiotr Jasiukajtis x = *px; \
142*5b2ba9d3SPiotr Jasiukajtis y = *py; \
143*5b2ba9d3SPiotr Jasiukajtis if (hx##I == 0x7ff00000 && lx == 0) res0 = 0.0; /* |X| = Inf */ \
144*5b2ba9d3SPiotr Jasiukajtis else if (hy##I == 0x7ff00000 && ly == 0) res0 = 0.0; /* |Y| = Inf */ \
145*5b2ba9d3SPiotr Jasiukajtis else res0 = fabs(x) + fabs(y); \
146*5b2ba9d3SPiotr Jasiukajtis \
147*5b2ba9d3SPiotr Jasiukajtis RETURN (I, res0) \
148*5b2ba9d3SPiotr Jasiukajtis } \
149*5b2ba9d3SPiotr Jasiukajtis x##I = *px; \
150*5b2ba9d3SPiotr Jasiukajtis y##I = *py; \
151*5b2ba9d3SPiotr Jasiukajtis diff0 = hy##I - hx##I; \
152*5b2ba9d3SPiotr Jasiukajtis j0 = diff0 >> 31; \
153*5b2ba9d3SPiotr Jasiukajtis if (hx##I < 0x00100000 && hy##I < 0x00100000) /* |X| and |Y| = subnormal or zero */ \
154*5b2ba9d3SPiotr Jasiukajtis { \
155*5b2ba9d3SPiotr Jasiukajtis lx = LO(px); \
156*5b2ba9d3SPiotr Jasiukajtis ly = LO(py); \
157*5b2ba9d3SPiotr Jasiukajtis x = x##I; \
158*5b2ba9d3SPiotr Jasiukajtis y = y##I; \
159*5b2ba9d3SPiotr Jasiukajtis \
160*5b2ba9d3SPiotr Jasiukajtis if ((hx##I | hy##I | lx | ly) == 0) /* |X| and |Y| = 0 */ \
161*5b2ba9d3SPiotr Jasiukajtis RETURN (I, DONE / 0.0) \
162*5b2ba9d3SPiotr Jasiukajtis \
163*5b2ba9d3SPiotr Jasiukajtis x = fabs(x); \
164*5b2ba9d3SPiotr Jasiukajtis y = fabs(y); \
165*5b2ba9d3SPiotr Jasiukajtis \
166*5b2ba9d3SPiotr Jasiukajtis x = *(long long*)&x; \
167*5b2ba9d3SPiotr Jasiukajtis y = *(long long*)&y; \
168*5b2ba9d3SPiotr Jasiukajtis \
169*5b2ba9d3SPiotr Jasiukajtis x *= D2ONM52; \
170*5b2ba9d3SPiotr Jasiukajtis y *= D2ONM52; \
171*5b2ba9d3SPiotr Jasiukajtis \
172*5b2ba9d3SPiotr Jasiukajtis x_hi0 = (x + D2ON36) - D2ON36; \
173*5b2ba9d3SPiotr Jasiukajtis y_hi0 = (y + D2ON36) - D2ON36; \
174*5b2ba9d3SPiotr Jasiukajtis x_lo0 = x - x_hi0; \
175*5b2ba9d3SPiotr Jasiukajtis y_lo0 = y - y_hi0; \
176*5b2ba9d3SPiotr Jasiukajtis res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); \
177*5b2ba9d3SPiotr Jasiukajtis res0_lo = ((x + x_hi0) * x_lo0 + (y + y_hi0) * y_lo0); \
178*5b2ba9d3SPiotr Jasiukajtis \
179*5b2ba9d3SPiotr Jasiukajtis dres0 = res0_hi + res0_lo; \
180*5b2ba9d3SPiotr Jasiukajtis \
181*5b2ba9d3SPiotr Jasiukajtis iarr0 = HI(&dres0); \
182*5b2ba9d3SPiotr Jasiukajtis iexp0 = iarr0 & 0xfff00000; \
183*5b2ba9d3SPiotr Jasiukajtis \
184*5b2ba9d3SPiotr Jasiukajtis iarr0 = (iarr0 >> 11) & 0x1fc; \
185*5b2ba9d3SPiotr Jasiukajtis itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; \
186*5b2ba9d3SPiotr Jasiukajtis itbl0 -= iexp0; \
187*5b2ba9d3SPiotr Jasiukajtis HI(&dd0) = itbl0; \
188*5b2ba9d3SPiotr Jasiukajtis LO(&dd0) = 0; \
189*5b2ba9d3SPiotr Jasiukajtis \
190*5b2ba9d3SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0); \
191*5b2ba9d3SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0); \
192*5b2ba9d3SPiotr Jasiukajtis dres0 = dd0 * (DTWO - dd0 * dres0); \
193*5b2ba9d3SPiotr Jasiukajtis \
194*5b2ba9d3SPiotr Jasiukajtis HI(&res0) = HI(&dres0) & 0xffffff00; \
195*5b2ba9d3SPiotr Jasiukajtis LO(&res0) = 0; \
196*5b2ba9d3SPiotr Jasiukajtis res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; \
197*5b2ba9d3SPiotr Jasiukajtis res0 = sqrt (res0); \
198*5b2ba9d3SPiotr Jasiukajtis \
199*5b2ba9d3SPiotr Jasiukajtis res0 = D2ON1022 * res0; \
200*5b2ba9d3SPiotr Jasiukajtis RETURN (I, res0) \
201*5b2ba9d3SPiotr Jasiukajtis } \
202*5b2ba9d3SPiotr Jasiukajtis j0 = hy##I - (diff0 & j0); \
203*5b2ba9d3SPiotr Jasiukajtis j0 &= 0x7ff00000; \
204*5b2ba9d3SPiotr Jasiukajtis HI(&scl##I) = 0x7ff00000 - j0;
205*5b2ba9d3SPiotr Jasiukajtis
206*5b2ba9d3SPiotr Jasiukajtis void
__vrhypot(int n,double * restrict px,int stridex,double * restrict py,int stridey,double * restrict pz,int stridez)207*5b2ba9d3SPiotr Jasiukajtis __vrhypot(int n, double * restrict px, int stridex, double * restrict py,
208*5b2ba9d3SPiotr Jasiukajtis int stridey, double * restrict pz, int stridez)
209*5b2ba9d3SPiotr Jasiukajtis {
210*5b2ba9d3SPiotr Jasiukajtis int i = 0;
211*5b2ba9d3SPiotr Jasiukajtis double x, y;
212*5b2ba9d3SPiotr Jasiukajtis double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
213*5b2ba9d3SPiotr Jasiukajtis double x0, y0, res0, dd0;
214*5b2ba9d3SPiotr Jasiukajtis double res0_hi,res0_lo, dres0;
215*5b2ba9d3SPiotr Jasiukajtis double x_hi1, x_lo1, y_hi1, y_lo1, scl1 = 0;
216*5b2ba9d3SPiotr Jasiukajtis double x1 = 0.0L, y1 = 0.0L, res1, dd1;
217*5b2ba9d3SPiotr Jasiukajtis double res1_hi,res1_lo, dres1;
218*5b2ba9d3SPiotr Jasiukajtis double x_hi2, x_lo2, y_hi2, y_lo2, scl2 = 0;
219*5b2ba9d3SPiotr Jasiukajtis double x2, y2, res2, dd2;
220*5b2ba9d3SPiotr Jasiukajtis double res2_hi,res2_lo, dres2;
221*5b2ba9d3SPiotr Jasiukajtis
222*5b2ba9d3SPiotr Jasiukajtis int hx0, hy0, j0, diff0;
223*5b2ba9d3SPiotr Jasiukajtis int iarr0, iexp0, itbl0;
224*5b2ba9d3SPiotr Jasiukajtis int hx1, hy1;
225*5b2ba9d3SPiotr Jasiukajtis int iarr1, iexp1, itbl1;
226*5b2ba9d3SPiotr Jasiukajtis int hx2, hy2;
227*5b2ba9d3SPiotr Jasiukajtis int iarr2, iexp2, itbl2;
228*5b2ba9d3SPiotr Jasiukajtis
229*5b2ba9d3SPiotr Jasiukajtis int lx, ly;
230*5b2ba9d3SPiotr Jasiukajtis
231*5b2ba9d3SPiotr Jasiukajtis double DONE = ((double*)LCONST)[0];
232*5b2ba9d3SPiotr Jasiukajtis double DTWO = ((double*)LCONST)[1];
233*5b2ba9d3SPiotr Jasiukajtis double D2ON36 = ((double*)LCONST)[2];
234*5b2ba9d3SPiotr Jasiukajtis double D2ON1022 = ((double*)LCONST)[3];
235*5b2ba9d3SPiotr Jasiukajtis double D2ONM52 = ((double*)LCONST)[4];
236*5b2ba9d3SPiotr Jasiukajtis
237*5b2ba9d3SPiotr Jasiukajtis double *pz0, *pz1 = 0, *pz2;
238*5b2ba9d3SPiotr Jasiukajtis
239*5b2ba9d3SPiotr Jasiukajtis do
240*5b2ba9d3SPiotr Jasiukajtis {
241*5b2ba9d3SPiotr Jasiukajtis start0:
242*5b2ba9d3SPiotr Jasiukajtis PREP(0)
243*5b2ba9d3SPiotr Jasiukajtis px += stridex;
244*5b2ba9d3SPiotr Jasiukajtis py += stridey;
245*5b2ba9d3SPiotr Jasiukajtis pz += stridez;
246*5b2ba9d3SPiotr Jasiukajtis i = 1;
247*5b2ba9d3SPiotr Jasiukajtis if (--n <= 0)
248*5b2ba9d3SPiotr Jasiukajtis break;
249*5b2ba9d3SPiotr Jasiukajtis
250*5b2ba9d3SPiotr Jasiukajtis start1:
251*5b2ba9d3SPiotr Jasiukajtis PREP(1)
252*5b2ba9d3SPiotr Jasiukajtis px += stridex;
253*5b2ba9d3SPiotr Jasiukajtis py += stridey;
254*5b2ba9d3SPiotr Jasiukajtis pz += stridez;
255*5b2ba9d3SPiotr Jasiukajtis i = 2;
256*5b2ba9d3SPiotr Jasiukajtis if (--n <= 0)
257*5b2ba9d3SPiotr Jasiukajtis break;
258*5b2ba9d3SPiotr Jasiukajtis
259*5b2ba9d3SPiotr Jasiukajtis start2:
260*5b2ba9d3SPiotr Jasiukajtis PREP(2)
261*5b2ba9d3SPiotr Jasiukajtis
262*5b2ba9d3SPiotr Jasiukajtis x0 *= scl0;
263*5b2ba9d3SPiotr Jasiukajtis y0 *= scl0;
264*5b2ba9d3SPiotr Jasiukajtis x1 *= scl1;
265*5b2ba9d3SPiotr Jasiukajtis y1 *= scl1;
266*5b2ba9d3SPiotr Jasiukajtis x2 *= scl2;
267*5b2ba9d3SPiotr Jasiukajtis y2 *= scl2;
268*5b2ba9d3SPiotr Jasiukajtis
269*5b2ba9d3SPiotr Jasiukajtis x_hi0 = (x0 + D2ON36) - D2ON36;
270*5b2ba9d3SPiotr Jasiukajtis y_hi0 = (y0 + D2ON36) - D2ON36;
271*5b2ba9d3SPiotr Jasiukajtis x_hi1 = (x1 + D2ON36) - D2ON36;
272*5b2ba9d3SPiotr Jasiukajtis y_hi1 = (y1 + D2ON36) - D2ON36;
273*5b2ba9d3SPiotr Jasiukajtis x_hi2 = (x2 + D2ON36) - D2ON36;
274*5b2ba9d3SPiotr Jasiukajtis y_hi2 = (y2 + D2ON36) - D2ON36;
275*5b2ba9d3SPiotr Jasiukajtis x_lo0 = x0 - x_hi0;
276*5b2ba9d3SPiotr Jasiukajtis y_lo0 = y0 - y_hi0;
277*5b2ba9d3SPiotr Jasiukajtis x_lo1 = x1 - x_hi1;
278*5b2ba9d3SPiotr Jasiukajtis y_lo1 = y1 - y_hi1;
279*5b2ba9d3SPiotr Jasiukajtis x_lo2 = x2 - x_hi2;
280*5b2ba9d3SPiotr Jasiukajtis y_lo2 = y2 - y_hi2;
281*5b2ba9d3SPiotr Jasiukajtis res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
282*5b2ba9d3SPiotr Jasiukajtis res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1);
283*5b2ba9d3SPiotr Jasiukajtis res2_hi = (x_hi2 * x_hi2 + y_hi2 * y_hi2);
284*5b2ba9d3SPiotr Jasiukajtis res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
285*5b2ba9d3SPiotr Jasiukajtis res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1);
286*5b2ba9d3SPiotr Jasiukajtis res2_lo = ((x2 + x_hi2) * x_lo2 + (y2 + y_hi2) * y_lo2);
287*5b2ba9d3SPiotr Jasiukajtis
288*5b2ba9d3SPiotr Jasiukajtis dres0 = res0_hi + res0_lo;
289*5b2ba9d3SPiotr Jasiukajtis dres1 = res1_hi + res1_lo;
290*5b2ba9d3SPiotr Jasiukajtis dres2 = res2_hi + res2_lo;
291*5b2ba9d3SPiotr Jasiukajtis
292*5b2ba9d3SPiotr Jasiukajtis iarr0 = HI(&dres0);
293*5b2ba9d3SPiotr Jasiukajtis iarr1 = HI(&dres1);
294*5b2ba9d3SPiotr Jasiukajtis iarr2 = HI(&dres2);
295*5b2ba9d3SPiotr Jasiukajtis iexp0 = iarr0 & 0xfff00000;
296*5b2ba9d3SPiotr Jasiukajtis iexp1 = iarr1 & 0xfff00000;
297*5b2ba9d3SPiotr Jasiukajtis iexp2 = iarr2 & 0xfff00000;
298*5b2ba9d3SPiotr Jasiukajtis
299*5b2ba9d3SPiotr Jasiukajtis iarr0 = (iarr0 >> 11) & 0x1fc;
300*5b2ba9d3SPiotr Jasiukajtis iarr1 = (iarr1 >> 11) & 0x1fc;
301*5b2ba9d3SPiotr Jasiukajtis iarr2 = (iarr2 >> 11) & 0x1fc;
302*5b2ba9d3SPiotr Jasiukajtis itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0];
303*5b2ba9d3SPiotr Jasiukajtis itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0];
304*5b2ba9d3SPiotr Jasiukajtis itbl2 = ((int*)((char*)__vlibm_TBL_rhypot + iarr2))[0];
305*5b2ba9d3SPiotr Jasiukajtis itbl0 -= iexp0;
306*5b2ba9d3SPiotr Jasiukajtis itbl1 -= iexp1;
307*5b2ba9d3SPiotr Jasiukajtis itbl2 -= iexp2;
308*5b2ba9d3SPiotr Jasiukajtis HI(&dd0) = itbl0;
309*5b2ba9d3SPiotr Jasiukajtis HI(&dd1) = itbl1;
310*5b2ba9d3SPiotr Jasiukajtis HI(&dd2) = itbl2;
311*5b2ba9d3SPiotr Jasiukajtis LO(&dd0) = 0;
312*5b2ba9d3SPiotr Jasiukajtis LO(&dd1) = 0;
313*5b2ba9d3SPiotr Jasiukajtis LO(&dd2) = 0;
314*5b2ba9d3SPiotr Jasiukajtis
315*5b2ba9d3SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0);
316*5b2ba9d3SPiotr Jasiukajtis dd1 = dd1 * (DTWO - dd1 * dres1);
317*5b2ba9d3SPiotr Jasiukajtis dd2 = dd2 * (DTWO - dd2 * dres2);
318*5b2ba9d3SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0);
319*5b2ba9d3SPiotr Jasiukajtis dd1 = dd1 * (DTWO - dd1 * dres1);
320*5b2ba9d3SPiotr Jasiukajtis dd2 = dd2 * (DTWO - dd2 * dres2);
321*5b2ba9d3SPiotr Jasiukajtis dres0 = dd0 * (DTWO - dd0 * dres0);
322*5b2ba9d3SPiotr Jasiukajtis dres1 = dd1 * (DTWO - dd1 * dres1);
323*5b2ba9d3SPiotr Jasiukajtis dres2 = dd2 * (DTWO - dd2 * dres2);
324*5b2ba9d3SPiotr Jasiukajtis
325*5b2ba9d3SPiotr Jasiukajtis HI(&res0) = HI(&dres0) & 0xffffff00;
326*5b2ba9d3SPiotr Jasiukajtis HI(&res1) = HI(&dres1) & 0xffffff00;
327*5b2ba9d3SPiotr Jasiukajtis HI(&res2) = HI(&dres2) & 0xffffff00;
328*5b2ba9d3SPiotr Jasiukajtis LO(&res0) = 0;
329*5b2ba9d3SPiotr Jasiukajtis LO(&res1) = 0;
330*5b2ba9d3SPiotr Jasiukajtis LO(&res2) = 0;
331*5b2ba9d3SPiotr Jasiukajtis res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0;
332*5b2ba9d3SPiotr Jasiukajtis res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1;
333*5b2ba9d3SPiotr Jasiukajtis res2 += (DONE - res2_hi * res2 - res2_lo * res2) * dres2;
334*5b2ba9d3SPiotr Jasiukajtis res0 = sqrt (res0);
335*5b2ba9d3SPiotr Jasiukajtis res1 = sqrt (res1);
336*5b2ba9d3SPiotr Jasiukajtis res2 = sqrt (res2);
337*5b2ba9d3SPiotr Jasiukajtis
338*5b2ba9d3SPiotr Jasiukajtis res0 = scl0 * res0;
339*5b2ba9d3SPiotr Jasiukajtis res1 = scl1 * res1;
340*5b2ba9d3SPiotr Jasiukajtis res2 = scl2 * res2;
341*5b2ba9d3SPiotr Jasiukajtis
342*5b2ba9d3SPiotr Jasiukajtis *pz0 = res0;
343*5b2ba9d3SPiotr Jasiukajtis *pz1 = res1;
344*5b2ba9d3SPiotr Jasiukajtis *pz2 = res2;
345*5b2ba9d3SPiotr Jasiukajtis
346*5b2ba9d3SPiotr Jasiukajtis px += stridex;
347*5b2ba9d3SPiotr Jasiukajtis py += stridey;
348*5b2ba9d3SPiotr Jasiukajtis pz += stridez;
349*5b2ba9d3SPiotr Jasiukajtis i = 0;
350*5b2ba9d3SPiotr Jasiukajtis
351*5b2ba9d3SPiotr Jasiukajtis } while (--n > 0);
352*5b2ba9d3SPiotr Jasiukajtis
353*5b2ba9d3SPiotr Jasiukajtis if (i > 0)
354*5b2ba9d3SPiotr Jasiukajtis {
355*5b2ba9d3SPiotr Jasiukajtis x0 *= scl0;
356*5b2ba9d3SPiotr Jasiukajtis y0 *= scl0;
357*5b2ba9d3SPiotr Jasiukajtis
358*5b2ba9d3SPiotr Jasiukajtis x_hi0 = (x0 + D2ON36) - D2ON36;
359*5b2ba9d3SPiotr Jasiukajtis y_hi0 = (y0 + D2ON36) - D2ON36;
360*5b2ba9d3SPiotr Jasiukajtis x_lo0 = x0 - x_hi0;
361*5b2ba9d3SPiotr Jasiukajtis y_lo0 = y0 - y_hi0;
362*5b2ba9d3SPiotr Jasiukajtis res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
363*5b2ba9d3SPiotr Jasiukajtis res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
364*5b2ba9d3SPiotr Jasiukajtis
365*5b2ba9d3SPiotr Jasiukajtis dres0 = res0_hi + res0_lo;
366*5b2ba9d3SPiotr Jasiukajtis
367*5b2ba9d3SPiotr Jasiukajtis iarr0 = HI(&dres0);
368*5b2ba9d3SPiotr Jasiukajtis iexp0 = iarr0 & 0xfff00000;
369*5b2ba9d3SPiotr Jasiukajtis
370*5b2ba9d3SPiotr Jasiukajtis iarr0 = (iarr0 >> 11) & 0x1fc;
371*5b2ba9d3SPiotr Jasiukajtis itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0];
372*5b2ba9d3SPiotr Jasiukajtis itbl0 -= iexp0;
373*5b2ba9d3SPiotr Jasiukajtis HI(&dd0) = itbl0;
374*5b2ba9d3SPiotr Jasiukajtis LO(&dd0) = 0;
375*5b2ba9d3SPiotr Jasiukajtis
376*5b2ba9d3SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0);
377*5b2ba9d3SPiotr Jasiukajtis dd0 = dd0 * (DTWO - dd0 * dres0);
378*5b2ba9d3SPiotr Jasiukajtis dres0 = dd0 * (DTWO - dd0 * dres0);
379*5b2ba9d3SPiotr Jasiukajtis
380*5b2ba9d3SPiotr Jasiukajtis HI(&res0) = HI(&dres0) & 0xffffff00;
381*5b2ba9d3SPiotr Jasiukajtis LO(&res0) = 0;
382*5b2ba9d3SPiotr Jasiukajtis res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0;
383*5b2ba9d3SPiotr Jasiukajtis res0 = sqrt (res0);
384*5b2ba9d3SPiotr Jasiukajtis
385*5b2ba9d3SPiotr Jasiukajtis res0 = scl0 * res0;
386*5b2ba9d3SPiotr Jasiukajtis
387*5b2ba9d3SPiotr Jasiukajtis *pz0 = res0;
388*5b2ba9d3SPiotr Jasiukajtis
389*5b2ba9d3SPiotr Jasiukajtis if (i > 1)
390*5b2ba9d3SPiotr Jasiukajtis {
391*5b2ba9d3SPiotr Jasiukajtis x1 *= scl1;
392*5b2ba9d3SPiotr Jasiukajtis y1 *= scl1;
393*5b2ba9d3SPiotr Jasiukajtis
394*5b2ba9d3SPiotr Jasiukajtis x_hi1 = (x1 + D2ON36) - D2ON36;
395*5b2ba9d3SPiotr Jasiukajtis y_hi1 = (y1 + D2ON36) - D2ON36;
396*5b2ba9d3SPiotr Jasiukajtis x_lo1 = x1 - x_hi1;
397*5b2ba9d3SPiotr Jasiukajtis y_lo1 = y1 - y_hi1;
398*5b2ba9d3SPiotr Jasiukajtis res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1);
399*5b2ba9d3SPiotr Jasiukajtis res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1);
400*5b2ba9d3SPiotr Jasiukajtis
401*5b2ba9d3SPiotr Jasiukajtis dres1 = res1_hi + res1_lo;
402*5b2ba9d3SPiotr Jasiukajtis
403*5b2ba9d3SPiotr Jasiukajtis iarr1 = HI(&dres1);
404*5b2ba9d3SPiotr Jasiukajtis iexp1 = iarr1 & 0xfff00000;
405*5b2ba9d3SPiotr Jasiukajtis
406*5b2ba9d3SPiotr Jasiukajtis iarr1 = (iarr1 >> 11) & 0x1fc;
407*5b2ba9d3SPiotr Jasiukajtis itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0];
408*5b2ba9d3SPiotr Jasiukajtis itbl1 -= iexp1;
409*5b2ba9d3SPiotr Jasiukajtis HI(&dd1) = itbl1;
410*5b2ba9d3SPiotr Jasiukajtis LO(&dd1) = 0;
411*5b2ba9d3SPiotr Jasiukajtis
412*5b2ba9d3SPiotr Jasiukajtis dd1 = dd1 * (DTWO - dd1 * dres1);
413*5b2ba9d3SPiotr Jasiukajtis dd1 = dd1 * (DTWO - dd1 * dres1);
414*5b2ba9d3SPiotr Jasiukajtis dres1 = dd1 * (DTWO - dd1 * dres1);
415*5b2ba9d3SPiotr Jasiukajtis
416*5b2ba9d3SPiotr Jasiukajtis HI(&res1) = HI(&dres1) & 0xffffff00;
417*5b2ba9d3SPiotr Jasiukajtis LO(&res1) = 0;
418*5b2ba9d3SPiotr Jasiukajtis res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1;
419*5b2ba9d3SPiotr Jasiukajtis res1 = sqrt (res1);
420*5b2ba9d3SPiotr Jasiukajtis
421*5b2ba9d3SPiotr Jasiukajtis res1 = scl1 * res1;
422*5b2ba9d3SPiotr Jasiukajtis
423*5b2ba9d3SPiotr Jasiukajtis *pz1 = res1;
424*5b2ba9d3SPiotr Jasiukajtis }
425*5b2ba9d3SPiotr Jasiukajtis }
426*5b2ba9d3SPiotr Jasiukajtis }
427