xref: /titanic_51/usr/src/lib/libmvec/common/__vrhypotf.c (revision ddc0e0b53c661f6e439e3b7072b3ef353eadb4af)
1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis 
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*25c28e83SPiotr Jasiukajtis  */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis  */
29*25c28e83SPiotr Jasiukajtis 
30*25c28e83SPiotr Jasiukajtis #include <sys/isa_defs.h>
31*25c28e83SPiotr Jasiukajtis #include "libm_inlines.h"
32*25c28e83SPiotr Jasiukajtis 
33*25c28e83SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN
34*25c28e83SPiotr Jasiukajtis #define HI(x)	*(1+(int*)x)
35*25c28e83SPiotr Jasiukajtis #define LO(x)	*(unsigned*)x
36*25c28e83SPiotr Jasiukajtis #else
37*25c28e83SPiotr Jasiukajtis #define HI(x)	*(int*)x
38*25c28e83SPiotr Jasiukajtis #define LO(x)	*(1+(unsigned*)x)
39*25c28e83SPiotr Jasiukajtis #endif
40*25c28e83SPiotr Jasiukajtis 
41*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT
42*25c28e83SPiotr Jasiukajtis #define restrict _Restrict
43*25c28e83SPiotr Jasiukajtis #else
44*25c28e83SPiotr Jasiukajtis #define restrict
45*25c28e83SPiotr Jasiukajtis #endif
46*25c28e83SPiotr Jasiukajtis 
47*25c28e83SPiotr Jasiukajtis /* float rhypotf(float x, float y)
48*25c28e83SPiotr Jasiukajtis  *
49*25c28e83SPiotr Jasiukajtis  * Method :
50*25c28e83SPiotr Jasiukajtis  *	1. Special cases:
51*25c28e83SPiotr Jasiukajtis  *		for x or y = Inf			=> 0;
52*25c28e83SPiotr Jasiukajtis  *		for x or y = NaN			=> QNaN;
53*25c28e83SPiotr Jasiukajtis  *		for x and y = 0				=> +Inf + divide-by-zero;
54*25c28e83SPiotr Jasiukajtis  *	2. Computes d = x * x + y * y;
55*25c28e83SPiotr Jasiukajtis  *	3. Computes reciprocal square root from:
56*25c28e83SPiotr Jasiukajtis  *		d = m * 2**n
57*25c28e83SPiotr Jasiukajtis  *	Where:
58*25c28e83SPiotr Jasiukajtis  *		m = [0.5, 2),
59*25c28e83SPiotr Jasiukajtis  *		n = ((exponent + 1) & ~1).
60*25c28e83SPiotr Jasiukajtis  *	Then:
61*25c28e83SPiotr Jasiukajtis  *		rsqrtf(d) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m))
62*25c28e83SPiotr Jasiukajtis  *	4. Computes 1/sqrt(m) from:
63*25c28e83SPiotr Jasiukajtis  *		1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm))
64*25c28e83SPiotr Jasiukajtis  *	Where:
65*25c28e83SPiotr Jasiukajtis  *		m = m0 + dm,
66*25c28e83SPiotr Jasiukajtis  *		m0 = 0.5 * (1 + k/64) for m = [0.5,         0.5+127/256), k = [0, 63];
67*25c28e83SPiotr Jasiukajtis  *		m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
68*25c28e83SPiotr Jasiukajtis  *	Then:
69*25c28e83SPiotr Jasiukajtis  *		1/sqrt(m0), 1/m0 are looked up in a table,
70*25c28e83SPiotr Jasiukajtis  *		1/sqrt(1 + (1/m0)*dm) is computed using approximation:
71*25c28e83SPiotr Jasiukajtis  *			1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0
72*25c28e83SPiotr Jasiukajtis  *			where z = [-1/64, 1/64].
73*25c28e83SPiotr Jasiukajtis  *
74*25c28e83SPiotr Jasiukajtis  * Accuracy:
75*25c28e83SPiotr Jasiukajtis  *	The maximum relative error for the approximating
76*25c28e83SPiotr Jasiukajtis  *	polynomial is 2**(-27.87).
77*25c28e83SPiotr Jasiukajtis  *	Maximum error observed: less than 0.535 ulp after 3.000.000.000
78*25c28e83SPiotr Jasiukajtis  *	results.
79*25c28e83SPiotr Jasiukajtis  */
80*25c28e83SPiotr Jasiukajtis 
81*25c28e83SPiotr Jasiukajtis #pragma align 32 (__vlibm_TBL_rhypotf)
82*25c28e83SPiotr Jasiukajtis 
83*25c28e83SPiotr Jasiukajtis static const double __vlibm_TBL_rhypotf[] = {
84*25c28e83SPiotr Jasiukajtis /*
85*25c28e83SPiotr Jasiukajtis  i = [0,63]
86*25c28e83SPiotr Jasiukajtis  TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
87*25c28e83SPiotr Jasiukajtis  TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
88*25c28e83SPiotr Jasiukajtis  TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
89*25c28e83SPiotr Jasiukajtis  TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
90*25c28e83SPiotr Jasiukajtis */
91*25c28e83SPiotr Jasiukajtis  1.0000000000000000000e+00, 3.5355339059327378637e-01,
92*25c28e83SPiotr Jasiukajtis  9.8461538461538467004e-01, 3.5082320772281166965e-01,
93*25c28e83SPiotr Jasiukajtis  9.6969696969696972388e-01, 3.4815531191139570399e-01,
94*25c28e83SPiotr Jasiukajtis  9.5522388059701490715e-01, 3.4554737023254405992e-01,
95*25c28e83SPiotr Jasiukajtis  9.4117647058823528106e-01, 3.4299717028501769400e-01,
96*25c28e83SPiotr Jasiukajtis  9.2753623188405798228e-01, 3.4050261230349943009e-01,
97*25c28e83SPiotr Jasiukajtis  9.1428571428571425717e-01, 3.3806170189140660742e-01,
98*25c28e83SPiotr Jasiukajtis  9.0140845070422537244e-01, 3.3567254331867563133e-01,
99*25c28e83SPiotr Jasiukajtis  8.8888888888888883955e-01, 3.3333333333333331483e-01,
100*25c28e83SPiotr Jasiukajtis  8.7671232876712323900e-01, 3.3104235544094717802e-01,
101*25c28e83SPiotr Jasiukajtis  8.6486486486486491287e-01, 3.2879797461071458287e-01,
102*25c28e83SPiotr Jasiukajtis  8.5333333333333338810e-01, 3.2659863237109043599e-01,
103*25c28e83SPiotr Jasiukajtis  8.4210526315789469010e-01, 3.2444284226152508843e-01,
104*25c28e83SPiotr Jasiukajtis  8.3116883116883122362e-01, 3.2232918561015211356e-01,
105*25c28e83SPiotr Jasiukajtis  8.2051282051282048435e-01, 3.2025630761017426229e-01,
106*25c28e83SPiotr Jasiukajtis  8.1012658227848100001e-01, 3.1822291367029204023e-01,
107*25c28e83SPiotr Jasiukajtis  8.0000000000000004441e-01, 3.1622776601683794118e-01,
108*25c28e83SPiotr Jasiukajtis  7.9012345679012341293e-01, 3.1426968052735443360e-01,
109*25c28e83SPiotr Jasiukajtis  7.8048780487804880757e-01, 3.1234752377721214378e-01,
110*25c28e83SPiotr Jasiukajtis  7.7108433734939763049e-01, 3.1046021028253312224e-01,
111*25c28e83SPiotr Jasiukajtis  7.6190476190476186247e-01, 3.0860669992418382490e-01,
112*25c28e83SPiotr Jasiukajtis  7.5294117647058822484e-01, 3.0678599553894819740e-01,
113*25c28e83SPiotr Jasiukajtis  7.4418604651162789665e-01, 3.0499714066520933198e-01,
114*25c28e83SPiotr Jasiukajtis  7.3563218390804596680e-01, 3.0323921743156134756e-01,
115*25c28e83SPiotr Jasiukajtis  7.2727272727272729291e-01, 3.0151134457776362918e-01,
116*25c28e83SPiotr Jasiukajtis  7.1910112359550559802e-01, 2.9981267559834456904e-01,
117*25c28e83SPiotr Jasiukajtis  7.1111111111111113825e-01, 2.9814239699997197031e-01,
118*25c28e83SPiotr Jasiukajtis  7.0329670329670335160e-01, 2.9649972666444046610e-01,
119*25c28e83SPiotr Jasiukajtis  6.9565217391304345895e-01, 2.9488391230979427160e-01,
120*25c28e83SPiotr Jasiukajtis  6.8817204301075274309e-01, 2.9329423004270660513e-01,
121*25c28e83SPiotr Jasiukajtis  6.8085106382978721751e-01, 2.9172998299578911663e-01,
122*25c28e83SPiotr Jasiukajtis  6.7368421052631577428e-01, 2.9019050004400465115e-01,
123*25c28e83SPiotr Jasiukajtis  6.6666666666666662966e-01, 2.8867513459481286553e-01,
124*25c28e83SPiotr Jasiukajtis  6.5979381443298967813e-01, 2.8718326344709527165e-01,
125*25c28e83SPiotr Jasiukajtis  6.5306122448979586625e-01, 2.8571428571428569843e-01,
126*25c28e83SPiotr Jasiukajtis  6.4646464646464651960e-01, 2.8426762180748055275e-01,
127*25c28e83SPiotr Jasiukajtis  6.4000000000000001332e-01, 2.8284271247461900689e-01,
128*25c28e83SPiotr Jasiukajtis  6.3366336633663367106e-01, 2.8143901789211672737e-01,
129*25c28e83SPiotr Jasiukajtis  6.2745098039215685404e-01, 2.8005601680560193723e-01,
130*25c28e83SPiotr Jasiukajtis  6.2135922330097081989e-01, 2.7869320571664707442e-01,
131*25c28e83SPiotr Jasiukajtis  6.1538461538461541878e-01, 2.7735009811261457369e-01,
132*25c28e83SPiotr Jasiukajtis  6.0952380952380957879e-01, 2.7602622373694168934e-01,
133*25c28e83SPiotr Jasiukajtis  6.0377358490566035432e-01, 2.7472112789737807015e-01,
134*25c28e83SPiotr Jasiukajtis  5.9813084112149528249e-01, 2.7343437080986532361e-01,
135*25c28e83SPiotr Jasiukajtis  5.9259259259259255970e-01, 2.7216552697590867815e-01,
136*25c28e83SPiotr Jasiukajtis  5.8715596330275232617e-01, 2.7091418459143856712e-01,
137*25c28e83SPiotr Jasiukajtis  5.8181818181818178992e-01, 2.6967994498529684888e-01,
138*25c28e83SPiotr Jasiukajtis  5.7657657657657657158e-01, 2.6846242208560971987e-01,
139*25c28e83SPiotr Jasiukajtis  5.7142857142857139685e-01, 2.6726124191242439654e-01,
140*25c28e83SPiotr Jasiukajtis  5.6637168141592919568e-01, 2.6607604209509572168e-01,
141*25c28e83SPiotr Jasiukajtis  5.6140350877192979340e-01, 2.6490647141300877054e-01,
142*25c28e83SPiotr Jasiukajtis  5.5652173913043478937e-01, 2.6375218935831479250e-01,
143*25c28e83SPiotr Jasiukajtis  5.5172413793103447510e-01, 2.6261286571944508772e-01,
144*25c28e83SPiotr Jasiukajtis  5.4700854700854706358e-01, 2.6148818018424535570e-01,
145*25c28e83SPiotr Jasiukajtis  5.4237288135593220151e-01, 2.6037782196164771520e-01,
146*25c28e83SPiotr Jasiukajtis  5.3781512605042014474e-01, 2.5928148942086576278e-01,
147*25c28e83SPiotr Jasiukajtis  5.3333333333333332593e-01, 2.5819888974716115326e-01,
148*25c28e83SPiotr Jasiukajtis  5.2892561983471075848e-01, 2.5712973861329002645e-01,
149*25c28e83SPiotr Jasiukajtis  5.2459016393442625681e-01, 2.5607375986579195004e-01,
150*25c28e83SPiotr Jasiukajtis  5.2032520325203257539e-01, 2.5503068522533534068e-01,
151*25c28e83SPiotr Jasiukajtis  5.1612903225806450180e-01, 2.5400025400038100942e-01,
152*25c28e83SPiotr Jasiukajtis  5.1200000000000001066e-01, 2.5298221281347033074e-01,
153*25c28e83SPiotr Jasiukajtis  5.0793650793650790831e-01, 2.5197631533948483540e-01,
154*25c28e83SPiotr Jasiukajtis  5.0393700787401574104e-01, 2.5098232205526344041e-01,
155*25c28e83SPiotr Jasiukajtis  1.0000000000000000000e+00, 2.5000000000000000000e-01,
156*25c28e83SPiotr Jasiukajtis  9.8461538461538467004e-01, 2.4806946917841690703e-01,
157*25c28e83SPiotr Jasiukajtis  9.6969696969696972388e-01, 2.4618298195866547551e-01,
158*25c28e83SPiotr Jasiukajtis  9.5522388059701490715e-01, 2.4433888871261044695e-01,
159*25c28e83SPiotr Jasiukajtis  9.4117647058823528106e-01, 2.4253562503633296910e-01,
160*25c28e83SPiotr Jasiukajtis  9.2753623188405798228e-01, 2.4077170617153839660e-01,
161*25c28e83SPiotr Jasiukajtis  9.1428571428571425717e-01, 2.3904572186687872426e-01,
162*25c28e83SPiotr Jasiukajtis  9.0140845070422537244e-01, 2.3735633163877067897e-01,
163*25c28e83SPiotr Jasiukajtis  8.8888888888888883955e-01, 2.3570226039551583908e-01,
164*25c28e83SPiotr Jasiukajtis  8.7671232876712323900e-01, 2.3408229439226113655e-01,
165*25c28e83SPiotr Jasiukajtis  8.6486486486486491287e-01, 2.3249527748763856860e-01,
166*25c28e83SPiotr Jasiukajtis  8.5333333333333338810e-01, 2.3094010767585029797e-01,
167*25c28e83SPiotr Jasiukajtis  8.4210526315789469010e-01, 2.2941573387056177213e-01,
168*25c28e83SPiotr Jasiukajtis  8.3116883116883122362e-01, 2.2792115291927589338e-01,
169*25c28e83SPiotr Jasiukajtis  8.2051282051282048435e-01, 2.2645540682891915352e-01,
170*25c28e83SPiotr Jasiukajtis  8.1012658227848100001e-01, 2.2501758018520479077e-01,
171*25c28e83SPiotr Jasiukajtis  8.0000000000000004441e-01, 2.2360679774997896385e-01,
172*25c28e83SPiotr Jasiukajtis  7.9012345679012341293e-01, 2.2222222222222220989e-01,
173*25c28e83SPiotr Jasiukajtis  7.8048780487804880757e-01, 2.2086305214969309541e-01,
174*25c28e83SPiotr Jasiukajtis  7.7108433734939763049e-01, 2.1952851997938069295e-01,
175*25c28e83SPiotr Jasiukajtis  7.6190476190476186247e-01, 2.1821789023599238999e-01,
176*25c28e83SPiotr Jasiukajtis  7.5294117647058822484e-01, 2.1693045781865616384e-01,
177*25c28e83SPiotr Jasiukajtis  7.4418604651162789665e-01, 2.1566554640687682354e-01,
178*25c28e83SPiotr Jasiukajtis  7.3563218390804596680e-01, 2.1442250696755896233e-01,
179*25c28e83SPiotr Jasiukajtis  7.2727272727272729291e-01, 2.1320071635561044232e-01,
180*25c28e83SPiotr Jasiukajtis  7.1910112359550559802e-01, 2.1199957600127200541e-01,
181*25c28e83SPiotr Jasiukajtis  7.1111111111111113825e-01, 2.1081851067789195153e-01,
182*25c28e83SPiotr Jasiukajtis  7.0329670329670335160e-01, 2.0965696734438366011e-01,
183*25c28e83SPiotr Jasiukajtis  6.9565217391304345895e-01, 2.0851441405707477061e-01,
184*25c28e83SPiotr Jasiukajtis  6.8817204301075274309e-01, 2.0739033894608505104e-01,
185*25c28e83SPiotr Jasiukajtis  6.8085106382978721751e-01, 2.0628424925175867233e-01,
186*25c28e83SPiotr Jasiukajtis  6.7368421052631577428e-01, 2.0519567041703082322e-01,
187*25c28e83SPiotr Jasiukajtis  6.6666666666666662966e-01, 2.0412414523193150862e-01,
188*25c28e83SPiotr Jasiukajtis  6.5979381443298967813e-01, 2.0306923302672380549e-01,
189*25c28e83SPiotr Jasiukajtis  6.5306122448979586625e-01, 2.0203050891044216364e-01,
190*25c28e83SPiotr Jasiukajtis  6.4646464646464651960e-01, 2.0100756305184241945e-01,
191*25c28e83SPiotr Jasiukajtis  6.4000000000000001332e-01, 2.0000000000000001110e-01,
192*25c28e83SPiotr Jasiukajtis  6.3366336633663367106e-01, 1.9900743804199783060e-01,
193*25c28e83SPiotr Jasiukajtis  6.2745098039215685404e-01, 1.9802950859533485772e-01,
194*25c28e83SPiotr Jasiukajtis  6.2135922330097081989e-01, 1.9706585563285863860e-01,
195*25c28e83SPiotr Jasiukajtis  6.1538461538461541878e-01, 1.9611613513818404453e-01,
196*25c28e83SPiotr Jasiukajtis  6.0952380952380957879e-01, 1.9518001458970662965e-01,
197*25c28e83SPiotr Jasiukajtis  6.0377358490566035432e-01, 1.9425717247145282696e-01,
198*25c28e83SPiotr Jasiukajtis  5.9813084112149528249e-01, 1.9334729780913270658e-01,
199*25c28e83SPiotr Jasiukajtis  5.9259259259259255970e-01, 1.9245008972987526219e-01,
200*25c28e83SPiotr Jasiukajtis  5.8715596330275232617e-01, 1.9156525704423027490e-01,
201*25c28e83SPiotr Jasiukajtis  5.8181818181818178992e-01, 1.9069251784911847580e-01,
202*25c28e83SPiotr Jasiukajtis  5.7657657657657657158e-01, 1.8983159915049979682e-01,
203*25c28e83SPiotr Jasiukajtis  5.7142857142857139685e-01, 1.8898223650461362655e-01,
204*25c28e83SPiotr Jasiukajtis  5.6637168141592919568e-01, 1.8814417367671945613e-01,
205*25c28e83SPiotr Jasiukajtis  5.6140350877192979340e-01, 1.8731716231633879777e-01,
206*25c28e83SPiotr Jasiukajtis  5.5652173913043478937e-01, 1.8650096164806276300e-01,
207*25c28e83SPiotr Jasiukajtis  5.5172413793103447510e-01, 1.8569533817705186074e-01,
208*25c28e83SPiotr Jasiukajtis  5.4700854700854706358e-01, 1.8490006540840969729e-01,
209*25c28e83SPiotr Jasiukajtis  5.4237288135593220151e-01, 1.8411492357966466327e-01,
210*25c28e83SPiotr Jasiukajtis  5.3781512605042014474e-01, 1.8333969940564226464e-01,
211*25c28e83SPiotr Jasiukajtis  5.3333333333333332593e-01, 1.8257418583505535814e-01,
212*25c28e83SPiotr Jasiukajtis  5.2892561983471075848e-01, 1.8181818181818182323e-01,
213*25c28e83SPiotr Jasiukajtis  5.2459016393442625681e-01, 1.8107149208503706128e-01,
214*25c28e83SPiotr Jasiukajtis  5.2032520325203257539e-01, 1.8033392693348646030e-01,
215*25c28e83SPiotr Jasiukajtis  5.1612903225806450180e-01, 1.7960530202677491007e-01,
216*25c28e83SPiotr Jasiukajtis  5.1200000000000001066e-01, 1.7888543819998317663e-01,
217*25c28e83SPiotr Jasiukajtis  5.0793650793650790831e-01, 1.7817416127494958844e-01,
218*25c28e83SPiotr Jasiukajtis  5.0393700787401574104e-01, 1.7747130188322274291e-01,
219*25c28e83SPiotr Jasiukajtis };
220*25c28e83SPiotr Jasiukajtis 
221*25c28e83SPiotr Jasiukajtis extern float fabsf(float);
222*25c28e83SPiotr Jasiukajtis 
223*25c28e83SPiotr Jasiukajtis static const double
224*25c28e83SPiotr Jasiukajtis 	A0 = 9.99999997962321453275e-01,
225*25c28e83SPiotr Jasiukajtis 	A1 =-4.99999998166077580600e-01,
226*25c28e83SPiotr Jasiukajtis 	A2 = 3.75066768969515586277e-01,
227*25c28e83SPiotr Jasiukajtis 	A3 =-3.12560092408808548438e-01;
228*25c28e83SPiotr Jasiukajtis 
229*25c28e83SPiotr Jasiukajtis static void
230*25c28e83SPiotr Jasiukajtis __vrhypotf_n(int n, float * restrict px, int stridex, float * restrict py,
231*25c28e83SPiotr Jasiukajtis 	int stridey, float * restrict pz, int stridez);
232*25c28e83SPiotr Jasiukajtis 
233*25c28e83SPiotr Jasiukajtis #pragma no_inline(__vrhypotf_n)
234*25c28e83SPiotr Jasiukajtis 
235*25c28e83SPiotr Jasiukajtis #define RETURN(ret)						\
236*25c28e83SPiotr Jasiukajtis {								\
237*25c28e83SPiotr Jasiukajtis 	*pz = (ret);						\
238*25c28e83SPiotr Jasiukajtis 	pz += stridez;						\
239*25c28e83SPiotr Jasiukajtis 	if (n_n == 0)						\
240*25c28e83SPiotr Jasiukajtis 	{							\
241*25c28e83SPiotr Jasiukajtis 		spx = px; spy = py; spz = pz;			\
242*25c28e83SPiotr Jasiukajtis 		ay0 = *(int*)py;				\
243*25c28e83SPiotr Jasiukajtis 		continue;					\
244*25c28e83SPiotr Jasiukajtis 	}							\
245*25c28e83SPiotr Jasiukajtis 	n--;							\
246*25c28e83SPiotr Jasiukajtis 	break;							\
247*25c28e83SPiotr Jasiukajtis }
248*25c28e83SPiotr Jasiukajtis 
249*25c28e83SPiotr Jasiukajtis 
250*25c28e83SPiotr Jasiukajtis void
251*25c28e83SPiotr Jasiukajtis __vrhypotf(int n, float * restrict px, int stridex, float * restrict py,
252*25c28e83SPiotr Jasiukajtis 	int stridey, float * restrict pz, int stridez)
253*25c28e83SPiotr Jasiukajtis {
254*25c28e83SPiotr Jasiukajtis 	float		*spx, *spy, *spz;
255*25c28e83SPiotr Jasiukajtis 	int		ax0, ay0, n_n;
256*25c28e83SPiotr Jasiukajtis 	float		res, x0, y0;
257*25c28e83SPiotr Jasiukajtis 
258*25c28e83SPiotr Jasiukajtis 	while (n > 1)
259*25c28e83SPiotr Jasiukajtis 	{
260*25c28e83SPiotr Jasiukajtis 		n_n = 0;
261*25c28e83SPiotr Jasiukajtis 		spx = px;
262*25c28e83SPiotr Jasiukajtis 		spy = py;
263*25c28e83SPiotr Jasiukajtis 		spz = pz;
264*25c28e83SPiotr Jasiukajtis 		ax0 = *(int*)px;
265*25c28e83SPiotr Jasiukajtis 		ay0 = *(int*)py;
266*25c28e83SPiotr Jasiukajtis 		for (; n > 1 ; n--)
267*25c28e83SPiotr Jasiukajtis 		{
268*25c28e83SPiotr Jasiukajtis 			ax0 &= 0x7fffffff;
269*25c28e83SPiotr Jasiukajtis 			ay0 &= 0x7fffffff;
270*25c28e83SPiotr Jasiukajtis 
271*25c28e83SPiotr Jasiukajtis 			px += stridex;
272*25c28e83SPiotr Jasiukajtis 
273*25c28e83SPiotr Jasiukajtis 			if (ax0 >= 0x7f800000 || ay0 >= 0x7f800000)	/* X or Y = NaN or Inf	*/
274*25c28e83SPiotr Jasiukajtis 			{
275*25c28e83SPiotr Jasiukajtis 				x0 = *(px - stridex);
276*25c28e83SPiotr Jasiukajtis 				y0 = *py;
277*25c28e83SPiotr Jasiukajtis 				res = fabsf(x0) + fabsf(y0);
278*25c28e83SPiotr Jasiukajtis 				if (ax0 == 0x7f800000) res = 0.0f;
279*25c28e83SPiotr Jasiukajtis 				else if (ay0 == 0x7f800000) res = 0.0f;
280*25c28e83SPiotr Jasiukajtis 				ax0 = *(int*)px;
281*25c28e83SPiotr Jasiukajtis 				py += stridey;
282*25c28e83SPiotr Jasiukajtis 				RETURN (res)
283*25c28e83SPiotr Jasiukajtis 			}
284*25c28e83SPiotr Jasiukajtis 			ax0 = *(int*)px;
285*25c28e83SPiotr Jasiukajtis 			py += stridey;
286*25c28e83SPiotr Jasiukajtis 			if (ay0 == 0)		/* Y = 0	*/
287*25c28e83SPiotr Jasiukajtis 			{
288*25c28e83SPiotr Jasiukajtis 				int tx = *(int*)(px - stridex) & 0x7fffffff;
289*25c28e83SPiotr Jasiukajtis 				if (tx == 0)	/* X = 0	*/
290*25c28e83SPiotr Jasiukajtis 				{
291*25c28e83SPiotr Jasiukajtis 					RETURN (1.0f / 0.0f)
292*25c28e83SPiotr Jasiukajtis 				}
293*25c28e83SPiotr Jasiukajtis 			}
294*25c28e83SPiotr Jasiukajtis 			pz += stridez;
295*25c28e83SPiotr Jasiukajtis 			n_n++;
296*25c28e83SPiotr Jasiukajtis 			ay0 = *(int*)py;
297*25c28e83SPiotr Jasiukajtis 		}
298*25c28e83SPiotr Jasiukajtis 		if (n_n > 0)
299*25c28e83SPiotr Jasiukajtis 			__vrhypotf_n(n_n, spx, stridex, spy, stridey, spz, stridez);
300*25c28e83SPiotr Jasiukajtis 	}
301*25c28e83SPiotr Jasiukajtis 	if (n > 0)
302*25c28e83SPiotr Jasiukajtis 	{
303*25c28e83SPiotr Jasiukajtis 		ax0 = *(int*)px;
304*25c28e83SPiotr Jasiukajtis 		ay0 = *(int*)py;
305*25c28e83SPiotr Jasiukajtis 		x0 = *px;
306*25c28e83SPiotr Jasiukajtis 		y0 = *py;
307*25c28e83SPiotr Jasiukajtis 
308*25c28e83SPiotr Jasiukajtis 		ax0 &= 0x7fffffff;
309*25c28e83SPiotr Jasiukajtis 		ay0 &= 0x7fffffff;
310*25c28e83SPiotr Jasiukajtis 
311*25c28e83SPiotr Jasiukajtis 		if (ax0 >= 0x7f800000 || ay0 >= 0x7f800000)	/* X or Y = NaN or Inf	*/
312*25c28e83SPiotr Jasiukajtis 		{
313*25c28e83SPiotr Jasiukajtis 			res = fabsf(x0) + fabsf(y0);
314*25c28e83SPiotr Jasiukajtis 			if (ax0 == 0x7f800000) res = 0.0f;
315*25c28e83SPiotr Jasiukajtis 			else if (ay0 == 0x7f800000) res = 0.0f;
316*25c28e83SPiotr Jasiukajtis 			*pz = res;
317*25c28e83SPiotr Jasiukajtis 		}
318*25c28e83SPiotr Jasiukajtis 		else if (ax0 == 0 && ay0 == 0)	/* X and Y = 0	*/
319*25c28e83SPiotr Jasiukajtis 		{
320*25c28e83SPiotr Jasiukajtis 			*pz = 1.0f / 0.0f;
321*25c28e83SPiotr Jasiukajtis 		}
322*25c28e83SPiotr Jasiukajtis 		else
323*25c28e83SPiotr Jasiukajtis 		{
324*25c28e83SPiotr Jasiukajtis 			double		xx0, res0, hyp0, h_hi0 = 0, dbase0 = 0;
325*25c28e83SPiotr Jasiukajtis 			int		ibase0, si0, hyp0h;
326*25c28e83SPiotr Jasiukajtis 
327*25c28e83SPiotr Jasiukajtis 			hyp0 = x0 * (double)x0 + y0 * (double)y0;
328*25c28e83SPiotr Jasiukajtis 
329*25c28e83SPiotr Jasiukajtis 			ibase0 = HI(&hyp0);
330*25c28e83SPiotr Jasiukajtis 
331*25c28e83SPiotr Jasiukajtis 			HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1));
332*25c28e83SPiotr Jasiukajtis 
333*25c28e83SPiotr Jasiukajtis 			hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000;
334*25c28e83SPiotr Jasiukajtis 			HI(&hyp0) = hyp0h;
335*25c28e83SPiotr Jasiukajtis 			HI(&h_hi0) = hyp0h & 0x7fffc000;
336*25c28e83SPiotr Jasiukajtis 
337*25c28e83SPiotr Jasiukajtis 			ibase0 >>= 10;
338*25c28e83SPiotr Jasiukajtis 			si0 = ibase0 & 0x7f0;
339*25c28e83SPiotr Jasiukajtis 			xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0];
340*25c28e83SPiotr Jasiukajtis 
341*25c28e83SPiotr Jasiukajtis 			xx0 = (hyp0 - h_hi0) * xx0;
342*25c28e83SPiotr Jasiukajtis 			res0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[1];
343*25c28e83SPiotr Jasiukajtis 			res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
344*25c28e83SPiotr Jasiukajtis 			res0 *= dbase0;
345*25c28e83SPiotr Jasiukajtis 			*pz = res0;
346*25c28e83SPiotr Jasiukajtis 		}
347*25c28e83SPiotr Jasiukajtis 	}
348*25c28e83SPiotr Jasiukajtis }
349*25c28e83SPiotr Jasiukajtis 
350*25c28e83SPiotr Jasiukajtis static void
351*25c28e83SPiotr Jasiukajtis __vrhypotf_n(int n, float * restrict px, int stridex, float * restrict py,
352*25c28e83SPiotr Jasiukajtis 	int stridey, float * restrict pz, int stridez)
353*25c28e83SPiotr Jasiukajtis {
354*25c28e83SPiotr Jasiukajtis 	double		xx0, res0, hyp0, h_hi0 = 0, dbase0 = 0;
355*25c28e83SPiotr Jasiukajtis 	double		xx1, res1, hyp1, h_hi1 = 0, dbase1 = 0;
356*25c28e83SPiotr Jasiukajtis 	double		xx2, res2, hyp2, h_hi2 = 0, dbase2 = 0;
357*25c28e83SPiotr Jasiukajtis 	float		x0, y0;
358*25c28e83SPiotr Jasiukajtis 	float		x1, y1;
359*25c28e83SPiotr Jasiukajtis 	float		x2, y2;
360*25c28e83SPiotr Jasiukajtis 	int		ibase0, si0, hyp0h;
361*25c28e83SPiotr Jasiukajtis 	int		ibase1, si1, hyp1h;
362*25c28e83SPiotr Jasiukajtis 	int		ibase2, si2, hyp2h;
363*25c28e83SPiotr Jasiukajtis 
364*25c28e83SPiotr Jasiukajtis 	for (; n > 2 ; n -= 3)
365*25c28e83SPiotr Jasiukajtis 	{
366*25c28e83SPiotr Jasiukajtis 		x0 = *px;
367*25c28e83SPiotr Jasiukajtis 		px += stridex;
368*25c28e83SPiotr Jasiukajtis 		x1 = *px;
369*25c28e83SPiotr Jasiukajtis 		px += stridex;
370*25c28e83SPiotr Jasiukajtis 		x2 = *px;
371*25c28e83SPiotr Jasiukajtis 		px += stridex;
372*25c28e83SPiotr Jasiukajtis 
373*25c28e83SPiotr Jasiukajtis 		y0 = *py;
374*25c28e83SPiotr Jasiukajtis 		py += stridey;
375*25c28e83SPiotr Jasiukajtis 		y1 = *py;
376*25c28e83SPiotr Jasiukajtis 		py += stridey;
377*25c28e83SPiotr Jasiukajtis 		y2 = *py;
378*25c28e83SPiotr Jasiukajtis 		py += stridey;
379*25c28e83SPiotr Jasiukajtis 
380*25c28e83SPiotr Jasiukajtis 		hyp0 = x0 * (double)x0 + y0 * (double)y0;
381*25c28e83SPiotr Jasiukajtis 		hyp1 = x1 * (double)x1 + y1 * (double)y1;
382*25c28e83SPiotr Jasiukajtis 		hyp2 = x2 * (double)x2 + y2 * (double)y2;
383*25c28e83SPiotr Jasiukajtis 
384*25c28e83SPiotr Jasiukajtis 		ibase0 = HI(&hyp0);
385*25c28e83SPiotr Jasiukajtis 		ibase1 = HI(&hyp1);
386*25c28e83SPiotr Jasiukajtis 		ibase2 = HI(&hyp2);
387*25c28e83SPiotr Jasiukajtis 
388*25c28e83SPiotr Jasiukajtis 		HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1));
389*25c28e83SPiotr Jasiukajtis 		HI(&dbase1) = (0x60000000 - ((ibase1 & 0x7fe00000) >> 1));
390*25c28e83SPiotr Jasiukajtis 		HI(&dbase2) = (0x60000000 - ((ibase2 & 0x7fe00000) >> 1));
391*25c28e83SPiotr Jasiukajtis 
392*25c28e83SPiotr Jasiukajtis 		hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000;
393*25c28e83SPiotr Jasiukajtis 		hyp1h = (ibase1 & 0x000fffff) | 0x3ff00000;
394*25c28e83SPiotr Jasiukajtis 		hyp2h = (ibase2 & 0x000fffff) | 0x3ff00000;
395*25c28e83SPiotr Jasiukajtis 		HI(&hyp0) = hyp0h;
396*25c28e83SPiotr Jasiukajtis 		HI(&hyp1) = hyp1h;
397*25c28e83SPiotr Jasiukajtis 		HI(&hyp2) = hyp2h;
398*25c28e83SPiotr Jasiukajtis 		HI(&h_hi0) = hyp0h & 0x7fffc000;
399*25c28e83SPiotr Jasiukajtis 		HI(&h_hi1) = hyp1h & 0x7fffc000;
400*25c28e83SPiotr Jasiukajtis 		HI(&h_hi2) = hyp2h & 0x7fffc000;
401*25c28e83SPiotr Jasiukajtis 
402*25c28e83SPiotr Jasiukajtis 		ibase0 >>= 10;
403*25c28e83SPiotr Jasiukajtis 		ibase1 >>= 10;
404*25c28e83SPiotr Jasiukajtis 		ibase2 >>= 10;
405*25c28e83SPiotr Jasiukajtis 		si0 = ibase0 & 0x7f0;
406*25c28e83SPiotr Jasiukajtis 		si1 = ibase1 & 0x7f0;
407*25c28e83SPiotr Jasiukajtis 		si2 = ibase2 & 0x7f0;
408*25c28e83SPiotr Jasiukajtis 		xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0];
409*25c28e83SPiotr Jasiukajtis 		xx1 = ((double*)((char*)__vlibm_TBL_rhypotf + si1))[0];
410*25c28e83SPiotr Jasiukajtis 		xx2 = ((double*)((char*)__vlibm_TBL_rhypotf + si2))[0];
411*25c28e83SPiotr Jasiukajtis 
412*25c28e83SPiotr Jasiukajtis 		xx0 = (hyp0 - h_hi0) * xx0;
413*25c28e83SPiotr Jasiukajtis 		xx1 = (hyp1 - h_hi1) * xx1;
414*25c28e83SPiotr Jasiukajtis 		xx2 = (hyp2 - h_hi2) * xx2;
415*25c28e83SPiotr Jasiukajtis 		res0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[1];
416*25c28e83SPiotr Jasiukajtis 		res1 = ((double*)((char*)__vlibm_TBL_rhypotf + si1))[1];
417*25c28e83SPiotr Jasiukajtis 		res2 = ((double*)((char*)__vlibm_TBL_rhypotf + si2))[1];
418*25c28e83SPiotr Jasiukajtis 		res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
419*25c28e83SPiotr Jasiukajtis 		res1 *= (((A3 * xx1 + A2) * xx1 + A1) * xx1 + A0);
420*25c28e83SPiotr Jasiukajtis 		res2 *= (((A3 * xx2 + A2) * xx2 + A1) * xx2 + A0);
421*25c28e83SPiotr Jasiukajtis 		res0 *= dbase0;
422*25c28e83SPiotr Jasiukajtis 		res1 *= dbase1;
423*25c28e83SPiotr Jasiukajtis 		res2 *= dbase2;
424*25c28e83SPiotr Jasiukajtis 		*pz = res0;
425*25c28e83SPiotr Jasiukajtis 		pz += stridez;
426*25c28e83SPiotr Jasiukajtis 		*pz = res1;
427*25c28e83SPiotr Jasiukajtis 		pz += stridez;
428*25c28e83SPiotr Jasiukajtis 		*pz = res2;
429*25c28e83SPiotr Jasiukajtis 		pz += stridez;
430*25c28e83SPiotr Jasiukajtis 	}
431*25c28e83SPiotr Jasiukajtis 
432*25c28e83SPiotr Jasiukajtis 	for (; n > 0 ; n--)
433*25c28e83SPiotr Jasiukajtis 	{
434*25c28e83SPiotr Jasiukajtis 		x0 = *px;
435*25c28e83SPiotr Jasiukajtis 		px += stridex;
436*25c28e83SPiotr Jasiukajtis 
437*25c28e83SPiotr Jasiukajtis 		y0 = *py;
438*25c28e83SPiotr Jasiukajtis 		py += stridey;
439*25c28e83SPiotr Jasiukajtis 
440*25c28e83SPiotr Jasiukajtis 		hyp0 = x0 * (double)x0 + y0 * (double)y0;
441*25c28e83SPiotr Jasiukajtis 
442*25c28e83SPiotr Jasiukajtis 		ibase0 = HI(&hyp0);
443*25c28e83SPiotr Jasiukajtis 
444*25c28e83SPiotr Jasiukajtis 		HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1));
445*25c28e83SPiotr Jasiukajtis 
446*25c28e83SPiotr Jasiukajtis 		hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000;
447*25c28e83SPiotr Jasiukajtis 		HI(&hyp0) = hyp0h;
448*25c28e83SPiotr Jasiukajtis 		HI(&h_hi0) = hyp0h & 0x7fffc000;
449*25c28e83SPiotr Jasiukajtis 
450*25c28e83SPiotr Jasiukajtis 		ibase0 >>= 10;
451*25c28e83SPiotr Jasiukajtis 		si0 = ibase0 & 0x7f0;
452*25c28e83SPiotr Jasiukajtis 		xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0];
453*25c28e83SPiotr Jasiukajtis 
454*25c28e83SPiotr Jasiukajtis 		xx0 = (hyp0 - h_hi0) * xx0;
455*25c28e83SPiotr Jasiukajtis 		res0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[1];
456*25c28e83SPiotr Jasiukajtis 		res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
457*25c28e83SPiotr Jasiukajtis 		res0 *= dbase0;
458*25c28e83SPiotr Jasiukajtis 		*pz = res0;
459*25c28e83SPiotr Jasiukajtis 		pz += stridez;
460*25c28e83SPiotr Jasiukajtis 	}
461*25c28e83SPiotr Jasiukajtis }
462