xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfsqrt.S (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric//===----------------------Hexagon builtin routine ------------------------===//
2*0b57cec5SDimitry Andric//
3*0b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric//
7*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric
9*0b57cec5SDimitry Andric// Double Precision square root
10*0b57cec5SDimitry Andric
// Register aliases and constants for the double-precision square root.
// Aliases are scoped with #define/#undef further down, so one physical
// register carries different names in different phases of the routine.

// EXP: exponent of the argument; later the exponent adjustment of the result.
11*0b57cec5SDimitry Andric#define EXP r28
12*0b57cec5SDimitry Andric
// A: the 64-bit argument in r1:0; AH / AL are its high / low words.
13*0b57cec5SDimitry Andric#define A r1:0
14*0b57cec5SDimitry Andric#define AH r1
15*0b57cec5SDimitry Andric#define AL r0
16*0b57cec5SDimitry Andric
// SFSH: register pair for the single-precision root (SF_S) and the
// half reciprocal root (SF_H).
17*0b57cec5SDimitry Andric#define SFSH r3:2
18*0b57cec5SDimitry Andric#define SF_S r3
19*0b57cec5SDimitry Andric#define SF_H r2
20*0b57cec5SDimitry Andric
// SFHALF_SONE: pair loaded by a single combine: the integer 1 (S_ONE) and
// the single-precision 0.5 constant (SFHALF).
// SF_D: error term, SF_E: reciprocal square root estimate,
// RECIPEST: fixed-point reciprocal root estimate, SFRAD: single-precision radicand.
21*0b57cec5SDimitry Andric#define SFHALF_SONE r5:4
22*0b57cec5SDimitry Andric#define S_ONE r4
23*0b57cec5SDimitry Andric#define SFHALF r5
24*0b57cec5SDimitry Andric#define SF_D r6
25*0b57cec5SDimitry Andric#define SF_E r7
26*0b57cec5SDimitry Andric#define RECIPEST r8
27*0b57cec5SDimitry Andric#define SFRAD r9
28*0b57cec5SDimitry Andric
// FRACRAD: the radicand as a 64-bit fixed-point fraction.
29*0b57cec5SDimitry Andric#define FRACRAD r11:10
30*0b57cec5SDimitry Andric#define FRACRADH r11
31*0b57cec5SDimitry Andric#define FRACRADL r10
32*0b57cec5SDimitry Andric
// ROOT: the 64-bit fixed-point root being refined.
33*0b57cec5SDimitry Andric#define ROOT r13:12
34*0b57cec5SDimitry Andric#define ROOTHI r13
35*0b57cec5SDimitry Andric#define ROOTLO r12
36*0b57cec5SDimitry Andric
// PROD: 64-bit scratch for extracted fields and partial products.
37*0b57cec5SDimitry Andric#define PROD r15:14
38*0b57cec5SDimitry Andric#define PRODHI r15
39*0b57cec5SDimitry Andric#define PRODLO r14
40*0b57cec5SDimitry Andric
// Predicate registers.
41*0b57cec5SDimitry Andric#define P_TMP p0
42*0b57cec5SDimitry Andric#define P_EXP1 p1
43*0b57cec5SDimitry Andric#define NORMAL p2
44*0b57cec5SDimitry Andric
// Field widths of the single- and double-precision formats.
45*0b57cec5SDimitry Andric#define SF_EXPBITS 8
46*0b57cec5SDimitry Andric#define SF_MANTBITS 23
47*0b57cec5SDimitry Andric
48*0b57cec5SDimitry Andric#define DF_EXPBITS 11
49*0b57cec5SDimitry Andric#define DF_MANTBITS 52
50*0b57cec5SDimitry Andric
// Double-precision exponent bias.
51*0b57cec5SDimitry Andric#define DF_BIAS 0x3ff
52*0b57cec5SDimitry Andric
// Class mask bits passed as the immediate operand of dfclass. Each class has
// its own bit; DFCLASS_DENORMAL was previously 0x02, which duplicated
// DFCLASS_NORMAL and broke the one-bit-per-class pattern of the other masks.
53*0b57cec5SDimitry Andric#define DFCLASS_ZERO     0x01
54*0b57cec5SDimitry Andric#define DFCLASS_NORMAL   0x02
55*0b57cec5SDimitry Andric#define DFCLASS_DENORMAL 0x04
56*0b57cec5SDimitry Andric#define DFCLASS_INFINITE 0x08
57*0b57cec5SDimitry Andric#define DFCLASS_NAN      0x10
58*0b57cec5SDimitry Andric
// Alias helpers: each declares a global symbol with the given prefix, sets it
// equal to __hexagon_<TAG>, and marks it as a function.
// END(TAG) records the size of TAG as the distance from its label to here.
59*0b57cec5SDimitry Andric#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function
60*0b57cec5SDimitry Andric#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function
61*0b57cec5SDimitry Andric#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function
62*0b57cec5SDimitry Andric#define END(TAG) .size TAG,.-TAG
63*0b57cec5SDimitry Andric
// Entry point of the double-precision square root.
// The argument arrives in r1:0 (A). Both __hexagon_sqrtdf2 and __hexagon_sqrt
// label the same code, and each receives __qdsp_, __hexagon_fast_ and
// __hexagon_fast2_ aliases through the macros above.
64*0b57cec5SDimitry Andric	.text
65*0b57cec5SDimitry Andric	.global __hexagon_sqrtdf2
66*0b57cec5SDimitry Andric	.type __hexagon_sqrtdf2,@function
67*0b57cec5SDimitry Andric	.global __hexagon_sqrt
68*0b57cec5SDimitry Andric	.type __hexagon_sqrt,@function
69*0b57cec5SDimitry Andric	Q6_ALIAS(sqrtdf2)
70*0b57cec5SDimitry Andric	Q6_ALIAS(sqrt)
71*0b57cec5SDimitry Andric	FAST_ALIAS(sqrtdf2)
72*0b57cec5SDimitry Andric	FAST_ALIAS(sqrt)
73*0b57cec5SDimitry Andric	FAST2_ALIAS(sqrtdf2)
74*0b57cec5SDimitry Andric	FAST2_ALIAS(sqrt)
	// NOTE(review): no plain `sqrt` label is defined in this file; confirm
	// whether this .type directive is intentional.
75*0b57cec5SDimitry Andric	.type sqrt,@function
76*0b57cec5SDimitry Andric	.p2align 5
77*0b57cec5SDimitry Andric__hexagon_sqrtdf2:
78*0b57cec5SDimitry Andric__hexagon_sqrt:
79*0b57cec5SDimitry Andric	{
		// PROD   = 24 bits of A starting at bit 29: the top 23 mantissa bits
		//          plus the least significant exponent bit.
		// EXP    = the 11-bit biased exponent field of the high word.
		// SFHALF = 0x3f000004, S_ONE = 1.
80*0b57cec5SDimitry Andric		PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)
81*0b57cec5SDimitry Andric		EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
82*0b57cec5SDimitry Andric		SFHALF_SONE = combine(##0x3f000004,#1)
83*0b57cec5SDimitry Andric	}
84*0b57cec5SDimitry Andric	{
		// Both compares write the same predicate in one packet; per the
		// comments below the fall-through case is "normal and positive".
		// Everything else goes to .Lsqrt_abnormal.
85*0b57cec5SDimitry Andric		NORMAL = dfclass(A,#DFCLASS_NORMAL)		// Is it normal
86*0b57cec5SDimitry Andric		NORMAL = cmp.gt(AH,#-1)				// and positive?
87*0b57cec5SDimitry Andric		if (!NORMAL.new) jump:nt .Lsqrt_abnormal
		// Build the single-precision radicand: the extracted bits OR'ed into
		// the 0x3f000004 pattern, so the exponent LSB of A lands in the
		// exponent LSB of the float.
88*0b57cec5SDimitry Andric		SFRAD = or(SFHALF,PRODLO)
89*0b57cec5SDimitry Andric	}
90*0b57cec5SDimitry Andric#undef NORMAL
// Restart point shared by the normal path and the denormal path.
// Expected on entry:
//   A (r1:0) - argument (the denormal path normalizes it before jumping here)
//   EXP      - exponent
//   SFRAD    - single-precision radicand built from the top mantissa bits
//   SFHALF   - 0x3f000004, or already masked to 0x3f000000
//   S_ONE    - 1, loaded by the entry packet
// This section runs a single-precision Newton-Raphson refinement of the
// reciprocal square root and turns it into a fixed-point estimate (RECIPEST)
// and a first 64-bit fixed-point ROOT.
91*0b57cec5SDimitry Andric.Ldenormal_restart:
92*0b57cec5SDimitry Andric	{
		// Keep a copy of the argument, start the reciprocal root estimate,
		// clear the low four bits of SFHALF (leaving 0x3f000000 = 0.5f) and
		// zero the S/H accumulators.
93*0b57cec5SDimitry Andric		FRACRAD = A
94*0b57cec5SDimitry Andric		SF_E,P_TMP = sfinvsqrta(SFRAD)
95*0b57cec5SDimitry Andric		SFHALF = and(SFHALF,#-16)
96*0b57cec5SDimitry Andric		SFSH = #0
97*0b57cec5SDimitry Andric	}
// From here on r1:0 is reused as the ERROR accumulator.
98*0b57cec5SDimitry Andric#undef A
99*0b57cec5SDimitry Andric#undef AH
100*0b57cec5SDimitry Andric#undef AL
101*0b57cec5SDimitry Andric#define ERROR r1:0
102*0b57cec5SDimitry Andric#define ERRORHI r1
103*0b57cec5SDimitry Andric#define ERRORLO r0
104*0b57cec5SDimitry Andric	// SF_E : reciprocal square root
105*0b57cec5SDimitry Andric	// SF_H : half rsqrt
106*0b57cec5SDimitry Andric	// SF_S : square root
107*0b57cec5SDimitry Andric	// SF_D : error term
108*0b57cec5SDimitry Andric	// SFHALF: 0.5
109*0b57cec5SDimitry Andric	{
110*0b57cec5SDimitry Andric		SF_S += sfmpy(SF_E,SFRAD):lib		// s0: root
111*0b57cec5SDimitry Andric		SF_H += sfmpy(SF_E,SFHALF):lib		// h0: 0.5*y0. Could also decrement exponent...
112*0b57cec5SDimitry Andric		SF_D = SFHALF
// r9 changes role: SFRAD is still read by the sfmpy above (same packet),
// and the register is rewritten as a shift amount.
113*0b57cec5SDimitry Andric#undef SFRAD
114*0b57cec5SDimitry Andric#define SHIFTAMT r9
115*0b57cec5SDimitry Andric		SHIFTAMT = and(EXP,#1)			// low bit of the exponent
116*0b57cec5SDimitry Andric	}
117*0b57cec5SDimitry Andric	{
118*0b57cec5SDimitry Andric		SF_D -= sfmpy(SF_S,SF_H):lib		// d0: 0.5-H*S = 0.5-0.5*~1
119*0b57cec5SDimitry Andric		FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)	// replace upper bits with hidden
120*0b57cec5SDimitry Andric		P_EXP1 = cmp.gtu(SHIFTAMT,#0)		// true when the exponent is odd
121*0b57cec5SDimitry Andric	}
122*0b57cec5SDimitry Andric	{
123*0b57cec5SDimitry Andric		SF_S += sfmpy(SF_S,SF_D):lib		// s1: refine sqrt
124*0b57cec5SDimitry Andric		SF_H += sfmpy(SF_H,SF_D):lib		// h1: refine half-recip
125*0b57cec5SDimitry Andric		SF_D = SFHALF
126*0b57cec5SDimitry Andric		SHIFTAMT = mux(P_EXP1,#8,#9)		// 8 for odd exponent, 9 for even
127*0b57cec5SDimitry Andric	}
128*0b57cec5SDimitry Andric	{
		// The asl reads the SHIFTAMT written by the previous packet (8 or 9);
		// the mux in this packet prepares the next shift (3 or 2).
129*0b57cec5SDimitry Andric		SF_D -= sfmpy(SF_S,SF_H):lib		// d1: error term
130*0b57cec5SDimitry Andric		FRACRAD = asl(FRACRAD,SHIFTAMT)		// Move fracrad bits to right place
131*0b57cec5SDimitry Andric		SHIFTAMT = mux(P_EXP1,#3,#2)
132*0b57cec5SDimitry Andric	}
133*0b57cec5SDimitry Andric	{
134*0b57cec5SDimitry Andric		SF_H += sfmpy(SF_H,SF_D):lib		// d2: rsqrt
135*0b57cec5SDimitry Andric		// cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x).
136*0b57cec5SDimitry Andric		PROD = asl(FRACRAD,SHIFTAMT)		// fracrad<<(2+exp1)
137*0b57cec5SDimitry Andric	}
138*0b57cec5SDimitry Andric	{
		// Keep only the 23 mantissa bits of the refined half reciprocal root.
139*0b57cec5SDimitry Andric		SF_H = and(SF_H,##0x007fffff)
140*0b57cec5SDimitry Andric	}
141*0b57cec5SDimitry Andric	{
		// Add the hidden bit (0x00800000) less 3.
		// NOTE(review): the -3 presumably biases the estimate downward; confirm.
142*0b57cec5SDimitry Andric		SF_H = add(SF_H,##0x00800000 - 3)
143*0b57cec5SDimitry Andric		SHIFTAMT = mux(P_EXP1,#7,#8)
144*0b57cec5SDimitry Andric	}
145*0b57cec5SDimitry Andric	{
		// Left-justify the estimate (shift by 7 or 8 from the previous packet)
		// and set up the shift used by the refinement iterations (13 or 14).
146*0b57cec5SDimitry Andric		RECIPEST = asl(SF_H,SHIFTAMT)
147*0b57cec5SDimitry Andric		SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0))
148*0b57cec5SDimitry Andric	}
149*0b57cec5SDimitry Andric	{
150*0b57cec5SDimitry Andric		ROOT = mpyu(RECIPEST,PRODHI)		// root = mpyu_full(recipest,hi(fracrad<<(2+exp1)))
151*0b57cec5SDimitry Andric	}
152*0b57cec5SDimitry Andric
// Fixed-point refinement of ROOT in three passes.
// Each pass forms ERROR = (FRACRAD << k) minus shifted partial products of
// ROOT*ROOT, multiplies the high word of ERROR by RECIPEST, and adds that
// product, shifted right by SHIFTAMT, to ROOT. SHIFTAMT grows by 16 after
// each of the first two passes.
// The single-precision registers are released first and r3:2, r5:4, r7:6
// are renamed for the partial products used by the later root adjustment.
153*0b57cec5SDimitry Andric#undef SFSH	// r3:2
154*0b57cec5SDimitry Andric#undef SF_H	// r2
155*0b57cec5SDimitry Andric#undef SF_S	// r3
156*0b57cec5SDimitry Andric#undef S_ONE	// r4
157*0b57cec5SDimitry Andric#undef SFHALF	// r5
158*0b57cec5SDimitry Andric#undef SFHALF_SONE	// r5:4
159*0b57cec5SDimitry Andric#undef SF_D	// r6
160*0b57cec5SDimitry Andric#undef SF_E	// r7
161*0b57cec5SDimitry Andric
162*0b57cec5SDimitry Andric#define HL r3:2
163*0b57cec5SDimitry Andric#define LL r5:4
164*0b57cec5SDimitry Andric#define HH r7:6
165*0b57cec5SDimitry Andric
// p1..p3 become carry predicates. They are set to true below (cmp.eq of a
// register with itself) so they can serve as the initial carry-in of the
// sub ...:carry chains in the root adjustment.
166*0b57cec5SDimitry Andric#undef P_EXP1
167*0b57cec5SDimitry Andric#define P_CARRY0 p1
168*0b57cec5SDimitry Andric#define P_CARRY1 p2
169*0b57cec5SDimitry Andric#define P_CARRY2 p3
170*0b57cec5SDimitry Andric
171*0b57cec5SDimitry Andric	// Iteration 0
172*0b57cec5SDimitry Andric	// Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply
173*0b57cec5SDimitry Andric	// We can shift and subtract instead of shift and add?
174*0b57cec5SDimitry Andric	{
175*0b57cec5SDimitry Andric		ERROR = asl(FRACRAD,#15)
176*0b57cec5SDimitry Andric		PROD = mpyu(ROOTHI,ROOTHI)
177*0b57cec5SDimitry Andric		P_CARRY0 = cmp.eq(r0,r0)		// always true
178*0b57cec5SDimitry Andric	}
179*0b57cec5SDimitry Andric	{
180*0b57cec5SDimitry Andric		ERROR -= asl(PROD,#15)
181*0b57cec5SDimitry Andric		PROD = mpyu(ROOTHI,ROOTLO)
182*0b57cec5SDimitry Andric		P_CARRY1 = cmp.eq(r0,r0)		// always true
183*0b57cec5SDimitry Andric	}
184*0b57cec5SDimitry Andric	{
185*0b57cec5SDimitry Andric		ERROR -= lsr(PROD,#16)
186*0b57cec5SDimitry Andric		P_CARRY2 = cmp.eq(r0,r0)		// always true
187*0b57cec5SDimitry Andric	}
188*0b57cec5SDimitry Andric	{
189*0b57cec5SDimitry Andric		ERROR = mpyu(ERRORHI,RECIPEST)		// correction = hi(error) * recipest
190*0b57cec5SDimitry Andric	}
191*0b57cec5SDimitry Andric	{
		// The lsr reads ERROR and SHIFTAMT as they were before this packet;
		// the other two instructions prepare the next iteration.
192*0b57cec5SDimitry Andric		ROOT += lsr(ERROR,SHIFTAMT)
193*0b57cec5SDimitry Andric		SHIFTAMT = add(SHIFTAMT,#16)
194*0b57cec5SDimitry Andric		ERROR = asl(FRACRAD,#31)		// for next iter
195*0b57cec5SDimitry Andric	}
196*0b57cec5SDimitry Andric	// Iteration 1
197*0b57cec5SDimitry Andric	{
198*0b57cec5SDimitry Andric		PROD = mpyu(ROOTHI,ROOTHI)
199*0b57cec5SDimitry Andric		ERROR -= mpyu(ROOTHI,ROOTLO)	// amount is 31, no shift needed
200*0b57cec5SDimitry Andric	}
201*0b57cec5SDimitry Andric	{
202*0b57cec5SDimitry Andric		ERROR -= asl(PROD,#31)
203*0b57cec5SDimitry Andric		PROD = mpyu(ROOTLO,ROOTLO)
204*0b57cec5SDimitry Andric	}
205*0b57cec5SDimitry Andric	{
206*0b57cec5SDimitry Andric		ERROR -= lsr(PROD,#33)
207*0b57cec5SDimitry Andric	}
208*0b57cec5SDimitry Andric	{
209*0b57cec5SDimitry Andric		ERROR = mpyu(ERRORHI,RECIPEST)
210*0b57cec5SDimitry Andric	}
211*0b57cec5SDimitry Andric	{
212*0b57cec5SDimitry Andric		ROOT += lsr(ERROR,SHIFTAMT)
213*0b57cec5SDimitry Andric		SHIFTAMT = add(SHIFTAMT,#16)
214*0b57cec5SDimitry Andric		ERROR = asl(FRACRAD,#47)	// for next iter
215*0b57cec5SDimitry Andric	}
216*0b57cec5SDimitry Andric	// Iteration 2
217*0b57cec5SDimitry Andric	{
218*0b57cec5SDimitry Andric		PROD = mpyu(ROOTHI,ROOTHI)
219*0b57cec5SDimitry Andric	}
220*0b57cec5SDimitry Andric	{
221*0b57cec5SDimitry Andric		ERROR -= asl(PROD,#47)
222*0b57cec5SDimitry Andric		PROD = mpyu(ROOTHI,ROOTLO)
223*0b57cec5SDimitry Andric	}
224*0b57cec5SDimitry Andric	{
225*0b57cec5SDimitry Andric		ERROR -= asl(PROD,#16)		// bidir shr 31-47
226*0b57cec5SDimitry Andric		PROD = mpyu(ROOTLO,ROOTLO)
227*0b57cec5SDimitry Andric	}
228*0b57cec5SDimitry Andric	{
229*0b57cec5SDimitry Andric		ERROR -= lsr(PROD,#17)		// 64-47
230*0b57cec5SDimitry Andric	}
231*0b57cec5SDimitry Andric	{
232*0b57cec5SDimitry Andric		ERROR = mpyu(ERRORHI,RECIPEST)
233*0b57cec5SDimitry Andric	}
234*0b57cec5SDimitry Andric	{
235*0b57cec5SDimitry Andric		ROOT += lsr(ERROR,SHIFTAMT)
236*0b57cec5SDimitry Andric	}
// Final correction, rounding information and result assembly.
// A remainder is formed from FRACRAD and the partial products of ROOT*ROOT
// (low half in REM_LO, high half in REM_HI, linked by the carry predicate).
// Then, twice, 2*ROOT+1 is subtracted from the remainder; when that does not
// borrow, ROOT is incremented and the reduced remainder is kept. A nonzero
// remainder finally sets the low bit of ROOT before the conversion to double.
237*0b57cec5SDimitry Andric#undef ERROR
238*0b57cec5SDimitry Andric#undef PROD
239*0b57cec5SDimitry Andric#undef PRODHI
240*0b57cec5SDimitry Andric#undef PRODLO
241*0b57cec5SDimitry Andric#define REM_HI r15:14
242*0b57cec5SDimitry Andric#define REM_HI_HI r15
243*0b57cec5SDimitry Andric#define REM_LO r1:0
244*0b57cec5SDimitry Andric#undef RECIPEST
245*0b57cec5SDimitry Andric#undef SHIFTAMT
246*0b57cec5SDimitry Andric#define TWOROOT_LO r9:8
247*0b57cec5SDimitry Andric	// Adjust Root
248*0b57cec5SDimitry Andric	{
		// Cross and low partial products of ROOT*ROOT; clear both remainder halves.
249*0b57cec5SDimitry Andric		HL = mpyu(ROOTHI,ROOTLO)
250*0b57cec5SDimitry Andric		LL = mpyu(ROOTLO,ROOTLO)
251*0b57cec5SDimitry Andric		REM_HI = #0
252*0b57cec5SDimitry Andric		REM_LO = #0
253*0b57cec5SDimitry Andric	}
254*0b57cec5SDimitry Andric	{
		// Both accumulations read the HL and LL values from before this packet.
255*0b57cec5SDimitry Andric		HL += lsr(LL,#33)
256*0b57cec5SDimitry Andric		LL += asl(HL,#33)
257*0b57cec5SDimitry Andric		P_CARRY0 = cmp.eq(r0,r0)		// carry-in = true (no borrow)
258*0b57cec5SDimitry Andric	}
259*0b57cec5SDimitry Andric	{
		// Low remainder = 0 - LL; P_CARRY0 receives the carry-out for the
		// high-half subtraction below. TWOROOT_LO starts at 1.
260*0b57cec5SDimitry Andric		HH = mpyu(ROOTHI,ROOTHI)
261*0b57cec5SDimitry Andric		REM_LO = sub(REM_LO,LL,P_CARRY0):carry
262*0b57cec5SDimitry Andric		TWOROOT_LO = #1
263*0b57cec5SDimitry Andric	}
264*0b57cec5SDimitry Andric	{
265*0b57cec5SDimitry Andric		HH += lsr(HL,#31)
266*0b57cec5SDimitry Andric		TWOROOT_LO += asl(ROOT,#1)		// TWOROOT_LO = 2*ROOT + 1
267*0b57cec5SDimitry Andric	}
268*0b57cec5SDimitry Andric#undef HL
269*0b57cec5SDimitry Andric#undef LL
270*0b57cec5SDimitry Andric#define REM_HI_TMP r3:2
271*0b57cec5SDimitry Andric#define REM_HI_TMP_HI r3
272*0b57cec5SDimitry Andric#define REM_LO_TMP r5:4
273*0b57cec5SDimitry Andric	{
		// High remainder = FRACRAD - HH with the carry from the low half.
		// First trial subtraction of 2*ROOT+1 from the low remainder;
		// P_CARRY1 (set true earlier) is both carry-in and carry-out.
274*0b57cec5SDimitry Andric		REM_HI = sub(FRACRAD,HH,P_CARRY0):carry
275*0b57cec5SDimitry Andric		REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry
// r11:10 and r7:6 are free now and are reused as 64-bit constants.
276*0b57cec5SDimitry Andric#undef FRACRAD
277*0b57cec5SDimitry Andric#undef HH
278*0b57cec5SDimitry Andric#define ZERO r11:10
279*0b57cec5SDimitry Andric#define ONE r7:6
280*0b57cec5SDimitry Andric		ONE = #1
281*0b57cec5SDimitry Andric		ZERO = #0
282*0b57cec5SDimitry Andric	}
283*0b57cec5SDimitry Andric	{
		// Propagate the trial borrow into the high half; ONE becomes ROOT+1.
284*0b57cec5SDimitry Andric		REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry
285*0b57cec5SDimitry Andric		ONE = add(ROOT,ONE)
286*0b57cec5SDimitry Andric		EXP = add(EXP,#-DF_BIAS)			// subtract bias --> signed exp
287*0b57cec5SDimitry Andric	}
288*0b57cec5SDimitry Andric	{
289*0b57cec5SDimitry Andric				// If carry set, no borrow: result was still positive
290*0b57cec5SDimitry Andric		if (P_CARRY1) ROOT = ONE
291*0b57cec5SDimitry Andric		if (P_CARRY1) REM_LO = REM_LO_TMP
292*0b57cec5SDimitry Andric		if (P_CARRY1) REM_HI = REM_HI_TMP
293*0b57cec5SDimitry Andric	}
294*0b57cec5SDimitry Andric	{
		// Second trial subtraction, using P_CARRY2 as the carry predicate.
		// TWOROOT_LO is not recomputed after a possible increment of ROOT.
295*0b57cec5SDimitry Andric		REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry
296*0b57cec5SDimitry Andric		ONE = #1
297*0b57cec5SDimitry Andric		EXP = asr(EXP,#1)				// divide signed exp by 2
298*0b57cec5SDimitry Andric	}
299*0b57cec5SDimitry Andric	{
300*0b57cec5SDimitry Andric		REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry
301*0b57cec5SDimitry Andric		ONE = add(ROOT,ONE)
302*0b57cec5SDimitry Andric	}
303*0b57cec5SDimitry Andric	{
304*0b57cec5SDimitry Andric		if (P_CARRY2) ROOT = ONE
305*0b57cec5SDimitry Andric		if (P_CARRY2) REM_LO = REM_LO_TMP
306*0b57cec5SDimitry Andric								// since tworoot <= 2^32, remhi must be zero
307*0b57cec5SDimitry Andric#undef REM_HI_TMP
308*0b57cec5SDimitry Andric#undef REM_HI_TMP_HI
309*0b57cec5SDimitry Andric#define S_ONE r2
310*0b57cec5SDimitry Andric#define ADJ r3
311*0b57cec5SDimitry Andric		S_ONE = #1
312*0b57cec5SDimitry Andric	}
313*0b57cec5SDimitry Andric	{
		// A nonzero low remainder sets bit 0 of ROOT. ADJ is the count of
		// leading zeros of ROOT as it was before this packet.
314*0b57cec5SDimitry Andric		P_TMP = cmp.eq(REM_LO,ZERO)			// is the low part zero
315*0b57cec5SDimitry Andric		if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE)	// if so, it's exact... hopefully
316*0b57cec5SDimitry Andric		ADJ = cl0(ROOT)
317*0b57cec5SDimitry Andric		EXP = add(EXP,#-63)
318*0b57cec5SDimitry Andric	}
319*0b57cec5SDimitry Andric#undef REM_LO
// r1:0 now carries the return value.
320*0b57cec5SDimitry Andric#define RET r1:0
321*0b57cec5SDimitry Andric#define RETHI r1
322*0b57cec5SDimitry Andric	{
		// NOTE(review): the add of ADJ presumably cancels the normalization
		// that convert_ud2df applies for the leading zeros of ROOT; confirm.
323*0b57cec5SDimitry Andric		RET = convert_ud2df(ROOT)			// set up mantissa, maybe set inexact flag
324*0b57cec5SDimitry Andric		EXP = add(EXP,ADJ)				// add back bias
325*0b57cec5SDimitry Andric	}
326*0b57cec5SDimitry Andric	{
		// Shift EXP into the exponent field position of the high word, add it, and return.
327*0b57cec5SDimitry Andric		RETHI += asl(EXP,#DF_MANTBITS-32)		// add exponent adjust
328*0b57cec5SDimitry Andric		jumpr r31
329*0b57cec5SDimitry Andric	}
// Release the aliases of this phase. REM_HI_TMP, REM_HI_TMP_HI and REM_LO
// were already undefined above; undefining them again is harmless.
330*0b57cec5SDimitry Andric#undef REM_LO_TMP
331*0b57cec5SDimitry Andric#undef REM_HI_TMP
332*0b57cec5SDimitry Andric#undef REM_HI_TMP_HI
333*0b57cec5SDimitry Andric#undef REM_LO
334*0b57cec5SDimitry Andric#undef REM_HI
335*0b57cec5SDimitry Andric#undef TWOROOT_LO
336*0b57cec5SDimitry Andric
// Abnormal-input path, reached from the entry when the argument is not a
// positive normal number. r1:0 still holds the untouched argument here, so it
// is named A again. AL is the low word r0 (it was previously defined as r1,
// the same register as AH).
337*0b57cec5SDimitry Andric#undef RET
338*0b57cec5SDimitry Andric#define A r1:0
339*0b57cec5SDimitry Andric#define AH r1
340*0b57cec5SDimitry Andric#define AL r0
341*0b57cec5SDimitry Andric#undef S_ONE
342*0b57cec5SDimitry Andric#define TMP r3:2
343*0b57cec5SDimitry Andric#define TMPHI r3
344*0b57cec5SDimitry Andric#define TMPLO r2
345*0b57cec5SDimitry Andric#undef P_CARRY0
346*0b57cec5SDimitry Andric#define P_NEG p1
347*0b57cec5SDimitry Andric
348*0b57cec5SDimitry Andric
349*0b57cec5SDimitry Andric#define SFHALF r5
350*0b57cec5SDimitry Andric#define SFRAD r9
// Dispatch order: zero, NaN, negative, infinity; whatever remains is a
// positive denormal, which is normalized and sent back to .Ldenormal_restart.
351*0b57cec5SDimitry Andric.Lsqrt_abnormal:
352*0b57cec5SDimitry Andric	{
		// Zero: return immediately with the argument unchanged in r1:0.
353*0b57cec5SDimitry Andric		P_TMP = dfclass(A,#DFCLASS_ZERO)			// zero?
354*0b57cec5SDimitry Andric		if (P_TMP.new) jumpr:t r31
355*0b57cec5SDimitry Andric	}
356*0b57cec5SDimitry Andric	{
357*0b57cec5SDimitry Andric		P_TMP = dfclass(A,#DFCLASS_NAN)
358*0b57cec5SDimitry Andric		if (P_TMP.new) jump:nt .Lsqrt_nan
359*0b57cec5SDimitry Andric	}
360*0b57cec5SDimitry Andric	{
		// Negative (sign bit set): load a single-precision signaling NaN
		// pattern into EXP and branch; both conditional instructions execute
		// in this packet.
361*0b57cec5SDimitry Andric		P_TMP = cmp.gt(AH,#-1)
362*0b57cec5SDimitry Andric		if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg
363*0b57cec5SDimitry Andric		if (!P_TMP.new) EXP = ##0x7F800001			// sNaN
364*0b57cec5SDimitry Andric	}
365*0b57cec5SDimitry Andric	{
		// Infinity (negatives were dispatched above): return the argument unchanged.
366*0b57cec5SDimitry Andric		P_TMP = dfclass(A,#DFCLASS_INFINITE)
367*0b57cec5SDimitry Andric		if (P_TMP.new) jumpr:nt r31
368*0b57cec5SDimitry Andric	}
369*0b57cec5SDimitry Andric	// If we got here, we're denormal
370*0b57cec5SDimitry Andric	// prepare to restart
371*0b57cec5SDimitry Andric	{
372*0b57cec5SDimitry Andric		A = extractu(A,#DF_MANTBITS,#0)		// Extract mantissa
373*0b57cec5SDimitry Andric	}
374*0b57cec5SDimitry Andric	{
		// Leading-bit count less the 11 exponent bits = left shift that moves
		// the top set mantissa bit to the hidden-bit position.
375*0b57cec5SDimitry Andric		EXP = add(clb(A),#-DF_EXPBITS)		// how much to normalize?
376*0b57cec5SDimitry Andric	}
377*0b57cec5SDimitry Andric	{
		// The shift reads EXP as set by the previous packet; EXP then becomes
		// 1 - shift.
378*0b57cec5SDimitry Andric		A = asl(A,EXP)				// Shift mantissa
379*0b57cec5SDimitry Andric		EXP = sub(#1,EXP)			// Form exponent
380*0b57cec5SDimitry Andric	}
381*0b57cec5SDimitry Andric	{
382*0b57cec5SDimitry Andric		AH = insert(EXP,#1,#DF_MANTBITS-32)		// insert lsb of exponent
383*0b57cec5SDimitry Andric	}
384*0b57cec5SDimitry Andric	{
		// Same field extraction and constant as the entry packet.
385*0b57cec5SDimitry Andric		TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)	// get sf value (mant+exp1)
386*0b57cec5SDimitry Andric		SFHALF = ##0x3f000004						// form half constant
387*0b57cec5SDimitry Andric	}
388*0b57cec5SDimitry Andric	{
		// SFRAD is built from the unmasked SFHALF (in-packet reads see the old
		// value). The restart packet masks SFHALF again, which changes nothing.
389*0b57cec5SDimitry Andric		SFRAD = or(SFHALF,TMPLO)			// form sf value
390*0b57cec5SDimitry Andric		SFHALF = and(SFHALF,#-16)
391*0b57cec5SDimitry Andric		jump .Ldenormal_restart				// restart
392*0b57cec5SDimitry Andric	}
// NaN input: the conversion result written to EXP is not used afterwards;
// per the original comment it is executed so a signaling NaN raises invalid.
// The value returned in r1:0 is all ones.
393*0b57cec5SDimitry Andric.Lsqrt_nan:
394*0b57cec5SDimitry Andric	{
395*0b57cec5SDimitry Andric		EXP = convert_df2sf(A)				// if sNaN, get invalid
396*0b57cec5SDimitry Andric		A = #-1						// qNaN
397*0b57cec5SDimitry Andric		jumpr r31
398*0b57cec5SDimitry Andric	}
// Negative input: EXP was loaded with the single-precision pattern 0x7F800001
// by .Lsqrt_abnormal before branching here; converting it to double produces
// the value returned in r1:0.
399*0b57cec5SDimitry Andric.Lsqrt_invalid_neg:
400*0b57cec5SDimitry Andric	{
401*0b57cec5SDimitry Andric		A = convert_sf2df(EXP)				// Invalid,NaNval
402*0b57cec5SDimitry Andric		jumpr r31
403*0b57cec5SDimitry Andric	}
// Record the size of both entry-point symbols.
404*0b57cec5SDimitry AndricEND(__hexagon_sqrt)
405*0b57cec5SDimitry AndricEND(__hexagon_sqrtdf2)
406