xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfaddsub.S (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1//===----------------------Hexagon builtin routine ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Double Precision Add and Subtract
10
11#define A r1:0			// first operand (register pair); also carries the result back to the caller
12#define AH r1			// word of A that holds the sign bit (bit 31) and the exponent field
13#define AL r0
14#define B r3:2			// second operand (register pair)
15#define BH r3			// word of B that holds the sign bit (bit 31) and the exponent field
16#define BL r2
17// Exponent fields extracted from AH/BH.  EXPB is reused later as the exponent adjustment.
18#define EXPA r4
19#define EXPB r5
20#define EXPB_A r5:4		// pair view of EXPB:EXPA so one combine can swap both
21// ZTMP carries small constant pairs (#62:#1 first, #54:##2045 later).
22#define ZTMP r7:6
23#define ZTMPH r7
24#define ZTMPL r6
25// ATMP/BTMP are the 64-bit fixed-point mantissa work registers for A and B.
26#define ATMP r13:12
27#define ATMPH r13
28#define ATMPL r12
29
30#define BTMP r9:8
31#define BTMPH r9
32#define BTMPL r8
33// ATMP2 is scratch: negated ATMP, then the bits shifted out of BTMP, then ATMP-BTMP.
34#define ATMP2 r11:10
35#define ATMP2H r11
36#define ATMP2L r10
37// r15:14 is first used as the operand pair of the register form of extractu, then re-#defined as ZERO.
38#define EXPDIFF r15
39#define EXTRACTOFF r14
40#define EXTRACTAMT r15:14
41
42#define TMP r28			// general scratch (USR copies, shift counts)
43
44#define MANTBITS 52		// explicit mantissa bits in a double
45#define HI_MANTBITS 20		// mantissa bits that sit in the high word, below the exponent field
46#define EXPBITS 11		// width of the exponent field
47#define BIAS 1024		// only used below as #-BIAS-60 and ##BIAS+BIAS-2
48#define MANTISSA_TO_INT_BIAS 52	// not referenced by any instruction shown in this file
49#define SR_BIT_INEXACT 5	// not referenced by any instruction shown in this file
50// Bit offset of the 2-bit rounding-mode field read out of USR; may be overridden from outside.
51#ifndef SR_ROUND_OFF
52#define SR_ROUND_OFF 22
53#endif
54// Predicate names for the entry packets; both are #undef-ed before .Ladd_continue.
55#define NORMAL p3
56#define BIGB p2
57// Helpers that publish additional global names bound to the __hexagon_<TAG> symbol, and emit .size.
58#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
59#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
60#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
61#define END(TAG) .size TAG,.-TAG
62
63	.text
64	.global __hexagon_adddf3		// exported entry points
65	.global __hexagon_subdf3
66	.type __hexagon_adddf3, @function
67	.type __hexagon_subdf3, @function
68	// Each entry point is also published as __qdsp_*, __hexagon_fast_* and __hexagon_fast2_*
69Q6_ALIAS(adddf3)
70FAST_ALIAS(adddf3)
71FAST2_ALIAS(adddf3)
72Q6_ALIAS(subdf3)
73FAST_ALIAS(subdf3)
74FAST2_ALIAS(subdf3)
75
76	.p2align 5
77__hexagon_adddf3:			// A (r1:0) + B (r3:2) -> A (r1:0); the both-normal case runs straight down to the first jumpr
78	{				// extract the exponent fields and seed the mantissa template
79		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
80		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
81		ATMP = combine(##0x20000000,#0)		// bit 61 set: slot for the implicit leading one
82	}
83	{
84		NORMAL = dfclass(A,#2)			// both writes target the same predicate, so NORMAL
85		NORMAL = dfclass(B,#2)			// ends up true only when A and B are both normal
86		BTMP = ATMP				// same template for B
87		BIGB = cmp.gtu(EXPB,EXPA)			// Does B have the larger exponent field?
88	}
89	{
90		if (!NORMAL) jump .Ladd_abnormal		// If abnormal, go to special code
91		if (BIGB) A = B				// if B >> A, swap A and B (both moves read the pre-packet values)
92		if (BIGB) B = A				// If B >> A, swap A and B
93		if (BIGB) EXPB_A = combine(EXPA,EXPB)	// swap exponents
94	}
95	{
96		ATMP = insert(A,#MANTBITS,#EXPBITS-2)	// Q1.62: mantissa of A placed at bits 60..9 under the leading one
97		BTMP = insert(B,#MANTBITS,#EXPBITS-2)	// Q1.62: same layout for B
98		EXPDIFF = sub(EXPA,EXPB)			// alignment distance for B
99		ZTMP = combine(#62,#1)			// ZTMPH = shift cap, ZTMPL = sticky bit
100	}
101#undef BIGB
102#undef NORMAL
103#define B_POS p3
104#define A_POS p2
105#define NO_STICKIES p1
106.Ladd_continue:				// also entered from .Ladd_abnormal with ATMP/BTMP/EXPA/EXPDIFF/ZTMP prepared
107	{
108		EXPDIFF = min(EXPDIFF,ZTMPH)		// If exponent difference >= ~60,
109							// will collapse to sticky bit
110		ATMP2 = neg(ATMP)			// negated mantissa, used if A is negative
111		A_POS = cmp.gt(AH,#-1)			// sign bit of A clear?
112		EXTRACTOFF = #0				// extract from bit 0
113	}
114	{
115		if (!A_POS) ATMP = ATMP2		// ATMP is now a signed (two's complement) value
116		ATMP2 = extractu(BTMP,EXTRACTAMT)	// the low bits of BTMP that the shift below discards
117		BTMP = ASR(BTMP,EXPDIFF)		// align B to the exponent of A
118#undef EXTRACTAMT
119#undef EXPDIFF
120#undef EXTRACTOFF
121#define ZERO r15:14
122		ZERO = #0				// r15:14 reused as a 64-bit zero; the reads above see the old contents
123	}
124	{
125		NO_STICKIES = cmp.eq(ATMP2,ZERO)	// were all discarded bits zero?
126		if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)	// if not, fold them into the lowest bit as a sticky bit
127		EXPB = add(EXPA,#-BIAS-60)		// exponent adjustment applied after convert_d2df
128		B_POS = cmp.gt(BH,#-1)			// sign bit of B clear?
129	}
130	{
131		ATMP = add(ATMP,BTMP)			// ADD!!!
132		ATMP2 = sub(ATMP,BTMP)			// Negate and ADD --> SUB!!!  (same pre-packet ATMP as the add)
133		ZTMP = combine(#54,##2045)		// ZTMPH/ZTMPL = lower/upper bound on EXPA for the fast path
134	}
135	{
136		p0 = cmp.gtu(EXPA,ZTMPH)		// must be pretty high in case of large cancellation
137		p0 = !cmp.gtu(EXPA,ZTMPL)		// combined: 54 < EXPA <= 2045, else take the careful path
138		if (!p0.new) jump:nt .Ladd_ovf_unf
139		if (!B_POS) ATMP = ATMP2		// if B neg, pick difference
140	}
141	{
142		A = convert_d2df(ATMP)			// Convert to Double Precision, taking care of flags, etc.  So nice!
143		p0 = cmp.eq(ATMPH,#0)			// both word compares must hold: the
144		p0 = cmp.eq(ATMPL,#0)			// 64-bit sum is exactly zero
145		if (p0.new) jump:nt .Ladd_zero		// or maybe conversion handles zero case correctly?
146	}
147	{
148		AH += asl(EXPB,#HI_MANTBITS)		// add the adjustment into the exponent field of the result
149		jumpr r31
150	}
151	.falign
152__hexagon_subdf3:			// A - B, computed as A + (-B)
153	{
154		BH = togglebit(BH,#31)		// flip the sign bit of B
155		jump __qdsp_adddf3		// continue in the add routine via the alias that Q6_ALIAS(adddf3) binds to __hexagon_adddf3
156	}
157
158
159	.falign
160.Ladd_zero:				// reached when the 64-bit fixed-point sum in ATMP is exactly zero
161	// True zero, full cancellation
162	// +0 unless round towards negative infinity
163	{
164		TMP = USR			// fetch the status register to read the rounding mode
165		A = #0				// default result: +0
166		BH = #1
167	}
168	{
169		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding-mode field; use the shared, overridable offset rather than a literal 22
170		BH = asl(BH,#31)		// BH = sign-bit mask
171	}
172	{
173		p0 = cmp.eq(TMP,#2)		// mode 2: round towards negative infinity
174		if (p0.new) AH = xor(AH,BH)	// in that mode only, return -0
175		jumpr r31
176	}
177	.falign
178.Ladd_ovf_unf:			// careful path, taken when EXPA is not in the range 54 < EXPA <= 2045
179	// Overflow or Denormal is possible
180	// Good news: Underflow flag is not possible!
181
182	// ATMP has 2's complement value
183	//
184	// EXPA has A's exponent, EXPB has EXPA-BIAS-60
185	//
186	// Convert, extract exponent, add adjustment.
187	// If > 2046, overflow
188	// If <= 0, denormal
189	//
190	// Note that we've not done our zero check yet, so do that too
191
192	{
193		A = convert_d2df(ATMP)
194		p0 = cmp.eq(ATMPH,#0)		// both word compares must hold: the
195		p0 = cmp.eq(ATMPL,#0)		// 64-bit sum is exactly zero
196		if (p0.new) jump:nt .Ladd_zero
197	}
198	{
199		TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)	// exponent field produced by the conversion (AH read before the += below)
200		AH += asl(EXPB,#HI_MANTBITS)	// speculatively apply the adjustment, as the fast path does
201	}
202	{
203		EXPB = add(EXPB,TMP)		// EXPB = would-be final exponent, kept as a full integer
204		B = combine(##0x00100000,#0)	// bit 52 set: explicit leading one for the denormal case
205	}
206	{
207		p0 = cmp.gt(EXPB,##BIAS+BIAS-2)	// above 2046: does not fit a finite double
208		if (p0.new) jump:nt .Ladd_ovf
209	}
210	{
211		p0 = cmp.gt(EXPB,#0)		// ordinary normal result: A is already complete
212		if (p0.new) jumpr:t r31
213		TMP = sub(#1,EXPB)		// otherwise: right-shift count needed to denormalize
214	}
215	{
216		B = insert(A,#MANTBITS,#0)	// B = leading one plus the 52 mantissa bits of the converted value
217		A = ATMP			// keep the two's complement sum; only its sign bit survives below
218	}
219	{
220		B = lsr(B,TMP)			// plain truncating shift; NOTE(review): presumably exact, per the no-underflow remark above - confirm
221	}
222	{
223		A = insert(B,#63,#0)		// low 63 bits from B, sign bit left as it was in A
224		jumpr r31
225	}
226	.falign
227.Ladd_ovf:				// result exponent exceeded 2046; build the overflow result by hand
228	// We get either max finite value or infinity.  Either way, overflow+inexact
229	{
230		A = ATMP				// 2's complement value (old ATMP; only its sign is kept)
231		TMP = USR
232		ATMP = combine(##0x7fefffff,#-1)	// positive max finite
233	}
234	{
235		EXPB = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding bits
236		TMP = or(TMP,#0x28)			// inexact + overflow
237		BTMP = combine(##0x7ff00000,#0)		// positive infinity
238	}
239	{
240		USR = TMP				// write the flags back (old TMP, before the copy below)
241		EXPB ^= lsr(AH,#31)			// Does sign match rounding?  (flips the low mode bit for a negative result)
242		TMP = EXPB				// unmodified rounding mode
243	}
244	{
245		p0 = !cmp.eq(TMP,#1)			// If not round-to-zero and
246		p0 = !cmp.eq(EXPB,#2)			// Not rounding the other way,
247		if (p0.new) ATMP = BTMP			// we should get infinity
248	}
249	{
250		A = insert(ATMP,#63,#0)			// insert inf/maxfinite, leave sign
251	}
252	{
253		p0 = dfcmp.eq(A,A)			// NOTE(review): p0 is not consumed before the return; purpose not evident from this file - confirm
254		jumpr r31
255	}
256
257.Ladd_abnormal:			// at least one operand is not a normal number: NaN, infinity, zero or subnormal
258	{
259		ATMP = extractu(A,#63,#0)		// strip off sign
260		BTMP = extractu(B,#63,#0)		// strip off sign
261	}
262	{
263		p3 = cmp.gtu(ATMP,BTMP)			// is |A| the larger bit pattern?
264		if (!p3.new) A = B			// sort values (swap, so that A holds the larger magnitude)
265		if (!p3.new) B = A			// sort values
266	}
267	{
268		// Any NaN --> NaN, possibly raise invalid if sNaN
269		p0 = dfclass(A,#0x0f)		// A not NaN?  (after sorting, a NaN operand sits in A)
270		if (!p0.new) jump:nt .Linvalid_nan_add
271		if (!p3) ATMP = BTMP		// keep the stripped magnitudes in the
272		if (!p3) BTMP = ATMP		// same order as A and B
273	}
274	{
275		// Infinity + non-infinity number is infinity
276		// Infinity + infinity --> inf or nan
277		p1 = dfclass(A,#0x08)		// A is infinity
278		if (p1.new) jump:nt .Linf_add
279	}
280	{
281		p2 = dfclass(B,#0x01)		// B is zero
282		if (p2.new) jump:nt .LB_zero	// so return A or special 0+0
283		ATMP = #0			// .LB_zero compares A against this zero
284	}
285	// We are left with adding one or more subnormals
286	{
287		p0 = dfclass(A,#4)		// the larger operand is itself subnormal, so both are
288		if (p0.new) jump:nt .Ladd_two_subnormal
289		ATMP = combine(##0x20000000,#0)	// otherwise A is normal: rebuild its mantissa template
290	}
291	{
292		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)	// re-extract: A and B may have been re-sorted above
293		EXPB = #1			// exponent value used for the subnormal operand B
294		// BTMP already ABS(B)
295		BTMP = asl(BTMP,#EXPBITS-2)	// same bit alignment as the normal path, but with no leading one inserted
296	}
297#undef ZERO
298#define EXTRACTOFF r14
299#define EXPDIFF r15
300	{				// reproduce the last fast-path setup packet, then rejoin the common code
301		ATMP = insert(A,#MANTBITS,#EXPBITS-2)
302		EXPDIFF = sub(EXPA,EXPB)
303		ZTMP = combine(#62,#1)
304		jump .Ladd_continue
305	}
306
307.Ladd_two_subnormal:			// both operands subnormal: add them as signed 64-bit integers
308	{
309		ATMP = extractu(A,#63,#0)	// magnitudes with the sign bit stripped
310		BTMP = extractu(B,#63,#0)
311	}
312	{
313		ATMP = neg(ATMP)		// negated magnitudes, kept only for negative operands
314		BTMP = neg(BTMP)
315		p0 = cmp.gt(AH,#-1)		// A sign bit clear?
316		p1 = cmp.gt(BH,#-1)		// B sign bit clear?
317	}
318	{
319		if (p0) ATMP = A		// non-negative operand: its raw bits are already the magnitude
320		if (p1) BTMP = B
321	}
322	{
323		ATMP = add(ATMP,BTMP)		// signed integer sum
324	}
325	{
326		BTMP = neg(ATMP)
327		p0 = cmp.gt(ATMPH,#-1)		// is the sum non-negative?
328		B = #0
329	}
330	{
331		if (!p0) A = BTMP		// A = magnitude of the sum
332		if (p0) A = ATMP
333		BH = ##0x80000000		// sign-bit mask; B now holds 0x80000000:00000000
334	}
335	{
336		if (!p0) AH = or(AH,BH)		// negative sum: set the sign bit of the result
337		p0 = dfcmp.eq(A,B)		// compares A with the sign-bit-only pattern in B; presumably true exactly when the sum is zero
338		if (p0.new) jump:nt .Lzero_plus_zero	// let the 0+0 code choose the sign of the zero
339	}
340	{
341		jumpr r31
342	}
343
344.Linvalid_nan_add:			// A is a NaN (B may be one too): touch the NaNs so flags get raised, return all-ones
345	{
346		TMP = convert_df2sf(A)			// will generate invalid if sNaN (converted value itself is discarded)
347		p0 = dfclass(B,#0x0f)			// if B is not NaN
348		if (p0.new) B = A 			// make it whatever A is
349	}
350	{
351		BL = convert_df2sf(B)			// will generate invalid if sNaN (converted value itself is discarded)
352		A = #-1					// result: every bit set, which is a NaN bit pattern
353		jumpr r31
354	}
355	.falign
356.LB_zero:				// B is a zero; ATMP was set to 0 in the packet that jumped here
357	{
358		p0 = dfcmp.eq(ATMP,A)			// is A also zero?
359		if (!p0.new) jumpr:t r31		// If not, just return A
360	}
361	// 0 + 0 is special
362	// if equal integral values, they have the same sign, which is fine for all rounding
363	// modes.
364	// If unequal in sign, we get +0 for all rounding modes except round down
365.Lzero_plus_zero:			// also entered from .Ladd_two_subnormal when the sum compares equal to zero
366	{
367		p0 = cmp.eq(A,B)			// 64-bit integer compare: identical bit patterns?
368		if (p0.new) jumpr:t r31			// if so, A already carries the right sign
369	}
370	{
371		TMP = USR
372	}
373	{
374		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding-mode field
375		A = #0					// default result: +0
376	}
377	{
378		p0 = cmp.eq(TMP,#2)			// mode 2 is the round-down case described above
379		if (p0.new) AH = ##0x80000000		// in that mode only, return -0
380		jumpr r31
381	}
382.Linf_add:				// A is an infinity; B is anything that is not a NaN
383	// adding infinities is only OK if they are equal
384	{
385		p0 = !cmp.eq(AH,BH)			// Do they have different signs
386		p0 = dfclass(B,#8)			// And is B also infinite?
387		if (!p0.new) jumpr:t r31		// If not, just a normal inf (A is returned unchanged)
388	}
389	{
390		BL = ##0x7f800001			// sNAN (single-precision bit pattern)
391	}
392	{
393		A = convert_sf2df(BL)			// trigger invalid, set NaN
394		jumpr r31
395	}
396END(__hexagon_adddf3)
397