xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/sparc64/divsi3.S (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric/*
2*0b57cec5SDimitry Andric * This m4 code has been taken from The SPARC Architecture Manual Version 8.
3*0b57cec5SDimitry Andric */
4*0b57cec5SDimitry Andric/*
5*0b57cec5SDimitry Andric * Division/Remainder
6*0b57cec5SDimitry Andric *
7*0b57cec5SDimitry Andric * Input is:
8*0b57cec5SDimitry Andric *   dividend -- the thing being divided
9*0b57cec5SDimitry Andric *   divisor -- how many ways to divide it
10*0b57cec5SDimitry Andric * Important parameters:
11*0b57cec5SDimitry Andric *   N -- how many bits per iteration we try to get
12*0b57cec5SDimitry Andric *        as our current guess:
13*0b57cec5SDimitry Andric *   WORDSIZE -- how many bits altogether we're talking about:
14*0b57cec5SDimitry Andric *               obviously:
15*0b57cec5SDimitry Andric * A derived constant:
16*0b57cec5SDimitry Andric *   TOPBITS -- how many bits are in the top "decade" of a number:
17*0b57cec5SDimitry Andric *
18*0b57cec5SDimitry Andric * Important variables are:
19*0b57cec5SDimitry Andric *   Q -- the partial quotient under development -- initially 0
20*0b57cec5SDimitry Andric *   R -- the remainder so far -- initially == the dividend
21*0b57cec5SDimitry Andric *   ITER -- number of iterations of the main division loop which will
22*0b57cec5SDimitry Andric *           be required. Equal to CEIL( lg2(quotient)/4 )
23*0b57cec5SDimitry Andric *           Note that this is log_base_(2ˆ4) of the quotient.
24*0b57cec5SDimitry Andric *   V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
25*0b57cec5SDimitry Andric * Cost:
26*0b57cec5SDimitry Andric *   current estimate for non-large dividend is
27*0b57cec5SDimitry Andric *        CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
28*0b57cec5SDimitry Andric *   a large dividend is one greater than 2ˆ(31-4 ) and takes a
29*0b57cec5SDimitry Andric *   different path, as the upper bits of the quotient must be developed
30*0b57cec5SDimitry Andric *   one bit at a time.
31*0b57cec5SDimitry Andric *   This uses the m4 and cpp macro preprocessors.
32*0b57cec5SDimitry Andric */
33*0b57cec5SDimitry Andric/*
34*0b57cec5SDimitry Andric * This is the recursive definition of how we develop quotient digits.
35*0b57cec5SDimitry Andric * It takes three important parameters:
36*0b57cec5SDimitry Andric *   $1 -- the current depth, 1<=$1<=4
37*0b57cec5SDimitry Andric *   $2 -- the current accumulation of quotient bits
38*0b57cec5SDimitry Andric *   4 -- max depth
39*0b57cec5SDimitry Andric * We add a new bit to $2 and either recurse or insert the bits in the quotient.
40*0b57cec5SDimitry Andric * Dynamic input:
41*0b57cec5SDimitry Andric *   %o3 -- current remainder
42*0b57cec5SDimitry Andric *   %o2 -- current quotient
43*0b57cec5SDimitry Andric *   %o5 -- current comparand
44*0b57cec5SDimitry Andric *   cc -- set on current value of %o3
45*0b57cec5SDimitry Andric * Dynamic output:
46*0b57cec5SDimitry Andric *   %o3', %o2', %o5', cc'
47*0b57cec5SDimitry Andric */
48*0b57cec5SDimitry Andric#include "../assembly.h"
49*0b57cec5SDimitry Andric.text
50*0b57cec5SDimitry Andric	.align 32
51*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__udivsi3)
52*0b57cec5SDimitry Andric	b	divide
53*0b57cec5SDimitry Andric	mov	0,%g3			! result always nonnegative
54*0b57cec5SDimitry Andric.text
55*0b57cec5SDimitry Andric	.align 32
56*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__divsi3)
57*0b57cec5SDimitry Andric	orcc	%o1,%o0,%g0	! are either %o0 or %o1 negative
58*0b57cec5SDimitry Andric	bge	divide			! if not, skip this junk
59*0b57cec5SDimitry Andric	xor	%o1,%o0,%g3	! record sign of result in sign of %g3
60*0b57cec5SDimitry Andric	tst	%o1
61*0b57cec5SDimitry Andric	bge	2f
62*0b57cec5SDimitry Andric	tst	%o0
63*0b57cec5SDimitry Andric	! %o1 < 0
64*0b57cec5SDimitry Andric	bge	divide
65*0b57cec5SDimitry Andric	neg	%o1
66*0b57cec5SDimitry Andric	2:
67*0b57cec5SDimitry Andric	! %o0 < 0
68*0b57cec5SDimitry Andric	neg	%o0
69*0b57cec5SDimitry Andric	! FALL THROUGH
70*0b57cec5SDimitry Andricdivide:
71*0b57cec5SDimitry Andric	! Compute size of quotient, scale comparand.
72*0b57cec5SDimitry Andric	orcc	%o1,%g0,%o5		! movcc %o1,%o5
73*0b57cec5SDimitry Andric	te	2			! if %o1 = 0
74*0b57cec5SDimitry Andric	mov	%o0,%o3
75*0b57cec5SDimitry Andric	mov	0,%o2
76*0b57cec5SDimitry Andric	sethi	%hi(1<<(32-4 -1)),%g1
77*0b57cec5SDimitry Andric	cmp	%o3,%g1
78*0b57cec5SDimitry Andric	blu	not_really_big
79*0b57cec5SDimitry Andric	mov	0,%o4
80*0b57cec5SDimitry Andric	!
81*0b57cec5SDimitry Andric	! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
82*0b57cec5SDimitry Andric	! as our usual 4-at-a-shot divide step will cause overflow and havoc.
83*0b57cec5SDimitry Andric	! The total number of bits in the result here is 4*%o4+%g2, where
84*0b57cec5SDimitry Andric	! %g2 <= 4.
85*0b57cec5SDimitry Andric	! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
86*0b57cec5SDimitry Andric! the top decade: so don't even bother to compare to %o3.
87*0b57cec5SDimitry Andric1:
88*0b57cec5SDimitry Andric	cmp	%o5,%g1
89*0b57cec5SDimitry Andric	bgeu	3f
90*0b57cec5SDimitry Andric	mov	1,%g2
91*0b57cec5SDimitry Andric	sll	%o5,4,%o5
92*0b57cec5SDimitry Andric	b	1b
93*0b57cec5SDimitry Andric	inc	%o4
94*0b57cec5SDimitry Andric! Now compute %g2
95*0b57cec5SDimitry Andric2:	addcc	%o5,%o5,%o5
96*0b57cec5SDimitry Andric	bcc	not_too_big
97*0b57cec5SDimitry Andric	add	%g2,1,%g2
98*0b57cec5SDimitry Andric		! We're here if the %o1 overflowed when Shifting.
99*0b57cec5SDimitry Andric		! This means that %o3 has the high-order bit set.
100*0b57cec5SDimitry Andric		! Restore %o5 and subtract from %o3.
101*0b57cec5SDimitry Andric		sll	%g1,4 ,%g1	! high order bit
102*0b57cec5SDimitry Andric		srl	%o5,1,%o5		! rest of %o5
103*0b57cec5SDimitry Andric		add	%o5,%g1,%o5
104*0b57cec5SDimitry Andric		b	do_single_div
105*0b57cec5SDimitry Andric		dec	%g2
106*0b57cec5SDimitry Andricnot_too_big:
107*0b57cec5SDimitry Andric3:	cmp	%o5,%o3
108*0b57cec5SDimitry Andric	blu	2b
109*0b57cec5SDimitry Andric	nop
110*0b57cec5SDimitry Andric	be	do_single_div
111*0b57cec5SDimitry Andric	nop
112*0b57cec5SDimitry Andric! %o5 > %o3: went too far: back up 1 step
113*0b57cec5SDimitry Andric!     srl %o5,1,%o5
114*0b57cec5SDimitry Andric!      dec %g2
115*0b57cec5SDimitry Andric! do single-bit divide steps
116*0b57cec5SDimitry Andric!
117*0b57cec5SDimitry Andric! We have to be careful here. We know that %o3 >= %o5, so we can do the
118*0b57cec5SDimitry Andric! first divide step without thinking. BUT, the others are conditional,
119*0b57cec5SDimitry Andric! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
120*0b57cec5SDimitry Andric! order bit set in the first step, just falling into the regular
121*0b57cec5SDimitry Andric! division loop will mess up the first time around.
122*0b57cec5SDimitry Andric! So we unroll slightly...
123*0b57cec5SDimitry Andricdo_single_div:
124*0b57cec5SDimitry Andric	deccc	%g2
125*0b57cec5SDimitry Andric	bl	end_regular_divide
126*0b57cec5SDimitry Andric	nop
127*0b57cec5SDimitry Andric	sub	%o3,%o5,%o3
128*0b57cec5SDimitry Andric	mov	1,%o2
129*0b57cec5SDimitry Andric	b,a	end_single_divloop
130*0b57cec5SDimitry Andric	! EMPTY
131*0b57cec5SDimitry Andricsingle_divloop:
132*0b57cec5SDimitry Andric	sll	%o2,1,%o2
133*0b57cec5SDimitry Andric	bl	1f
134*0b57cec5SDimitry Andric	srl	%o5,1,%o5
135*0b57cec5SDimitry Andric	! %o3 >= 0
136*0b57cec5SDimitry Andric		sub	%o3,%o5,%o3
137*0b57cec5SDimitry Andric		b	2f
138*0b57cec5SDimitry Andric		inc	%o2
139*0b57cec5SDimitry Andric	1:	! %o3 < 0
140*0b57cec5SDimitry Andric		add	%o3,%o5,%o3
141*0b57cec5SDimitry Andric		dec	%o2
142*0b57cec5SDimitry Andric	2:
143*0b57cec5SDimitry Andric	end_single_divloop:
144*0b57cec5SDimitry Andric		deccc	%g2
145*0b57cec5SDimitry Andric		bge	single_divloop
146*0b57cec5SDimitry Andric		tst	%o3
147*0b57cec5SDimitry Andric		b,a	end_regular_divide
148*0b57cec5SDimitry Andric		! EMPTY
149*0b57cec5SDimitry Andricnot_really_big:
150*0b57cec5SDimitry Andric1:
151*0b57cec5SDimitry Andric	sll	%o5,4,%o5
152*0b57cec5SDimitry Andric	cmp	%o5,%o3
153*0b57cec5SDimitry Andric	bleu	1b
154*0b57cec5SDimitry Andric	inccc	%o4
155*0b57cec5SDimitry Andric	be	got_result
156*0b57cec5SDimitry Andric	dec	%o4
157*0b57cec5SDimitry Andricdo_regular_divide:
158*0b57cec5SDimitry Andric	! Do the main division iteration
159*0b57cec5SDimitry Andric	tst	%o3
160*0b57cec5SDimitry Andric	! Fall through into divide loop
161*0b57cec5SDimitry Andricdivloop:
162*0b57cec5SDimitry Andric	sll	%o2,4,%o2
163*0b57cec5SDimitry Andric		!depth 1, accumulated bits 0
164*0b57cec5SDimitry Andric	bl	L.1.16
165*0b57cec5SDimitry Andric	srl	%o5,1,%o5
166*0b57cec5SDimitry Andric	! remainder is nonnegative
167*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
168*0b57cec5SDimitry Andric	 	!depth 2, accumulated bits 1
169*0b57cec5SDimitry Andric	bl	L.2.17
170*0b57cec5SDimitry Andric	srl	%o5,1,%o5
171*0b57cec5SDimitry Andric	! remainder is nonnegative
172*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
173*0b57cec5SDimitry Andric	 	!depth 3, accumulated bits 3
174*0b57cec5SDimitry Andric	bl	L.3.19
175*0b57cec5SDimitry Andric	srl	%o5,1,%o5
176*0b57cec5SDimitry Andric	! remainder is nonnegative
177*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
178*0b57cec5SDimitry Andric	 	!depth 4, accumulated bits 7
179*0b57cec5SDimitry Andric	bl	L.4.23
180*0b57cec5SDimitry Andric	srl	%o5,1,%o5
181*0b57cec5SDimitry Andric	! remainder is nonnegative
182*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
183*0b57cec5SDimitry Andric		b	9f
184*0b57cec5SDimitry Andric		add	%o2, (7*2+1), %o2
185*0b57cec5SDimitry AndricL.4.23:
186*0b57cec5SDimitry Andric	! remainder is negative
187*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
188*0b57cec5SDimitry Andric		b	9f
189*0b57cec5SDimitry Andric		add	%o2, (7*2-1), %o2
190*0b57cec5SDimitry AndricL.3.19:
191*0b57cec5SDimitry Andric	! remainder is negative
192*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
193*0b57cec5SDimitry Andric	 	!depth 4, accumulated bits 5
194*0b57cec5SDimitry Andric	bl	L.4.21
195*0b57cec5SDimitry Andric	srl	%o5,1,%o5
196*0b57cec5SDimitry Andric	! remainder is nonnegative
197*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
198*0b57cec5SDimitry Andric		b	9f
199*0b57cec5SDimitry Andric		add	%o2, (5*2+1), %o2
200*0b57cec5SDimitry AndricL.4.21:
201*0b57cec5SDimitry Andric	! remainder is negative
202*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
203*0b57cec5SDimitry Andric		b	9f
204*0b57cec5SDimitry Andric		add	%o2, (5*2-1), %o2
205*0b57cec5SDimitry AndricL.2.17:
206*0b57cec5SDimitry Andric	! remainder is negative
207*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
208*0b57cec5SDimitry Andric	 	!depth 3, accumulated bits 1
209*0b57cec5SDimitry Andric	bl	L.3.17
210*0b57cec5SDimitry Andric	srl	%o5,1,%o5
211*0b57cec5SDimitry Andric	! remainder is nonnegative
212*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
213*0b57cec5SDimitry Andric	 	!depth 4, accumulated bits 3
214*0b57cec5SDimitry Andric	bl	L.4.19
215*0b57cec5SDimitry Andric	srl	%o5,1,%o5
216*0b57cec5SDimitry Andric	! remainder is nonnegative
217*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
218*0b57cec5SDimitry Andric		b	9f
219*0b57cec5SDimitry Andric		add	%o2, (3*2+1), %o2
220*0b57cec5SDimitry AndricL.4.19:
221*0b57cec5SDimitry Andric	! remainder is negative
222*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
223*0b57cec5SDimitry Andric		b	9f
224*0b57cec5SDimitry Andric		add	%o2, (3*2-1), %o2
225*0b57cec5SDimitry AndricL.3.17:
226*0b57cec5SDimitry Andric	! remainder is negative
227*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
228*0b57cec5SDimitry Andric	 	!depth 4, accumulated bits 1
229*0b57cec5SDimitry Andric	bl	L.4.17
230*0b57cec5SDimitry Andric	srl	%o5,1,%o5
231*0b57cec5SDimitry Andric	! remainder is nonnegative
232*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
233*0b57cec5SDimitry Andric		b	9f
234*0b57cec5SDimitry Andric		add	%o2, (1*2+1), %o2
235*0b57cec5SDimitry AndricL.4.17:
236*0b57cec5SDimitry Andric	! remainder is negative
237*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
238*0b57cec5SDimitry Andric		b	9f
239*0b57cec5SDimitry Andric		add	%o2, (1*2-1), %o2
240*0b57cec5SDimitry AndricL.1.16:
241*0b57cec5SDimitry Andric	! remainder is negative
242*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
243*0b57cec5SDimitry Andric	 	!depth 2, accumulated bits -1
244*0b57cec5SDimitry Andric	bl	L.2.15
245*0b57cec5SDimitry Andric	srl	%o5,1,%o5
246*0b57cec5SDimitry Andric	! remainder is nonnegative
247*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
248*0b57cec5SDimitry Andric	 	!depth 3, accumulated bits -1
249*0b57cec5SDimitry Andric	bl	L.3.15
250*0b57cec5SDimitry Andric	srl	%o5,1,%o5
251*0b57cec5SDimitry Andric	! remainder is nonnegative
252*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
253*0b57cec5SDimitry Andric	 	!depth 4, accumulated bits -1
254*0b57cec5SDimitry Andric	bl	L.4.15
255*0b57cec5SDimitry Andric	srl	%o5,1,%o5
256*0b57cec5SDimitry Andric	! remainder is nonnegative
257*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
258*0b57cec5SDimitry Andric		b	9f
259*0b57cec5SDimitry Andric		add	%o2, (-1*2+1), %o2
260*0b57cec5SDimitry AndricL.4.15:
261*0b57cec5SDimitry Andric	! remainder is negative
262*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
263*0b57cec5SDimitry Andric		b	9f
264*0b57cec5SDimitry Andric		add	%o2, (-1*2-1), %o2
265*0b57cec5SDimitry AndricL.3.15:
266*0b57cec5SDimitry Andric	! remainder is negative
267*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
268*0b57cec5SDimitry Andric	 	!depth 4, accumulated bits -3
269*0b57cec5SDimitry Andric	bl	L.4.13
270*0b57cec5SDimitry Andric	srl	%o5,1,%o5
271*0b57cec5SDimitry Andric	! remainder is nonnegative
272*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
273*0b57cec5SDimitry Andric		b	9f
274*0b57cec5SDimitry Andric		add	%o2, (-3*2+1), %o2
275*0b57cec5SDimitry AndricL.4.13:
276*0b57cec5SDimitry Andric	! remainder is negative
277*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
278*0b57cec5SDimitry Andric		b	9f
279*0b57cec5SDimitry Andric		add	%o2, (-3*2-1), %o2
280*0b57cec5SDimitry AndricL.2.15:
281*0b57cec5SDimitry Andric	! remainder is negative
282*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
283*0b57cec5SDimitry Andric	 	!depth 3, accumulated bits -3
284*0b57cec5SDimitry Andric	bl	L.3.13
285*0b57cec5SDimitry Andric	srl	%o5,1,%o5
286*0b57cec5SDimitry Andric	! remainder is nonnegative
287*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
288*0b57cec5SDimitry Andric	 	!depth 4, accumulated bits -5
289*0b57cec5SDimitry Andric	bl	L.4.11
290*0b57cec5SDimitry Andric	srl	%o5,1,%o5
291*0b57cec5SDimitry Andric	! remainder is nonnegative
292*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
293*0b57cec5SDimitry Andric		b	9f
294*0b57cec5SDimitry Andric		add	%o2, (-5*2+1), %o2
295*0b57cec5SDimitry AndricL.4.11:
296*0b57cec5SDimitry Andric	! remainder is negative
297*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
298*0b57cec5SDimitry Andric		b	9f
299*0b57cec5SDimitry Andric		add	%o2, (-5*2-1), %o2
300*0b57cec5SDimitry AndricL.3.13:
301*0b57cec5SDimitry Andric	! remainder is negative
302*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
303*0b57cec5SDimitry Andric	 	!depth 4, accumulated bits -7
304*0b57cec5SDimitry Andric	bl	L.4.9
305*0b57cec5SDimitry Andric	srl	%o5,1,%o5
306*0b57cec5SDimitry Andric	! remainder is nonnegative
307*0b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
308*0b57cec5SDimitry Andric		b	9f
309*0b57cec5SDimitry Andric		add	%o2, (-7*2+1), %o2
310*0b57cec5SDimitry AndricL.4.9:
311*0b57cec5SDimitry Andric	! remainder is negative
312*0b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
313*0b57cec5SDimitry Andric		b	9f
314*0b57cec5SDimitry Andric		add	%o2, (-7*2-1), %o2
315*0b57cec5SDimitry Andric	9:
316*0b57cec5SDimitry Andricend_regular_divide:
317*0b57cec5SDimitry Andric	deccc	%o4
318*0b57cec5SDimitry Andric	bge	divloop
319*0b57cec5SDimitry Andric	tst	%o3
320*0b57cec5SDimitry Andric	bl,a	got_result
321*0b57cec5SDimitry Andric	! non-restoring fixup if remainder < 0, otherwise annulled
322*0b57cec5SDimitry Andric	dec	%o2
323*0b57cec5SDimitry Andricgot_result:
324*0b57cec5SDimitry Andric	tst	%g3
325*0b57cec5SDimitry Andric	bl,a	1f
326*0b57cec5SDimitry Andric	! negate for answer < 0, otherwise annulled
327*0b57cec5SDimitry Andric	neg	%o2,%o2			! %o2 <- -%o2
328*0b57cec5SDimitry Andric1:
329*0b57cec5SDimitry Andric	retl				! leaf-routine return
330*0b57cec5SDimitry Andric	mov	%o2,%o0			! quotient <- %o2
331