xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/sparc64/divmod.m4 (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric/*
2*0b57cec5SDimitry Andric * This m4 code has been taken from The SPARC Architecture Manual Version 8.
3*0b57cec5SDimitry Andric */
4*0b57cec5SDimitry Andric
5*0b57cec5SDimitry Andric/*
6*0b57cec5SDimitry Andric * Division/Remainder
7*0b57cec5SDimitry Andric *
8*0b57cec5SDimitry Andric * Input is:
9*0b57cec5SDimitry Andric *   dividend -- the thing being divided
10*0b57cec5SDimitry Andric *   divisor -- how many ways to divide it
11*0b57cec5SDimitry Andric * Important parameters:
12*0b57cec5SDimitry Andric *   N -- how many bits per iteration we try to get
13*0b57cec5SDimitry Andric *        as our current guess: define(N, 4) define(TWOSUPN, 16)
14*0b57cec5SDimitry Andric *   WORDSIZE -- how many bits altogether we're talking about:
15*0b57cec5SDimitry Andric *               obviously: define(WORDSIZE, 32)
16*0b57cec5SDimitry Andric * A derived constant:
17*0b57cec5SDimitry Andric *   TOPBITS -- how many bits are in the top "decade" of a number:
18*0b57cec5SDimitry Andric *        define(TOPBITS, eval( WORDSIZE - N*((WORDSIZE-1)/N) ) )
19*0b57cec5SDimitry Andric * Important variables are:
20*0b57cec5SDimitry Andric *   Q -- the partial quotient under development -- initially 0
21*0b57cec5SDimitry Andric *   R -- the remainder so far -- initially == the dividend
22*0b57cec5SDimitry Andric *   ITER -- number of iterations of the main division loop which will
23*0b57cec5SDimitry Andric *           be required. Equal to CEIL( lg2(quotient)/N )
24*0b57cec5SDimitry Andric *           Note that this is log_base_(2ˆN) of the quotient.
25*0b57cec5SDimitry Andric *   V -- the current comparand -- initially divisor*2ˆ(ITER*N-1)
26*0b57cec5SDimitry Andric * Cost:
27*0b57cec5SDimitry Andric *   current estimate for non-large dividend is
28*0b57cec5SDimitry Andric *        CEIL( lg2(quotient) / N ) x ( 10 + 7N/2 ) + C
29*0b57cec5SDimitry Andric *   a large dividend is one greater than 2ˆ(31-TOPBITS) and takes a
30*0b57cec5SDimitry Andric *   different path, as the upper bits of the quotient must be developed
31*0b57cec5SDimitry Andric *   one bit at a time.
32*0b57cec5SDimitry Andric *   This uses the m4 and cpp macro preprocessors.
33*0b57cec5SDimitry Andric */
34*0b57cec5SDimitry Andric
35*0b57cec5SDimitry Andricdefine(dividend, `%o0')
36*0b57cec5SDimitry Andricdefine(divisor,`%o1')
37*0b57cec5SDimitry Andricdefine(Q, `%o2')
38*0b57cec5SDimitry Andricdefine(R, `%o3')
39*0b57cec5SDimitry Andricdefine(ITER, `%o4')
40*0b57cec5SDimitry Andricdefine(V, `%o5')
41*0b57cec5SDimitry Andricdefine(SIGN, `%g3')
42*0b57cec5SDimitry Andricdefine(T, `%g1')
43*0b57cec5SDimitry Andricdefine(SC,`%g2')
44*0b57cec5SDimitry Andric/*
45*0b57cec5SDimitry Andric * This is the recursive definition of how we develop quotient digits.
46*0b57cec5SDimitry Andric * It takes three important parameters:
47*0b57cec5SDimitry Andric *   $1 -- the current depth, 1<=$1<=N
48*0b57cec5SDimitry Andric *   $2 -- the current accumulation of quotient bits
49*0b57cec5SDimitry Andric *   N -- max depth
50*0b57cec5SDimitry Andric * We add a new bit to $2 and either recurse or insert the bits in the quotient.
51*0b57cec5SDimitry Andric * Dynamic input:
52*0b57cec5SDimitry Andric *   R -- current remainder
53*0b57cec5SDimitry Andric *   Q -- current quotient
54*0b57cec5SDimitry Andric *   V -- current comparand
55*0b57cec5SDimitry Andric *   cc -- set on current value of R
56*0b57cec5SDimitry Andric * Dynamic output:
57*0b57cec5SDimitry Andric *   R', Q', V', cc'
58*0b57cec5SDimitry Andric */
59*0b57cec5SDimitry Andric
60*0b57cec5SDimitry Andric#include "../assembly.h"
61*0b57cec5SDimitry Andric
62*0b57cec5SDimitry Andricdefine(DEVELOP_QUOTIENT_BITS,
63*0b57cec5SDimitry Andric`	!depth $1, accumulated bits $2
64*0b57cec5SDimitry Andric	bl	L.$1.eval(TWOSUPN+$2)
65*0b57cec5SDimitry Andric	srl	V,1,V
66*0b57cec5SDimitry Andric	! remainder is nonnegative
67*0b57cec5SDimitry Andric	subcc	R,V,R
68*0b57cec5SDimitry Andric	ifelse( $1, N,
69*0b57cec5SDimitry Andric	`	b	9f
70*0b57cec5SDimitry Andric		add	Q, ($2*2+1), Q
71*0b57cec5SDimitry Andric	',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2+1)')
72*0b57cec5SDimitry Andric	')
73*0b57cec5SDimitry AndricL.$1.eval(TWOSUPN+$2):
74*0b57cec5SDimitry Andric	! remainder is negative
75*0b57cec5SDimitry Andric	addcc	R,V,R
76*0b57cec5SDimitry Andric	ifelse( $1, N,
77*0b57cec5SDimitry Andric	`	b	9f
78*0b57cec5SDimitry Andric		add	Q, ($2*2-1), Q
79*0b57cec5SDimitry Andric	',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2-1)')
80*0b57cec5SDimitry Andric	')
81*0b57cec5SDimitry Andric	ifelse( $1, 1, `9:')
82*0b57cec5SDimitry Andric')
83*0b57cec5SDimitry Andricifelse( ANSWER, `quotient', `
84*0b57cec5SDimitry Andric.text
85*0b57cec5SDimitry Andric	.align 32
86*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__udivsi3)
87*0b57cec5SDimitry Andric	b	divide
88*0b57cec5SDimitry Andric	mov	0,SIGN			! result always nonnegative
89*0b57cec5SDimitry Andric.text
90*0b57cec5SDimitry Andric	.align 32
91*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__divsi3)
92*0b57cec5SDimitry Andric	orcc	divisor,dividend,%g0	! are either dividend or divisor negative
93*0b57cec5SDimitry Andric	bge	divide			! if not, skip this junk
94*0b57cec5SDimitry Andric	xor	divisor,dividend,SIGN	! record sign of result in sign of SIGN
95*0b57cec5SDimitry Andric	tst	divisor
96*0b57cec5SDimitry Andric	bge	2f
97*0b57cec5SDimitry Andric	tst	dividend
98*0b57cec5SDimitry Andric	! divisor < 0
99*0b57cec5SDimitry Andric	bge	divide
100*0b57cec5SDimitry Andric	neg	divisor
101*0b57cec5SDimitry Andric	2:
102*0b57cec5SDimitry Andric	! dividend < 0
103*0b57cec5SDimitry Andric	neg	dividend
104*0b57cec5SDimitry Andric	! FALL THROUGH
105*0b57cec5SDimitry Andric',`
106*0b57cec5SDimitry Andric.text
107*0b57cec5SDimitry Andric	.align 32
108*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__umodsi3)
109*0b57cec5SDimitry Andric	b	divide
110*0b57cec5SDimitry Andric	mov	0,SIGN			! result always nonnegative
111*0b57cec5SDimitry Andric.text
112*0b57cec5SDimitry Andric	.align 32
113*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__modsi3)
114*0b57cec5SDimitry Andric	orcc	divisor,dividend,%g0	! are either dividend or divisor negative
115*0b57cec5SDimitry Andric	bge	divide			! if not, skip this junk
116*0b57cec5SDimitry Andric	mov	dividend,SIGN		! record sign of result in sign of SIGN
117*0b57cec5SDimitry Andric	tst	divisor
118*0b57cec5SDimitry Andric	bge	2f
119*0b57cec5SDimitry Andric	tst	dividend
120*0b57cec5SDimitry Andric	! divisor < 0
121*0b57cec5SDimitry Andric	bge	divide
122*0b57cec5SDimitry Andric	neg	divisor
123*0b57cec5SDimitry Andric	2:
124*0b57cec5SDimitry Andric	! dividend < 0
125*0b57cec5SDimitry Andric	neg	dividend
126*0b57cec5SDimitry Andric	! FALL THROUGH
127*0b57cec5SDimitry Andric')
128*0b57cec5SDimitry Andric
129*0b57cec5SDimitry Andricdivide:
130*0b57cec5SDimitry Andric	! Compute size of quotient, scale comparand.
131*0b57cec5SDimitry Andric	orcc	divisor,%g0,V		! movcc divisor,V
132*0b57cec5SDimitry Andric	te	2			! if divisor = 0
133*0b57cec5SDimitry Andric	mov	dividend,R
134*0b57cec5SDimitry Andric	mov	0,Q
135*0b57cec5SDimitry Andric	sethi	%hi(1<<(WORDSIZE-TOPBITS-1)),T
136*0b57cec5SDimitry Andric	cmp	R,T
137*0b57cec5SDimitry Andric	blu	not_really_big
138*0b57cec5SDimitry Andric	mov	0,ITER
139*0b57cec5SDimitry Andric	!
140*0b57cec5SDimitry Andric	! Here, the dividend is >= 2ˆ(31-N) or so. We must be careful here,
141*0b57cec5SDimitry Andric	! as our usual N-at-a-shot divide step will cause overflow and havoc.
142*0b57cec5SDimitry Andric	! The total number of bits in the result here is N*ITER+SC, where
143*0b57cec5SDimitry Andric	! SC <= N.
144*0b57cec5SDimitry Andric	! Compute ITER in an unorthodox manner: know we need to Shift V into
145*0b57cec5SDimitry Andric! the top decade: so don't even bother to compare to R.
146*0b57cec5SDimitry Andric1:
147*0b57cec5SDimitry Andric	cmp	V,T
148*0b57cec5SDimitry Andric	bgeu	3f
149*0b57cec5SDimitry Andric	mov	1,SC
150*0b57cec5SDimitry Andric	sll	V,N,V
151*0b57cec5SDimitry Andric	b	1b
152*0b57cec5SDimitry Andric	inc	ITER
153*0b57cec5SDimitry Andric! Now compute SC
154*0b57cec5SDimitry Andric2:	addcc	V,V,V
155*0b57cec5SDimitry Andric	bcc	not_too_big
156*0b57cec5SDimitry Andric	add	SC,1,SC
157*0b57cec5SDimitry Andric		! We're here if the divisor overflowed when Shifting.
158*0b57cec5SDimitry Andric		! This means that R has the high-order bit set.
159*0b57cec5SDimitry Andric		! Restore V and subtract from R.
160*0b57cec5SDimitry Andric		sll	T,TOPBITS,T	! high order bit
161*0b57cec5SDimitry Andric		srl	V,1,V		! rest of V
162*0b57cec5SDimitry Andric		add	V,T,V
163*0b57cec5SDimitry Andric		b	do_single_div
164*0b57cec5SDimitry Andric		dec	SC
165*0b57cec5SDimitry Andricnot_too_big:
166*0b57cec5SDimitry Andric3:	cmp	V,R
167*0b57cec5SDimitry Andric	blu	2b
168*0b57cec5SDimitry Andric	nop
169*0b57cec5SDimitry Andric	be	do_single_div
170*0b57cec5SDimitry Andric	nop
171*0b57cec5SDimitry Andric! V > R: went too far: back up 1 step
172*0b57cec5SDimitry Andric!     srl V,1,V
173*0b57cec5SDimitry Andric!      dec SC
174*0b57cec5SDimitry Andric! do single-bit divide steps
175*0b57cec5SDimitry Andric!
176*0b57cec5SDimitry Andric! We have to be careful here. We know that R >= V, so we can do the
177*0b57cec5SDimitry Andric! first divide step without thinking. BUT, the others are conditional,
178*0b57cec5SDimitry Andric! and are only done if R >= 0. Because both R and V may have the high-
179*0b57cec5SDimitry Andric! order bit set in the first step, just falling into the regular
180*0b57cec5SDimitry Andric! division loop will mess up the first time around.
181*0b57cec5SDimitry Andric! So we unroll slightly...
182*0b57cec5SDimitry Andricdo_single_div:
183*0b57cec5SDimitry Andric	deccc	SC
184*0b57cec5SDimitry Andric	bl	end_regular_divide
185*0b57cec5SDimitry Andric	nop
186*0b57cec5SDimitry Andric	sub	R,V,R
187*0b57cec5SDimitry Andric	mov	1,Q
188*0b57cec5SDimitry Andric	b,a	end_single_divloop
189*0b57cec5SDimitry Andric	! EMPTY
190*0b57cec5SDimitry Andricsingle_divloop:
191*0b57cec5SDimitry Andric	sll	Q,1,Q
192*0b57cec5SDimitry Andric	bl	1f
193*0b57cec5SDimitry Andric	srl	V,1,V
194*0b57cec5SDimitry Andric	! R >= 0
195*0b57cec5SDimitry Andric		sub	R,V,R
196*0b57cec5SDimitry Andric		b	2f
197*0b57cec5SDimitry Andric		inc	Q
198*0b57cec5SDimitry Andric	1:	! R < 0
199*0b57cec5SDimitry Andric		add	R,V,R
200*0b57cec5SDimitry Andric		dec	Q
201*0b57cec5SDimitry Andric	2:
202*0b57cec5SDimitry Andric	end_single_divloop:
203*0b57cec5SDimitry Andric		deccc	SC
204*0b57cec5SDimitry Andric		bge	single_divloop
205*0b57cec5SDimitry Andric		tst	R
206*0b57cec5SDimitry Andric		b,a	end_regular_divide
207*0b57cec5SDimitry Andric		! EMPTY
208*0b57cec5SDimitry Andric
209*0b57cec5SDimitry Andricnot_really_big:
210*0b57cec5SDimitry Andric1:
211*0b57cec5SDimitry Andric	sll	V,N,V
212*0b57cec5SDimitry Andric	cmp	V,R
213*0b57cec5SDimitry Andric	bleu	1b
214*0b57cec5SDimitry Andric	inccc	ITER
215*0b57cec5SDimitry Andric	be	got_result
216*0b57cec5SDimitry Andric	dec	ITER
217*0b57cec5SDimitry Andricdo_regular_divide:
218*0b57cec5SDimitry Andric	! Do the main division iteration
219*0b57cec5SDimitry Andric	tst	R
220*0b57cec5SDimitry Andric	! Fall through into divide loop
221*0b57cec5SDimitry Andricdivloop:
222*0b57cec5SDimitry Andric	sll	Q,N,Q
223*0b57cec5SDimitry Andric	DEVELOP_QUOTIENT_BITS( 1, 0 )
224*0b57cec5SDimitry Andricend_regular_divide:
225*0b57cec5SDimitry Andric	deccc	ITER
226*0b57cec5SDimitry Andric	bge	divloop
227*0b57cec5SDimitry Andric	tst	R
228*0b57cec5SDimitry Andric	bl,a	got_result
229*0b57cec5SDimitry Andric	! non-restoring fixup if remainder < 0, otherwise annulled
230*0b57cec5SDimitry Andricifelse( ANSWER, `quotient',
231*0b57cec5SDimitry Andric`	dec	Q
232*0b57cec5SDimitry Andric',`	add	R,divisor,R
233*0b57cec5SDimitry Andric')
234*0b57cec5SDimitry Andric
235*0b57cec5SDimitry Andricgot_result:
236*0b57cec5SDimitry Andric	tst	SIGN
237*0b57cec5SDimitry Andric	bl,a	1f
238*0b57cec5SDimitry Andric	! negate for answer < 0, otherwise annulled
239*0b57cec5SDimitry Andricifelse( ANSWER, `quotient',
240*0b57cec5SDimitry Andric`	neg	%o2,%o2			! Q <- -Q
241*0b57cec5SDimitry Andric',`	neg	%o3,%o3 		! R <- -R
242*0b57cec5SDimitry Andric')
243*0b57cec5SDimitry Andric1:
244*0b57cec5SDimitry Andric	retl				! leaf-routine return
245*0b57cec5SDimitry Andricifelse( ANSWER, `quotient',
246*0b57cec5SDimitry Andric`	mov	%o2,%o0			! quotient <- Q
247*0b57cec5SDimitry Andric',`	mov	%o3,%o0			! remainder <- R
248*0b57cec5SDimitry Andric')
249