xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/sparc64/modsi3.S (revision dd41de95a84d979615a2ef11df6850622bf6184e)
1/*
2 * This m4 code has been taken from The SPARC Architecture Manual Version 8.
3 */
4/*
5 * Division/Remainder
6 *
7 * Input is:
8 *   dividend -- the thing being divided
9 *   divisor -- how many ways to divide it
10 * Important parameters:
11 *   N -- how many bits per iteration we try to get
12 *        as our current guess:
13 *   WORDSIZE -- how many bits altogether we're talking about:
14 *               obviously:
15 * A derived constant:
16 *   TOPBITS -- how many bits are in the top "decade" of a number:
17 *
18 * Important variables are:
19 *   Q -- the partial quotient under development -- initially 0
20 *   R -- the remainder so far -- initially == the dividend
21 *   ITER -- number of iterations of the main division loop which will
22 *           be required. Equal to CEIL( lg2(quotient)/4 )
23 *           Note that this is log_base_(2ˆ4) of the quotient.
24 *   V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
25 * Cost:
26 *   current estimate for non-large dividend is
27 *        CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
28 *   a large dividend is one greater than 2ˆ(31-4 ) and takes a
29 *   different path, as the upper bits of the quotient must be developed
30 *   one bit at a time.
31 *   This uses the m4 and cpp macro preprocessors.
32 */
33/*
34 * This is the recursive definition of how we develop quotient digits.
35 * It takes three important parameters:
36 *   $1 -- the current depth, 1<=$1<=4
37 *   $2 -- the current accumulation of quotient bits
38 *   4 -- max depth
39 * We add a new bit to $2 and either recurse or insert the bits in the quotient.
40 * Dynamic input:
41 *   %o3 -- current remainder
42 *   %o2 -- current quotient
43 *   %o5 -- current comparand
44 *   cc -- set on current value of %o3
45 * Dynamic output:
46 *   %o3', %o2', %o5', cc'
47 */
48#include "../assembly.h"
49.text
50	.align 32
51DEFINE_COMPILERRT_FUNCTION(__umodsi3)
52	b	divide
53	mov	0,%g3			! result always nonnegative
54.text
55	.align 32
56DEFINE_COMPILERRT_FUNCTION(__modsi3)
57	orcc	%o1,%o0,%g0	! are either %o0 or %o1 negative
58	bge	divide			! if not, skip this junk
59	mov	%o0,%g3		! record sign of result in sign of %g3
60	tst	%o1
61	bge	2f
62	tst	%o0
63	! %o1 < 0
64	bge	divide
65	neg	%o1
66	2:
67	! %o0 < 0
68	neg	%o0
69	! FALL THROUGH
70divide:
71	! Compute size of quotient, scale comparand.
72	orcc	%o1,%g0,%o5		! movcc %o1,%o5
73	te	2			! if %o1 = 0
74	mov	%o0,%o3
75	mov	0,%o2
76	sethi	%hi(1<<(32-4 -1)),%g1
77	cmp	%o3,%g1
78	blu	not_really_big
79	mov	0,%o4
80	!
81	! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
82	! as our usual 4-at-a-shot divide step will cause overflow and havoc.
83	! The total number of bits in the result here is 4*%o4+%g2, where
84	! %g2 <= 4.
85	! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
86! the top decade: so don't even bother to compare to %o3.
871:
88	cmp	%o5,%g1
89	bgeu	3f
90	mov	1,%g2
91	sll	%o5,4,%o5
92	b	1b
93	inc	%o4
94! Now compute %g2
952:	addcc	%o5,%o5,%o5
96	bcc	not_too_big
97	add	%g2,1,%g2
98		! We're here if the %o1 overflowed when Shifting.
99		! This means that %o3 has the high-order bit set.
100		! Restore %o5 and subtract from %o3.
101		sll	%g1,4 ,%g1	! high order bit
102		srl	%o5,1,%o5		! rest of %o5
103		add	%o5,%g1,%o5
104		b	do_single_div
105		dec	%g2
106not_too_big:
1073:	cmp	%o5,%o3
108	blu	2b
109	nop
110	be	do_single_div
111	nop
112! %o5 > %o3: went too far: back up 1 step
113!     srl %o5,1,%o5
114!      dec %g2
115! do single-bit divide steps
116!
117! We have to be careful here. We know that %o3 >= %o5, so we can do the
118! first divide step without thinking. BUT, the others are conditional,
119! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
120! order bit set in the first step, just falling into the regular
121! division loop will mess up the first time around.
122! So we unroll slightly...
123do_single_div:
124	deccc	%g2
125	bl	end_regular_divide
126	nop
127	sub	%o3,%o5,%o3
128	mov	1,%o2
129	b,a	end_single_divloop
130	! EMPTY
131single_divloop:
132	sll	%o2,1,%o2
133	bl	1f
134	srl	%o5,1,%o5
135	! %o3 >= 0
136		sub	%o3,%o5,%o3
137		b	2f
138		inc	%o2
139	1:	! %o3 < 0
140		add	%o3,%o5,%o3
141		dec	%o2
142	2:
143	end_single_divloop:
144		deccc	%g2
145		bge	single_divloop
146		tst	%o3
147		b,a	end_regular_divide
148		! EMPTY
149not_really_big:
1501:
151	sll	%o5,4,%o5
152	cmp	%o5,%o3
153	bleu	1b
154	inccc	%o4
155	be	got_result
156	dec	%o4
157do_regular_divide:
158	! Do the main division iteration
159	tst	%o3
160	! Fall through into divide loop
161divloop:
162	sll	%o2,4,%o2
163		!depth 1, accumulated bits 0
164	bl	L.1.16
165	srl	%o5,1,%o5
166	! remainder is nonnegative
167	subcc	%o3,%o5,%o3
168	 	!depth 2, accumulated bits 1
169	bl	L.2.17
170	srl	%o5,1,%o5
171	! remainder is nonnegative
172	subcc	%o3,%o5,%o3
173	 	!depth 3, accumulated bits 3
174	bl	L.3.19
175	srl	%o5,1,%o5
176	! remainder is nonnegative
177	subcc	%o3,%o5,%o3
178	 	!depth 4, accumulated bits 7
179	bl	L.4.23
180	srl	%o5,1,%o5
181	! remainder is nonnegative
182	subcc	%o3,%o5,%o3
183		b	9f
184		add	%o2, (7*2+1), %o2
185L.4.23:
186	! remainder is negative
187	addcc	%o3,%o5,%o3
188		b	9f
189		add	%o2, (7*2-1), %o2
190L.3.19:
191	! remainder is negative
192	addcc	%o3,%o5,%o3
193	 	!depth 4, accumulated bits 5
194	bl	L.4.21
195	srl	%o5,1,%o5
196	! remainder is nonnegative
197	subcc	%o3,%o5,%o3
198		b	9f
199		add	%o2, (5*2+1), %o2
200L.4.21:
201	! remainder is negative
202	addcc	%o3,%o5,%o3
203		b	9f
204		add	%o2, (5*2-1), %o2
205L.2.17:
206	! remainder is negative
207	addcc	%o3,%o5,%o3
208	 	!depth 3, accumulated bits 1
209	bl	L.3.17
210	srl	%o5,1,%o5
211	! remainder is nonnegative
212	subcc	%o3,%o5,%o3
213	 	!depth 4, accumulated bits 3
214	bl	L.4.19
215	srl	%o5,1,%o5
216	! remainder is nonnegative
217	subcc	%o3,%o5,%o3
218		b	9f
219		add	%o2, (3*2+1), %o2
220L.4.19:
221	! remainder is negative
222	addcc	%o3,%o5,%o3
223		b	9f
224		add	%o2, (3*2-1), %o2
225L.3.17:
226	! remainder is negative
227	addcc	%o3,%o5,%o3
228	 	!depth 4, accumulated bits 1
229	bl	L.4.17
230	srl	%o5,1,%o5
231	! remainder is nonnegative
232	subcc	%o3,%o5,%o3
233		b	9f
234		add	%o2, (1*2+1), %o2
235L.4.17:
236	! remainder is negative
237	addcc	%o3,%o5,%o3
238		b	9f
239		add	%o2, (1*2-1), %o2
240L.1.16:
241	! remainder is negative
242	addcc	%o3,%o5,%o3
243	 	!depth 2, accumulated bits -1
244	bl	L.2.15
245	srl	%o5,1,%o5
246	! remainder is nonnegative
247	subcc	%o3,%o5,%o3
248	 	!depth 3, accumulated bits -1
249	bl	L.3.15
250	srl	%o5,1,%o5
251	! remainder is nonnegative
252	subcc	%o3,%o5,%o3
253	 	!depth 4, accumulated bits -1
254	bl	L.4.15
255	srl	%o5,1,%o5
256	! remainder is nonnegative
257	subcc	%o3,%o5,%o3
258		b	9f
259		add	%o2, (-1*2+1), %o2
260L.4.15:
261	! remainder is negative
262	addcc	%o3,%o5,%o3
263		b	9f
264		add	%o2, (-1*2-1), %o2
265L.3.15:
266	! remainder is negative
267	addcc	%o3,%o5,%o3
268	 	!depth 4, accumulated bits -3
269	bl	L.4.13
270	srl	%o5,1,%o5
271	! remainder is nonnegative
272	subcc	%o3,%o5,%o3
273		b	9f
274		add	%o2, (-3*2+1), %o2
275L.4.13:
276	! remainder is negative
277	addcc	%o3,%o5,%o3
278		b	9f
279		add	%o2, (-3*2-1), %o2
280L.2.15:
281	! remainder is negative
282	addcc	%o3,%o5,%o3
283	 	!depth 3, accumulated bits -3
284	bl	L.3.13
285	srl	%o5,1,%o5
286	! remainder is nonnegative
287	subcc	%o3,%o5,%o3
288	 	!depth 4, accumulated bits -5
289	bl	L.4.11
290	srl	%o5,1,%o5
291	! remainder is nonnegative
292	subcc	%o3,%o5,%o3
293		b	9f
294		add	%o2, (-5*2+1), %o2
295L.4.11:
296	! remainder is negative
297	addcc	%o3,%o5,%o3
298		b	9f
299		add	%o2, (-5*2-1), %o2
300L.3.13:
301	! remainder is negative
302	addcc	%o3,%o5,%o3
303	 	!depth 4, accumulated bits -7
304	bl	L.4.9
305	srl	%o5,1,%o5
306	! remainder is nonnegative
307	subcc	%o3,%o5,%o3
308		b	9f
309		add	%o2, (-7*2+1), %o2
310L.4.9:
311	! remainder is negative
312	addcc	%o3,%o5,%o3
313		b	9f
314		add	%o2, (-7*2-1), %o2
315	9:
316end_regular_divide:
317	deccc	%o4
318	bge	divloop
319	tst	%o3
320	bl,a	got_result
321	! non-restoring fixup if remainder < 0, otherwise annulled
322	add	%o3,%o1,%o3
323got_result:
324	tst	%g3
325	bl,a	1f
326	! negate for answer < 0, otherwise annulled
327	neg	%o3,%o3 		! %o3 <- -%o3
3281:
329	retl				! leaf-routine return
330	mov	%o3,%o0			! remainder <- %o3
331