xref: /titanic_52/usr/src/cmd/sgs/rtld.4.x/rem.s (revision 49bfb42b00abac0958a1308f4233e366fd083366)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23!	.seg	"data"
24!	.asciz	"Copyr 1986 Sun Micro"
25	.seg	"text"
26
27#ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * Copyright 1986 Sun Microsystems, Inc.  All rights reserved.
31 * Use is subject to license terms.
32 */
33
34/*
35 * divison/remainder
36 *
37 * Input is:
38 *	dividend -- the thing being divided
39 * divisor  -- how many ways to divide
40 * Important parameters:
41 *	N -- how many bits per iteration we try to get
42 *		as our current guess:
43 *	WORDSIZE -- how many bits altogether we're talking about:
44 *		obviously:
45 * A derived constant:
46 *	TOPBITS -- how many bits are in the top "decade" of a number:
47 *
48 * Important variables are:
49 *	Q -- the partial quotient under development -- initally 0
50 *	R -- the remainder so far -- initially == the dividend
51 *	ITER -- number of iterations of the main division loop will
52 *		be required. Equal to CEIL( lg2(quotient)/4 )
53 *		Note that this is log_base_(2^4) of the quotient.
54 *	V -- the current comparand -- initially divisor*2^(ITER*4-1)
55 * Cost:
56 *	current estimate for non-large dividend is
57 *		CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
58 *	a large dividend is one greater than 2^(31-4 ) and takes a
59 *	different path, as the upper bits of the quotient must be developed
60 *	one bit at a time.
61 */
62
63#include <sys/trap.h>
64#include <sys/asm_linkage.h>
65
66
67
68
69
70
71
72
73	! working variable
74
75
76/*
77 * this is the recursive definition of how we develop quotient digits.
78 * it takes three important parameters:
79 *	$1 -- the current depth, 1<=$1<=4
80 *	$2 -- the current accumulation of quotient bits
81 *	4  -- max depth
82 * We add a new bit to $2 and either recurse or
83 * insert the bits in the quotient.
84 * Dynamic input:
85 *	%o3 -- current remainder
86 *	%o2 -- current quotient
87 *	%o5 -- current comparand
88 *	cc -- set on current value of %o3
89 * Dynamic output:
90 * %o3', %o2', %o5', cc'
91 */
92
93
94
95
96!	RTENTRY(.urem)		! UNSIGNED REMAINDER
97	.global	.urem
98.urem:
99	b	divide
100	mov	0,%g1		! result always positive
101
102!	RTENTRY(.rem)		! SIGNED REMAINDER
103	.global	.rem
104.rem:
105	orcc	%o1,%o0,%g0 ! are either %o0 or %o1 negative
106	bge	divide		! if not, skip this junk
107	mov	%o0,%g1	! record sign of result in sign of %g1
108		tst	%o1
109		bge	2f
110		tst	%o0
111	!	%o1 < 0
112		bge	divide
113		neg	%o1
114	2:
115	!	%o0 < 0
116		neg	%o0
117	!	FALL THROUGH
118
119
120divide:
121!	compute size of quotient, scale comparand
122	orcc	%o1,%g0,%o5	! movcc	%o1,%o5
123	bnz	0f		! if %o1 != 0
124	mov	%o0,%o3
125	ba	zero_divide
126	nop
1270:
128	cmp     %o3,%o5
129	blu     got_result ! if %o3<%o5 already, there's no point in continuing
130	mov	0,%o2
131	sethi	%hi(1<<(32-4 -1)),%g2
132	cmp	%o3,%g2
133	blu	not_really_big
134	mov	0,%o4
135	!
136	! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
137	! our usual 4-at-a-shot divide step will cause overflow and havoc. The
138	! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
139	! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
140	!	the top decade: so don't even bother to compare to %o3.
141	1:
142		cmp	%o5,%g2
143		bgeu	3f
144		mov	1,%g3
145		sll	%o5,4,%o5
146		b	1b
147		inc	%o4
148	! now compute %g3
149	2:	addcc	%o5,%o5,%o5
150		bcc	not_too_big ! bcc	not_too_big
151		add	%g3,1,%g3
152			!
153			! here if the %o1 overflowed when Shifting
154			! this means that %o3 has the high-order bit set
155			! restore %o5 and subtract from %o3
156			sll	%g2,4 ,%g2 ! high order bit
157			srl	%o5,1,%o5 ! rest of %o5
158			add	%o5,%g2,%o5
159			b	do_single_div
160			sub	%g3,1,%g3
161	not_too_big:
162	3:	cmp	%o5,%o3
163		blu	2b
164		nop
165		be	do_single_div
166		nop
167	! %o5 > %o3: went too far: back up 1 step
168	!	srl	%o5,1,%o5
169	!	dec	%g3
170	! do single-bit divide steps
171	!
172	! we have to be careful here. We know that %o3 >= %o5, so we can do the
173	! first divide step without thinking. BUT, the others are conditional,
174	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
175	! order bit set in the first step, just falling into the regular
176	! division loop will mess up the first time around.
177	! So we unroll slightly...
178	do_single_div:
179		deccc	%g3
180		bl	end_regular_divide
181		nop
182		sub	%o3,%o5,%o3
183		mov	1,%o2
184		b,a	end_single_divloop
185	single_divloop:
186		sll	%o2,1,%o2
187		bl	1f
188		srl	%o5,1,%o5
189		! %o3 >= 0
190		sub	%o3,%o5,%o3
191		b	2f
192		inc	%o2
193	1:	! %o3 < 0
194		add	%o3,%o5,%o3
195		dec	%o2
196	2:
197	end_single_divloop:
198		deccc	%g3
199		bge	single_divloop
200		tst	%o3
201		b,a	end_regular_divide
202
203not_really_big:
2041:
205	sll	%o5,4,%o5
206	cmp	%o5,%o3
207	bleu	1b
208	inccc	%o4
209	be	got_result
210	dec	%o4
211do_regular_divide:
212
213!	do the main division iteration
214	tst	%o3
215!	fall through into divide loop
216divloop:
217	sll	%o2,4,%o2
218		!depth 1, accumulated bits 0
219	bl	L.1.16
220	srl	%o5,1,%o5
221	! remainder is positive
222	subcc	%o3,%o5,%o3
223			!depth 2, accumulated bits 1
224	bl	L.2.17
225	srl	%o5,1,%o5
226	! remainder is positive
227	subcc	%o3,%o5,%o3
228			!depth 3, accumulated bits 3
229	bl	L.3.19
230	srl	%o5,1,%o5
231	! remainder is positive
232	subcc	%o3,%o5,%o3
233			!depth 4, accumulated bits 7
234	bl	L.4.23
235	srl	%o5,1,%o5
236	! remainder is positive
237	subcc	%o3,%o5,%o3
238		b	9f
239		add	%o2, (7*2+1), %o2
240
241L.4.23:	! remainder is negative
242	addcc	%o3,%o5,%o3
243		b	9f
244		add	%o2, (7*2-1), %o2
245
246
247
248
249L.3.19:	! remainder is negative
250	addcc	%o3,%o5,%o3
251			!depth 4, accumulated bits 5
252	bl	L.4.21
253	srl	%o5,1,%o5
254	! remainder is positive
255	subcc	%o3,%o5,%o3
256		b	9f
257		add	%o2, (5*2+1), %o2
258
259L.4.21:	! remainder is negative
260	addcc	%o3,%o5,%o3
261		b	9f
262		add	%o2, (5*2-1), %o2
263
264
265
266
267
268
269
270L.2.17:	! remainder is negative
271	addcc	%o3,%o5,%o3
272			!depth 3, accumulated bits 1
273	bl	L.3.17
274	srl	%o5,1,%o5
275	! remainder is positive
276	subcc	%o3,%o5,%o3
277			!depth 4, accumulated bits 3
278	bl	L.4.19
279	srl	%o5,1,%o5
280	! remainder is positive
281	subcc	%o3,%o5,%o3
282		b	9f
283		add	%o2, (3*2+1), %o2
284
285L.4.19:	! remainder is negative
286	addcc	%o3,%o5,%o3
287		b	9f
288		add	%o2, (3*2-1), %o2
289
290
291
292
293L.3.17:	! remainder is negative
294	addcc	%o3,%o5,%o3
295			!depth 4, accumulated bits 1
296	bl	L.4.17
297	srl	%o5,1,%o5
298	! remainder is positive
299	subcc	%o3,%o5,%o3
300		b	9f
301		add	%o2, (1*2+1), %o2
302
303L.4.17:	! remainder is negative
304	addcc	%o3,%o5,%o3
305		b	9f
306		add	%o2, (1*2-1), %o2
307
308
309
310
311
312
313
314
315
316
317L.1.16:	! remainder is negative
318	addcc	%o3,%o5,%o3
319			!depth 2, accumulated bits -1
320	bl	L.2.15
321	srl	%o5,1,%o5
322	! remainder is positive
323	subcc	%o3,%o5,%o3
324			!depth 3, accumulated bits -1
325	bl	L.3.15
326	srl	%o5,1,%o5
327	! remainder is positive
328	subcc	%o3,%o5,%o3
329			!depth 4, accumulated bits -1
330	bl	L.4.15
331	srl	%o5,1,%o5
332	! remainder is positive
333	subcc	%o3,%o5,%o3
334		b	9f
335		add	%o2, (-1*2+1), %o2
336
337L.4.15:	! remainder is negative
338	addcc	%o3,%o5,%o3
339		b	9f
340		add	%o2, (-1*2-1), %o2
341
342
343
344
345L.3.15:	! remainder is negative
346	addcc	%o3,%o5,%o3
347			!depth 4, accumulated bits -3
348	bl	L.4.13
349	srl	%o5,1,%o5
350	! remainder is positive
351	subcc	%o3,%o5,%o3
352		b	9f
353		add	%o2, (-3*2+1), %o2
354
355L.4.13:	! remainder is negative
356	addcc	%o3,%o5,%o3
357		b	9f
358		add	%o2, (-3*2-1), %o2
359
360
361
362
363
364
365
366L.2.15:	! remainder is negative
367	addcc	%o3,%o5,%o3
368			!depth 3, accumulated bits -3
369	bl	L.3.13
370	srl	%o5,1,%o5
371	! remainder is positive
372	subcc	%o3,%o5,%o3
373			!depth 4, accumulated bits -5
374	bl	L.4.11
375	srl	%o5,1,%o5
376	! remainder is positive
377	subcc	%o3,%o5,%o3
378		b	9f
379		add	%o2, (-5*2+1), %o2
380
381L.4.11:	! remainder is negative
382	addcc	%o3,%o5,%o3
383		b	9f
384		add	%o2, (-5*2-1), %o2
385
386
387
388
389L.3.13:	! remainder is negative
390	addcc	%o3,%o5,%o3
391			!depth 4, accumulated bits -7
392	bl	L.4.9
393	srl	%o5,1,%o5
394	! remainder is positive
395	subcc	%o3,%o5,%o3
396		b	9f
397		add	%o2, (-7*2+1), %o2
398
399L.4.9:	! remainder is negative
400	addcc	%o3,%o5,%o3
401		b	9f
402		add	%o2, (-7*2-1), %o2
403
404
405
406
407
408
409
410
411
412
413	9:
414
415end_regular_divide:
416	deccc	%o4
417	bge	divloop
418	tst	%o3
419	bl,a	got_result
420	add	%o3,%o1,%o3
421
422
423got_result:
424	tst	%g1
425	bl,a	1f
426	neg	%o3	! remainder <- -%o3
427
4281:
429	retl
430	mov	%o3,%o0	! remainder <-  %o3
431
432
433zero_divide:
434	ta	ST_DIV0		! divide by zero trap
435	retl			! if handled, ignored, return
436	mov	0, %o0
437