xref: /illumos-gate/usr/src/common/util/i386/muldiv.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License, Version 1.0 only
6*5d9d9091SRichard Lowe * (the "License").  You may not use this file except in compliance
7*5d9d9091SRichard Lowe * with the License.
8*5d9d9091SRichard Lowe *
9*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
11*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
12*5d9d9091SRichard Lowe * and limitations under the License.
13*5d9d9091SRichard Lowe *
14*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
15*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
17*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
18*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
19*5d9d9091SRichard Lowe *
20*5d9d9091SRichard Lowe * CDDL HEADER END
21*5d9d9091SRichard Lowe */
22*5d9d9091SRichard Lowe/*
23*5d9d9091SRichard Lowe * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*5d9d9091SRichard Lowe * Use is subject to license terms.
25*5d9d9091SRichard Lowe */
26*5d9d9091SRichard Lowe
27*5d9d9091SRichard Lowe#if !defined(lint)
28*5d9d9091SRichard Lowe	.ident	"%Z%%M%	%I%	%E% SMI"
29*5d9d9091SRichard Lowe
30*5d9d9091SRichard Lowe	.file	"muldiv.s"
31*5d9d9091SRichard Lowe#endif
32*5d9d9091SRichard Lowe
33*5d9d9091SRichard Lowe#if defined(__i386) && !defined(__amd64)
34*5d9d9091SRichard Lowe
35*5d9d9091SRichard Lowe/*
36*5d9d9091SRichard Lowe * Helper routines for 32-bit compilers to perform 64-bit math.
37*5d9d9091SRichard Lowe * These are used both by the Sun and GCC compilers.
38*5d9d9091SRichard Lowe */
39*5d9d9091SRichard Lowe
40*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
41*5d9d9091SRichard Lowe#include <sys/asm_misc.h>
42*5d9d9091SRichard Lowe
43*5d9d9091SRichard Lowe
44*5d9d9091SRichard Lowe#if defined(__lint)
45*5d9d9091SRichard Lowe#include <sys/types.h>
46*5d9d9091SRichard Lowe
47*5d9d9091SRichard Lowe/* ARGSUSED */
/*
 * Lint-only stub: provides a C prototype of the assembly __mul64 below so
 * lint can check callers; never compiled into the real object.
 */
48*5d9d9091SRichard Loweint64_t
49*5d9d9091SRichard Lowe__mul64(int64_t a, int64_t b)
50*5d9d9091SRichard Lowe{
51*5d9d9091SRichard Lowe	return (0);
52*5d9d9091SRichard Lowe}
53*5d9d9091SRichard Lowe
54*5d9d9091SRichard Lowe#else   /* __lint */
55*5d9d9091SRichard Lowe
56*5d9d9091SRichard Lowe/
57*5d9d9091SRichard Lowe/   function __mul64(A,B:Longint):Longint;
58*5d9d9091SRichard Lowe/	{Overflow is not checked}
59*5d9d9091SRichard Lowe/
60*5d9d9091SRichard Lowe/ We essentially do multiply by longhand, using base 2**32 digits.
61*5d9d9091SRichard Lowe/               a       b	parameter A
62*5d9d9091SRichard Lowe/	     x 	c       d	parameter B
63*5d9d9091SRichard Lowe/		---------
64*5d9d9091SRichard Lowe/               ad      bd
65*5d9d9091SRichard Lowe/       ac	bc
66*5d9d9091SRichard Lowe/       -----------------
67*5d9d9091SRichard Lowe/       ac	ad+bc	bd
68*5d9d9091SRichard Lowe/
69*5d9d9091SRichard Lowe/       We can ignore ac and top 32 bits of ad+bc: if <> 0, overflow happened.
70*5d9d9091SRichard Lowe/
/
/ Calling convention (compiler support routine, callee pops args):
/   After the prologue, A is at 8(%ebp) (lo) / 12(%ebp) (hi) and
/   B is at 16(%ebp) (lo) / 20(%ebp) (hi).  The 64-bit product is
/   returned in %edx:%eax and `ret $16' pops the two arguments.
/ Clobbers: %eax, %ecx, %edx (and flags); %esi is saved/restored.
/
71*5d9d9091SRichard Lowe	ENTRY(__mul64)
72*5d9d9091SRichard Lowe	push	%ebp		/ standard frame setup
73*5d9d9091SRichard Lowe	mov    	%esp,%ebp
74*5d9d9091SRichard Lowe	pushl	%esi		/ %esi is callee-saved
75*5d9d9091SRichard Lowe	mov	12(%ebp),%eax	/ A.hi (a)
76*5d9d9091SRichard Lowe	mull	16(%ebp)	/ Multiply A.hi by B.lo (produces ad)
77*5d9d9091SRichard Lowe	xchg	%ecx,%eax	/ ecx = bottom half of ad.
78*5d9d9091SRichard Lowe	movl    8(%ebp),%eax	/ A.Lo (b)
79*5d9d9091SRichard Lowe	movl	%eax,%esi	/ Save A.lo for later
80*5d9d9091SRichard Lowe	mull	16(%ebp)	/ Multiply A.Lo by B.LO (dx:ax = bd.)
81*5d9d9091SRichard Lowe	addl	%edx,%ecx	/ cx is ad
82*5d9d9091SRichard Lowe	xchg	%eax,%esi       / esi is bd, eax = A.lo (d)
83*5d9d9091SRichard Lowe	mull	20(%ebp)	/ Multiply A.lo * B.hi (producing bc)
84*5d9d9091SRichard Lowe	addl	%ecx,%eax	/ Produce ad+bc
85*5d9d9091SRichard Lowe	movl	%esi,%edx	/ edx = low half of bd
86*5d9d9091SRichard Lowe	xchg	%eax,%edx	/ result in %edx:%eax = (ad+bc):bd.lo
87*5d9d9091SRichard Lowe	popl	%esi
88*5d9d9091SRichard Lowe	movl	%ebp,%esp	/ tear down frame
89*5d9d9091SRichard Lowe	popl	%ebp
90*5d9d9091SRichard Lowe	ret     $16		/ callee pops both 64-bit arguments
91*5d9d9091SRichard Lowe	SET_SIZE(__mul64)
92*5d9d9091SRichard Lowe
93*5d9d9091SRichard Lowe#endif	/* __lint */
94*5d9d9091SRichard Lowe
95*5d9d9091SRichard Lowe/*
96*5d9d9091SRichard Lowe * C support for 64-bit modulo and division.
97*5d9d9091SRichard Lowe * Hand-customized compiler output - see comments for details.
98*5d9d9091SRichard Lowe */
99*5d9d9091SRichard Lowe#if defined(__lint)
100*5d9d9091SRichard Lowe
/*
 * Lint-only stubs: C prototypes of the assembly division/modulo entry
 * points below, for lint's benefit only; never compiled into the object.
 */
101*5d9d9091SRichard Lowe/* ARGSUSED */
102*5d9d9091SRichard Loweuint64_t
103*5d9d9091SRichard Lowe__udiv64(uint64_t a, uint64_t b)
104*5d9d9091SRichard Lowe{ return (0); }
105*5d9d9091SRichard Lowe
106*5d9d9091SRichard Lowe/* ARGSUSED */
107*5d9d9091SRichard Loweuint64_t
/*
 * NOTE(review): __urem64 computes an unsigned remainder yet this lint
 * prototype declares int64_t parameters — presumably a historical
 * inconsistency; confirm against the real callers before changing.
 */
108*5d9d9091SRichard Lowe__urem64(int64_t a, int64_t b)
109*5d9d9091SRichard Lowe{ return (0); }
110*5d9d9091SRichard Lowe
111*5d9d9091SRichard Lowe/* ARGSUSED */
112*5d9d9091SRichard Loweint64_t
113*5d9d9091SRichard Lowe__div64(int64_t a, int64_t b)
114*5d9d9091SRichard Lowe{ return (0); }
115*5d9d9091SRichard Lowe
116*5d9d9091SRichard Lowe/* ARGSUSED */
117*5d9d9091SRichard Loweint64_t
118*5d9d9091SRichard Lowe__rem64(int64_t a, int64_t b)
119*5d9d9091SRichard Lowe{ return (0); }
120*5d9d9091SRichard Lowe
121*5d9d9091SRichard Lowe#else	/* __lint */
122*5d9d9091SRichard Lowe
123*5d9d9091SRichard Lowe/ /*
124*5d9d9091SRichard Lowe/  * Unsigned division with remainder.
125*5d9d9091SRichard Lowe/  * Divide two uint64_ts, and calculate remainder.
126*5d9d9091SRichard Lowe/  */
127*5d9d9091SRichard Lowe/ uint64_t
128*5d9d9091SRichard Lowe/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
129*5d9d9091SRichard Lowe/ {
130*5d9d9091SRichard Lowe/ 	/* simple cases: y is a single uint32_t */
131*5d9d9091SRichard Lowe/ 	if (HI(y) == 0) {
132*5d9d9091SRichard Lowe/ 		uint32_t	div_hi, div_rem;
133*5d9d9091SRichard Lowe/ 		uint32_t 	q0, q1;
134*5d9d9091SRichard Lowe/
135*5d9d9091SRichard Lowe/ 		/* calculate q1 */
136*5d9d9091SRichard Lowe/ 		if (HI(x) < LO(y)) {
137*5d9d9091SRichard Lowe/ 			/* result is a single uint32_t, use one division */
138*5d9d9091SRichard Lowe/ 			q1 = 0;
139*5d9d9091SRichard Lowe/ 			div_hi = HI(x);
140*5d9d9091SRichard Lowe/ 		} else {
141*5d9d9091SRichard Lowe/ 			/* result is a double uint32_t, use two divisions */
142*5d9d9091SRichard Lowe/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
143*5d9d9091SRichard Lowe/ 		}
144*5d9d9091SRichard Lowe/
145*5d9d9091SRichard Lowe/ 		/* calculate q0 and remainder */
146*5d9d9091SRichard Lowe/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
147*5d9d9091SRichard Lowe/
148*5d9d9091SRichard Lowe/ 		/* return remainder */
149*5d9d9091SRichard Lowe/ 		*pmod = div_rem;
150*5d9d9091SRichard Lowe/
151*5d9d9091SRichard Lowe/ 		/* return result */
152*5d9d9091SRichard Lowe/ 		return (HILO(q1, q0));
153*5d9d9091SRichard Lowe/
154*5d9d9091SRichard Lowe/ 	} else if (HI(x) < HI(y)) {
155*5d9d9091SRichard Lowe/ 		/* HI(x) < HI(y) => x < y => result is 0 */
156*5d9d9091SRichard Lowe/
157*5d9d9091SRichard Lowe/ 		/* return remainder */
158*5d9d9091SRichard Lowe/ 		*pmod = x;
159*5d9d9091SRichard Lowe/
160*5d9d9091SRichard Lowe/ 		/* return result */
161*5d9d9091SRichard Lowe/ 		return (0);
162*5d9d9091SRichard Lowe/
163*5d9d9091SRichard Lowe/ 	} else {
164*5d9d9091SRichard Lowe/ 		/*
165*5d9d9091SRichard Lowe/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
166*5d9d9091SRichard Lowe/ 		 * result
167*5d9d9091SRichard Lowe/ 		 */
168*5d9d9091SRichard Lowe/ 		uint32_t		y0, y1;
169*5d9d9091SRichard Lowe/ 		uint32_t		x1, x0;
170*5d9d9091SRichard Lowe/ 		uint32_t		q0;
171*5d9d9091SRichard Lowe/ 		uint32_t		normshift;
172*5d9d9091SRichard Lowe/
173*5d9d9091SRichard Lowe/ 		/* normalize by shifting x and y so MSB(y) == 1 */
174*5d9d9091SRichard Lowe/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
175*5d9d9091SRichard Lowe/ 		normshift = 31 - normshift;
176*5d9d9091SRichard Lowe/
177*5d9d9091SRichard Lowe/ 		if (normshift == 0) {
178*5d9d9091SRichard Lowe/ 			/* no shifting needed, and x < 2*y so q <= 1 */
179*5d9d9091SRichard Lowe/ 			y1 = HI(y);
180*5d9d9091SRichard Lowe/ 			y0 = LO(y);
181*5d9d9091SRichard Lowe/ 			x1 = HI(x);
182*5d9d9091SRichard Lowe/ 			x0 = LO(x);
183*5d9d9091SRichard Lowe/
184*5d9d9091SRichard Lowe/ 			/* if x >= y then q = 1 (note x1 >= y1) */
185*5d9d9091SRichard Lowe/ 			if (x1 > y1 || x0 >= y0) {
186*5d9d9091SRichard Lowe/ 				q0 = 1;
187*5d9d9091SRichard Lowe/ 				/* subtract y from x to get remainder */
188*5d9d9091SRichard Lowe/ 				A_SUB2(y0, y1, x0, x1);
189*5d9d9091SRichard Lowe/ 			} else {
190*5d9d9091SRichard Lowe/ 				q0 = 0;
191*5d9d9091SRichard Lowe/ 			}
192*5d9d9091SRichard Lowe/
193*5d9d9091SRichard Lowe/ 			/* return remainder */
194*5d9d9091SRichard Lowe/ 			*pmod = HILO(x1, x0);
195*5d9d9091SRichard Lowe/
196*5d9d9091SRichard Lowe/ 			/* return result */
197*5d9d9091SRichard Lowe/ 			return (q0);
198*5d9d9091SRichard Lowe/
199*5d9d9091SRichard Lowe/ 		} else {
200*5d9d9091SRichard Lowe/ 			/*
201*5d9d9091SRichard Lowe/ 			 * the last case: result is one uint32_t, but we need to
202*5d9d9091SRichard Lowe/ 			 * normalize
203*5d9d9091SRichard Lowe/ 			 */
204*5d9d9091SRichard Lowe/ 			uint64_t	dt;
205*5d9d9091SRichard Lowe/ 			uint32_t		t0, t1, x2;
206*5d9d9091SRichard Lowe/
207*5d9d9091SRichard Lowe/ 			/* normalize y */
208*5d9d9091SRichard Lowe/ 			dt = (y << normshift);
209*5d9d9091SRichard Lowe/ 			y1 = HI(dt);
210*5d9d9091SRichard Lowe/ 			y0 = LO(dt);
211*5d9d9091SRichard Lowe/
212*5d9d9091SRichard Lowe/ 			/* normalize x (we need 3 uint32_ts!!!) */
213*5d9d9091SRichard Lowe/ 			x2 = (HI(x) >> (32 - normshift));
214*5d9d9091SRichard Lowe/ 			dt = (x << normshift);
215*5d9d9091SRichard Lowe/ 			x1 = HI(dt);
216*5d9d9091SRichard Lowe/ 			x0 = LO(dt);
217*5d9d9091SRichard Lowe/
218*5d9d9091SRichard Lowe/ 			/* estimate q0, and reduce x to a two uint32_t value */
219*5d9d9091SRichard Lowe/ 			A_DIV32(x1, x2, y1, q0, x1);
220*5d9d9091SRichard Lowe/
221*5d9d9091SRichard Lowe/ 			/* adjust q0 down if too high */
222*5d9d9091SRichard Lowe/ 			/*
223*5d9d9091SRichard Lowe/ 			 * because of the limited range of x2 we can only be
224*5d9d9091SRichard Lowe/ 			 * one off
225*5d9d9091SRichard Lowe/ 			 */
226*5d9d9091SRichard Lowe/ 			A_MUL32(y0, q0, t0, t1);
227*5d9d9091SRichard Lowe/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
228*5d9d9091SRichard Lowe/ 				q0--;
229*5d9d9091SRichard Lowe/ 				A_SUB2(y0, y1, t0, t1);
230*5d9d9091SRichard Lowe/ 			}
231*5d9d9091SRichard Lowe/ 			/* return remainder */
232*5d9d9091SRichard Lowe/ 			/* subtract product from x to get remainder */
233*5d9d9091SRichard Lowe/ 			A_SUB2(t0, t1, x0, x1);
234*5d9d9091SRichard Lowe/ 			*pmod = (HILO(x1, x0) >> normshift);
235*5d9d9091SRichard Lowe/
236*5d9d9091SRichard Lowe/ 			/* return result */
237*5d9d9091SRichard Lowe/ 			return (q0);
238*5d9d9091SRichard Lowe/ 		}
239*5d9d9091SRichard Lowe/ 	}
240*5d9d9091SRichard Lowe/ }
/
/ Internal helper — NOT the C ABI.  Calling convention (as used by the
/ __urem64/__div64 wrappers below and visible in the entry stores):
/   x    arrives in %edx:%eax (hi:lo) and is spilled to 40/44(%esp)
/   y    is on the stack at 64(%esp) (lo) / 68(%esp) (hi) after the
/        prologue (3 pushes + subl $48)
/   pmod is on the stack at 72(%esp)
/ Returns the 64-bit quotient x / y in %edx:%eax and stores the 64-bit
/ remainder through pmod.  Plain `ret' — the caller cleans its pushes.
/ The structure follows the UDivRem() pseudo-code commented above:
/ .LL4/.LL21 handle HI(y) == 0, the .LL2 path handles x < y, and
/ .LL6/.LL8 handle the normalized one-uint32_t-result case.
/
241*5d9d9091SRichard Lowe	ENTRY(UDivRem)
242*5d9d9091SRichard Lowe	pushl	%ebp
243*5d9d9091SRichard Lowe	pushl	%edi
244*5d9d9091SRichard Lowe	pushl	%esi
245*5d9d9091SRichard Lowe	subl	$48, %esp
246*5d9d9091SRichard Lowe	movl	68(%esp), %edi	/ y,
247*5d9d9091SRichard Lowe	testl	%edi, %edi	/ tmp63
248*5d9d9091SRichard Lowe	movl	%eax, 40(%esp)	/ x, x
249*5d9d9091SRichard Lowe	movl	%edx, 44(%esp)	/ x, x
250*5d9d9091SRichard Lowe	movl	%edi, %esi	/, tmp62
251*5d9d9091SRichard Lowe	movl	%edi, %ecx	/ tmp62, tmp63
252*5d9d9091SRichard Lowe	jne	.LL2
253*5d9d9091SRichard Lowe	movl	%edx, %eax	/, tmp68
254*5d9d9091SRichard Lowe	cmpl	64(%esp), %eax	/ y, tmp68
255*5d9d9091SRichard Lowe	jae	.LL21
256*5d9d9091SRichard Lowe.LL4:
257*5d9d9091SRichard Lowe	movl	72(%esp), %ebp	/ pmod,
258*5d9d9091SRichard Lowe	xorl	%esi, %esi	/ <result>
259*5d9d9091SRichard Lowe	movl	40(%esp), %eax	/ x, q0
260*5d9d9091SRichard Lowe	movl	%ecx, %edi	/ <result>, <result>
261*5d9d9091SRichard Lowe	divl	64(%esp)	/ y
262*5d9d9091SRichard Lowe	movl	%edx, (%ebp)	/ div_rem,
263*5d9d9091SRichard Lowe	xorl	%edx, %edx	/ q0
264*5d9d9091SRichard Lowe	addl	%eax, %esi	/ q0, <result>
265*5d9d9091SRichard Lowe	movl	$0, 4(%ebp)
266*5d9d9091SRichard Lowe	adcl	%edx, %edi	/ q0, <result>
267*5d9d9091SRichard Lowe	addl	$48, %esp
268*5d9d9091SRichard Lowe	movl	%esi, %eax	/ <result>, <result>
269*5d9d9091SRichard Lowe	popl	%esi
270*5d9d9091SRichard Lowe	movl	%edi, %edx	/ <result>, <result>
271*5d9d9091SRichard Lowe	popl	%edi
272*5d9d9091SRichard Lowe	popl	%ebp
273*5d9d9091SRichard Lowe	ret
274*5d9d9091SRichard Lowe	.align	16
275*5d9d9091SRichard Lowe.LL2:
276*5d9d9091SRichard Lowe	movl	44(%esp), %eax	/ x,
277*5d9d9091SRichard Lowe	xorl	%edx, %edx
278*5d9d9091SRichard Lowe	cmpl	%esi, %eax	/ tmp62, tmp5
279*5d9d9091SRichard Lowe	movl	%eax, 32(%esp)	/ tmp5,
280*5d9d9091SRichard Lowe	movl	%edx, 36(%esp)
281*5d9d9091SRichard Lowe	jae	.LL6
282*5d9d9091SRichard Lowe	movl	72(%esp), %esi	/ pmod,
283*5d9d9091SRichard Lowe	movl	40(%esp), %ebp	/ x,
284*5d9d9091SRichard Lowe	movl	44(%esp), %ecx	/ x,
285*5d9d9091SRichard Lowe	movl	%ebp, (%esi)
286*5d9d9091SRichard Lowe	movl	%ecx, 4(%esi)
287*5d9d9091SRichard Lowe	xorl	%edi, %edi	/ <result>
288*5d9d9091SRichard Lowe	xorl	%esi, %esi	/ <result>
289*5d9d9091SRichard Lowe.LL22:
290*5d9d9091SRichard Lowe	addl	$48, %esp
291*5d9d9091SRichard Lowe	movl	%esi, %eax	/ <result>, <result>
292*5d9d9091SRichard Lowe	popl	%esi
293*5d9d9091SRichard Lowe	movl	%edi, %edx	/ <result>, <result>
294*5d9d9091SRichard Lowe	popl	%edi
295*5d9d9091SRichard Lowe	popl	%ebp
296*5d9d9091SRichard Lowe	ret
297*5d9d9091SRichard Lowe	.align	16
298*5d9d9091SRichard Lowe.LL21:
299*5d9d9091SRichard Lowe	movl	%edi, %edx	/ tmp63, div_hi
300*5d9d9091SRichard Lowe	divl	64(%esp)	/ y
301*5d9d9091SRichard Lowe	movl	%eax, %ecx	/, q1
302*5d9d9091SRichard Lowe	jmp	.LL4
303*5d9d9091SRichard Lowe	.align	16
304*5d9d9091SRichard Lowe.LL6:
305*5d9d9091SRichard Lowe	movl	$31, %edi	/, tmp87
306*5d9d9091SRichard Lowe	bsrl	%esi,%edx	/ tmp62, normshift
307*5d9d9091SRichard Lowe	subl	%edx, %edi	/ normshift, tmp87
308*5d9d9091SRichard Lowe	movl	%edi, 28(%esp)	/ tmp87,
309*5d9d9091SRichard Lowe	jne	.LL8
310*5d9d9091SRichard Lowe	movl	32(%esp), %edx	/, x1
311*5d9d9091SRichard Lowe	cmpl	%ecx, %edx	/ y1, x1
312*5d9d9091SRichard Lowe	movl	64(%esp), %edi	/ y, y0
313*5d9d9091SRichard Lowe	movl	40(%esp), %esi	/ x, x0
314*5d9d9091SRichard Lowe	ja	.LL10
315*5d9d9091SRichard Lowe	xorl	%ebp, %ebp	/ q0
316*5d9d9091SRichard Lowe	cmpl	%edi, %esi	/ y0, x0
317*5d9d9091SRichard Lowe	jb	.LL11
318*5d9d9091SRichard Lowe.LL10:
319*5d9d9091SRichard Lowe	movl	$1, %ebp	/, q0
320*5d9d9091SRichard Lowe	subl	%edi,%esi	/ y0, x0
321*5d9d9091SRichard Lowe	sbbl	%ecx,%edx	/ tmp63, x1
322*5d9d9091SRichard Lowe.LL11:
323*5d9d9091SRichard Lowe	movl	%edx, %ecx	/ x1, x1
324*5d9d9091SRichard Lowe	xorl	%edx, %edx	/ x1
325*5d9d9091SRichard Lowe	xorl	%edi, %edi	/ x0
326*5d9d9091SRichard Lowe	addl	%esi, %edx	/ x0, x1
327*5d9d9091SRichard Lowe	adcl	%edi, %ecx	/ x0, x1
328*5d9d9091SRichard Lowe	movl	72(%esp), %esi	/ pmod,
329*5d9d9091SRichard Lowe	movl	%edx, (%esi)	/ x1,
330*5d9d9091SRichard Lowe	movl	%ecx, 4(%esi)	/ x1,
331*5d9d9091SRichard Lowe	xorl	%edi, %edi	/ <result>
332*5d9d9091SRichard Lowe	movl	%ebp, %esi	/ q0, <result>
333*5d9d9091SRichard Lowe	jmp	.LL22
334*5d9d9091SRichard Lowe	.align	16
335*5d9d9091SRichard Lowe.LL8:
336*5d9d9091SRichard Lowe	movb	28(%esp), %cl
337*5d9d9091SRichard Lowe	movl	64(%esp), %esi	/ y, dt
338*5d9d9091SRichard Lowe	movl	68(%esp), %edi	/ y, dt
339*5d9d9091SRichard Lowe	shldl	%esi, %edi	/, dt, dt
340*5d9d9091SRichard Lowe	sall	%cl, %esi	/, dt
341*5d9d9091SRichard Lowe	andl	$32, %ecx
342*5d9d9091SRichard Lowe	jne	.LL23
343*5d9d9091SRichard Lowe.LL17:
344*5d9d9091SRichard Lowe	movl	$32, %ecx	/, tmp102
345*5d9d9091SRichard Lowe	subl	28(%esp), %ecx	/, tmp102
346*5d9d9091SRichard Lowe	movl	%esi, %ebp	/ dt, y0
347*5d9d9091SRichard Lowe	movl	32(%esp), %esi
348*5d9d9091SRichard Lowe	shrl	%cl, %esi	/ tmp102,
349*5d9d9091SRichard Lowe	movl	%edi, 24(%esp)	/ tmp99,
350*5d9d9091SRichard Lowe	movb	28(%esp), %cl
351*5d9d9091SRichard Lowe	movl	%esi, 12(%esp)	/, x2
352*5d9d9091SRichard Lowe	movl	44(%esp), %edi	/ x, dt
353*5d9d9091SRichard Lowe	movl	40(%esp), %esi	/ x, dt
354*5d9d9091SRichard Lowe	shldl	%esi, %edi	/, dt, dt
355*5d9d9091SRichard Lowe	sall	%cl, %esi	/, dt
356*5d9d9091SRichard Lowe	andl	$32, %ecx
357*5d9d9091SRichard Lowe	je	.LL18
358*5d9d9091SRichard Lowe	movl	%esi, %edi	/ dt, dt
359*5d9d9091SRichard Lowe	xorl	%esi, %esi	/ dt
360*5d9d9091SRichard Lowe.LL18:
361*5d9d9091SRichard Lowe	movl	%edi, %ecx	/ dt,
362*5d9d9091SRichard Lowe	movl	%edi, %eax	/ tmp2,
363*5d9d9091SRichard Lowe	movl	%ecx, (%esp)
364*5d9d9091SRichard Lowe	movl	12(%esp), %edx	/ x2,
365*5d9d9091SRichard Lowe	divl	24(%esp)
366*5d9d9091SRichard Lowe	movl	%edx, %ecx	/, x1
367*5d9d9091SRichard Lowe	xorl	%edi, %edi
368*5d9d9091SRichard Lowe	movl	%eax, 20(%esp)
369*5d9d9091SRichard Lowe	movl	%ebp, %eax	/ y0, t0
370*5d9d9091SRichard Lowe	mull	20(%esp)
371*5d9d9091SRichard Lowe	cmpl	%ecx, %edx	/ x1, t1
372*5d9d9091SRichard Lowe	movl	%edi, 4(%esp)
373*5d9d9091SRichard Lowe	ja	.LL14
374*5d9d9091SRichard Lowe	je	.LL24
375*5d9d9091SRichard Lowe.LL15:
376*5d9d9091SRichard Lowe	movl	%ecx, %edi	/ x1,
377*5d9d9091SRichard Lowe	subl	%eax,%esi	/ t0, x0
378*5d9d9091SRichard Lowe	sbbl	%edx,%edi	/ t1,
379*5d9d9091SRichard Lowe	movl	%edi, %eax	/, x1
380*5d9d9091SRichard Lowe	movl	%eax, %edx	/ x1, x1
381*5d9d9091SRichard Lowe	xorl	%eax, %eax	/ x1
382*5d9d9091SRichard Lowe	xorl	%ebp, %ebp	/ x0
383*5d9d9091SRichard Lowe	addl	%esi, %eax	/ x0, x1
384*5d9d9091SRichard Lowe	adcl	%ebp, %edx	/ x0, x1
385*5d9d9091SRichard Lowe	movb	28(%esp), %cl
386*5d9d9091SRichard Lowe	shrdl	%edx, %eax	/, x1, x1
387*5d9d9091SRichard Lowe	shrl	%cl, %edx	/, x1
388*5d9d9091SRichard Lowe	andl	$32, %ecx
389*5d9d9091SRichard Lowe	je	.LL16
390*5d9d9091SRichard Lowe	movl	%edx, %eax	/ x1, x1
391*5d9d9091SRichard Lowe	xorl	%edx, %edx	/ x1
392*5d9d9091SRichard Lowe.LL16:
393*5d9d9091SRichard Lowe	movl	72(%esp), %ecx	/ pmod,
394*5d9d9091SRichard Lowe	movl	20(%esp), %esi	/, <result>
395*5d9d9091SRichard Lowe	xorl	%edi, %edi	/ <result>
396*5d9d9091SRichard Lowe	movl	%eax, (%ecx)	/ x1,
397*5d9d9091SRichard Lowe	movl	%edx, 4(%ecx)	/ x1,
398*5d9d9091SRichard Lowe	jmp	.LL22
399*5d9d9091SRichard Lowe	.align	16
400*5d9d9091SRichard Lowe.LL24:
401*5d9d9091SRichard Lowe	cmpl	%esi, %eax	/ x0, t0
402*5d9d9091SRichard Lowe	jbe	.LL15
403*5d9d9091SRichard Lowe.LL14:
404*5d9d9091SRichard Lowe	decl	20(%esp)
405*5d9d9091SRichard Lowe	subl	%ebp,%eax	/ y0, t0
406*5d9d9091SRichard Lowe	sbbl	24(%esp),%edx	/, t1
407*5d9d9091SRichard Lowe	jmp	.LL15
408*5d9d9091SRichard Lowe.LL23:
409*5d9d9091SRichard Lowe	movl	%esi, %edi	/ dt, dt
410*5d9d9091SRichard Lowe	xorl	%esi, %esi	/ dt
411*5d9d9091SRichard Lowe	jmp	.LL17
412*5d9d9091SRichard Lowe	SET_SIZE(UDivRem)
413*5d9d9091SRichard Lowe
414*5d9d9091SRichard Lowe/*
415*5d9d9091SRichard Lowe * Unsigned division without remainder.
416*5d9d9091SRichard Lowe */
417*5d9d9091SRichard Lowe/ uint64_t
418*5d9d9091SRichard Lowe/ UDiv(uint64_t x, uint64_t y)
419*5d9d9091SRichard Lowe/ {
420*5d9d9091SRichard Lowe/ 	if (HI(y) == 0) {
421*5d9d9091SRichard Lowe/ 		/* simple cases: y is a single uint32_t */
422*5d9d9091SRichard Lowe/ 		uint32_t	div_hi, div_rem;
423*5d9d9091SRichard Lowe/ 		uint32_t	q0, q1;
424*5d9d9091SRichard Lowe/
425*5d9d9091SRichard Lowe/ 		/* calculate q1 */
426*5d9d9091SRichard Lowe/ 		if (HI(x) < LO(y)) {
427*5d9d9091SRichard Lowe/ 			/* result is a single uint32_t, use one division */
428*5d9d9091SRichard Lowe/ 			q1 = 0;
429*5d9d9091SRichard Lowe/ 			div_hi = HI(x);
430*5d9d9091SRichard Lowe/ 		} else {
431*5d9d9091SRichard Lowe/ 			/* result is a double uint32_t, use two divisions */
432*5d9d9091SRichard Lowe/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
433*5d9d9091SRichard Lowe/ 		}
434*5d9d9091SRichard Lowe/
435*5d9d9091SRichard Lowe/ 		/* calculate q0 and remainder */
436*5d9d9091SRichard Lowe/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
437*5d9d9091SRichard Lowe/
438*5d9d9091SRichard Lowe/ 		/* return result */
439*5d9d9091SRichard Lowe/ 		return (HILO(q1, q0));
440*5d9d9091SRichard Lowe/
441*5d9d9091SRichard Lowe/ 	} else if (HI(x) < HI(y)) {
442*5d9d9091SRichard Lowe/ 		/* HI(x) < HI(y) => x < y => result is 0 */
443*5d9d9091SRichard Lowe/
444*5d9d9091SRichard Lowe/ 		/* return result */
445*5d9d9091SRichard Lowe/ 		return (0);
446*5d9d9091SRichard Lowe/
447*5d9d9091SRichard Lowe/ 	} else {
448*5d9d9091SRichard Lowe/ 		/*
449*5d9d9091SRichard Lowe/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
450*5d9d9091SRichard Lowe/ 		 * result
451*5d9d9091SRichard Lowe/ 		 */
452*5d9d9091SRichard Lowe/ 		uint32_t		y0, y1;
453*5d9d9091SRichard Lowe/ 		uint32_t		x1, x0;
454*5d9d9091SRichard Lowe/ 		uint32_t		q0;
455*5d9d9091SRichard Lowe/ 		unsigned		normshift;
456*5d9d9091SRichard Lowe/
457*5d9d9091SRichard Lowe/ 		/* normalize by shifting x and y so MSB(y) == 1 */
458*5d9d9091SRichard Lowe/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
459*5d9d9091SRichard Lowe/ 		normshift = 31 - normshift;
460*5d9d9091SRichard Lowe/
461*5d9d9091SRichard Lowe/ 		if (normshift == 0) {
462*5d9d9091SRichard Lowe/ 			/* no shifting needed, and x < 2*y so q <= 1 */
463*5d9d9091SRichard Lowe/ 			y1 = HI(y);
464*5d9d9091SRichard Lowe/ 			y0 = LO(y);
465*5d9d9091SRichard Lowe/ 			x1 = HI(x);
466*5d9d9091SRichard Lowe/ 			x0 = LO(x);
467*5d9d9091SRichard Lowe/
468*5d9d9091SRichard Lowe/ 			/* if x >= y then q = 1 (note x1 >= y1) */
469*5d9d9091SRichard Lowe/ 			if (x1 > y1 || x0 >= y0) {
470*5d9d9091SRichard Lowe/ 				q0 = 1;
471*5d9d9091SRichard Lowe/ 				/* subtract y from x to get remainder */
472*5d9d9091SRichard Lowe/ 				/* A_SUB2(y0, y1, x0, x1); */
473*5d9d9091SRichard Lowe/ 			} else {
474*5d9d9091SRichard Lowe/ 				q0 = 0;
475*5d9d9091SRichard Lowe/ 			}
476*5d9d9091SRichard Lowe/
477*5d9d9091SRichard Lowe/ 			/* return result */
478*5d9d9091SRichard Lowe/ 			return (q0);
479*5d9d9091SRichard Lowe/
480*5d9d9091SRichard Lowe/ 		} else {
481*5d9d9091SRichard Lowe/ 			/*
482*5d9d9091SRichard Lowe/ 			 * the last case: result is one uint32_t, but we need to
483*5d9d9091SRichard Lowe/ 			 * normalize
484*5d9d9091SRichard Lowe/ 			 */
485*5d9d9091SRichard Lowe/ 			uint64_t	dt;
486*5d9d9091SRichard Lowe/ 			uint32_t		t0, t1, x2;
487*5d9d9091SRichard Lowe/
488*5d9d9091SRichard Lowe/ 			/* normalize y */
489*5d9d9091SRichard Lowe/ 			dt = (y << normshift);
490*5d9d9091SRichard Lowe/ 			y1 = HI(dt);
491*5d9d9091SRichard Lowe/ 			y0 = LO(dt);
492*5d9d9091SRichard Lowe/
493*5d9d9091SRichard Lowe/ 			/* normalize x (we need 3 uint32_ts!!!) */
494*5d9d9091SRichard Lowe/ 			x2 = (HI(x) >> (32 - normshift));
495*5d9d9091SRichard Lowe/ 			dt = (x << normshift);
496*5d9d9091SRichard Lowe/ 			x1 = HI(dt);
497*5d9d9091SRichard Lowe/ 			x0 = LO(dt);
498*5d9d9091SRichard Lowe/
499*5d9d9091SRichard Lowe/ 			/* estimate q0, and reduce x to a two uint32_t value */
500*5d9d9091SRichard Lowe/ 			A_DIV32(x1, x2, y1, q0, x1);
501*5d9d9091SRichard Lowe/
502*5d9d9091SRichard Lowe/ 			/* adjust q0 down if too high */
503*5d9d9091SRichard Lowe/ 			/*
504*5d9d9091SRichard Lowe/ 			 * because of the limited range of x2 we can only be
505*5d9d9091SRichard Lowe/ 			 * one off
506*5d9d9091SRichard Lowe/ 			 */
507*5d9d9091SRichard Lowe/ 			A_MUL32(y0, q0, t0, t1);
508*5d9d9091SRichard Lowe/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
509*5d9d9091SRichard Lowe/ 				q0--;
510*5d9d9091SRichard Lowe/ 			}
511*5d9d9091SRichard Lowe/ 			/* return result */
512*5d9d9091SRichard Lowe/ 			return (q0);
513*5d9d9091SRichard Lowe/ 		}
514*5d9d9091SRichard Lowe/ 	}
515*5d9d9091SRichard Lowe/ }
/
/ Internal helper — NOT the C ABI.  Calling convention (as used by the
/ __udiv64/__div64 wrappers and visible in the entry stores):
/   x arrives in %edx:%eax (hi:lo) and is spilled to 32/36(%esp)
/   y is on the stack at 56(%esp) (lo) / 60(%esp) (hi) after the
/     prologue (3 pushes + subl $40)
/ Returns the 64-bit quotient x / y in %edx:%eax; the remainder is not
/ computed or stored.  Plain `ret' — the caller cleans its pushes.
/ Mirrors UDivRem above minus the remainder bookkeeping; see the UDiv()
/ pseudo-code commented above for the branch structure.
/
516*5d9d9091SRichard Lowe	ENTRY(UDiv)
517*5d9d9091SRichard Lowe	pushl	%ebp
518*5d9d9091SRichard Lowe	pushl	%edi
519*5d9d9091SRichard Lowe	pushl	%esi
520*5d9d9091SRichard Lowe	subl	$40, %esp
521*5d9d9091SRichard Lowe	movl	%edx, 36(%esp)	/ x, x
522*5d9d9091SRichard Lowe	movl	60(%esp), %edx	/ y,
523*5d9d9091SRichard Lowe	testl	%edx, %edx	/ tmp62
524*5d9d9091SRichard Lowe	movl	%eax, 32(%esp)	/ x, x
525*5d9d9091SRichard Lowe	movl	%edx, %ecx	/ tmp61, tmp62
526*5d9d9091SRichard Lowe	movl	%edx, %eax	/, tmp61
527*5d9d9091SRichard Lowe	jne	.LL26
528*5d9d9091SRichard Lowe	movl	36(%esp), %esi	/ x,
529*5d9d9091SRichard Lowe	cmpl	56(%esp), %esi	/ y, tmp67
530*5d9d9091SRichard Lowe	movl	%esi, %eax	/, tmp67
531*5d9d9091SRichard Lowe	movl	%esi, %edx	/ tmp67, div_hi
532*5d9d9091SRichard Lowe	jb	.LL28
533*5d9d9091SRichard Lowe	movl	%ecx, %edx	/ tmp62, div_hi
534*5d9d9091SRichard Lowe	divl	56(%esp)	/ y
535*5d9d9091SRichard Lowe	movl	%eax, %ecx	/, q1
536*5d9d9091SRichard Lowe.LL28:
537*5d9d9091SRichard Lowe	xorl	%esi, %esi	/ <result>
538*5d9d9091SRichard Lowe	movl	%ecx, %edi	/ <result>, <result>
539*5d9d9091SRichard Lowe	movl	32(%esp), %eax	/ x, q0
540*5d9d9091SRichard Lowe	xorl	%ecx, %ecx	/ q0
541*5d9d9091SRichard Lowe	divl	56(%esp)	/ y
542*5d9d9091SRichard Lowe	addl	%eax, %esi	/ q0, <result>
543*5d9d9091SRichard Lowe	adcl	%ecx, %edi	/ q0, <result>
544*5d9d9091SRichard Lowe.LL25:
545*5d9d9091SRichard Lowe	addl	$40, %esp
546*5d9d9091SRichard Lowe	movl	%esi, %eax	/ <result>, <result>
547*5d9d9091SRichard Lowe	popl	%esi
548*5d9d9091SRichard Lowe	movl	%edi, %edx	/ <result>, <result>
549*5d9d9091SRichard Lowe	popl	%edi
550*5d9d9091SRichard Lowe	popl	%ebp
551*5d9d9091SRichard Lowe	ret
552*5d9d9091SRichard Lowe	.align	16
553*5d9d9091SRichard Lowe.LL26:
554*5d9d9091SRichard Lowe	movl	36(%esp), %esi	/ x,
555*5d9d9091SRichard Lowe	xorl	%edi, %edi
556*5d9d9091SRichard Lowe	movl	%esi, 24(%esp)	/ tmp1,
557*5d9d9091SRichard Lowe	movl	%edi, 28(%esp)
558*5d9d9091SRichard Lowe	xorl	%esi, %esi	/ <result>
559*5d9d9091SRichard Lowe	xorl	%edi, %edi	/ <result>
560*5d9d9091SRichard Lowe	cmpl	%eax, 24(%esp)	/ tmp61,
561*5d9d9091SRichard Lowe	jb	.LL25
562*5d9d9091SRichard Lowe	bsrl	%eax,%ebp	/ tmp61, normshift
563*5d9d9091SRichard Lowe	movl	$31, %eax	/, tmp85
564*5d9d9091SRichard Lowe	subl	%ebp, %eax	/ normshift, normshift
565*5d9d9091SRichard Lowe	jne	.LL32
566*5d9d9091SRichard Lowe	movl	24(%esp), %eax	/, x1
567*5d9d9091SRichard Lowe	cmpl	%ecx, %eax	/ tmp62, x1
568*5d9d9091SRichard Lowe	movl	56(%esp), %esi	/ y, y0
569*5d9d9091SRichard Lowe	movl	32(%esp), %edx	/ x, x0
570*5d9d9091SRichard Lowe	ja	.LL34
571*5d9d9091SRichard Lowe	xorl	%eax, %eax	/ q0
572*5d9d9091SRichard Lowe	cmpl	%esi, %edx	/ y0, x0
573*5d9d9091SRichard Lowe	jb	.LL35
574*5d9d9091SRichard Lowe.LL34:
575*5d9d9091SRichard Lowe	movl	$1, %eax	/, q0
576*5d9d9091SRichard Lowe.LL35:
577*5d9d9091SRichard Lowe	movl	%eax, %esi	/ q0, <result>
578*5d9d9091SRichard Lowe	xorl	%edi, %edi	/ <result>
579*5d9d9091SRichard Lowe.LL45:
580*5d9d9091SRichard Lowe	addl	$40, %esp
581*5d9d9091SRichard Lowe	movl	%esi, %eax	/ <result>, <result>
582*5d9d9091SRichard Lowe	popl	%esi
583*5d9d9091SRichard Lowe	movl	%edi, %edx	/ <result>, <result>
584*5d9d9091SRichard Lowe	popl	%edi
585*5d9d9091SRichard Lowe	popl	%ebp
586*5d9d9091SRichard Lowe	ret
587*5d9d9091SRichard Lowe	.align	16
588*5d9d9091SRichard Lowe.LL32:
589*5d9d9091SRichard Lowe	movb	%al, %cl
590*5d9d9091SRichard Lowe	movl	56(%esp), %esi	/ y,
591*5d9d9091SRichard Lowe	movl	60(%esp), %edi	/ y,
592*5d9d9091SRichard Lowe	shldl	%esi, %edi
593*5d9d9091SRichard Lowe	sall	%cl, %esi
594*5d9d9091SRichard Lowe	andl	$32, %ecx
595*5d9d9091SRichard Lowe	jne	.LL43
596*5d9d9091SRichard Lowe.LL40:
597*5d9d9091SRichard Lowe	movl	$32, %ecx	/, tmp96
598*5d9d9091SRichard Lowe	subl	%eax, %ecx	/ normshift, tmp96
599*5d9d9091SRichard Lowe	movl	%edi, %edx
600*5d9d9091SRichard Lowe	movl	%edi, 20(%esp)	/, dt
601*5d9d9091SRichard Lowe	movl	24(%esp), %ebp	/, x2
602*5d9d9091SRichard Lowe	xorl	%edi, %edi
603*5d9d9091SRichard Lowe	shrl	%cl, %ebp	/ tmp96, x2
604*5d9d9091SRichard Lowe	movl	%esi, 16(%esp)	/, dt
605*5d9d9091SRichard Lowe	movb	%al, %cl
606*5d9d9091SRichard Lowe	movl	32(%esp), %esi	/ x, dt
607*5d9d9091SRichard Lowe	movl	%edi, 12(%esp)
608*5d9d9091SRichard Lowe	movl	36(%esp), %edi	/ x, dt
609*5d9d9091SRichard Lowe	shldl	%esi, %edi	/, dt, dt
610*5d9d9091SRichard Lowe	sall	%cl, %esi	/, dt
611*5d9d9091SRichard Lowe	andl	$32, %ecx
612*5d9d9091SRichard Lowe	movl	%edx, 8(%esp)
613*5d9d9091SRichard Lowe	je	.LL41
614*5d9d9091SRichard Lowe	movl	%esi, %edi	/ dt, dt
615*5d9d9091SRichard Lowe	xorl	%esi, %esi	/ dt
616*5d9d9091SRichard Lowe.LL41:
617*5d9d9091SRichard Lowe	xorl	%ecx, %ecx
618*5d9d9091SRichard Lowe	movl	%edi, %eax	/ tmp1,
619*5d9d9091SRichard Lowe	movl	%ebp, %edx	/ x2,
620*5d9d9091SRichard Lowe	divl	8(%esp)
621*5d9d9091SRichard Lowe	movl	%edx, %ebp	/, x1
622*5d9d9091SRichard Lowe	movl	%ecx, 4(%esp)
623*5d9d9091SRichard Lowe	movl	%eax, %ecx	/, q0
624*5d9d9091SRichard Lowe	movl	16(%esp), %eax	/ dt,
625*5d9d9091SRichard Lowe	mull	%ecx	/ q0
626*5d9d9091SRichard Lowe	cmpl	%ebp, %edx	/ x1, t1
627*5d9d9091SRichard Lowe	movl	%edi, (%esp)
628*5d9d9091SRichard Lowe	movl	%esi, %edi	/ dt, x0
629*5d9d9091SRichard Lowe	ja	.LL38
630*5d9d9091SRichard Lowe	je	.LL44
631*5d9d9091SRichard Lowe.LL39:
632*5d9d9091SRichard Lowe	movl	%ecx, %esi	/ q0, <result>
633*5d9d9091SRichard Lowe.LL46:
634*5d9d9091SRichard Lowe	xorl	%edi, %edi	/ <result>
635*5d9d9091SRichard Lowe	jmp	.LL45
636*5d9d9091SRichard Lowe.LL44:
637*5d9d9091SRichard Lowe	cmpl	%edi, %eax	/ x0, t0
638*5d9d9091SRichard Lowe	jbe	.LL39
639*5d9d9091SRichard Lowe.LL38:
640*5d9d9091SRichard Lowe	decl	%ecx		/ q0
641*5d9d9091SRichard Lowe	movl	%ecx, %esi	/ q0, <result>
642*5d9d9091SRichard Lowe	jmp	.LL46
643*5d9d9091SRichard Lowe.LL43:
644*5d9d9091SRichard Lowe	movl	%esi, %edi
645*5d9d9091SRichard Lowe	xorl	%esi, %esi
646*5d9d9091SRichard Lowe	jmp	.LL40
647*5d9d9091SRichard Lowe	SET_SIZE(UDiv)
648*5d9d9091SRichard Lowe
649*5d9d9091SRichard Lowe/*
650*5d9d9091SRichard Lowe * __udiv64
651*5d9d9091SRichard Lowe *
652*5d9d9091SRichard Lowe * Perform division of two unsigned 64-bit quantities, returning the
653*5d9d9091SRichard Lowe * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
654*5d9d9091SRichard Lowe */
655*5d9d9091SRichard Lowe	ENTRY(__udiv64)
/ Load x into %edx:%eax and re-push y (hi word first, then lo) for
/ UDiv, which takes x in registers and y on the stack.
656*5d9d9091SRichard Lowe	movl	4(%esp), %eax	/ x, x
657*5d9d9091SRichard Lowe	movl	8(%esp), %edx	/ x, x
658*5d9d9091SRichard Lowe	pushl	16(%esp)	/ y (hi half)
659*5d9d9091SRichard Lowe	pushl	16(%esp)	/ y (lo half; offset shifted by prior push)
660*5d9d9091SRichard Lowe	call	UDiv
661*5d9d9091SRichard Lowe	addl	$8, %esp	/ drop the re-pushed copy of y
662*5d9d9091SRichard Lowe	ret     $16		/ pop the caller's original arguments
663*5d9d9091SRichard Lowe	SET_SIZE(__udiv64)
664*5d9d9091SRichard Lowe
665*5d9d9091SRichard Lowe/*
666*5d9d9091SRichard Lowe * __urem64
667*5d9d9091SRichard Lowe *
668*5d9d9091SRichard Lowe * Perform division of two unsigned 64-bit quantities, returning the
669*5d9d9091SRichard Lowe * remainder in %edx:%eax.  __urem64 pops the arguments on return.
670*5d9d9091SRichard Lowe */
671*5d9d9091SRichard Lowe	ENTRY(__urem64)
/ Reserve a 12-byte local area; its first 8 bytes receive the
/ remainder that UDivRem stores through the pmod pointer.
672*5d9d9091SRichard Lowe	subl	$12, %esp
673*5d9d9091SRichard Lowe	movl	%esp, %ecx	/, tmp65  (pmod = &local remainder)
674*5d9d9091SRichard Lowe	movl	16(%esp), %eax	/ x, x
675*5d9d9091SRichard Lowe	movl	20(%esp), %edx	/ x, x
676*5d9d9091SRichard Lowe	pushl	%ecx		/ tmp65
677*5d9d9091SRichard Lowe	pushl	32(%esp)	/ y (hi half)
678*5d9d9091SRichard Lowe	pushl	32(%esp)	/ y (lo half; offset shifted by prior push)
679*5d9d9091SRichard Lowe	call	UDivRem
/ Discard the quotient; return the stored remainder in %edx:%eax.
680*5d9d9091SRichard Lowe	movl	12(%esp), %eax	/ rem, rem
681*5d9d9091SRichard Lowe	movl	16(%esp), %edx	/ rem, rem
682*5d9d9091SRichard Lowe	addl	$24, %esp	/ 3 pushed args + 12-byte local area
683*5d9d9091SRichard Lowe	ret	$16		/ pop the caller's original arguments
684*5d9d9091SRichard Lowe	SET_SIZE(__urem64)
685*5d9d9091SRichard Lowe
686*5d9d9091SRichard Lowe/*
687*5d9d9091SRichard Lowe * __div64
688*5d9d9091SRichard Lowe *
689*5d9d9091SRichard Lowe * Perform division of two signed 64-bit quantities, returning the
690*5d9d9091SRichard Lowe * quotient in %edx:%eax.  __div64 pops the arguments on return.
691*5d9d9091SRichard Lowe */
692*5d9d9091SRichard Lowe/ int64_t
693*5d9d9091SRichard Lowe/ __div64(int64_t x, int64_t y)
694*5d9d9091SRichard Lowe/ {
695*5d9d9091SRichard Lowe/ 	int		negative;
696*5d9d9091SRichard Lowe/ 	uint64_t	xt, yt, r;
697*5d9d9091SRichard Lowe/
698*5d9d9091SRichard Lowe/ 	if (x < 0) {
699*5d9d9091SRichard Lowe/ 		xt = -(uint64_t) x;
700*5d9d9091SRichard Lowe/ 		negative = 1;
701*5d9d9091SRichard Lowe/ 	} else {
702*5d9d9091SRichard Lowe/ 		xt = x;
703*5d9d9091SRichard Lowe/ 		negative = 0;
704*5d9d9091SRichard Lowe/ 	}
705*5d9d9091SRichard Lowe/ 	if (y < 0) {
706*5d9d9091SRichard Lowe/ 		yt = -(uint64_t) y;
707*5d9d9091SRichard Lowe/ 		negative ^= 1;
708*5d9d9091SRichard Lowe/ 	} else {
709*5d9d9091SRichard Lowe/ 		yt = y;
710*5d9d9091SRichard Lowe/ 	}
711*5d9d9091SRichard Lowe/ 	r = UDiv(xt, yt);
712*5d9d9091SRichard Lowe/ 	return (negative ? (int64_t) - r : r);
713*5d9d9091SRichard Lowe/ }
714*5d9d9091SRichard Lowe	ENTRY(__div64)
715*5d9d9091SRichard Lowe	pushl	%ebp
716*5d9d9091SRichard Lowe	pushl	%edi
717*5d9d9091SRichard Lowe	pushl	%esi
718*5d9d9091SRichard Lowe	subl	$8, %esp
719*5d9d9091SRichard Lowe	movl	28(%esp), %edx	/ x, x
720*5d9d9091SRichard Lowe	testl	%edx, %edx	/ x
721*5d9d9091SRichard Lowe	movl	24(%esp), %eax	/ x, x
722*5d9d9091SRichard Lowe	movl	32(%esp), %esi	/ y, y
723*5d9d9091SRichard Lowe	movl	36(%esp), %edi	/ y, y
724*5d9d9091SRichard Lowe	js	.LL84
725*5d9d9091SRichard Lowe	xorl	%ebp, %ebp	/ negative
726*5d9d9091SRichard Lowe	testl	%edi, %edi	/ y
727*5d9d9091SRichard Lowe	movl	%eax, (%esp)	/ x, xt
728*5d9d9091SRichard Lowe	movl	%edx, 4(%esp)	/ x, xt
729*5d9d9091SRichard Lowe	movl	%esi, %eax	/ y, yt
730*5d9d9091SRichard Lowe	movl	%edi, %edx	/ y, yt
731*5d9d9091SRichard Lowe	js	.LL85
732*5d9d9091SRichard Lowe.LL82:
733*5d9d9091SRichard Lowe	pushl	%edx		/ yt
734*5d9d9091SRichard Lowe	pushl	%eax		/ yt
735*5d9d9091SRichard Lowe	movl	8(%esp), %eax	/ xt, xt
736*5d9d9091SRichard Lowe	movl	12(%esp), %edx	/ xt, xt
737*5d9d9091SRichard Lowe	call	UDiv
738*5d9d9091SRichard Lowe	popl	%ecx
739*5d9d9091SRichard Lowe	testl	%ebp, %ebp	/ negative
740*5d9d9091SRichard Lowe	popl	%esi
741*5d9d9091SRichard Lowe	je	.LL83
742*5d9d9091SRichard Lowe	negl	%eax		/ r
743*5d9d9091SRichard Lowe	adcl	$0, %edx	/, r
744*5d9d9091SRichard Lowe	negl	%edx		/ r
745*5d9d9091SRichard Lowe.LL83:
746*5d9d9091SRichard Lowe	addl	$8, %esp
747*5d9d9091SRichard Lowe	popl	%esi
748*5d9d9091SRichard Lowe	popl	%edi
749*5d9d9091SRichard Lowe	popl	%ebp
750*5d9d9091SRichard Lowe	ret	$16
751*5d9d9091SRichard Lowe	.align	16
752*5d9d9091SRichard Lowe.LL84:
753*5d9d9091SRichard Lowe	negl	%eax		/ x
754*5d9d9091SRichard Lowe	adcl	$0, %edx	/, x
755*5d9d9091SRichard Lowe	negl	%edx		/ x
756*5d9d9091SRichard Lowe	testl	%edi, %edi	/ y
757*5d9d9091SRichard Lowe	movl	%eax, (%esp)	/ x, xt
758*5d9d9091SRichard Lowe	movl	%edx, 4(%esp)	/ x, xt
759*5d9d9091SRichard Lowe	movl	$1, %ebp	/, negative
760*5d9d9091SRichard Lowe	movl	%esi, %eax	/ y, yt
761*5d9d9091SRichard Lowe	movl	%edi, %edx	/ y, yt
762*5d9d9091SRichard Lowe	jns	.LL82
763*5d9d9091SRichard Lowe	.align	16
764*5d9d9091SRichard Lowe.LL85:
765*5d9d9091SRichard Lowe	negl	%eax		/ yt
766*5d9d9091SRichard Lowe	adcl	$0, %edx	/, yt
767*5d9d9091SRichard Lowe	negl	%edx		/ yt
768*5d9d9091SRichard Lowe	xorl	$1, %ebp	/, negative
769*5d9d9091SRichard Lowe	jmp	.LL82
770*5d9d9091SRichard Lowe	SET_SIZE(__div64)
771*5d9d9091SRichard Lowe
772*5d9d9091SRichard Lowe/*
773*5d9d9091SRichard Lowe * __rem64
774*5d9d9091SRichard Lowe *
775*5d9d9091SRichard Lowe * Perform division of two signed 64-bit quantities, returning the
776*5d9d9091SRichard Lowe * remainder in %edx:%eax.  __rem64 pops the arguments on return.
777*5d9d9091SRichard Lowe */
778*5d9d9091SRichard Lowe/ int64_t
779*5d9d9091SRichard Lowe/ __rem64(int64_t x, int64_t y)
780*5d9d9091SRichard Lowe/ {
781*5d9d9091SRichard Lowe/ 	uint64_t	xt, yt, rem;
782*5d9d9091SRichard Lowe/
783*5d9d9091SRichard Lowe/ 	if (x < 0) {
784*5d9d9091SRichard Lowe/ 		xt = -(uint64_t) x;
785*5d9d9091SRichard Lowe/ 	} else {
786*5d9d9091SRichard Lowe/ 		xt = x;
787*5d9d9091SRichard Lowe/ 	}
788*5d9d9091SRichard Lowe/ 	if (y < 0) {
789*5d9d9091SRichard Lowe/ 		yt = -(uint64_t) y;
790*5d9d9091SRichard Lowe/ 	} else {
791*5d9d9091SRichard Lowe/ 		yt = y;
792*5d9d9091SRichard Lowe/ 	}
793*5d9d9091SRichard Lowe/ 	(void) UDivRem(xt, yt, &rem);
794*5d9d9091SRichard Lowe/ 	return (x < 0 ? (int64_t) - rem : rem);
795*5d9d9091SRichard Lowe/ }
796*5d9d9091SRichard Lowe	ENTRY(__rem64)
797*5d9d9091SRichard Lowe	pushl	%edi
798*5d9d9091SRichard Lowe	pushl	%esi
799*5d9d9091SRichard Lowe	subl	$20, %esp
800*5d9d9091SRichard Lowe	movl	36(%esp), %ecx	/ x,
801*5d9d9091SRichard Lowe	movl	32(%esp), %esi	/ x,
802*5d9d9091SRichard Lowe	movl	36(%esp), %edi	/ x,
803*5d9d9091SRichard Lowe	testl	%ecx, %ecx
804*5d9d9091SRichard Lowe	movl	40(%esp), %eax	/ y, y
805*5d9d9091SRichard Lowe	movl	44(%esp), %edx	/ y, y
806*5d9d9091SRichard Lowe	movl	%esi, (%esp)	/, xt
807*5d9d9091SRichard Lowe	movl	%edi, 4(%esp)	/, xt
808*5d9d9091SRichard Lowe	js	.LL92
809*5d9d9091SRichard Lowe	testl	%edx, %edx	/ y
810*5d9d9091SRichard Lowe	movl	%eax, %esi	/ y, yt
811*5d9d9091SRichard Lowe	movl	%edx, %edi	/ y, yt
812*5d9d9091SRichard Lowe	js	.LL93
813*5d9d9091SRichard Lowe.LL90:
814*5d9d9091SRichard Lowe	leal	8(%esp), %eax	/, tmp66
815*5d9d9091SRichard Lowe	pushl	%eax		/ tmp66
816*5d9d9091SRichard Lowe	pushl	%edi		/ yt
817*5d9d9091SRichard Lowe	pushl	%esi		/ yt
818*5d9d9091SRichard Lowe	movl	12(%esp), %eax	/ xt, xt
819*5d9d9091SRichard Lowe	movl	16(%esp), %edx	/ xt, xt
820*5d9d9091SRichard Lowe	call	UDivRem
821*5d9d9091SRichard Lowe	addl	$12, %esp
822*5d9d9091SRichard Lowe	movl	36(%esp), %edi	/ x,
823*5d9d9091SRichard Lowe	testl	%edi, %edi
824*5d9d9091SRichard Lowe	movl	8(%esp), %eax	/ rem, rem
825*5d9d9091SRichard Lowe	movl	12(%esp), %edx	/ rem, rem
826*5d9d9091SRichard Lowe	js	.LL94
827*5d9d9091SRichard Lowe	addl	$20, %esp
828*5d9d9091SRichard Lowe	popl	%esi
829*5d9d9091SRichard Lowe	popl	%edi
830*5d9d9091SRichard Lowe	ret	$16
831*5d9d9091SRichard Lowe	.align	16
832*5d9d9091SRichard Lowe.LL92:
833*5d9d9091SRichard Lowe	negl	%esi
834*5d9d9091SRichard Lowe	adcl	$0, %edi
835*5d9d9091SRichard Lowe	negl	%edi
836*5d9d9091SRichard Lowe	testl	%edx, %edx	/ y
837*5d9d9091SRichard Lowe	movl	%esi, (%esp)	/, xt
838*5d9d9091SRichard Lowe	movl	%edi, 4(%esp)	/, xt
839*5d9d9091SRichard Lowe	movl	%eax, %esi	/ y, yt
840*5d9d9091SRichard Lowe	movl	%edx, %edi	/ y, yt
841*5d9d9091SRichard Lowe	jns	.LL90
842*5d9d9091SRichard Lowe	.align	16
843*5d9d9091SRichard Lowe.LL93:
844*5d9d9091SRichard Lowe	negl	%esi		/ yt
845*5d9d9091SRichard Lowe	adcl	$0, %edi	/, yt
846*5d9d9091SRichard Lowe	negl	%edi		/ yt
847*5d9d9091SRichard Lowe	jmp	.LL90
848*5d9d9091SRichard Lowe	.align	16
849*5d9d9091SRichard Lowe.LL94:
850*5d9d9091SRichard Lowe	negl	%eax		/ rem
851*5d9d9091SRichard Lowe	adcl	$0, %edx	/, rem
852*5d9d9091SRichard Lowe	addl	$20, %esp
853*5d9d9091SRichard Lowe	popl	%esi
854*5d9d9091SRichard Lowe	negl	%edx		/ rem
855*5d9d9091SRichard Lowe	popl	%edi
856*5d9d9091SRichard Lowe	ret	$16
857*5d9d9091SRichard Lowe	SET_SIZE(__rem64)
858*5d9d9091SRichard Lowe
859*5d9d9091SRichard Lowe#endif	/* __lint */
860*5d9d9091SRichard Lowe
861*5d9d9091SRichard Lowe#if defined(__lint)
862*5d9d9091SRichard Lowe
863*5d9d9091SRichard Lowe/*
864*5d9d9091SRichard Lowe * C support for 64-bit modulo and division.
865*5d9d9091SRichard Lowe * GNU routines callable from C (though generated by the compiler).
866*5d9d9091SRichard Lowe * Hand-customized compiler output - see comments for details.
867*5d9d9091SRichard Lowe */
868*5d9d9091SRichard Lowe/*ARGSUSED*/
869*5d9d9091SRichard Loweunsigned long long
870*5d9d9091SRichard Lowe__udivdi3(unsigned long long a, unsigned long long b)
871*5d9d9091SRichard Lowe{ return (0); }
872*5d9d9091SRichard Lowe
873*5d9d9091SRichard Lowe/*ARGSUSED*/
874*5d9d9091SRichard Loweunsigned long long
875*5d9d9091SRichard Lowe__umoddi3(unsigned long long a, unsigned long long b)
876*5d9d9091SRichard Lowe{ return (0); }
877*5d9d9091SRichard Lowe
878*5d9d9091SRichard Lowe/*ARGSUSED*/
879*5d9d9091SRichard Lowelong long
880*5d9d9091SRichard Lowe__divdi3(long long a, long long b)
881*5d9d9091SRichard Lowe{ return (0); }
882*5d9d9091SRichard Lowe
883*5d9d9091SRichard Lowe/*ARGSUSED*/
884*5d9d9091SRichard Lowelong long
885*5d9d9091SRichard Lowe__moddi3(long long a, long long b)
886*5d9d9091SRichard Lowe{ return (0); }
887*5d9d9091SRichard Lowe
888*5d9d9091SRichard Lowe/* ARGSUSED */
889*5d9d9091SRichard Loweint64_t __divrem64(int64_t a, int64_t b)
890*5d9d9091SRichard Lowe{ return (0); }
891*5d9d9091SRichard Lowe
892*5d9d9091SRichard Lowe/* ARGSUSED */
893*5d9d9091SRichard Loweuint64_t __udivrem64(uint64_t a, uint64_t b)
894*5d9d9091SRichard Lowe{ return (0); }
895*5d9d9091SRichard Lowe
896*5d9d9091SRichard Lowe#else	/* __lint */
897*5d9d9091SRichard Lowe
898*5d9d9091SRichard Lowe/*
899*5d9d9091SRichard Lowe * int32_t/int64_t division/manipulation
900*5d9d9091SRichard Lowe *
901*5d9d9091SRichard Lowe * Hand-customized compiler output: the non-GCC entry points depart from
902*5d9d9091SRichard Lowe * the SYS V ABI by requiring their arguments to be popped, and in the
903*5d9d9091SRichard Lowe * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
904*5d9d9091SRichard Lowe * compiler-generated use of %edx:%eax for the first argument of
905*5d9d9091SRichard Lowe * internal entry points.
906*5d9d9091SRichard Lowe *
907*5d9d9091SRichard Lowe * Inlines for speed:
908*5d9d9091SRichard Lowe * - counting the number of leading zeros in a word
909*5d9d9091SRichard Lowe * - multiplying two 32-bit numbers giving a 64-bit result
910*5d9d9091SRichard Lowe * - dividing a 64-bit number by a 32-bit number, giving both quotient
911*5d9d9091SRichard Lowe *	and remainder
912*5d9d9091SRichard Lowe * - subtracting two 64-bit results
913*5d9d9091SRichard Lowe */
914*5d9d9091SRichard Lowe/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
915*5d9d9091SRichard Lowe/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
916*5d9d9091SRichard Lowe/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
917*5d9d9091SRichard Lowe/
918*5d9d9091SRichard Lowe/ /* give index of highest bit */
919*5d9d9091SRichard Lowe/ #define	HIBIT(a, r) \
920*5d9d9091SRichard Lowe/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
921*5d9d9091SRichard Lowe/
922*5d9d9091SRichard Lowe/ /* multiply two uint32_ts resulting in a uint64_t */
923*5d9d9091SRichard Lowe/ #define	A_MUL32(a, b, lo, hi) \
924*5d9d9091SRichard Lowe/     asm("mull %2" \
925*5d9d9091SRichard Lowe/ 	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
926*5d9d9091SRichard Lowe/
927*5d9d9091SRichard Lowe/ /* divide a uint64_t by a uint32_t */
928*5d9d9091SRichard Lowe/ #define	A_DIV32(lo, hi, b, q, r) \
929*5d9d9091SRichard Lowe/     asm("divl %2" \
930*5d9d9091SRichard Lowe/ 	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
931*5d9d9091SRichard Lowe/ 	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
932*5d9d9091SRichard Lowe/
933*5d9d9091SRichard Lowe/ /* subtract two uint64_ts (with borrow) */
934*5d9d9091SRichard Lowe/ #define	A_SUB2(bl, bh, al, ah) \
935*5d9d9091SRichard Lowe/     asm("subl %4,%0\n\tsbbl %5,%1" \
936*5d9d9091SRichard Lowe/ 	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
937*5d9d9091SRichard Lowe/ 	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
938*5d9d9091SRichard Lowe/ 	"g"((uint32_t)(bh)))
939*5d9d9091SRichard Lowe
940*5d9d9091SRichard Lowe/*
941*5d9d9091SRichard Lowe * __udivdi3
942*5d9d9091SRichard Lowe *
943*5d9d9091SRichard Lowe * Perform division of two unsigned 64-bit quantities, returning the
944*5d9d9091SRichard Lowe * quotient in %edx:%eax.
945*5d9d9091SRichard Lowe */
946*5d9d9091SRichard Lowe	ENTRY(__udivdi3)
947*5d9d9091SRichard Lowe	movl	4(%esp), %eax	/ x, x
948*5d9d9091SRichard Lowe	movl	8(%esp), %edx	/ x, x
949*5d9d9091SRichard Lowe	pushl	16(%esp)	/ y
950*5d9d9091SRichard Lowe	pushl	16(%esp)
951*5d9d9091SRichard Lowe	call	UDiv
952*5d9d9091SRichard Lowe	addl	$8, %esp
953*5d9d9091SRichard Lowe	ret
954*5d9d9091SRichard Lowe	SET_SIZE(__udivdi3)
955*5d9d9091SRichard Lowe
956*5d9d9091SRichard Lowe/*
957*5d9d9091SRichard Lowe * __umoddi3
958*5d9d9091SRichard Lowe *
959*5d9d9091SRichard Lowe * Perform division of two unsigned 64-bit quantities, returning the
960*5d9d9091SRichard Lowe * remainder in %edx:%eax.
961*5d9d9091SRichard Lowe */
962*5d9d9091SRichard Lowe	ENTRY(__umoddi3)
963*5d9d9091SRichard Lowe	subl	$12, %esp
964*5d9d9091SRichard Lowe	movl	%esp, %ecx	/, tmp65
965*5d9d9091SRichard Lowe	movl	16(%esp), %eax	/ x, x
966*5d9d9091SRichard Lowe	movl	20(%esp), %edx	/ x, x
967*5d9d9091SRichard Lowe	pushl	%ecx		/ tmp65
968*5d9d9091SRichard Lowe	pushl	32(%esp)	/ y
969*5d9d9091SRichard Lowe	pushl	32(%esp)
970*5d9d9091SRichard Lowe	call	UDivRem
971*5d9d9091SRichard Lowe	movl	12(%esp), %eax	/ rem, rem
972*5d9d9091SRichard Lowe	movl	16(%esp), %edx	/ rem, rem
973*5d9d9091SRichard Lowe	addl	$24, %esp
974*5d9d9091SRichard Lowe	ret
975*5d9d9091SRichard Lowe	SET_SIZE(__umoddi3)
976*5d9d9091SRichard Lowe
977*5d9d9091SRichard Lowe/*
978*5d9d9091SRichard Lowe * __divdi3
979*5d9d9091SRichard Lowe *
980*5d9d9091SRichard Lowe * Perform division of two signed 64-bit quantities, returning the
981*5d9d9091SRichard Lowe * quotient in %edx:%eax.
982*5d9d9091SRichard Lowe */
983*5d9d9091SRichard Lowe/ int64_t
984*5d9d9091SRichard Lowe/ __divdi3(int64_t x, int64_t y)
985*5d9d9091SRichard Lowe/ {
986*5d9d9091SRichard Lowe/ 	int		negative;
987*5d9d9091SRichard Lowe/ 	uint64_t	xt, yt, r;
988*5d9d9091SRichard Lowe/
989*5d9d9091SRichard Lowe/ 	if (x < 0) {
990*5d9d9091SRichard Lowe/ 		xt = -(uint64_t) x;
991*5d9d9091SRichard Lowe/ 		negative = 1;
992*5d9d9091SRichard Lowe/ 	} else {
993*5d9d9091SRichard Lowe/ 		xt = x;
994*5d9d9091SRichard Lowe/ 		negative = 0;
995*5d9d9091SRichard Lowe/ 	}
996*5d9d9091SRichard Lowe/ 	if (y < 0) {
997*5d9d9091SRichard Lowe/ 		yt = -(uint64_t) y;
998*5d9d9091SRichard Lowe/ 		negative ^= 1;
999*5d9d9091SRichard Lowe/ 	} else {
1000*5d9d9091SRichard Lowe/ 		yt = y;
1001*5d9d9091SRichard Lowe/ 	}
1002*5d9d9091SRichard Lowe/ 	r = UDiv(xt, yt);
1003*5d9d9091SRichard Lowe/ 	return (negative ? (int64_t) - r : r);
1004*5d9d9091SRichard Lowe/ }
1005*5d9d9091SRichard Lowe	ENTRY(__divdi3)
1006*5d9d9091SRichard Lowe	pushl	%ebp
1007*5d9d9091SRichard Lowe	pushl	%edi
1008*5d9d9091SRichard Lowe	pushl	%esi
1009*5d9d9091SRichard Lowe	subl	$8, %esp
1010*5d9d9091SRichard Lowe	movl	28(%esp), %edx	/ x, x
1011*5d9d9091SRichard Lowe	testl	%edx, %edx	/ x
1012*5d9d9091SRichard Lowe	movl	24(%esp), %eax	/ x, x
1013*5d9d9091SRichard Lowe	movl	32(%esp), %esi	/ y, y
1014*5d9d9091SRichard Lowe	movl	36(%esp), %edi	/ y, y
1015*5d9d9091SRichard Lowe	js	.LL55
1016*5d9d9091SRichard Lowe	xorl	%ebp, %ebp	/ negative
1017*5d9d9091SRichard Lowe	testl	%edi, %edi	/ y
1018*5d9d9091SRichard Lowe	movl	%eax, (%esp)	/ x, xt
1019*5d9d9091SRichard Lowe	movl	%edx, 4(%esp)	/ x, xt
1020*5d9d9091SRichard Lowe	movl	%esi, %eax	/ y, yt
1021*5d9d9091SRichard Lowe	movl	%edi, %edx	/ y, yt
1022*5d9d9091SRichard Lowe	js	.LL56
1023*5d9d9091SRichard Lowe.LL53:
1024*5d9d9091SRichard Lowe	pushl	%edx		/ yt
1025*5d9d9091SRichard Lowe	pushl	%eax		/ yt
1026*5d9d9091SRichard Lowe	movl	8(%esp), %eax	/ xt, xt
1027*5d9d9091SRichard Lowe	movl	12(%esp), %edx	/ xt, xt
1028*5d9d9091SRichard Lowe	call	UDiv
1029*5d9d9091SRichard Lowe	popl	%ecx
1030*5d9d9091SRichard Lowe	testl	%ebp, %ebp	/ negative
1031*5d9d9091SRichard Lowe	popl	%esi
1032*5d9d9091SRichard Lowe	je	.LL54
1033*5d9d9091SRichard Lowe	negl	%eax		/ r
1034*5d9d9091SRichard Lowe	adcl	$0, %edx	/, r
1035*5d9d9091SRichard Lowe	negl	%edx		/ r
1036*5d9d9091SRichard Lowe.LL54:
1037*5d9d9091SRichard Lowe	addl	$8, %esp
1038*5d9d9091SRichard Lowe	popl	%esi
1039*5d9d9091SRichard Lowe	popl	%edi
1040*5d9d9091SRichard Lowe	popl	%ebp
1041*5d9d9091SRichard Lowe	ret
1042*5d9d9091SRichard Lowe	.align	16
1043*5d9d9091SRichard Lowe.LL55:
1044*5d9d9091SRichard Lowe	negl	%eax		/ x
1045*5d9d9091SRichard Lowe	adcl	$0, %edx	/, x
1046*5d9d9091SRichard Lowe	negl	%edx		/ x
1047*5d9d9091SRichard Lowe	testl	%edi, %edi	/ y
1048*5d9d9091SRichard Lowe	movl	%eax, (%esp)	/ x, xt
1049*5d9d9091SRichard Lowe	movl	%edx, 4(%esp)	/ x, xt
1050*5d9d9091SRichard Lowe	movl	$1, %ebp	/, negative
1051*5d9d9091SRichard Lowe	movl	%esi, %eax	/ y, yt
1052*5d9d9091SRichard Lowe	movl	%edi, %edx	/ y, yt
1053*5d9d9091SRichard Lowe	jns	.LL53
1054*5d9d9091SRichard Lowe	.align	16
1055*5d9d9091SRichard Lowe.LL56:
1056*5d9d9091SRichard Lowe	negl	%eax		/ yt
1057*5d9d9091SRichard Lowe	adcl	$0, %edx	/, yt
1058*5d9d9091SRichard Lowe	negl	%edx		/ yt
1059*5d9d9091SRichard Lowe	xorl	$1, %ebp	/, negative
1060*5d9d9091SRichard Lowe	jmp	.LL53
1061*5d9d9091SRichard Lowe	SET_SIZE(__divdi3)
1062*5d9d9091SRichard Lowe
1063*5d9d9091SRichard Lowe/*
1064*5d9d9091SRichard Lowe * __moddi3
1065*5d9d9091SRichard Lowe *
1066*5d9d9091SRichard Lowe * Perform division of two signed 64-bit quantities, returning the
1067*5d9d9091SRichard Lowe * quotient in %edx:%eax.
1068*5d9d9091SRichard Lowe */
1069*5d9d9091SRichard Lowe/ int64_t
1070*5d9d9091SRichard Lowe/ __moddi3(int64_t x, int64_t y)
1071*5d9d9091SRichard Lowe/ {
1072*5d9d9091SRichard Lowe/ 	uint64_t	xt, yt, rem;
1073*5d9d9091SRichard Lowe/
1074*5d9d9091SRichard Lowe/ 	if (x < 0) {
1075*5d9d9091SRichard Lowe/ 		xt = -(uint64_t) x;
1076*5d9d9091SRichard Lowe/ 	} else {
1077*5d9d9091SRichard Lowe/ 		xt = x;
1078*5d9d9091SRichard Lowe/ 	}
1079*5d9d9091SRichard Lowe/ 	if (y < 0) {
1080*5d9d9091SRichard Lowe/ 		yt = -(uint64_t) y;
1081*5d9d9091SRichard Lowe/ 	} else {
1082*5d9d9091SRichard Lowe/ 		yt = y;
1083*5d9d9091SRichard Lowe/ 	}
1084*5d9d9091SRichard Lowe/ 	(void) UDivRem(xt, yt, &rem);
1085*5d9d9091SRichard Lowe/ 	return (x < 0 ? (int64_t) - rem : rem);
1086*5d9d9091SRichard Lowe/ }
1087*5d9d9091SRichard Lowe	ENTRY(__moddi3)
1088*5d9d9091SRichard Lowe	pushl	%edi
1089*5d9d9091SRichard Lowe	pushl	%esi
1090*5d9d9091SRichard Lowe	subl	$20, %esp
1091*5d9d9091SRichard Lowe	movl	36(%esp), %ecx	/ x,
1092*5d9d9091SRichard Lowe	movl	32(%esp), %esi	/ x,
1093*5d9d9091SRichard Lowe	movl	36(%esp), %edi	/ x,
1094*5d9d9091SRichard Lowe	testl	%ecx, %ecx
1095*5d9d9091SRichard Lowe	movl	40(%esp), %eax	/ y, y
1096*5d9d9091SRichard Lowe	movl	44(%esp), %edx	/ y, y
1097*5d9d9091SRichard Lowe	movl	%esi, (%esp)	/, xt
1098*5d9d9091SRichard Lowe	movl	%edi, 4(%esp)	/, xt
1099*5d9d9091SRichard Lowe	js	.LL63
1100*5d9d9091SRichard Lowe	testl	%edx, %edx	/ y
1101*5d9d9091SRichard Lowe	movl	%eax, %esi	/ y, yt
1102*5d9d9091SRichard Lowe	movl	%edx, %edi	/ y, yt
1103*5d9d9091SRichard Lowe	js	.LL64
1104*5d9d9091SRichard Lowe.LL61:
1105*5d9d9091SRichard Lowe	leal	8(%esp), %eax	/, tmp66
1106*5d9d9091SRichard Lowe	pushl	%eax		/ tmp66
1107*5d9d9091SRichard Lowe	pushl	%edi		/ yt
1108*5d9d9091SRichard Lowe	pushl	%esi		/ yt
1109*5d9d9091SRichard Lowe	movl	12(%esp), %eax	/ xt, xt
1110*5d9d9091SRichard Lowe	movl	16(%esp), %edx	/ xt, xt
1111*5d9d9091SRichard Lowe	call	UDivRem
1112*5d9d9091SRichard Lowe	addl	$12, %esp
1113*5d9d9091SRichard Lowe	movl	36(%esp), %edi	/ x,
1114*5d9d9091SRichard Lowe	testl	%edi, %edi
1115*5d9d9091SRichard Lowe	movl	8(%esp), %eax	/ rem, rem
1116*5d9d9091SRichard Lowe	movl	12(%esp), %edx	/ rem, rem
1117*5d9d9091SRichard Lowe	js	.LL65
1118*5d9d9091SRichard Lowe	addl	$20, %esp
1119*5d9d9091SRichard Lowe	popl	%esi
1120*5d9d9091SRichard Lowe	popl	%edi
1121*5d9d9091SRichard Lowe	ret
1122*5d9d9091SRichard Lowe	.align	16
1123*5d9d9091SRichard Lowe.LL63:
1124*5d9d9091SRichard Lowe	negl	%esi
1125*5d9d9091SRichard Lowe	adcl	$0, %edi
1126*5d9d9091SRichard Lowe	negl	%edi
1127*5d9d9091SRichard Lowe	testl	%edx, %edx	/ y
1128*5d9d9091SRichard Lowe	movl	%esi, (%esp)	/, xt
1129*5d9d9091SRichard Lowe	movl	%edi, 4(%esp)	/, xt
1130*5d9d9091SRichard Lowe	movl	%eax, %esi	/ y, yt
1131*5d9d9091SRichard Lowe	movl	%edx, %edi	/ y, yt
1132*5d9d9091SRichard Lowe	jns	.LL61
1133*5d9d9091SRichard Lowe	.align	16
1134*5d9d9091SRichard Lowe.LL64:
1135*5d9d9091SRichard Lowe	negl	%esi		/ yt
1136*5d9d9091SRichard Lowe	adcl	$0, %edi	/, yt
1137*5d9d9091SRichard Lowe	negl	%edi		/ yt
1138*5d9d9091SRichard Lowe	jmp	.LL61
1139*5d9d9091SRichard Lowe	.align	16
1140*5d9d9091SRichard Lowe.LL65:
1141*5d9d9091SRichard Lowe	negl	%eax		/ rem
1142*5d9d9091SRichard Lowe	adcl	$0, %edx	/, rem
1143*5d9d9091SRichard Lowe	addl	$20, %esp
1144*5d9d9091SRichard Lowe	popl	%esi
1145*5d9d9091SRichard Lowe	negl	%edx		/ rem
1146*5d9d9091SRichard Lowe	popl	%edi
1147*5d9d9091SRichard Lowe	ret
1148*5d9d9091SRichard Lowe	SET_SIZE(__moddi3)
1149*5d9d9091SRichard Lowe
1150*5d9d9091SRichard Lowe/*
1151*5d9d9091SRichard Lowe * __udivrem64
1152*5d9d9091SRichard Lowe *
1153*5d9d9091SRichard Lowe * Perform division of two unsigned 64-bit quantities, returning the
1154*5d9d9091SRichard Lowe * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
1155*5d9d9091SRichard Lowe * pops the arguments on return.
1156*5d9d9091SRichard Lowe */
1157*5d9d9091SRichard Lowe	ENTRY(__udivrem64)
1158*5d9d9091SRichard Lowe	subl	$12, %esp
1159*5d9d9091SRichard Lowe	movl	%esp, %ecx	/, tmp64
1160*5d9d9091SRichard Lowe	movl	16(%esp), %eax	/ x, x
1161*5d9d9091SRichard Lowe	movl	20(%esp), %edx	/ x, x
1162*5d9d9091SRichard Lowe	pushl	%ecx		/ tmp64
1163*5d9d9091SRichard Lowe	pushl	32(%esp)	/ y
1164*5d9d9091SRichard Lowe	pushl	32(%esp)
1165*5d9d9091SRichard Lowe	call	UDivRem
1166*5d9d9091SRichard Lowe	movl	16(%esp), %ecx	/ rem, tmp63
1167*5d9d9091SRichard Lowe	movl	12(%esp), %esi	/ rem
1168*5d9d9091SRichard Lowe	addl	$24, %esp
1169*5d9d9091SRichard Lowe	ret	$16
1170*5d9d9091SRichard Lowe	SET_SIZE(__udivrem64)
1171*5d9d9091SRichard Lowe
1172*5d9d9091SRichard Lowe/*
1173*5d9d9091SRichard Lowe * Signed division with remainder.
1174*5d9d9091SRichard Lowe */
1175*5d9d9091SRichard Lowe/ int64_t
1176*5d9d9091SRichard Lowe/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
1177*5d9d9091SRichard Lowe/ {
1178*5d9d9091SRichard Lowe/ 	int		negative;
1179*5d9d9091SRichard Lowe/ 	uint64_t	xt, yt, r, rem;
1180*5d9d9091SRichard Lowe/
1181*5d9d9091SRichard Lowe/ 	if (x < 0) {
1182*5d9d9091SRichard Lowe/ 		xt = -(uint64_t) x;
1183*5d9d9091SRichard Lowe/ 		negative = 1;
1184*5d9d9091SRichard Lowe/ 	} else {
1185*5d9d9091SRichard Lowe/ 		xt = x;
1186*5d9d9091SRichard Lowe/ 		negative = 0;
1187*5d9d9091SRichard Lowe/ 	}
1188*5d9d9091SRichard Lowe/ 	if (y < 0) {
1189*5d9d9091SRichard Lowe/ 		yt = -(uint64_t) y;
1190*5d9d9091SRichard Lowe/ 		negative ^= 1;
1191*5d9d9091SRichard Lowe/ 	} else {
1192*5d9d9091SRichard Lowe/ 		yt = y;
1193*5d9d9091SRichard Lowe/ 	}
1194*5d9d9091SRichard Lowe/ 	r = UDivRem(xt, yt, &rem);
1195*5d9d9091SRichard Lowe/ 	*pmod = (x < 0 ? (int64_t) - rem : rem);
1196*5d9d9091SRichard Lowe/ 	return (negative ? (int64_t) - r : r);
1197*5d9d9091SRichard Lowe/ }
1198*5d9d9091SRichard Lowe	ENTRY(SDivRem)
1199*5d9d9091SRichard Lowe	pushl	%ebp
1200*5d9d9091SRichard Lowe	pushl	%edi
1201*5d9d9091SRichard Lowe	pushl	%esi
1202*5d9d9091SRichard Lowe	subl	$24, %esp
1203*5d9d9091SRichard Lowe	testl	%edx, %edx	/ x
1204*5d9d9091SRichard Lowe	movl	%edx, %edi	/ x, x
1205*5d9d9091SRichard Lowe	js	.LL73
1206*5d9d9091SRichard Lowe	movl	44(%esp), %esi	/ y,
1207*5d9d9091SRichard Lowe	xorl	%ebp, %ebp	/ negative
1208*5d9d9091SRichard Lowe	testl	%esi, %esi
1209*5d9d9091SRichard Lowe	movl	%edx, 12(%esp)	/ x, xt
1210*5d9d9091SRichard Lowe	movl	%eax, 8(%esp)	/ x, xt
1211*5d9d9091SRichard Lowe	movl	40(%esp), %edx	/ y, yt
1212*5d9d9091SRichard Lowe	movl	44(%esp), %ecx	/ y, yt
1213*5d9d9091SRichard Lowe	js	.LL74
1214*5d9d9091SRichard Lowe.LL70:
1215*5d9d9091SRichard Lowe	leal	16(%esp), %eax	/, tmp70
1216*5d9d9091SRichard Lowe	pushl	%eax		/ tmp70
1217*5d9d9091SRichard Lowe	pushl	%ecx		/ yt
1218*5d9d9091SRichard Lowe	pushl	%edx		/ yt
1219*5d9d9091SRichard Lowe	movl	20(%esp), %eax	/ xt, xt
1220*5d9d9091SRichard Lowe	movl	24(%esp), %edx	/ xt, xt
1221*5d9d9091SRichard Lowe	call	UDivRem
1222*5d9d9091SRichard Lowe	movl	%edx, 16(%esp)	/, r
1223*5d9d9091SRichard Lowe	movl	%eax, 12(%esp)	/, r
1224*5d9d9091SRichard Lowe	addl	$12, %esp
1225*5d9d9091SRichard Lowe	testl	%edi, %edi	/ x
1226*5d9d9091SRichard Lowe	movl	16(%esp), %edx	/ rem, rem
1227*5d9d9091SRichard Lowe	movl	20(%esp), %ecx	/ rem, rem
1228*5d9d9091SRichard Lowe	js	.LL75
1229*5d9d9091SRichard Lowe.LL71:
1230*5d9d9091SRichard Lowe	movl	48(%esp), %edi	/ pmod, pmod
1231*5d9d9091SRichard Lowe	testl	%ebp, %ebp	/ negative
1232*5d9d9091SRichard Lowe	movl	%edx, (%edi)	/ rem,* pmod
1233*5d9d9091SRichard Lowe	movl	%ecx, 4(%edi)	/ rem,
1234*5d9d9091SRichard Lowe	movl	(%esp), %eax	/ r, r
1235*5d9d9091SRichard Lowe	movl	4(%esp), %edx	/ r, r
1236*5d9d9091SRichard Lowe	je	.LL72
1237*5d9d9091SRichard Lowe	negl	%eax		/ r
1238*5d9d9091SRichard Lowe	adcl	$0, %edx	/, r
1239*5d9d9091SRichard Lowe	negl	%edx		/ r
1240*5d9d9091SRichard Lowe.LL72:
1241*5d9d9091SRichard Lowe	addl	$24, %esp
1242*5d9d9091SRichard Lowe	popl	%esi
1243*5d9d9091SRichard Lowe	popl	%edi
1244*5d9d9091SRichard Lowe	popl	%ebp
1245*5d9d9091SRichard Lowe	ret
1246*5d9d9091SRichard Lowe	.align	16
1247*5d9d9091SRichard Lowe.LL73:
1248*5d9d9091SRichard Lowe	negl	%eax
1249*5d9d9091SRichard Lowe	adcl	$0, %edx
1250*5d9d9091SRichard Lowe	movl	44(%esp), %esi	/ y,
1251*5d9d9091SRichard Lowe	negl	%edx
1252*5d9d9091SRichard Lowe	testl	%esi, %esi
1253*5d9d9091SRichard Lowe	movl	%edx, 12(%esp)	/, xt
1254*5d9d9091SRichard Lowe	movl	%eax, 8(%esp)	/, xt
1255*5d9d9091SRichard Lowe	movl	$1, %ebp	/, negative
1256*5d9d9091SRichard Lowe	movl	40(%esp), %edx	/ y, yt
1257*5d9d9091SRichard Lowe	movl	44(%esp), %ecx	/ y, yt
1258*5d9d9091SRichard Lowe	jns	.LL70
1259*5d9d9091SRichard Lowe	.align	16
1260*5d9d9091SRichard Lowe.LL74:
1261*5d9d9091SRichard Lowe	negl	%edx		/ yt
1262*5d9d9091SRichard Lowe	adcl	$0, %ecx	/, yt
1263*5d9d9091SRichard Lowe	negl	%ecx		/ yt
1264*5d9d9091SRichard Lowe	xorl	$1, %ebp	/, negative
1265*5d9d9091SRichard Lowe	jmp	.LL70
1266*5d9d9091SRichard Lowe	.align	16
1267*5d9d9091SRichard Lowe.LL75:
1268*5d9d9091SRichard Lowe	negl	%edx		/ rem
1269*5d9d9091SRichard Lowe	adcl	$0, %ecx	/, rem
1270*5d9d9091SRichard Lowe	negl	%ecx		/ rem
1271*5d9d9091SRichard Lowe	jmp	.LL71
1272*5d9d9091SRichard Lowe	SET_SIZE(SDivRem)
1273*5d9d9091SRichard Lowe
1274*5d9d9091SRichard Lowe/*
1275*5d9d9091SRichard Lowe * __divrem64
1276*5d9d9091SRichard Lowe *
1277*5d9d9091SRichard Lowe * Perform division of two signed 64-bit quantities, returning the
1278*5d9d9091SRichard Lowe * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
1279*5d9d9091SRichard Lowe * pops the arguments on return.
1280*5d9d9091SRichard Lowe */
1281*5d9d9091SRichard Lowe	ENTRY(__divrem64)
1282*5d9d9091SRichard Lowe	subl	$20, %esp
1283*5d9d9091SRichard Lowe	movl	%esp, %ecx	/, tmp64
1284*5d9d9091SRichard Lowe	movl	24(%esp), %eax	/ x, x
1285*5d9d9091SRichard Lowe	movl	28(%esp), %edx	/ x, x
1286*5d9d9091SRichard Lowe	pushl	%ecx		/ tmp64
1287*5d9d9091SRichard Lowe	pushl	40(%esp)	/ y
1288*5d9d9091SRichard Lowe	pushl	40(%esp)
1289*5d9d9091SRichard Lowe	call	SDivRem
1290*5d9d9091SRichard Lowe	movl	16(%esp), %ecx
1291*5d9d9091SRichard Lowe	movl	12(%esp),%esi	/ rem
1292*5d9d9091SRichard Lowe	addl	$32, %esp
1293*5d9d9091SRichard Lowe	ret	$16
1294*5d9d9091SRichard Lowe	SET_SIZE(__divrem64)
1295*5d9d9091SRichard Lowe
1296*5d9d9091SRichard Lowe
1297*5d9d9091SRichard Lowe#endif /* __lint */
1298*5d9d9091SRichard Lowe
1299*5d9d9091SRichard Lowe#endif /* defined(__i386) && !defined(__amd64) */
1300