/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"_div64.s"

#include "SYS.h"

/*
 * C support for 64-bit modulo and division.
 * Hand-customized compiler output - see comments for details.
 */

/*
 * int32_t/int64_t division/manipulation
 *
 * Hand-customized compiler output: the non-GCC entry points depart from
 * the SYS V ABI by requiring their arguments to be popped, and in the
 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
 * compiler-generated use of %edx:%eax for the first argument of
 * internal entry points.
 *
 * Inlines for speed:
 * - counting the number of leading zeros in a word
 * - multiplying two 32-bit numbers giving a 64-bit result
 * - dividing a 64-bit number by a 32-bit number, giving both quotient
 *	and remainder
 * - subtracting two 64-bit results
 */
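/ For example, with 64-bit operands on 32-bit x86, a C expression such as
/
/ 	uint64_t	q, r;
/
/ 	q = x / y;	/* typically compiled as a call to __udiv64 */
/ 	r = x % y;	/* typically compiled as a call to __urem64 */
/
/ is turned into calls to the entry points below (signed operands use
/ __div64 and __rem64 instead).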
/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
/
/ /* give index of highest bit */
/ #define	HIBIT(a, r) \
/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
/
/ /* multiply two uint32_ts resulting in a uint64_t */
/ #define	A_MUL32(a, b, lo, hi) \
/     asm("mull %2" \
/ 	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
/
/ /* divide a uint64_t by a uint32_t */
/ #define	A_DIV32(lo, hi, b, q, r) \
/     asm("divl %2" \
/ 	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
/ 	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
/
/ /* subtract two uint64_ts (with borrow) */
/ #define	A_SUB2(bl, bh, al, ah) \
/     asm("subl %4,%0\n\tsbbl %5,%1" \
/ 	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
/ 	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
/ 	"g"((uint32_t)(bh)))
/
/ /*
/  * Unsigned division with remainder.
/  * Divide two uint64_ts, and calculate remainder.
/  */
/ uint64_t
/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
/ {
/ 	/* simple cases: y is a single uint32_t */
/ 	if (HI(y) == 0) {
/ 		uint32_t	div_hi, div_rem;
/ 		uint32_t	q0, q1;
/
/ 		/* calculate q1 */
/ 		if (HI(x) < LO(y)) {
/ 			/* result is a single uint32_t, use one division */
/ 			q1 = 0;
/ 			div_hi = HI(x);
/ 		} else {
/ 			/* result is a double uint32_t, use two divisions */
/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/ 		}
/
/ 		/* calculate q0 and remainder */
/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/ 		/* return remainder */
/ 		*pmod = div_rem;
/
/ 		/* return result */
/ 		return (HILO(q1, q0));
/
/ 	} else if (HI(x) < HI(y)) {
/ 		/* HI(x) < HI(y) => x < y => result is 0 */
/
/ 		/* return remainder */
/ 		*pmod = x;
/
/ 		/* return result */
/ 		return (0);
/
/ 	} else {
/ 		/*
/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/ 		 * result
/ 		 */
/ 		uint32_t		y0, y1;
/ 		uint32_t		x1, x0;
/ 		uint32_t		q0;
/ 		uint32_t		normshift;
/
/ 		/* normalize by shifting x and y so MSB(y) == 1 */
/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/ 		normshift = 31 - normshift;
/
/ 		if (normshift == 0) {
/ 			/* no shifting needed, and x < 2*y so q <= 1 */
/ 			y1 = HI(y);
/ 			y0 = LO(y);
/ 			x1 = HI(x);
/ 			x0 = LO(x);
/
/ 			/* if x >= y then q = 1 (note x1 >= y1) */
/ 			if (x1 > y1 || x0 >= y0) {
/ 				q0 = 1;
/ 				/* subtract y from x to get remainder */
/ 				A_SUB2(y0, y1, x0, x1);
/ 			} else {
/ 				q0 = 0;
/ 			}
/
/ 			/* return remainder */
/ 			*pmod = HILO(x1, x0);
/
/ 			/* return result */
/ 			return (q0);
/
/ 		} else {
/ 			/*
/ 			 * the last case: result is one uint32_t, but we need to
/ 			 * normalize
/ 			 */
/ 			uint64_t	dt;
/ 			uint32_t		t0, t1, x2;
/
/ 			/* normalize y */
/ 			dt = (y << normshift);
/ 			y1 = HI(dt);
/ 			y0 = LO(dt);
/
/ 			/* normalize x (we need 3 uint32_ts!!!) */
/ 			x2 = (HI(x) >> (32 - normshift));
/ 			dt = (x << normshift);
/ 			x1 = HI(dt);
/ 			x0 = LO(dt);
/
/ 			/* estimate q0, and reduce x to a two uint32_t value */
/ 			A_DIV32(x1, x2, y1, q0, x1);
/
/ 			/* adjust q0 down if too high */
/ 			/*
/ 			 * because of the limited range of x2 we can only be
/ 			 * one off
/ 			 */
/ 			A_MUL32(y0, q0, t0, t1);
/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/ 				q0--;
/ 				A_SUB2(y0, y1, t0, t1);
/ 			}
/ 			/* return remainder */
/ 			/* subtract product from x to get remainder */
/ 			A_SUB2(t0, t1, x0, x1);
/ 			*pmod = (HILO(x1, x0) >> normshift);
/
/ 			/* return result */
/ 			return (q0);
/ 		}
/ 	}
/ }
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ y,
	testl	%edi, %edi	/ tmp63
	movl	%eax, 40(%esp)	/ x, x
	movl	%edx, 44(%esp)	/ x, x
	movl	%edi, %esi	/, tmp62
	movl	%edi, %ecx	/ tmp62, tmp63
	jne	.LL2
	movl	%edx, %eax	/, tmp68
	cmpl	64(%esp), %eax	/ y, tmp68
	jae	.LL21
.LL4:
	movl	72(%esp), %ebp	/ pmod,
	xorl	%esi, %esi	/ <result>
	movl	40(%esp), %eax	/ x, q0
	movl	%ecx, %edi	/ <result>, <result>
	divl	64(%esp)	/ y
	movl	%edx, (%ebp)	/ div_rem,
	xorl	%edx, %edx	/ q0
	addl	%eax, %esi	/ q0, <result>
	movl	$0, 4(%ebp)
	adcl	%edx, %edi	/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
	movl	44(%esp), %eax	/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ tmp62, tmp5
	movl	%eax, 32(%esp)	/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	movl	72(%esp), %esi	/ pmod,
	movl	40(%esp), %ebp	/ x,
	movl	44(%esp), %ecx	/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ <result>
	xorl	%esi, %esi	/ <result>
.LL22:
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
	movl	%edi, %edx	/ tmp63, div_hi
	divl	64(%esp)	/ y
	movl	%eax, %ecx	/, q1
	jmp	.LL4
	.align	16
.LL6:
	movl	$31, %edi	/, tmp87
	bsrl	%esi,%edx	/ tmp62, normshift
	subl	%edx, %edi	/ normshift, tmp87
	movl	%edi, 28(%esp)	/ tmp87,
	jne	.LL8
	movl	32(%esp), %edx	/, x1
	cmpl	%ecx, %edx	/ y1, x1
	movl	64(%esp), %edi	/ y, y0
	movl	40(%esp), %esi	/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp	/ q0
	cmpl	%edi, %esi	/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp	/, q0
	subl	%edi,%esi	/ y0, x0
	sbbl	%ecx,%edx	/ tmp63, x1
.LL11:
	movl	%edx, %ecx	/ x1, x1
	xorl	%edx, %edx	/ x1
	xorl	%edi, %edi	/ x0
	addl	%esi, %edx	/ x0, x1
	adcl	%edi, %ecx	/ x0, x1
	movl	72(%esp), %esi	/ pmod,
	movl	%edx, (%esi)	/ x1,
	movl	%ecx, 4(%esi)	/ x1,
	xorl	%edi, %edi	/ <result>
	movl	%ebp, %esi	/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	movb	28(%esp), %cl
	movl	64(%esp), %esi	/ y, dt
	movl	68(%esp), %edi	/ y, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	movl	$32, %ecx	/, tmp102
	subl	28(%esp), %ecx	/, tmp102
	movl	%esi, %ebp	/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi	/ tmp102,
	movl	%edi, 24(%esp)	/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)	/, x2
	movl	44(%esp), %edi	/ x, dt
	movl	40(%esp), %esi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL18:
	movl	%edi, %ecx	/ dt,
	movl	%edi, %eax	/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx	/ x2,
	divl	24(%esp)
	movl	%edx, %ecx	/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax	/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx	/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	movl	%ecx, %edi	/ x1,
	subl	%eax,%esi	/ t0, x0
	sbbl	%edx,%edi	/ t1,
	movl	%edi, %eax	/, x1
	movl	%eax, %edx	/ x1, x1
	xorl	%eax, %eax	/ x1
	xorl	%ebp, %ebp	/ x0
	addl	%esi, %eax	/ x0, x1
	adcl	%ebp, %edx	/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax	/, x1, x1
	shrl	%cl, %edx	/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ x1, x1
	xorl	%edx, %edx	/ x1
.LL16:
	movl	72(%esp), %ecx	/ pmod,
	movl	20(%esp), %esi	/, <result>
	xorl	%edi, %edi	/ <result>
	movl	%eax, (%ecx)	/ x1,
	movl	%edx, 4(%ecx)	/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ x0, t0
	jbe	.LL15
.LL14:
	decl	20(%esp)
	subl	%ebp,%eax	/ y0, t0
	sbbl	24(%esp),%edx	/, t1
	jmp	.LL15
.LL23:
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)

/*
 * Unsigned division without remainder.
 */
/ uint64_t
/ UDiv(uint64_t x, uint64_t y)
/ {
/ 	if (HI(y) == 0) {
/ 		/* simple cases: y is a single uint32_t */
/ 		uint32_t	div_hi, div_rem;
/ 		uint32_t	q0, q1;
/
/ 		/* calculate q1 */
/ 		if (HI(x) < LO(y)) {
/ 			/* result is a single uint32_t, use one division */
/ 			q1 = 0;
/ 			div_hi = HI(x);
/ 		} else {
/ 			/* result is a double uint32_t, use two divisions */
/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/ 		}
/
/ 		/* calculate q0 and remainder */
/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/ 		/* return result */
/ 		return (HILO(q1, q0));
/
/ 	} else if (HI(x) < HI(y)) {
/ 		/* HI(x) < HI(y) => x < y => result is 0 */
/
/ 		/* return result */
/ 		return (0);
/
/ 	} else {
/ 		/*
/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/ 		 * result
/ 		 */
/ 		uint32_t		y0, y1;
/ 		uint32_t		x1, x0;
/ 		uint32_t		q0;
/ 		unsigned		normshift;
/
/ 		/* normalize by shifting x and y so MSB(y) == 1 */
/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/ 		normshift = 31 - normshift;
/
/ 		if (normshift == 0) {
/ 			/* no shifting needed, and x < 2*y so q <= 1 */
/ 			y1 = HI(y);
/ 			y0 = LO(y);
/ 			x1 = HI(x);
/ 			x0 = LO(x);
/
/ 			/* if x >= y then q = 1 (note x1 >= y1) */
/ 			if (x1 > y1 || x0 >= y0) {
/ 				q0 = 1;
/ 				/* subtract y from x to get remainder */
/ 				/* A_SUB2(y0, y1, x0, x1); */
/ 			} else {
/ 				q0 = 0;
/ 			}
/
/ 			/* return result */
/ 			return (q0);
/
/ 		} else {
/ 			/*
/ 			 * the last case: result is one uint32_t, but we need to
/ 			 * normalize
/ 			 */
/ 			uint64_t	dt;
/ 			uint32_t		t0, t1, x2;
/
/ 			/* normalize y */
/ 			dt = (y << normshift);
/ 			y1 = HI(dt);
/ 			y0 = LO(dt);
/
/ 			/* normalize x (we need 3 uint32_ts!!!) */
/ 			x2 = (HI(x) >> (32 - normshift));
/ 			dt = (x << normshift);
/ 			x1 = HI(dt);
/ 			x0 = LO(dt);
/
/ 			/* estimate q0, and reduce x to a two uint32_t value */
/ 			A_DIV32(x1, x2, y1, q0, x1);
/
/ 			/* adjust q0 down if too high */
/ 			/*
/ 			 * because of the limited range of x2 we can only be
/ 			 * one off
/ 			 */
/ 			A_MUL32(y0, q0, t0, t1);
/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/ 				q0--;
/ 			}
/ 			/* return result */
/ 			return (q0);
/ 		}
/ 	}
/ }
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ x, x
	movl	60(%esp), %edx	/ y,
	testl	%edx, %edx	/ tmp62
	movl	%eax, 32(%esp)	/ x, x
	movl	%edx, %ecx	/ tmp61, tmp62
	movl	%edx, %eax	/, tmp61
	jne	.LL26
	movl	36(%esp), %esi	/ x,
	cmpl	56(%esp), %esi	/ y, tmp67
	movl	%esi, %eax	/, tmp67
	movl	%esi, %edx	/ tmp67, div_hi
	jb	.LL28
	movl	%ecx, %edx	/ tmp62, div_hi
	divl	56(%esp)	/ y
	movl	%eax, %ecx	/, q1
.LL28:
	xorl	%esi, %esi	/ <result>
	movl	%ecx, %edi	/ <result>, <result>
	movl	32(%esp), %eax	/ x, q0
	xorl	%ecx, %ecx	/ q0
	divl	56(%esp)	/ y
	addl	%eax, %esi	/ q0, <result>
	adcl	%ecx, %edi	/ q0, <result>
.LL25:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
	movl	36(%esp), %esi	/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi	/ <result>
	xorl	%edi, %edi	/ <result>
	cmpl	%eax, 24(%esp)	/ tmp61,
	jb	.LL25
	bsrl	%eax,%ebp	/ tmp61, normshift
	movl	$31, %eax	/, tmp85
	subl	%ebp, %eax	/ normshift, normshift
	jne	.LL32
	movl	24(%esp), %eax	/, x1
	cmpl	%ecx, %eax	/ tmp62, x1
	movl	56(%esp), %esi	/ y, y0
	movl	32(%esp), %edx	/ x, x0
	ja	.LL34
	xorl	%eax, %eax	/ q0
	cmpl	%esi, %edx	/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax	/, q0
.LL35:
	movl	%eax, %esi	/ q0, <result>
	xorl	%edi, %edi	/ <result>
.LL45:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
	movb	%al, %cl
	movl	56(%esp), %esi	/ y,
	movl	60(%esp), %edi	/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	movl	$32, %ecx	/, tmp96
	subl	%eax, %ecx	/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)	/, dt
	movl	24(%esp), %ebp	/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ tmp96, x2
	movl	%esi, 16(%esp)	/, dt
	movb	%al, %cl
	movl	32(%esp), %esi	/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ tmp1,
	movl	%ebp, %edx	/ x2,
	divl	8(%esp)
	movl	%edx, %ebp	/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx	/, q0
	movl	16(%esp), %eax	/ dt,
	mull	%ecx	/ q0
	cmpl	%ebp, %edx	/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi	/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi	/ q0, <result>
.LL46:
	xorl	%edi, %edi	/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ x0, t0
	jbe	.LL39
.LL38:
	decl	%ecx		/ q0
	movl	%ecx, %esi	/ q0, <result>
	jmp	.LL46
.LL43:
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)

/*
 * __udiv64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 */
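/ A minimal C sketch of __udiv64's behavior, for reference only: it
/ simply forwards to UDiv.  The sketch cannot express that the real
/ entry point also pops its own arguments (ret $16).
/
/ uint64_t
/ __udiv64(uint64_t x, uint64_t y)
/ {
/ 	return (UDiv(x, y));
/ }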
	ENTRY(__udiv64)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret	$16
	SET_SIZE(__udiv64)

/*
 * __urem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.  __urem64 pops the arguments on return.
 */
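/ A minimal C sketch of __urem64's behavior, for reference only: the
/ quotient from UDivRem is discarded and the stored remainder is
/ returned.  The sketch cannot express that the real entry point also
/ pops its own arguments (ret $16).
/
/ uint64_t
/ __urem64(uint64_t x, uint64_t y)
/ {
/ 	uint64_t	rem;
/
/ 	(void) UDivRem(x, y, &rem);
/ 	return (rem);
/ }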
	ENTRY(__urem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__urem64)

/*
 * __div64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.  __div64 pops the arguments on return.
 */
/ int64_t
/ __div64(int64_t x, int64_t y)
/ {
/ 	int		negative;
/ 	uint64_t	xt, yt, r;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 		negative = 1;
/ 	} else {
/ 		xt = x;
/ 		negative = 0;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 		negative ^= 1;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	r = UDiv(xt, yt);
/ 	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL84
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL85
.LL82:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL83
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL82
	.align	16
.LL85:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL82
	SET_SIZE(__div64)

/*
 * __rem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 */
/ int64_t
/ __rem64(int64_t x, int64_t y)
/ {
/ 	uint64_t	xt, yt, rem;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 	} else {
/ 		xt = x;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	(void) UDivRem(xt, yt, &rem);
/ 	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL92
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL93
.LL90:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL94
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL90
	.align	16
.LL93:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL90
	.align	16
.LL94:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)

/*
 * __udivrem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 * pops the arguments on return.
 */
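/ A rough sketch of __udivrem64's effect, for reference only.  Because
/ the remainder comes back in %ecx:%esi there is no matching C
/ prototype; the pseudocode below only shows the data flow through
/ UDivRem.
/
/ 	uint64_t	rem;
/
/ 	quotient (%edx:%eax)  = UDivRem(x, y, &rem);
/ 	remainder (%ecx:%esi) = rem;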
	ENTRY(__udivrem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp64
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	16(%esp), %ecx	/ rem, tmp63
	movl	12(%esp), %esi	/ rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__udivrem64)

/*
 * Signed division with remainder.
 */
/ int64_t
/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
/ {
/ 	int		negative;
/ 	uint64_t	xt, yt, r, rem;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 		negative = 1;
/ 	} else {
/ 		xt = x;
/ 		negative = 0;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 		negative ^= 1;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	r = UDivRem(xt, yt, &rem);
/ 	*pmod = (x < 0 ? (int64_t) - rem : rem);
/ 	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ x
	movl	%edx, %edi	/ x, x
	js	.LL73
	movl	44(%esp), %esi	/ y,
	xorl	%ebp, %ebp	/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/ x, xt
	movl	%eax, 8(%esp)	/ x, xt
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	js	.LL74
.LL70:
	leal	16(%esp), %eax	/, tmp70
	pushl	%eax		/ tmp70
	pushl	%ecx		/ yt
	pushl	%edx		/ yt
	movl	20(%esp), %eax	/ xt, xt
	movl	24(%esp), %edx	/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)	/, r
	movl	%eax, 12(%esp)	/, r
	addl	$12, %esp
	testl	%edi, %edi	/ x
	movl	16(%esp), %edx	/ rem, rem
	movl	20(%esp), %ecx	/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi	/ pmod, pmod
	testl	%ebp, %ebp	/ negative
	movl	%edx, (%edi)	/ rem,* pmod
	movl	%ecx, 4(%edi)	/ rem,
	movl	(%esp), %eax	/ r, r
	movl	4(%esp), %edx	/ r, r
	je	.LL72
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi	/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/, xt
	movl	%eax, 8(%esp)	/, xt
	movl	$1, %ebp	/, negative
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	jns	.LL70
	.align	16
.LL74:
	negl	%edx		/ yt
	adcl	$0, %ecx	/, yt
	negl	%ecx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL70
	.align	16
.LL75:
	negl	%edx		/ rem
	adcl	$0, %ecx	/, rem
	negl	%ecx		/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)

/*
 * __divrem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 * pops the arguments on return.
 */
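/ A rough sketch of __divrem64's effect, for reference only; as with
/ __udivrem64, the remainder comes back in %ecx:%esi, so there is no
/ matching C prototype.  The pseudocode below only shows the data flow
/ through SDivRem.
/
/ 	int64_t	rem;
/
/ 	quotient (%edx:%eax)  = SDivRem(x, y, &rem);
/ 	remainder (%ecx:%esi) = rem;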
	ENTRY(__divrem64)
	subl	$20, %esp
	movl	%esp, %ecx	/, tmp64
	movl	24(%esp), %eax	/ x, x
	movl	28(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	40(%esp)	/ y
	pushl	40(%esp)
	call	SDivRem
	movl	16(%esp), %ecx
	movl	12(%esp),%esi	/ rem
	addl	$32, %esp
	ret	$16
	SET_SIZE(__divrem64)