/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#if !defined(lint)
	.ident	"%Z%%M%	%I%	%E% SMI"

	.file	"muldiv.s"
#endif

#if defined(__i386) && !defined(__amd64)

/*
 * Helper routines for 32-bit compilers to perform 64-bit math.
 * These are used both by the Sun and GCC compilers.
 */
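
/*
 * For example (an illustration, not part of the original source), a
 * 32-bit compilation of
 *	uint64_t f(uint64_t a, uint64_t b) { return (a / b); }
 * emits a call to __udivdi3 (GCC) or __udiv64 (Sun cc) below instead
 * of a divide instruction, since the i386 divl instruction cannot
 * produce a 64-bit quotient.
 */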

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>


#if defined(__lint)
#include <sys/types.h>

/* ARGSUSED */
int64_t
__mul64(int64_t a, int64_t b)
{
	return (0);
}

#else   /* __lint */

/
/   function __mul64(A,B:Longint):Longint;
/	{Overflow is not checked}
/
/ We essentially do multiply by longhand, using base 2**32 digits.
/               a       b	parameter A
/	     x 	c       d	parameter B
/		---------
/               ad      bd
/       ac	bc
/       -----------------
/       ac	ad+bc	bd
/
/       We can ignore ac and top 32 bits of ad+bc: if <> 0, overflow happened.
/
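/ A C sketch of the same computation (an illustration, not part of the
/ original source):
/
/	uint64_t
/	mul64(uint64_t A, uint64_t B)
/	{
/		uint32_t a = A >> 32, b = (uint32_t)A;
/		uint32_t c = B >> 32, d = (uint32_t)B;
/		/* ac and the high 32 bits of ad+bc fall off the top */
/		uint32_t cross = (uint32_t)((uint64_t)a * d + (uint64_t)b * c);
/		return (((uint64_t)cross << 32) + (uint64_t)b * d);
/	}
/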
	ENTRY(__mul64)
	push	%ebp
	mov    	%esp,%ebp
	pushl	%esi
	mov	12(%ebp),%eax	/ A.hi (a)
	mull	16(%ebp)	/ Multiply A.hi by B.lo (produces ad)
	xchg	%ecx,%eax	/ ecx = bottom half of ad.
	movl    8(%ebp),%eax	/ A.Lo (b)
	movl	%eax,%esi	/ Save A.lo for later
	mull	16(%ebp)	/ Multiply A.Lo by B.LO (dx:ax = bd.)
	addl	%edx,%ecx	/ ecx = LO(ad) + HI(bd)
	xchg	%eax,%esi       / esi is LO(bd), eax = A.lo (b)
	mull	20(%ebp)	/ Multiply A.lo * B.hi (producing bc)
	addl	%ecx,%eax	/ Produce ad+bc
	movl	%esi,%edx
	xchg	%eax,%edx
	popl	%esi
	movl	%ebp,%esp
	popl	%ebp
	ret     $16
	SET_SIZE(__mul64)

#endif	/* __lint */

/*
 * C support for 64-bit modulo and division.
 * Hand-customized compiler output - see comments for details.
 */
#if defined(__lint)

/* ARGSUSED */
uint64_t
__udiv64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__urem64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__div64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__rem64(int64_t a, int64_t b)
{ return (0); }

#else	/* __lint */

/ /*
/  * Unsigned division with remainder.
/  * Divide two uint64_ts, and calculate remainder.
/  */
/ uint64_t
/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
/ {
/ 	/* simple cases: y is a single uint32_t */
/ 	if (HI(y) == 0) {
/ 		uint32_t	div_hi, div_rem;
/ 		uint32_t 	q0, q1;
/
/ 		/* calculate q1 */
/ 		if (HI(x) < LO(y)) {
/ 			/* result is a single uint32_t, use one division */
/ 			q1 = 0;
/ 			div_hi = HI(x);
/ 		} else {
/ 			/* result is a double uint32_t, use two divisions */
/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/ 		}
/
/ 		/* calculate q0 and remainder */
/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/ 		/* return remainder */
/ 		*pmod = div_rem;
/
/ 		/* return result */
/ 		return (HILO(q1, q0));
/
/ 	} else if (HI(x) < HI(y)) {
/ 		/* HI(x) < HI(y) => x < y => result is 0 */
/
/ 		/* return remainder */
/ 		*pmod = x;
/
/ 		/* return result */
/ 		return (0);
/
/ 	} else {
/ 		/*
/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/ 		 * result
/ 		 */
/ 		uint32_t		y0, y1;
/ 		uint32_t		x1, x0;
/ 		uint32_t		q0;
/ 		uint32_t		normshift;
/
/ 		/* normalize by shifting x and y so MSB(y) == 1 */
/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/ 		normshift = 31 - normshift;
/
/ 		if (normshift == 0) {
/ 			/* no shifting needed, and x < 2*y so q <= 1 */
/ 			y1 = HI(y);
/ 			y0 = LO(y);
/ 			x1 = HI(x);
/ 			x0 = LO(x);
/
/ 			/* if x >= y then q = 1 (note x1 >= y1) */
/ 			if (x1 > y1 || x0 >= y0) {
/ 				q0 = 1;
/ 				/* subtract y from x to get remainder */
/ 				A_SUB2(y0, y1, x0, x1);
/ 			} else {
/ 				q0 = 0;
/ 			}
/
/ 			/* return remainder */
/ 			*pmod = HILO(x1, x0);
/
/ 			/* return result */
/ 			return (q0);
/
/ 		} else {
/ 			/*
/ 			 * the last case: result is one uint32_t, but we need to
/ 			 * normalize
/ 			 */
/ 			uint64_t	dt;
/ 			uint32_t		t0, t1, x2;
/
/ 			/* normalize y */
/ 			dt = (y << normshift);
/ 			y1 = HI(dt);
/ 			y0 = LO(dt);
/
/ 			/* normalize x (we need 3 uint32_ts!!!) */
/ 			x2 = (HI(x) >> (32 - normshift));
/ 			dt = (x << normshift);
/ 			x1 = HI(dt);
/ 			x0 = LO(dt);
/
/ 			/* estimate q0, and reduce x to a two uint32_t value */
/ 			A_DIV32(x1, x2, y1, q0, x1);
/
/ 			/* adjust q0 down if too high */
/ 			/*
/ 			 * because of the limited range of x2 we can only be
/ 			 * one off
/ 			 */
/ 			A_MUL32(y0, q0, t0, t1);
/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/ 				q0--;
/ 				A_SUB2(y0, y1, t0, t1);
/ 			}
/ 			/* return remainder */
/ 			/* subtract product from x to get remainder */
/ 			A_SUB2(t0, t1, x0, x1);
/ 			*pmod = (HILO(x1, x0) >> normshift);
/
/ 			/* return result */
/ 			return (q0);
/ 		}
/ 	}
/ }
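/
/ A worked example (an illustration, not part of the original source):
/ let x = 0x0000000500000000 and y = 0x0000000300000000.  HI(y) = 3, so
/ HIBIT gives 1 and normshift = 30.  Normalizing, y << 30 =
/ 0xC000000000000000 (y1 = 0xC0000000, y0 = 0), x2 = 5 >> 2 = 1, and
/ x << 30 = 0x4000000000000000 (x1 = 0x40000000, x0 = 0).  A_DIV32 then
/ divides 0x140000000 by 0xC0000000, estimating q0 = 1 with x1 =
/ 0x80000000; the adjustment is not taken, and the remainder is
/ HILO(0x80000000, 0) >> 30 = 0x200000000.  Check: x / y = 1 and
/ x % y = 0x200000000.
/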
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ y,
	testl	%edi, %edi	/ tmp63
	movl	%eax, 40(%esp)	/ x, x
	movl	%edx, 44(%esp)	/ x, x
	movl	%edi, %esi	/, tmp62
	movl	%edi, %ecx	/ tmp62, tmp63
	jne	.LL2
	movl	%edx, %eax	/, tmp68
	cmpl	64(%esp), %eax	/ y, tmp68
	jae	.LL21
.LL4:
	movl	72(%esp), %ebp	/ pmod,
	xorl	%esi, %esi	/ <result>
	movl	40(%esp), %eax	/ x, q0
	movl	%ecx, %edi	/ <result>, <result>
	divl	64(%esp)	/ y
	movl	%edx, (%ebp)	/ div_rem,
	xorl	%edx, %edx	/ q0
	addl	%eax, %esi	/ q0, <result>
	movl	$0, 4(%ebp)
	adcl	%edx, %edi	/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
	movl	44(%esp), %eax	/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ tmp62, tmp5
	movl	%eax, 32(%esp)	/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	movl	72(%esp), %esi	/ pmod,
	movl	40(%esp), %ebp	/ x,
	movl	44(%esp), %ecx	/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ <result>
	xorl	%esi, %esi	/ <result>
.LL22:
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
	movl	%edi, %edx	/ tmp63, div_hi
	divl	64(%esp)	/ y
	movl	%eax, %ecx	/, q1
	jmp	.LL4
	.align	16
.LL6:
	movl	$31, %edi	/, tmp87
	bsrl	%esi,%edx	/ tmp62, normshift
	subl	%edx, %edi	/ normshift, tmp87
	movl	%edi, 28(%esp)	/ tmp87,
	jne	.LL8
	movl	32(%esp), %edx	/, x1
	cmpl	%ecx, %edx	/ y1, x1
	movl	64(%esp), %edi	/ y, y0
	movl	40(%esp), %esi	/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp	/ q0
	cmpl	%edi, %esi	/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp	/, q0
	subl	%edi,%esi	/ y0, x0
	sbbl	%ecx,%edx	/ tmp63, x1
.LL11:
	movl	%edx, %ecx	/ x1, x1
	xorl	%edx, %edx	/ x1
	xorl	%edi, %edi	/ x0
	addl	%esi, %edx	/ x0, x1
	adcl	%edi, %ecx	/ x0, x1
	movl	72(%esp), %esi	/ pmod,
	movl	%edx, (%esi)	/ x1,
	movl	%ecx, 4(%esi)	/ x1,
	xorl	%edi, %edi	/ <result>
	movl	%ebp, %esi	/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	movb	28(%esp), %cl
	movl	64(%esp), %esi	/ y, dt
	movl	68(%esp), %edi	/ y, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	movl	$32, %ecx	/, tmp102
	subl	28(%esp), %ecx	/, tmp102
	movl	%esi, %ebp	/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi	/ tmp102,
	movl	%edi, 24(%esp)	/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)	/, x2
	movl	44(%esp), %edi	/ x, dt
	movl	40(%esp), %esi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL18:
	movl	%edi, %ecx	/ dt,
	movl	%edi, %eax	/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx	/ x2,
	divl	24(%esp)
	movl	%edx, %ecx	/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax	/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx	/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	movl	%ecx, %edi	/ x1,
	subl	%eax,%esi	/ t0, x0
	sbbl	%edx,%edi	/ t1,
	movl	%edi, %eax	/, x1
	movl	%eax, %edx	/ x1, x1
	xorl	%eax, %eax	/ x1
	xorl	%ebp, %ebp	/ x0
	addl	%esi, %eax	/ x0, x1
	adcl	%ebp, %edx	/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax	/, x1, x1
	shrl	%cl, %edx	/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ x1, x1
	xorl	%edx, %edx	/ x1
.LL16:
	movl	72(%esp), %ecx	/ pmod,
	movl	20(%esp), %esi	/, <result>
	xorl	%edi, %edi	/ <result>
	movl	%eax, (%ecx)	/ x1,
	movl	%edx, 4(%ecx)	/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ x0, t0
	jbe	.LL15
.LL14:
	decl	20(%esp)
	subl	%ebp,%eax	/ y0, t0
	sbbl	24(%esp),%edx	/, t1
	jmp	.LL15
.LL23:
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)

/*
 * Unsigned division without remainder.
 */
/ uint64_t
/ UDiv(uint64_t x, uint64_t y)
/ {
/ 	if (HI(y) == 0) {
/ 		/* simple cases: y is a single uint32_t */
/ 		uint32_t	div_hi, div_rem;
/ 		uint32_t	q0, q1;
/
/ 		/* calculate q1 */
/ 		if (HI(x) < LO(y)) {
/ 			/* result is a single uint32_t, use one division */
/ 			q1 = 0;
/ 			div_hi = HI(x);
/ 		} else {
/ 			/* result is a double uint32_t, use two divisions */
/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/ 		}
/
/ 		/* calculate q0 and remainder */
/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/ 		/* return result */
/ 		return (HILO(q1, q0));
/
/ 	} else if (HI(x) < HI(y)) {
/ 		/* HI(x) < HI(y) => x < y => result is 0 */
/
/ 		/* return result */
/ 		return (0);
/
/ 	} else {
/ 		/*
/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/ 		 * result
/ 		 */
/ 		uint32_t		y0, y1;
/ 		uint32_t		x1, x0;
/ 		uint32_t		q0;
/ 		unsigned		normshift;
/
/ 		/* normalize by shifting x and y so MSB(y) == 1 */
/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/ 		normshift = 31 - normshift;
/
/ 		if (normshift == 0) {
/ 			/* no shifting needed, and x < 2*y so q <= 1 */
/ 			y1 = HI(y);
/ 			y0 = LO(y);
/ 			x1 = HI(x);
/ 			x0 = LO(x);
/
/ 			/* if x >= y then q = 1 (note x1 >= y1) */
/ 			if (x1 > y1 || x0 >= y0) {
/ 				q0 = 1;
/ 				/* subtract y from x to get remainder */
/ 				/* A_SUB2(y0, y1, x0, x1); */
/ 			} else {
/ 				q0 = 0;
/ 			}
/
/ 			/* return result */
/ 			return (q0);
/
/ 		} else {
/ 			/*
/ 			 * the last case: result is one uint32_t, but we need to
/ 			 * normalize
/ 			 */
/ 			uint64_t	dt;
/ 			uint32_t		t0, t1, x2;
/
/ 			/* normalize y */
/ 			dt = (y << normshift);
/ 			y1 = HI(dt);
/ 			y0 = LO(dt);
/
/ 			/* normalize x (we need 3 uint32_ts!!!) */
/ 			x2 = (HI(x) >> (32 - normshift));
/ 			dt = (x << normshift);
/ 			x1 = HI(dt);
/ 			x0 = LO(dt);
/
/ 			/* estimate q0, and reduce x to a two uint32_t value */
/ 			A_DIV32(x1, x2, y1, q0, x1);
/
/ 			/* adjust q0 down if too high */
/ 			/*
/ 			 * because of the limited range of x2 we can only be
/ 			 * one off
/ 			 */
/ 			A_MUL32(y0, q0, t0, t1);
/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/ 				q0--;
/ 			}
/ 			/* return result */
/ 			return (q0);
/ 		}
/ 	}
/ }
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ x, x
	movl	60(%esp), %edx	/ y,
	testl	%edx, %edx	/ tmp62
	movl	%eax, 32(%esp)	/ x, x
	movl	%edx, %ecx	/ tmp61, tmp62
	movl	%edx, %eax	/, tmp61
	jne	.LL26
	movl	36(%esp), %esi	/ x,
	cmpl	56(%esp), %esi	/ y, tmp67
	movl	%esi, %eax	/, tmp67
	movl	%esi, %edx	/ tmp67, div_hi
	jb	.LL28
	movl	%ecx, %edx	/ tmp62, div_hi
	divl	56(%esp)	/ y
	movl	%eax, %ecx	/, q1
.LL28:
	xorl	%esi, %esi	/ <result>
	movl	%ecx, %edi	/ <result>, <result>
	movl	32(%esp), %eax	/ x, q0
	xorl	%ecx, %ecx	/ q0
	divl	56(%esp)	/ y
	addl	%eax, %esi	/ q0, <result>
	adcl	%ecx, %edi	/ q0, <result>
.LL25:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
	movl	36(%esp), %esi	/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi	/ <result>
	xorl	%edi, %edi	/ <result>
	cmpl	%eax, 24(%esp)	/ tmp61,
	jb	.LL25
	bsrl	%eax,%ebp	/ tmp61, normshift
	movl	$31, %eax	/, tmp85
	subl	%ebp, %eax	/ normshift, normshift
	jne	.LL32
	movl	24(%esp), %eax	/, x1
	cmpl	%ecx, %eax	/ tmp62, x1
	movl	56(%esp), %esi	/ y, y0
	movl	32(%esp), %edx	/ x, x0
	ja	.LL34
	xorl	%eax, %eax	/ q0
	cmpl	%esi, %edx	/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax	/, q0
.LL35:
	movl	%eax, %esi	/ q0, <result>
	xorl	%edi, %edi	/ <result>
.LL45:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
	movb	%al, %cl
	movl	56(%esp), %esi	/ y,
	movl	60(%esp), %edi	/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	movl	$32, %ecx	/, tmp96
	subl	%eax, %ecx	/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)	/, dt
	movl	24(%esp), %ebp	/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ tmp96, x2
	movl	%esi, 16(%esp)	/, dt
	movb	%al, %cl
	movl	32(%esp), %esi	/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ tmp1,
	movl	%ebp, %edx	/ x2,
	divl	8(%esp)
	movl	%edx, %ebp	/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx	/, q0
	movl	16(%esp), %eax	/ dt,
	mull	%ecx	/ q0
	cmpl	%ebp, %edx	/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi	/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi	/ q0, <result>
.LL46:
	xorl	%edi, %edi	/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ x0, t0
	jbe	.LL39
.LL38:
	decl	%ecx		/ q0
	movl	%ecx, %esi	/ q0, <result>
	jmp	.LL46
.LL43:
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)

/*
 * __udiv64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 */
	ENTRY(__udiv64)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret     $16
	SET_SIZE(__udiv64)

/*
 * __urem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.  __urem64 pops the arguments on return.
 */
	ENTRY(__urem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__urem64)

/*
 * __div64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.  __div64 pops the arguments on return.
 */
/ int64_t
/ __div64(int64_t x, int64_t y)
/ {
/ 	int		negative;
/ 	uint64_t	xt, yt, r;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 		negative = 1;
/ 	} else {
/ 		xt = x;
/ 		negative = 0;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 		negative ^= 1;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	r = UDiv(xt, yt);
/ 	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL84
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL85
.LL82:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL83
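	/ negate the 64-bit quotient in %edx:%eax: negl replaces the low
	/ half with its two's complement and sets the carry flag if it
	/ was nonzero; adcl folds that borrow into the high half before
	/ it, too, is negated (the same idiom recurs throughout)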
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL82
	.align	16
.LL85:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL82
	SET_SIZE(__div64)

/*
 * __rem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 */
/ int64_t
/ __rem64(int64_t x, int64_t y)
/ {
/ 	uint64_t	xt, yt, rem;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 	} else {
/ 		xt = x;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	(void) UDivRem(xt, yt, &rem);
/ 	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL92
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL93
.LL90:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL94
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL90
	.align	16
.LL93:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL90
	.align	16
.LL94:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)

#endif	/* __lint */

#if defined(__lint)

/*
 * C support for 64-bit modulo and division.
 * GNU routines callable from C (though generated by the compiler).
 * Hand-customized compiler output - see comments for details.
 */
/*ARGSUSED*/
unsigned long long
__udivdi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
unsigned long long
__umoddi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
long long
__divdi3(long long a, long long b)
{ return (0); }

/*ARGSUSED*/
long long
__moddi3(long long a, long long b)
{ return (0); }

/* ARGSUSED */
int64_t
__divrem64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__udivrem64(uint64_t a, uint64_t b)
{ return (0); }

#else	/* __lint */

/*
 * int32_t/int64_t division/manipulation
 *
 * Hand-customized compiler output: the non-GCC entry points depart from
 * the SYS V ABI by requiring their arguments to be popped, and in the
 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
 * compiler-generated use of %edx:%eax for the first argument of
 * internal entry points.
 *
 * Inlines for speed:
 * - counting the number of leading zeros in a word
 * - multiplying two 32-bit numbers giving a 64-bit result
 * - dividing a 64-bit number by a 32-bit number, giving both quotient
 *	and remainder
 * - subtracting two 64-bit results
 */
/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
/
/ /* give index of highest bit */
/ #define	HIBIT(a, r) \
/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
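/     /*
/      * n.b. bsrl leaves (r) undefined when (a) == 0; the UDivRem and
/      * UDiv code above only applies HIBIT to a nonzero HI(y).
/      */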
/
/ /* multiply two uint32_ts resulting in a uint64_t */
/ #define	A_MUL32(a, b, lo, hi) \
/     asm("mull %2" \
/ 	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
/
/ /* divide a uint64_t by a uint32_t */
/ #define	A_DIV32(lo, hi, b, q, r) \
/     asm("divl %2" \
/ 	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
/ 	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
/
/ /* subtract two uint64_ts (with borrow) */
/ #define	A_SUB2(bl, bh, al, ah) \
/     asm("subl %4,%0\n\tsbbl %5,%1" \
/ 	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
/ 	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
/ 	"g"((uint32_t)(bh)))
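/
/ A usage sketch (an illustration, not part of the original source):
/	uint32_t q, r;
/	A_DIV32(LO(x), HI(x), d, q, r);
/ performs the single divl behind x / d and x % d for a 32-bit divisor
/ d; it requires HI(x) < d, since divl faults when the quotient
/ overflows 32 bits.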

/*
 * __udivdi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
	ENTRY(__udivdi3)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret
	SET_SIZE(__udivdi3)

/*
 * __umoddi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
	ENTRY(__umoddi3)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret
	SET_SIZE(__umoddi3)

/*
 * __divdi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
/ int64_t
/ __divdi3(int64_t x, int64_t y)
/ {
/ 	int		negative;
/ 	uint64_t	xt, yt, r;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 		negative = 1;
/ 	} else {
/ 		xt = x;
/ 		negative = 0;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 		negative ^= 1;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	r = UDiv(xt, yt);
/ 	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__divdi3)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL55
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL56
.LL53:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL54
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL55:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL53
	.align	16
.LL56:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL53
	SET_SIZE(__divdi3)

/*
 * __moddi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
/ int64_t
/ __moddi3(int64_t x, int64_t y)
/ {
/ 	uint64_t	xt, yt, rem;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 	} else {
/ 		xt = x;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	(void) UDivRem(xt, yt, &rem);
/ 	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__moddi3)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL63
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL64
.LL61:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL65
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret
	.align	16
.LL63:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL61
	.align	16
.LL64:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL61
	.align	16
.LL65:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret
	SET_SIZE(__moddi3)

/*
 * __udivrem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 * pops the arguments on return.
 */
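/ A caller sketch (an assumption for illustration, not from the
/ original source; x_lo through y_hi are hypothetical operands):
/	pushl	y_hi		/ arguments pushed right to left
/	pushl	y_lo
/	pushl	x_hi
/	pushl	x_lo
/	call	__udivrem64	/ quotient in %edx:%eax,
/				/ remainder in %ecx:%esi; the ret $16
/				/ has already popped the four words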
	ENTRY(__udivrem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp64
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	16(%esp), %ecx	/ rem, tmp63
	movl	12(%esp), %esi	/ rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__udivrem64)

/*
 * Signed division with remainder.
 */
/ int64_t
/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
/ {
/ 	int		negative;
/ 	uint64_t	xt, yt, r, rem;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 		negative = 1;
/ 	} else {
/ 		xt = x;
/ 		negative = 0;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 		negative ^= 1;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	r = UDivRem(xt, yt, &rem);
/ 	*pmod = (x < 0 ? (int64_t) - rem : rem);
/ 	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ x
	movl	%edx, %edi	/ x, x
	js	.LL73
	movl	44(%esp), %esi	/ y,
	xorl	%ebp, %ebp	/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/ x, xt
	movl	%eax, 8(%esp)	/ x, xt
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	js	.LL74
.LL70:
	leal	16(%esp), %eax	/, tmp70
	pushl	%eax		/ tmp70
	pushl	%ecx		/ yt
	pushl	%edx		/ yt
	movl	20(%esp), %eax	/ xt, xt
	movl	24(%esp), %edx	/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)	/, r
	movl	%eax, 12(%esp)	/, r
	addl	$12, %esp
	testl	%edi, %edi	/ x
	movl	16(%esp), %edx	/ rem, rem
	movl	20(%esp), %ecx	/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi	/ pmod, pmod
	testl	%ebp, %ebp	/ negative
	movl	%edx, (%edi)	/ rem,* pmod
	movl	%ecx, 4(%edi)	/ rem,
	movl	(%esp), %eax	/ r, r
	movl	4(%esp), %edx	/ r, r
	je	.LL72
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi	/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/, xt
	movl	%eax, 8(%esp)	/, xt
	movl	$1, %ebp	/, negative
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	jns	.LL70
	.align	16
.LL74:
	negl	%edx		/ yt
	adcl	$0, %ecx	/, yt
	negl	%ecx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL70
	.align	16
.LL75:
	negl	%edx		/ rem
	adcl	$0, %ecx	/, rem
	negl	%ecx		/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)

/*
 * __divrem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 * pops the arguments on return.
 */
	ENTRY(__divrem64)
	subl	$20, %esp
	movl	%esp, %ecx	/, tmp64
	movl	24(%esp), %eax	/ x, x
	movl	28(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	40(%esp)	/ y
	pushl	40(%esp)
	call	SDivRem
	movl	16(%esp), %ecx
	movl	12(%esp), %esi	/ rem
	addl	$32, %esp
	ret	$16
	SET_SIZE(__divrem64)


#endif /* __lint */

#endif /* defined(__i386) && !defined(__amd64) */
1300