xref: /titanic_41/usr/src/lib/libc/i386/gen/_div64.s (revision 6185db853e024a486ff8837e6784dd290d866112)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31#include "SYS.h"
32
33/*
34 * C support for 64-bit modulo and division.
35 * Hand-customized compiler output - see comments for details.
36 */
37
38/*
39 * int32_t/int64_t division/manipulation
40 *
41 * Hand-customized compiler output: the non-GCC entry points depart from
42 * the SYS V ABI by popping their own arguments on return, and in the
43 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
44 * compiler-generated use of %edx:%eax for the first argument of
45 * internal entry points.
46 *
47 * Inlines for speed:
48 * - counting the number of leading zeros in a word
49 * - multiplying two 32-bit numbers giving a 64-bit result
50 * - dividing a 64-bit number by a 32-bit number, giving both quotient
51 *	and remainder
52 * - subtracting two 64-bit results
53 */
54/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
55/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
56/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
57/
58/ /* give index of highest bit */
59/ #define	HIBIT(a, r) \
60/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
61/
62/ /* multiply two uint32_ts resulting in a uint64_t */
63/ #define	A_MUL32(a, b, lo, hi) \
64/     asm("mull %2" \
65/ 	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
66/
67/ /* divide a uint64_t by a uint32_t */
68/ #define	A_DIV32(lo, hi, b, q, r) \
69/     asm("divl %2" \
70/ 	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
71/ 	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
72/
73/ /* subtract two uint64_ts (with borrow) */
74/ #define	A_SUB2(bl, bh, al, ah) \
75/     asm("subl %4,%0\n\tsbbl %5,%1" \
76/ 	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
77/ 	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
78/ 	"g"((uint32_t)(bh)))
79/
80/ /*
81/  * Unsigned division with remainder.
82/  * Divide two uint64_ts, and calculate remainder.
83/  */
84/ uint64_t
85/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
86/ {
87/ 	/* simple cases: y is a single uint32_t */
88/ 	if (HI(y) == 0) {
89/ 		uint32_t	div_hi, div_rem;
90/ 		uint32_t 	q0, q1;
91/
92/ 		/* calculate q1 */
93/ 		if (HI(x) < LO(y)) {
94/ 			/* result is a single uint32_t, use one division */
95/ 			q1 = 0;
96/ 			div_hi = HI(x);
97/ 		} else {
98/ 			/* result is a double uint32_t, use two divisions */
99/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
100/ 		}
101/
102/ 		/* calculate q0 and remainder */
103/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
104/
105/ 		/* return remainder */
106/ 		*pmod = div_rem;
107/
108/ 		/* return result */
109/ 		return (HILO(q1, q0));
110/
111/ 	} else if (HI(x) < HI(y)) {
112/ 		/* HI(x) < HI(y) => x < y => result is 0 */
113/
114/ 		/* return remainder */
115/ 		*pmod = x;
116/
117/ 		/* return result */
118/ 		return (0);
119/
120/ 	} else {
121/ 		/*
122/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
123/ 		 * result
124/ 		 */
125/ 		uint32_t		y0, y1;
126/ 		uint32_t		x1, x0;
127/ 		uint32_t		q0;
128/ 		uint32_t		normshift;
129/
130/ 		/* normalize by shifting x and y so MSB(y) == 1 */
131/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
132/ 		normshift = 31 - normshift;
133/
134/ 		if (normshift == 0) {
135/ 			/* no shifting needed, and x < 2*y so q <= 1 */
136/ 			y1 = HI(y);
137/ 			y0 = LO(y);
138/ 			x1 = HI(x);
139/ 			x0 = LO(x);
140/
141/ 			/* if x >= y then q = 1 (note x1 >= y1) */
142/ 			if (x1 > y1 || x0 >= y0) {
143/ 				q0 = 1;
144/ 				/* subtract y from x to get remainder */
145/ 				A_SUB2(y0, y1, x0, x1);
146/ 			} else {
147/ 				q0 = 0;
148/ 			}
149/
150/ 			/* return remainder */
151/ 			*pmod = HILO(x1, x0);
152/
153/ 			/* return result */
154/ 			return (q0);
155/
156/ 		} else {
157/ 			/*
158/ 			 * the last case: result is one uint32_t, but we need to
159/ 			 * normalize
160/ 			 */
161/ 			uint64_t	dt;
162/ 			uint32_t		t0, t1, x2;
163/
164/ 			/* normalize y */
165/ 			dt = (y << normshift);
166/ 			y1 = HI(dt);
167/ 			y0 = LO(dt);
168/
169/ 			/* normalize x (we need 3 uint32_ts!!!) */
170/ 			x2 = (HI(x) >> (32 - normshift));
171/ 			dt = (x << normshift);
172/ 			x1 = HI(dt);
173/ 			x0 = LO(dt);
174/
175/ 			/* estimate q0, and reduce x to a two uint32_t value */
176/ 			A_DIV32(x1, x2, y1, q0, x1);
177/
178/ 			/* adjust q0 down if too high */
179/ 			/*
180/ 			 * because of the limited range of x2 we can only be
181/ 			 * one off
182/ 			 */
183/ 			A_MUL32(y0, q0, t0, t1);
184/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
185/ 				q0--;
186/ 				A_SUB2(y0, y1, t0, t1);
187/ 			}
188/ 			/* return remainder */
189/ 			/* subtract product from x to get remainder */
190/ 			A_SUB2(t0, t1, x0, x1);
191/ 			*pmod = (HILO(x1, x0) >> normshift);
192/
193/ 			/* return result */
194/ 			return (q0);
195/ 		}
196/ 	}
197/ }
/*
 * UDivRem: assembly body for the C reference code shown in the comment
 * above (unsigned 64-bit division with remainder).
 *
 * In:	%edx:%eax = x (dividend).  On entry 4(%esp)/8(%esp) hold the
 *	low/high words of y (divisor) and 12(%esp) holds pmod; after the
 *	prologue below they are at 64(%esp)/68(%esp) and 72(%esp).
 * Out:	%edx:%eax = quotient; the remainder is stored through pmod.
 *	Returns with a plain "ret", leaving the stack arguments in place.
 * Regs:	%ebp, %edi and %esi are saved and restored; %ecx is overwritten.
 *
 * Frame slots used: 40/44(%esp) = x, 32(%esp) = HI(x), 28(%esp) =
 * normshift, 24(%esp) = normalized y1, 20(%esp) = q0, 12(%esp) = x2.
 */
198	ENTRY(UDivRem)
199	pushl	%ebp
200	pushl	%edi
201	pushl	%esi
202	subl	$48, %esp
203	movl	68(%esp), %edi	/ y,
204	testl	%edi, %edi	/ tmp63
205	movl	%eax, 40(%esp)	/ x, x
206	movl	%edx, 44(%esp)	/ x, x
207	movl	%edi, %esi	/, tmp62
208	movl	%edi, %ecx	/ tmp62, tmp63
209	jne	.LL2
/* HI(y) == 0: when HI(x) >= LO(y) the quotient needs two divisions */
210	movl	%edx, %eax	/, tmp68
211	cmpl	64(%esp), %eax	/ y, tmp68
212	jae	.LL21
/* here %ecx = q1 and %edx = div_hi; divide div_hi:LO(x) by LO(y) */
213.LL4:
214	movl	72(%esp), %ebp	/ pmod,
215	xorl	%esi, %esi	/ <result>
216	movl	40(%esp), %eax	/ x, q0
217	movl	%ecx, %edi	/ <result>, <result>
218	divl	64(%esp)	/ y
/* *pmod = div_rem (high word 0); result = HILO(q1, q0) */
219	movl	%edx, (%ebp)	/ div_rem,
220	xorl	%edx, %edx	/ q0
221	addl	%eax, %esi	/ q0, <result>
222	movl	$0, 4(%ebp)
223	adcl	%edx, %edi	/ q0, <result>
224	addl	$48, %esp
225	movl	%esi, %eax	/ <result>, <result>
226	popl	%esi
227	movl	%edi, %edx	/ <result>, <result>
228	popl	%edi
229	popl	%ebp
230	ret
231	.align	16
/* HI(y) != 0: if HI(x) < HI(y) then x < y, so quotient 0, remainder x */
232.LL2:
233	movl	44(%esp), %eax	/ x,
234	xorl	%edx, %edx
235	cmpl	%esi, %eax	/ tmp62, tmp5
236	movl	%eax, 32(%esp)	/ tmp5,
237	movl	%edx, 36(%esp)
238	jae	.LL6
239	movl	72(%esp), %esi	/ pmod,
240	movl	40(%esp), %ebp	/ x,
241	movl	44(%esp), %ecx	/ x,
242	movl	%ebp, (%esi)
243	movl	%ecx, 4(%esi)
244	xorl	%edi, %edi	/ <result>
245	xorl	%esi, %esi	/ <result>
/* common exit: result is in %edi:%esi, moved to %edx:%eax */
246.LL22:
247	addl	$48, %esp
248	movl	%esi, %eax	/ <result>, <result>
249	popl	%esi
250	movl	%edi, %edx	/ <result>, <result>
251	popl	%edi
252	popl	%ebp
253	ret
254	.align	16
/* q1 = HI(x) / LO(y); the remainder left in %edx becomes div_hi */
255.LL21:
256	movl	%edi, %edx	/ tmp63, div_hi
257	divl	64(%esp)	/ y
258	movl	%eax, %ecx	/, q1
259	jmp	.LL4
260	.align	16
/*
 * normshift = 31 - (index of the highest set bit of HI(y)).  The flags
 * set by subl are still intact at the jne (movl does not change them),
 * so the branch is taken when normshift != 0.
 */
261.LL6:
262	movl	$31, %edi	/, tmp87
263	bsrl	%esi,%edx	/ tmp62, normshift
264	subl	%edx, %edi	/ normshift, tmp87
265	movl	%edi, 28(%esp)	/ tmp87,
266	jne	.LL8
/* normshift == 0: q0 is 1 if x1 > y1 or x0 >= y0, otherwise 0 */
267	movl	32(%esp), %edx	/, x1
268	cmpl	%ecx, %edx	/ y1, x1
269	movl	64(%esp), %edi	/ y, y0
270	movl	40(%esp), %esi	/ x, x0
271	ja	.LL10
272	xorl	%ebp, %ebp	/ q0
273	cmpl	%edi, %esi	/ y0, x0
274	jb	.LL11
/* q0 = 1; x1:x0 -= y1:y0 leaves the remainder in x1:x0 */
275.LL10:
276	movl	$1, %ebp	/, q0
277	subl	%edi,%esi	/ y0, x0
278	sbbl	%ecx,%edx	/ tmp63, x1
/* *pmod = HILO(x1, x0); result = q0 */
279.LL11:
280	movl	%edx, %ecx	/ x1, x1
281	xorl	%edx, %edx	/ x1
282	xorl	%edi, %edi	/ x0
283	addl	%esi, %edx	/ x0, x1
284	adcl	%edi, %ecx	/ x0, x1
285	movl	72(%esp), %esi	/ pmod,
286	movl	%edx, (%esi)	/ x1,
287	movl	%ecx, 4(%esi)	/ x1,
288	xorl	%edi, %edi	/ <result>
289	movl	%ebp, %esi	/ q0, <result>
290	jmp	.LL22
291	.align	16
/*
 * dt = y << normshift, with the shift count in %cl.  The test of bit 5
 * of the count and the .LL23 fixup handle counts of 32 or more;
 * normshift is between 1 and 31 on this path.
 */
292.LL8:
293	movb	28(%esp), %cl
294	movl	64(%esp), %esi	/ y, dt
295	movl	68(%esp), %edi	/ y, dt
296	shldl	%esi, %edi	/, dt, dt
297	sall	%cl, %esi	/, dt
298	andl	$32, %ecx
299	jne	.LL23
/*
 * y0 = LO(dt) in %ebp, y1 = HI(dt) in 24(%esp);
 * x2 = HI(x) >> (32 - normshift); then dt = x << normshift.
 */
300.LL17:
301	movl	$32, %ecx	/, tmp102
302	subl	28(%esp), %ecx	/, tmp102
303	movl	%esi, %ebp	/ dt, y0
304	movl	32(%esp), %esi
305	shrl	%cl, %esi	/ tmp102,
306	movl	%edi, 24(%esp)	/ tmp99,
307	movb	28(%esp), %cl
308	movl	%esi, 12(%esp)	/, x2
309	movl	44(%esp), %edi	/ x, dt
310	movl	40(%esp), %esi	/ x, dt
311	shldl	%esi, %edi	/, dt, dt
312	sall	%cl, %esi	/, dt
313	andl	$32, %ecx
314	je	.LL18
315	movl	%esi, %edi	/ dt, dt
316	xorl	%esi, %esi	/ dt
/*
 * q0 = x2:x1 divided by y1, with the remainder becoming the new x1
 * (%ecx); then t1:t0 (%edx:%eax) = y0 * q0.  x0 stays in %esi.
 */
317.LL18:
318	movl	%edi, %ecx	/ dt,
319	movl	%edi, %eax	/ tmp2,
320	movl	%ecx, (%esp)
321	movl	12(%esp), %edx	/ x2,
322	divl	24(%esp)
323	movl	%edx, %ecx	/, x1
324	xorl	%edi, %edi
325	movl	%eax, 20(%esp)
326	movl	%ebp, %eax	/ y0, t0
327	mull	20(%esp)
/* q0 is too high when t1:t0 > x1:x0 */
328	cmpl	%ecx, %edx	/ x1, t1
329	movl	%edi, 4(%esp)
330	ja	.LL14
331	je	.LL24
/* x1:x0 -= t1:t0, then shift right by normshift to get the remainder */
332.LL15:
333	movl	%ecx, %edi	/ x1,
334	subl	%eax,%esi	/ t0, x0
335	sbbl	%edx,%edi	/ t1,
336	movl	%edi, %eax	/, x1
337	movl	%eax, %edx	/ x1, x1
338	xorl	%eax, %eax	/ x1
339	xorl	%ebp, %ebp	/ x0
340	addl	%esi, %eax	/ x0, x1
341	adcl	%ebp, %edx	/ x0, x1
342	movb	28(%esp), %cl
343	shrdl	%edx, %eax	/, x1, x1
344	shrl	%cl, %edx	/, x1
345	andl	$32, %ecx
346	je	.LL16
347	movl	%edx, %eax	/ x1, x1
348	xorl	%edx, %edx	/ x1
/* *pmod = remainder; result = q0 */
349.LL16:
350	movl	72(%esp), %ecx	/ pmod,
351	movl	20(%esp), %esi	/, <result>
352	xorl	%edi, %edi	/ <result>
353	movl	%eax, (%ecx)	/ x1,
354	movl	%edx, 4(%ecx)	/ x1,
355	jmp	.LL22
356	.align	16
/* t1 == x1: the low words decide whether q0 is too high */
357.LL24:
358	cmpl	%esi, %eax	/ x0, t0
359	jbe	.LL15
/* q0--; t1:t0 -= y1:y0 */
360.LL14:
361	decl	20(%esp)
362	subl	%ebp,%eax	/ y0, t0
363	sbbl	24(%esp),%edx	/, t1
364	jmp	.LL15
/* shift count had bit 5 set: the low word becomes the high word */
365.LL23:
366	movl	%esi, %edi	/ dt, dt
367	xorl	%esi, %esi	/ dt
368	jmp	.LL17
369	SET_SIZE(UDivRem)
370
371/*
372 * Unsigned division without remainder.
373 */
374/ uint64_t
375/ UDiv(uint64_t x, uint64_t y)
376/ {
377/ 	if (HI(y) == 0) {
378/ 		/* simple cases: y is a single uint32_t */
379/ 		uint32_t	div_hi, div_rem;
380/ 		uint32_t	q0, q1;
381/
382/ 		/* calculate q1 */
383/ 		if (HI(x) < LO(y)) {
384/ 			/* result is a single uint32_t, use one division */
385/ 			q1 = 0;
386/ 			div_hi = HI(x);
387/ 		} else {
388/ 			/* result is a double uint32_t, use two divisions */
389/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
390/ 		}
391/
392/ 		/* calculate q0 and remainder */
393/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
394/
395/ 		/* return result */
396/ 		return (HILO(q1, q0));
397/
398/ 	} else if (HI(x) < HI(y)) {
399/ 		/* HI(x) < HI(y) => x < y => result is 0 */
400/
401/ 		/* return result */
402/ 		return (0);
403/
404/ 	} else {
405/ 		/*
406/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
407/ 		 * result
408/ 		 */
409/ 		uint32_t		y0, y1;
410/ 		uint32_t		x1, x0;
411/ 		uint32_t		q0;
412/ 		unsigned		normshift;
413/
414/ 		/* normalize by shifting x and y so MSB(y) == 1 */
415/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
416/ 		normshift = 31 - normshift;
417/
418/ 		if (normshift == 0) {
419/ 			/* no shifting needed, and x < 2*y so q <= 1 */
420/ 			y1 = HI(y);
421/ 			y0 = LO(y);
422/ 			x1 = HI(x);
423/ 			x0 = LO(x);
424/
425/ 			/* if x >= y then q = 1 (note x1 >= y1) */
426/ 			if (x1 > y1 || x0 >= y0) {
427/ 				q0 = 1;
428/ 				/* subtract y from x to get remainder */
429/ 				/* A_SUB2(y0, y1, x0, x1); */
430/ 			} else {
431/ 				q0 = 0;
432/ 			}
433/
434/ 			/* return result */
435/ 			return (q0);
436/
437/ 		} else {
438/ 			/*
439/ 			 * the last case: result is one uint32_t, but we need to
440/ 			 * normalize
441/ 			 */
442/ 			uint64_t	dt;
443/ 			uint32_t		t0, t1, x2;
444/
445/ 			/* normalize y */
446/ 			dt = (y << normshift);
447/ 			y1 = HI(dt);
448/ 			y0 = LO(dt);
449/
450/ 			/* normalize x (we need 3 uint32_ts!!!) */
451/ 			x2 = (HI(x) >> (32 - normshift));
452/ 			dt = (x << normshift);
453/ 			x1 = HI(dt);
454/ 			x0 = LO(dt);
455/
456/ 			/* estimate q0, and reduce x to a two uint32_t value */
457/ 			A_DIV32(x1, x2, y1, q0, x1);
458/
459/ 			/* adjust q0 down if too high */
460/ 			/*
461/ 			 * because of the limited range of x2 we can only be
462/ 			 * one off
463/ 			 */
464/ 			A_MUL32(y0, q0, t0, t1);
465/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
466/ 				q0--;
467/ 			}
468/ 			/* return result */
469/ 			return (q0);
470/ 		}
471/ 	}
472/ }
/*
 * UDiv: assembly body for the C reference code shown in the comment
 * above (unsigned 64-bit division, quotient only).
 *
 * In:	%edx:%eax = x (dividend).  On entry 4(%esp)/8(%esp) hold the
 *	low/high words of y (divisor); after the prologue below they
 *	are at 56(%esp)/60(%esp).
 * Out:	%edx:%eax = quotient.  Returns with a plain "ret", leaving the
 *	stack arguments in place.
 * Regs:	%ebp, %edi and %esi are saved and restored; %ecx is overwritten.
 *
 * Frame slots used: 32/36(%esp) = x, 24(%esp) = HI(x), 16(%esp) =
 * normalized y0, 8(%esp) = normalized y1 (the divisor of the estimate).
 */
473	ENTRY(UDiv)
474	pushl	%ebp
475	pushl	%edi
476	pushl	%esi
477	subl	$40, %esp
478	movl	%edx, 36(%esp)	/ x, x
479	movl	60(%esp), %edx	/ y,
480	testl	%edx, %edx	/ tmp62
481	movl	%eax, 32(%esp)	/ x, x
482	movl	%edx, %ecx	/ tmp61, tmp62
483	movl	%edx, %eax	/, tmp61
484	jne	.LL26
/*
 * HI(y) == 0 (so %ecx is 0 here).  If HI(x) < LO(y) one division is
 * enough: q1 = 0 (%ecx) and div_hi = HI(x) (%edx).  Otherwise first
 * compute q1 = HI(x) / LO(y), whose remainder becomes div_hi.
 */
485	movl	36(%esp), %esi	/ x,
486	cmpl	56(%esp), %esi	/ y, tmp67
487	movl	%esi, %eax	/, tmp67
488	movl	%esi, %edx	/ tmp67, div_hi
489	jb	.LL28
490	movl	%ecx, %edx	/ tmp62, div_hi
491	divl	56(%esp)	/ y
492	movl	%eax, %ecx	/, q1
/* q0 = div_hi:LO(x) / LO(y); result = HILO(q1, q0) */
493.LL28:
494	xorl	%esi, %esi	/ <result>
495	movl	%ecx, %edi	/ <result>, <result>
496	movl	32(%esp), %eax	/ x, q0
497	xorl	%ecx, %ecx	/ q0
498	divl	56(%esp)	/ y
499	addl	%eax, %esi	/ q0, <result>
500	adcl	%ecx, %edi	/ q0, <result>
/* exit: result is in %edi:%esi, moved to %edx:%eax */
501.LL25:
502	addl	$40, %esp
503	movl	%esi, %eax	/ <result>, <result>
504	popl	%esi
505	movl	%edi, %edx	/ <result>, <result>
506	popl	%edi
507	popl	%ebp
508	ret
509	.align	16
/* HI(y) != 0: preset the result to 0 and return it if HI(x) < HI(y) */
510.LL26:
511	movl	36(%esp), %esi	/ x,
512	xorl	%edi, %edi
513	movl	%esi, 24(%esp)	/ tmp1,
514	movl	%edi, 28(%esp)
515	xorl	%esi, %esi	/ <result>
516	xorl	%edi, %edi	/ <result>
517	cmpl	%eax, 24(%esp)	/ tmp61,
518	jb	.LL25
/* normshift (%eax) = 31 - (index of the highest set bit of HI(y)) */
519	bsrl	%eax,%ebp	/ tmp61, normshift
520	movl	$31, %eax	/, tmp85
521	subl	%ebp, %eax	/ normshift, normshift
522	jne	.LL32
/* normshift == 0: q0 is 1 if x1 > y1 or x0 >= y0, otherwise 0 */
523	movl	24(%esp), %eax	/, x1
524	cmpl	%ecx, %eax	/ tmp62, x1
525	movl	56(%esp), %esi	/ y, y0
526	movl	32(%esp), %edx	/ x, x0
527	ja	.LL34
528	xorl	%eax, %eax	/ q0
529	cmpl	%esi, %edx	/ y0, x0
530	jb	.LL35
531.LL34:
532	movl	$1, %eax	/, q0
533.LL35:
534	movl	%eax, %esi	/ q0, <result>
535	xorl	%edi, %edi	/ <result>
/* exit for the 64-by-64 paths: result is in %edi:%esi */
536.LL45:
537	addl	$40, %esp
538	movl	%esi, %eax	/ <result>, <result>
539	popl	%esi
540	movl	%edi, %edx	/ <result>, <result>
541	popl	%edi
542	popl	%ebp
543	ret
544	.align	16
/*
 * dt = y << normshift, with the shift count in %cl.  The test of bit 5
 * of the count and the .LL43 fixup handle counts of 32 or more;
 * normshift is between 1 and 31 on this path.
 */
545.LL32:
546	movb	%al, %cl
547	movl	56(%esp), %esi	/ y,
548	movl	60(%esp), %edi	/ y,
549	shldl	%esi, %edi
550	sall	%cl, %esi
551	andl	$32, %ecx
552	jne	.LL43
/*
 * Save y1 = HI(dt) and y0 = LO(dt); x2 (%ebp) = HI(x) >> (32 - normshift);
 * then dt = x << normshift.
 */
553.LL40:
554	movl	$32, %ecx	/, tmp96
555	subl	%eax, %ecx	/ normshift, tmp96
556	movl	%edi, %edx
557	movl	%edi, 20(%esp)	/, dt
558	movl	24(%esp), %ebp	/, x2
559	xorl	%edi, %edi
560	shrl	%cl, %ebp	/ tmp96, x2
561	movl	%esi, 16(%esp)	/, dt
562	movb	%al, %cl
563	movl	32(%esp), %esi	/ x, dt
564	movl	%edi, 12(%esp)
565	movl	36(%esp), %edi	/ x, dt
566	shldl	%esi, %edi	/, dt, dt
567	sall	%cl, %esi	/, dt
568	andl	$32, %ecx
569	movl	%edx, 8(%esp)
570	je	.LL41
571	movl	%esi, %edi	/ dt, dt
572	xorl	%esi, %esi	/ dt
/*
 * q0 (%ecx) = x2:x1 divided by y1, with the remainder becoming the new
 * x1 (%ebp); then t1:t0 (%edx:%eax) = y0 * q0.  q0 is too high when
 * t1:t0 > x1:x0 (x0 is copied to %edi for the low-word compare).
 */
573.LL41:
574	xorl	%ecx, %ecx
575	movl	%edi, %eax	/ tmp1,
576	movl	%ebp, %edx	/ x2,
577	divl	8(%esp)
578	movl	%edx, %ebp	/, x1
579	movl	%ecx, 4(%esp)
580	movl	%eax, %ecx	/, q0
581	movl	16(%esp), %eax	/ dt,
582	mull	%ecx	/ q0
583	cmpl	%ebp, %edx	/ x1, t1
584	movl	%edi, (%esp)
585	movl	%esi, %edi	/ dt, x0
586	ja	.LL38
587	je	.LL44
/* result = q0, high word 0 */
588.LL39:
589	movl	%ecx, %esi	/ q0, <result>
590.LL46:
591	xorl	%edi, %edi	/ <result>
592	jmp	.LL45
/* t1 == x1: the low words decide whether q0 is too high */
593.LL44:
594	cmpl	%edi, %eax	/ x0, t0
595	jbe	.LL39
/* q0 was one too high */
596.LL38:
597	decl	%ecx		/ q0
598	movl	%ecx, %esi	/ q0, <result>
599	jmp	.LL46
/* shift count had bit 5 set: the low word becomes the high word */
600.LL43:
601	movl	%esi, %edi
602	xorl	%esi, %esi
603	jmp	.LL40
604	SET_SIZE(UDiv)
605
606/*
607 * __udiv64
608 *
609 * Perform division of two unsigned 64-bit quantities, returning the
610 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
611 */
/*
 * Entry stack: 4(%esp)/8(%esp) = x low/high, 12(%esp)/16(%esp) = y
 * low/high.  x is loaded into %edx:%eax and y is re-pushed for UDiv.
 * Each pushl moves %esp down by 4, so the same 16(%esp) operand names
 * the high word of y first and the low word second.  After the call
 * the two pushed words are dropped and "ret $16" removes the caller's
 * 16 bytes of arguments.
 */
612	ENTRY(__udiv64)
613	movl	4(%esp), %eax	/ x, x
614	movl	8(%esp), %edx	/ x, x
615	pushl	16(%esp)	/ y
616	pushl	16(%esp)
617	call	UDiv
618	addl	$8, %esp
619	ret     $16
620	SET_SIZE(__udiv64)
621
622/*
623 * __urem64
624 *
625 * Perform division of two unsigned 64-bit quantities, returning the
626 * remainder in %edx:%eax.  __urem64 pops the arguments on return.
627 */
/*
 * Reserves 12 bytes of stack; the first 8 (at the new %esp) receive the
 * remainder, and their address is passed to UDivRem as pmod.  With the
 * 12-byte frame in place x is at 16(%esp)/20(%esp).  The two pushes of
 * 32(%esp) copy the high and then the low word of y, as each earlier
 * push has moved %esp down by 4.  After the call the remainder sits
 * above the three pushed words, at 12(%esp)/16(%esp).  "addl $24"
 * drops the pushed words and the frame; "ret $16" removes the caller's
 * arguments.
 */
628	ENTRY(__urem64)
629	subl	$12, %esp
630	movl	%esp, %ecx	/, tmp65
631	movl	16(%esp), %eax	/ x, x
632	movl	20(%esp), %edx	/ x, x
633	pushl	%ecx		/ tmp65
634	pushl	32(%esp)	/ y
635	pushl	32(%esp)
636	call	UDivRem
637	movl	12(%esp), %eax	/ rem, rem
638	movl	16(%esp), %edx	/ rem, rem
639	addl	$24, %esp
640	ret	$16
641	SET_SIZE(__urem64)
642
643/*
644 * __div64
645 *
646 * Perform division of two signed 64-bit quantities, returning the
647 * quotient in %edx:%eax.  __div64 pops the arguments on return.
648 */
649/ int64_t
650/ __div64(int64_t x, int64_t y)
651/ {
652/ 	int		negative;
653/ 	uint64_t	xt, yt, r;
654/
655/ 	if (x < 0) {
656/ 		xt = -(uint64_t) x;
657/ 		negative = 1;
658/ 	} else {
659/ 		xt = x;
660/ 		negative = 0;
661/ 	}
662/ 	if (y < 0) {
663/ 		yt = -(uint64_t) y;
664/ 		negative ^= 1;
665/ 	} else {
666/ 		yt = y;
667/ 	}
668/ 	r = UDiv(xt, yt);
669/ 	return (negative ? (int64_t) - r : r);
670/ }
/*
 * Assembly body for the C reference code shown in the comment above.
 *
 * After the prologue (three pushes plus an 8-byte frame) x is at
 * 24(%esp)/28(%esp) and y at 32(%esp)/36(%esp).  xt is kept in the frame
 * at (%esp)/4(%esp), yt in %edx:%eax, and the "negative" flag in %ebp.
 * A 64-bit value in hi:lo is negated with the sequence
 * negl lo; adcl $0, hi; negl hi.
 */
671	ENTRY(__div64)
672	pushl	%ebp
673	pushl	%edi
674	pushl	%esi
675	subl	$8, %esp
676	movl	28(%esp), %edx	/ x, x
677	testl	%edx, %edx	/ x
678	movl	24(%esp), %eax	/ x, x
679	movl	32(%esp), %esi	/ y, y
680	movl	36(%esp), %edi	/ y, y
681	js	.LL84
/* x >= 0: xt = x, negative = 0; then check the sign of y */
682	xorl	%ebp, %ebp	/ negative
683	testl	%edi, %edi	/ y
684	movl	%eax, (%esp)	/ x, xt
685	movl	%edx, 4(%esp)	/ x, xt
686	movl	%esi, %eax	/ y, yt
687	movl	%edi, %edx	/ y, yt
688	js	.LL85
/*
 * Push yt as UDiv's stack argument, reload xt into %edx:%eax (it is now
 * 8 bytes further from %esp), and call.  The two popl instructions only
 * discard the pushed yt; testl sits between them but popl does not
 * change the flags it sets.
 */
689.LL82:
690	pushl	%edx		/ yt
691	pushl	%eax		/ yt
692	movl	8(%esp), %eax	/ xt, xt
693	movl	12(%esp), %edx	/ xt, xt
694	call	UDiv
695	popl	%ecx
696	testl	%ebp, %ebp	/ negative
697	popl	%esi
698	je	.LL83
/* negative != 0: negate the quotient */
699	negl	%eax		/ r
700	adcl	$0, %edx	/, r
701	negl	%edx		/ r
702.LL83:
703	addl	$8, %esp
704	popl	%esi
705	popl	%edi
706	popl	%ebp
707	ret	$16
708	.align	16
/* x < 0: xt = -x, negative = 1; falls through to .LL85 when y < 0 too */
709.LL84:
710	negl	%eax		/ x
711	adcl	$0, %edx	/, x
712	negl	%edx		/ x
713	testl	%edi, %edi	/ y
714	movl	%eax, (%esp)	/ x, xt
715	movl	%edx, 4(%esp)	/ x, xt
716	movl	$1, %ebp	/, negative
717	movl	%esi, %eax	/ y, yt
718	movl	%edi, %edx	/ y, yt
719	jns	.LL82
720	.align	16
/* y < 0: yt = -y and flip the negative flag */
721.LL85:
722	negl	%eax		/ yt
723	adcl	$0, %edx	/, yt
724	negl	%edx		/ yt
725	xorl	$1, %ebp	/, negative
726	jmp	.LL82
727	SET_SIZE(__div64)
728
729/*
730 * __rem64
731 *
732 * Perform division of two signed 64-bit quantities, returning the
733 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
734 */
735/ int64_t
736/ __rem64(int64_t x, int64_t y)
737/ {
738/ 	uint64_t	xt, yt, rem;
739/
740/ 	if (x < 0) {
741/ 		xt = -(uint64_t) x;
742/ 	} else {
743/ 		xt = x;
744/ 	}
745/ 	if (y < 0) {
746/ 		yt = -(uint64_t) y;
747/ 	} else {
748/ 		yt = y;
749/ 	}
750/ 	(void) UDivRem(xt, yt, &rem);
751/ 	return (x < 0 ? (int64_t) - rem : rem);
752/ }
/*
 * Assembly body for the C reference code shown in the comment above.
 *
 * After the prologue (two pushes plus a 20-byte frame) x is at
 * 32(%esp)/36(%esp) and y at 40(%esp)/44(%esp).  xt is kept in the frame
 * at (%esp)/4(%esp), rem at 8(%esp)/12(%esp), and yt in %edi:%esi.
 */
753	ENTRY(__rem64)
754	pushl	%edi
755	pushl	%esi
756	subl	$20, %esp
757	movl	36(%esp), %ecx	/ x,
758	movl	32(%esp), %esi	/ x,
759	movl	36(%esp), %edi	/ x,
760	testl	%ecx, %ecx
761	movl	40(%esp), %eax	/ y, y
762	movl	44(%esp), %edx	/ y, y
763	movl	%esi, (%esp)	/, xt
764	movl	%edi, 4(%esp)	/, xt
765	js	.LL92
/* x >= 0: xt = x was stored above; now check the sign of y */
766	testl	%edx, %edx	/ y
767	movl	%eax, %esi	/ y, yt
768	movl	%edx, %edi	/ y, yt
769	js	.LL93
/*
 * Push &rem and yt for UDivRem, reload xt into %edx:%eax (it is now 12
 * bytes further from %esp), and call; the returned quotient is ignored.
 * The sign of the original x then decides whether rem is negated.
 */
770.LL90:
771	leal	8(%esp), %eax	/, tmp66
772	pushl	%eax		/ tmp66
773	pushl	%edi		/ yt
774	pushl	%esi		/ yt
775	movl	12(%esp), %eax	/ xt, xt
776	movl	16(%esp), %edx	/ xt, xt
777	call	UDivRem
778	addl	$12, %esp
779	movl	36(%esp), %edi	/ x,
780	testl	%edi, %edi
781	movl	8(%esp), %eax	/ rem, rem
782	movl	12(%esp), %edx	/ rem, rem
783	js	.LL94
784	addl	$20, %esp
785	popl	%esi
786	popl	%edi
787	ret	$16
788	.align	16
/* x < 0: xt = -x; falls through to .LL93 when y < 0 too */
789.LL92:
790	negl	%esi
791	adcl	$0, %edi
792	negl	%edi
793	testl	%edx, %edx	/ y
794	movl	%esi, (%esp)	/, xt
795	movl	%edi, 4(%esp)	/, xt
796	movl	%eax, %esi	/ y, yt
797	movl	%edx, %edi	/ y, yt
798	jns	.LL90
799	.align	16
/* y < 0: yt = -y */
800.LL93:
801	negl	%esi		/ yt
802	adcl	$0, %edi	/, yt
803	negl	%edi		/ yt
804	jmp	.LL90
805	.align	16
/*
 * x < 0: return -rem.  The frame teardown is interleaved with the
 * negation; addl comes after adcl has consumed the carry from negl,
 * and popl does not change the flags.
 */
806.LL94:
807	negl	%eax		/ rem
808	adcl	$0, %edx	/, rem
809	addl	$20, %esp
810	popl	%esi
811	negl	%edx		/ rem
812	popl	%edi
813	ret	$16
814	SET_SIZE(__rem64)
815
816/*
817 * __udivrem64
818 *
819 * Perform division of two unsigned 64-bit quantities, returning the
820 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
821 * pops the arguments on return.
822 */
/*
 * Reserves 12 bytes of stack; the first 8 (at the new %esp) receive the
 * remainder, and their address is passed to UDivRem as pmod.  With the
 * 12-byte frame in place x is at 16(%esp)/20(%esp).  The two pushes of
 * 32(%esp) copy the high and then the low word of y, as each earlier
 * push has moved %esp down by 4.  UDivRem leaves the quotient in
 * %edx:%eax; the remainder is then loaded from above the three pushed
 * words into %ecx (high) and %esi (low).  %esi is overwritten and not
 * restored here.
 */
823	ENTRY(__udivrem64)
824	subl	$12, %esp
825	movl	%esp, %ecx	/, tmp64
826	movl	16(%esp), %eax	/ x, x
827	movl	20(%esp), %edx	/ x, x
828	pushl	%ecx		/ tmp64
829	pushl	32(%esp)	/ y
830	pushl	32(%esp)
831	call	UDivRem
832	movl	16(%esp), %ecx	/ rem, tmp63
833	movl	12(%esp), %esi	/ rem
834	addl	$24, %esp
835	ret	$16
836	SET_SIZE(__udivrem64)
837
838/*
839 * Signed division with remainder.
840 */
841/ int64_t
842/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
843/ {
844/ 	int		negative;
845/ 	uint64_t	xt, yt, r, rem;
846/
847/ 	if (x < 0) {
848/ 		xt = -(uint64_t) x;
849/ 		negative = 1;
850/ 	} else {
851/ 		xt = x;
852/ 		negative = 0;
853/ 	}
854/ 	if (y < 0) {
855/ 		yt = -(uint64_t) y;
856/ 		negative ^= 1;
857/ 	} else {
858/ 		yt = y;
859/ 	}
860/ 	r = UDivRem(xt, yt, &rem);
861/ 	*pmod = (x < 0 ? (int64_t) - rem : rem);
862/ 	return (negative ? (int64_t) - r : r);
863/ }
/*
 * SDivRem: assembly body for the C reference code shown in the comment
 * above (signed 64-bit division with remainder).
 *
 * In:	%edx:%eax = x.  On entry 4(%esp)/8(%esp) hold the low/high
 *	words of y and 12(%esp) holds pmod; after the prologue below
 *	they are at 40(%esp)/44(%esp) and 48(%esp).
 * Out:	%edx:%eax = quotient; the remainder is stored through pmod.
 *	Returns with a plain "ret", leaving the stack arguments in place.
 * Regs:	%ebp, %edi and %esi are saved and restored; %ecx is overwritten.
 *
 * Frame slots: (%esp)/4(%esp) = r, 8/12(%esp) = xt, 16/20(%esp) = rem.
 * %edi keeps HI(x) for the later sign test and %ebp holds "negative".
 */
864	ENTRY(SDivRem)
865	pushl	%ebp
866	pushl	%edi
867	pushl	%esi
868	subl	$24, %esp
869	testl	%edx, %edx	/ x
870	movl	%edx, %edi	/ x, x
871	js	.LL73
/* x >= 0: xt = x, negative = 0; yt goes in %ecx:%edx; check sign of y */
872	movl	44(%esp), %esi	/ y,
873	xorl	%ebp, %ebp	/ negative
874	testl	%esi, %esi
875	movl	%edx, 12(%esp)	/ x, xt
876	movl	%eax, 8(%esp)	/ x, xt
877	movl	40(%esp), %edx	/ y, yt
878	movl	44(%esp), %ecx	/ y, yt
879	js	.LL74
/*
 * Push &rem and yt for UDivRem and reload xt into %edx:%eax (it is now
 * 12 bytes further from %esp).  The quotient is stored to r while the
 * three pushed words are still on the stack, hence the 12/16(%esp)
 * offsets.  After the words are dropped, rem is loaded into %ecx:%edx
 * and negated at .LL75 if x was negative.
 */
880.LL70:
881	leal	16(%esp), %eax	/, tmp70
882	pushl	%eax		/ tmp70
883	pushl	%ecx		/ yt
884	pushl	%edx		/ yt
885	movl	20(%esp), %eax	/ xt, xt
886	movl	24(%esp), %edx	/ xt, xt
887	call	UDivRem
888	movl	%edx, 16(%esp)	/, r
889	movl	%eax, 12(%esp)	/, r
890	addl	$12, %esp
891	testl	%edi, %edi	/ x
892	movl	16(%esp), %edx	/ rem, rem
893	movl	20(%esp), %ecx	/ rem, rem
894	js	.LL75
/* *pmod = rem; return r, negated if negative != 0 */
895.LL71:
896	movl	48(%esp), %edi	/ pmod, pmod
897	testl	%ebp, %ebp	/ negative
898	movl	%edx, (%edi)	/ rem,* pmod
899	movl	%ecx, 4(%edi)	/ rem,
900	movl	(%esp), %eax	/ r, r
901	movl	4(%esp), %edx	/ r, r
902	je	.LL72
903	negl	%eax		/ r
904	adcl	$0, %edx	/, r
905	negl	%edx		/ r
906.LL72:
907	addl	$24, %esp
908	popl	%esi
909	popl	%edi
910	popl	%ebp
911	ret
912	.align	16
/*
 * x < 0: xt = -x, negative = 1.  The load of HI(y) sits between adcl
 * and negl; movl does not change the flags.  Falls through to .LL74
 * when y < 0 too.
 */
913.LL73:
914	negl	%eax
915	adcl	$0, %edx
916	movl	44(%esp), %esi	/ y,
917	negl	%edx
918	testl	%esi, %esi
919	movl	%edx, 12(%esp)	/, xt
920	movl	%eax, 8(%esp)	/, xt
921	movl	$1, %ebp	/, negative
922	movl	40(%esp), %edx	/ y, yt
923	movl	44(%esp), %ecx	/ y, yt
924	jns	.LL70
925	.align	16
/* y < 0: yt = -y and flip the negative flag */
926.LL74:
927	negl	%edx		/ yt
928	adcl	$0, %ecx	/, yt
929	negl	%ecx		/ yt
930	xorl	$1, %ebp	/, negative
931	jmp	.LL70
932	.align	16
/* x < 0: rem = -rem */
933.LL75:
934	negl	%edx		/ rem
935	adcl	$0, %ecx	/, rem
936	negl	%ecx		/ rem
937	jmp	.LL71
938	SET_SIZE(SDivRem)
939
940/*
941 * __divrem64
942 *
943 * Perform division of two signed 64-bit quantities, returning the
944 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
945 * pops the arguments on return.
946 */
/*
 * Reserves 20 bytes of stack; the first 8 (at the new %esp) receive the
 * remainder, and their address is passed to SDivRem as pmod.  With the
 * 20-byte frame in place x is at 24(%esp)/28(%esp).  The two pushes of
 * 40(%esp) copy the high and then the low word of y, as each earlier
 * push has moved %esp down by 4.  SDivRem leaves the quotient in
 * %edx:%eax; the remainder is then loaded from above the three pushed
 * words into %ecx (high) and %esi (low).  "addl $32" drops the pushed
 * words and the frame; "ret $16" removes the caller's arguments.
 * %esi is overwritten and not restored here.
 */
947	ENTRY(__divrem64)
948	subl	$20, %esp
949	movl	%esp, %ecx	/, tmp64
950	movl	24(%esp), %eax	/ x, x
951	movl	28(%esp), %edx	/ x, x
952	pushl	%ecx		/ tmp64
953	pushl	40(%esp)	/ y
954	pushl	40(%esp)
955	call	SDivRem
956	movl	16(%esp), %ecx
957	movl	12(%esp),%esi	/ rem
958	addl	$32, %esp
959	ret	$16
960	SET_SIZE(__divrem64)
961