xref: /titanic_41/usr/src/lib/libc/i386/gen/_div64.s (revision 4d0e50075058332ce0cd62bc2669a8a4dea45da0)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26	.file	"_div64.s"
27
28#include "SYS.h"
29
30/*
31 * C support for 64-bit modulo and division.
32 * Hand-customized compiler output - see comments for details.
33 */
34
35/*
36 * int32_t/int64_t division/manipulation
37 *
38 * Hand-customized compiler output: the non-GCC entry points depart from
39 * the SYS V ABI by requiring their arguments to be popped, and in the
40 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
41 * compiler-generated use of %edx:%eax for the first argument of
42 * internal entry points.
43 *
44 * Inlines for speed:
45 * - counting the number of leading zeros in a word
46 * - multiplying two 32-bit numbers giving a 64-bit result
47 * - dividing a 64-bit number by a 32-bit number, giving both quotient
48 *	and remainder
49 * - subtracting two 64-bit results
50 */
51/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
52/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
53/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
54/
55/ /* give index of highest bit */
56/ #define	HIBIT(a, r) \
57/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
58/
59/ /* multiply two uint32_ts resulting in a uint64_t */
60/ #define	A_MUL32(a, b, lo, hi) \
61/     asm("mull %2" \
62/ 	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
63/
64/ /* divide a uint64_t by a uint32_t */
65/ #define	A_DIV32(lo, hi, b, q, r) \
66/     asm("divl %2" \
67/ 	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
68/ 	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
69/
70/ /* subtract two uint64_ts (with borrow) */
71/ #define	A_SUB2(bl, bh, al, ah) \
72/     asm("subl %4,%0\n\tsbbl %5,%1" \
73/ 	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
74/ 	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
75/ 	"g"((uint32_t)(bh)))
76/
77/ /*
78/  * Unsigned division with remainder.
79/  * Divide two uint64_ts, and calculate remainder.
80/  */
81/ uint64_t
82/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
83/ {
84/ 	/* simple cases: y is a single uint32_t */
85/ 	if (HI(y) == 0) {
86/ 		uint32_t	div_hi, div_rem;
87/ 		uint32_t 	q0, q1;
88/
89/ 		/* calculate q1 */
90/ 		if (HI(x) < LO(y)) {
91/ 			/* result is a single uint32_t, use one division */
92/ 			q1 = 0;
93/ 			div_hi = HI(x);
94/ 		} else {
95/ 			/* result is a double uint32_t, use two divisions */
96/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
97/ 		}
98/
99/ 		/* calculate q0 and remainder */
100/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
101/
102/ 		/* return remainder */
103/ 		*pmod = div_rem;
104/
105/ 		/* return result */
106/ 		return (HILO(q1, q0));
107/
108/ 	} else if (HI(x) < HI(y)) {
109/ 		/* HI(x) < HI(y) => x < y => result is 0 */
110/
111/ 		/* return remainder */
112/ 		*pmod = x;
113/
114/ 		/* return result */
115/ 		return (0);
116/
117/ 	} else {
118/ 		/*
119/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
120/ 		 * result
121/ 		 */
122/ 		uint32_t		y0, y1;
123/ 		uint32_t		x1, x0;
124/ 		uint32_t		q0;
125/ 		uint32_t		normshift;
126/
127/ 		/* normalize by shifting x and y so MSB(y) == 1 */
128/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
129/ 		normshift = 31 - normshift;
130/
131/ 		if (normshift == 0) {
132/ 			/* no shifting needed, and x < 2*y so q <= 1 */
133/ 			y1 = HI(y);
134/ 			y0 = LO(y);
135/ 			x1 = HI(x);
136/ 			x0 = LO(x);
137/
138/ 			/* if x >= y then q = 1 (note x1 >= y1) */
139/ 			if (x1 > y1 || x0 >= y0) {
140/ 				q0 = 1;
141/ 				/* subtract y from x to get remainder */
142/ 				A_SUB2(y0, y1, x0, x1);
143/ 			} else {
144/ 				q0 = 0;
145/ 			}
146/
147/ 			/* return remainder */
148/ 			*pmod = HILO(x1, x0);
149/
150/ 			/* return result */
151/ 			return (q0);
152/
153/ 		} else {
154/ 			/*
155/ 			 * the last case: result is one uint32_t, but we need to
156/ 			 * normalize
157/ 			 */
158/ 			uint64_t	dt;
159/ 			uint32_t		t0, t1, x2;
160/
161/ 			/* normalize y */
162/ 			dt = (y << normshift);
163/ 			y1 = HI(dt);
164/ 			y0 = LO(dt);
165/
166/ 			/* normalize x (we need 3 uint32_ts!!!) */
167/ 			x2 = (HI(x) >> (32 - normshift));
168/ 			dt = (x << normshift);
169/ 			x1 = HI(dt);
170/ 			x0 = LO(dt);
171/
172/ 			/* estimate q0, and reduce x to a two uint32_t value */
173/ 			A_DIV32(x1, x2, y1, q0, x1);
174/
175/ 			/* adjust q0 down if too high */
176/ 			/*
177/ 			 * because of the limited range of x2 we can only be
178/ 			 * one off
179/ 			 */
180/ 			A_MUL32(y0, q0, t0, t1);
181/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
182/ 				q0--;
183/ 				A_SUB2(y0, y1, t0, t1);
184/ 			}
185/ 			/* return remainder */
186/ 			/* subtract product from x to get remainder */
187/ 			A_SUB2(t0, t1, x0, x1);
188/ 			*pmod = (HILO(x1, x0) >> normshift);
189/
190/ 			/* return result */
191/ 			return (q0);
192/ 		}
193/ 	}
194/ }
/*
 * UDivRem as implemented below:
 *   in:   x in %edx:%eax (spilled to 40/44(%esp));
 *         y and pmod on the stack, at 64/68(%esp) and 72(%esp) once the
 *         60-byte frame (three pushes plus 48 bytes) has been built
 *   out:  quotient in %edx:%eax, remainder stored through pmod
 * %ebp, %edi and %esi are saved and restored; %ecx is used as scratch.
 * Every exit is a plain ret, so the stack arguments are left in place.
 */
195	ENTRY(UDivRem)
196	pushl	%ebp
197	pushl	%edi
198	pushl	%esi
199	subl	$48, %esp
	/* %edi = %esi = %ecx = HI(y); the jne below consumes the testl flags */
200	movl	68(%esp), %edi	/ y,
201	testl	%edi, %edi	/ tmp63
202	movl	%eax, 40(%esp)	/ x, x
203	movl	%edx, 44(%esp)	/ x, x
204	movl	%edi, %esi	/, tmp62
205	movl	%edi, %ecx	/ tmp62, tmp63
206	jne	.LL2
	/*
	 * HI(y) == 0.  Compare HI(x) with LO(y); when HI(x) >= LO(y) the
	 * quotient needs two words, so do the extra division at .LL21 first.
	 */
207	movl	%edx, %eax	/, tmp68
208	cmpl	64(%esp), %eax	/ y, tmp68
209	jae	.LL21
	/*
	 * Here %ecx = q1 and %edx = div_hi (0 and HI(x) on the fall-through
	 * path, the results of the first division when coming from .LL21).
	 * Divide div_hi:LO(x) by LO(y): quotient q0 in %eax, remainder in %edx.
	 */
210.LL4:
211	movl	72(%esp), %ebp	/ pmod,
212	xorl	%esi, %esi	/ <result>
213	movl	40(%esp), %eax	/ x, q0
214	movl	%ecx, %edi	/ <result>, <result>
215	divl	64(%esp)	/ y
	/* *pmod = remainder (high word 0); result %edi:%esi = q1:q0 */
216	movl	%edx, (%ebp)	/ div_rem,
217	xorl	%edx, %edx	/ q0
218	addl	%eax, %esi	/ q0, <result>
219	movl	$0, 4(%ebp)
220	adcl	%edx, %edi	/ q0, <result>
221	addl	$48, %esp
222	movl	%esi, %eax	/ <result>, <result>
223	popl	%esi
224	movl	%edi, %edx	/ <result>, <result>
225	popl	%edi
226	popl	%ebp
227	ret
228	.align	16
	/*
	 * HI(y) != 0.  Keep HI(x) in 32(%esp) and compare it with HI(y);
	 * HI(x) >= HI(y) goes on to .LL6.
	 * NOTE(review): 36(%esp) is written here but not read again in this
	 * function.
	 */
229.LL2:
230	movl	44(%esp), %eax	/ x,
231	xorl	%edx, %edx
232	cmpl	%esi, %eax	/ tmp62, tmp5
233	movl	%eax, 32(%esp)	/ tmp5,
234	movl	%edx, 36(%esp)
235	jae	.LL6
	/* HI(x) < HI(y): the quotient is 0 and the remainder is x itself */
236	movl	72(%esp), %esi	/ pmod,
237	movl	40(%esp), %ebp	/ x,
238	movl	44(%esp), %ecx	/ x,
239	movl	%ebp, (%esi)
240	movl	%ecx, 4(%esi)
241	xorl	%edi, %edi	/ <result>
242	xorl	%esi, %esi	/ <result>
	/* shared exit: return %edi:%esi in %edx:%eax */
243.LL22:
244	addl	$48, %esp
245	movl	%esi, %eax	/ <result>, <result>
246	popl	%esi
247	movl	%edi, %edx	/ <result>, <result>
248	popl	%edi
249	popl	%ebp
250	ret
251	.align	16
	/*
	 * First of two divisions: %edi holds HI(y), which is 0 on this path,
	 * so this divides 0:HI(x) by LO(y).  q1 goes to %ecx and the
	 * remainder stays in %edx as div_hi for .LL4.
	 */
252.LL21:
253	movl	%edi, %edx	/ tmp63, div_hi
254	divl	64(%esp)	/ y
255	movl	%eax, %ecx	/, q1
256	jmp	.LL4
257	.align	16
	/*
	 * normshift = 31 - (index of the highest set bit of HI(y)), kept in
	 * 28(%esp).  The jne tests the subl result; the movl in between does
	 * not change the flags.
	 */
258.LL6:
259	movl	$31, %edi	/, tmp87
260	bsrl	%esi,%edx	/ tmp62, normshift
261	subl	%edx, %edi	/ normshift, tmp87
262	movl	%edi, 28(%esp)	/ tmp87,
263	jne	.LL8
	/*
	 * normshift == 0: %edx = x1, %ecx = y1, %edi = y0, %esi = x0.
	 * q0 is 1 when x1 > y1 or x0 >= y0, otherwise 0.
	 */
264	movl	32(%esp), %edx	/, x1
265	cmpl	%ecx, %edx	/ y1, x1
266	movl	64(%esp), %edi	/ y, y0
267	movl	40(%esp), %esi	/ x, x0
268	ja	.LL10
269	xorl	%ebp, %ebp	/ q0
270	cmpl	%edi, %esi	/ y0, x0
271	jb	.LL11
	/* q0 = 1; x1:x0 -= y1:y0 (64-bit subtract with borrow) */
272.LL10:
273	movl	$1, %ebp	/, q0
274	subl	%edi,%esi	/ y0, x0
275	sbbl	%ecx,%edx	/ tmp63, x1
	/* *pmod = x1:x0 (assembled in %ecx:%edx); result = q0, high word 0 */
276.LL11:
277	movl	%edx, %ecx	/ x1, x1
278	xorl	%edx, %edx	/ x1
279	xorl	%edi, %edi	/ x0
280	addl	%esi, %edx	/ x0, x1
281	adcl	%edi, %ecx	/ x0, x1
282	movl	72(%esp), %esi	/ pmod,
283	movl	%edx, (%esi)	/ x1,
284	movl	%ecx, 4(%esi)	/ x1,
285	xorl	%edi, %edi	/ <result>
286	movl	%ebp, %esi	/ q0, <result>
287	jmp	.LL22
288	.align	16
	/*
	 * normshift != 0: %edi:%esi = y << normshift, using the two-register
	 * shift sequence (shldl and sall both shift by %cl).  The andl/jne
	 * pair tests bit 5 of the count and branches to the fix-up at .LL23.
	 */
289.LL8:
290	movb	28(%esp), %cl
291	movl	64(%esp), %esi	/ y, dt
292	movl	68(%esp), %edi	/ y, dt
293	shldl	%esi, %edi	/, dt, dt
294	sall	%cl, %esi	/, dt
295	andl	$32, %ecx
296	jne	.LL23
	/*
	 * %ebp = y0, 24(%esp) = y1, 12(%esp) = x2 = HI(x) >> (32 - normshift),
	 * then %edi:%esi = x << normshift (x1:x0) with the same shift sequence.
	 */
297.LL17:
298	movl	$32, %ecx	/, tmp102
299	subl	28(%esp), %ecx	/, tmp102
300	movl	%esi, %ebp	/ dt, y0
301	movl	32(%esp), %esi
302	shrl	%cl, %esi	/ tmp102,
303	movl	%edi, 24(%esp)	/ tmp99,
304	movb	28(%esp), %cl
305	movl	%esi, 12(%esp)	/, x2
306	movl	44(%esp), %edi	/ x, dt
307	movl	40(%esp), %esi	/ x, dt
308	shldl	%esi, %edi	/, dt, dt
309	sall	%cl, %esi	/, dt
310	andl	$32, %ecx
311	je	.LL18
312	movl	%esi, %edi	/ dt, dt
313	xorl	%esi, %esi	/ dt
	/*
	 * Estimate q0: divide x2:x1 by y1.  The quotient is saved in
	 * 20(%esp) and the remainder becomes the new x1 in %ecx.  Then
	 * t1:t0 = y0 * q0 lands in %edx:%eax and t1 is compared with x1.
	 * NOTE(review): the stores to (%esp) and 4(%esp) are not read again
	 * in this function.
	 */
314.LL18:
315	movl	%edi, %ecx	/ dt,
316	movl	%edi, %eax	/ tmp2,
317	movl	%ecx, (%esp)
318	movl	12(%esp), %edx	/ x2,
319	divl	24(%esp)
320	movl	%edx, %ecx	/, x1
321	xorl	%edi, %edi
322	movl	%eax, 20(%esp)
323	movl	%ebp, %eax	/ y0, t0
324	mull	20(%esp)
325	cmpl	%ecx, %edx	/ x1, t1
326	movl	%edi, 4(%esp)
327	ja	.LL14
328	je	.LL24
	/*
	 * remainder = (x1:x0 - t1:t0) >> normshift, shifted back with
	 * shrdl/shrl by %cl and the same bit-5 test as above.
	 */
329.LL15:
330	movl	%ecx, %edi	/ x1,
331	subl	%eax,%esi	/ t0, x0
332	sbbl	%edx,%edi	/ t1,
333	movl	%edi, %eax	/, x1
334	movl	%eax, %edx	/ x1, x1
335	xorl	%eax, %eax	/ x1
336	xorl	%ebp, %ebp	/ x0
337	addl	%esi, %eax	/ x0, x1
338	adcl	%ebp, %edx	/ x0, x1
339	movb	28(%esp), %cl
340	shrdl	%edx, %eax	/, x1, x1
341	shrl	%cl, %edx	/, x1
342	andl	$32, %ecx
343	je	.LL16
344	movl	%edx, %eax	/ x1, x1
345	xorl	%edx, %edx	/ x1
	/* *pmod = remainder; result = q0 reloaded from 20(%esp), high word 0 */
346.LL16:
347	movl	72(%esp), %ecx	/ pmod,
348	movl	20(%esp), %esi	/, <result>
349	xorl	%edi, %edi	/ <result>
350	movl	%eax, (%ecx)	/ x1,
351	movl	%edx, 4(%ecx)	/ x1,
352	jmp	.LL22
353	.align	16
	/* t1 == x1: the low words decide; t0 <= x0 means q0 needs no change */
354.LL24:
355	cmpl	%esi, %eax	/ x0, t0
356	jbe	.LL15
	/* estimate was too high: q0-- and t1:t0 -= y1:y0 */
357.LL14:
358	decl	20(%esp)
359	subl	%ebp,%eax	/ y0, t0
360	sbbl	24(%esp),%edx	/, t1
361	jmp	.LL15
	/*
	 * Fix-up for a y shift count with bit 5 set: move the low word up and
	 * clear the low word.
	 * NOTE(review): normshift is 31 minus a bsrl index and non-zero here,
	 * so this path looks unreachable - confirm before relying on it.
	 */
362.LL23:
363	movl	%esi, %edi	/ dt, dt
364	xorl	%esi, %esi	/ dt
365	jmp	.LL17
366	SET_SIZE(UDivRem)
367
368/*
369 * Unsigned division without remainder.
370 */
371/ uint64_t
372/ UDiv(uint64_t x, uint64_t y)
373/ {
374/ 	if (HI(y) == 0) {
375/ 		/* simple cases: y is a single uint32_t */
376/ 		uint32_t	div_hi, div_rem;
377/ 		uint32_t	q0, q1;
378/
379/ 		/* calculate q1 */
380/ 		if (HI(x) < LO(y)) {
381/ 			/* result is a single uint32_t, use one division */
382/ 			q1 = 0;
383/ 			div_hi = HI(x);
384/ 		} else {
385/ 			/* result is a double uint32_t, use two divisions */
386/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
387/ 		}
388/
389/ 		/* calculate q0 and remainder */
390/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
391/
392/ 		/* return result */
393/ 		return (HILO(q1, q0));
394/
395/ 	} else if (HI(x) < HI(y)) {
396/ 		/* HI(x) < HI(y) => x < y => result is 0 */
397/
398/ 		/* return result */
399/ 		return (0);
400/
401/ 	} else {
402/ 		/*
403/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
404/ 		 * result
405/ 		 */
406/ 		uint32_t		y0, y1;
407/ 		uint32_t		x1, x0;
408/ 		uint32_t		q0;
409/ 		unsigned		normshift;
410/
411/ 		/* normalize by shifting x and y so MSB(y) == 1 */
412/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
413/ 		normshift = 31 - normshift;
414/
415/ 		if (normshift == 0) {
416/ 			/* no shifting needed, and x < 2*y so q <= 1 */
417/ 			y1 = HI(y);
418/ 			y0 = LO(y);
419/ 			x1 = HI(x);
420/ 			x0 = LO(x);
421/
422/ 			/* if x >= y then q = 1 (note x1 >= y1) */
423/ 			if (x1 > y1 || x0 >= y0) {
424/ 				q0 = 1;
425/ 				/* subtract y from x to get remainder */
426/ 				/* A_SUB2(y0, y1, x0, x1); */
427/ 			} else {
428/ 				q0 = 0;
429/ 			}
430/
431/ 			/* return result */
432/ 			return (q0);
433/
434/ 		} else {
435/ 			/*
436/ 			 * the last case: result is one uint32_t, but we need to
437/ 			 * normalize
438/ 			 */
439/ 			uint64_t	dt;
440/ 			uint32_t		t0, t1, x2;
441/
442/ 			/* normalize y */
443/ 			dt = (y << normshift);
444/ 			y1 = HI(dt);
445/ 			y0 = LO(dt);
446/
447/ 			/* normalize x (we need 3 uint32_ts!!!) */
448/ 			x2 = (HI(x) >> (32 - normshift));
449/ 			dt = (x << normshift);
450/ 			x1 = HI(dt);
451/ 			x0 = LO(dt);
452/
453/ 			/* estimate q0, and reduce x to a two uint32_t value */
454/ 			A_DIV32(x1, x2, y1, q0, x1);
455/
456/ 			/* adjust q0 down if too high */
457/ 			/*
458/ 			 * because of the limited range of x2 we can only be
459/ 			 * one off
460/ 			 */
461/ 			A_MUL32(y0, q0, t0, t1);
462/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
463/ 				q0--;
464/ 			}
465/ 			/* return result */
466/ 			return (q0);
467/ 		}
468/ 	}
469/ }
/*
 * UDiv as implemented below:
 *   in:   x in %edx:%eax (spilled to 32/36(%esp));
 *         y on the stack, at 56/60(%esp) once the 52-byte frame (three
 *         pushes plus 40 bytes) has been built
 *   out:  quotient in %edx:%eax
 * %ebp, %edi and %esi are saved and restored; %ecx is used as scratch.
 * Every exit is a plain ret, so the stack arguments are left in place.
 */
470	ENTRY(UDiv)
471	pushl	%ebp
472	pushl	%edi
473	pushl	%esi
474	subl	$40, %esp
	/* %edx = %ecx = %eax = HI(y); the jne below consumes the testl flags */
475	movl	%edx, 36(%esp)	/ x, x
476	movl	60(%esp), %edx	/ y,
477	testl	%edx, %edx	/ tmp62
478	movl	%eax, 32(%esp)	/ x, x
479	movl	%edx, %ecx	/ tmp61, tmp62
480	movl	%edx, %eax	/, tmp61
481	jne	.LL26
	/*
	 * HI(y) == 0.  When HI(x) < LO(y) a single division is enough:
	 * div_hi = HI(x) in %edx and q1 = %ecx, which still holds HI(y) = 0.
	 */
482	movl	36(%esp), %esi	/ x,
483	cmpl	56(%esp), %esi	/ y, tmp67
484	movl	%esi, %eax	/, tmp67
485	movl	%esi, %edx	/ tmp67, div_hi
486	jb	.LL28
	/* otherwise divide 0:HI(x) by LO(y): q1 to %ecx, remainder in %edx */
487	movl	%ecx, %edx	/ tmp62, div_hi
488	divl	56(%esp)	/ y
489	movl	%eax, %ecx	/, q1
	/* q0 = div_hi:LO(x) / LO(y); result %edi:%esi = q1:q0 */
490.LL28:
491	xorl	%esi, %esi	/ <result>
492	movl	%ecx, %edi	/ <result>, <result>
493	movl	32(%esp), %eax	/ x, q0
494	xorl	%ecx, %ecx	/ q0
495	divl	56(%esp)	/ y
496	addl	%eax, %esi	/ q0, <result>
497	adcl	%ecx, %edi	/ q0, <result>
	/* exit: return %edi:%esi in %edx:%eax */
498.LL25:
499	addl	$40, %esp
500	movl	%esi, %eax	/ <result>, <result>
501	popl	%esi
502	movl	%edi, %edx	/ <result>, <result>
503	popl	%edi
504	popl	%ebp
505	ret
506	.align	16
	/*
	 * HI(y) != 0.  24(%esp) = HI(x); the result is preset to 0 and
	 * returned as is when HI(x) < HI(y) (%eax holds HI(y)).
	 * NOTE(review): 28(%esp) is written here but not read again in this
	 * function.
	 */
507.LL26:
508	movl	36(%esp), %esi	/ x,
509	xorl	%edi, %edi
510	movl	%esi, 24(%esp)	/ tmp1,
511	movl	%edi, 28(%esp)
512	xorl	%esi, %esi	/ <result>
513	xorl	%edi, %edi	/ <result>
514	cmpl	%eax, 24(%esp)	/ tmp61,
515	jb	.LL25
	/* %eax = normshift = 31 - (index of the highest set bit of HI(y)) */
516	bsrl	%eax,%ebp	/ tmp61, normshift
517	movl	$31, %eax	/, tmp85
518	subl	%ebp, %eax	/ normshift, normshift
519	jne	.LL32
	/*
	 * normshift == 0: %eax = x1, %ecx = y1, %esi = y0, %edx = x0.
	 * q0 is 1 when x1 > y1 or x0 >= y0, otherwise 0.
	 */
520	movl	24(%esp), %eax	/, x1
521	cmpl	%ecx, %eax	/ tmp62, x1
522	movl	56(%esp), %esi	/ y, y0
523	movl	32(%esp), %edx	/ x, x0
524	ja	.LL34
525	xorl	%eax, %eax	/ q0
526	cmpl	%esi, %edx	/ y0, x0
527	jb	.LL35
528.LL34:
529	movl	$1, %eax	/, q0
530.LL35:
531	movl	%eax, %esi	/ q0, <result>
532	xorl	%edi, %edi	/ <result>
	/* second copy of the exit sequence: return %edi:%esi in %edx:%eax */
533.LL45:
534	addl	$40, %esp
535	movl	%esi, %eax	/ <result>, <result>
536	popl	%esi
537	movl	%edi, %edx	/ <result>, <result>
538	popl	%edi
539	popl	%ebp
540	ret
541	.align	16
	/*
	 * normshift != 0: %edi:%esi = y << normshift, using the two-register
	 * shift sequence (shldl and sall both shift by %cl).  The andl/jne
	 * pair tests bit 5 of the count and branches to the fix-up at .LL43.
	 */
542.LL32:
543	movb	%al, %cl
544	movl	56(%esp), %esi	/ y,
545	movl	60(%esp), %edi	/ y,
546	shldl	%esi, %edi
547	sall	%cl, %esi
548	andl	$32, %ecx
549	jne	.LL43
	/*
	 * 8(%esp) = y1, 16(%esp) = y0, %ebp = x2 = HI(x) >> (32 - normshift),
	 * then %edi:%esi = x << normshift (x1:x0).
	 * NOTE(review): the stores to 20(%esp) and 12(%esp) are not read
	 * again in this function.
	 */
550.LL40:
551	movl	$32, %ecx	/, tmp96
552	subl	%eax, %ecx	/ normshift, tmp96
553	movl	%edi, %edx
554	movl	%edi, 20(%esp)	/, dt
555	movl	24(%esp), %ebp	/, x2
556	xorl	%edi, %edi
557	shrl	%cl, %ebp	/ tmp96, x2
558	movl	%esi, 16(%esp)	/, dt
559	movb	%al, %cl
560	movl	32(%esp), %esi	/ x, dt
561	movl	%edi, 12(%esp)
562	movl	36(%esp), %edi	/ x, dt
563	shldl	%esi, %edi	/, dt, dt
564	sall	%cl, %esi	/, dt
565	andl	$32, %ecx
566	movl	%edx, 8(%esp)
567	je	.LL41
568	movl	%esi, %edi	/ dt, dt
569	xorl	%esi, %esi	/ dt
	/*
	 * Estimate q0: divide x2:x1 by y1.  The quotient moves to %ecx and
	 * the remainder becomes the new x1 in %ebp.  Then t1:t0 = y0 * q0
	 * lands in %edx:%eax, t1 is compared with x1, and %edi = x0.
	 * NOTE(review): the stores to 4(%esp) and (%esp) are not read again
	 * in this function.
	 */
570.LL41:
571	xorl	%ecx, %ecx
572	movl	%edi, %eax	/ tmp1,
573	movl	%ebp, %edx	/ x2,
574	divl	8(%esp)
575	movl	%edx, %ebp	/, x1
576	movl	%ecx, 4(%esp)
577	movl	%eax, %ecx	/, q0
578	movl	16(%esp), %eax	/ dt,
579	mull	%ecx	/ q0
580	cmpl	%ebp, %edx	/ x1, t1
581	movl	%edi, (%esp)
582	movl	%esi, %edi	/ dt, x0
583	ja	.LL38
584	je	.LL44
	/* estimate stands: result = q0 with a zero high word */
585.LL39:
586	movl	%ecx, %esi	/ q0, <result>
587.LL46:
588	xorl	%edi, %edi	/ <result>
589	jmp	.LL45
	/* t1 == x1: the low words decide; t0 <= x0 means q0 needs no change */
590.LL44:
591	cmpl	%edi, %eax	/ x0, t0
592	jbe	.LL39
	/* estimate was too high: result = q0 - 1 */
593.LL38:
594	decl	%ecx		/ q0
595	movl	%ecx, %esi	/ q0, <result>
596	jmp	.LL46
	/*
	 * Fix-up for a y shift count with bit 5 set: move the low word up and
	 * clear the low word.
	 * NOTE(review): normshift is 31 minus a bsrl index and non-zero here,
	 * so this path looks unreachable - confirm before relying on it.
	 */
597.LL43:
598	movl	%esi, %edi
599	xorl	%esi, %esi
600	jmp	.LL40
601	SET_SIZE(UDiv)
602
603/*
604 * __udiv64
605 *
606 * Perform division of two unsigned 64-bit quantities, returning the
607 * quotient in %edx:%eax.  __udiv64 pops the arguments on return,
608 */
/*
 * Stack on entry: 4(%esp) = x, 12(%esp) = y, low word first.
 * UDiv takes x in %edx:%eax and y on the stack, and hands the quotient
 * back in %edx:%eax.
 */
	ENTRY(__udiv64)
	pushl	16(%esp)		/ HI(y)
	pushl	16(%esp)		/ LO(y): %esp moved down 4, same offset
	movl	12(%esp), %eax		/ LO(x), now 8 bytes further up
	movl	16(%esp), %edx		/ HI(x)
	call	UDiv
	addl	$8, %esp		/ drop the copy of y
	ret	$16			/ pop x and y for the caller
	SET_SIZE(__udiv64)
618
619/*
620 * __urem64
621 *
622 * Perform division of two unsigned 64-bit quantities, returning the
623 * remainder in %edx:%eax.  __urem64 pops the arguments on return
624 */
/*
 * Stack on entry: 4(%esp) = x, 12(%esp) = y, low word first.
 * UDivRem takes x in %edx:%eax with y and a result pointer on the stack;
 * the remainder it stores is reloaded into %edx:%eax for the caller.
 */
	ENTRY(__urem64)
	subl	$12, %esp		/ scratch area; rem uses the low 8 bytes
	leal	(%esp), %ecx		/ address of rem
	pushl	%ecx			/ pmod
	pushl	32(%esp)		/ HI(y)
	pushl	32(%esp)		/ LO(y): %esp moved down 4, same offset
	movl	28(%esp), %eax		/ LO(x)
	movl	32(%esp), %edx		/ HI(x)
	call	UDivRem
	addl	$12, %esp		/ drop the outgoing arguments
	movl	(%esp), %eax		/ LO(rem)
	movl	4(%esp), %edx		/ HI(rem)
	addl	$12, %esp		/ release the scratch area
	ret	$16			/ pop x and y for the caller
	SET_SIZE(__urem64)
639
640/*
641 * __div64
642 *
643 * Perform division of two signed 64-bit quantities, returning the
644 * quotient in %edx:%eax.  __div64 pops the arguments on return.
645 */
646/ int64_t
647/ __div64(int64_t x, int64_t y)
648/ {
649/ 	int		negative;
650/ 	uint64_t	xt, yt, r;
651/
652/ 	if (x < 0) {
653/ 		xt = -(uint64_t) x;
654/ 		negative = 1;
655/ 	} else {
656/ 		xt = x;
657/ 		negative = 0;
658/ 	}
659/ 	if (y < 0) {
660/ 		yt = -(uint64_t) y;
661/ 		negative ^= 1;
662/ 	} else {
663/ 		yt = y;
664/ 	}
665/ 	r = UDiv(xt, yt);
666/ 	return (negative ? (int64_t) - r : r);
667/ }
/*
 * Stack on entry: 4(%esp) = x, 12(%esp) = y, low word first.
 * Register roles: %edx:%eax = xt, %edi:%esi = yt, %ebp = negative flag.
 * A 64-bit negate is done as negl lo / adcl $0, hi / negl hi.
 */
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	movl	16(%esp), %eax		/ LO(x)
	movl	20(%esp), %edx		/ HI(x)
	movl	24(%esp), %esi		/ LO(y)
	movl	28(%esp), %edi		/ HI(y)
	xorl	%ebp, %ebp		/ negative = 0
	testl	%edx, %edx
	jns	.LL84			/ x >= 0: xt = x
	negl	%eax			/ xt = -x
	adcl	$0, %edx
	negl	%edx
	movl	$1, %ebp		/ negative = 1
.LL84:
	testl	%edi, %edi
	jns	.LL85			/ y >= 0: yt = y
	negl	%esi			/ yt = -y
	adcl	$0, %edi
	negl	%edi
	xorl	$1, %ebp		/ negative ^= 1
.LL85:
	pushl	%edi			/ HI(yt)
	pushl	%esi			/ LO(yt)
	call	UDiv			/ r in %edx:%eax
	addl	$8, %esp		/ drop yt
	testl	%ebp, %ebp
	je	.LL83			/ signs agreed: return r unchanged
	negl	%eax			/ r = -r
	adcl	$0, %edx
	negl	%edx
.LL83:
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16			/ pop x and y for the caller
	SET_SIZE(__div64)
725
726/*
727 * __rem64
728 *
729 * Perform division of two signed 64-bit quantities, returning the
730 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
731 */
732/ int64_t
733/ __rem64(int64_t x, int64_t y)
734/ {
735/ 	uint64_t	xt, yt, rem;
736/
737/ 	if (x < 0) {
738/ 		xt = -(uint64_t) x;
739/ 	} else {
740/ 		xt = x;
741/ 	}
742/ 	if (y < 0) {
743/ 		yt = -(uint64_t) y;
744/ 	} else {
745/ 		yt = y;
746/ 	}
747/ 	(void) UDivRem(xt, yt, &rem);
748/ 	return (x < 0 ? (int64_t) - rem : rem);
749/ }
/*
 * Stack on entry: 4(%esp) = x, 12(%esp) = y, low word first.
 * Register roles: %edx:%eax = xt, %edi:%esi = yt; rem is an 8-byte stack
 * slot filled in by UDivRem.  The result takes the sign of x.
 * A 64-bit negate is done as negl lo / adcl $0, hi / negl hi.
 */
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$8, %esp		/ rem
	movl	20(%esp), %eax		/ LO(x)
	movl	24(%esp), %edx		/ HI(x)
	movl	28(%esp), %esi		/ LO(y)
	movl	32(%esp), %edi		/ HI(y)
	testl	%edx, %edx
	jns	.LL92			/ x >= 0: xt = x
	negl	%eax			/ xt = -x
	adcl	$0, %edx
	negl	%edx
.LL92:
	testl	%edi, %edi
	jns	.LL93			/ y >= 0: yt = y
	negl	%esi			/ yt = -y
	adcl	$0, %edi
	negl	%edi
.LL93:
	leal	(%esp), %ecx		/ address of rem
	pushl	%ecx			/ pmod
	pushl	%edi			/ HI(yt)
	pushl	%esi			/ LO(yt)
	call	UDivRem			/ quotient is ignored
	addl	$12, %esp		/ drop the outgoing arguments
	movl	(%esp), %eax		/ LO(rem)
	movl	4(%esp), %edx		/ HI(rem)
	movl	24(%esp), %ecx		/ HI(x) again, for its sign
	testl	%ecx, %ecx
	jns	.LL94			/ x >= 0: return rem unchanged
	negl	%eax			/ rem = -rem
	adcl	$0, %edx
	negl	%edx
.LL94:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	ret	$16			/ pop x and y for the caller
	SET_SIZE(__rem64)
812
813/*
814 * __udivrem64
815 *
816 * Perform division of two unsigned 64-bit quantities, returning the
817 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
818 * pops the arguments on return.
819 */
/*
 * Stack on entry: 4(%esp) = x, 12(%esp) = y, low word first.
 * The quotient stays in %edx:%eax as returned by UDivRem; the remainder
 * it stores is loaded into %ecx:%esi.
 */
	ENTRY(__udivrem64)
	subl	$12, %esp		/ scratch area; rem uses the low 8 bytes
	leal	(%esp), %ecx		/ address of rem
	pushl	%ecx			/ pmod
	pushl	32(%esp)		/ HI(y)
	pushl	32(%esp)		/ LO(y): %esp moved down 4, same offset
	movl	28(%esp), %eax		/ LO(x)
	movl	32(%esp), %edx		/ HI(x)
	call	UDivRem
	addl	$12, %esp		/ drop the outgoing arguments
	movl	(%esp), %esi		/ LO(rem)
	movl	4(%esp), %ecx		/ HI(rem)
	addl	$12, %esp		/ release the scratch area
	ret	$16			/ pop x and y for the caller
	SET_SIZE(__udivrem64)
834
835/*
836 * Signed division with remainder.
837 */
838/ int64_t
839/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
840/ {
841/ 	int		negative;
842/ 	uint64_t	xt, yt, r, rem;
843/
844/ 	if (x < 0) {
845/ 		xt = -(uint64_t) x;
846/ 		negative = 1;
847/ 	} else {
848/ 		xt = x;
849/ 		negative = 0;
850/ 	}
851/ 	if (y < 0) {
852/ 		yt = -(uint64_t) y;
853/ 		negative ^= 1;
854/ 	} else {
855/ 		yt = y;
856/ 	}
857/ 	r = UDivRem(xt, yt, &rem);
858/ 	*pmod = (x < 0 ? (int64_t) - rem : rem);
859/ 	return (negative ? (int64_t) - r : r);
860/ }
/*
 * In: x in %edx:%eax; on entry 4(%esp) = y (low word first) and
 * 12(%esp) = pmod.  Out: quotient in %edx:%eax, remainder stored
 * through pmod.  The plain ret leaves the stack arguments in place.
 * Register roles: %edi = HI(x) kept for its sign, %ebp = negative flag,
 * %ecx:%esi = yt and later rem.
 * A 64-bit negate is done as negl lo / adcl $0, hi / negl hi.
 */
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp		/ rem
	movl	%edx, %edi		/ remember the sign of x
	xorl	%ebp, %ebp		/ negative = 0
	testl	%edx, %edx
	jns	.LL73			/ x >= 0: xt = x
	negl	%eax			/ xt = -x
	adcl	$0, %edx
	negl	%edx
	movl	$1, %ebp		/ negative = 1
.LL73:
	movl	24(%esp), %esi		/ LO(y)
	movl	28(%esp), %ecx		/ HI(y)
	testl	%ecx, %ecx
	jns	.LL74			/ y >= 0: yt = y
	negl	%esi			/ yt = -y
	adcl	$0, %ecx
	negl	%ecx
	xorl	$1, %ebp		/ negative ^= 1
.LL74:
	subl	$12, %esp		/ outgoing arguments
	movl	%esi, (%esp)		/ LO(yt)
	movl	%ecx, 4(%esp)		/ HI(yt)
	leal	12(%esp), %ecx		/ address of rem
	movl	%ecx, 8(%esp)		/ pmod argument
	call	UDivRem			/ r in %edx:%eax
	addl	$12, %esp		/ drop the outgoing arguments
	movl	(%esp), %esi		/ LO(rem)
	movl	4(%esp), %ecx		/ HI(rem)
	testl	%edi, %edi
	jns	.LL75			/ x >= 0: rem keeps its sign
	negl	%esi			/ rem = -rem
	adcl	$0, %ecx
	negl	%ecx
.LL75:
	movl	32(%esp), %edi		/ pmod supplied by the caller
	movl	%esi, (%edi)
	movl	%ecx, 4(%edi)
	testl	%ebp, %ebp
	je	.LL72			/ signs agreed: return r unchanged
	negl	%eax			/ r = -r
	adcl	$0, %edx
	negl	%edx
.LL72:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	SET_SIZE(SDivRem)
936
937/*
938 * __divrem64
939 *
940 * Perform division of two signed 64-bit quantities, returning the
941 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
942 * pops the arguments on return.
943 */
/*
 * Stack on entry: 4(%esp) = x, 12(%esp) = y, low word first.
 * The quotient stays in %edx:%eax as returned by SDivRem; the remainder
 * it stores is loaded into %ecx:%esi.
 */
	ENTRY(__divrem64)
	subl	$20, %esp		/ scratch area; rem uses the low 8 bytes
	leal	(%esp), %ecx		/ address of rem
	pushl	%ecx			/ pmod
	pushl	40(%esp)		/ HI(y)
	pushl	40(%esp)		/ LO(y): %esp moved down 4, same offset
	movl	36(%esp), %eax		/ LO(x)
	movl	40(%esp), %edx		/ HI(x)
	call	SDivRem
	addl	$12, %esp		/ drop the outgoing arguments
	movl	(%esp), %esi		/ LO(rem)
	movl	4(%esp), %ecx		/ HI(rem)
	addl	$20, %esp		/ release the scratch area
	ret	$16			/ pop x and y for the caller
	SET_SIZE(__divrem64)
958