xref: /titanic_41/usr/src/common/util/i386/muldiv.s (revision fb9f9b975cb9214fec5dab37d461199adab9b964)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#if !defined(lint)
28	.ident	"%Z%%M%	%I%	%E% SMI"
29
30	.file	"muldiv.s"
31#endif
32
33#if defined(__i386) && !defined(__amd64)
34
35/*
36 * Helper routines for 32-bit compilers to perform 64-bit math.
37 * These are used both by the Sun and GCC compilers.
38 */
39
40#include <sys/asm_linkage.h>
41#include <sys/asm_misc.h>
42
43
44#if defined(__lint)
45#include <sys/types.h>
46
/*
 * Lint-only stand-in for __mul64; the assembly version in the
 * non-lint branch of this file is the real implementation.
 */
/* ARGSUSED */
int64_t
__mul64(int64_t a, int64_t b)
{
	int64_t product = 0;

	return (product);
}
53
54#else   /* __lint */
55
/
/ int64_t __mul64(int64_t A, int64_t B)
/
/ 64 x 64 -> 64 bit multiply.  Overflow is not checked.
/
/ The product is formed longhand using base 2**32 digits.  With
/ A = a:b and B = c:d (high digit : low digit):
/
/			a	b	parameter A
/		x	c	d	parameter B
/		-----------------
/			ad	bd
/		ac	bc
/	-------------------------
/		ac	ad+bc	bd
/
/ Only bd and the low 32 bits of ad+bc reach the 64-bit result.  The ac
/ term and the upper 32 bits of ad+bc would only be non-zero on overflow,
/ so they are never computed.
/
/ Both operands are passed on the stack and are popped on return (ret $16).
/ The result comes back in %edx:%eax.  %ecx is used as scratch.
/
	ENTRY(__mul64)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi
	movl	16(%ebp), %eax		/ eax = d (B.lo)
	mull	12(%ebp)		/ edx:eax = a * d (A.hi * B.lo)
	movl	%eax, %ecx		/ ecx = low 32 bits of ad
	movl	8(%ebp), %eax		/ eax = b (A.lo)
	mull	16(%ebp)		/ edx:eax = b * d (A.lo * B.lo)
	movl	%eax, %esi		/ esi = low 32 bits of bd
	addl	%edx, %ecx		/ ecx = lo(ad) + hi(bd)
	movl	8(%ebp), %eax		/ eax = b (A.lo) again
	mull	20(%ebp)		/ edx:eax = b * c (A.lo * B.hi)
	addl	%ecx, %eax		/ eax = lo(bc) + lo(ad) + hi(bd)
	movl	%eax, %edx		/ edx = high word of the result
	movl	%esi, %eax		/ eax = low word of the result
	popl	%esi
	leave				/ esp = ebp, then restore ebp
	ret	$16
	SET_SIZE(__mul64)
92
93#endif	/* __lint */
94
95/*
96 * C support for 64-bit modulo and division.
97 * Hand-customized compiler output - see comments for details.
98 */
99#if defined(__lint)
100
/*
 * Lint-only stand-in for __udiv64 (unsigned 64-bit quotient).
 */
/* ARGSUSED */
uint64_t
__udiv64(uint64_t a, uint64_t b)
{
	uint64_t quotient = 0;

	return (quotient);
}
105
/*
 * Lint-only stand-in for __urem64 (unsigned 64-bit remainder).  Both
 * operands are unsigned, in line with the uint64_t return type and with
 * the __udiv64 stub.
 */
/* ARGSUSED */
uint64_t
__urem64(uint64_t a, uint64_t b)
{
	return (0);
}
110
/*
 * Lint-only stand-in for __div64 (signed 64-bit quotient).
 */
/* ARGSUSED */
int64_t
__div64(int64_t a, int64_t b)
{
	int64_t quotient = 0;

	return (quotient);
}
115
/*
 * Lint-only stand-in for __rem64 (signed 64-bit remainder).
 */
/* ARGSUSED */
int64_t
__rem64(int64_t a, int64_t b)
{
	int64_t remainder = 0;

	return (remainder);
}
120
121#else	/* __lint */
122
123/ /*
124/  * Unsigned division with remainder.
125/  * Divide two uint64_ts, and calculate remainder.
126/  */
127/ uint64_t
128/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
129/ {
130/ 	/* simple cases: y is a single uint32_t */
131/ 	if (HI(y) == 0) {
132/ 		uint32_t	div_hi, div_rem;
133/ 		uint32_t 	q0, q1;
134/
135/ 		/* calculate q1 */
136/ 		if (HI(x) < LO(y)) {
137/ 			/* result is a single uint32_t, use one division */
138/ 			q1 = 0;
139/ 			div_hi = HI(x);
140/ 		} else {
141/ 			/* result is a double uint32_t, use two divisions */
142/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
143/ 		}
144/
145/ 		/* calculate q0 and remainder */
146/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
147/
148/ 		/* return remainder */
149/ 		*pmod = div_rem;
150/
151/ 		/* return result */
152/ 		return (HILO(q1, q0));
153/
154/ 	} else if (HI(x) < HI(y)) {
155/ 		/* HI(x) < HI(y) => x < y => result is 0 */
156/
157/ 		/* return remainder */
158/ 		*pmod = x;
159/
160/ 		/* return result */
161/ 		return (0);
162/
163/ 	} else {
164/ 		/*
165/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
166/ 		 * result
167/ 		 */
168/ 		uint32_t		y0, y1;
169/ 		uint32_t		x1, x0;
170/ 		uint32_t		q0;
171/ 		uint32_t		normshift;
172/
173/ 		/* normalize by shifting x and y so MSB(y) == 1 */
174/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
175/ 		normshift = 31 - normshift;
176/
177/ 		if (normshift == 0) {
178/ 			/* no shifting needed, and x < 2*y so q <= 1 */
179/ 			y1 = HI(y);
180/ 			y0 = LO(y);
181/ 			x1 = HI(x);
182/ 			x0 = LO(x);
183/
184/ 			/* if x >= y then q = 1 (note x1 >= y1) */
185/ 			if (x1 > y1 || x0 >= y0) {
186/ 				q0 = 1;
187/ 				/* subtract y from x to get remainder */
188/ 				A_SUB2(y0, y1, x0, x1);
189/ 			} else {
190/ 				q0 = 0;
191/ 			}
192/
193/ 			/* return remainder */
194/ 			*pmod = HILO(x1, x0);
195/
196/ 			/* return result */
197/ 			return (q0);
198/
199/ 		} else {
200/ 			/*
201/ 			 * the last case: result is one uint32_t, but we need to
202/ 			 * normalize
203/ 			 */
204/ 			uint64_t	dt;
205/ 			uint32_t		t0, t1, x2;
206/
207/ 			/* normalize y */
208/ 			dt = (y << normshift);
209/ 			y1 = HI(dt);
210/ 			y0 = LO(dt);
211/
212/ 			/* normalize x (we need 3 uint32_ts!!!) */
213/ 			x2 = (HI(x) >> (32 - normshift));
214/ 			dt = (x << normshift);
215/ 			x1 = HI(dt);
216/ 			x0 = LO(dt);
217/
218/ 			/* estimate q0, and reduce x to a two uint32_t value */
219/ 			A_DIV32(x1, x2, y1, q0, x1);
220/
221/ 			/* adjust q0 down if too high */
222/ 			/*
223/ 			 * because of the limited range of x2 we can only be
224/ 			 * one off
225/ 			 */
226/ 			A_MUL32(y0, q0, t0, t1);
227/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
228/ 				q0--;
229/ 				A_SUB2(y0, y1, t0, t1);
230/ 			}
231/ 			/* return remainder */
232/ 			/* subtract product from x to get remainder */
233/ 			A_SUB2(t0, t1, x0, x1);
234/ 			*pmod = (HILO(x1, x0) >> normshift);
235/
236/ 			/* return result */
237/ 			return (q0);
238/ 		}
239/ 	}
240/ }
	ENTRY(UDivRem)
/
/ UDivRem(x, y, pmod): unsigned 64-bit division with remainder.
/
/ In:	%edx:%eax	x (dividend)
/	stack		y (divisor, low word first), then pmod
/ Out:	%edx:%eax	quotient
/	*pmod		64-bit remainder
/
/ %ebp, %edi and %esi are saved and restored; %ecx is scratch.  The
/ stack arguments are left in place (plain ret).
/
/ Stack layout after the prologue (three pushes plus 48 bytes of locals):
/	72(%esp)		pmod
/	64(%esp), 68(%esp)	LO(y), HI(y)
/	40(%esp), 44(%esp)	spilled LO(x), HI(x)
/	32(%esp), 36(%esp)	HI(x) zero-extended to 64 bits
/	28(%esp)		normshift
/	24(%esp)		y1 (high word of the normalized divisor)
/	20(%esp)		q0
/	12(%esp)		x2
/
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ edi = HI(y)
	testl	%edi, %edi	/ HI(y) == 0 ?  (the movl below keep the flags)
	movl	%eax, 40(%esp)	/ spill LO(x)
	movl	%edx, 44(%esp)	/ spill HI(x)
	movl	%edi, %esi	/ esi = HI(y)
	movl	%edi, %ecx	/ ecx = HI(y); when that is 0 it doubles as q1 = 0
	jne	.LL2		/ HI(y) != 0: divisor needs two words
/
/ HI(y) == 0: the divisor is a single 32-bit word.
/
	movl	%edx, %eax	/ eax = HI(x)
	cmpl	64(%esp), %eax	/ HI(x) vs LO(y)
	jae	.LL21		/ HI(x) >= LO(y): quotient needs two divisions
.LL4:
/
/ Here ecx = q1 and edx = div_hi.  Divide div_hi:LO(x) by LO(y) to get
/ q0 and the remainder.
/
	movl	72(%esp), %ebp	/ ebp = pmod
	xorl	%esi, %esi	/ result low word = 0
	movl	40(%esp), %eax	/ eax = LO(x)
	movl	%ecx, %edi	/ result high word = q1
	divl	64(%esp)	/ eax = q0, edx = div_rem
	movl	%edx, (%ebp)	/ low word of *pmod = div_rem
	xorl	%edx, %edx	/ high word of q0 = 0
	addl	%eax, %esi	/ result = HILO(q1, q0)
	movl	$0, 4(%ebp)	/ high word of *pmod = 0
	adcl	%edx, %edi
	addl	$48, %esp
	movl	%esi, %eax	/ return value goes in edx:eax
	popl	%esi
	movl	%edi, %edx
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
/
/ HI(y) != 0.  Keep a zero-extended copy of HI(x) and compare it with
/ HI(y).
/
	movl	44(%esp), %eax	/ eax = HI(x)
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ HI(x) vs HI(y)
	movl	%eax, 32(%esp)	/ 32(%esp) = HI(x)
	movl	%edx, 36(%esp)	/ 36(%esp) = 0
	jae	.LL6		/ HI(x) >= HI(y): quotient may be non-zero
/
/ HI(x) < HI(y), so x < y: the remainder is x and the quotient is 0.
/
	movl	72(%esp), %esi	/ esi = pmod
	movl	40(%esp), %ebp	/ LO(x)
	movl	44(%esp), %ecx	/ HI(x)
	movl	%ebp, (%esi)	/ *pmod = x
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ result = 0
	xorl	%esi, %esi
.LL22:
/
/ Common exit: the result is in edi:esi and is moved to edx:eax.
/
	addl	$48, %esp
	movl	%esi, %eax	/ low word of the result
	popl	%esi
	movl	%edi, %edx	/ high word of the result
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
/
/ Single-word divisor with HI(x) >= LO(y): first divide 0:HI(x) by LO(y)
/ to get q1 and div_hi, then rejoin the common path.
/
	movl	%edi, %edx	/ edx = 0 (edi holds HI(y), which is 0 here)
	divl	64(%esp)	/ eax = q1, edx = div_hi
	movl	%eax, %ecx	/ ecx = q1
	jmp	.LL4
	.align	16
.LL6:
/
/ Two-word divisor with HI(x) >= HI(y): the quotient fits in one word.
/ normshift = 31 - (index of the highest set bit of HI(y)).
/
	movl	$31, %edi
	bsrl	%esi,%edx	/ edx = index of the highest set bit of HI(y)
	subl	%edx, %edi	/ edi = normshift; ZF set when it is 0
	movl	%edi, 28(%esp)	/ save normshift
	jne	.LL8		/ normshift != 0: normalize first
/
/ normshift == 0: no shifting needed and the quotient is 0 or 1.
/
	movl	32(%esp), %edx	/ edx = x1 = HI(x)
	cmpl	%ecx, %edx	/ x1 vs y1 (ecx still holds HI(y))
	movl	64(%esp), %edi	/ edi = y0 = LO(y)
	movl	40(%esp), %esi	/ esi = x0 = LO(x)
	ja	.LL10		/ x1 > y1: q0 = 1
	xorl	%ebp, %ebp	/ q0 = 0
	cmpl	%edi, %esi	/ x0 vs y0
	jb	.LL11		/ x0 < y0: keep q0 = 0, remainder is x
.LL10:
	movl	$1, %ebp	/ q0 = 1
	subl	%edi,%esi	/ x -= y (two-word subtract with borrow)
	sbbl	%ecx,%edx
.LL11:
/
/ Store HILO(x1, x0) as the remainder and return q0.
/
	movl	%edx, %ecx	/ ecx = x1
	xorl	%edx, %edx
	xorl	%edi, %edi
	addl	%esi, %edx	/ edx = x0
	adcl	%edi, %ecx	/ ecx:edx = HILO(x1, x0)
	movl	72(%esp), %esi	/ esi = pmod
	movl	%edx, (%esi)	/ low word of *pmod
	movl	%ecx, 4(%esi)	/ high word of *pmod
	xorl	%edi, %edi	/ result high word = 0
	movl	%ebp, %esi	/ result low word = q0
	jmp	.LL22
	.align	16
.LL8:
/
/ normshift != 0: dt = y << normshift, giving y1 (edi) and y0 (esi).
/ shldl takes its count from %cl.  The andl $32 test selects the fix-up
/ at .LL23 for a count with bit 5 set.
/
	movb	28(%esp), %cl	/ cl = normshift
	movl	64(%esp), %esi	/ esi = LO(y)
	movl	68(%esp), %edi	/ edi = HI(y)
	shldl	%esi, %edi	/ high word of y << normshift
	sall	%cl, %esi	/ low word of y << normshift
	andl	$32, %ecx
	jne	.LL23
.LL17:
/
/ x2 = HI(x) >> (32 - normshift), then dt = x << normshift gives x1 (edi)
/ and x0 (esi).
/
	movl	$32, %ecx
	subl	28(%esp), %ecx	/ ecx = 32 - normshift
	movl	%esi, %ebp	/ ebp = y0
	movl	32(%esp), %esi	/ esi = HI(x)
	shrl	%cl, %esi	/ esi = x2
	movl	%edi, 24(%esp)	/ save y1
	movb	28(%esp), %cl	/ cl = normshift
	movl	%esi, 12(%esp)	/ save x2
	movl	44(%esp), %edi	/ edi = HI(x)
	movl	40(%esp), %esi	/ esi = LO(x)
	shldl	%esi, %edi	/ high word of x << normshift
	sall	%cl, %esi	/ low word of x << normshift
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ bit 5 of the count set: low word becomes high
	xorl	%esi, %esi
.LL18:
/
/ Estimate q0 = x2:x1 / y1; the remainder of that division becomes the
/ new x1.  Then form t1:t0 = y0 * q0 for the correction test.
/
	movl	%edi, %ecx
	movl	%edi, %eax	/ eax = x1
	movl	%ecx, (%esp)	/ spill of x1; not read again in this routine
	movl	12(%esp), %edx	/ edx = x2
	divl	24(%esp)	/ eax = q0, edx = new x1
	movl	%edx, %ecx	/ ecx = x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)	/ save q0
	movl	%ebp, %eax	/ eax = y0
	mull	20(%esp)	/ edx:eax = t1:t0 = y0 * q0
	cmpl	%ecx, %edx	/ t1 vs x1
	movl	%edi, 4(%esp)	/ stores 0; not read again in this routine
	ja	.LL14		/ t1 > x1: q0 is too high
	je	.LL24		/ t1 == x1: compare the low words
.LL15:
/
/ Remainder = (HILO(x1, x0) - HILO(t1, t0)) >> normshift.
/
	movl	%ecx, %edi	/ edi = x1
	subl	%eax,%esi	/ x0 -= t0
	sbbl	%edx,%edi	/ x1 -= t1 + borrow
	movl	%edi, %eax
	movl	%eax, %edx	/ edx = x1
	xorl	%eax, %eax
	xorl	%ebp, %ebp
	addl	%esi, %eax	/ eax = x0
	adcl	%ebp, %edx	/ edx:eax = HILO(x1, x0)
	movb	28(%esp), %cl	/ cl = normshift
	shrdl	%edx, %eax	/ low word of the right shift (count in %cl)
	shrl	%cl, %edx	/ high word of the right shift
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ bit 5 of the count set: high word becomes low
	xorl	%edx, %edx
.LL16:
	movl	72(%esp), %ecx	/ ecx = pmod
	movl	20(%esp), %esi	/ result low word = q0
	xorl	%edi, %edi	/ result high word = 0
	movl	%eax, (%ecx)	/ low word of *pmod
	movl	%edx, 4(%ecx)	/ high word of *pmod
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ t1 == x1: t0 vs x0
	jbe	.LL15		/ t0 <= x0: q0 is correct
.LL14:
/
/ q0 was one too high: decrement it and take y back out of t.
/
	decl	20(%esp)	/ q0--
	subl	%ebp,%eax	/ t0 -= y0
	sbbl	24(%esp),%edx	/ t1 -= y1 + borrow
	jmp	.LL15
.LL23:
/
/ Fix-up for the y shift when bit 5 of the count is set.
/
	movl	%esi, %edi	/ low word becomes high
	xorl	%esi, %esi	/ low word = 0
	jmp	.LL17
	SET_SIZE(UDivRem)
413
414/*
415 * Unsigned division without remainder.
416 */
417/ uint64_t
418/ UDiv(uint64_t x, uint64_t y)
419/ {
420/ 	if (HI(y) == 0) {
421/ 		/* simple cases: y is a single uint32_t */
422/ 		uint32_t	div_hi, div_rem;
423/ 		uint32_t	q0, q1;
424/
425/ 		/* calculate q1 */
426/ 		if (HI(x) < LO(y)) {
427/ 			/* result is a single uint32_t, use one division */
428/ 			q1 = 0;
429/ 			div_hi = HI(x);
430/ 		} else {
431/ 			/* result is a double uint32_t, use two divisions */
432/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
433/ 		}
434/
435/ 		/* calculate q0 and remainder */
436/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
437/
438/ 		/* return result */
439/ 		return (HILO(q1, q0));
440/
441/ 	} else if (HI(x) < HI(y)) {
442/ 		/* HI(x) < HI(y) => x < y => result is 0 */
443/
444/ 		/* return result */
445/ 		return (0);
446/
447/ 	} else {
448/ 		/*
449/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
450/ 		 * result
451/ 		 */
452/ 		uint32_t		y0, y1;
453/ 		uint32_t		x1, x0;
454/ 		uint32_t		q0;
455/ 		unsigned		normshift;
456/
457/ 		/* normalize by shifting x and y so MSB(y) == 1 */
458/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
459/ 		normshift = 31 - normshift;
460/
461/ 		if (normshift == 0) {
462/ 			/* no shifting needed, and x < 2*y so q <= 1 */
463/ 			y1 = HI(y);
464/ 			y0 = LO(y);
465/ 			x1 = HI(x);
466/ 			x0 = LO(x);
467/
468/ 			/* if x >= y then q = 1 (note x1 >= y1) */
469/ 			if (x1 > y1 || x0 >= y0) {
470/ 				q0 = 1;
471/ 				/* subtract y from x to get remainder */
472/ 				/* A_SUB2(y0, y1, x0, x1); */
473/ 			} else {
474/ 				q0 = 0;
475/ 			}
476/
477/ 			/* return result */
478/ 			return (q0);
479/
480/ 		} else {
481/ 			/*
482/ 			 * the last case: result is one uint32_t, but we need to
483/ 			 * normalize
484/ 			 */
485/ 			uint64_t	dt;
486/ 			uint32_t		t0, t1, x2;
487/
488/ 			/* normalize y */
489/ 			dt = (y << normshift);
490/ 			y1 = HI(dt);
491/ 			y0 = LO(dt);
492/
493/ 			/* normalize x (we need 3 uint32_ts!!!) */
494/ 			x2 = (HI(x) >> (32 - normshift));
495/ 			dt = (x << normshift);
496/ 			x1 = HI(dt);
497/ 			x0 = LO(dt);
498/
499/ 			/* estimate q0, and reduce x to a two uint32_t value */
500/ 			A_DIV32(x1, x2, y1, q0, x1);
501/
502/ 			/* adjust q0 down if too high */
503/ 			/*
504/ 			 * because of the limited range of x2 we can only be
505/ 			 * one off
506/ 			 */
507/ 			A_MUL32(y0, q0, t0, t1);
508/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
509/ 				q0--;
510/ 			}
511/ 			/* return result */
512/ 			return (q0);
513/ 		}
514/ 	}
515/ }
	ENTRY(UDiv)
/
/ UDiv(x, y): unsigned 64-bit division, quotient only.
/
/ In:	%edx:%eax	x (dividend)
/	stack		y (divisor, low word first)
/ Out:	%edx:%eax	quotient
/
/ %ebp, %edi and %esi are saved and restored; %ecx is scratch.  The
/ stack argument is left in place (plain ret).
/
/ Stack layout after the prologue (three pushes plus 40 bytes of locals):
/	56(%esp), 60(%esp)	LO(y), HI(y)
/	32(%esp), 36(%esp)	spilled LO(x), HI(x)
/	24(%esp), 28(%esp)	HI(x) zero-extended to 64 bits
/	16(%esp), 20(%esp)	y0, y1 (normalized divisor)
/	8(%esp)			y1 again, used as the divl operand
/
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ spill HI(x)
	movl	60(%esp), %edx	/ edx = HI(y)
	testl	%edx, %edx	/ HI(y) == 0 ?  (the movl below keep the flags)
	movl	%eax, 32(%esp)	/ spill LO(x)
	movl	%edx, %ecx	/ ecx = HI(y); when that is 0 it doubles as q1 = 0
	movl	%edx, %eax	/ eax = HI(y)
	jne	.LL26		/ HI(y) != 0: divisor needs two words
/
/ HI(y) == 0: the divisor is a single 32-bit word.
/
	movl	36(%esp), %esi	/ esi = HI(x)
	cmpl	56(%esp), %esi	/ HI(x) vs LO(y)
	movl	%esi, %eax	/ eax = HI(x)
	movl	%esi, %edx	/ div_hi = HI(x)
	jb	.LL28		/ HI(x) < LO(y): one division, q1 stays 0
	movl	%ecx, %edx	/ edx = 0
	divl	56(%esp)	/ eax = q1, edx = div_hi
	movl	%eax, %ecx	/ ecx = q1
.LL28:
/
/ Here ecx = q1 and edx = div_hi.  Divide div_hi:LO(x) by LO(y) for q0.
/
	xorl	%esi, %esi	/ result low word = 0
	movl	%ecx, %edi	/ result high word = q1
	movl	32(%esp), %eax	/ eax = LO(x)
	xorl	%ecx, %ecx
	divl	56(%esp)	/ eax = q0
	addl	%eax, %esi	/ result = HILO(q1, q0)
	adcl	%ecx, %edi
.LL25:
/
/ Exit: the result is in edi:esi and is moved to edx:eax.
/
	addl	$40, %esp
	movl	%esi, %eax	/ low word of the result
	popl	%esi
	movl	%edi, %edx	/ high word of the result
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
/
/ HI(y) != 0.  Preset the result to 0 and return it if HI(x) < HI(y).
/
	movl	36(%esp), %esi	/ esi = HI(x)
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ 24(%esp) = HI(x)
	movl	%edi, 28(%esp)	/ 28(%esp) = 0
	xorl	%esi, %esi	/ result = 0
	xorl	%edi, %edi
	cmpl	%eax, 24(%esp)	/ HI(x) vs HI(y)
	jb	.LL25		/ HI(x) < HI(y), so x < y: return 0
/
/ normshift = 31 - (index of the highest set bit of HI(y)).
/
	bsrl	%eax,%ebp	/ ebp = index of the highest set bit of HI(y)
	movl	$31, %eax
	subl	%ebp, %eax	/ eax = normshift; ZF set when it is 0
	jne	.LL32		/ normshift != 0: normalize first
/
/ normshift == 0: the quotient is 0 or 1.
/
	movl	24(%esp), %eax	/ eax = x1 = HI(x)
	cmpl	%ecx, %eax	/ x1 vs y1 (ecx still holds HI(y))
	movl	56(%esp), %esi	/ esi = y0 = LO(y)
	movl	32(%esp), %edx	/ edx = x0 = LO(x)
	ja	.LL34		/ x1 > y1: q0 = 1
	xorl	%eax, %eax	/ q0 = 0
	cmpl	%esi, %edx	/ x0 vs y0
	jb	.LL35		/ x0 < y0: keep q0 = 0
.LL34:
	movl	$1, %eax	/ q0 = 1
.LL35:
	movl	%eax, %esi	/ result low word = q0
	xorl	%edi, %edi	/ result high word = 0
.LL45:
/
/ Exit for the two-word-divisor cases (same sequence as .LL25).
/
	addl	$40, %esp
	movl	%esi, %eax	/ low word of the result
	popl	%esi
	movl	%edi, %edx	/ high word of the result
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
/
/ normshift != 0 (held in eax): dt = y << normshift, giving y1 (edi) and
/ y0 (esi).  shldl takes its count from %cl.  The andl $32 test selects
/ the fix-up at .LL43 for a count with bit 5 set.
/
	movb	%al, %cl	/ cl = normshift
	movl	56(%esp), %esi	/ esi = LO(y)
	movl	60(%esp), %edi	/ edi = HI(y)
	shldl	%esi, %edi	/ high word of y << normshift
	sall	%cl, %esi	/ low word of y << normshift
	andl	$32, %ecx
	jne	.LL43
.LL40:
/
/ x2 = HI(x) >> (32 - normshift), then dt = x << normshift gives x1 (edi)
/ and x0 (esi).
/
	movl	$32, %ecx
	subl	%eax, %ecx	/ ecx = 32 - normshift
	movl	%edi, %edx	/ edx = y1
	movl	%edi, 20(%esp)	/ save y1
	movl	24(%esp), %ebp	/ ebp = HI(x)
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ ebp = x2
	movl	%esi, 16(%esp)	/ save y0
	movb	%al, %cl	/ cl = normshift
	movl	32(%esp), %esi	/ esi = LO(x)
	movl	%edi, 12(%esp)	/ stores 0; not read again in this routine
	movl	36(%esp), %edi	/ edi = HI(x)
	shldl	%esi, %edi	/ high word of x << normshift
	sall	%cl, %esi	/ low word of x << normshift
	andl	$32, %ecx
	movl	%edx, 8(%esp)	/ 8(%esp) = y1, the divisor word used below
	je	.LL41
	movl	%esi, %edi	/ bit 5 of the count set: low word becomes high
	xorl	%esi, %esi
.LL41:
/
/ Estimate q0 = x2:x1 / y1; the remainder of that division becomes the
/ new x1.  Then form t1:t0 = y0 * q0 for the correction test.
/
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ eax = x1
	movl	%ebp, %edx	/ edx = x2
	divl	8(%esp)		/ eax = q0, edx = new x1
	movl	%edx, %ebp	/ ebp = x1
	movl	%ecx, 4(%esp)	/ stores 0; not read again in this routine
	movl	%eax, %ecx	/ ecx = q0
	movl	16(%esp), %eax	/ eax = y0
	mull	%ecx		/ edx:eax = t1:t0 = y0 * q0
	cmpl	%ebp, %edx	/ t1 vs x1
	movl	%edi, (%esp)	/ spill of the shifted high word; not read again
	movl	%esi, %edi	/ edi = x0
	ja	.LL38		/ t1 > x1: q0 is too high
	je	.LL44		/ t1 == x1: compare the low words
.LL39:
	movl	%ecx, %esi	/ result low word = q0
.LL46:
	xorl	%edi, %edi	/ result high word = 0
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ t1 == x1: t0 vs x0
	jbe	.LL39		/ t0 <= x0: q0 is correct
.LL38:
	decl	%ecx		/ q0 was one too high
	movl	%ecx, %esi	/ result low word = q0
	jmp	.LL46
.LL43:
/
/ Fix-up for the y shift when bit 5 of the count is set.
/
	movl	%esi, %edi	/ low word becomes high
	xorl	%esi, %esi	/ low word = 0
	jmp	.LL40
	SET_SIZE(UDiv)
648
649/*
650 * __udiv64
651 *
652 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
654 */
655	ENTRY(__udiv64)
656	movl	4(%esp), %eax	/ x, x
657	movl	8(%esp), %edx	/ x, x
658	pushl	16(%esp)	/ y
659	pushl	16(%esp)
660	call	UDiv
661	addl	$8, %esp
662	ret     $16
663	SET_SIZE(__udiv64)
664
665/*
666 * __urem64
667 *
668 * Perform division of two unsigned 64-bit quantities, returning the
669 * remainder in %edx:%eax.  __urem64 pops the arguments on return
670 */
671	ENTRY(__urem64)
672	subl	$12, %esp
673	movl	%esp, %ecx	/, tmp65
674	movl	16(%esp), %eax	/ x, x
675	movl	20(%esp), %edx	/ x, x
676	pushl	%ecx		/ tmp65
677	pushl	32(%esp)	/ y
678	pushl	32(%esp)
679	call	UDivRem
680	movl	12(%esp), %eax	/ rem, rem
681	movl	16(%esp), %edx	/ rem, rem
682	addl	$24, %esp
683	ret	$16
684	SET_SIZE(__urem64)
685
686/*
687 * __div64
688 *
689 * Perform division of two signed 64-bit quantities, returning the
690 * quotient in %edx:%eax.  __div64 pops the arguments on return.
691 */
692/ int64_t
693/ __div64(int64_t x, int64_t y)
694/ {
695/ 	int		negative;
696/ 	uint64_t	xt, yt, r;
697/
698/ 	if (x < 0) {
699/ 		xt = -(uint64_t) x;
700/ 		negative = 1;
701/ 	} else {
702/ 		xt = x;
703/ 		negative = 0;
704/ 	}
705/ 	if (y < 0) {
706/ 		yt = -(uint64_t) y;
707/ 		negative ^= 1;
708/ 	} else {
709/ 		yt = y;
710/ 	}
711/ 	r = UDiv(xt, yt);
712/ 	return (negative ? (int64_t) - r : r);
713/ }
	ENTRY(__div64)
/
/ Signed 64-bit quotient.  Takes the magnitudes of x and y, divides
/ them with UDiv and negates the result when exactly one operand was
/ negative.  Both operands are on the stack and are popped on return
/ (ret $16); the quotient is returned in %edx:%eax.
/
/ Stack layout after the prologue (three pushes plus 8 bytes of locals):
/	32(%esp), 36(%esp)	LO(y), HI(y)
/	24(%esp), 28(%esp)	LO(x), HI(x)
/	(%esp), 4(%esp)		xt
/ %ebp holds the negative flag across the call to UDiv.
/
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ edx = HI(x)
	testl	%edx, %edx	/ sign of x (the movl below keep the flags)
	movl	24(%esp), %eax	/ eax = LO(x)
	movl	32(%esp), %esi	/ esi = LO(y)
	movl	36(%esp), %edi	/ edi = HI(y)
	js	.LL84		/ x < 0
	xorl	%ebp, %ebp	/ negative = 0
	testl	%edi, %edi	/ sign of y
	movl	%eax, (%esp)	/ xt = x
	movl	%edx, 4(%esp)
	movl	%esi, %eax	/ yt = y, in edx:eax
	movl	%edi, %edx
	js	.LL85		/ y < 0
.LL82:
/
/ Here xt is at (%esp) and yt is in edx:eax.
/
	pushl	%edx		/ HI(yt)
	pushl	%eax		/ LO(yt)
	movl	8(%esp), %eax	/ LO(xt)
	movl	12(%esp), %edx	/ HI(xt)
	call	UDiv		/ r = UDiv(xt, yt), in edx:eax
	popl	%ecx		/ discard LO(yt)
	testl	%ebp, %ebp	/ negative ?  (popl keeps the flags)
	popl	%esi		/ discard HI(yt)
	je	.LL83		/ no: return r unchanged
	negl	%eax		/ r = -r (64-bit negate)
	adcl	$0, %edx
	negl	%edx
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
/
/ x < 0: xt = -x and negative = 1.  When y is also negative, control
/ falls through into .LL85.
/
	negl	%eax		/ xt = -x (64-bit negate)
	adcl	$0, %edx
	negl	%edx
	testl	%edi, %edi	/ sign of y
	movl	%eax, (%esp)	/ store xt
	movl	%edx, 4(%esp)
	movl	$1, %ebp	/ negative = 1
	movl	%esi, %eax	/ yt = y, in edx:eax
	movl	%edi, %edx
	jns	.LL82		/ y >= 0
	.align	16
.LL85:
/
/ y < 0: yt = -y and the negative flag is toggled.
/
	negl	%eax		/ yt = -y (64-bit negate)
	adcl	$0, %edx
	negl	%edx
	xorl	$1, %ebp	/ negative ^= 1
	jmp	.LL82
	SET_SIZE(__div64)
771
772/*
773 * __rem64
774 *
775 * Perform division of two signed 64-bit quantities, returning the
776 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
777 */
778/ int64_t
779/ __rem64(int64_t x, int64_t y)
780/ {
781/ 	uint64_t	xt, yt, rem;
782/
783/ 	if (x < 0) {
784/ 		xt = -(uint64_t) x;
785/ 	} else {
786/ 		xt = x;
787/ 	}
788/ 	if (y < 0) {
789/ 		yt = -(uint64_t) y;
790/ 	} else {
791/ 		yt = y;
792/ 	}
793/ 	(void) UDivRem(xt, yt, &rem);
794/ 	return (x < 0 ? (int64_t) - rem : rem);
795/ }
	ENTRY(__rem64)
/
/ Signed 64-bit remainder.  Takes the magnitudes of x and y, calls
/ UDivRem and negates the remainder when x was negative.  Both operands
/ are on the stack and are popped on return (ret $16); the remainder is
/ returned in %edx:%eax.
/
/ Stack layout after the prologue (two pushes plus 20 bytes of locals):
/	40(%esp), 44(%esp)	LO(y), HI(y)
/	32(%esp), 36(%esp)	LO(x), HI(x)
/	8(%esp), 12(%esp)	rem, written by UDivRem
/	(%esp), 4(%esp)		xt
/
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ ecx = HI(x), used for the sign test
	movl	32(%esp), %esi	/ esi = LO(x)
	movl	36(%esp), %edi	/ edi = HI(x)
	testl	%ecx, %ecx	/ sign of x (the movl below keep the flags)
	movl	40(%esp), %eax	/ eax = LO(y)
	movl	44(%esp), %edx	/ edx = HI(y)
	movl	%esi, (%esp)	/ xt = x
	movl	%edi, 4(%esp)
	js	.LL92		/ x < 0
	testl	%edx, %edx	/ sign of y
	movl	%eax, %esi	/ yt = y, in edi:esi
	movl	%edx, %edi
	js	.LL93		/ y < 0
.LL90:
/
/ Here xt is at (%esp) and yt is in edi:esi.
/
	leal	8(%esp), %eax	/ eax = address of rem
	pushl	%eax		/ pmod
	pushl	%edi		/ HI(yt)
	pushl	%esi		/ LO(yt)
	movl	12(%esp), %eax	/ LO(xt)
	movl	16(%esp), %edx	/ HI(xt)
	call	UDivRem		/ the quotient in edx:eax is ignored
	addl	$12, %esp	/ drop the three arguments
	movl	36(%esp), %edi	/ edi = HI(x), reloaded for the sign test
	testl	%edi, %edi	/ x < 0 ?
	movl	8(%esp), %eax	/ eax = LO(rem)
	movl	12(%esp), %edx	/ edx = HI(rem)
	js	.LL94		/ yes: negate the remainder
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
/
/ x < 0: xt = -x.  When y is also negative, control falls through
/ into .LL93.
/
	negl	%esi		/ xt = -x (64-bit negate)
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ sign of y
	movl	%esi, (%esp)	/ store xt
	movl	%edi, 4(%esp)
	movl	%eax, %esi	/ yt = y, in edi:esi
	movl	%edx, %edi
	jns	.LL90		/ y >= 0
	.align	16
.LL93:
	negl	%esi		/ yt = -y (64-bit negate)
	adcl	$0, %edi
	negl	%edi
	jmp	.LL90
	.align	16
.LL94:
/
/ x < 0: return -rem.  The epilogue is interleaved with the negate.
/
	negl	%eax		/ low word of -rem; CF set when it was non-zero
	adcl	$0, %edx	/ fold that carry into the high word
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ high word of -rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)
858
859#endif	/* __lint */
860
861#if defined(__lint)
862
863/*
864 * C support for 64-bit modulo and division.
865 * GNU routines callable from C (though generated by the compiler).
866 * Hand-customized compiler output - see comments for details.
867 */
/*
 * Lint-only stand-in for __udivdi3 (unsigned 64-bit quotient).
 */
/*ARGSUSED*/
unsigned long long
__udivdi3(unsigned long long a, unsigned long long b)
{
	unsigned long long quotient = 0;

	return (quotient);
}
872
/*
 * Lint-only stand-in for __umoddi3 (unsigned 64-bit remainder).
 */
/*ARGSUSED*/
unsigned long long
__umoddi3(unsigned long long a, unsigned long long b)
{
	unsigned long long remainder = 0;

	return (remainder);
}
877
/*
 * Lint-only stand-in for __divdi3 (signed 64-bit quotient).
 */
/*ARGSUSED*/
long long
__divdi3(long long a, long long b)
{
	long long quotient = 0;

	return (quotient);
}
882
/*
 * Lint-only stand-in for __moddi3 (signed 64-bit remainder).
 */
/*ARGSUSED*/
long long
__moddi3(long long a, long long b)
{
	long long remainder = 0;

	return (remainder);
}
887
/*
 * Lint-only stand-in for __divrem64 (signed 64-bit quotient and
 * remainder).
 */
/* ARGSUSED */
int64_t
__divrem64(int64_t a, int64_t b)
{
	int64_t quotient = 0;

	return (quotient);
}
891
/*
 * Lint-only stand-in for __udivrem64 (unsigned 64-bit quotient and
 * remainder).
 */
/* ARGSUSED */
uint64_t
__udivrem64(uint64_t a, uint64_t b)
{
	uint64_t quotient = 0;

	return (quotient);
}
895
896#else	/* __lint */
897
898/*
899 * int32_t/int64_t division/manipulation
900 *
901 * Hand-customized compiler output: the non-GCC entry points depart from
902 * the SYS V ABI by requiring their arguments to be popped, and in the
903 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
904 * compiler-generated use of %edx:%eax for the first argument of
905 * internal entry points.
906 *
907 * Inlines for speed:
908 * - counting the number of leading zeros in a word
909 * - multiplying two 32-bit numbers giving a 64-bit result
910 * - dividing a 64-bit number by a 32-bit number, giving both quotient
911 *	and remainder
912 * - subtracting two 64-bit results
913 */
914/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
915/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
916/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
917/
918/ /* give index of highest bit */
919/ #define	HIBIT(a, r) \
920/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
921/
922/ /* multiply two uint32_ts resulting in a uint64_t */
923/ #define	A_MUL32(a, b, lo, hi) \
924/     asm("mull %2" \
925/ 	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
926/
927/ /* divide a uint64_t by a uint32_t */
928/ #define	A_DIV32(lo, hi, b, q, r) \
929/     asm("divl %2" \
930/ 	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
931/ 	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
932/
933/ /* subtract two uint64_ts (with borrow) */
934/ #define	A_SUB2(bl, bh, al, ah) \
935/     asm("subl %4,%0\n\tsbbl %5,%1" \
936/ 	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
937/ 	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
938/ 	"g"((uint32_t)(bh)))
939
940/*
941 * __udivdi3
942 *
943 * Perform division of two unsigned 64-bit quantities, returning the
944 * quotient in %edx:%eax.
945 */
946	ENTRY(__udivdi3)
947	movl	4(%esp), %eax	/ x, x
948	movl	8(%esp), %edx	/ x, x
949	pushl	16(%esp)	/ y
950	pushl	16(%esp)
951	call	UDiv
952	addl	$8, %esp
953	ret
954	SET_SIZE(__udivdi3)
955
956/*
957 * __umoddi3
958 *
959 * Perform division of two unsigned 64-bit quantities, returning the
960 * remainder in %edx:%eax.
961 */
962	ENTRY(__umoddi3)
963	subl	$12, %esp
964	movl	%esp, %ecx	/, tmp65
965	movl	16(%esp), %eax	/ x, x
966	movl	20(%esp), %edx	/ x, x
967	pushl	%ecx		/ tmp65
968	pushl	32(%esp)	/ y
969	pushl	32(%esp)
970	call	UDivRem
971	movl	12(%esp), %eax	/ rem, rem
972	movl	16(%esp), %edx	/ rem, rem
973	addl	$24, %esp
974	ret
975	SET_SIZE(__umoddi3)
976
977/*
978 * __divdi3
979 *
980 * Perform division of two signed 64-bit quantities, returning the
981 * quotient in %edx:%eax.
982 */
983/ int64_t
984/ __divdi3(int64_t x, int64_t y)
985/ {
986/ 	int		negative;
987/ 	uint64_t	xt, yt, r;
988/
989/ 	if (x < 0) {
990/ 		xt = -(uint64_t) x;
991/ 		negative = 1;
992/ 	} else {
993/ 		xt = x;
994/ 		negative = 0;
995/ 	}
996/ 	if (y < 0) {
997/ 		yt = -(uint64_t) y;
998/ 		negative ^= 1;
999/ 	} else {
1000/ 		yt = y;
1001/ 	}
1002/ 	r = UDiv(xt, yt);
1003/ 	return (negative ? (int64_t) - r : r);
1004/ }
	ENTRY(__divdi3)
/
/ Signed 64-bit quotient.  Takes the magnitudes of x and y, divides
/ them with UDiv and negates the result when exactly one operand was
/ negative.  Both operands are on the stack and are left there on
/ return (plain ret); the quotient is returned in %edx:%eax.
/
/ Stack layout after the prologue (three pushes plus 8 bytes of locals):
/	32(%esp), 36(%esp)	LO(y), HI(y)
/	24(%esp), 28(%esp)	LO(x), HI(x)
/	(%esp), 4(%esp)		xt
/ %ebp holds the negative flag across the call to UDiv.
/
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ edx = HI(x)
	testl	%edx, %edx	/ sign of x (the movl below keep the flags)
	movl	24(%esp), %eax	/ eax = LO(x)
	movl	32(%esp), %esi	/ esi = LO(y)
	movl	36(%esp), %edi	/ edi = HI(y)
	js	.LL55		/ x < 0
	xorl	%ebp, %ebp	/ negative = 0
	testl	%edi, %edi	/ sign of y
	movl	%eax, (%esp)	/ xt = x
	movl	%edx, 4(%esp)
	movl	%esi, %eax	/ yt = y, in edx:eax
	movl	%edi, %edx
	js	.LL56		/ y < 0
.LL53:
/
/ Here xt is at (%esp) and yt is in edx:eax.
/
	pushl	%edx		/ HI(yt)
	pushl	%eax		/ LO(yt)
	movl	8(%esp), %eax	/ LO(xt)
	movl	12(%esp), %edx	/ HI(xt)
	call	UDiv		/ r = UDiv(xt, yt), in edx:eax
	popl	%ecx		/ discard LO(yt)
	testl	%ebp, %ebp	/ negative ?  (popl keeps the flags)
	popl	%esi		/ discard HI(yt)
	je	.LL54		/ no: return r unchanged
	negl	%eax		/ r = -r (64-bit negate)
	adcl	$0, %edx
	negl	%edx
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL55:
/
/ x < 0: xt = -x and negative = 1.  When y is also negative, control
/ falls through into .LL56.
/
	negl	%eax		/ xt = -x (64-bit negate)
	adcl	$0, %edx
	negl	%edx
	testl	%edi, %edi	/ sign of y
	movl	%eax, (%esp)	/ store xt
	movl	%edx, 4(%esp)
	movl	$1, %ebp	/ negative = 1
	movl	%esi, %eax	/ yt = y, in edx:eax
	movl	%edi, %edx
	jns	.LL53		/ y >= 0
	.align	16
.LL56:
/
/ y < 0: yt = -y and the negative flag is toggled.
/
	negl	%eax		/ yt = -y (64-bit negate)
	adcl	$0, %edx
	negl	%edx
	xorl	$1, %ebp	/ negative ^= 1
	jmp	.LL53
	SET_SIZE(__divdi3)
1062
1063/*
1064 * __moddi3
1065 *
1066 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.
1068 */
1069/ int64_t
1070/ __moddi3(int64_t x, int64_t y)
1071/ {
1072/ 	uint64_t	xt, yt, rem;
1073/
1074/ 	if (x < 0) {
1075/ 		xt = -(uint64_t) x;
1076/ 	} else {
1077/ 		xt = x;
1078/ 	}
1079/ 	if (y < 0) {
1080/ 		yt = -(uint64_t) y;
1081/ 	} else {
1082/ 		yt = y;
1083/ 	}
1084/ 	(void) UDivRem(xt, yt, &rem);
1085/ 	return (x < 0 ? (int64_t) - rem : rem);
1086/ }
	ENTRY(__moddi3)
/
/ Signed 64-bit remainder.  Takes the magnitudes of x and y, calls
/ UDivRem and negates the remainder when x was negative.  Both operands
/ are on the stack and are left there on return (plain ret); the
/ remainder is returned in %edx:%eax.
/
/ Stack layout after the prologue (two pushes plus 20 bytes of locals):
/	40(%esp), 44(%esp)	LO(y), HI(y)
/	32(%esp), 36(%esp)	LO(x), HI(x)
/	8(%esp), 12(%esp)	rem, written by UDivRem
/	(%esp), 4(%esp)		xt
/
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ ecx = HI(x), used for the sign test
	movl	32(%esp), %esi	/ esi = LO(x)
	movl	36(%esp), %edi	/ edi = HI(x)
	testl	%ecx, %ecx	/ sign of x (the movl below keep the flags)
	movl	40(%esp), %eax	/ eax = LO(y)
	movl	44(%esp), %edx	/ edx = HI(y)
	movl	%esi, (%esp)	/ xt = x
	movl	%edi, 4(%esp)
	js	.LL63		/ x < 0
	testl	%edx, %edx	/ sign of y
	movl	%eax, %esi	/ yt = y, in edi:esi
	movl	%edx, %edi
	js	.LL64		/ y < 0
.LL61:
/
/ Here xt is at (%esp) and yt is in edi:esi.
/
	leal	8(%esp), %eax	/ eax = address of rem
	pushl	%eax		/ pmod
	pushl	%edi		/ HI(yt)
	pushl	%esi		/ LO(yt)
	movl	12(%esp), %eax	/ LO(xt)
	movl	16(%esp), %edx	/ HI(xt)
	call	UDivRem		/ the quotient in edx:eax is ignored
	addl	$12, %esp	/ drop the three arguments
	movl	36(%esp), %edi	/ edi = HI(x), reloaded for the sign test
	testl	%edi, %edi	/ x < 0 ?
	movl	8(%esp), %eax	/ eax = LO(rem)
	movl	12(%esp), %edx	/ edx = HI(rem)
	js	.LL65		/ yes: negate the remainder
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret
	.align	16
.LL63:
/
/ x < 0: xt = -x.  When y is also negative, control falls through
/ into .LL64.
/
	negl	%esi		/ xt = -x (64-bit negate)
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ sign of y
	movl	%esi, (%esp)	/ store xt
	movl	%edi, 4(%esp)
	movl	%eax, %esi	/ yt = y, in edi:esi
	movl	%edx, %edi
	jns	.LL61		/ y >= 0
	.align	16
.LL64:
	negl	%esi		/ yt = -y (64-bit negate)
	adcl	$0, %edi
	negl	%edi
	jmp	.LL61
	.align	16
.LL65:
/
/ x < 0: return -rem.  The epilogue is interleaved with the negate.
/
	negl	%eax		/ low word of -rem; CF set when it was non-zero
	adcl	$0, %edx	/ fold that carry into the high word
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ high word of -rem
	popl	%edi
	ret
	SET_SIZE(__moddi3)
1149
1150/*
1151 * __udivrem64
1152 *
1153 * Perform division of two unsigned 64-bit quantities, returning the
1154 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
1155 * pops the arguments on return.
1156 */
1157	ENTRY(__udivrem64)
1158	subl	$12, %esp
1159	movl	%esp, %ecx	/, tmp64
1160	movl	16(%esp), %eax	/ x, x
1161	movl	20(%esp), %edx	/ x, x
1162	pushl	%ecx		/ tmp64
1163	pushl	32(%esp)	/ y
1164	pushl	32(%esp)
1165	call	UDivRem
1166	movl	16(%esp), %ecx	/ rem, tmp63
1167	movl	12(%esp), %esi	/ rem
1168	addl	$24, %esp
1169	ret	$16
1170	SET_SIZE(__udivrem64)
1171
1172/*
1173 * Signed division with remainder.
1174 */
1175/ int64_t
1176/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
1177/ {
1178/ 	int		negative;
1179/ 	uint64_t	xt, yt, r, rem;
1180/
1181/ 	if (x < 0) {
1182/ 		xt = -(uint64_t) x;
1183/ 		negative = 1;
1184/ 	} else {
1185/ 		xt = x;
1186/ 		negative = 0;
1187/ 	}
1188/ 	if (y < 0) {
1189/ 		yt = -(uint64_t) y;
1190/ 		negative ^= 1;
1191/ 	} else {
1192/ 		yt = y;
1193/ 	}
1194/ 	r = UDivRem(xt, yt, &rem);
1195/ 	*pmod = (x < 0 ? (int64_t) - rem : rem);
1196/ 	return (negative ? (int64_t) - r : r);
1197/ }
	ENTRY(SDivRem)
/
/ SDivRem(x, y, pmod): signed 64-bit division with remainder.
/
/ In:	%edx:%eax	x (dividend)
/	stack		y (divisor, low word first), then pmod
/ Out:	%edx:%eax	quotient
/	*pmod		remainder, negated when x was negative
/
/ %ebp, %edi and %esi are saved and restored; %ecx is scratch.  The
/ stack arguments are left in place (plain ret).
/
/ Stack layout after the prologue (three pushes plus 24 bytes of locals):
/	48(%esp)		pmod
/	40(%esp), 44(%esp)	LO(y), HI(y)
/	16(%esp), 20(%esp)	rem, written by UDivRem
/	8(%esp), 12(%esp)	xt
/	(%esp), 4(%esp)		r, the unsigned quotient
/ %ebp holds the negative flag and %edi holds HI(x) across the call.
/
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ sign of x (the movl below keeps the flags)
	movl	%edx, %edi	/ edi = HI(x), kept for the sign test later
	js	.LL73		/ x < 0
	movl	44(%esp), %esi	/ esi = HI(y)
	xorl	%ebp, %ebp	/ negative = 0
	testl	%esi, %esi	/ sign of y
	movl	%edx, 12(%esp)	/ xt = x
	movl	%eax, 8(%esp)
	movl	40(%esp), %edx	/ yt = y, in ecx:edx
	movl	44(%esp), %ecx
	js	.LL74		/ y < 0
.LL70:
/
/ Here xt is at 8(%esp) and yt is in ecx:edx.
/
	leal	16(%esp), %eax	/ eax = address of rem
	pushl	%eax		/ pmod for UDivRem
	pushl	%ecx		/ HI(yt)
	pushl	%edx		/ LO(yt)
	movl	20(%esp), %eax	/ LO(xt)
	movl	24(%esp), %edx	/ HI(xt)
	call	UDivRem		/ r in edx:eax, rem stored through the pointer
	movl	%edx, 16(%esp)	/ save HI(r); becomes 4(%esp) after the addl
	movl	%eax, 12(%esp)	/ save LO(r); becomes (%esp) after the addl
	addl	$12, %esp	/ drop the three arguments
	testl	%edi, %edi	/ x < 0 ?
	movl	16(%esp), %edx	/ edx = LO(rem)
	movl	20(%esp), %ecx	/ ecx = HI(rem)
	js	.LL75		/ yes: negate the remainder
.LL71:
	movl	48(%esp), %edi	/ edi = pmod
	testl	%ebp, %ebp	/ negative ?  (the movl below keep the flags)
	movl	%edx, (%edi)	/ low word of *pmod
	movl	%ecx, 4(%edi)	/ high word of *pmod
	movl	(%esp), %eax	/ eax = LO(r)
	movl	4(%esp), %edx	/ edx = HI(r)
	je	.LL72		/ no: return r unchanged
	negl	%eax		/ r = -r (64-bit negate)
	adcl	$0, %edx
	negl	%edx
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
/
/ x < 0: xt = -x and negative = 1.  When y is also negative, control
/ falls through into .LL74.
/
	negl	%eax		/ low word of -x; CF set when it was non-zero
	adcl	$0, %edx	/ fold that carry into the high word
	movl	44(%esp), %esi	/ esi = HI(y)
	negl	%edx		/ high word of -x
	testl	%esi, %esi	/ sign of y
	movl	%edx, 12(%esp)	/ store xt
	movl	%eax, 8(%esp)
	movl	$1, %ebp	/ negative = 1
	movl	40(%esp), %edx	/ yt = y, in ecx:edx
	movl	44(%esp), %ecx
	jns	.LL70		/ y >= 0
	.align	16
.LL74:
/
/ y < 0: yt = -y and the negative flag is toggled.
/
	negl	%edx		/ yt = -y (64-bit negate)
	adcl	$0, %ecx
	negl	%ecx
	xorl	$1, %ebp	/ negative ^= 1
	jmp	.LL70
	.align	16
.LL75:
/
/ x < 0: the remainder takes the sign of x.
/
	negl	%edx		/ rem = -rem (64-bit negate)
	adcl	$0, %ecx
	negl	%ecx
	jmp	.LL71
	SET_SIZE(SDivRem)
1273
1274/*
1275 * __divrem64
1276 *
1277 * Perform division of two signed 64-bit quantities, returning the
1278 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
1279 * pops the arguments on return.
1280 */
1281	ENTRY(__divrem64)
1282	subl	$20, %esp
1283	movl	%esp, %ecx	/, tmp64
1284	movl	24(%esp), %eax	/ x, x
1285	movl	28(%esp), %edx	/ x, x
1286	pushl	%ecx		/ tmp64
1287	pushl	40(%esp)	/ y
1288	pushl	40(%esp)
1289	call	SDivRem
1290	movl	16(%esp), %ecx
1291	movl	12(%esp),%esi	/ rem
1292	addl	$32, %esp
1293	ret	$16
1294	SET_SIZE(__divrem64)
1295
1296
1297#endif /* __lint */
1298
1299#endif /* defined(__i386) && !defined(__amd64) */
1300