xref: /titanic_50/usr/src/cmd/sgs/rtld.4.x/umultiply.s (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate/*
2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate *
4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate * with the License.
8*7c478bd9Sstevel@tonic-gate *
9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate *
14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate *
20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate */
22*7c478bd9Sstevel@tonic-gate/*
23*7c478bd9Sstevel@tonic-gate *	.seg	"data"
24*7c478bd9Sstevel@tonic-gate *	.asciz	"Copyr 1987 Sun Micro"
25*7c478bd9Sstevel@tonic-gate *	.align	4
26*7c478bd9Sstevel@tonic-gate */
27*7c478bd9Sstevel@tonic-gate	.seg	"text"
28*7c478bd9Sstevel@tonic-gate
29*7c478bd9Sstevel@tonic-gate#ident	"%Z%%M%	%I%	%E% SMI"
30*7c478bd9Sstevel@tonic-gate
31*7c478bd9Sstevel@tonic-gate!	Copyright (c) 1987 by Sun Microsystems, Inc.
32*7c478bd9Sstevel@tonic-gate
33*7c478bd9Sstevel@tonic-gate
34*7c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h>
35*7c478bd9Sstevel@tonic-gate
36*7c478bd9Sstevel@tonic-gate/*
37*7c478bd9Sstevel@tonic-gate * procedure to perform a 32 by 32 unsigned integer multiply.
38*7c478bd9Sstevel@tonic-gate * pass the multiplier into %o0, and the multiplicand into %o1
39*7c478bd9Sstevel@tonic-gate * the least significant 32 bits of the result will be returned in %o0,
40*7c478bd9Sstevel@tonic-gate * and the most significant in %o1
41*7c478bd9Sstevel@tonic-gate *
42*7c478bd9Sstevel@tonic-gate * Most unsigned integer multiplies involve small numbers, so it is
43*7c478bd9Sstevel@tonic-gate * worthwhile to optimize for short multiplies at the expense of long
44*7c478bd9Sstevel@tonic-gate * multiplies.  This code checks the size of the multiplier, and has
45*7c478bd9Sstevel@tonic-gate * special cases for the following:
46*7c478bd9Sstevel@tonic-gate *
47*7c478bd9Sstevel@tonic-gate *	4 or fewer bit multipliers:	19 or 21 instruction cycles
48*7c478bd9Sstevel@tonic-gate *	8 or fewer bit multipliers:	26 or 28 instruction cycles
49*7c478bd9Sstevel@tonic-gate *	12 or fewer bit multipliers:	34 or 36 instruction cycles
50*7c478bd9Sstevel@tonic-gate *	16 or fewer bit multipliers:	42 or 44 instruction cycles
51*7c478bd9Sstevel@tonic-gate *
52*7c478bd9Sstevel@tonic-gate * Long multipliers require 58 or 60 instruction cycles.
53*7c478bd9Sstevel@tonic-gate *
54*7c478bd9Sstevel@tonic-gate * This code indicates that overflow has occurred by leaving the Z condition
55*7c478bd9Sstevel@tonic-gate * code clear. The following call sequence would be used if you wish to
56*7c478bd9Sstevel@tonic-gate * deal with overflow:
57*7c478bd9Sstevel@tonic-gate *
58*7c478bd9Sstevel@tonic-gate *	 	call	.umul
59*7c478bd9Sstevel@tonic-gate *		nop		( or set up last parameter here )
60*7c478bd9Sstevel@tonic-gate *		bnz	overflow_code	(or tnz to overflow handler)
61*7c478bd9Sstevel@tonic-gate */
62*7c478bd9Sstevel@tonic-gate
63*7c478bd9Sstevel@tonic-gate!	RTENTRY(.umul)
	!
	! .umul: unsigned 32 x 32 -> 64 bit multiply built from mulscc steps.
	!
	!	In:	%o0 = multiplier, %o1 = multiplicand
	!	Out:	%o0 = low 32 bits of the product
	!		%o1 = high 32 bits of the product
	!		Z is set when the high 32 bits are zero (no overflow)
	!	Scratch: %o4 (partial product), %o5 (16-bit mask; the short
	!		paths later reuse it for a copy of %y), %y, and the
	!		integer condition codes
	!
	! The entry code classifies the multiplier by size and branches to
	! one of the short paths (umul_4bit, umul_8bit, umul_12bit,
	! umul_16bit); anything wider falls through to the 32-step long
	! multiply.
	!
64*7c478bd9Sstevel@tonic-gate	.global	.umul
65*7c478bd9Sstevel@tonic-gate.umul:
66*7c478bd9Sstevel@tonic-gate	wr	%o0, %y			! multiplier to Y register
67*7c478bd9Sstevel@tonic-gate
68*7c478bd9Sstevel@tonic-gate	andncc	%o0, 0xf, %o4		! mask out lower 4 bits; if branch
69*7c478bd9Sstevel@tonic-gate					! taken, %o4, N and V have been cleared
70*7c478bd9Sstevel@tonic-gate
71*7c478bd9Sstevel@tonic-gate	be	umul_4bit		! 4-bit multiplier
72*7c478bd9Sstevel@tonic-gate	sethi	%hi(0xffff0000), %o5	! mask for 16-bit case; have to
73*7c478bd9Sstevel@tonic-gate					! wait 3 instructions after wr
74*7c478bd9Sstevel@tonic-gate					! before %y has stabilized anyway
75*7c478bd9Sstevel@tonic-gate
	!
	! The remaining dispatch branches use "be,a": the delay-slot mulscc
	! is annulled unless the branch is taken, so it serves as the first
	! multiply step of the short path only.  When a branch is taken the
	! preceding andncc/andcc result was zero, so %o4 = 0 and N and V are
	! clear, which is the starting state mulscc needs.
	!
76*7c478bd9Sstevel@tonic-gate	andncc	%o0, 0xff, %o4
77*7c478bd9Sstevel@tonic-gate	be,a	umul_8bit		! 8-bit multiplier
78*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! first iteration of 9
79*7c478bd9Sstevel@tonic-gate
80*7c478bd9Sstevel@tonic-gate	andncc	%o0, 0xfff, %o4
81*7c478bd9Sstevel@tonic-gate	be,a	umul_12bit		! 12-bit multiplier
82*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! first iteration of 13
83*7c478bd9Sstevel@tonic-gate
	! %o5 holds 0xffff0000 (loaded by the sethi above): test bits 31-16.
84*7c478bd9Sstevel@tonic-gate	andcc	%o0, %o5, %o4
85*7c478bd9Sstevel@tonic-gate	be,a	umul_16bit		! 16-bit multiplier
86*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! first iteration of 17
87*7c478bd9Sstevel@tonic-gate
88*7c478bd9Sstevel@tonic-gate	andcc	%g0, %g0, %o4		! zero the partial product
89*7c478bd9Sstevel@tonic-gate					! and clear N and V conditions
90*7c478bd9Sstevel@tonic-gate	!
91*7c478bd9Sstevel@tonic-gate	! long multiply
92*7c478bd9Sstevel@tonic-gate	!
93*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! first iteration of 33
94*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
95*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
96*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
97*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
98*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
99*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
100*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
101*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
102*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
103*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
104*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
105*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
106*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
107*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
108*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
109*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
110*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
111*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
112*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
113*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
114*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
115*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
116*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
117*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
118*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
119*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
120*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
121*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
122*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
123*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
124*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! 32nd iteration
125*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %g0, %o4		! last iteration only shifts
126*7c478bd9Sstevel@tonic-gate	!
127*7c478bd9Sstevel@tonic-gate	! For unsigned multiplies, a pure shift-and-add approach yields the
128*7c478bd9Sstevel@tonic-gate	! correct result.  Signed multiplies introduce complications.
129*7c478bd9Sstevel@tonic-gate	!
130*7c478bd9Sstevel@tonic-gate	! With 32-bit twos-complement numbers, -x can be represented as
131*7c478bd9Sstevel@tonic-gate	!
132*7c478bd9Sstevel@tonic-gate	!	((2 - (x/(2**32))) mod 2) * 2**32.
133*7c478bd9Sstevel@tonic-gate	!
134*7c478bd9Sstevel@tonic-gate	! To simplify the equations, the radix point can be moved to just
135*7c478bd9Sstevel@tonic-gate	! to the left of the sign bit.  So:
136*7c478bd9Sstevel@tonic-gate	!
137*7c478bd9Sstevel@tonic-gate	! 	 x *  y	= (xy) mod 2
138*7c478bd9Sstevel@tonic-gate	!	-x *  y	= (2 - x) mod 2 * y = (2y - xy) mod 2
139*7c478bd9Sstevel@tonic-gate	!	 x * -y	= x * (2 - y) mod 2 = (2x - xy) mod 2
140*7c478bd9Sstevel@tonic-gate	!	-x * -y = (2 - x) * (2 - y) = (4 - 2x - 2y + xy) mod 2
141*7c478bd9Sstevel@tonic-gate	!
142*7c478bd9Sstevel@tonic-gate	! Because of the way the shift into the partial product is calculated
143*7c478bd9Sstevel@tonic-gate	! (N xor V), the extra term is automagically removed for negative
144*7c478bd9Sstevel@tonic-gate	! multiplicands, so no adjustment is necessary.
145*7c478bd9Sstevel@tonic-gate	!
146*7c478bd9Sstevel@tonic-gate	! But for unsigned multiplies, the high-order bit of the multiplicand
147*7c478bd9Sstevel@tonic-gate	! is incorrectly treated as a sign bit.  For unsigned multiplies where
148*7c478bd9Sstevel@tonic-gate	! the high-order bit of the multiplicand is one, the result is
149*7c478bd9Sstevel@tonic-gate	!
150*7c478bd9Sstevel@tonic-gate	!	xy - y * (2**32)
151*7c478bd9Sstevel@tonic-gate	!
152*7c478bd9Sstevel@tonic-gate	! we fix that here
153*7c478bd9Sstevel@tonic-gate	!
	! %o1 still holds the caller's multiplicand; bge is taken when its
	! high-order bit is clear, which skips the correction below.
	!
154*7c478bd9Sstevel@tonic-gate	tst	%o1
155*7c478bd9Sstevel@tonic-gate	bge	1f
156*7c478bd9Sstevel@tonic-gate	nop
157*7c478bd9Sstevel@tonic-gate
158*7c478bd9Sstevel@tonic-gate	add	%o4, %o0, %o4		! add (2**32) * %o0; bits 63-32
159*7c478bd9Sstevel@tonic-gate					! of the product are in %o4
160*7c478bd9Sstevel@tonic-gate	!
161*7c478bd9Sstevel@tonic-gate	! The multiply hasn't overflowed if the high-order bits are 0
162*7c478bd9Sstevel@tonic-gate	!
163*7c478bd9Sstevel@tonic-gate	! if you are not interested in detecting overflow,
164*7c478bd9Sstevel@tonic-gate	! replace the following code with:
165*7c478bd9Sstevel@tonic-gate	!
166*7c478bd9Sstevel@tonic-gate	!	1:
167*7c478bd9Sstevel@tonic-gate	!		rd	%y, %o0
168*7c478bd9Sstevel@tonic-gate	!		retl
169*7c478bd9Sstevel@tonic-gate	!		mov	%o4, %o1
170*7c478bd9Sstevel@tonic-gate	!
	! After the 33 steps the low word of the product is read from %y
	! and the high word is in %o4.
	!
171*7c478bd9Sstevel@tonic-gate1:
172*7c478bd9Sstevel@tonic-gate	rd	%y, %o0
173*7c478bd9Sstevel@tonic-gate	retl				! leaf routine return
174*7c478bd9Sstevel@tonic-gate	addcc	%o4, %g0, %o1		! return high-order bits and set Z if
175*7c478bd9Sstevel@tonic-gate					! high order bits are 0
176*7c478bd9Sstevel@tonic-gate	!
177*7c478bd9Sstevel@tonic-gate	! 4-bit multiply
178*7c478bd9Sstevel@tonic-gate	!
	! Reached from .umul when the multiplier (%o0) has no bits set above
	! bit 3.  On entry %o4 = 0 and N and V are clear (from the andncc at
	! the dispatch), %y holds the multiplier and %o1 the multiplicand.
	! Four multiply steps plus one shift-only step are performed; the
	! result is then reassembled from %o4 and the top 4 bits of %y.
	!
179*7c478bd9Sstevel@tonic-gateumul_4bit:
180*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! first iteration of 5
181*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
182*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
183*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! 4th iteration
184*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %g0, %o4		! last iteration only shifts
185*7c478bd9Sstevel@tonic-gate
186*7c478bd9Sstevel@tonic-gate	rd	%y, %o5
187*7c478bd9Sstevel@tonic-gate	!
188*7c478bd9Sstevel@tonic-gate	! The following code adds (2**32) * %o0 to the product if the
189*7c478bd9Sstevel@tonic-gate	! multiplicand had its high bit set (see 32-bit case for explanation)
190*7c478bd9Sstevel@tonic-gate	!
	! tst samples the sign of the original multiplicand; the sra sits in
	! a non-annulled delay slot, so it always replaces %o1 with the high
	! word whether or not the branch is taken.
	!
191*7c478bd9Sstevel@tonic-gate	tst	%o1
192*7c478bd9Sstevel@tonic-gate	bge	2f
193*7c478bd9Sstevel@tonic-gate	sra	%o4, 28, %o1		! right shift high bits by 28 bits
194*7c478bd9Sstevel@tonic-gate
195*7c478bd9Sstevel@tonic-gate	add	%o1, %o0, %o1
196*7c478bd9Sstevel@tonic-gate	!
197*7c478bd9Sstevel@tonic-gate	! The multiply hasn't overflowed if high-order bits are 0
198*7c478bd9Sstevel@tonic-gate	!
199*7c478bd9Sstevel@tonic-gate	! if you are not interested in detecting overflow,
200*7c478bd9Sstevel@tonic-gate	! replace the following code with:
201*7c478bd9Sstevel@tonic-gate	!
202*7c478bd9Sstevel@tonic-gate	!	2:
203*7c478bd9Sstevel@tonic-gate	!		sll	%o4, 4, %o0
204*7c478bd9Sstevel@tonic-gate	!		srl	%o5, 28, %o5
205*7c478bd9Sstevel@tonic-gate	!		retl
206*7c478bd9Sstevel@tonic-gate	!		or	%o5, %o0, %o0
207*7c478bd9Sstevel@tonic-gate	!
208*7c478bd9Sstevel@tonic-gate2:
209*7c478bd9Sstevel@tonic-gate	sll	%o4, 4, %o0		! left shift middle bits by 4 bits
210*7c478bd9Sstevel@tonic-gate	srl	%o5, 28, %o5		! right shift low bits by 28 bits
211*7c478bd9Sstevel@tonic-gate	or	%o5, %o0, %o0		! merge for true product
212*7c478bd9Sstevel@tonic-gate	retl				! leaf routine return
213*7c478bd9Sstevel@tonic-gate	tst	%o1			! set Z if high order bits are 0
214*7c478bd9Sstevel@tonic-gate	!
215*7c478bd9Sstevel@tonic-gate	! 8-bit multiply
216*7c478bd9Sstevel@tonic-gate	!
	! Reached from .umul when the multiplier (%o0) has no bits set above
	! bit 7.  The first multiply step has already been executed in the
	! annulled delay slot of the dispatching "be,a", so only steps 2-8
	! and the final shift-only step appear here.  The result is then
	! reassembled from %o4 and the top 8 bits of %y.
	!
217*7c478bd9Sstevel@tonic-gateumul_8bit:
218*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! second iteration of 9
219*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
220*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
221*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
222*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
223*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
224*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! 8th iteration
225*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %g0, %o4		! last iteration only shifts
226*7c478bd9Sstevel@tonic-gate
227*7c478bd9Sstevel@tonic-gate	rd	%y, %o5
228*7c478bd9Sstevel@tonic-gate	!
229*7c478bd9Sstevel@tonic-gate	! The following code adds (2**32) * %o0 to the product if the
230*7c478bd9Sstevel@tonic-gate	! multiplicand had its high bit set (see 32-bit case for explanation)
231*7c478bd9Sstevel@tonic-gate	!
	! tst samples the sign of the original multiplicand; the sra sits in
	! a non-annulled delay slot, so it always replaces %o1 with the high
	! word whether or not the branch is taken.
	!
232*7c478bd9Sstevel@tonic-gate	tst	%o1
233*7c478bd9Sstevel@tonic-gate	bge	3f
234*7c478bd9Sstevel@tonic-gate	sra	%o4, 24, %o1		! right shift high bits by 24 bits
235*7c478bd9Sstevel@tonic-gate
236*7c478bd9Sstevel@tonic-gate	add	%o1, %o0, %o1
237*7c478bd9Sstevel@tonic-gate	!
238*7c478bd9Sstevel@tonic-gate	! The multiply hasn't overflowed if high-order bits are 0
239*7c478bd9Sstevel@tonic-gate	!
240*7c478bd9Sstevel@tonic-gate	! if you are not interested in detecting overflow,
241*7c478bd9Sstevel@tonic-gate	! replace the following code with:
242*7c478bd9Sstevel@tonic-gate	!
243*7c478bd9Sstevel@tonic-gate	!	3:
244*7c478bd9Sstevel@tonic-gate	!		sll	%o4, 8, %o0
245*7c478bd9Sstevel@tonic-gate	!		srl	%o5, 24, %o5
246*7c478bd9Sstevel@tonic-gate	!		retl
247*7c478bd9Sstevel@tonic-gate	!		or	%o5, %o0, %o0
248*7c478bd9Sstevel@tonic-gate	!
249*7c478bd9Sstevel@tonic-gate3:
250*7c478bd9Sstevel@tonic-gate	sll	%o4, 8, %o0		! left shift middle bits by 8 bits
251*7c478bd9Sstevel@tonic-gate	srl	%o5, 24, %o5		! right shift low bits by 24 bits
252*7c478bd9Sstevel@tonic-gate	or	%o5, %o0, %o0		! merge for true product
253*7c478bd9Sstevel@tonic-gate	retl				! leaf routine return
254*7c478bd9Sstevel@tonic-gate	tst	%o1			! set Z if high order bits are 0
255*7c478bd9Sstevel@tonic-gate	!
256*7c478bd9Sstevel@tonic-gate	! 12-bit multiply
257*7c478bd9Sstevel@tonic-gate	!
	! Reached from .umul when the multiplier (%o0) has no bits set above
	! bit 11.  The first multiply step has already been executed in the
	! annulled delay slot of the dispatching "be,a", so only steps 2-12
	! and the final shift-only step appear here.  The result is then
	! reassembled from %o4 and the top 12 bits of %y.
	!
258*7c478bd9Sstevel@tonic-gateumul_12bit:
259*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! second iteration of 13
260*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
261*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
262*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
263*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
264*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
265*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
266*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
267*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
268*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
269*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! 12th iteration
270*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %g0, %o4		! last iteration only shifts
271*7c478bd9Sstevel@tonic-gate
272*7c478bd9Sstevel@tonic-gate	rd	%y, %o5
273*7c478bd9Sstevel@tonic-gate	!
274*7c478bd9Sstevel@tonic-gate	! The following code adds (2**32) * %o0 to the product if the
275*7c478bd9Sstevel@tonic-gate	! multiplicand had its high bit set (see 32-bit case for explanation)
276*7c478bd9Sstevel@tonic-gate	!
	! tst samples the sign of the original multiplicand; the sra sits in
	! a non-annulled delay slot, so it always replaces %o1 with the high
	! word whether or not the branch is taken.
	!
277*7c478bd9Sstevel@tonic-gate	tst	%o1
278*7c478bd9Sstevel@tonic-gate	bge	4f
279*7c478bd9Sstevel@tonic-gate	sra	%o4, 20, %o1		! right shift high bits by 20 bits
280*7c478bd9Sstevel@tonic-gate
281*7c478bd9Sstevel@tonic-gate	add	%o1, %o0, %o1
282*7c478bd9Sstevel@tonic-gate	!
283*7c478bd9Sstevel@tonic-gate	! The multiply hasn't overflowed if high-order bits are 0
284*7c478bd9Sstevel@tonic-gate	!
285*7c478bd9Sstevel@tonic-gate	! if you are not interested in detecting overflow,
286*7c478bd9Sstevel@tonic-gate	! replace the following code with:
287*7c478bd9Sstevel@tonic-gate	!
288*7c478bd9Sstevel@tonic-gate	!	4:
289*7c478bd9Sstevel@tonic-gate	!		sll	%o4, 12, %o0
290*7c478bd9Sstevel@tonic-gate	!		srl	%o5, 20, %o5
291*7c478bd9Sstevel@tonic-gate	!		retl
292*7c478bd9Sstevel@tonic-gate	!		or	%o5, %o0, %o0
293*7c478bd9Sstevel@tonic-gate	!
294*7c478bd9Sstevel@tonic-gate4:
295*7c478bd9Sstevel@tonic-gate	sll	%o4, 12, %o0		! left shift middle bits by 12 bits
296*7c478bd9Sstevel@tonic-gate	srl	%o5, 20, %o5		! right shift low bits by 20 bits
297*7c478bd9Sstevel@tonic-gate	or	%o5, %o0, %o0		! merge for true product
298*7c478bd9Sstevel@tonic-gate	retl				! leaf routine return
299*7c478bd9Sstevel@tonic-gate	tst	%o1			! set Z if high order bits are 0
300*7c478bd9Sstevel@tonic-gate	!
301*7c478bd9Sstevel@tonic-gate	! 16-bit multiply
302*7c478bd9Sstevel@tonic-gate	!
	! Reached from .umul when the multiplier (%o0) has no bits set in
	! bits 31-16.  The first multiply step has already been executed in
	! the annulled delay slot of the dispatching "be,a", so only steps
	! 2-16 and the final shift-only step appear here.  The result is
	! then reassembled from %o4 and the top 16 bits of %y.
	!
303*7c478bd9Sstevel@tonic-gateumul_16bit:
304*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! second iteration of 17
305*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
306*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
307*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
308*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
309*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
310*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
311*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
312*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
313*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
314*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
315*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
316*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
317*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4
318*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %o1, %o4		! 16th iteration
319*7c478bd9Sstevel@tonic-gate	mulscc	%o4, %g0, %o4		! last iteration only shifts
320*7c478bd9Sstevel@tonic-gate
321*7c478bd9Sstevel@tonic-gate	rd	%y, %o5
322*7c478bd9Sstevel@tonic-gate	!
323*7c478bd9Sstevel@tonic-gate	! The following code adds (2**32) * %o0 to the product if the
324*7c478bd9Sstevel@tonic-gate	! multiplicand had its high bit set (see 32-bit case for explanation)
325*7c478bd9Sstevel@tonic-gate	!
	! tst samples the sign of the original multiplicand; the sra sits in
	! a non-annulled delay slot, so it always replaces %o1 with the high
	! word whether or not the branch is taken.
	!
326*7c478bd9Sstevel@tonic-gate	tst	%o1
327*7c478bd9Sstevel@tonic-gate	bge	5f
328*7c478bd9Sstevel@tonic-gate	sra	%o4, 16, %o1		! right shift high bits by 16 bits
329*7c478bd9Sstevel@tonic-gate
330*7c478bd9Sstevel@tonic-gate	add	%o1, %o0, %o1
331*7c478bd9Sstevel@tonic-gate	!
332*7c478bd9Sstevel@tonic-gate	! The multiply hasn't overflowed if high-order bits are 0
333*7c478bd9Sstevel@tonic-gate	!
334*7c478bd9Sstevel@tonic-gate	! if you are not interested in detecting overflow,
335*7c478bd9Sstevel@tonic-gate	! replace the following code with:
336*7c478bd9Sstevel@tonic-gate	!
337*7c478bd9Sstevel@tonic-gate	!	5:
338*7c478bd9Sstevel@tonic-gate	!		sll	%o4, 16, %o0
339*7c478bd9Sstevel@tonic-gate	!		srl	%o5, 16, %o5
340*7c478bd9Sstevel@tonic-gate	!		retl
341*7c478bd9Sstevel@tonic-gate	!		or	%o5, %o0, %o0
342*7c478bd9Sstevel@tonic-gate	!
343*7c478bd9Sstevel@tonic-gate5:
344*7c478bd9Sstevel@tonic-gate	sll	%o4, 16, %o0		! left shift middle bits by 16 bits
345*7c478bd9Sstevel@tonic-gate	srl	%o5, 16, %o5		! right shift low bits by 16 bits
346*7c478bd9Sstevel@tonic-gate	or	%o5, %o0, %o0		! merge for true product
347*7c478bd9Sstevel@tonic-gate	retl				! leaf routine return
348*7c478bd9Sstevel@tonic-gate	tst	%o1			! set Z if high order bits are 0
349