xref: /illumos-gate/usr/src/lib/libc/sparcv9/fp/__quad_mag64.S (revision fdd3baea1de807613d7541b2fad475760768584b)
1!
2! Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3! Use is subject to license terms.
4!
5! CDDL HEADER START
6!
7! The contents of this file are subject to the terms of the
8! Common Development and Distribution License, Version 1.0 only
9! (the "License").  You may not use this file except in compliance
10! with the License.
11!
12! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
13! or http://www.opensolaris.org/os/licensing.
14! See the License for the specific language governing permissions
15! and limitations under the License.
16!
17! When distributing Covered Code, include this CDDL HEADER in each
18! file and include the License file at usr/src/OPENSOLARIS.LICENSE.
19! If applicable, add the following below this CDDL HEADER, with the
20! fields enclosed by brackets "[]" replaced with your own identifying
21! information: Portions Copyright [yyyy] [name of copyright owner]
22!
23! CDDL HEADER END
24!
25
26.ident	"%Z%%M%	%I%	%E% SMI"
27
28! /*
29!  * This file contains __quad_mag_add and __quad_mag_sub, the core
30!  * of the quad precision add and subtract operations.
31!  */
32! SPARC V9 version hand-coded in assembly to use 64-bit integer registers
33
34	.file	"__quad_mag64.s"
35
36#include <sys/asm_linkage.h>
37
38! union longdouble {
39! 	struct {
40! 		unsigned int	msw;
41! 		unsigned int	frac2;
42! 		unsigned int	frac3;
43! 		unsigned int	frac4;
44! 	} l;
45! 	struct {
46! 		unsigned long	msll;
47! 		unsigned long	frac;
48! 	} ll;
49! 	long double	d;
50! };
51!
52! /*
53!  * __quad_mag_add(x, y, z, fsr)
54!  *
55!  * Sets *z = *x + *y, rounded according to the rounding mode in *fsr,
56!  * and updates the current exceptions in *fsr.  This routine assumes
57!  * *x and *y are finite, with the same sign (i.e., an addition of
58!  * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
59!  */
60! void
61! __quad_mag_add(const union longdouble *x, const union longdouble *y,
62! 	union longdouble *z, unsigned int *fsr)
63! {
64! 	unsigned long	lx, ly, frac, sticky;
65! 	unsigned int	ex, ey, round, rm;
66! 	int		e, uflo;
67!
68! 	/* get the leading significand double-words and exponents */
69! 	ex = (x->ll.msll >> 48) & 0x7fff;
70! 	lx = x->ll.msll & ~0xffff000000000000ul;
71! 	if (ex == 0)
72! 		ex = 1;
73! 	else
74! 		lx |= 0x0001000000000000ul;
75!
76! 	ey = (y->ll.msll >> 48) & 0x7fff;
77! 	ly = y->ll.msll & ~0xffff000000000000ul;
78! 	if (ey == 0)
79! 		ey = 1;
80! 	else
81! 		ly |= 0x0001000000000000ul;
82!
83! 	/* prenormalize y */
84! 	e = (int) ex - (int) ey;
85! 	round = sticky = 0;
86! 	if (e >= 114) {
87! 		frac = x->ll.frac;
88! 		sticky = ly | y->ll.frac;
89! 	} else {
90! 		frac = y->ll.frac;
91! 		if (e >= 64) {
92! 			sticky = frac & 0x7ffffffffffffffful;
93! 			round = frac >> 63;
94! 			frac = ly;
95! 			ly = 0;
96! 			e -= 64;
97! 		}
98! 		if (e) {
99! 			sticky |= round | (frac & ((1ul << (e - 1)) - 1));
100! 			round = (frac >> (e - 1)) & 1;
101! 			frac = (frac >> e) | (ly << (64 - e));
102! 			ly >>= e;
103! 		}
104!
105! 		/* add, propagating carries */
106! 		frac += x->ll.frac;
107! 		lx += ly;
108! 		if (frac < x->ll.frac)
109! 			lx++;
110!
111! 		/* postnormalize */
112! 		if (lx >= 0x0002000000000000ul) {
113! 			sticky |= round;
114! 			round = frac & 1;
115! 			frac = (frac >> 1) | (lx << 63);
116! 			lx >>= 1;
117! 			ex++;
118! 		}
119! 	}
120!
121! 	/* keep track of whether the result before rounding is tiny */
122! 	uflo = (lx < 0x0001000000000000ul);
123!
124! 	/* get the rounding mode, fudging directed rounding modes
125! 	   as though the result were positive */
126! 	rm = *fsr >> 30;
127! 	if (z->l.msw)
128! 		rm ^= (rm >> 1);
129!
130! 	/* see if we need to round */
131! 	if (round | sticky) {
132! 		*fsr |= FSR_NXC;
133!
134! 		/* round up if necessary */
135! 		if (rm == FSR_RP || (rm == FSR_RN && round &&
136! 			(sticky || (frac & 1)))) {
137! 			if (++frac == 0)
138! 				if (++lx >= 0x0002000000000000ul) {
139! 					lx >>= 1;
140! 					ex++;
141! 				}
142! 		}
143! 	}
144!
145! 	/* check for overflow */
146! 	if (ex >= 0x7fff) {
147! 		/* store the default overflowed result */
148! 		*fsr |= FSR_OFC | FSR_NXC;
149! 		if (rm == FSR_RN || rm == FSR_RP) {
150! 			z->l.msw |= 0x7fff0000;
151! 			z->l.frac2 = 0;
152! 			z->ll.frac = 0;
153! 		} else {
154! 			z->l.msw |= 0x7ffeffff;
155! 			z->l.frac2 = 0xffffffff;
156! 			z->ll.frac = 0xfffffffffffffffful;
157! 		}
158! 	} else {
159! 		/* store the result */
160! 		if (lx >= 0x0001000000000000ul)
161! 			z->l.msw |= (ex << 16);
162! 		z->l.msw |= (lx >> 32) & 0xffff;
163! 		z->l.frac2 = (lx & 0xffffffff);
164! 		z->ll.frac = frac;
165!
166! 		/* if the pre-rounded result was tiny and underflow trapping
167! 		   is enabled, simulate underflow */
168! 		if (uflo && (*fsr & FSR_UFM))
169! 			*fsr |= FSR_UFC;
170! 	}
171! }
172
173	ENTRY(__quad_mag_add)
174	save	%sp,-SA(MINFRAME),%sp
175
176	sethi	%hi(0xffff0000),%g1
177	sllx	%g1,32,%g1		! g1 = 0xffff000000000000
178
179	sethi	%hi(0x7fff),%l7
180	or	%l7,%lo(0x7fff),%l7	! l7 = 0x7fff
181
182	ldx	[%i0],%o0
183	srlx	%o0,48,%l0
184	andcc	%l0,%l7,%l0		! l0 = ex
185	beq,pn	%icc,1f
186	andn	%o0,%g1,%o0		! o0 = lx
187	ba,pt	%icc,2f
188	sub	%o0,%g1,%o0
1891:
190	mov	1,%l0
1912:
192
193	ldx	[%i1],%o1
194	srlx	%o1,48,%l1
195	andcc	%l1,%l7,%l1		! l1 = ey
196	beq,pn	%icc,1f
197	andn	%o1,%g1,%o1		! o1 = ly
198	ba,pt	%icc,2f
199	sub	%o1,%g1,%o1
2001:
201	mov	1,%l1
2022:
203
204	sub	%l0,%l1,%l1		! l1 = e = ex - ey
205	cmp	%l1,114			! see if we need to prenormalize
206	bge,pn	%icc,1f
207	mov	0,%l6			! l6 = round
208	mov	0,%o7			! o7 = sticky
209	cmp	%l1,64
210	bl,pt	%icc,3f
211	ldx	[%i1+8],%o2		! o2 = frac
212	sllx	%o2,1,%o7		! lop off high order bit
213	srlx	%o2,63,%l6
214	mov	%o1,%o2
215	mov	0,%o1
216	sub	%l1,64,%l1
2173:
218	tst	%l1
219	beq,pn	%icc,4f
220	sub	%l1,1,%l2
221	mov	1,%o3
222	sllx	%o3,%l2,%o3
223	sub	%o3,1,%o3
224	and	%o3,%o2,%o3
225	or	%o3,%l6,%o3
226	or	%o7,%o3,%o7
227	srlx	%o2,%l2,%o4
228	and	%o4,1,%l6
229	srlx	%o2,%l1,%o2
230	mov	64,%l3
231	sub	%l3,%l1,%l3
232	sllx	%o1,%l3,%o5
233	or	%o2,%o5,%o2
234	srlx	%o1,%l1,%o1
2354:
236	ldx	[%i0+8],%o3
237	add	%o2,%o3,%o2		! add, propagating carry
238	cmp	%o2,%o3
239	bgeu,pt %xcc,5f
240	add	%o0,%o1,%o0
241	add	%o0,1,%o0
2425:
243	srlx	%o0,49,%o5		! if sum carried out, postnormalize
244	tst	%o5
245	beq,pt	%icc,2f
246	nop
247	or	%o7,%l6,%o7
248	and	%o2,1,%l6
249	srlx	%o2,1,%o2
250	sllx	%o0,63,%o3
251	or	%o2,%o3,%o2
252	srlx	%o0,1,%o0
253	ba,pt	%icc,2f
254	add	%l0,1,%l0
2551:
256	ldx	[%i0+8],%o2		! (full prenormalization shift case)
257	ldx	[%i1+8],%o3
258	or	%o1,%o3,%o7
2592:
260
261	add	%o0,%g1,%o1		! see if sum is tiny
262	srlx	%o1,63,%l2		! l2 = uflo
263
264	ld	[%i3],%i4		! get the rounding mode
265	srl	%i4,30,%l3		! l3 = rm
266	ld	[%i2],%l4		! l4 = z->l.msw
267	tst	%l4
268	beq,pn	%icc,1f
269	srl	%l3,1,%l5
270	xor	%l3,%l5,%l3
2711:
272
273	orcc	%o7,%l6,%g0		! see if we need to round
274	beq,pn	%xcc,1f
275	andcc	%l3,1,%g0
276	or	%i4,1,%i4
277	bne,pn	%icc,1f
278	tst	%l3
279	bne,pn	%icc,2f
280	tst	%l6
281	beq,pn	%icc,1f
282	and	%o2,1,%o3
283	orcc	%o3,%o7,%g0
284	beq,pn	%xcc,1f
285	nop
2862:
287	addcc	%o2,1,%o2		! round up and check for carry out
288	bne,pt	%xcc,1f
289	nop
290	add	%o0,1,%o0
291	srlx	%o0,49,%o1
292	tst	%o1
293	beq,pt	%icc,1f
294	nop
295	srlx	%o0,1,%o0
296	add	%l0,1,%l0
2971:
298
299	cmp	%l0,%l7			! check for overflow
300	bge,pn	%icc,1f
301	addcc	%o0,%g1,%g0
302	bl,pn	%xcc,2f
303	sll	%l0,16,%l1
304	or	%l4,%l1,%l4
3052:
306	sllx	%o0,16,%o1
307	srlx	%o1,48,%o1
308	or	%l4,%o1,%l4
309	st	%l4,[%i2]
310	st	%o0,[%i2+4]
311	stx	%o2,[%i2+8]
312	tst	%l2			! see if we need to raise underflow
313	beq,pt	%icc,3f
314	srl	%i4,23,%i5
315	andcc	%i5,4,%i5
316	ba,pt	%icc,3f
317	or	%i4,%i5,%i4
318
3191:
320	andcc	%l3,1,%g0
321	bne,pn	%icc,2f
322	or	%i4,9,%i4		! overflow
323	sll	%l7,16,%l7		! 7fff00000...
324	or	%l4,%l7,%l4
325	st	%l4,[%i2]
326	st	%g0,[%i2+4]
327	ba,pt	%icc,3f
328	stx	%g0,[%i2+8]
3292:
330	mov	-1,%o0			! 7ffeffff...
331	sll	%l7,16,%l7
332	add	%o0,%l7,%l7
333	or	%l4,%l7,%l4
334	st	%l4,[%i2]
335	st	%o0,[%i2+4]
336	stx	%o0,[%i2+8]
337
3383:
339	st	%i4,[%i3]
340	ret
341	restore
342
343	SET_SIZE(__quad_mag_add)
344
345! /*
346!  * __quad_mag_sub(x, y, z, fsr)
347!  *
348!  * Sets *z = *x - *y, rounded according to the rounding mode in *fsr,
349!  * and updates the current exceptions in *fsr.  This routine assumes
350!  * *x and *y are finite, with opposite signs (i.e., a subtraction of
351!  * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
352!  */
353! void
354! __quad_mag_sub(const union longdouble *x, const union longdouble *y,
355! 	union longdouble *z, unsigned int *fsr)
356! {
357! 	unsigned long	lx, ly, frac, sticky;
358! 	unsigned int	ex, ey, gr, borrow, rm;
359! 	int		e;
360!
361! 	/* get the leading significand double-words and exponents */
362! 	ex = (x->ll.msll >> 48) & 0x7fff;
363! 	lx = x->ll.msll & ~0xffff000000000000ul;
364! 	if (ex == 0)
365! 		ex = 1;
366! 	else
367! 		lx |= 0x0001000000000000ul;
368!
369! 	ey = (y->ll.msll >> 48) & 0x7fff;
370! 	ly = y->ll.msll & ~0xffff000000000000ul;
371! 	if (ey == 0)
372! 		ey = 1;
373! 	else
374! 		ly |= 0x0001000000000000ul;
375!
376! 	/* prenormalize y */
377! 	e = (int) ex - (int) ey;
378! 	gr = sticky = 0;
379! 	if (e > 114) {
380! 		sticky = ly | y->ll.frac;
381! 		ly = frac = 0;
382! 	} else {
383! 		frac = y->ll.frac;
384! 		if (e >= 64) {
385! 			gr = frac >> 62;
386! 			sticky = frac << 2;
387! 			frac = ly;
388! 			ly = 0;
389! 			e -= 64;
390! 		}
391! 		if (e > 1) {
392! 			sticky |= gr | (frac & ((1ul << (e - 2)) - 1));
393! 			gr = (frac >> (e - 2)) & 3;
394! 			frac = (frac >> e) | (ly << (64 - e));
395! 			ly >>= e;
396! 		} else if (e == 1) {
397! 			sticky |= (gr & 1);
398! 			gr = (gr >> 1) | ((frac & 1) << 1);
399! 			frac = (frac >> 1) | (ly << 63);
400! 			ly >>= 1;
401! 		}
402! 	}
403!
404! 	/* complement guard, round, and sticky as need be */
405! 	gr <<= 1;
406! 	if (sticky)
407! 		gr |= 1;
408! 	gr = (-gr & 7);
409!	if (gr)
410!		if (++frac == 0)
411!			ly++;
412!
413! 	/* subtract, propagating borrows */
414! 	frac = x->ll.frac - frac;
415! 	lx -= ly;
416! 	if (frac > x->ll.frac)
417! 		lx--;
418!
419! 	/* get the rounding mode */
420! 	rm = *fsr >> 30;
421!
422! 	/* handle zero result */
423! 	if (!(lx | frac | gr)) {
424! 		z->l.msw = ((rm == FSR_RM)? 0x80000000 : 0);
425! 		z->l.frac2 = z->l.frac3 = z->l.frac4 = 0;
426! 		return;
427! 	}
428!
429! 	/* postnormalize */
430! 	if (lx < 0x0001000000000000ul) {
431! 		/* if cancellation occurred or the exponent is 1,
432! 		   the result is exact */
433! 		if (lx < 0x0000800000000000ul || ex == 1) {
434! 			if ((lx | (frac & 0xfffe000000000000ul)) == 0 &&
435! 				ex > 64) {
436! 				lx = frac;
437! 				frac = (unsigned long) gr << 61;
438! 				gr = 0;
439! 				ex -= 64;
440! 			}
441! 			while (lx < 0x0001000000000000ul && ex > 1) {
442! 				lx = (lx << 1) | (frac >> 63);
443! 				frac = (frac << 1) | (gr >> 2);
444! 				gr = 0;
445! 				ex--;
446! 			}
447! 			if (lx >= 0x0001000000000000ul)
448! 				z->l.msw |= (ex << 16);
449! 			z->l.msw |= ((lx >> 32) & 0xffff);
450! 			z->l.frac2 = (lx & 0xffffffff);
451! 			z->ll.frac = frac;
452!
453! 			/* if the result is tiny and underflow trapping is
454! 			   enabled, simulate underflow */
455! 			if (lx < 0x0001000000000000ul && (*fsr & FSR_UFM))
456! 				*fsr |= FSR_UFC;
457! 			return;
458! 		}
459!
460! 		/* otherwise we only borrowed one place */
461! 		lx = (lx << 1) | (frac >> 63);
462! 		frac = (frac << 1) | (gr >> 2);
463! 		gr &= 3;
464! 		ex--;
465! 	}
466! 	else
467! 		gr = (gr >> 1) | (gr & 1);
468!
469! 	/* fudge directed rounding modes as though the result were positive */
470! 	if (z->l.msw)
471! 		rm ^= (rm >> 1);
472!
473! 	/* see if we need to round */
474! 	if (gr) {
475! 		*fsr |= FSR_NXC;
476!
477! 		/* round up if necessary */
478! 		if (rm == FSR_RP || (rm == FSR_RN && (gr & 2) &&
479! 			((gr & 1) || (frac & 1)))) {
480! 			if (++frac == 0)
481! 				if (++lx >= 0x0002000000000000ul) {
482! 					lx >>= 1;
483! 					ex++;
484! 				}
485! 		}
486! 	}
487!
488! 	/* store the result */
489! 	z->l.msw |= (ex << 16) | ((lx >> 32) & 0xffff);
490! 	z->l.frac2 = (lx & 0xffffffff);
491! 	z->ll.frac = frac;
492! }
493
494	ENTRY(__quad_mag_sub)
495	save	%sp,-SA(MINFRAME),%sp
496
497	sethi	%hi(0xffff0000),%g1
498	sllx	%g1,32,%g1		! g1 = 0xffff000000000000
499
500	sethi	%hi(0x7fff),%l7
501	or	%l7,%lo(0x7fff),%l7	! l7 = 0x7fff
502
503	ldx	[%i0],%o0
504	srlx	%o0,48,%l0
505	andcc	%l0,%l7,%l0		! l0 = ex
506	beq,pn	%icc,1f
507	andn	%o0,%g1,%o0		! o0 = lx
508	ba,pt	%icc,2f
509	sub	%o0,%g1,%o0
5101:
511	mov	1,%l0
5122:
513
514	ldx	[%i1],%o1
515	srlx	%o1,48,%l1
516	andcc	%l1,%l7,%l1		! l1 = ey
517	beq,pn	%icc,1f
518	andn	%o1,%g1,%o1		! o1 = ly
519	ba,pt	%icc,2f
520	sub	%o1,%g1,%o1
5211:
522	mov	1,%l1
5232:
524
525	sub	%l0,%l1,%l1		! l1 = e = ex - ey
526	cmp	%l1,114			! see if we need to prenormalize y
527	bg,pn	%icc,1f
528	mov	0,%l6			! l6 = gr
529	mov	0,%o7			! o7 = sticky
530	cmp	%l1,64
531	bl,pt	%icc,3f
532	ldx	[%i1+8],%o2		! o2 = frac
533	srlx	%o2,62,%l6
534	sllx	%o2,2,%o7		! lop off top two bits
535	mov	%o1,%o2
536	mov	0,%o1
537	sub	%l1,64,%l1
5383:
539	cmp	%l1,1
540	ble,pn	%icc,4f
541	sub	%l1,2,%l2		! shift more than one bit
542	mov	1,%o3
543	sllx	%o3,%l2,%o3
544	sub	%o3,1,%o3
545	and	%o3,%o2,%o3
546	or	%o3,%l6,%o3
547	or	%o7,%o3,%o7
548	srlx	%o2,%l2,%o4
549	and	%o4,3,%l6
550	srlx	%o2,%l1,%o2
551	mov	64,%l3
552	sub	%l3,%l1,%l3
553	sllx	%o1,%l3,%o5
554	or	%o2,%o5,%o2
555	ba,pt	%icc,2f
556	srlx	%o1,%l1,%o1
5574:
558	bne,pn	%icc,2f
559	and	%l6,1,%o3		! shift one bit
560	or	%o7,%o3,%o7
561	and	%o2,1,%o4
562	sllx	%o4,1,%o4
563	srl	%l6,1,%l6
564	or	%l6,%o4,%l6
565	srlx	%o2,1,%o2
566	sllx	%o1,63,%o5
567	or	%o2,%o5,%o2
568	ba,pt	%icc,2f
569	srlx	%o1,1,%o1
5701:
571	ldx	[%i1+8],%o3		! (full prenormalization shift case)
572	or	%o1,%o3,%o7
573	mov	0,%o1
574	mov	0,%o2
5752:
576
577	tst	%o7			! complement guard, round, and
578	beq,pn	%xcc,1f			! sticky as need be
579	sll	%l6,1,%l6
580	or	%l6,1,%l6
5811:
582	subcc	%g0,%l6,%l6
583	beq,pn	%icc,1f
584	and	%l6,7,%l6
585	addcc	%o2,1,%o2
586	beq,a,pn %xcc,1f
587	add	%o1,1,%o1
5881:
589
590	ldx	[%i0+8],%o3		! subtract, propagating borrows
591	sub	%o3,%o2,%o2
592	cmp	%o3,%o2
593	bgeu,pt	%xcc,5f
594	sub	%o0,%o1,%o0
595	sub	%o0,1,%o0
5965:
597
598	ld	[%i3],%i4		! get the rounding mode
599	srl	%i4,30,%l3		! l3 = rm
600
601	or	%o0,%o2,%o1		! look for zero result
602	orcc	%o1,%l6,%g0
603	bne,pt	%xcc,1f
604	srl	%l3,1,%l4
605	and	%l3,%l4,%l4
606	sll	%l4,31,%l4
607	st	%l4,[%i2]
608	st	%g0,[%i2+4]
609	stx	%g0,[%i2+8]
610	ret
611	restore
612
6131:
614	addcc	%o0,%g1,%g0		! postnormalize
615	bl,pt	%xcc,1f
616	ld	[%i2],%l4		! l4 = z->l.msw
617	and	%l6,1,%l5		! (no cancellation or borrow case)
618	srl	%l6,1,%l6
619	ba,pt	%icc,2f
620	or	%l6,%l5,%l6
6211:
622	srax	%g1,1,%o7
623	addcc	%o0,%o7,%g0
624	bl,pn	%xcc,1f
625	cmp	%l0,1
626	beq,pt	%icc,1f
627	srlx	%o2,63,%o3		! borrowed one place
628	sllx	%o0,1,%o0
629	or	%o0,%o3,%o0
630	srl	%l6,2,%o4
631	sllx	%o2,1,%o2
632	or	%o2,%o4,%o2
633	and	%l6,3,%l6
634	ba,pt	%icc,2f
635	sub	%l0,1,%l0
6361:
637	srlx	%o2,49,%o3		! cancellation or tiny result
638	orcc	%o0,%o3,%g0
639	bne,pt	%xcc,1f
640	cmp	%l0,64
641	ble,pn	%icc,1f
642	nop
643	mov	%o2,%o0
644	sllx	%l6,61,%o2
645	mov	0,%l6
646	sub	%l0,64,%l0
6471:
648	addcc	%o0,%g1,%g0		! normalization loop
649	bge,pn	%xcc,1f
650	cmp	%l0,1
651	ble,pn	%icc,1f
652	srl	%l6,2,%l6
653	srlx	%o2,63,%o3
654	sllx	%o0,1,%o0
655	or	%o0,%o3,%o0
656	sllx	%o2,1,%o2
657	or	%o2,%l6,%o2
658	ba,pt	%icc,1b
659	sub	%l0,1,%l0
6601:
661	sllx	%o0,16,%o1
662	srlx	%o1,48,%l5
663	or	%l4,%l5,%l4
664	addcc	%o0,%g1,%g0		! see if result is tiny
665	bl,pn	%xcc,1f
666	sll	%l0,16,%l5
667	or	%l4,%l5,%l4
6681:
669	st	%l4,[%i2]
670	st	%o0,[%i2+4]
671	bge,pt	%xcc,1f
672	stx	%o2,[%i2+8]
673	srl	%i4,23,%i5
674	andcc	%i5,4,%g0		! see if we need to raise underflow
675	beq,pt	%icc,1f
676	or	%i4,4,%i4
677	st	%i4,[%i3]
6781:
679	ret
680	restore
681
6822:
683	tst	%l4			! fudge directect rounding modes
684	beq,pn	%icc,1f
685	srl	%l3,1,%l5
686	xor	%l3,%l5,%l3
6871:
688
689	tst	%l6			! see if we need to round
690	beq,pn	%icc,1f
691	or	%i4,1,%i4
692	st	%i4,[%i3]
693	andcc	%l3,1,%g0
694	bne,pn	%icc,1f
695	tst	%l3
696	bne,pn	%icc,2f
697	andcc	%l6,2,%g0
698	beq,pn	%icc,1f
699	or	%l6,%o2,%o3
700	andcc	%o3,1,%o3
701	beq,pn	%xcc,1f
702	nop
7032:
704	addcc	%o2,1,%o2		! round up and check for carry
705	bne,pt	%xcc,1f
706	nop
707	add	%o0,1,%o0
708	srlx	%o0,49,%o1
709	tst	%o1
710	beq,pt	%icc,1f
711	nop
712	srlx	%o0,1,%o0
713	add	%l0,1,%l0
7141:
715
716	sllx	%o0,16,%o1
717	srlx	%o1,48,%o1
718	or	%l4,%o1,%l4
719	sll	%l0,16,%l5
720	or	%l4,%l5,%l4
721	st	%l4,[%i2]
722	st	%o0,[%i2+4]
723	stx	%o2,[%i2+8]
724	ret
725	restore
726
727	SET_SIZE(__quad_mag_sub)
728