/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 */

26*5d9d9091SRichard Lowe	.file	"memcmp.s"
27*5d9d9091SRichard Lowe
28*5d9d9091SRichard Lowe/*
29*5d9d9091SRichard Lowe * memcmp(s1, s2, len)
30*5d9d9091SRichard Lowe *
31*5d9d9091SRichard Lowe * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
32*5d9d9091SRichard Lowe *
33*5d9d9091SRichard Lowe * Fast assembler language version of the following C-program for memcmp
34*5d9d9091SRichard Lowe * which represents the `standard' for the C-library.
35*5d9d9091SRichard Lowe *
36*5d9d9091SRichard Lowe *	int
37*5d9d9091SRichard Lowe *	memcmp(const void *s1, const void *s2, size_t n)
38*5d9d9091SRichard Lowe *	{
39*5d9d9091SRichard Lowe *		if (s1 != s2 && n != 0) {
40*5d9d9091SRichard Lowe *			const char *ps1 = s1;
41*5d9d9091SRichard Lowe *			const char *ps2 = s2;
42*5d9d9091SRichard Lowe *			do {
43*5d9d9091SRichard Lowe *				if (*ps1++ != *ps2++)
44*5d9d9091SRichard Lowe *					return(ps1[-1] - ps2[-1]);
45*5d9d9091SRichard Lowe *			} while (--n != 0);
46*5d9d9091SRichard Lowe *		}
47*5d9d9091SRichard Lowe *		return (0);
48*5d9d9091SRichard Lowe *	}
49*5d9d9091SRichard Lowe */
50*5d9d9091SRichard Lowe
51*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
52*5d9d9091SRichard Lowe#include <sys/sun4asi.h>
53*5d9d9091SRichard Lowe
54*5d9d9091SRichard Lowe	ANSI_PRAGMA_WEAK(memcmp,function)
55*5d9d9091SRichard Lowe
56*5d9d9091SRichard Lowe	ENTRY(memcmp)
57*5d9d9091SRichard Lowe	cmp	%o0, %o1		! s1 == s2?
58*5d9d9091SRichard Lowe	be	%ncc, .cmpeq
59*5d9d9091SRichard Lowe
60*5d9d9091SRichard Lowe	! for small counts byte compare immediately
61*5d9d9091SRichard Lowe	cmp	%o2, 48
62*5d9d9091SRichard Lowe	bleu,a 	%ncc, .bytcmp
63*5d9d9091SRichard Lowe	mov	%o2, %o3		! o3 <= 48
64*5d9d9091SRichard Lowe
65*5d9d9091SRichard Lowe	! Count > 48. We will byte compare (8 + num of bytes to dbl align)
66*5d9d9091SRichard Lowe	! bytes. We assume that most miscompares will occur in the 1st 8 bytes
67*5d9d9091SRichard Lowe
68*5d9d9091SRichard Lowe.chkdbl:
69*5d9d9091SRichard Lowe        and     %o0, 7, %o4             ! is s1 aligned on a 8 byte bound
70*5d9d9091SRichard Lowe	mov	8, %o3			! o2 > 48;  o3 = 8
71*5d9d9091SRichard Lowe        sub     %o4, 8, %o4		! o4 = -(num of bytes to dbl align)
72*5d9d9091SRichard Lowe	ba	%ncc, .bytcmp
73*5d9d9091SRichard Lowe        sub     %o3, %o4, %o3           ! o3 = 8 + (num of bytes to dbl align)
74*5d9d9091SRichard Lowe
75*5d9d9091SRichard Lowe
76*5d9d9091SRichard Lowe1:      ldub    [%o1], %o5        	! byte compare loop
77*5d9d9091SRichard Lowe        inc     %o1
78*5d9d9091SRichard Lowe        inc     %o0
79*5d9d9091SRichard Lowe	dec	%o2
80*5d9d9091SRichard Lowe        cmp     %o4, %o5
81*5d9d9091SRichard Lowe	bne	%ncc, .noteq
82*5d9d9091SRichard Lowe.bytcmp:
83*5d9d9091SRichard Lowe	deccc   %o3
84*5d9d9091SRichard Lowe	bgeu,a   %ncc, 1b
85*5d9d9091SRichard Lowe        ldub    [%o0], %o4
86*5d9d9091SRichard Lowe
87*5d9d9091SRichard Lowe	! Check to see if there are more bytes to compare
88*5d9d9091SRichard Lowe	cmp	%o2, 0			! is o2 > 0
89*5d9d9091SRichard Lowe	bgu,a	%ncc, .blkchk		! we should already be dbl aligned
90*5d9d9091SRichard Lowe	cmp     %o2, 320                ! if cnt < 256 + 64 -  no Block ld/st
91*5d9d9091SRichard Lowe.cmpeq:
92*5d9d9091SRichard Lowe        retl                             ! strings compare equal
93*5d9d9091SRichard Lowe	sub	%g0, %g0, %o0
94*5d9d9091SRichard Lowe
95*5d9d9091SRichard Lowe.noteq:
96*5d9d9091SRichard Lowe	retl				! strings aren't equal
97*5d9d9091SRichard Lowe	sub	%o4, %o5, %o0		! return(*s1 - *s2)
98*5d9d9091SRichard Lowe
99*5d9d9091SRichard Lowe
100*5d9d9091SRichard Lowe        ! Now src1 is Double word aligned
101*5d9d9091SRichard Lowe.blkchk:
102*5d9d9091SRichard Lowe        bgeu,a   %ncc, blkcmp                  ! do block cmp
103*5d9d9091SRichard Lowe        andcc   %o0, 63, %o3            ! is src1 block aligned
104*5d9d9091SRichard Lowe
105*5d9d9091SRichard Lowe        ! double word compare - using ldd and faligndata. Compares upto
106*5d9d9091SRichard Lowe        ! 8 byte multiple count and does byte compare for the residual.
107*5d9d9091SRichard Lowe
108*5d9d9091SRichard Lowe.dwcmp:
109*5d9d9091SRichard Lowe
110*5d9d9091SRichard Lowe        rd      %fprs, %o3              ! o3 = fprs
111*5d9d9091SRichard Lowe
112*5d9d9091SRichard Lowe        ! if fprs.fef == 0, set it. Checking it, reqires 2 instructions.
113*5d9d9091SRichard Lowe        ! So set it anyway, without checking.
114*5d9d9091SRichard Lowe        wr      %g0, 0x4, %fprs         ! fprs.fef = 1
115*5d9d9091SRichard Lowe
116*5d9d9091SRichard Lowe        andn    %o2, 7, %o4             ! o4 has 8 byte aligned cnt
117*5d9d9091SRichard Lowe	sub     %o4, 8, %o4
118*5d9d9091SRichard Lowe        alignaddr %o1, %g0, %g1
119*5d9d9091SRichard Lowe        ldd     [%g1], %d0
120*5d9d9091SRichard Lowe4:
121*5d9d9091SRichard Lowe        add     %g1, 8, %g1
122*5d9d9091SRichard Lowe        ldd     [%g1], %d2
123*5d9d9091SRichard Lowe	ldd	[%o0], %d6
124*5d9d9091SRichard Lowe        faligndata %d0, %d2, %d8
125*5d9d9091SRichard Lowe	fcmpne32 %d6, %d8, %o5
126*5d9d9091SRichard Lowe	fsrc1	%d6, %d6		! 2 fsrc1's added since o5 cannot
127*5d9d9091SRichard Lowe	fsrc1	%d8, %d8		! be used for 3 cycles else we
128*5d9d9091SRichard Lowe	fmovd	%d2, %d0		! create 9 bubbles in the pipeline
129*5d9d9091SRichard Lowe	brnz,a,pn %o5, 6f
130*5d9d9091SRichard Lowe	sub     %o1, %o0, %o1           ! o1 gets the difference
131*5d9d9091SRichard Lowe        subcc   %o4, 8, %o4
132*5d9d9091SRichard Lowe        add     %o0, 8, %o0
133*5d9d9091SRichard Lowe        add     %o1, 8, %o1
134*5d9d9091SRichard Lowe        bgu,pt	%ncc, 4b
135*5d9d9091SRichard Lowe        sub     %o2, 8, %o2
136*5d9d9091SRichard Lowe
137*5d9d9091SRichard Lowe.residcmp:
138*5d9d9091SRichard Lowe        ba      6f
139*5d9d9091SRichard Lowe	sub     %o1, %o0, %o1           ! o1 gets the difference
140*5d9d9091SRichard Lowe
141*5d9d9091SRichard Lowe5:      ldub    [%o0 + %o1], %o5        ! byte compare loop
142*5d9d9091SRichard Lowe        inc     %o0
143*5d9d9091SRichard Lowe        cmp     %o4, %o5
144*5d9d9091SRichard Lowe        bne     %ncc, .dnoteq
145*5d9d9091SRichard Lowe6:
146*5d9d9091SRichard Lowe        deccc   %o2
147*5d9d9091SRichard Lowe        bgeu,a	%ncc, 5b
148*5d9d9091SRichard Lowe        ldub    [%o0], %o4
149*5d9d9091SRichard Lowe
150*5d9d9091SRichard Lowe	and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
151*5d9d9091SRichard Lowe	wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
152*5d9d9091SRichard Lowe	retl
153*5d9d9091SRichard Lowe	sub	%g0, %g0, %o0		! strings compare equal
154*5d9d9091SRichard Lowe
155*5d9d9091SRichard Lowe.dnoteq:
156*5d9d9091SRichard Lowe	and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
157*5d9d9091SRichard Lowe	wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
158*5d9d9091SRichard Lowe	retl
159*5d9d9091SRichard Lowe	sub	%o4, %o5, %o0		! return(*s1 - *s2)
160*5d9d9091SRichard Lowe
161*5d9d9091SRichard Lowe
/*
 * Block-compare path (count >= 320).  A register window is allocated,
 * so the arguments are now %i0 = s1, %i1 = s2, %i2 = n.
 * NOTE: %icc still holds the result of "andcc %o0, 63" executed in the
 * branch delay slot at .blkchk, and %i3 = %o3 = (s1 & 63) after save.
 */
blkcmp:
	save	%sp, -SA(MINFRAME), %sp
	rd	%fprs, %l5		! l5 = fprs (saved for restore)

	! if fprs.fef == 0, set it. Checking it, requires 2 instructions.
	! So set it anyway, without checking.
	wr	%g0, 0x4, %fprs		! fprs.fef = 1

	bz,pn	%ncc, .blalign		! now block aligned
	sub	%i3, 64, %i3
	neg	%i3			! bytes till block aligned

	! Compare %i3 bytes till dst is block (64 byte) aligned. use
	! double word compares.

	alignaddr %i1, %g0, %g1
	ldd	[%g1], %d0
7:
	add	%g1, 8, %g1
	ldd	[%g1], %d2
	ldd	[%i0], %d6
	faligndata %d0, %d2, %d8
	fcmpne32 %d6, %d8, %i5
	fsrc1	%d6, %d6		! 2 fsrc1's added since i5 cannot
	fsrc1	%d8, %d8		! be used for 3 cycles else we
	fmovd	%d2, %d0		! create 9 bubbles in the pipeline
	brnz,a,pn  %i5, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	subcc	%i3, 8, %i3
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	bgu,pt	%ncc, 7b
	sub	%i2, 8, %i2

.blalign:

	! src1 is block aligned
	membar	#StoreLoad
	srl	%i1, 3, %l6		! bits 3,4,5 are now least sig in  %l6
	andcc	%l6, 7, %l6		! mask everything except bits 1,2 3
	andn	%i2, 63, %i3		! calc number of blocks
	alignaddr %i1, %g0, %g0		! gen %gsr
	andn	%i1, 0x3F, %l7		! blk aligned address
	sub	%i2, %i3, %l2
	andn	%l2, 7, %i4		! calc doubles left after blkcpy

	be,a	%ncc, 1f	! branch taken if src2 is 64-byte aligned
	ldda	[%l7]ASI_BLK_P, %d0

	! src2 not 64-byte aligned: load only the %d2..%d14 registers that
	! the chosen segN loop will consume, by computing a jump into the
	! ldd ladder below (each entry is one 4-byte instruction).
	call	.+8		! get the address of this instruction in %o7
	sll	%l6, 2, %l4
	add	%o7, %l4, %o7
	jmp	%o7 + 16	! jump to the starting ldd instruction
	nop
	ldd	[%l7+8], %d2
	ldd	[%l7+16], %d4
	ldd	[%l7+24], %d6
	ldd	[%l7+32], %d8
	ldd	[%l7+40], %d10
	ldd	[%l7+48], %d12
	ldd	[%l7+56], %d14
1:
	add	%l7, 64, %l7
	ldda	[%l7]ASI_BLK_P, %d16
	add	%l7, 64, %l7
	ldda	[%i0]ASI_BLK_P, %d32
	sub	%i3, 128, %i3

	! switch statement to get us to the right 8 byte blk within a
	! 64 byte block

	cmp	%l6, 4
	bgeu,a	hlf
	cmp	%l6, 6
	cmp	%l6, 2
	bgeu,a	sqtr
	nop
	cmp	%l6, 1
	be,a	seg1
	nop
	ba	seg0
	nop
sqtr:
	be,a	seg2
	nop

	ba,a	seg3
	nop

hlf:
	bgeu,a	fqtr
	nop
	cmp	%l6, 5
	be,a	seg5
	nop
	ba	seg4
	nop
fqtr:
	be,a	seg6
	nop
	ba	seg7
	nop
264*5d9d9091SRichard Lowe
! Compare one 64-byte block: %d48-%d62 (aligned src2 data) against
! %d32-%d46 (src1 data), 32 bits per fcmpne32.  On the first mismatching
! 8-byte group, branch to "add" with %l4 = byte offset of that group.
! The fsrc1 instructions are to make sure that the results of the fcmpne32
! are used 3 cycles later - else spitfire adds 9 bubbles.

#define	FCMPNE32_D32_D48			\
	fcmpne32	%d48, %d32, %l0		;\
	fcmpne32	%d50, %d34, %l1		;\
	fcmpne32	%d52, %d36, %l2		;\
	fcmpne32	%d54, %d38, %l3		;\
	brnz,a		%l0, add		;\
	mov		0, %l4			;\
	fcmpne32	%d56, %d40, %l0		;\
	brnz,a		%l1, add		;\
	mov		8, %l4			;\
	fcmpne32	%d58, %d42, %l1		;\
	brnz,a		%l2, add		;\
	mov		16, %l4			;\
	fcmpne32	%d60, %d44, %l2		;\
	brnz,a		%l3, add		;\
	mov		24, %l4			;\
	fcmpne32	%d62, %d46, %l3		;\
	brnz,a		%l0, add		;\
	mov		32, %l4			;\
	fsrc1		%d48, %d48		;\
	brnz,a		%l1, add		;\
	mov		40, %l4			;\
	fsrc1		%d48, %d48		;\
	brnz,a		%l2, add		;\
	mov		48, %l4			;\
	fsrc1		%d48, %d48		;\
	brnz,a		%l3, add		;\
	mov		56, %l4

! Miscompare landing pad: advance both pointers to the failing 8-byte
! group and fall back to the residual byte-compare loop at .remcmp.
add:
	add	%l4, %i0, %i0
	add	%l4, %i1, %i1
	ba	.remcmp
	sub	%i1, %i0, %i1		! i1 gets the (s2 - s1) difference
302*5d9d9091SRichard Lowe
! Gather one 64-byte block of src2 into %d48-%d62, realigning via the
! %gsr byte offset set earlier by alignaddr.  FALIGN_D0 consumes the
! block held in %d0-%d14 (plus %d16); FALIGN_D16 the one in %d16-%d30
! (plus %d0).  The two are used alternately by the segN loops.

#define FALIGN_D0                       \
        faligndata %d0, %d2, %d48       ;\
        faligndata %d2, %d4, %d50       ;\
        faligndata %d4, %d6, %d52       ;\
        faligndata %d6, %d8, %d54       ;\
        faligndata %d8, %d10, %d56      ;\
        faligndata %d10, %d12, %d58     ;\
        faligndata %d12, %d14, %d60     ;\
        faligndata %d14, %d16, %d62

#define FALIGN_D16                      \
        faligndata %d16, %d18, %d48     ;\
        faligndata %d18, %d20, %d50     ;\
        faligndata %d20, %d22, %d52     ;\
        faligndata %d22, %d24, %d54     ;\
        faligndata %d24, %d26, %d56     ;\
        faligndata %d26, %d28, %d58     ;\
        faligndata %d28, %d30, %d60     ;\
        faligndata %d30, %d0, %d62
322*5d9d9091SRichard Lowe
! Main compare loop for src2 phase 0 (%l6 == 0).  Each half-iteration
! block-loads 64 bytes of src1 (%d32) and src2 (%d0.. or %d16..),
! realigns src2 with FALIGN_D0/FALIGN_D16 and compares with
! FCMPNE32_D32_D48.  Falls out to blkd16/blkd0 for the residual.
seg0:
        FALIGN_D0
        ldda    [%l7]ASI_BLK_P, %d0
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 1f
        sub     %i2, 64, %i2
        ldda    [%i0]ASI_BLK_P, %d32

        FALIGN_D16
        ldda    [%l7]ASI_BLK_P, %d16
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 0f
        sub     %i2, 64, %i2

        ba	%ncc, seg0
	ldda	[%i0]ASI_BLK_P, %d32

0:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D0
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd16
        sub     %i2, 64, %i2

1:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D16
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd0
        sub     %i2, 64, %i2
367*5d9d9091SRichard Lowe
! Realignment gathers for src2 phase 1 (%l6 == 1): same pattern as
! FALIGN_D0/FALIGN_D16 but starting one doubleword into the block.

#define FALIGN_D2                       \
        faligndata %d2, %d4, %d48       ;\
        faligndata %d4, %d6, %d50       ;\
        faligndata %d6, %d8, %d52       ;\
        faligndata %d8, %d10, %d54      ;\
        faligndata %d10, %d12, %d56     ;\
        faligndata %d12, %d14, %d58     ;\
        faligndata %d14, %d16, %d60     ;\
        faligndata %d16, %d18, %d62

#define FALIGN_D18                      \
        faligndata %d18, %d20, %d48     ;\
        faligndata %d20, %d22, %d50     ;\
        faligndata %d22, %d24, %d52     ;\
        faligndata %d24, %d26, %d54     ;\
        faligndata %d26, %d28, %d56     ;\
        faligndata %d28, %d30, %d58     ;\
        faligndata %d30, %d0, %d60      ;\
        faligndata %d0, %d2, %d62
387*5d9d9091SRichard Lowe
388*5d9d9091SRichard Lowe
! Main compare loop for src2 phase 1 (%l6 == 1); structure identical to
! seg0 but using FALIGN_D2/FALIGN_D18.  Residual handled at blkd18/blkd2.
seg1:
        FALIGN_D2
        ldda    [%l7]ASI_BLK_P, %d0
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 1f
        sub     %i2, 64, %i2
        ldda    [%i0]ASI_BLK_P, %d32

        FALIGN_D18
        ldda    [%l7]ASI_BLK_P, %d16
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 0f
        sub     %i2, 64, %i2

        ba	%ncc, seg1
	ldda	[%i0]ASI_BLK_P, %d32

0:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D2
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd18
        sub     %i2, 64, %i2

1:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D18
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd2
        sub     %i2, 64, %i2
433*5d9d9091SRichard Lowe
! Realignment gathers for src2 phase 2 (%l6 == 2).

#define FALIGN_D4                       \
        faligndata %d4, %d6, %d48       ;\
        faligndata %d6, %d8, %d50       ;\
        faligndata %d8, %d10, %d52      ;\
        faligndata %d10, %d12, %d54     ;\
        faligndata %d12, %d14, %d56     ;\
        faligndata %d14, %d16, %d58     ;\
        faligndata %d16, %d18, %d60     ;\
        faligndata %d18, %d20, %d62

#define FALIGN_D20                      \
        faligndata %d20, %d22, %d48     ;\
        faligndata %d22, %d24, %d50     ;\
        faligndata %d24, %d26, %d52     ;\
        faligndata %d26, %d28, %d54     ;\
        faligndata %d28, %d30, %d56     ;\
        faligndata %d30, %d0, %d58      ;\
        faligndata %d0, %d2, %d60       ;\
        faligndata %d2, %d4, %d62
453*5d9d9091SRichard Lowe
! Main compare loop for src2 phase 2 (%l6 == 2); uses FALIGN_D4/FALIGN_D20.
! Residual handled at blkd20/blkd4.
seg2:
	FALIGN_D4
        ldda    [%l7]ASI_BLK_P, %d0
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 1f
        sub     %i2, 64, %i2
        ldda    [%i0]ASI_BLK_P, %d32

        FALIGN_D20
        ldda    [%l7]ASI_BLK_P, %d16
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 0f
        sub     %i2, 64, %i2

        ba	%ncc, seg2
	ldda	[%i0]ASI_BLK_P, %d32

0:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D4
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd20
        sub     %i2, 64, %i2

1:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D20
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd4
        sub     %i2, 64, %i2
498*5d9d9091SRichard Lowe
! Realignment gathers for src2 phase 3 (%l6 == 3).

#define FALIGN_D6                       \
        faligndata %d6, %d8, %d48       ;\
        faligndata %d8, %d10, %d50      ;\
        faligndata %d10, %d12, %d52     ;\
        faligndata %d12, %d14, %d54     ;\
        faligndata %d14, %d16, %d56     ;\
        faligndata %d16, %d18, %d58     ;\
        faligndata %d18, %d20, %d60     ;\
        faligndata %d20, %d22, %d62

#define FALIGN_D22                      \
        faligndata %d22, %d24, %d48     ;\
        faligndata %d24, %d26, %d50     ;\
        faligndata %d26, %d28, %d52     ;\
        faligndata %d28, %d30, %d54     ;\
        faligndata %d30, %d0, %d56      ;\
        faligndata %d0, %d2, %d58       ;\
        faligndata %d2, %d4, %d60       ;\
        faligndata %d4, %d6, %d62
518*5d9d9091SRichard Lowe
519*5d9d9091SRichard Lowe
! Main compare loop for src2 phase 3 (%l6 == 3); uses FALIGN_D6/FALIGN_D22.
! Residual handled at blkd22/blkd6.
seg3:
        FALIGN_D6
        ldda    [%l7]ASI_BLK_P, %d0
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 1f
        sub     %i2, 64, %i2
        ldda    [%i0]ASI_BLK_P, %d32

        FALIGN_D22
        ldda    [%l7]ASI_BLK_P, %d16
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 0f
        sub     %i2, 64, %i2

        ba	%ncc, seg3
	ldda	[%i0]ASI_BLK_P, %d32


0:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D6
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd22
        sub     %i2, 64, %i2

1:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D22
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd6
        sub     %i2, 64, %i2
565*5d9d9091SRichard Lowe
! Realignment gathers for src2 phase 4 (%l6 == 4).

#define FALIGN_D8                       \
        faligndata %d8, %d10, %d48      ;\
        faligndata %d10, %d12, %d50     ;\
        faligndata %d12, %d14, %d52     ;\
        faligndata %d14, %d16, %d54     ;\
        faligndata %d16, %d18, %d56     ;\
        faligndata %d18, %d20, %d58     ;\
        faligndata %d20, %d22, %d60     ;\
        faligndata %d22, %d24, %d62

#define FALIGN_D24                      \
        faligndata %d24, %d26, %d48     ;\
        faligndata %d26, %d28, %d50     ;\
        faligndata %d28, %d30, %d52     ;\
        faligndata %d30, %d0, %d54      ;\
        faligndata %d0, %d2, %d56       ;\
        faligndata %d2, %d4, %d58       ;\
        faligndata %d4, %d6, %d60       ;\
        faligndata %d6, %d8, %d62
585*5d9d9091SRichard Lowe
586*5d9d9091SRichard Lowe
! Main compare loop for src2 phase 4 (%l6 == 4); uses FALIGN_D8/FALIGN_D24.
! Residual handled at blkd24/blkd8.
seg4:
        FALIGN_D8
        ldda    [%l7]ASI_BLK_P, %d0
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 1f
        sub     %i2, 64, %i2
        ldda    [%i0]ASI_BLK_P, %d32

        FALIGN_D24
        ldda    [%l7]ASI_BLK_P, %d16
        add     %l7, 64, %l7
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
        subcc   %i3, 64, %i3
        bz,pn   %ncc, 0f
        sub     %i2, 64, %i2

        ba	%ncc, seg4
	ldda	[%i0]ASI_BLK_P, %d32


0:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D8
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd24
        sub     %i2, 64, %i2

1:
	ldda	[%i0]ASI_BLK_P, %d32
	membar  #Sync
	FALIGN_D24
	FCMPNE32_D32_D48
        add     %i0, 64, %i0
        add     %i1, 64, %i1
	ba	%ncc, blkd8
        sub     %i2, 64, %i2
632*5d9d9091SRichard Lowe
633*5d9d9091SRichard Lowe#define FALIGN_D10                      \
634*5d9d9091SRichard Lowe        faligndata %d10, %d12, %d48     ;\
635*5d9d9091SRichard Lowe        faligndata %d12, %d14, %d50     ;\
636*5d9d9091SRichard Lowe        faligndata %d14, %d16, %d52     ;\
637*5d9d9091SRichard Lowe        faligndata %d16, %d18, %d54     ;\
638*5d9d9091SRichard Lowe        faligndata %d18, %d20, %d56     ;\
639*5d9d9091SRichard Lowe        faligndata %d20, %d22, %d58     ;\
640*5d9d9091SRichard Lowe        faligndata %d22, %d24, %d60     ;\
641*5d9d9091SRichard Lowe        faligndata %d24, %d26, %d62
642*5d9d9091SRichard Lowe
643*5d9d9091SRichard Lowe#define FALIGN_D26                      \
644*5d9d9091SRichard Lowe        faligndata %d26, %d28, %d48     ;\
645*5d9d9091SRichard Lowe        faligndata %d28, %d30, %d50     ;\
646*5d9d9091SRichard Lowe        faligndata %d30, %d0, %d52      ;\
647*5d9d9091SRichard Lowe        faligndata %d0, %d2, %d54       ;\
648*5d9d9091SRichard Lowe        faligndata %d2, %d4, %d56       ;\
649*5d9d9091SRichard Lowe        faligndata %d4, %d6, %d58       ;\
650*5d9d9091SRichard Lowe        faligndata %d6, %d8, %d60       ;\
651*5d9d9091SRichard Lowe        faligndata %d8, %d10, %d62
652*5d9d9091SRichard Lowe
653*5d9d9091SRichard Lowe
654*5d9d9091SRichard Loweseg5:
655*5d9d9091SRichard Lowe        FALIGN_D10
656*5d9d9091SRichard Lowe        ldda    [%l7]ASI_BLK_P, %d0
657*5d9d9091SRichard Lowe        add     %l7, 64, %l7
658*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
659*5d9d9091SRichard Lowe        add     %i0, 64, %i0
660*5d9d9091SRichard Lowe        add     %i1, 64, %i1
661*5d9d9091SRichard Lowe        subcc   %i3, 64, %i3
662*5d9d9091SRichard Lowe        bz,pn   %ncc, 1f
663*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
664*5d9d9091SRichard Lowe        ldda    [%i0]ASI_BLK_P, %d32
665*5d9d9091SRichard Lowe
666*5d9d9091SRichard Lowe        FALIGN_D26
667*5d9d9091SRichard Lowe        ldda    [%l7]ASI_BLK_P, %d16
668*5d9d9091SRichard Lowe        add     %l7, 64, %l7
669*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
670*5d9d9091SRichard Lowe        add     %i0, 64, %i0
671*5d9d9091SRichard Lowe        add     %i1, 64, %i1
672*5d9d9091SRichard Lowe        subcc   %i3, 64, %i3
673*5d9d9091SRichard Lowe        bz,pn   %ncc, 0f
674*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
675*5d9d9091SRichard Lowe
676*5d9d9091SRichard Lowe        ba	%ncc, seg5
677*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
678*5d9d9091SRichard Lowe
679*5d9d9091SRichard Lowe
680*5d9d9091SRichard Lowe0:
681*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
682*5d9d9091SRichard Lowe	membar  #Sync
683*5d9d9091SRichard Lowe	FALIGN_D10
684*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
685*5d9d9091SRichard Lowe        add     %i0, 64, %i0
686*5d9d9091SRichard Lowe        add     %i1, 64, %i1
687*5d9d9091SRichard Lowe	ba	%ncc, blkd26
688*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
689*5d9d9091SRichard Lowe
690*5d9d9091SRichard Lowe1:
691*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
692*5d9d9091SRichard Lowe	membar  #Sync
693*5d9d9091SRichard Lowe	FALIGN_D26
694*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
695*5d9d9091SRichard Lowe        add     %i0, 64, %i0
696*5d9d9091SRichard Lowe        add     %i1, 64, %i1
697*5d9d9091SRichard Lowe	ba	%ncc, blkd10
698*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
699*5d9d9091SRichard Lowe
700*5d9d9091SRichard Lowe#define FALIGN_D12                      \
701*5d9d9091SRichard Lowe        faligndata %d12, %d14, %d48     ;\
702*5d9d9091SRichard Lowe        faligndata %d14, %d16, %d50     ;\
703*5d9d9091SRichard Lowe        faligndata %d16, %d18, %d52     ;\
704*5d9d9091SRichard Lowe        faligndata %d18, %d20, %d54     ;\
705*5d9d9091SRichard Lowe        faligndata %d20, %d22, %d56     ;\
706*5d9d9091SRichard Lowe        faligndata %d22, %d24, %d58     ;\
707*5d9d9091SRichard Lowe        faligndata %d24, %d26, %d60     ;\
708*5d9d9091SRichard Lowe        faligndata %d26, %d28, %d62
709*5d9d9091SRichard Lowe
710*5d9d9091SRichard Lowe#define FALIGN_D28                      \
711*5d9d9091SRichard Lowe        faligndata %d28, %d30, %d48     ;\
712*5d9d9091SRichard Lowe        faligndata %d30, %d0, %d50      ;\
713*5d9d9091SRichard Lowe        faligndata %d0, %d2, %d52       ;\
714*5d9d9091SRichard Lowe        faligndata %d2, %d4, %d54       ;\
715*5d9d9091SRichard Lowe        faligndata %d4, %d6, %d56       ;\
716*5d9d9091SRichard Lowe        faligndata %d6, %d8, %d58       ;\
717*5d9d9091SRichard Lowe        faligndata %d8, %d10, %d60      ;\
718*5d9d9091SRichard Lowe        faligndata %d10, %d12, %d62
719*5d9d9091SRichard Lowe
720*5d9d9091SRichard Lowe
721*5d9d9091SRichard Loweseg6:
722*5d9d9091SRichard Lowe        FALIGN_D12
723*5d9d9091SRichard Lowe        ldda    [%l7]ASI_BLK_P, %d0
724*5d9d9091SRichard Lowe        add     %l7, 64, %l7
725*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
726*5d9d9091SRichard Lowe        add     %i0, 64, %i0
727*5d9d9091SRichard Lowe        add     %i1, 64, %i1
728*5d9d9091SRichard Lowe        subcc   %i3, 64, %i3
729*5d9d9091SRichard Lowe        bz,pn   %ncc, 1f
730*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
731*5d9d9091SRichard Lowe        ldda    [%i0]ASI_BLK_P, %d32
732*5d9d9091SRichard Lowe
733*5d9d9091SRichard Lowe        FALIGN_D28
734*5d9d9091SRichard Lowe        ldda    [%l7]ASI_BLK_P, %d16
735*5d9d9091SRichard Lowe        add     %l7, 64, %l7
736*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
737*5d9d9091SRichard Lowe        add     %i0, 64, %i0
738*5d9d9091SRichard Lowe        add     %i1, 64, %i1
739*5d9d9091SRichard Lowe        subcc   %i3, 64, %i3
740*5d9d9091SRichard Lowe        bz,pn   %ncc, 0f
741*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
742*5d9d9091SRichard Lowe
743*5d9d9091SRichard Lowe        ba	%ncc, seg6
744*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
745*5d9d9091SRichard Lowe
746*5d9d9091SRichard Lowe
747*5d9d9091SRichard Lowe0:
748*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
749*5d9d9091SRichard Lowe	membar  #Sync
750*5d9d9091SRichard Lowe	FALIGN_D12
751*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
752*5d9d9091SRichard Lowe        add     %i0, 64, %i0
753*5d9d9091SRichard Lowe        add     %i1, 64, %i1
754*5d9d9091SRichard Lowe	ba	%ncc, blkd28
755*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
756*5d9d9091SRichard Lowe
757*5d9d9091SRichard Lowe1:
758*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
759*5d9d9091SRichard Lowe	membar  #Sync
760*5d9d9091SRichard Lowe	FALIGN_D28
761*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
762*5d9d9091SRichard Lowe        add     %i0, 64, %i0
763*5d9d9091SRichard Lowe        add     %i1, 64, %i1
764*5d9d9091SRichard Lowe	ba	%ncc, blkd12
765*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
766*5d9d9091SRichard Lowe
767*5d9d9091SRichard Lowe#define FALIGN_D14                      \
768*5d9d9091SRichard Lowe        faligndata %d14, %d16, %d48     ;\
769*5d9d9091SRichard Lowe        faligndata %d16, %d18, %d50     ;\
770*5d9d9091SRichard Lowe        faligndata %d18, %d20, %d52     ;\
771*5d9d9091SRichard Lowe        faligndata %d20, %d22, %d54     ;\
772*5d9d9091SRichard Lowe        faligndata %d22, %d24, %d56     ;\
773*5d9d9091SRichard Lowe        faligndata %d24, %d26, %d58     ;\
774*5d9d9091SRichard Lowe        faligndata %d26, %d28, %d60     ;\
775*5d9d9091SRichard Lowe        faligndata %d28, %d30, %d62
776*5d9d9091SRichard Lowe
777*5d9d9091SRichard Lowe#define FALIGN_D30                      \
778*5d9d9091SRichard Lowe        faligndata %d30, %d0, %d48     ;\
779*5d9d9091SRichard Lowe        faligndata %d0, %d2, %d50      ;\
780*5d9d9091SRichard Lowe        faligndata %d2, %d4, %d52      ;\
781*5d9d9091SRichard Lowe        faligndata %d4, %d6, %d54      ;\
782*5d9d9091SRichard Lowe        faligndata %d6, %d8, %d56      ;\
783*5d9d9091SRichard Lowe        faligndata %d8, %d10, %d58     ;\
784*5d9d9091SRichard Lowe        faligndata %d10, %d12, %d60    ;\
785*5d9d9091SRichard Lowe        faligndata %d12, %d14, %d62
786*5d9d9091SRichard Lowe
787*5d9d9091SRichard Loweseg7:
788*5d9d9091SRichard Lowe        FALIGN_D14
789*5d9d9091SRichard Lowe        ldda    [%l7]ASI_BLK_P, %d0
790*5d9d9091SRichard Lowe        add     %l7, 64, %l7
791*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
792*5d9d9091SRichard Lowe        add     %i0, 64, %i0
793*5d9d9091SRichard Lowe        add     %i1, 64, %i1
794*5d9d9091SRichard Lowe        subcc   %i3, 64, %i3
795*5d9d9091SRichard Lowe        bz,pn   %ncc, 1f
796*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
797*5d9d9091SRichard Lowe        ldda    [%i0]ASI_BLK_P, %d32
798*5d9d9091SRichard Lowe
799*5d9d9091SRichard Lowe        FALIGN_D30
800*5d9d9091SRichard Lowe        ldda    [%l7]ASI_BLK_P, %d16
801*5d9d9091SRichard Lowe        add     %l7, 64, %l7
802*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
803*5d9d9091SRichard Lowe        add     %i0, 64, %i0
804*5d9d9091SRichard Lowe        add     %i1, 64, %i1
805*5d9d9091SRichard Lowe        subcc   %i3, 64, %i3
806*5d9d9091SRichard Lowe        bz,pn   %ncc, 0f
807*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
808*5d9d9091SRichard Lowe
809*5d9d9091SRichard Lowe        ba	%ncc, seg7
810*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
811*5d9d9091SRichard Lowe
812*5d9d9091SRichard Lowe0:
813*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
814*5d9d9091SRichard Lowe	membar  #Sync
815*5d9d9091SRichard Lowe	FALIGN_D14
816*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
817*5d9d9091SRichard Lowe        add     %i0, 64, %i0
818*5d9d9091SRichard Lowe        add     %i1, 64, %i1
819*5d9d9091SRichard Lowe	ba	%ncc, blkd30
820*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
821*5d9d9091SRichard Lowe
822*5d9d9091SRichard Lowe1:
823*5d9d9091SRichard Lowe	ldda	[%i0]ASI_BLK_P, %d32
824*5d9d9091SRichard Lowe	membar  #Sync
825*5d9d9091SRichard Lowe	FALIGN_D30
826*5d9d9091SRichard Lowe	FCMPNE32_D32_D48
827*5d9d9091SRichard Lowe        add     %i0, 64, %i0
828*5d9d9091SRichard Lowe        add     %i1, 64, %i1
829*5d9d9091SRichard Lowe	ba	%ncc, blkd14
830*5d9d9091SRichard Lowe        sub     %i2, 64, %i2
831*5d9d9091SRichard Lowe
832*5d9d9091SRichard Lowe
833*5d9d9091SRichard Loweblkd0:
834*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
835*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
836*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
837*5d9d9091SRichard Lowe        faligndata %d0, %d2, %d48
838*5d9d9091SRichard Lowe	ldd	[%i0], %d32
839*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
840*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
841*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
842*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
843*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
844*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
845*5d9d9091SRichard Lowe        add     %i0, 8, %i0
846*5d9d9091SRichard Lowe        add     %i1, 8, %i1
847*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
848*5d9d9091SRichard Lowe
849*5d9d9091SRichard Loweblkd2:
850*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
851*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
852*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
853*5d9d9091SRichard Lowe        faligndata %d2, %d4, %d48
854*5d9d9091SRichard Lowe	ldd	[%i0], %d32
855*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
856*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
857*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
858*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
859*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
860*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
861*5d9d9091SRichard Lowe        add     %i0, 8, %i0
862*5d9d9091SRichard Lowe        add     %i1, 8, %i1
863*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
864*5d9d9091SRichard Lowe
865*5d9d9091SRichard Loweblkd4:
866*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
867*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
868*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
869*5d9d9091SRichard Lowe        faligndata %d4, %d6, %d48
870*5d9d9091SRichard Lowe	ldd	[%i0], %d32
871*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
872*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
873*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
874*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
875*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
876*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
877*5d9d9091SRichard Lowe        add     %i0, 8, %i0
878*5d9d9091SRichard Lowe        add     %i1, 8, %i1
879*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
880*5d9d9091SRichard Lowe
881*5d9d9091SRichard Loweblkd6:
882*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
883*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
884*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
885*5d9d9091SRichard Lowe        faligndata %d6, %d8, %d48
886*5d9d9091SRichard Lowe	ldd	[%i0], %d32
887*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
888*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
889*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
890*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
891*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
892*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
893*5d9d9091SRichard Lowe        add     %i0, 8, %i0
894*5d9d9091SRichard Lowe        add     %i1, 8, %i1
895*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
896*5d9d9091SRichard Lowe
897*5d9d9091SRichard Loweblkd8:
898*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
899*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
900*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
901*5d9d9091SRichard Lowe        faligndata %d8, %d10, %d48
902*5d9d9091SRichard Lowe	ldd	[%i0], %d32
903*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
904*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
905*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
906*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
907*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
908*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
909*5d9d9091SRichard Lowe        add     %i0, 8, %i0
910*5d9d9091SRichard Lowe        add     %i1, 8, %i1
911*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
912*5d9d9091SRichard Lowe
913*5d9d9091SRichard Loweblkd10:
914*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
915*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
916*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
917*5d9d9091SRichard Lowe        faligndata %d10, %d12, %d48
918*5d9d9091SRichard Lowe	ldd	[%i0], %d32
919*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
920*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
921*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
922*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
923*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
924*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
925*5d9d9091SRichard Lowe        add     %i0, 8, %i0
926*5d9d9091SRichard Lowe        add     %i1, 8, %i1
927*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
928*5d9d9091SRichard Lowe
929*5d9d9091SRichard Loweblkd12:
930*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
931*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
932*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
933*5d9d9091SRichard Lowe        faligndata %d12, %d14, %d48
934*5d9d9091SRichard Lowe	ldd	[%i0], %d32
935*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
936*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
937*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
938*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
939*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
940*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
941*5d9d9091SRichard Lowe        add     %i0, 8, %i0
942*5d9d9091SRichard Lowe        add     %i1, 8, %i1
943*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
944*5d9d9091SRichard Lowe
945*5d9d9091SRichard Loweblkd14:
946*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
947*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
948*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
949*5d9d9091SRichard Lowe        ba,pt 	%ncc, blkleft
950*5d9d9091SRichard Lowe	fmovd   %d14, %d0
951*5d9d9091SRichard Lowe
952*5d9d9091SRichard Loweblkd16:
953*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
954*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
955*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
956*5d9d9091SRichard Lowe        faligndata %d16, %d18, %d48
957*5d9d9091SRichard Lowe	ldd	[%i0], %d32
958*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
959*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
960*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
961*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
962*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
963*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
964*5d9d9091SRichard Lowe        add     %i0, 8, %i0
965*5d9d9091SRichard Lowe        add     %i1, 8, %i1
966*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
967*5d9d9091SRichard Lowe
968*5d9d9091SRichard Loweblkd18:
969*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
970*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
971*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
972*5d9d9091SRichard Lowe        faligndata %d18, %d20, %d48
973*5d9d9091SRichard Lowe	ldd	[%i0], %d32
974*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
975*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
976*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
977*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
978*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
979*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
980*5d9d9091SRichard Lowe        add     %i0, 8, %i0
981*5d9d9091SRichard Lowe        add     %i1, 8, %i1
982*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
983*5d9d9091SRichard Lowe
984*5d9d9091SRichard Loweblkd20:
985*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
986*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
987*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
988*5d9d9091SRichard Lowe        faligndata %d20, %d22, %d48
989*5d9d9091SRichard Lowe	ldd	[%i0], %d32
990*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
991*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
992*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
993*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
994*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
995*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
996*5d9d9091SRichard Lowe        add     %i0, 8, %i0
997*5d9d9091SRichard Lowe        add     %i1, 8, %i1
998*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
999*5d9d9091SRichard Lowe
1000*5d9d9091SRichard Loweblkd22:
1001*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
1002*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
1003*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1004*5d9d9091SRichard Lowe        faligndata %d22, %d24, %d48
1005*5d9d9091SRichard Lowe	ldd	[%i0], %d32
1006*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
1007*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1008*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1009*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1010*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
1011*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1012*5d9d9091SRichard Lowe        add     %i0, 8, %i0
1013*5d9d9091SRichard Lowe        add     %i1, 8, %i1
1014*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
1015*5d9d9091SRichard Lowe
1016*5d9d9091SRichard Loweblkd24:
1017*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
1018*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
1019*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1020*5d9d9091SRichard Lowe        faligndata %d24, %d26, %d48
1021*5d9d9091SRichard Lowe	ldd	[%i0], %d32
1022*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
1023*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1024*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1025*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1026*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
1027*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1028*5d9d9091SRichard Lowe        add     %i0, 8, %i0
1029*5d9d9091SRichard Lowe        add     %i1, 8, %i1
1030*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
1031*5d9d9091SRichard Lowe
1032*5d9d9091SRichard Loweblkd26:
1033*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
1034*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
1035*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1036*5d9d9091SRichard Lowe        faligndata %d26, %d28, %d48
1037*5d9d9091SRichard Lowe	ldd	[%i0], %d32
1038*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
1039*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1040*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1041*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1042*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
1043*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1044*5d9d9091SRichard Lowe        add     %i0, 8, %i0
1045*5d9d9091SRichard Lowe        add     %i1, 8, %i1
1046*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
1047*5d9d9091SRichard Lowe
1048*5d9d9091SRichard Loweblkd28:
1049*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
1050*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
1051*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1052*5d9d9091SRichard Lowe        faligndata %d28, %d30, %d48
1053*5d9d9091SRichard Lowe	ldd	[%i0], %d32
1054*5d9d9091SRichard Lowe	fcmpne32 %d32, %d48, %l1
1055*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1056*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1057*5d9d9091SRichard Lowe	fsrc1	%d32, %d32
1058*5d9d9091SRichard Lowe	brnz,a	 %l1, .remcmp
1059*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1060*5d9d9091SRichard Lowe        add     %i0, 8, %i0
1061*5d9d9091SRichard Lowe        add     %i1, 8, %i1
1062*5d9d9091SRichard Lowe	sub	%i2, 8, %i2
1063*5d9d9091SRichard Lowe
1064*5d9d9091SRichard Loweblkd30:
1065*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
1066*5d9d9091SRichard Lowe        blu,a,pn %ncc, .remcmp
1067*5d9d9091SRichard Lowe        sub     %i1, %i0, %i1           ! i1 gets the difference
1068*5d9d9091SRichard Lowe        fmovd   %d30, %d0
1069*5d9d9091SRichard Lowe
1070*5d9d9091SRichard Lowe	! This loop handles doubles remaining that were not loaded(ldda`ed)
1071*5d9d9091SRichard Lowe	! in the Block Compare loop
1072*5d9d9091SRichard Loweblkleft:
1073*5d9d9091SRichard Lowe        ldd     [%l7], %d2
1074*5d9d9091SRichard Lowe        add     %l7, 8, %l7
1075*5d9d9091SRichard Lowe        faligndata %d0, %d2, %d8
1076*5d9d9091SRichard Lowe	ldd     [%i0], %d32
1077*5d9d9091SRichard Lowe        fcmpne32 %d32, %d8, %l1
1078*5d9d9091SRichard Lowe	fsrc1	%d2, %d0
1079*5d9d9091SRichard Lowe	fsrc1	%d2, %d0
1080*5d9d9091SRichard Lowe	fsrc1	%d2, %d0
1081*5d9d9091SRichard Lowe	brnz,a	%l1, .remcmp
1082*5d9d9091SRichard Lowe	sub     %i1, %i0, %i1           ! i1 gets the difference
1083*5d9d9091SRichard Lowe        add     %i0, 8, %i0
1084*5d9d9091SRichard Lowe        add     %i1, 8, %i1
1085*5d9d9091SRichard Lowe        subcc   %i4, 8, %i4
1086*5d9d9091SRichard Lowe        bgeu,pt  %ncc, blkleft
1087*5d9d9091SRichard Lowe        sub     %i2, 8, %i2
1088*5d9d9091SRichard Lowe
1089*5d9d9091SRichard Lowe	ba	%ncc, .remcmp
1090*5d9d9091SRichard Lowe	sub     %i1, %i0, %i1           ! i1 gets the difference
1091*5d9d9091SRichard Lowe
1092*5d9d9091SRichard Lowe6:      ldub    [%i0 + %i1], %i5        ! byte compare loop
1093*5d9d9091SRichard Lowe        inc     %i0
1094*5d9d9091SRichard Lowe        cmp     %i4, %i5
1095*5d9d9091SRichard Lowe        bne     %ncc, .bnoteq
1096*5d9d9091SRichard Lowe.remcmp:
1097*5d9d9091SRichard Lowe        deccc   %i2
1098*5d9d9091SRichard Lowe        bgeu,a   %ncc, 6b
1099*5d9d9091SRichard Lowe        ldub    [%i0], %i4
1100*5d9d9091SRichard Lowe
1101*5d9d9091SRichard Loweexit:
1102*5d9d9091SRichard Lowe	and     %l5, 0x4, %l5           ! fprs.du = fprs.dl = 0
1103*5d9d9091SRichard Lowe	wr      %l5, %g0, %fprs         ! fprs = l5 - restore fprs
1104*5d9d9091SRichard Lowe	membar  #StoreLoad|#StoreStore
1105*5d9d9091SRichard Lowe        ret
1106*5d9d9091SRichard Lowe        restore %g0, %g0, %o0
1107*5d9d9091SRichard Lowe
1108*5d9d9091SRichard Lowe
1109*5d9d9091SRichard Lowe.bnoteq:
1110*5d9d9091SRichard Lowe	and     %l5, 0x4, %l5           ! fprs.du = fprs.dl = 0
1111*5d9d9091SRichard Lowe	wr      %l5, %g0, %fprs         ! fprs = l5 - restore fprs
1112*5d9d9091SRichard Lowe	membar  #StoreLoad|#StoreStore
1113*5d9d9091SRichard Lowe	sub	%i4, %i5, %i0		! return(*s1 - *s2)
1114*5d9d9091SRichard Lowe	ret				! strings aren't equal
1115*5d9d9091SRichard Lowe	restore %i0, %g0, %o0
1116*5d9d9091SRichard Lowe
1117*5d9d9091SRichard Lowe
1118*5d9d9091SRichard Lowe
1119*5d9d9091SRichard Lowe	SET_SIZE(memcmp)
1120