xref: /titanic_52/usr/src/lib/libc/capabilities/sun4u/common/memcpy.s (revision 1e49577a7fcde812700ded04431b49d67cc57d6d)
1*1e49577aSRod Evans/*
2*1e49577aSRod Evans * CDDL HEADER START
3*1e49577aSRod Evans *
4*1e49577aSRod Evans * The contents of this file are subject to the terms of the
5*1e49577aSRod Evans * Common Development and Distribution License (the "License").
6*1e49577aSRod Evans * You may not use this file except in compliance with the License.
7*1e49577aSRod Evans *
8*1e49577aSRod Evans * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*1e49577aSRod Evans * or http://www.opensolaris.org/os/licensing.
10*1e49577aSRod Evans * See the License for the specific language governing permissions
11*1e49577aSRod Evans * and limitations under the License.
12*1e49577aSRod Evans *
13*1e49577aSRod Evans * When distributing Covered Code, include this CDDL HEADER in each
14*1e49577aSRod Evans * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*1e49577aSRod Evans * If applicable, add the following below this CDDL HEADER, with the
16*1e49577aSRod Evans * fields enclosed by brackets "[]" replaced with your own identifying
17*1e49577aSRod Evans * information: Portions Copyright [yyyy] [name of copyright owner]
18*1e49577aSRod Evans *
19*1e49577aSRod Evans * CDDL HEADER END
20*1e49577aSRod Evans */
21*1e49577aSRod Evans
22*1e49577aSRod Evans/*
23*1e49577aSRod Evans * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24*1e49577aSRod Evans */
25*1e49577aSRod Evans
26*1e49577aSRod Evans	.file	"memcpy.s"
27*1e49577aSRod Evans
28*1e49577aSRod Evans/*
29*1e49577aSRod Evans * memcpy(s1, s2, len)
30*1e49577aSRod Evans *
31*1e49577aSRod Evans * Copy len bytes from s2 to s1; exactly len bytes are always copied.
32*1e49577aSRod Evans * Note: memcpy() does not work for overlapped copies; bcopy() does.
33*1e49577aSRod Evans *
34*1e49577aSRod Evans * Fast assembler language version of the following C-program for memcpy
35*1e49577aSRod Evans * which represents the `standard' for the C-library.
36*1e49577aSRod Evans *
37*1e49577aSRod Evans *	void *
38*1e49577aSRod Evans *	memcpy(void *s, const void *s0, size_t n)
39*1e49577aSRod Evans *	{
40*1e49577aSRod Evans *		if (n != 0) {
41*1e49577aSRod Evans *	   	    char *s1 = s;
42*1e49577aSRod Evans *		    const char *s2 = s0;
43*1e49577aSRod Evans *		    do {
44*1e49577aSRod Evans *			*s1++ = *s2++;
45*1e49577aSRod Evans *		    } while (--n != 0);
46*1e49577aSRod Evans *		}
47*1e49577aSRod Evans *		return ( s );
48*1e49577aSRod Evans *	}
49*1e49577aSRod Evans */
50*1e49577aSRod Evans
51*1e49577aSRod Evans#include <sys/asm_linkage.h>
52*1e49577aSRod Evans#include <sys/sun4asi.h>
53*1e49577aSRod Evans#include <sys/trap.h>
54*1e49577aSRod Evans
/*
 * NOTE(review): ANSI_PRAGMA_WEAK is defined outside this file; these two
 * lines presumably publish memmove and memcpy as weak function symbols.
 * Confirm against the included headers.
 */
55*1e49577aSRod Evans	ANSI_PRAGMA_WEAK(memmove,function)
56*1e49577aSRod Evans	ANSI_PRAGMA_WEAK(memcpy,function)
57*1e49577aSRod Evans
/*
 * memmove(s1, s2, len)
 *
 * In:	%o0 = destination, %o1 = source, %o2 = byte count
 * Out:	%o0 = original destination (held in %o5 until exit)
 *
 * When the source address is >= the destination address, or when the
 * count does not exceed (destination - source), control goes to forcpy,
 * the forward copy shared with memcpy. Otherwise the destination lies
 * above the source and the regions overlap, so the copy runs backwards
 * from the end of both regions.
 *
 * Scratch registers used on the backward path: %o3, %o4, %g1, %d0, %d2,
 * %d8, the condition codes, %gsr (via alignaddr) and %fprs.
 */
58*1e49577aSRod Evans	ENTRY(memmove)
59*1e49577aSRod Evans	cmp	%o1, %o0	! if from address is >= to use forward copy
60*1e49577aSRod Evans	bgeu	%ncc, forcpy	! else use backward if ...
61*1e49577aSRod Evans	sub	%o0, %o1, %o4	! (delay slot) get difference of two addresses
62*1e49577aSRod Evans	cmp	%o2, %o4	! compare size and difference of addresses
63*1e49577aSRod Evans	bleu	%ncc, forcpy	! size <= difference: copy forward; else fall through
64*1e49577aSRod Evans	nop
65*1e49577aSRod Evans
66*1e49577aSRod Evans        !
67*1e49577aSRod Evans        ! an overlapped copy that must be done "backwards"
68*1e49577aSRod Evans        !
69*1e49577aSRod Evans.ovbc:
70*1e49577aSRod Evans	mov	%o0, %o5		! save des address for return val
71*1e49577aSRod Evans	add     %o1, %o2, %o1           ! get to end of source space
72*1e49577aSRod Evans        add     %o0, %o2, %o0           ! get to end of destination space
73*1e49577aSRod Evans
	! From here on %o0 and %o1 point one byte past the next byte to copy.
74*1e49577aSRod Evans.chksize:
75*1e49577aSRod Evans	cmp	%o2, 8
76*1e49577aSRod Evans	bgeu,pn	%ncc, .dbalign		! 8 or more bytes left: try doublewords
77*1e49577aSRod Evans	nop
78*1e49577aSRod Evans
79*1e49577aSRod Evans
	! Backward byte loop; also finishes the tail left by the doubleword loop.
80*1e49577aSRod Evans.byte:
81*1e49577aSRod Evans1:	deccc	%o2			! decrement count
82*1e49577aSRod Evans	blu,pn	%ncc, exit		! loop until done
83*1e49577aSRod Evans	dec	%o0			! decrement to address
84*1e49577aSRod Evans	dec	%o1			! decrement from address
85*1e49577aSRod Evans        ldub	[%o1], %o3		! read a byte
86*1e49577aSRod Evans        ba	1b			! loop until done
87*1e49577aSRod Evans	stb	%o3, [%o0]		! write byte
88*1e49577aSRod Evans
	! Copy single bytes backwards until the destination pointer is
	! 8-byte aligned, rechecking the remaining count after each byte.
89*1e49577aSRod Evans.dbalign:
90*1e49577aSRod Evans	andcc	%o0, 7, %o3		! is dst on an 8 byte boundary
91*1e49577aSRod Evans	bz	%ncc, .dbbck		! yes, start the doubleword loop
92*1e49577aSRod Evans	nop
93*1e49577aSRod Evans	dec	%o1
94*1e49577aSRod Evans	dec	%o0
95*1e49577aSRod Evans	dec	%o2
96*1e49577aSRod Evans	ldub	[%o1], %o3
97*1e49577aSRod Evans	ba	.chksize
98*1e49577aSRod Evans	stb	%o3, [%o0]		! (delay slot) write byte
99*1e49577aSRod Evans
	! Backward doubleword copy: dst is 8-byte aligned, at least 8 bytes left.
100*1e49577aSRod Evans.dbbck:
101*1e49577aSRod Evans
102*1e49577aSRod Evans        rd      %fprs, %o3              ! o3 = fprs
103*1e49577aSRod Evans
104*1e49577aSRod Evans
105*1e49577aSRod Evans        ! if fprs.fef == 0, set it. Checking it requires 2 instructions.
106*1e49577aSRod Evans        ! So set it anyway, without checking.
107*1e49577aSRod Evans        wr      %g0, 0x4, %fprs         ! fprs.fef = 1
108*1e49577aSRod Evans
109*1e49577aSRod Evans        alignaddr	%o1, %g0, %g1		! align src
110*1e49577aSRod Evans        ldd	[%g1], %d0		! get first 8 byte block
111*1e49577aSRod Evans	sub	%g1, 8, %g1		! step the aligned load address back
112*1e49577aSRod Evans	andn	%o2, 7, %o4		! %o4 = count rounded down to 8
113*1e49577aSRod Evans	sub	%o1, %o4, %o1		! src pointer for the .byte tail
114*1e49577aSRod Evans
115*1e49577aSRod Evans2:
116*1e49577aSRod Evans	sub	%o0, 8, %o0		! since we are at the end
117*1e49577aSRod Evans					! when we first enter the loop
118*1e49577aSRod Evans        ldd	[%g1], %d2
119*1e49577aSRod Evans        faligndata %d2, %d0, %d8	! extract 8 bytes out
120*1e49577aSRod Evans        std	%d8, [%o0]		! store it
121*1e49577aSRod Evans
122*1e49577aSRod Evans	sub	%g1, 8, %g1
123*1e49577aSRod Evans        sub	%o2, 8, %o2		! 8 less bytes to copy
124*1e49577aSRod Evans	cmp	%o2, 8			! or do we have < 8 bytes
125*1e49577aSRod Evans        bgeu,pt	%ncc, 2b
126*1e49577aSRod Evans	fmovd	%d2, %d0		! (delay slot) carry this doubleword forward
127*1e49577aSRod Evans
128*1e49577aSRod Evans        and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
129*1e49577aSRod Evans        ba      .byte			! copy the remaining 0-7 bytes
130*1e49577aSRod Evans        wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
131*1e49577aSRod Evans
132*1e49577aSRod Evans	SET_SIZE(memmove)
133*1e49577aSRod Evans
134*1e49577aSRod Evans
/*
 * memcpy(s1, s2, len), also entered as __align_cpy_1 and, from memmove,
 * at forcpy.
 *
 * In:	%o0 = destination, %o1 = source, %o2 = byte count
 * Out:	%o0 = original destination (saved in %o5)
 *
 * Forward copy. Counts of 32 bytes or fewer are copied one byte at a
 * time right here; larger counts branch to .alignsrc.
 */
135*1e49577aSRod Evans	ENTRY(memcpy)
136*1e49577aSRod Evans	ENTRY(__align_cpy_1)
137*1e49577aSRod Evansforcpy:
138*1e49577aSRod Evans	mov	%o0, %o5		! save des address for return val
139*1e49577aSRod Evans
140*1e49577aSRod Evans	cmp	%o2, 32			! for small counts copy bytes
141*1e49577aSRod Evans	bgu,a	%ncc, .alignsrc
142*1e49577aSRod Evans	andcc   %o1, 7, %o3             ! is src aligned on a 8 byte bound
					! (annulled slot: runs only if taken)
143*1e49577aSRod Evans
144*1e49577aSRod Evans.bytecp:
145*1e49577aSRod Evans	! Do byte copy
146*1e49577aSRod Evans	tst	%o2
147*1e49577aSRod Evans	bleu,a,pn %ncc, exit		! count is zero: nothing to copy
148*1e49577aSRod Evans	nop
149*1e49577aSRod Evans
150*1e49577aSRod Evans1:	ldub	[%o1], %o4
151*1e49577aSRod Evans	inc 	%o1
152*1e49577aSRod Evans	inc	%o0
153*1e49577aSRod Evans	deccc	%o2
154*1e49577aSRod Evans	bgu	%ncc, 1b		! loop while bytes remain
155*1e49577aSRod Evans	stb	%o4, [%o0 - 1]		! (delay slot) %o0 was already advanced
156*1e49577aSRod Evans
	! Common return for the byte-copy paths of memcpy and memmove.
157*1e49577aSRod Evansexit:
158*1e49577aSRod Evans	retl
159*1e49577aSRod Evans	mov	%o5, %o0		! (delay slot) return saved destination
160*1e49577aSRod Evans
/*
 * .alignsrc / .bigcpy
 *
 * Reached from forcpy for counts above 32 bytes, with %o3 = %o1 & 7 and
 * the condition codes from that andcc still live. Copies single bytes
 * until the source is 8-byte aligned, then single bytes until the
 * destination is 8-byte aligned, reducing %o2 by the bytes copied.
 * Aligning the destination can leave the source unaligned again.
 */
161*1e49577aSRod Evans.alignsrc:
162*1e49577aSRod Evans        bz      %ncc, .bigcpy		! src already double aligned
163*1e49577aSRod Evans	sub     %o3, 8, %o3		! (delay slot) %o3 = (src & 7) - 8
164*1e49577aSRod Evans        neg     %o3                     ! bytes till src double aligned
165*1e49577aSRod Evans
166*1e49577aSRod Evans        sub     %o2, %o3, %o2           ! update o2 with new count
167*1e49577aSRod Evans
168*1e49577aSRod Evans	! Copy %o3 bytes till double aligned
169*1e49577aSRod Evans
170*1e49577aSRod Evans2:      ldub    [%o1], %o4
171*1e49577aSRod Evans        inc     %o1
172*1e49577aSRod Evans        inc     %o0
173*1e49577aSRod Evans        deccc   %o3
174*1e49577aSRod Evans        bgu	%ncc, 2b
175*1e49577aSRod Evans        stb     %o4, [%o0 - 1]		! (delay slot) %o0 was already advanced
176*1e49577aSRod Evans
177*1e49577aSRod Evans	! Now Source (%o1) is double word aligned
178*1e49577aSRod Evans
179*1e49577aSRod Evans.bigcpy: 				! >= 17 bytes to copy
180*1e49577aSRod Evans	andcc	%o0, 7, %o3		! is dst aligned on a 8 byte bound
181*1e49577aSRod Evans        bz      %ncc, .blkchk		! already double aligned
182*1e49577aSRod Evans	sub     %o3, 8, %o3		! (delay slot) %o3 = (dst & 7) - 8
183*1e49577aSRod Evans        neg     %o3                     ! bytes till double aligned
184*1e49577aSRod Evans
185*1e49577aSRod Evans        sub     %o2, %o3, %o2           ! update o2 with new count
186*1e49577aSRod Evans
187*1e49577aSRod Evans	! Copy %o3 bytes till double aligned
188*1e49577aSRod Evans
189*1e49577aSRod Evans3:      ldub    [%o1], %o4
190*1e49577aSRod Evans        inc     %o1
191*1e49577aSRod Evans        inc     %o0
192*1e49577aSRod Evans        deccc   %o3
193*1e49577aSRod Evans        bgu	%ncc, 3b
194*1e49577aSRod Evans        stb     %o4, [%o0 - 1]		! (delay slot) %o0 was already advanced
195*1e49577aSRod Evans
196*1e49577aSRod Evans	! Now Destination (%o0) is double word aligned
/*
 * .blkchk / .dwcpy / .residcp / dwexit
 *
 * Entered with the destination (%o0) doubleword aligned, %o2 = bytes
 * remaining and %o5 = return value. Counts of 384 or more go to blkcpy.
 * Everything else is copied eight bytes at a time with
 * ldd/faligndata/std, followed by a byte loop for the remainder.
 * %fprs is saved in %o3 and written back at dwexit with only the
 * fef bit kept.
 */
197*1e49577aSRod Evans.blkchk:
198*1e49577aSRod Evans	cmp     %o2, 384		! if cnt < 256 + 128 -  no Block ld/st
199*1e49577aSRod Evans	bgeu,a	%ncc, blkcpy		!    do double word copy
200*1e49577aSRod Evans	subcc	%o0, %o1, %o4		! %o4 = dest - src
					! (annulled slot: runs only if taken)
201*1e49577aSRod Evans
202*1e49577aSRod Evans	! double word copy - using ldd and faligndata. Copies up to
203*1e49577aSRod Evans	! 8 byte multiple count and does byte copy for the residual.
204*1e49577aSRod Evans.dwcpy:
205*1e49577aSRod Evans	rd	%fprs, %o3		! o3 = fprs
206*1e49577aSRod Evans
207*1e49577aSRod Evans	! if fprs.fef == 0, set it. Checking it requires 2 instructions.
208*1e49577aSRod Evans	! So set it anyway, without checking.
209*1e49577aSRod Evans	wr	%g0, 0x4, %fprs 	! fprs.fef = 1
210*1e49577aSRod Evans	andn    %o2, 7, %o4     	! o4 has 8 byte aligned cnt
211*1e49577aSRod Evans	sub	%o4, 8, %o4		! hold back one doubleword for .residcp;
					! the loop loads one doubleword ahead
212*1e49577aSRod Evans        alignaddr %o1, %g0, %g1		! %g1 = aligned src, offset goes to %gsr
213*1e49577aSRod Evans        ldd     [%g1], %d0
214*1e49577aSRod Evans        add     %g1, 8, %g1
	! Loop unrolled twice so %d0 and %d2 swap roles without a register move.
215*1e49577aSRod Evans4:
216*1e49577aSRod Evans        ldd     [%g1], %d2
217*1e49577aSRod Evans        add     %g1, 8, %g1
218*1e49577aSRod Evans        sub     %o2, 8, %o2		! total bytes remaining
219*1e49577aSRod Evans        subcc   %o4, 8, %o4		! doubleword-loop bytes remaining
220*1e49577aSRod Evans        faligndata %d0, %d2, %d8
221*1e49577aSRod Evans        std     %d8, [%o0]
222*1e49577aSRod Evans        add     %o1, 8, %o1
223*1e49577aSRod Evans        bz,pn   %ncc, .residcp
224*1e49577aSRod Evans        add     %o0, 8, %o0		! (delay slot)
225*1e49577aSRod Evans        ldd     [%g1], %d0
226*1e49577aSRod Evans        add     %g1, 8, %g1
227*1e49577aSRod Evans        sub     %o2, 8, %o2
228*1e49577aSRod Evans        subcc   %o4, 8, %o4
229*1e49577aSRod Evans        faligndata %d2, %d0, %d8
230*1e49577aSRod Evans        std     %d8, [%o0]
231*1e49577aSRod Evans        add     %o1, 8, %o1
232*1e49577aSRod Evans        bgu,pn	%ncc, 4b
233*1e49577aSRod Evans        add     %o0, 8, %o0		! (delay slot)
234*1e49577aSRod Evans
235*1e49577aSRod Evans.residcp:				! Do byte copy
236*1e49577aSRod Evans	tst	%o2
237*1e49577aSRod Evans	bz,a,pn %ncc, dwexit		! no residual bytes
238*1e49577aSRod Evans	nop
239*1e49577aSRod Evans
240*1e49577aSRod Evans5:	ldub	[%o1], %o4
241*1e49577aSRod Evans	inc 	%o1
242*1e49577aSRod Evans	inc	%o0
243*1e49577aSRod Evans	deccc	%o2
244*1e49577aSRod Evans	bgu	%ncc, 5b
245*1e49577aSRod Evans	stb	%o4, [%o0 - 1]		! (delay slot) %o0 was already advanced
246*1e49577aSRod Evans
247*1e49577aSRod Evansdwexit:
248*1e49577aSRod Evans        and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
249*1e49577aSRod Evans        wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
250*1e49577aSRod Evans	retl
251*1e49577aSRod Evans	mov	%o5, %o0		! (delay slot) return saved destination
252*1e49577aSRod Evans
/*
 * blkcpy: entry to the block load/store path.
 *
 * Entered from .blkchk with %o4 = dest - src and the condition codes
 * from that subcc. If the two addresses are closer than 256 bytes, the
 * copy goes back to .dwcpy. Otherwise a new register window is opened
 * (the %o registers become %i registers) and doublewords are copied
 * until the destination is 64-byte aligned, ending at blalign.
 */
253*1e49577aSRod Evansblkcpy:
254*1e49577aSRod Evans	! subcc	%o0, %o1, %o4		! in delay slot of branch
255*1e49577aSRod Evans	bneg,a,pn %ncc, 1f		! %o4 = abs(%o4)
256*1e49577aSRod Evans	neg	%o4
257*1e49577aSRod Evans1:
258*1e49577aSRod Evans	/*
259*1e49577aSRod Evans	 * Compare against 256 since we should be checking block addresses
260*1e49577aSRod Evans	 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
261*1e49577aSRod Evans	 * src = dest + (64 * 3) + 63.
262*1e49577aSRod Evans	 */
263*1e49577aSRod Evans	cmp	%o4, 256		! if smaller than 3 blocks skip
264*1e49577aSRod Evans	blu,pn	%ncc, .dwcpy		! and do it the slower way
265*1e49577aSRod Evans	andcc	%o0, 63, %o3		! (delay slot) dst offset in 64-byte block;
					! sets the cc tested by bz after the save
266*1e49577aSRod Evans
267*1e49577aSRod Evans	save    %sp, -SA(MINFRAME), %sp	! new window: %o0-%o5 are now %i0-%i5
268*1e49577aSRod Evans        rd      %fprs, %l3              ! l3 = fprs
269*1e49577aSRod Evans
270*1e49577aSRod Evans        ! if fprs.fef == 0, set it. Checking it requires 2 instructions.
271*1e49577aSRod Evans        ! So set it anyway, without checking.
272*1e49577aSRod Evans        wr      %g0, 0x4, %fprs         ! fprs.fef = 1
273*1e49577aSRod Evans
274*1e49577aSRod Evans        bz,pn   %ncc, blalign           ! now block aligned
275*1e49577aSRod Evans        sub     %i3, 64, %i3		! (delay slot) %i3 = (dst & 63) - 64
276*1e49577aSRod Evans        neg     %i3                     ! bytes till block aligned
277*1e49577aSRod Evans	sub	%i2, %i3, %i2		! update %i2 with new count
278*1e49577aSRod Evans
279*1e49577aSRod Evans	! Copy %i3 bytes till dst is block (64 byte) aligned. use
280*1e49577aSRod Evans	! double word copies.
281*1e49577aSRod Evans
282*1e49577aSRod Evans        alignaddr %i1, %g0, %g1		! %g1 = aligned src, offset goes to %gsr
283*1e49577aSRod Evans        ldd     [%g1], %d0
284*1e49577aSRod Evans        add     %g1, 8, %g1
	! Loop unrolled twice so %d0 and %d2 swap roles without a register move.
285*1e49577aSRod Evans6:
286*1e49577aSRod Evans        ldd     [%g1], %d2
287*1e49577aSRod Evans        add     %g1, 8, %g1
288*1e49577aSRod Evans        subcc   %i3, 8, %i3
289*1e49577aSRod Evans        faligndata %d0, %d2, %d8
290*1e49577aSRod Evans        std     %d8, [%i0]
291*1e49577aSRod Evans        add     %i1, 8, %i1
292*1e49577aSRod Evans        bz,pn   %ncc, blalign
293*1e49577aSRod Evans        add     %i0, 8, %i0		! (delay slot)
294*1e49577aSRod Evans        ldd     [%g1], %d0
295*1e49577aSRod Evans        add     %g1, 8, %g1
296*1e49577aSRod Evans        subcc   %i3, 8, %i3
297*1e49577aSRod Evans        faligndata %d2, %d0, %d8
298*1e49577aSRod Evans        std     %d8, [%i0]
299*1e49577aSRod Evans        add     %i1, 8, %i1
300*1e49577aSRod Evans        bgu,pn	%ncc, 6b
301*1e49577aSRod Evans        add     %i0, 8, %i0		! (delay slot)
302*1e49577aSRod Evans
/*
 * blalign: set up the block copy.
 *
 * Entered with %i0 = destination, %i1 = source and %i2 = bytes
 * remaining. Splits the count into a block part (%i3), a doubleword
 * part (%i4) and a residual (%i2), then preloads source data into
 * %d0-%d46 before the dispatch to the seg0-seg7 loops.
 * NOTE(review): %i4, the reduced %i2 and the advanced %i1 are consumed
 * by code that is not part of this excerpt.
 */
303*1e49577aSRod Evansblalign:
304*1e49577aSRod Evans	membar  #StoreLoad
305*1e49577aSRod Evans	! %i2 = total length
306*1e49577aSRod Evans	! %i3 = blocks  (length - 64) / 64
307*1e49577aSRod Evans	! %i4 = doubles remaining  (length - blocks)
308*1e49577aSRod Evans	sub	%i2, 64, %i3
309*1e49577aSRod Evans	andn	%i3, 63, %i3		! %i3 = (length - 64) rounded down to 64
310*1e49577aSRod Evans	sub	%i2, %i3, %i4
311*1e49577aSRod Evans	andn	%i4, 7, %i4		! (length - %i3) rounded down to 8 ...
312*1e49577aSRod Evans	sub	%i4, 16, %i4		! ... less 16
313*1e49577aSRod Evans	sub	%i2, %i4, %i2
314*1e49577aSRod Evans	sub	%i2, %i3, %i2		! %i2 = length - %i3 - %i4
315*1e49577aSRod Evans
316*1e49577aSRod Evans	andn	%i1, 0x3F, %l7		! blk aligned address
317*1e49577aSRod Evans	alignaddr %i1, %g0, %g0		! gen %gsr
318*1e49577aSRod Evans
319*1e49577aSRod Evans	srl	%i1, 3, %l5		! bits 3,4,5 are now least sig in  %l5
320*1e49577aSRod Evans	andcc  	%l5, 7, %l6		! %l6 = index (0-7) of the src doubleword
					! within its 64-byte block
321*1e49577aSRod Evans	add	%i1, %i4, %i1
322*1e49577aSRod Evans	add	%i1, %i3, %i1		! %i1 += %i3 + %i4
323*1e49577aSRod Evans
324*1e49577aSRod Evans	be,a	%ncc, 1f	! branch taken if src is 64-byte aligned
325*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0	! (annulled slot) load the whole block
326*1e49577aSRod Evans
	! Otherwise load only doublewords %l6 through 7 of the first block by
	! jumping into the ldd list below. Each instruction is 4 bytes and the
	! first ldd sits 20 bytes after the call, so call + 16 + 4 * %l6 lands
	! on the ldd for doubleword %l6 (%l6 is 1-7 here).
327*1e49577aSRod Evans	call	.+8		! get the address of this instruction in %o7
328*1e49577aSRod Evans	sll	%l6, 2, %l4	! (delay slot) %l4 = %l6 * 4
329*1e49577aSRod Evans	add	%o7, %l4, %o7
330*1e49577aSRod Evans	jmp	%o7 + 16	! jump to the starting ldd instruction
331*1e49577aSRod Evans	nop
332*1e49577aSRod Evans	ldd	[%l7+8], %d2
333*1e49577aSRod Evans	ldd	[%l7+16], %d4
334*1e49577aSRod Evans	ldd	[%l7+24], %d6
335*1e49577aSRod Evans	ldd	[%l7+32], %d8
336*1e49577aSRod Evans	ldd	[%l7+40], %d10
337*1e49577aSRod Evans	ldd	[%l7+48], %d12
338*1e49577aSRod Evans	ldd	[%l7+56], %d14
339*1e49577aSRod Evans1:
340*1e49577aSRod Evans	add	%l7, 64, %l7
341*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16	! second source block
342*1e49577aSRod Evans	add	%l7, 64, %l7
343*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32	! third source block
344*1e49577aSRod Evans	add	%l7, 64, %l7		! %l7 = next block to load
345*1e49577aSRod Evans	sub	%i3, 128, %i3		! the seg loops count %i3 down to zero
346*1e49577aSRod Evans
347*1e49577aSRod Evans
348*1e49577aSRod Evans        ! switch statement to get us to the right 8 byte blk within a
349*1e49577aSRod Evans        ! 64 byte block
	!
	! Binary search on %l6 (0-7): branches to seg0 ... seg7. The branches
	! at sqtr, hlf and fqtr test condition codes set by a cmp that ran
	! before, or in the delay slot of, the branch that reached them.
350*1e49577aSRod Evans
351*1e49577aSRod Evans	cmp	 %l6, 4
352*1e49577aSRod Evans	bgeu,a	 hlf			! %l6 is 4-7
353*1e49577aSRod Evans	cmp	 %l6, 6			! (annulled slot) runs only if taken
354*1e49577aSRod Evans	cmp	 %l6, 2
355*1e49577aSRod Evans	bgeu,a	 sqtr			! %l6 is 2 or 3
356*1e49577aSRod Evans	nop
357*1e49577aSRod Evans	cmp	 %l6, 1
358*1e49577aSRod Evans	be,a	 seg1
359*1e49577aSRod Evans	nop
360*1e49577aSRod Evans	ba	 seg0
361*1e49577aSRod Evans	nop
362*1e49577aSRod Evanssqtr:
363*1e49577aSRod Evans	be,a	 seg2			! cc from cmp %l6, 2
364*1e49577aSRod Evans	nop
365*1e49577aSRod Evans	ba,a	 seg3			! annulled ba: the nop is skipped
366*1e49577aSRod Evans	nop
367*1e49577aSRod Evans
368*1e49577aSRod Evanshlf:
369*1e49577aSRod Evans	bgeu,a	 fqtr			! cc from cmp %l6, 6: %l6 is 6 or 7
370*1e49577aSRod Evans	nop
371*1e49577aSRod Evans	cmp	 %l6, 5
372*1e49577aSRod Evans	be,a	 seg5
373*1e49577aSRod Evans	nop
374*1e49577aSRod Evans	ba	 seg4
375*1e49577aSRod Evans	nop
376*1e49577aSRod Evansfqtr:
377*1e49577aSRod Evans	be,a	 seg6			! cc from cmp %l6, 6
378*1e49577aSRod Evans	nop
379*1e49577aSRod Evans	ba	 seg7
380*1e49577aSRod Evans	nop
381*1e49577aSRod Evans
/*
 * FALIGN_D0 / FALIGN_D16 / FALIGN_D32
 *
 * Each macro runs eight faligndata instructions over nine consecutive
 * source doublewords, starting at the register in the macro name, and
 * leaves the 64 result bytes in %d48-%d62. %d0-%d46 are used as a
 * ring: FALIGN_D32 wraps from %d46 back to %d0.
 */
382*1e49577aSRod Evans#define	FALIGN_D0			\
383*1e49577aSRod Evans	faligndata %d0, %d2, %d48	;\
384*1e49577aSRod Evans	faligndata %d2, %d4, %d50	;\
385*1e49577aSRod Evans	faligndata %d4, %d6, %d52	;\
386*1e49577aSRod Evans	faligndata %d6, %d8, %d54	;\
387*1e49577aSRod Evans	faligndata %d8, %d10, %d56	;\
388*1e49577aSRod Evans	faligndata %d10, %d12, %d58	;\
389*1e49577aSRod Evans	faligndata %d12, %d14, %d60	;\
390*1e49577aSRod Evans	faligndata %d14, %d16, %d62
391*1e49577aSRod Evans
392*1e49577aSRod Evans#define	FALIGN_D16			\
393*1e49577aSRod Evans	faligndata %d16, %d18, %d48	;\
394*1e49577aSRod Evans	faligndata %d18, %d20, %d50	;\
395*1e49577aSRod Evans	faligndata %d20, %d22, %d52	;\
396*1e49577aSRod Evans	faligndata %d22, %d24, %d54	;\
397*1e49577aSRod Evans	faligndata %d24, %d26, %d56	;\
398*1e49577aSRod Evans	faligndata %d26, %d28, %d58	;\
399*1e49577aSRod Evans	faligndata %d28, %d30, %d60	;\
400*1e49577aSRod Evans	faligndata %d30, %d32, %d62
401*1e49577aSRod Evans
402*1e49577aSRod Evans#define	FALIGN_D32			\
403*1e49577aSRod Evans	faligndata %d32, %d34, %d48	;\
404*1e49577aSRod Evans	faligndata %d34, %d36, %d50	;\
405*1e49577aSRod Evans	faligndata %d36, %d38, %d52	;\
406*1e49577aSRod Evans	faligndata %d38, %d40, %d54	;\
407*1e49577aSRod Evans	faligndata %d40, %d42, %d56	;\
408*1e49577aSRod Evans	faligndata %d42, %d44, %d58	;\
409*1e49577aSRod Evans	faligndata %d44, %d46, %d60	;\
410*1e49577aSRod Evans	faligndata %d46, %d0, %d62
411*1e49577aSRod Evans
/*
 * seg0: block copy loop reached when %l6 == 0, i.e. the source starts
 * at doubleword 0 of its 64-byte block.
 *
 * %l7 = next 64-byte source block to load, %i0 = destination,
 * %i3 = bytes still to be block-loaded. Three groups of FP registers
 * (%d0-%d14, %d16-%d30, %d32-%d46) hold buffered source data. Each
 * step aligns 64 bytes into %d48-%d62, refills the group it started
 * from with a block load, block-stores the result and advances both
 * pointers. When %i3 reaches zero, local label 0, 1 or 2 stores the
 * two blocks still buffered and branches to blkd0, blkd16 or blkd32,
 * which are defined outside this excerpt.
 */
412*1e49577aSRod Evansseg0:
413*1e49577aSRod Evans	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
414*1e49577aSRod Evans	FALIGN_D0
415*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0
416*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
417*1e49577aSRod Evans	add	%l7, 64, %l7
418*1e49577aSRod Evans	subcc	%i3, 64, %i3
419*1e49577aSRod Evans	bz,pn	%ncc, 0f
420*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
421*1e49577aSRod Evans	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
422*1e49577aSRod Evans	FALIGN_D16
423*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16
424*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
425*1e49577aSRod Evans	add	%l7, 64, %l7
426*1e49577aSRod Evans	subcc	%i3, 64, %i3
427*1e49577aSRod Evans	bz,pn	%ncc, 1f
428*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
429*1e49577aSRod Evans	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
430*1e49577aSRod Evans	FALIGN_D32
431*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32
432*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
433*1e49577aSRod Evans	add	%l7, 64, %l7
434*1e49577aSRod Evans	subcc	%i3, 64, %i3
435*1e49577aSRod Evans	bz,pn	%ncc, 2f
436*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
437*1e49577aSRod Evans	ba,a,pt	%ncc, seg0		! annulled ba: no delay slot runs
438*1e49577aSRod Evans
	! Loads finished after the 1st chunk: store the %d16 and %d32 groups.
439*1e49577aSRod Evans0:
440*1e49577aSRod Evans	FALIGN_D16
441*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
442*1e49577aSRod Evans	add	%i0, 64, %i0
443*1e49577aSRod Evans	membar	#Sync
444*1e49577aSRod Evans	FALIGN_D32
445*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
446*1e49577aSRod Evans	ba,pt	%ncc, blkd0
447*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
448*1e49577aSRod Evans
	! Loads finished after the 2nd chunk: store the %d32 and %d0 groups.
449*1e49577aSRod Evans1:
450*1e49577aSRod Evans	FALIGN_D32
451*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
452*1e49577aSRod Evans	add	%i0, 64, %i0
453*1e49577aSRod Evans	membar	#Sync
454*1e49577aSRod Evans	FALIGN_D0
455*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
456*1e49577aSRod Evans	ba,pt	%ncc, blkd16
457*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
458*1e49577aSRod Evans
	! Loads finished after the 3rd chunk: store the %d0 and %d16 groups.
459*1e49577aSRod Evans2:
460*1e49577aSRod Evans	FALIGN_D0
461*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
462*1e49577aSRod Evans	add	%i0, 64, %i0
463*1e49577aSRod Evans	membar	#Sync
464*1e49577aSRod Evans	FALIGN_D16
465*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
466*1e49577aSRod Evans	ba,pt	%ncc, blkd32
467*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
468*1e49577aSRod Evans
469*1e49577aSRod Evans
/*
 * FALIGN_D2 / FALIGN_D18 / FALIGN_D34
 *
 * Each macro runs eight faligndata instructions over nine consecutive
 * source doublewords, starting at the register in the macro name, and
 * leaves the 64 result bytes in %d48-%d62. %d0-%d46 are used as a
 * ring: FALIGN_D34 wraps from %d46 back to %d0 and %d2.
 */
470*1e49577aSRod Evans#define	FALIGN_D2			\
471*1e49577aSRod Evans	faligndata %d2, %d4, %d48	;\
472*1e49577aSRod Evans	faligndata %d4, %d6, %d50	;\
473*1e49577aSRod Evans	faligndata %d6, %d8, %d52	;\
474*1e49577aSRod Evans	faligndata %d8, %d10, %d54	;\
475*1e49577aSRod Evans	faligndata %d10, %d12, %d56	;\
476*1e49577aSRod Evans	faligndata %d12, %d14, %d58	;\
477*1e49577aSRod Evans	faligndata %d14, %d16, %d60	;\
478*1e49577aSRod Evans	faligndata %d16, %d18, %d62
479*1e49577aSRod Evans
480*1e49577aSRod Evans#define	FALIGN_D18			\
481*1e49577aSRod Evans	faligndata %d18, %d20, %d48	;\
482*1e49577aSRod Evans	faligndata %d20, %d22, %d50	;\
483*1e49577aSRod Evans	faligndata %d22, %d24, %d52	;\
484*1e49577aSRod Evans	faligndata %d24, %d26, %d54	;\
485*1e49577aSRod Evans	faligndata %d26, %d28, %d56	;\
486*1e49577aSRod Evans	faligndata %d28, %d30, %d58	;\
487*1e49577aSRod Evans	faligndata %d30, %d32, %d60	;\
488*1e49577aSRod Evans	faligndata %d32, %d34, %d62
489*1e49577aSRod Evans
490*1e49577aSRod Evans#define	FALIGN_D34			\
491*1e49577aSRod Evans	faligndata %d34, %d36, %d48	;\
492*1e49577aSRod Evans	faligndata %d36, %d38, %d50	;\
493*1e49577aSRod Evans	faligndata %d38, %d40, %d52	;\
494*1e49577aSRod Evans	faligndata %d40, %d42, %d54	;\
495*1e49577aSRod Evans	faligndata %d42, %d44, %d56	;\
496*1e49577aSRod Evans	faligndata %d44, %d46, %d58	;\
497*1e49577aSRod Evans	faligndata %d46, %d0, %d60	;\
498*1e49577aSRod Evans	faligndata %d0, %d2, %d62
499*1e49577aSRod Evans
/*
 * seg1: block copy loop reached when %l6 == 1, i.e. the source starts
 * at doubleword 1 of its 64-byte block.
 *
 * %l7 = next 64-byte source block to load, %i0 = destination,
 * %i3 = bytes still to be block-loaded. Each step aligns 64 bytes
 * into %d48-%d62 with FALIGN_D2/D18/D34, refills one 64-byte register
 * group with a block load, block-stores the result and advances both
 * pointers. When %i3 reaches zero, local label 0, 1 or 2 stores two
 * more blocks from the buffered data and branches to blkd2, blkd18 or
 * blkd34, which are defined outside this excerpt.
 */
500*1e49577aSRod Evansseg1:
501*1e49577aSRod Evans	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
502*1e49577aSRod Evans	FALIGN_D2
503*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0
504*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
505*1e49577aSRod Evans	add	%l7, 64, %l7
506*1e49577aSRod Evans	subcc	%i3, 64, %i3
507*1e49577aSRod Evans	bz,pn	%ncc, 0f
508*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
509*1e49577aSRod Evans	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
510*1e49577aSRod Evans	FALIGN_D18
511*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16
512*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
513*1e49577aSRod Evans	add	%l7, 64, %l7
514*1e49577aSRod Evans	subcc	%i3, 64, %i3
515*1e49577aSRod Evans	bz,pn	%ncc, 1f
516*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
517*1e49577aSRod Evans	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
518*1e49577aSRod Evans	FALIGN_D34
519*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32
520*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
521*1e49577aSRod Evans	add	%l7, 64, %l7
522*1e49577aSRod Evans	subcc	%i3, 64, %i3
523*1e49577aSRod Evans	bz,pn	%ncc, 2f
524*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
525*1e49577aSRod Evans	ba,a,pt	%ncc, seg1		! annulled ba: no delay slot runs
	! Loads finished after the 1st chunk.
526*1e49577aSRod Evans0:
527*1e49577aSRod Evans	FALIGN_D18
528*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
529*1e49577aSRod Evans	add	%i0, 64, %i0
530*1e49577aSRod Evans	membar	#Sync
531*1e49577aSRod Evans	FALIGN_D34
532*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
533*1e49577aSRod Evans	ba,pt	%ncc, blkd2
534*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
535*1e49577aSRod Evans
	! Loads finished after the 2nd chunk.
536*1e49577aSRod Evans1:
537*1e49577aSRod Evans	FALIGN_D34
538*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
539*1e49577aSRod Evans	add	%i0, 64, %i0
540*1e49577aSRod Evans	membar	#Sync
541*1e49577aSRod Evans	FALIGN_D2
542*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
543*1e49577aSRod Evans	ba,pt	%ncc, blkd18
544*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
545*1e49577aSRod Evans
	! Loads finished after the 3rd chunk.
546*1e49577aSRod Evans2:
547*1e49577aSRod Evans	FALIGN_D2
548*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
549*1e49577aSRod Evans	add	%i0, 64, %i0
550*1e49577aSRod Evans	membar	#Sync
551*1e49577aSRod Evans	FALIGN_D18
552*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
553*1e49577aSRod Evans	ba,pt	%ncc, blkd34
554*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
555*1e49577aSRod Evans
/*
 * FALIGN_D4 / FALIGN_D20 / FALIGN_D36
 *
 * Each macro runs eight faligndata instructions over nine consecutive
 * source doublewords, starting at the register in the macro name, and
 * leaves the 64 result bytes in %d48-%d62. %d0-%d46 are used as a
 * ring: FALIGN_D36 wraps from %d46 back to %d0, %d2 and %d4.
 */
556*1e49577aSRod Evans#define	FALIGN_D4			\
557*1e49577aSRod Evans	faligndata %d4, %d6, %d48	;\
558*1e49577aSRod Evans	faligndata %d6, %d8, %d50	;\
559*1e49577aSRod Evans	faligndata %d8, %d10, %d52	;\
560*1e49577aSRod Evans	faligndata %d10, %d12, %d54	;\
561*1e49577aSRod Evans	faligndata %d12, %d14, %d56	;\
562*1e49577aSRod Evans	faligndata %d14, %d16, %d58	;\
563*1e49577aSRod Evans	faligndata %d16, %d18, %d60	;\
564*1e49577aSRod Evans	faligndata %d18, %d20, %d62
565*1e49577aSRod Evans
566*1e49577aSRod Evans#define	FALIGN_D20			\
567*1e49577aSRod Evans	faligndata %d20, %d22, %d48	;\
568*1e49577aSRod Evans	faligndata %d22, %d24, %d50	;\
569*1e49577aSRod Evans	faligndata %d24, %d26, %d52	;\
570*1e49577aSRod Evans	faligndata %d26, %d28, %d54	;\
571*1e49577aSRod Evans	faligndata %d28, %d30, %d56	;\
572*1e49577aSRod Evans	faligndata %d30, %d32, %d58	;\
573*1e49577aSRod Evans	faligndata %d32, %d34, %d60	;\
574*1e49577aSRod Evans	faligndata %d34, %d36, %d62
575*1e49577aSRod Evans
576*1e49577aSRod Evans#define	FALIGN_D36			\
577*1e49577aSRod Evans	faligndata %d36, %d38, %d48	;\
578*1e49577aSRod Evans	faligndata %d38, %d40, %d50	;\
579*1e49577aSRod Evans	faligndata %d40, %d42, %d52	;\
580*1e49577aSRod Evans	faligndata %d42, %d44, %d54	;\
581*1e49577aSRod Evans	faligndata %d44, %d46, %d56	;\
582*1e49577aSRod Evans	faligndata %d46, %d0, %d58	;\
583*1e49577aSRod Evans	faligndata %d0, %d2, %d60	;\
584*1e49577aSRod Evans	faligndata %d2, %d4, %d62
585*1e49577aSRod Evans
/*
 * seg2: block copy loop reached when %l6 == 2, i.e. the source starts
 * at doubleword 2 of its 64-byte block.
 *
 * %l7 = next 64-byte source block to load, %i0 = destination,
 * %i3 = bytes still to be block-loaded. Each step aligns 64 bytes
 * into %d48-%d62 with FALIGN_D4/D20/D36, refills one 64-byte register
 * group with a block load, block-stores the result and advances both
 * pointers. When %i3 reaches zero, local label 0, 1 or 2 stores two
 * more blocks from the buffered data and branches to blkd4, blkd20 or
 * blkd36, which are defined outside this excerpt.
 */
586*1e49577aSRod Evansseg2:
587*1e49577aSRod Evans	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
588*1e49577aSRod Evans	FALIGN_D4
589*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0
590*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
591*1e49577aSRod Evans	add	%l7, 64, %l7
592*1e49577aSRod Evans	subcc	%i3, 64, %i3
593*1e49577aSRod Evans	bz,pn	%ncc, 0f
594*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
595*1e49577aSRod Evans	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
596*1e49577aSRod Evans	FALIGN_D20
597*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16
598*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
599*1e49577aSRod Evans	add	%l7, 64, %l7
600*1e49577aSRod Evans	subcc	%i3, 64, %i3
601*1e49577aSRod Evans	bz,pn	%ncc, 1f
602*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
603*1e49577aSRod Evans	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
604*1e49577aSRod Evans	FALIGN_D36
605*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32
606*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
607*1e49577aSRod Evans	add	%l7, 64, %l7
608*1e49577aSRod Evans	subcc	%i3, 64, %i3
609*1e49577aSRod Evans	bz,pn	%ncc, 2f
610*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
611*1e49577aSRod Evans	ba,a,pt	%ncc, seg2		! annulled ba: no delay slot runs
612*1e49577aSRod Evans
	! Loads finished after the 1st chunk.
613*1e49577aSRod Evans0:
614*1e49577aSRod Evans	FALIGN_D20
615*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
616*1e49577aSRod Evans	add	%i0, 64, %i0
617*1e49577aSRod Evans	membar	#Sync
618*1e49577aSRod Evans	FALIGN_D36
619*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
620*1e49577aSRod Evans	ba,pt	%ncc, blkd4
621*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
622*1e49577aSRod Evans
	! Loads finished after the 2nd chunk.
623*1e49577aSRod Evans1:
624*1e49577aSRod Evans	FALIGN_D36
625*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
626*1e49577aSRod Evans	add	%i0, 64, %i0
627*1e49577aSRod Evans	membar	#Sync
628*1e49577aSRod Evans	FALIGN_D4
629*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
630*1e49577aSRod Evans	ba,pt	%ncc, blkd20
631*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
632*1e49577aSRod Evans
	! Loads finished after the 3rd chunk.
633*1e49577aSRod Evans2:
634*1e49577aSRod Evans	FALIGN_D4
635*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
636*1e49577aSRod Evans	add	%i0, 64, %i0
637*1e49577aSRod Evans	membar	#Sync
638*1e49577aSRod Evans	FALIGN_D20
639*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
640*1e49577aSRod Evans	ba,pt	%ncc, blkd36
641*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
642*1e49577aSRod Evans
643*1e49577aSRod Evans
/*
 * FALIGN_D6 / FALIGN_D22 / FALIGN_D38
 *
 * Each macro runs eight faligndata instructions over nine consecutive
 * source doublewords, starting at the register in the macro name, and
 * leaves the 64 result bytes in %d48-%d62. %d0-%d46 are used as a
 * ring: FALIGN_D38 wraps from %d46 back to %d0 through %d6.
 */
644*1e49577aSRod Evans#define	FALIGN_D6			\
645*1e49577aSRod Evans	faligndata %d6, %d8, %d48	;\
646*1e49577aSRod Evans	faligndata %d8, %d10, %d50	;\
647*1e49577aSRod Evans	faligndata %d10, %d12, %d52	;\
648*1e49577aSRod Evans	faligndata %d12, %d14, %d54	;\
649*1e49577aSRod Evans	faligndata %d14, %d16, %d56	;\
650*1e49577aSRod Evans	faligndata %d16, %d18, %d58	;\
651*1e49577aSRod Evans	faligndata %d18, %d20, %d60	;\
652*1e49577aSRod Evans	faligndata %d20, %d22, %d62
653*1e49577aSRod Evans
654*1e49577aSRod Evans#define	FALIGN_D22			\
655*1e49577aSRod Evans	faligndata %d22, %d24, %d48	;\
656*1e49577aSRod Evans	faligndata %d24, %d26, %d50	;\
657*1e49577aSRod Evans	faligndata %d26, %d28, %d52	;\
658*1e49577aSRod Evans	faligndata %d28, %d30, %d54	;\
659*1e49577aSRod Evans	faligndata %d30, %d32, %d56	;\
660*1e49577aSRod Evans	faligndata %d32, %d34, %d58	;\
661*1e49577aSRod Evans	faligndata %d34, %d36, %d60	;\
662*1e49577aSRod Evans	faligndata %d36, %d38, %d62
663*1e49577aSRod Evans
664*1e49577aSRod Evans#define	FALIGN_D38			\
665*1e49577aSRod Evans	faligndata %d38, %d40, %d48	;\
666*1e49577aSRod Evans	faligndata %d40, %d42, %d50	;\
667*1e49577aSRod Evans	faligndata %d42, %d44, %d52	;\
668*1e49577aSRod Evans	faligndata %d44, %d46, %d54	;\
669*1e49577aSRod Evans	faligndata %d46, %d0, %d56	;\
670*1e49577aSRod Evans	faligndata %d0, %d2, %d58	;\
671*1e49577aSRod Evans	faligndata %d2, %d4, %d60	;\
672*1e49577aSRod Evans	faligndata %d4, %d6, %d62
673*1e49577aSRod Evans
/*
 * seg3: block copy loop reached when %l6 == 3, i.e. the source starts
 * at doubleword 3 of its 64-byte block.
 *
 * %l7 = next 64-byte source block to load, %i0 = destination,
 * %i3 = bytes still to be block-loaded. Each step aligns 64 bytes
 * into %d48-%d62 with FALIGN_D6/D22/D38, refills one 64-byte register
 * group with a block load, block-stores the result and advances both
 * pointers. When %i3 reaches zero, local label 0, 1 or 2 stores two
 * more blocks from the buffered data and branches to blkd6, blkd22 or
 * blkd38, which are defined outside this excerpt.
 */
674*1e49577aSRod Evansseg3:
675*1e49577aSRod Evans	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
676*1e49577aSRod Evans	FALIGN_D6
677*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0
678*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
679*1e49577aSRod Evans	add	%l7, 64, %l7
680*1e49577aSRod Evans	subcc	%i3, 64, %i3
681*1e49577aSRod Evans	bz,pn	%ncc, 0f
682*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
683*1e49577aSRod Evans	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
684*1e49577aSRod Evans	FALIGN_D22
685*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16
686*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
687*1e49577aSRod Evans	add	%l7, 64, %l7
688*1e49577aSRod Evans	subcc	%i3, 64, %i3
689*1e49577aSRod Evans	bz,pn	%ncc, 1f
690*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
691*1e49577aSRod Evans	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
692*1e49577aSRod Evans	FALIGN_D38
693*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32
694*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
695*1e49577aSRod Evans	add	%l7, 64, %l7
696*1e49577aSRod Evans	subcc	%i3, 64, %i3
697*1e49577aSRod Evans	bz,pn	%ncc, 2f
698*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
699*1e49577aSRod Evans	ba,a,pt	%ncc, seg3		! annulled ba: no delay slot runs
700*1e49577aSRod Evans
	! Loads finished after the 1st chunk.
701*1e49577aSRod Evans0:
702*1e49577aSRod Evans	FALIGN_D22
703*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
704*1e49577aSRod Evans	add	%i0, 64, %i0
705*1e49577aSRod Evans	membar	#Sync
706*1e49577aSRod Evans	FALIGN_D38
707*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
708*1e49577aSRod Evans	ba,pt	%ncc, blkd6
709*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
710*1e49577aSRod Evans
	! Loads finished after the 2nd chunk.
711*1e49577aSRod Evans1:
712*1e49577aSRod Evans	FALIGN_D38
713*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
714*1e49577aSRod Evans	add	%i0, 64, %i0
715*1e49577aSRod Evans	membar	#Sync
716*1e49577aSRod Evans	FALIGN_D6
717*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
718*1e49577aSRod Evans	ba,pt	%ncc, blkd22
719*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
720*1e49577aSRod Evans
	! Loads finished after the 3rd chunk.
721*1e49577aSRod Evans2:
722*1e49577aSRod Evans	FALIGN_D6
723*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
724*1e49577aSRod Evans	add	%i0, 64, %i0
725*1e49577aSRod Evans	membar	#Sync
726*1e49577aSRod Evans	FALIGN_D22
727*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
728*1e49577aSRod Evans	ba,pt	%ncc, blkd38
729*1e49577aSRod Evans	add	%i0, 64, %i0		! (delay slot)
730*1e49577aSRod Evans
731*1e49577aSRod Evans
/*
 * FALIGN_D8: eight faligndata steps over the register window %d8..%d24.
 * Each step combines one adjacent pair (%dN, %dN+2) and writes the
 * result to the next register of the output block %d48..%d62.
 * faligndata extracts 8 bytes from the concatenated pair at the byte
 * offset held in the GSR alignment field (set before this point in the
 * file).  The ';' separates the instructions once cpp has joined the
 * continuation lines.  Used by seg4 and its exit tails.
 */
732*1e49577aSRod Evans#define	FALIGN_D8			\
733*1e49577aSRod Evans	faligndata %d8, %d10, %d48	;\
734*1e49577aSRod Evans	faligndata %d10, %d12, %d50	;\
735*1e49577aSRod Evans	faligndata %d12, %d14, %d52	;\
736*1e49577aSRod Evans	faligndata %d14, %d16, %d54	;\
737*1e49577aSRod Evans	faligndata %d16, %d18, %d56	;\
738*1e49577aSRod Evans	faligndata %d18, %d20, %d58	;\
739*1e49577aSRod Evans	faligndata %d20, %d22, %d60	;\
740*1e49577aSRod Evans	faligndata %d22, %d24, %d62
741*1e49577aSRod Evans
/*
 * FALIGN_D24: eight faligndata steps over the register window
 * %d24..%d40, one adjacent pair (%dN, %dN+2) per step, filling the
 * output block %d48..%d62 in order.  Used by seg4 and its exit tails.
 */
742*1e49577aSRod Evans#define	FALIGN_D24			\
743*1e49577aSRod Evans	faligndata %d24, %d26, %d48	;\
744*1e49577aSRod Evans	faligndata %d26, %d28, %d50	;\
745*1e49577aSRod Evans	faligndata %d28, %d30, %d52	;\
746*1e49577aSRod Evans	faligndata %d30, %d32, %d54	;\
747*1e49577aSRod Evans	faligndata %d32, %d34, %d56	;\
748*1e49577aSRod Evans	faligndata %d34, %d36, %d58	;\
749*1e49577aSRod Evans	faligndata %d36, %d38, %d60	;\
750*1e49577aSRod Evans	faligndata %d38, %d40, %d62
751*1e49577aSRod Evans
/*
 * FALIGN_D40: eight faligndata steps starting at %d40.  The window
 * wraps from %d46 back to %d0 and ends at %d8, so it spans the end of
 * the %d32 bank and the start of the %d0 bank.  Results fill the output
 * block %d48..%d62 in order.  Used by seg4 and its exit tails.
 */
752*1e49577aSRod Evans#define	FALIGN_D40			\
753*1e49577aSRod Evans	faligndata %d40, %d42, %d48	;\
754*1e49577aSRod Evans	faligndata %d42, %d44, %d50	;\
755*1e49577aSRod Evans	faligndata %d44, %d46, %d52	;\
756*1e49577aSRod Evans	faligndata %d46, %d0, %d54	;\
757*1e49577aSRod Evans	faligndata %d0, %d2, %d56	;\
758*1e49577aSRod Evans	faligndata %d2, %d4, %d58	;\
759*1e49577aSRod Evans	faligndata %d4, %d6, %d60	;\
760*1e49577aSRod Evans	faligndata %d6, %d8, %d62
761*1e49577aSRod Evans
/*
 * seg4: steady-state loop of the block copy for the variant whose
 * register window starts at %d8, unrolled into three chunks.
 * Every chunk
 *   - runs one FALIGN_* macro (D8, D24, D40 in turn) to build a 64-byte
 *     result in %d48-%d62,
 *   - block-loads the next 64 source bytes from [%l7] into one of the
 *     banks %d0, %d16, %d32 (used in rotation),
 *   - block-stores %d48-%d62 to [%i0],
 *   - advances %l7 and %i0 by 64 and subtracts 64 from %i3.
 * When %i3 reaches zero the chunk branches to local label 0, 1 or 2
 * below, which drains the data still in registers and then enters the
 * blkdN dribble code.
 */
762*1e49577aSRod Evansseg4:
763*1e49577aSRod Evans	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
764*1e49577aSRod Evans	FALIGN_D8
765*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0
766*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
767*1e49577aSRod Evans	add	%l7, 64, %l7
768*1e49577aSRod Evans	subcc	%i3, 64, %i3
769*1e49577aSRod Evans	bz,pn	%ncc, 0f
	/* delay slot: %i0 is advanced whether or not the branch is taken */
770*1e49577aSRod Evans	add	%i0, 64, %i0
771*1e49577aSRod Evans	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
772*1e49577aSRod Evans	FALIGN_D24
773*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16
774*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
775*1e49577aSRod Evans	add	%l7, 64, %l7
776*1e49577aSRod Evans	subcc	%i3, 64, %i3
777*1e49577aSRod Evans	bz,pn	%ncc, 1f
	/* delay slot: executed on both paths */
778*1e49577aSRod Evans	add	%i0, 64, %i0
779*1e49577aSRod Evans	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
780*1e49577aSRod Evans	FALIGN_D40
781*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32
782*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
783*1e49577aSRod Evans	add	%l7, 64, %l7
784*1e49577aSRod Evans	subcc	%i3, 64, %i3
785*1e49577aSRod Evans	bz,pn	%ncc, 2f
	/* delay slot: executed on both paths */
786*1e49577aSRod Evans	add	%i0, 64, %i0
	/* ",a" annuls the delay slot of this unconditional branch */
787*1e49577aSRod Evans	ba,a,pt	%ncc, seg4
788*1e49577aSRod Evans
	/*
	 * Exit after the 1st chunk: no further block loads.  FALIGN_D24
	 * and then FALIGN_D40 produce two more 64-byte blocks from data in
	 * registers; the remaining partial block is handed to blkd8.
	 * NOTE(review): the membar #Sync presumably ensures the last ldda
	 * (into %d0) has completed before FALIGN_D40 reads %d0-%d8 --
	 * confirm against the UltraSPARC block load/store rules.
	 */
789*1e49577aSRod Evans0:
790*1e49577aSRod Evans	FALIGN_D24
791*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
792*1e49577aSRod Evans	add	%i0, 64, %i0
793*1e49577aSRod Evans	membar	#Sync
794*1e49577aSRod Evans	FALIGN_D40
795*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
796*1e49577aSRod Evans	ba,pt	%ncc, blkd8
797*1e49577aSRod Evans	add	%i0, 64, %i0
798*1e49577aSRod Evans
	/*
	 * Exit after the 2nd chunk: drain with FALIGN_D40 then FALIGN_D8
	 * (which reads the last-loaded %d16 bank after the membar);
	 * continues at blkd24.
	 */
799*1e49577aSRod Evans1:
800*1e49577aSRod Evans	FALIGN_D40
801*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
802*1e49577aSRod Evans	add	%i0, 64, %i0
803*1e49577aSRod Evans	membar	#Sync
804*1e49577aSRod Evans	FALIGN_D8
805*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
806*1e49577aSRod Evans	ba,pt	%ncc, blkd24
807*1e49577aSRod Evans	add	%i0, 64, %i0
808*1e49577aSRod Evans
	/*
	 * Exit after the 3rd chunk: drain with FALIGN_D8 then FALIGN_D24
	 * (which reads the last-loaded %d32 bank after the membar);
	 * continues at blkd40.
	 */
809*1e49577aSRod Evans2:
810*1e49577aSRod Evans	FALIGN_D8
811*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
812*1e49577aSRod Evans	add	%i0, 64, %i0
813*1e49577aSRod Evans	membar	#Sync
814*1e49577aSRod Evans	FALIGN_D24
815*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
816*1e49577aSRod Evans	ba,pt	%ncc, blkd40
817*1e49577aSRod Evans	add	%i0, 64, %i0
818*1e49577aSRod Evans
819*1e49577aSRod Evans
/*
 * FALIGN_D10: eight faligndata steps over the register window
 * %d10..%d26, one adjacent pair (%dN, %dN+2) per step, filling the
 * output block %d48..%d62 in order.  Used by seg5 and its exit tails.
 */
820*1e49577aSRod Evans#define	FALIGN_D10			\
821*1e49577aSRod Evans	faligndata %d10, %d12, %d48	;\
822*1e49577aSRod Evans	faligndata %d12, %d14, %d50	;\
823*1e49577aSRod Evans	faligndata %d14, %d16, %d52	;\
824*1e49577aSRod Evans	faligndata %d16, %d18, %d54	;\
825*1e49577aSRod Evans	faligndata %d18, %d20, %d56	;\
826*1e49577aSRod Evans	faligndata %d20, %d22, %d58	;\
827*1e49577aSRod Evans	faligndata %d22, %d24, %d60	;\
828*1e49577aSRod Evans	faligndata %d24, %d26, %d62
829*1e49577aSRod Evans
/*
 * FALIGN_D26: eight faligndata steps over the register window
 * %d26..%d42, one adjacent pair (%dN, %dN+2) per step, filling the
 * output block %d48..%d62 in order.  Used by seg5 and its exit tails.
 */
830*1e49577aSRod Evans#define	FALIGN_D26			\
831*1e49577aSRod Evans	faligndata %d26, %d28, %d48	;\
832*1e49577aSRod Evans	faligndata %d28, %d30, %d50	;\
833*1e49577aSRod Evans	faligndata %d30, %d32, %d52	;\
834*1e49577aSRod Evans	faligndata %d32, %d34, %d54	;\
835*1e49577aSRod Evans	faligndata %d34, %d36, %d56	;\
836*1e49577aSRod Evans	faligndata %d36, %d38, %d58	;\
837*1e49577aSRod Evans	faligndata %d38, %d40, %d60	;\
838*1e49577aSRod Evans	faligndata %d40, %d42, %d62
839*1e49577aSRod Evans
/*
 * FALIGN_D42: eight faligndata steps starting at %d42.  The window
 * wraps from %d46 back to %d0 and ends at %d10.  Results fill the
 * output block %d48..%d62 in order.  Used by seg5 and its exit tails.
 */
840*1e49577aSRod Evans#define	FALIGN_D42			\
841*1e49577aSRod Evans	faligndata %d42, %d44, %d48	;\
842*1e49577aSRod Evans	faligndata %d44, %d46, %d50	;\
843*1e49577aSRod Evans	faligndata %d46, %d0, %d52	;\
844*1e49577aSRod Evans	faligndata %d0, %d2, %d54	;\
845*1e49577aSRod Evans	faligndata %d2, %d4, %d56	;\
846*1e49577aSRod Evans	faligndata %d4, %d6, %d58	;\
847*1e49577aSRod Evans	faligndata %d6, %d8, %d60	;\
848*1e49577aSRod Evans	faligndata %d8, %d10, %d62
849*1e49577aSRod Evans
/*
 * seg5: steady-state loop of the block copy for the variant whose
 * register window starts at %d10, unrolled into three chunks.
 * Every chunk
 *   - runs one FALIGN_* macro (D10, D26, D42 in turn) to build a
 *     64-byte result in %d48-%d62,
 *   - block-loads the next 64 source bytes from [%l7] into one of the
 *     banks %d0, %d16, %d32 (used in rotation),
 *   - block-stores %d48-%d62 to [%i0],
 *   - advances %l7 and %i0 by 64 and subtracts 64 from %i3.
 * When %i3 reaches zero the chunk branches to local label 0, 1 or 2
 * below, which drains the data still in registers and then enters the
 * blkdN dribble code.
 */
850*1e49577aSRod Evansseg5:
851*1e49577aSRod Evans	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
852*1e49577aSRod Evans	FALIGN_D10
853*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0
854*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
855*1e49577aSRod Evans	add	%l7, 64, %l7
856*1e49577aSRod Evans	subcc	%i3, 64, %i3
857*1e49577aSRod Evans	bz,pn	%ncc, 0f
	/* delay slot: %i0 is advanced whether or not the branch is taken */
858*1e49577aSRod Evans	add	%i0, 64, %i0
859*1e49577aSRod Evans	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
860*1e49577aSRod Evans	FALIGN_D26
861*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16
862*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
863*1e49577aSRod Evans	add	%l7, 64, %l7
864*1e49577aSRod Evans	subcc	%i3, 64, %i3
865*1e49577aSRod Evans	bz,pn	%ncc, 1f
	/* delay slot: executed on both paths */
866*1e49577aSRod Evans	add	%i0, 64, %i0
867*1e49577aSRod Evans	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
868*1e49577aSRod Evans	FALIGN_D42
869*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32
870*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
871*1e49577aSRod Evans	add	%l7, 64, %l7
872*1e49577aSRod Evans	subcc	%i3, 64, %i3
873*1e49577aSRod Evans	bz,pn	%ncc, 2f
	/* delay slot: executed on both paths */
874*1e49577aSRod Evans	add	%i0, 64, %i0
	/* ",a" annuls the delay slot of this unconditional branch */
875*1e49577aSRod Evans	ba,a,pt	%ncc, seg5
876*1e49577aSRod Evans
	/*
	 * Exit after the 1st chunk: no further block loads.  FALIGN_D26
	 * and then FALIGN_D42 produce two more 64-byte blocks from data in
	 * registers; the remaining partial block is handed to blkd10.
	 * NOTE(review): the membar #Sync presumably ensures the last ldda
	 * (into %d0) has completed before FALIGN_D42 reads %d0-%d10 --
	 * confirm against the UltraSPARC block load/store rules.
	 */
877*1e49577aSRod Evans0:
878*1e49577aSRod Evans	FALIGN_D26
879*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
880*1e49577aSRod Evans	add	%i0, 64, %i0
881*1e49577aSRod Evans	membar	#Sync
882*1e49577aSRod Evans	FALIGN_D42
883*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
884*1e49577aSRod Evans	ba,pt	%ncc, blkd10
885*1e49577aSRod Evans	add	%i0, 64, %i0
886*1e49577aSRod Evans
	/*
	 * Exit after the 2nd chunk: drain with FALIGN_D42 then FALIGN_D10
	 * (which reads the last-loaded %d16 bank after the membar);
	 * continues at blkd26.
	 */
887*1e49577aSRod Evans1:
888*1e49577aSRod Evans	FALIGN_D42
889*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
890*1e49577aSRod Evans	add	%i0, 64, %i0
891*1e49577aSRod Evans	membar	#Sync
892*1e49577aSRod Evans	FALIGN_D10
893*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
894*1e49577aSRod Evans	ba,pt	%ncc, blkd26
895*1e49577aSRod Evans	add	%i0, 64, %i0
896*1e49577aSRod Evans
	/*
	 * Exit after the 3rd chunk: drain with FALIGN_D10 then FALIGN_D26
	 * (which reads the last-loaded %d32 bank after the membar);
	 * continues at blkd42.
	 */
897*1e49577aSRod Evans2:
898*1e49577aSRod Evans	FALIGN_D10
899*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
900*1e49577aSRod Evans	add	%i0, 64, %i0
901*1e49577aSRod Evans	membar	#Sync
902*1e49577aSRod Evans	FALIGN_D26
903*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
904*1e49577aSRod Evans	ba,pt	%ncc, blkd42
905*1e49577aSRod Evans	add	%i0, 64, %i0
906*1e49577aSRod Evans
907*1e49577aSRod Evans
/*
 * FALIGN_D12: eight faligndata steps over the register window
 * %d12..%d28, one adjacent pair (%dN, %dN+2) per step, filling the
 * output block %d48..%d62 in order.  Used by seg6 and its exit tails.
 */
908*1e49577aSRod Evans#define	FALIGN_D12			\
909*1e49577aSRod Evans	faligndata %d12, %d14, %d48	;\
910*1e49577aSRod Evans	faligndata %d14, %d16, %d50	;\
911*1e49577aSRod Evans	faligndata %d16, %d18, %d52	;\
912*1e49577aSRod Evans	faligndata %d18, %d20, %d54	;\
913*1e49577aSRod Evans	faligndata %d20, %d22, %d56	;\
914*1e49577aSRod Evans	faligndata %d22, %d24, %d58	;\
915*1e49577aSRod Evans	faligndata %d24, %d26, %d60	;\
916*1e49577aSRod Evans	faligndata %d26, %d28, %d62
917*1e49577aSRod Evans
/*
 * FALIGN_D28: eight faligndata steps over the register window
 * %d28..%d44, one adjacent pair (%dN, %dN+2) per step, filling the
 * output block %d48..%d62 in order.  Used by seg6 and its exit tails.
 */
918*1e49577aSRod Evans#define	FALIGN_D28			\
919*1e49577aSRod Evans	faligndata %d28, %d30, %d48	;\
920*1e49577aSRod Evans	faligndata %d30, %d32, %d50	;\
921*1e49577aSRod Evans	faligndata %d32, %d34, %d52	;\
922*1e49577aSRod Evans	faligndata %d34, %d36, %d54	;\
923*1e49577aSRod Evans	faligndata %d36, %d38, %d56	;\
924*1e49577aSRod Evans	faligndata %d38, %d40, %d58	;\
925*1e49577aSRod Evans	faligndata %d40, %d42, %d60	;\
926*1e49577aSRod Evans	faligndata %d42, %d44, %d62
927*1e49577aSRod Evans
/*
 * FALIGN_D44: eight faligndata steps starting at %d44.  The window
 * wraps from %d46 back to %d0 and ends at %d12.  Results fill the
 * output block %d48..%d62 in order.  Used by seg6 and its exit tails.
 */
928*1e49577aSRod Evans#define	FALIGN_D44			\
929*1e49577aSRod Evans	faligndata %d44, %d46, %d48	;\
930*1e49577aSRod Evans	faligndata %d46, %d0, %d50	;\
931*1e49577aSRod Evans	faligndata %d0, %d2, %d52	;\
932*1e49577aSRod Evans	faligndata %d2, %d4, %d54	;\
933*1e49577aSRod Evans	faligndata %d4, %d6, %d56	;\
934*1e49577aSRod Evans	faligndata %d6, %d8, %d58	;\
935*1e49577aSRod Evans	faligndata %d8, %d10, %d60	;\
936*1e49577aSRod Evans	faligndata %d10, %d12, %d62
937*1e49577aSRod Evans
/*
 * seg6: steady-state loop of the block copy for the variant whose
 * register window starts at %d12, unrolled into three chunks.
 * Every chunk
 *   - runs one FALIGN_* macro (D12, D28, D44 in turn) to build a
 *     64-byte result in %d48-%d62,
 *   - block-loads the next 64 source bytes from [%l7] into one of the
 *     banks %d0, %d16, %d32 (used in rotation),
 *   - block-stores %d48-%d62 to [%i0],
 *   - advances %l7 and %i0 by 64 and subtracts 64 from %i3.
 * When %i3 reaches zero the chunk branches to local label 0, 1 or 2
 * below, which drains the data still in registers and then enters the
 * blkdN dribble code.
 */
938*1e49577aSRod Evansseg6:
939*1e49577aSRod Evans	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
940*1e49577aSRod Evans	FALIGN_D12
941*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0
942*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
943*1e49577aSRod Evans	add	%l7, 64, %l7
944*1e49577aSRod Evans	subcc	%i3, 64, %i3
945*1e49577aSRod Evans	bz,pn	%ncc, 0f
	/* delay slot: %i0 is advanced whether or not the branch is taken */
946*1e49577aSRod Evans	add	%i0, 64, %i0
947*1e49577aSRod Evans	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
948*1e49577aSRod Evans	FALIGN_D28
949*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16
950*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
951*1e49577aSRod Evans	add	%l7, 64, %l7
952*1e49577aSRod Evans	subcc	%i3, 64, %i3
953*1e49577aSRod Evans	bz,pn	%ncc, 1f
	/* delay slot: executed on both paths */
954*1e49577aSRod Evans	add	%i0, 64, %i0
955*1e49577aSRod Evans	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
956*1e49577aSRod Evans	FALIGN_D44
957*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32
958*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
959*1e49577aSRod Evans	add	%l7, 64, %l7
960*1e49577aSRod Evans	subcc	%i3, 64, %i3
961*1e49577aSRod Evans	bz,pn	%ncc, 2f
	/* delay slot: executed on both paths */
962*1e49577aSRod Evans	add	%i0, 64, %i0
	/* ",a" annuls the delay slot of this unconditional branch */
963*1e49577aSRod Evans	ba,a,pt	%ncc, seg6
964*1e49577aSRod Evans
	/*
	 * Exit after the 1st chunk: no further block loads.  FALIGN_D28
	 * and then FALIGN_D44 produce two more 64-byte blocks from data in
	 * registers; the remaining partial block is handed to blkd12.
	 * NOTE(review): the membar #Sync presumably ensures the last ldda
	 * (into %d0) has completed before FALIGN_D44 reads %d0-%d12 --
	 * confirm against the UltraSPARC block load/store rules.
	 */
965*1e49577aSRod Evans0:
966*1e49577aSRod Evans	FALIGN_D28
967*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
968*1e49577aSRod Evans	add	%i0, 64, %i0
969*1e49577aSRod Evans	membar	#Sync
970*1e49577aSRod Evans	FALIGN_D44
971*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
972*1e49577aSRod Evans	ba,pt	%ncc, blkd12
973*1e49577aSRod Evans	add	%i0, 64, %i0
974*1e49577aSRod Evans
	/*
	 * Exit after the 2nd chunk: drain with FALIGN_D44 then FALIGN_D12
	 * (which reads the last-loaded %d16 bank after the membar);
	 * continues at blkd28.
	 */
975*1e49577aSRod Evans1:
976*1e49577aSRod Evans	FALIGN_D44
977*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
978*1e49577aSRod Evans	add	%i0, 64, %i0
979*1e49577aSRod Evans	membar	#Sync
980*1e49577aSRod Evans	FALIGN_D12
981*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
982*1e49577aSRod Evans	ba,pt	%ncc, blkd28
983*1e49577aSRod Evans	add	%i0, 64, %i0
984*1e49577aSRod Evans
	/*
	 * Exit after the 3rd chunk: drain with FALIGN_D12 then FALIGN_D28
	 * (which reads the last-loaded %d32 bank after the membar);
	 * continues at blkd44.
	 */
985*1e49577aSRod Evans2:
986*1e49577aSRod Evans	FALIGN_D12
987*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
988*1e49577aSRod Evans	add	%i0, 64, %i0
989*1e49577aSRod Evans	membar	#Sync
990*1e49577aSRod Evans	FALIGN_D28
991*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
992*1e49577aSRod Evans	ba,pt	%ncc, blkd44
993*1e49577aSRod Evans	add	%i0, 64, %i0
994*1e49577aSRod Evans
995*1e49577aSRod Evans
/*
 * FALIGN_D14: eight faligndata steps over the register window
 * %d14..%d30, one adjacent pair (%dN, %dN+2) per step, filling the
 * output block %d48..%d62 in order.  Used by seg7 and its exit tails.
 */
996*1e49577aSRod Evans#define	FALIGN_D14			\
997*1e49577aSRod Evans	faligndata %d14, %d16, %d48	;\
998*1e49577aSRod Evans	faligndata %d16, %d18, %d50	;\
999*1e49577aSRod Evans	faligndata %d18, %d20, %d52	;\
1000*1e49577aSRod Evans	faligndata %d20, %d22, %d54	;\
1001*1e49577aSRod Evans	faligndata %d22, %d24, %d56	;\
1002*1e49577aSRod Evans	faligndata %d24, %d26, %d58	;\
1003*1e49577aSRod Evans	faligndata %d26, %d28, %d60	;\
1004*1e49577aSRod Evans	faligndata %d28, %d30, %d62
1005*1e49577aSRod Evans
/*
 * FALIGN_D30: eight faligndata steps over the register window
 * %d30..%d46, one adjacent pair (%dN, %dN+2) per step, filling the
 * output block %d48..%d62 in order.  Used by seg7 and its exit tails.
 */
1006*1e49577aSRod Evans#define	FALIGN_D30			\
1007*1e49577aSRod Evans	faligndata %d30, %d32, %d48	;\
1008*1e49577aSRod Evans	faligndata %d32, %d34, %d50	;\
1009*1e49577aSRod Evans	faligndata %d34, %d36, %d52	;\
1010*1e49577aSRod Evans	faligndata %d36, %d38, %d54	;\
1011*1e49577aSRod Evans	faligndata %d38, %d40, %d56	;\
1012*1e49577aSRod Evans	faligndata %d40, %d42, %d58	;\
1013*1e49577aSRod Evans	faligndata %d42, %d44, %d60	;\
1014*1e49577aSRod Evans	faligndata %d44, %d46, %d62
1015*1e49577aSRod Evans
/*
 * FALIGN_D46: eight faligndata steps starting at %d46.  The window
 * wraps immediately from %d46 back to %d0 and ends at %d14.  Results
 * fill the output block %d48..%d62 in order.  Used by seg7 and its exit
 * tails.
 */
1016*1e49577aSRod Evans#define	FALIGN_D46			\
1017*1e49577aSRod Evans	faligndata %d46, %d0, %d48	;\
1018*1e49577aSRod Evans	faligndata %d0, %d2, %d50	;\
1019*1e49577aSRod Evans	faligndata %d2, %d4, %d52	;\
1020*1e49577aSRod Evans	faligndata %d4, %d6, %d54	;\
1021*1e49577aSRod Evans	faligndata %d6, %d8, %d56	;\
1022*1e49577aSRod Evans	faligndata %d8, %d10, %d58	;\
1023*1e49577aSRod Evans	faligndata %d10, %d12, %d60	;\
1024*1e49577aSRod Evans	faligndata %d12, %d14, %d62
1025*1e49577aSRod Evans
/*
 * seg7: steady-state loop of the block copy for the variant whose
 * register window starts at %d14, unrolled into three chunks.
 * Every chunk
 *   - runs one FALIGN_* macro (D14, D30, D46 in turn) to build a
 *     64-byte result in %d48-%d62,
 *   - block-loads the next 64 source bytes from [%l7] into one of the
 *     banks %d0, %d16, %d32 (used in rotation),
 *   - block-stores %d48-%d62 to [%i0],
 *   - advances %l7 and %i0 by 64 and subtracts 64 from %i3.
 * When %i3 reaches zero the chunk branches to local label 0, 1 or 2
 * below, which drains the data still in registers and then enters the
 * blkdN dribble code.
 */
1026*1e49577aSRod Evansseg7:
1027*1e49577aSRod Evans	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1028*1e49577aSRod Evans	FALIGN_D14
1029*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d0
1030*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1031*1e49577aSRod Evans	add	%l7, 64, %l7
1032*1e49577aSRod Evans	subcc	%i3, 64, %i3
1033*1e49577aSRod Evans	bz,pn	%ncc, 0f
	/* delay slot: %i0 is advanced whether or not the branch is taken */
1034*1e49577aSRod Evans	add	%i0, 64, %i0
1035*1e49577aSRod Evans	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
1036*1e49577aSRod Evans	FALIGN_D30
1037*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d16
1038*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1039*1e49577aSRod Evans	add	%l7, 64, %l7
1040*1e49577aSRod Evans	subcc	%i3, 64, %i3
1041*1e49577aSRod Evans	bz,pn	%ncc, 1f
	/* delay slot: executed on both paths */
1042*1e49577aSRod Evans	add	%i0, 64, %i0
1043*1e49577aSRod Evans	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
1044*1e49577aSRod Evans	FALIGN_D46
1045*1e49577aSRod Evans	ldda	[%l7]ASI_BLK_P, %d32
1046*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1047*1e49577aSRod Evans	add	%l7, 64, %l7
1048*1e49577aSRod Evans	subcc	%i3, 64, %i3
1049*1e49577aSRod Evans	bz,pn	%ncc, 2f
	/* delay slot: executed on both paths */
1050*1e49577aSRod Evans	add	%i0, 64, %i0
	/* ",a" annuls the delay slot of this unconditional branch */
1051*1e49577aSRod Evans	ba,a,pt	%ncc, seg7
1052*1e49577aSRod Evans
	/*
	 * Exit after the 1st chunk: no further block loads.  FALIGN_D30
	 * and then FALIGN_D46 produce two more 64-byte blocks from data in
	 * registers; the remaining partial block is handed to blkd14.
	 * NOTE(review): the membar #Sync presumably ensures the last ldda
	 * (into %d0) has completed before FALIGN_D46 reads %d0-%d14 --
	 * confirm against the UltraSPARC block load/store rules.
	 */
1053*1e49577aSRod Evans0:
1054*1e49577aSRod Evans	FALIGN_D30
1055*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1056*1e49577aSRod Evans	add	%i0, 64, %i0
1057*1e49577aSRod Evans	membar	#Sync
1058*1e49577aSRod Evans	FALIGN_D46
1059*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1060*1e49577aSRod Evans	ba,pt	%ncc, blkd14
1061*1e49577aSRod Evans	add	%i0, 64, %i0
1062*1e49577aSRod Evans
	/*
	 * Exit after the 2nd chunk: drain with FALIGN_D46 then FALIGN_D14
	 * (which reads the last-loaded %d16 bank after the membar);
	 * continues at blkd30.
	 */
1063*1e49577aSRod Evans1:
1064*1e49577aSRod Evans	FALIGN_D46
1065*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1066*1e49577aSRod Evans	add	%i0, 64, %i0
1067*1e49577aSRod Evans	membar	#Sync
1068*1e49577aSRod Evans	FALIGN_D14
1069*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1070*1e49577aSRod Evans	ba,pt	%ncc, blkd30
1071*1e49577aSRod Evans	add	%i0, 64, %i0
1072*1e49577aSRod Evans
	/*
	 * Exit after the 3rd chunk: drain with FALIGN_D14 then FALIGN_D30
	 * (which reads the last-loaded %d32 bank after the membar);
	 * continues at blkd46.
	 */
1073*1e49577aSRod Evans2:
1074*1e49577aSRod Evans	FALIGN_D14
1075*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1076*1e49577aSRod Evans	add	%i0, 64, %i0
1077*1e49577aSRod Evans	membar	#Sync
1078*1e49577aSRod Evans	FALIGN_D30
1079*1e49577aSRod Evans	stda	%d48, [%i0]ASI_BLK_P
1080*1e49577aSRod Evans	ba,pt	%ncc, blkd46
1081*1e49577aSRod Evans	add	%i0, 64, %i0
1082*1e49577aSRod Evans
1083*1e49577aSRod Evans
1084*1e49577aSRod Evans	!
1085*1e49577aSRod Evans	! dribble out the last partial block
1086*1e49577aSRod Evans	!
	/*
	 * blkd0 .. blkd14: entry points for the %d0 bank.  The block-loop
	 * exit tails branch to blkdN, where N is the first source register
	 * still to be consumed.  Each stage subtracts 8 from %i4 and leaves
	 * for blkdone on borrow (fewer than 8 bytes were left); otherwise
	 * it stores one doubleword built from (%dN, %dN+2) at [%i0],
	 * advances %i0 by 8 and falls into the next stage.
	 * The faligndata sits in the delay slot of the blu, so it also
	 * executes on the exit path; blkdone does not read %d48.
	 * At blkd14 the next doubleword is not taken from a register:
	 * %d14 is copied to %d0 and blkleft continues with loads from [%l7].
	 */
1087*1e49577aSRod Evansblkd0:
1088*1e49577aSRod Evans	subcc	%i4, 8, %i4
1089*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1090*1e49577aSRod Evans	faligndata %d0, %d2, %d48
1091*1e49577aSRod Evans	std	%d48, [%i0]
1092*1e49577aSRod Evans	add	%i0, 8, %i0
1093*1e49577aSRod Evansblkd2:
1094*1e49577aSRod Evans	subcc	%i4, 8, %i4
1095*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1096*1e49577aSRod Evans	faligndata %d2, %d4, %d48
1097*1e49577aSRod Evans	std	%d48, [%i0]
1098*1e49577aSRod Evans	add	%i0, 8, %i0
1099*1e49577aSRod Evansblkd4:
1100*1e49577aSRod Evans	subcc	%i4, 8, %i4
1101*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1102*1e49577aSRod Evans	faligndata %d4, %d6, %d48
1103*1e49577aSRod Evans	std	%d48, [%i0]
1104*1e49577aSRod Evans	add	%i0, 8, %i0
1105*1e49577aSRod Evansblkd6:
1106*1e49577aSRod Evans	subcc	%i4, 8, %i4
1107*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1108*1e49577aSRod Evans	faligndata %d6, %d8, %d48
1109*1e49577aSRod Evans	std	%d48, [%i0]
1110*1e49577aSRod Evans	add	%i0, 8, %i0
1111*1e49577aSRod Evansblkd8:
1112*1e49577aSRod Evans	subcc	%i4, 8, %i4
1113*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1114*1e49577aSRod Evans	faligndata %d8, %d10, %d48
1115*1e49577aSRod Evans	std	%d48, [%i0]
1116*1e49577aSRod Evans	add	%i0, 8, %i0
1117*1e49577aSRod Evansblkd10:
1118*1e49577aSRod Evans	subcc	%i4, 8, %i4
1119*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1120*1e49577aSRod Evans	faligndata %d10, %d12, %d48
1121*1e49577aSRod Evans	std	%d48, [%i0]
1122*1e49577aSRod Evans	add	%i0, 8, %i0
1123*1e49577aSRod Evansblkd12:
1124*1e49577aSRod Evans	subcc	%i4, 8, %i4
1125*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1126*1e49577aSRod Evans	faligndata %d12, %d14, %d48
1127*1e49577aSRod Evans	std	%d48, [%i0]
1128*1e49577aSRod Evans	add	%i0, 8, %i0
1129*1e49577aSRod Evansblkd14:
1130*1e49577aSRod Evans	subcc	%i4, 8, %i4
1131*1e49577aSRod Evans	blu,pn	%ncc, blkdone
	/* delay slot: %d0 = %d14, the "previous doubleword" blkleft expects */
1132*1e49577aSRod Evans	fsrc1	%d14, %d0
	/* annulled branch-always: no delay-slot instruction runs */
1133*1e49577aSRod Evans	ba,a,pt	%ncc, blkleft
1134*1e49577aSRod Evans
/*
 * blkd16 .. blkd30: dribble entry points for the %d16 bank.  Each stage
 * subtracts 8 from %i4 and leaves for blkdone on borrow; otherwise it
 * stores one doubleword built from (%dN, %dN+2) at [%i0], advances %i0
 * by 8 and falls into the next stage.  The faligndata is in the delay
 * slot of the blu and therefore also runs on the exit path.
 * At blkd30, %d30 is copied to %d0 and blkleft takes over with loads
 * from [%l7].
 */
1135*1e49577aSRod Evansblkd16:
1136*1e49577aSRod Evans	subcc	%i4, 8, %i4
1137*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1138*1e49577aSRod Evans	faligndata %d16, %d18, %d48
1139*1e49577aSRod Evans	std	%d48, [%i0]
1140*1e49577aSRod Evans	add	%i0, 8, %i0
1141*1e49577aSRod Evansblkd18:
1142*1e49577aSRod Evans	subcc	%i4, 8, %i4
1143*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1144*1e49577aSRod Evans	faligndata %d18, %d20, %d48
1145*1e49577aSRod Evans	std	%d48, [%i0]
1146*1e49577aSRod Evans	add	%i0, 8, %i0
1147*1e49577aSRod Evansblkd20:
1148*1e49577aSRod Evans	subcc	%i4, 8, %i4
1149*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1150*1e49577aSRod Evans	faligndata %d20, %d22, %d48
1151*1e49577aSRod Evans	std	%d48, [%i0]
1152*1e49577aSRod Evans	add	%i0, 8, %i0
1153*1e49577aSRod Evansblkd22:
1154*1e49577aSRod Evans	subcc	%i4, 8, %i4
1155*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1156*1e49577aSRod Evans	faligndata %d22, %d24, %d48
1157*1e49577aSRod Evans	std	%d48, [%i0]
1158*1e49577aSRod Evans	add	%i0, 8, %i0
1159*1e49577aSRod Evansblkd24:
1160*1e49577aSRod Evans	subcc	%i4, 8, %i4
1161*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1162*1e49577aSRod Evans	faligndata %d24, %d26, %d48
1163*1e49577aSRod Evans	std	%d48, [%i0]
1164*1e49577aSRod Evans	add	%i0, 8, %i0
1165*1e49577aSRod Evansblkd26:
1166*1e49577aSRod Evans	subcc	%i4, 8, %i4
1167*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1168*1e49577aSRod Evans	faligndata %d26, %d28, %d48
1169*1e49577aSRod Evans	std	%d48, [%i0]
1170*1e49577aSRod Evans	add	%i0, 8, %i0
1171*1e49577aSRod Evansblkd28:
1172*1e49577aSRod Evans	subcc	%i4, 8, %i4
1173*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1174*1e49577aSRod Evans	faligndata %d28, %d30, %d48
1175*1e49577aSRod Evans	std	%d48, [%i0]
1176*1e49577aSRod Evans	add	%i0, 8, %i0
1177*1e49577aSRod Evansblkd30:
1178*1e49577aSRod Evans	subcc	%i4, 8, %i4
1179*1e49577aSRod Evans	blu,pn	%ncc, blkdone
	/* delay slot: %d0 = %d30, the "previous doubleword" blkleft expects */
1180*1e49577aSRod Evans	fsrc1	%d30, %d0
	/* annulled branch-always: no delay-slot instruction runs */
1181*1e49577aSRod Evans	ba,a,pt	%ncc, blkleft
/*
 * blkd32 .. blkd46: dribble entry points for the %d32 bank.  Each stage
 * subtracts 8 from %i4 and leaves for blkdone on borrow; otherwise it
 * stores one doubleword built from (%dN, %dN+2) at [%i0], advances %i0
 * by 8 and falls into the next stage.  The faligndata is in the delay
 * slot of the blu and therefore also runs on the exit path.
 * At blkd46, %d46 is copied to %d0 and execution falls through into
 * blkleft (no branch is needed because blkleft follows directly).
 */
1182*1e49577aSRod Evansblkd32:
1183*1e49577aSRod Evans	subcc	%i4, 8, %i4
1184*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1185*1e49577aSRod Evans	faligndata %d32, %d34, %d48
1186*1e49577aSRod Evans	std	%d48, [%i0]
1187*1e49577aSRod Evans	add	%i0, 8, %i0
1188*1e49577aSRod Evansblkd34:
1189*1e49577aSRod Evans	subcc	%i4, 8, %i4
1190*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1191*1e49577aSRod Evans	faligndata %d34, %d36, %d48
1192*1e49577aSRod Evans	std	%d48, [%i0]
1193*1e49577aSRod Evans	add	%i0, 8, %i0
1194*1e49577aSRod Evansblkd36:
1195*1e49577aSRod Evans	subcc	%i4, 8, %i4
1196*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1197*1e49577aSRod Evans	faligndata %d36, %d38, %d48
1198*1e49577aSRod Evans	std	%d48, [%i0]
1199*1e49577aSRod Evans	add	%i0, 8, %i0
1200*1e49577aSRod Evansblkd38:
1201*1e49577aSRod Evans	subcc	%i4, 8, %i4
1202*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1203*1e49577aSRod Evans	faligndata %d38, %d40, %d48
1204*1e49577aSRod Evans	std	%d48, [%i0]
1205*1e49577aSRod Evans	add	%i0, 8, %i0
1206*1e49577aSRod Evansblkd40:
1207*1e49577aSRod Evans	subcc	%i4, 8, %i4
1208*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1209*1e49577aSRod Evans	faligndata %d40, %d42, %d48
1210*1e49577aSRod Evans	std	%d48, [%i0]
1211*1e49577aSRod Evans	add	%i0, 8, %i0
1212*1e49577aSRod Evansblkd42:
1213*1e49577aSRod Evans	subcc	%i4, 8, %i4
1214*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1215*1e49577aSRod Evans	faligndata %d42, %d44, %d48
1216*1e49577aSRod Evans	std	%d48, [%i0]
1217*1e49577aSRod Evans	add	%i0, 8, %i0
1218*1e49577aSRod Evansblkd44:
1219*1e49577aSRod Evans	subcc	%i4, 8, %i4
1220*1e49577aSRod Evans	blu,pn	%ncc, blkdone
1221*1e49577aSRod Evans	faligndata %d44, %d46, %d48
1222*1e49577aSRod Evans	std	%d48, [%i0]
1223*1e49577aSRod Evans	add	%i0, 8, %i0
1224*1e49577aSRod Evansblkd46:
1225*1e49577aSRod Evans	subcc	%i4, 8, %i4
1226*1e49577aSRod Evans	blu,pn	%ncc, blkdone
	/* delay slot: %d0 = %d46, the "previous doubleword" blkleft expects */
1227*1e49577aSRod Evans	fsrc1	%d46, %d0
1228*1e49577aSRod Evans
/*
 * blkleft: copy the remaining whole doublewords with ordinary 8-byte
 * loads from [%l7].  On entry %d0 holds the previous source doubleword
 * and %i4 has already been decremented for the doubleword about to be
 * stored (the blkd14/blkd30/blkd46 stages do that subcc before arriving).
 * The loop is unrolled twice and alternates %d2 and %d0 as the newly
 * loaded register, so faligndata always gets (older, newer).
 * Each subcc tests whether another 8 bytes remain after the store in
 * the same half: the first half leaves for blkdone on borrow, the
 * second half loops while no borrow occurred and otherwise falls
 * through into blkdone.
 */
1229*1e49577aSRod Evansblkleft:
1230*1e49577aSRod Evans	ldd	[%l7], %d2
1231*1e49577aSRod Evans	add	%l7, 8, %l7
1232*1e49577aSRod Evans	subcc	%i4, 8, %i4
1233*1e49577aSRod Evans	faligndata %d0, %d2, %d8
1234*1e49577aSRod Evans	std	%d8, [%i0]
1235*1e49577aSRod Evans	blu,pn	%ncc, blkdone
	/* delay slot: %i0 is advanced on both paths */
1236*1e49577aSRod Evans	add	%i0, 8, %i0
1237*1e49577aSRod Evans	ldd	[%l7], %d0
1238*1e49577aSRod Evans	add	%l7, 8, %l7
1239*1e49577aSRod Evans	subcc	%i4, 8, %i4
1240*1e49577aSRod Evans	faligndata %d2, %d0, %d8
1241*1e49577aSRod Evans	std	%d8, [%i0]
1242*1e49577aSRod Evans	bgeu,pt	%ncc, blkleft
	/* delay slot: %i0 is advanced on both paths */
1243*1e49577aSRod Evans	add	%i0, 8, %i0
1244*1e49577aSRod Evans
/*
 * blkdone: copy the final %i2 bytes one at a time from [%i1] to [%i0];
 * goes straight to blkexit when %i2 is zero.
 * NOTE(review): %i1 and %i2 are set up outside this part of the file;
 * presumably they describe the tail left over after the doubleword
 * copy -- confirm against the setup code.
 */
1245*1e49577aSRod Evansblkdone:
1246*1e49577aSRod Evans	tst	%i2
1247*1e49577aSRod Evans	bz,pt 	%ncc, blkexit
	/* delay slot of bz: runs on both the taken and fall-through paths */
1248*1e49577aSRod Evans	and	%l3, 0x4, %l3		! fprs.du = fprs.dl = 0
1249*1e49577aSRod Evans
	/*
	 * Byte loop: load a byte into %i4, bump both pointers, count %i2
	 * down, and store the byte in the delay slot of the loop branch.
	 * %i0 has already been incremented at that point, hence [%i0 - 1].
	 */
1250*1e49577aSRod Evans7:      ldub    [%i1], %i4
1251*1e49577aSRod Evans        inc     %i1
1252*1e49577aSRod Evans        inc     %i0
1253*1e49577aSRod Evans        deccc   %i2
1254*1e49577aSRod Evans        bgu  	%ncc, 7b
1255*1e49577aSRod Evans        stb     %i4, [%i0 - 1]
1256*1e49577aSRod Evans
/*
 * blkexit: common return path.  Masks %l3 down to bit 2 and writes it
 * to %fprs, issues a memory barrier, then returns.  The restore in the
 * delay slot of ret places %i5 in the caller's %o0 as the return value.
 * The and repeats the masking already done in the delay slot at
 * blkdone; applying it twice gives the same result.
 * NOTE(review): %l3 and %i5 are set outside this part of the file;
 * presumably %l3 is the %fprs value saved on entry and %i5 the original
 * destination pointer (memcpy returns s1) -- confirm.
 */
1257*1e49577aSRod Evansblkexit:
1258*1e49577aSRod Evans        and     %l3, 0x4, %l3           ! fprs.du = fprs.dl = 0
1259*1e49577aSRod Evans	wr      %l3, %g0, %fprs         ! fprs = l3 - restore fprs.fef
1260*1e49577aSRod Evans	membar  #StoreLoad|#StoreStore
1261*1e49577aSRod Evans	ret
1262*1e49577aSRod Evans	restore %i5, %g0, %o0
1263*1e49577aSRod Evans
	/*
	 * End of the memcpy and __align_cpy_1 entry points.
	 * NOTE(review): SET_SIZE is a macro defined outside this file;
	 * presumably it emits the ELF symbol size for the named function --
	 * confirm in the assembler linkage header.
	 */
1264*1e49577aSRod Evans	SET_SIZE(memcpy)
1265*1e49577aSRod Evans	SET_SIZE(__align_cpy_1)
1266