xref: /linux/arch/sparc/lib/U1memcpy.S (revision 03c11eb3b16dc0058589751dfd91f254be2be613)
1b2441318SGreg Kroah-Hartman/* SPDX-License-Identifier: GPL-2.0 */
2478b8fecSSam Ravnborg/* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy.
3478b8fecSSam Ravnborg *
4478b8fecSSam Ravnborg * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
5478b8fecSSam Ravnborg * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
6478b8fecSSam Ravnborg */
7478b8fecSSam Ravnborg
/* Build environment selection.  GLOBAL_SPARE is the scratch global register
 * used throughout this file: g7 when __KERNEL__ is defined, g5 otherwise.
 */
8478b8fecSSam Ravnborg#ifdef __KERNEL__
9*4cdb71b6SMasahiro Yamada#include <linux/export.h>
10cb736fdbSDavid S. Miller#include <linux/linkage.h>
11478b8fecSSam Ravnborg#include <asm/visasm.h>
12478b8fecSSam Ravnborg#include <asm/asi.h>
13478b8fecSSam Ravnborg#define GLOBAL_SPARE	g7
14478b8fecSSam Ravnborg#else
/* Stand-alone build: local definitions of the block-transfer ASI, the
 * FPRS enable bit, and VISEntry/VISExit.
 * NOTE(review): VISExitHalf, ENTRY, ENDPROC and EXPORT_SYMBOL are used
 * further down but are not defined on this branch -- confirm whether the
 * non-kernel build is still expected to assemble.
 */
15478b8fecSSam Ravnborg#define GLOBAL_SPARE	g5
16478b8fecSSam Ravnborg#define ASI_BLK_P 0xf0
17478b8fecSSam Ravnborg#define FPRS_FEF  0x04
18478b8fecSSam Ravnborg#ifdef MEMCPY_DEBUG
/* VISEntry saves %fprs in %o5 and then writes FPRS_FEF to %fprs.  The debug
 * flavour additionally clears %g1-%g3 and sets the condition codes from 0 - 0.
 */
19478b8fecSSam Ravnborg#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
20478b8fecSSam Ravnborg		 clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
/* VISExit writes (%o5 & FPRS_FEF) back to %fprs; %o5 must still hold the
 * value saved by VISEntry.
 */
21478b8fecSSam Ravnborg#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
22478b8fecSSam Ravnborg#else
23478b8fecSSam Ravnborg#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
24478b8fecSSam Ravnborg#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
25478b8fecSSam Ravnborg#endif
26478b8fecSSam Ravnborg#endif
27478b8fecSSam Ravnborg
/* Overridable hooks.  Every definition below takes effect only when the
 * name has not already been defined before this point.
 *
 * EX_LD / EX_LD_FP / EX_ST / EX_ST_FP wrap each load and store in this
 * file.  The defaults expand to the wrapped instruction (x) and discard
 * the second argument (y), which at the use sites is one of the U1_* labels.
 */
28478b8fecSSam Ravnborg#ifndef EX_LD
29cb736fdbSDavid S. Miller#define EX_LD(x,y)	x
30478b8fecSSam Ravnborg#endif
31a7c5724bSRob Gardner#ifndef EX_LD_FP
32cb736fdbSDavid S. Miller#define EX_LD_FP(x,y)	x
33a7c5724bSRob Gardner#endif
34478b8fecSSam Ravnborg
35478b8fecSSam Ravnborg#ifndef EX_ST
36cb736fdbSDavid S. Miller#define EX_ST(x,y)	x
37478b8fecSSam Ravnborg#endif
38a7c5724bSRob Gardner#ifndef EX_ST_FP
39cb736fdbSDavid S. Miller#define EX_ST_FP(x,y)	x
40478b8fecSSam Ravnborg#endif
41478b8fecSSam Ravnborg
/* LOAD(type, addr, dest): emit the load mnemonic 'type' from [addr]. */
42478b8fecSSam Ravnborg#ifndef LOAD
43478b8fecSSam Ravnborg#define LOAD(type,addr,dest)	type [addr], dest
44478b8fecSSam Ravnborg#endif
45478b8fecSSam Ravnborg
/* LOAD_BLK(addr, dest): ldda through ASI_BLK_P (the 64-byte block loads
 * used by the large-copy path).
 */
46478b8fecSSam Ravnborg#ifndef LOAD_BLK
47478b8fecSSam Ravnborg#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
48478b8fecSSam Ravnborg#endif
49478b8fecSSam Ravnborg
/* STORE(type, src, addr): emit the store mnemonic 'type' to [addr]. */
50478b8fecSSam Ravnborg#ifndef STORE
51478b8fecSSam Ravnborg#define STORE(type,src,addr)	type src, [addr]
52478b8fecSSam Ravnborg#endif
53478b8fecSSam Ravnborg
/* STORE_BLK(src, addr): stda through ASI_BLK_P. */
54478b8fecSSam Ravnborg#ifndef STORE_BLK
55478b8fecSSam Ravnborg#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
56478b8fecSSam Ravnborg#endif
57478b8fecSSam Ravnborg
/* Name of the routine emitted by this file; memcpy unless overridden. */
58478b8fecSSam Ravnborg#ifndef FUNC_NAME
59478b8fecSSam Ravnborg#define FUNC_NAME	memcpy
60478b8fecSSam Ravnborg#endif
61478b8fecSSam Ravnborg
/* PREAMBLE is expanded near the top of the routine, after the length
 * sanity trap; it is empty by default.
 */
62478b8fecSSam Ravnborg#ifndef PREAMBLE
63478b8fecSSam Ravnborg#define PREAMBLE
64478b8fecSSam Ravnborg#endif
65478b8fecSSam Ravnborg
/* Condition-code register name used by the branches written as %XCC. */
66478b8fecSSam Ravnborg#ifndef XCC
67478b8fecSSam Ravnborg#define XCC xcc
68478b8fecSSam Ravnborg#endif
69478b8fecSSam Ravnborg
/* FREG_FROB(f1..f9): run faligndata over the eight adjacent register pairs
 * (f1,f2), (f2,f3), ... (f8,f9) and leave the results in %f48, %f50, ...
 * %f62, i.e. one realigned 64-byte block.  Arguments are register names
 * without the leading '%'.
 */
70478b8fecSSam Ravnborg#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)		\
71478b8fecSSam Ravnborg	faligndata		%f1, %f2, %f48;			\
72478b8fecSSam Ravnborg	faligndata		%f2, %f3, %f50;			\
73478b8fecSSam Ravnborg	faligndata		%f3, %f4, %f52;			\
74478b8fecSSam Ravnborg	faligndata		%f4, %f5, %f54;			\
75478b8fecSSam Ravnborg	faligndata		%f5, %f6, %f56;			\
76478b8fecSSam Ravnborg	faligndata		%f6, %f7, %f58;			\
77478b8fecSSam Ravnborg	faligndata		%f7, %f8, %f60;			\
78478b8fecSSam Ravnborg	faligndata		%f8, %f9, %f62;
79478b8fecSSam Ravnborg
/* MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt): one 64-byte step of the
 * unrolled loop.  Block-loads [src] into the registers starting at fdest,
 * block-stores the registers starting at fsrc to [dest], advances src by
 * 0x40 and subtracts 0x40 from GLOBAL_SPARE.  When GLOBAL_SPARE reaches
 * zero it branches to jmptgt.  The branch is not annulled, so the delay
 * slot advances dest by 0x40 whether or not the branch is taken.
 */
80cb736fdbSDavid S. Miller#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt)			\
81cb736fdbSDavid S. Miller	EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp);			\
82cb736fdbSDavid S. Miller	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);			\
83478b8fecSSam Ravnborg	add			%src, 0x40, %src;			\
84cb736fdbSDavid S. Miller	subcc			%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE;	\
85478b8fecSSam Ravnborg	be,pn			%xcc, jmptgt;				\
86478b8fecSSam Ravnborg	 add			%dest, 0x40, %dest;			\
87478b8fecSSam Ravnborg
/* LOOP_CHUNK1/2/3 differ only in which third of the register file is
 * refilled (%f0, %f16 or %f32); all three store from %f48.
 */
88cb736fdbSDavid S. Miller#define LOOP_CHUNK1(src, dest, branch_dest)		\
89cb736fdbSDavid S. Miller	MAIN_LOOP_CHUNK(src, dest, f0,  f48, branch_dest)
90cb736fdbSDavid S. Miller#define LOOP_CHUNK2(src, dest, branch_dest)		\
91cb736fdbSDavid S. Miller	MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
92cb736fdbSDavid S. Miller#define LOOP_CHUNK3(src, dest, branch_dest)		\
93cb736fdbSDavid S. Miller	MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
94478b8fecSSam Ravnborg
/* DO_SYNC: a full membar #Sync. */
95478b8fecSSam Ravnborg#define DO_SYNC			membar	#Sync;
/* STORE_SYNC(dest, fsrc): block-store the registers starting at fsrc to
 * [dest], advance dest by 0x40, then DO_SYNC.
 */
96478b8fecSSam Ravnborg#define STORE_SYNC(dest, fsrc)				\
97cb736fdbSDavid S. Miller	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);	\
98478b8fecSSam Ravnborg	add			%dest, 0x40, %dest;	\
99478b8fecSSam Ravnborg	DO_SYNC
100478b8fecSSam Ravnborg
/* STORE_JUMP(dest, fsrc, target): block-store the registers starting at
 * fsrc to [dest], advance dest by 0x40 and branch unconditionally to
 * target (nop in the delay slot).  Its store is tagged U1_gs_40_fp, where
 * STORE_SYNC uses U1_gs_80_fp.
 */
101478b8fecSSam Ravnborg#define STORE_JUMP(dest, fsrc, target)			\
102cb736fdbSDavid S. Miller	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp);	\
103478b8fecSSam Ravnborg	add			%dest, 0x40, %dest;	\
104478b8fecSSam Ravnborg	ba,pt			%xcc, target;		\
105478b8fecSSam Ravnborg	 nop;
106478b8fecSSam Ravnborg
/* FINISH_VISCHUNK(dest, f0, f1): subtract 8 from %g3 and branch to 95f if
 * the result is negative.  The delay slot always computes
 * faligndata(f0, f1) into %f48.  On fall-through that doubleword is stored
 * to [dest] and dest advances by 8.  The macro parameters named f0/f1
 * replace the register names in the body.
 */
107cb736fdbSDavid S. Miller#define FINISH_VISCHUNK(dest, f0, f1)			\
108cb736fdbSDavid S. Miller	subcc			%g3, 8, %g3;		\
109478b8fecSSam Ravnborg	bl,pn			%xcc, 95f;		\
110478b8fecSSam Ravnborg	 faligndata		%f0, %f1, %f48;		\
111cb736fdbSDavid S. Miller	EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp);	\
112478b8fecSSam Ravnborg	add			%dest, 8, %dest;
113478b8fecSSam Ravnborg
/* UNEVEN_VISCHUNK_LAST(dest, f0, f1): subtract 8 from %g3 and branch to 95f
 * if the result is negative.  The delay slot always copies register f0 into
 * register f1 (fsrc2).  Nothing is stored; dest is not referenced.
 */
114cb736fdbSDavid S. Miller#define UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
115cb736fdbSDavid S. Miller	subcc			%g3, 8, %g3;	\
116478b8fecSSam Ravnborg	bl,pn			%xcc, 95f;	\
1176f1d827fSDavid S. Miller	 fsrc2			%f0, %f1;
118478b8fecSSam Ravnborg
/* UNEVEN_VISCHUNK: same as UNEVEN_VISCHUNK_LAST, followed by an annulled
 * unconditional branch to 93f instead of falling through.
 */
119cb736fdbSDavid S. Miller#define UNEVEN_VISCHUNK(dest, f0, f1)		\
120cb736fdbSDavid S. Miller	UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
121478b8fecSSam Ravnborg	ba,a,pt			%xcc, 93f;
122478b8fecSSam Ravnborg
/* Declare %g2 and %g3 as scratch registers to the assembler. */
123478b8fecSSam Ravnborg	.register	%g2,#scratch
124478b8fecSSam Ravnborg	.register	%g3,#scratch
125478b8fecSSam Ravnborg
126478b8fecSSam Ravnborg	.text
/* Fixup stubs named as the second argument of the EX_LD/EX_ST wrappers.
 * They are assembled only when EX_RETVAL was not defined before this
 * point (the same test also supplies the default EX_RETVAL, which is the
 * identity).  Each stub returns, in %o0, the sum of registers spelled out
 * in the comment on it.
 * NOTE(review): presumably that sum is the number of bytes left uncopied
 * at the faulting access -- confirm against the EX_* definitions of the
 * files that include this one.
 * The *_fp stubs (and U1_gs_0/U1_gs_8/U1_gs_10) start with VISExitHalf,
 * which is not defined in this file.
 */
127cb736fdbSDavid S. Miller#ifndef EX_RETVAL
128cb736fdbSDavid S. Miller#define EX_RETVAL(x)	x
129cb736fdbSDavid S. MillerENTRY(U1_g1_1_fp)
	/* %o0 = %g1 + 1 + %g2 + %o2 */
130cb736fdbSDavid S. Miller	VISExitHalf
131cb736fdbSDavid S. Miller	add		%g1, 1, %g1
132cb736fdbSDavid S. Miller	add		%g1, %g2, %g1
133cb736fdbSDavid S. Miller	retl
134cb736fdbSDavid S. Miller	 add		%g1, %o2, %o0
135cb736fdbSDavid S. MillerENDPROC(U1_g1_1_fp)
136cb736fdbSDavid S. MillerENTRY(U1_g2_0_fp)
	/* %o0 = %g2 + %o2 */
137cb736fdbSDavid S. Miller	VISExitHalf
138cb736fdbSDavid S. Miller	retl
139cb736fdbSDavid S. Miller	 add		%g2, %o2, %o0
140cb736fdbSDavid S. MillerENDPROC(U1_g2_0_fp)
141cb736fdbSDavid S. MillerENTRY(U1_g2_8_fp)
	/* %o0 = %g2 + 8 + %o2 */
142cb736fdbSDavid S. Miller	VISExitHalf
143cb736fdbSDavid S. Miller	add		%g2, 8, %g2
144cb736fdbSDavid S. Miller	retl
145cb736fdbSDavid S. Miller	 add		%g2, %o2, %o0
146cb736fdbSDavid S. MillerENDPROC(U1_g2_8_fp)
147cb736fdbSDavid S. MillerENTRY(U1_gs_0_fp)
	/* %o0 = GLOBAL_SPARE + %g3 + %o2 */
148cb736fdbSDavid S. Miller	VISExitHalf
149cb736fdbSDavid S. Miller	add		%GLOBAL_SPARE, %g3, %o0
150cb736fdbSDavid S. Miller	retl
151cb736fdbSDavid S. Miller	 add		%o0, %o2, %o0
152cb736fdbSDavid S. MillerENDPROC(U1_gs_0_fp)
153cb736fdbSDavid S. MillerENTRY(U1_gs_80_fp)
	/* %o0 = GLOBAL_SPARE + 0x80 + %g3 + %o2 */
154cb736fdbSDavid S. Miller	VISExitHalf
155cb736fdbSDavid S. Miller	add		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
156cb736fdbSDavid S. Miller	add		%GLOBAL_SPARE, %g3, %o0
157cb736fdbSDavid S. Miller	retl
158cb736fdbSDavid S. Miller	 add		%o0, %o2, %o0
159cb736fdbSDavid S. MillerENDPROC(U1_gs_80_fp)
160cb736fdbSDavid S. MillerENTRY(U1_gs_40_fp)
	/* %o0 = GLOBAL_SPARE + 0x40 + %g3 + %o2 */
161cb736fdbSDavid S. Miller	VISExitHalf
162cb736fdbSDavid S. Miller	add		%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
163cb736fdbSDavid S. Miller	add		%GLOBAL_SPARE, %g3, %o0
164cb736fdbSDavid S. Miller	retl
165cb736fdbSDavid S. Miller	 add		%o0, %o2, %o0
166cb736fdbSDavid S. MillerENDPROC(U1_gs_40_fp)
167cb736fdbSDavid S. MillerENTRY(U1_g3_0_fp)
	/* %o0 = %g3 + %o2 */
168cb736fdbSDavid S. Miller	VISExitHalf
169cb736fdbSDavid S. Miller	retl
170cb736fdbSDavid S. Miller	 add		%g3, %o2, %o0
171cb736fdbSDavid S. MillerENDPROC(U1_g3_0_fp)
172cb736fdbSDavid S. MillerENTRY(U1_g3_8_fp)
	/* %o0 = %g3 + 8 + %o2 */
173cb736fdbSDavid S. Miller	VISExitHalf
174cb736fdbSDavid S. Miller	add		%g3, 8, %g3
175cb736fdbSDavid S. Miller	retl
176cb736fdbSDavid S. Miller	 add		%g3, %o2, %o0
177cb736fdbSDavid S. MillerENDPROC(U1_g3_8_fp)
178cb736fdbSDavid S. MillerENTRY(U1_o2_0_fp)
	/* %o0 = %o2 */
179cb736fdbSDavid S. Miller	VISExitHalf
180cb736fdbSDavid S. Miller	retl
181cb736fdbSDavid S. Miller	 mov		%o2, %o0
182cb736fdbSDavid S. MillerENDPROC(U1_o2_0_fp)
183cb736fdbSDavid S. MillerENTRY(U1_o2_1_fp)
	/* %o0 = %o2 + 1 */
184cb736fdbSDavid S. Miller	VISExitHalf
185cb736fdbSDavid S. Miller	retl
186cb736fdbSDavid S. Miller	 add		%o2, 1, %o0
187cb736fdbSDavid S. MillerENDPROC(U1_o2_1_fp)
/* NOTE(review): U1_gs_0, U1_gs_8 and U1_gs_10 run VISExitHalf, yet their
 * only users in this file are the ldx/stx loop at label 72, which is
 * reached without going through VISEntry -- confirm this is intended.
 */
188cb736fdbSDavid S. MillerENTRY(U1_gs_0)
	/* %o0 = GLOBAL_SPARE + %o2 */
189cb736fdbSDavid S. Miller	VISExitHalf
190cb736fdbSDavid S. Miller	retl
191cb736fdbSDavid S. Miller	 add		%GLOBAL_SPARE, %o2, %o0
192cb736fdbSDavid S. MillerENDPROC(U1_gs_0)
193cb736fdbSDavid S. MillerENTRY(U1_gs_8)
	/* %o0 = GLOBAL_SPARE + %o2 + 0x8 */
194cb736fdbSDavid S. Miller	VISExitHalf
195cb736fdbSDavid S. Miller	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
196cb736fdbSDavid S. Miller	retl
197cb736fdbSDavid S. Miller	 add		%GLOBAL_SPARE, 0x8, %o0
198cb736fdbSDavid S. MillerENDPROC(U1_gs_8)
199cb736fdbSDavid S. MillerENTRY(U1_gs_10)
	/* %o0 = GLOBAL_SPARE + %o2 + 0x10 */
200cb736fdbSDavid S. Miller	VISExitHalf
201cb736fdbSDavid S. Miller	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
202cb736fdbSDavid S. Miller	retl
203cb736fdbSDavid S. Miller	 add		%GLOBAL_SPARE, 0x10, %o0
204cb736fdbSDavid S. MillerENDPROC(U1_gs_10)
205cb736fdbSDavid S. MillerENTRY(U1_o2_0)
	/* %o0 = %o2 */
206cb736fdbSDavid S. Miller	retl
207cb736fdbSDavid S. Miller	 mov		%o2, %o0
208cb736fdbSDavid S. MillerENDPROC(U1_o2_0)
209cb736fdbSDavid S. MillerENTRY(U1_o2_8)
	/* %o0 = %o2 + 8 */
210cb736fdbSDavid S. Miller	retl
211cb736fdbSDavid S. Miller	 add		%o2, 8, %o0
212cb736fdbSDavid S. MillerENDPROC(U1_o2_8)
213cb736fdbSDavid S. MillerENTRY(U1_o2_4)
	/* %o0 = %o2 + 4 */
214cb736fdbSDavid S. Miller	retl
215cb736fdbSDavid S. Miller	 add		%o2, 4, %o0
216cb736fdbSDavid S. MillerENDPROC(U1_o2_4)
217cb736fdbSDavid S. MillerENTRY(U1_o2_1)
	/* %o0 = %o2 + 1 */
218cb736fdbSDavid S. Miller	retl
219cb736fdbSDavid S. Miller	 add		%o2, 1, %o0
220cb736fdbSDavid S. MillerENDPROC(U1_o2_1)
221cb736fdbSDavid S. MillerENTRY(U1_g1_0)
	/* %o0 = %g1 + %o2 */
222cb736fdbSDavid S. Miller	retl
223cb736fdbSDavid S. Miller	 add		%g1, %o2, %o0
224cb736fdbSDavid S. MillerENDPROC(U1_g1_0)
225cb736fdbSDavid S. MillerENTRY(U1_g1_1)
	/* %o0 = %g1 + 1 + %o2 */
226cb736fdbSDavid S. Miller	add		%g1, 1, %g1
227cb736fdbSDavid S. Miller	retl
228cb736fdbSDavid S. Miller	 add		%g1, %o2, %o0
229cb736fdbSDavid S. MillerENDPROC(U1_g1_1)
230cb736fdbSDavid S. MillerENTRY(U1_gs_0_o2_adj)
	/* %o0 = GLOBAL_SPARE + (%o2 & 7) */
231cb736fdbSDavid S. Miller	and		%o2, 7, %o2
232cb736fdbSDavid S. Miller	retl
233cb736fdbSDavid S. Miller	 add		%GLOBAL_SPARE, %o2, %o0
234cb736fdbSDavid S. MillerENDPROC(U1_gs_0_o2_adj)
235cb736fdbSDavid S. MillerENTRY(U1_gs_8_o2_adj)
	/* %o0 = GLOBAL_SPARE + 8 + (%o2 & 7) */
236cb736fdbSDavid S. Miller	and		%o2, 7, %o2
237cb736fdbSDavid S. Miller	add		%GLOBAL_SPARE, 8, %GLOBAL_SPARE
238cb736fdbSDavid S. Miller	retl
239cb736fdbSDavid S. Miller	 add		%GLOBAL_SPARE, %o2, %o0
240cb736fdbSDavid S. MillerENDPROC(U1_gs_8_o2_adj)
241cb736fdbSDavid S. Miller#endif
242cb736fdbSDavid S. Miller
/* FUNC_NAME entry and size dispatch.
 * In:  %o0 = dst, %o1 = src, %o2 = len.
 * Out: every return path below loads %o0 from EX_RETVAL(%o4), where %o4 is
 *      the dst pointer saved here.
 * Dispatch: len == 0 -> 85 (return), len < 16 -> 80, len < 5*64 -> 70,
 * otherwise fall through to the VIS block-copy path.
 */
243478b8fecSSam Ravnborg	.align		64
244478b8fecSSam Ravnborg
245478b8fecSSam Ravnborg	.globl		FUNC_NAME
246478b8fecSSam Ravnborg	.type		FUNC_NAME,#function
247478b8fecSSam RavnborgFUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
	/* Trap (software trap 5) if any bit of len at or above bit 31 is set. */
248478b8fecSSam Ravnborg	srlx		%o2, 31, %g2
249478b8fecSSam Ravnborg	cmp		%g2, 0
250478b8fecSSam Ravnborg	tne		%xcc, 5
251478b8fecSSam Ravnborg	PREAMBLE
	/* Keep the original dst for the return value. */
252478b8fecSSam Ravnborg	mov		%o0, %o4
253478b8fecSSam Ravnborg	cmp		%o2, 0
254478b8fecSSam Ravnborg	be,pn		%XCC, 85f
	/* Delay slot (always executed): %o3 = dst | src. */
255478b8fecSSam Ravnborg	 or		%o0, %o1, %o3
256478b8fecSSam Ravnborg	cmp		%o2, 16
257478b8fecSSam Ravnborg	blu,a,pn	%XCC, 80f
	/* Annulled delay slot (only when the branch is taken):
	 * %o3 = dst | src | len, tested for 4-byte alignment at 80.
	 */
258478b8fecSSam Ravnborg	 or		%o3, %o2, %o3
259478b8fecSSam Ravnborg
260478b8fecSSam Ravnborg	cmp		%o2, (5 * 64)
261478b8fecSSam Ravnborg	blu,pt		%XCC, 70f
	/* Delay slot: set the condition codes from (dst | src) & 7; the
	 * first branch at label 70 consumes them.
	 */
262478b8fecSSam Ravnborg	 andcc		%o3, 0x7, %g0
263478b8fecSSam Ravnborg
264478b8fecSSam Ravnborg	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  */
265478b8fecSSam Ravnborg	VISEntry
266478b8fecSSam Ravnborg
267478b8fecSSam Ravnborg	/* Is 'dst' already aligned on a 64-byte boundary? */
268478b8fecSSam Ravnborg	andcc		%o0, 0x3f, %g2
269478b8fecSSam Ravnborg	be,pt		%XCC, 2f
270478b8fecSSam Ravnborg
271478b8fecSSam Ravnborg	/* Compute abs((dst & 0x3f) - 0x40) into %g2.  This is the number
272478b8fecSSam Ravnborg	 * of bytes to copy to make 'dst' 64-byte aligned.  We pre-
273478b8fecSSam Ravnborg	 * subtract this from 'len'.
274478b8fecSSam Ravnborg	 */
	/* The next instruction is the delay slot of the branch above:
	 * GLOBAL_SPARE = dst - src, so src + GLOBAL_SPARE addresses the
	 * destination inside the byte loop.
	 */
275478b8fecSSam Ravnborg	 sub		%o0, %o1, %GLOBAL_SPARE
276478b8fecSSam Ravnborg	sub		%g2, 0x40, %g2
277478b8fecSSam Ravnborg	sub		%g0, %g2, %g2
278478b8fecSSam Ravnborg	sub		%o2, %g2, %o2
	/* %g1 = low three bits of the count (copied bytewise below); the
	 * delay slot leaves the remaining multiple of 8 (count & 0x38) in %g2.
	 */
279478b8fecSSam Ravnborg	andcc		%g2, 0x7, %g1
280478b8fecSSam Ravnborg	be,pt		%icc, 2f
281478b8fecSSam Ravnborg	 and		%g2, 0x38, %g2
282478b8fecSSam Ravnborg
	/* Byte loop: copy %g1 bytes.  The loop branch tests the condition
	 * codes set by the subcc at the top of the loop.
	 */
283478b8fecSSam Ravnborg1:	subcc		%g1, 0x1, %g1
284cb736fdbSDavid S. Miller	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
285cb736fdbSDavid S. Miller	EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
286478b8fecSSam Ravnborg	bgu,pt		%XCC, 1b
287478b8fecSSam Ravnborg	 add		%o1, 0x1, %o1
288478b8fecSSam Ravnborg
	/* Rebuild dst from the advanced src pointer. */
289478b8fecSSam Ravnborg	add		%o1, %GLOBAL_SPARE, %o0
290478b8fecSSam Ravnborg
	/* %g1 = src & 7.  Skip to 3 if no 8-byte units remain (%g2 == 0).
	 * The delay slot's alignaddr rounds %o1 down to an 8-byte boundary
	 * and records the byte offset for the faligndata instructions.
	 */
291478b8fecSSam Ravnborg2:	cmp		%g2, 0x0
292478b8fecSSam Ravnborg	and		%o1, 0x7, %g1
293478b8fecSSam Ravnborg	be,pt		%icc, 3f
294478b8fecSSam Ravnborg	 alignaddr	%o1, %g0, %o1
295478b8fecSSam Ravnborg
	/* Copy %g2 bytes in 8-byte units, alternating %f4/%f6 as the
	 * "previous" and "next" source doublewords for faligndata.
	 */
295cb736fdbSDavid S. Miller	EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
296cb736fdbSDavid S. Miller1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
297478b8fecSSam Ravnborg	add		%o1, 0x8, %o1
298478b8fecSSam Ravnborg	subcc		%g2, 0x8, %g2
299478b8fecSSam Ravnborg	faligndata	%f4, %f6, %f0
300cb736fdbSDavid S. Miller	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
301478b8fecSSam Ravnborg	be,pn		%icc, 3f
302478b8fecSSam Ravnborg	 add		%o0, 0x8, %o0
303478b8fecSSam Ravnborg
304cb736fdbSDavid S. Miller	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
305478b8fecSSam Ravnborg	add		%o1, 0x8, %o1
306478b8fecSSam Ravnborg	subcc		%g2, 0x8, %g2
307478b8fecSSam Ravnborg	faligndata	%f6, %f4, %f0
308cb736fdbSDavid S. Miller	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
309478b8fecSSam Ravnborg	bne,pt		%icc, 1b
310478b8fecSSam Ravnborg	 add		%o0, 0x8, %o0
311478b8fecSSam Ravnborg
312478b8fecSSam Ravnborg	/* Destination is 64-byte aligned.  */
313478b8fecSSam Ravnborg3:
314478b8fecSSam Ravnborg	membar		  #LoadStore | #StoreStore | #StoreLoad
315478b8fecSSam Ravnborg
	/* Split the remaining length (still called len here):
	 *   GLOBAL_SPARE = (len - 0x40) & ~0x3f
	 *   %g3 = ((len - GLOBAL_SPARE) & ~7) - 8
	 *   %o2 = (len - GLOBAL_SPARE) - %g3
	 *   %g2 = ((%o1 + %g1) >> 3) & 7, used to select a loop instance
	 *   %g1 = %o1 + %g1 + GLOBAL_SPARE + %g3; label 95 copies it to %o1
	 *         as the source pointer for the trailing byte loop
	 *   %o1 = %o1 rounded down to a 64-byte boundary
	 */
316478b8fecSSam Ravnborg	subcc		%o2, 0x40, %GLOBAL_SPARE
317478b8fecSSam Ravnborg	add		%o1, %g1, %g1
318478b8fecSSam Ravnborg	andncc		%GLOBAL_SPARE, (0x40 - 1), %GLOBAL_SPARE
319478b8fecSSam Ravnborg	srl		%g1, 3, %g2
320478b8fecSSam Ravnborg	sub		%o2, %GLOBAL_SPARE, %g3
321478b8fecSSam Ravnborg	andn		%o1, (0x40 - 1), %o1
322478b8fecSSam Ravnborg	and		%g2, 7, %g2
323478b8fecSSam Ravnborg	andncc		%g3, 0x7, %g3
3246f1d827fSDavid S. Miller	fsrc2		%f0, %f2
325478b8fecSSam Ravnborg	sub		%g3, 0x8, %g3
326478b8fecSSam Ravnborg	sub		%o2, %GLOBAL_SPARE, %o2
327478b8fecSSam Ravnborg
328478b8fecSSam Ravnborg	add		%g1, %GLOBAL_SPARE, %g1
329478b8fecSSam Ravnborg	subcc		%o2, %g3, %o2
330478b8fecSSam Ravnborg
	/* Preload three 64-byte blocks into %f0.., %f16.. and %f32..;
	 * GLOBAL_SPARE is reduced by 0x80 before the third load.
	 */
331cb736fdbSDavid S. Miller	EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
332478b8fecSSam Ravnborg	add		%o1, 0x40, %o1
333478b8fecSSam Ravnborg	add		%g1, %g3, %g1
334cb736fdbSDavid S. Miller	EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
335478b8fecSSam Ravnborg	add		%o1, 0x40, %o1
336478b8fecSSam Ravnborg	sub		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
337cb736fdbSDavid S. Miller	EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
338478b8fecSSam Ravnborg	add		%o1, 0x40, %o1
340478b8fecSSam Ravnborg
341478b8fecSSam Ravnborg	/* There are 8 instances of the unrolled loop,
342478b8fecSSam Ravnborg	 * one for each possible alignment of the
343478b8fecSSam Ravnborg	 * source buffer.  Each loop instance is 452
344478b8fecSSam Ravnborg	 * bytes.
345478b8fecSSam Ravnborg	 */
	/* %g2 = %g2 * 452, built as ((%g2 * 8 - %g2) * 16 + %g2) * 4. */
346478b8fecSSam Ravnborg	sll		%g2, 3, %o3
347478b8fecSSam Ravnborg	sub		%o3, %g2, %o3
348478b8fecSSam Ravnborg	sllx		%o3, 4, %o3
349478b8fecSSam Ravnborg	add		%o3, %g2, %o3
350478b8fecSSam Ravnborg	sllx		%o3, 2, %g2
	/* %o3 = address of this rd instruction; adding (1f - 1b) yields the
	 * address of the first loop instance, and the jmpl adds the scaled
	 * index to land on the selected instance.
	 */
351478b8fecSSam Ravnborg1:	rd		%pc, %o3
352478b8fecSSam Ravnborg	add		%o3, %lo(1f - 1b), %o3
353478b8fecSSam Ravnborg	jmpl		%o3 + %g2, %g0
354478b8fecSSam Ravnborg	 nop
355478b8fecSSam Ravnborg
356478b8fecSSam Ravnborg	.align		64
	/* Eight loop instances follow, identical except for the
	 * floating-point register windows passed to FREG_FROB.  Layout of
	 * each instance:
	 *   1:  main loop -- three (FREG_FROB, LOOP_CHUNKn) pairs, then
	 *       "ba 1b+4" with the first faligndata of the next iteration
	 *       in its delay slot (hence the +4, which skips that
	 *       instruction at the loop head).
	 *   1:/2:/3:  exits taken from LOOP_CHUNK1/2/3 when GLOBAL_SPARE
	 *       reaches zero; each realigns and stores two more blocks
	 *       (STORE_SYNC, then STORE_JUMP) and jumps into the 40..63
	 *       finish table.
	 * Do not add or remove instructions here: the computed jump above
	 * depends on the fixed instance size.
	 */
	/* Instance 0: register windows start at %f0. */
357478b8fecSSam Ravnborg1:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
358cb736fdbSDavid S. Miller	LOOP_CHUNK1(o1, o0, 1f)
359478b8fecSSam Ravnborg	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
360cb736fdbSDavid S. Miller	LOOP_CHUNK2(o1, o0, 2f)
361478b8fecSSam Ravnborg	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
362cb736fdbSDavid S. Miller	LOOP_CHUNK3(o1, o0, 3f)
363478b8fecSSam Ravnborg	ba,pt		%xcc, 1b+4
364478b8fecSSam Ravnborg	 faligndata	%f0, %f2, %f48
365478b8fecSSam Ravnborg1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
366478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
367478b8fecSSam Ravnborg	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
368478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 40f)
369478b8fecSSam Ravnborg2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
370478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
371478b8fecSSam Ravnborg	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
372478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 48f)
373478b8fecSSam Ravnborg3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
374478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
375478b8fecSSam Ravnborg	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
376478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 56f)
377478b8fecSSam Ravnborg
	/* Instance 1: register windows start at %f2. */
378478b8fecSSam Ravnborg1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
379cb736fdbSDavid S. Miller	LOOP_CHUNK1(o1, o0, 1f)
380478b8fecSSam Ravnborg	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
381cb736fdbSDavid S. Miller	LOOP_CHUNK2(o1, o0, 2f)
382478b8fecSSam Ravnborg	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
383cb736fdbSDavid S. Miller	LOOP_CHUNK3(o1, o0, 3f)
384478b8fecSSam Ravnborg	ba,pt		%xcc, 1b+4
385478b8fecSSam Ravnborg	 faligndata	%f2, %f4, %f48
386478b8fecSSam Ravnborg1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
387478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
388478b8fecSSam Ravnborg	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
389478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 41f)
390478b8fecSSam Ravnborg2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
391478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
392478b8fecSSam Ravnborg	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
393478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 49f)
394478b8fecSSam Ravnborg3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
395478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
396478b8fecSSam Ravnborg	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
397478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 57f)
398478b8fecSSam Ravnborg
	/* Instance 2: register windows start at %f4. */
399478b8fecSSam Ravnborg1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
400cb736fdbSDavid S. Miller	LOOP_CHUNK1(o1, o0, 1f)
401478b8fecSSam Ravnborg	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
402cb736fdbSDavid S. Miller	LOOP_CHUNK2(o1, o0, 2f)
403478b8fecSSam Ravnborg	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
404cb736fdbSDavid S. Miller	LOOP_CHUNK3(o1, o0, 3f)
405478b8fecSSam Ravnborg	ba,pt		%xcc, 1b+4
406478b8fecSSam Ravnborg	 faligndata	%f4, %f6, %f48
407478b8fecSSam Ravnborg1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
408478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
409478b8fecSSam Ravnborg	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
410478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 42f)
411478b8fecSSam Ravnborg2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
412478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
413478b8fecSSam Ravnborg	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
414478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 50f)
415478b8fecSSam Ravnborg3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
416478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
417478b8fecSSam Ravnborg	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
418478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 58f)
419478b8fecSSam Ravnborg
	/* Instance 3: register windows start at %f6. */
420478b8fecSSam Ravnborg1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
421cb736fdbSDavid S. Miller	LOOP_CHUNK1(o1, o0, 1f)
422478b8fecSSam Ravnborg	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
423cb736fdbSDavid S. Miller	LOOP_CHUNK2(o1, o0, 2f)
424478b8fecSSam Ravnborg	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
425cb736fdbSDavid S. Miller	LOOP_CHUNK3(o1, o0, 3f)
426478b8fecSSam Ravnborg	ba,pt		%xcc, 1b+4
427478b8fecSSam Ravnborg	 faligndata	%f6, %f8, %f48
428478b8fecSSam Ravnborg1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
429478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
430478b8fecSSam Ravnborg	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
431478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 43f)
432478b8fecSSam Ravnborg2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
433478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
434478b8fecSSam Ravnborg	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
435478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 51f)
436478b8fecSSam Ravnborg3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
437478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
438478b8fecSSam Ravnborg	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
439478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 59f)
440478b8fecSSam Ravnborg
	/* Instance 4: register windows start at %f8. */
441478b8fecSSam Ravnborg1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
442cb736fdbSDavid S. Miller	LOOP_CHUNK1(o1, o0, 1f)
443478b8fecSSam Ravnborg	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
444cb736fdbSDavid S. Miller	LOOP_CHUNK2(o1, o0, 2f)
445478b8fecSSam Ravnborg	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
446cb736fdbSDavid S. Miller	LOOP_CHUNK3(o1, o0, 3f)
447478b8fecSSam Ravnborg	ba,pt		%xcc, 1b+4
448478b8fecSSam Ravnborg	 faligndata	%f8, %f10, %f48
449478b8fecSSam Ravnborg1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
450478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
451478b8fecSSam Ravnborg	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
452478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 44f)
453478b8fecSSam Ravnborg2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
454478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
455478b8fecSSam Ravnborg	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
456478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 52f)
457478b8fecSSam Ravnborg3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
458478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
459478b8fecSSam Ravnborg	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
460478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 60f)
461478b8fecSSam Ravnborg
	/* Instance 5: register windows start at %f10. */
462478b8fecSSam Ravnborg1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
463cb736fdbSDavid S. Miller	LOOP_CHUNK1(o1, o0, 1f)
464478b8fecSSam Ravnborg	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
465cb736fdbSDavid S. Miller	LOOP_CHUNK2(o1, o0, 2f)
466478b8fecSSam Ravnborg	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
467cb736fdbSDavid S. Miller	LOOP_CHUNK3(o1, o0, 3f)
468478b8fecSSam Ravnborg	ba,pt		%xcc, 1b+4
469478b8fecSSam Ravnborg	 faligndata	%f10, %f12, %f48
470478b8fecSSam Ravnborg1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
471478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
472478b8fecSSam Ravnborg	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
473478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 45f)
474478b8fecSSam Ravnborg2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
475478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
476478b8fecSSam Ravnborg	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
477478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 53f)
478478b8fecSSam Ravnborg3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
479478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
480478b8fecSSam Ravnborg	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
481478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 61f)
482478b8fecSSam Ravnborg
	/* Instance 6: register windows start at %f12. */
483478b8fecSSam Ravnborg1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
484cb736fdbSDavid S. Miller	LOOP_CHUNK1(o1, o0, 1f)
485478b8fecSSam Ravnborg	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
486cb736fdbSDavid S. Miller	LOOP_CHUNK2(o1, o0, 2f)
487478b8fecSSam Ravnborg	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
488cb736fdbSDavid S. Miller	LOOP_CHUNK3(o1, o0, 3f)
489478b8fecSSam Ravnborg	ba,pt		%xcc, 1b+4
490478b8fecSSam Ravnborg	 faligndata	%f12, %f14, %f48
491478b8fecSSam Ravnborg1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
492478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
493478b8fecSSam Ravnborg	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
494478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 46f)
495478b8fecSSam Ravnborg2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
496478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
497478b8fecSSam Ravnborg	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
498478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 54f)
499478b8fecSSam Ravnborg3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
500478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
501478b8fecSSam Ravnborg	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
502478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 62f)
503478b8fecSSam Ravnborg
	/* Instance 7: register windows start at %f14. */
504478b8fecSSam Ravnborg1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
505cb736fdbSDavid S. Miller	LOOP_CHUNK1(o1, o0, 1f)
506478b8fecSSam Ravnborg	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
507cb736fdbSDavid S. Miller	LOOP_CHUNK2(o1, o0, 2f)
508478b8fecSSam Ravnborg	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
509cb736fdbSDavid S. Miller	LOOP_CHUNK3(o1, o0, 3f)
510478b8fecSSam Ravnborg	ba,pt		%xcc, 1b+4
511478b8fecSSam Ravnborg	 faligndata	%f14, %f16, %f48
512478b8fecSSam Ravnborg1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
513478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
514478b8fecSSam Ravnborg	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
515478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 47f)
516478b8fecSSam Ravnborg2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
517478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
518478b8fecSSam Ravnborg	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
519478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 55f)
520478b8fecSSam Ravnborg3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
521478b8fecSSam Ravnborg	STORE_SYNC(o0, f48)
522478b8fecSSam Ravnborg	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
523478b8fecSSam Ravnborg	STORE_JUMP(o0, f48, 63f)
524478b8fecSSam Ravnborg
/* Finish table, entered at one of 40..63 from the STORE_JUMPs above.
 * Entries fall through to the next one; each FINISH_VISCHUNK stores one
 * more doubleword from the data still held in registers, or leaves for
 * label 95 once %g3 goes negative.  The UNEVEN entries (47, 55, 63) store
 * nothing: they copy the last loaded doubleword into %f0 and continue at
 * label 93 (47 and 55 by branch, 63 by falling through).
 */
525cb736fdbSDavid S. Miller40:	FINISH_VISCHUNK(o0, f0,  f2)
526cb736fdbSDavid S. Miller41:	FINISH_VISCHUNK(o0, f2,  f4)
527cb736fdbSDavid S. Miller42:	FINISH_VISCHUNK(o0, f4,  f6)
528cb736fdbSDavid S. Miller43:	FINISH_VISCHUNK(o0, f6,  f8)
529cb736fdbSDavid S. Miller44:	FINISH_VISCHUNK(o0, f8,  f10)
530cb736fdbSDavid S. Miller45:	FINISH_VISCHUNK(o0, f10, f12)
531cb736fdbSDavid S. Miller46:	FINISH_VISCHUNK(o0, f12, f14)
532cb736fdbSDavid S. Miller47:	UNEVEN_VISCHUNK(o0, f14, f0)
533cb736fdbSDavid S. Miller48:	FINISH_VISCHUNK(o0, f16, f18)
534cb736fdbSDavid S. Miller49:	FINISH_VISCHUNK(o0, f18, f20)
535cb736fdbSDavid S. Miller50:	FINISH_VISCHUNK(o0, f20, f22)
536cb736fdbSDavid S. Miller51:	FINISH_VISCHUNK(o0, f22, f24)
537cb736fdbSDavid S. Miller52:	FINISH_VISCHUNK(o0, f24, f26)
538cb736fdbSDavid S. Miller53:	FINISH_VISCHUNK(o0, f26, f28)
539cb736fdbSDavid S. Miller54:	FINISH_VISCHUNK(o0, f28, f30)
540cb736fdbSDavid S. Miller55:	UNEVEN_VISCHUNK(o0, f30, f0)
541cb736fdbSDavid S. Miller56:	FINISH_VISCHUNK(o0, f32, f34)
542cb736fdbSDavid S. Miller57:	FINISH_VISCHUNK(o0, f34, f36)
543cb736fdbSDavid S. Miller58:	FINISH_VISCHUNK(o0, f36, f38)
544cb736fdbSDavid S. Miller59:	FINISH_VISCHUNK(o0, f38, f40)
545cb736fdbSDavid S. Miller60:	FINISH_VISCHUNK(o0, f40, f42)
546cb736fdbSDavid S. Miller61:	FINISH_VISCHUNK(o0, f42, f44)
547cb736fdbSDavid S. Miller62:	FINISH_VISCHUNK(o0, f44, f46)
548cb736fdbSDavid S. Miller63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0)
549478b8fecSSam Ravnborg
/* 93: copy the remaining doublewords counted by %g3, loading 8 bytes at a
 * time from %o1 and alternating %f2/%f0 as the newly loaded half of each
 * faligndata pair.  The loop ends at label 95 once %g3 goes negative.
 */
550cb736fdbSDavid S. Miller93:	EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
551478b8fecSSam Ravnborg	add		%o1, 8, %o1
552478b8fecSSam Ravnborg	subcc		%g3, 8, %g3
553478b8fecSSam Ravnborg	faligndata	%f0, %f2, %f8
554cb736fdbSDavid S. Miller	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
555478b8fecSSam Ravnborg	bl,pn		%xcc, 95f
556478b8fecSSam Ravnborg	 add		%o0, 8, %o0
557cb736fdbSDavid S. Miller	EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
558478b8fecSSam Ravnborg	add		%o1, 8, %o1
559478b8fecSSam Ravnborg	subcc		%g3, 8, %g3
560478b8fecSSam Ravnborg	faligndata	%f2, %f0, %f8
561cb736fdbSDavid S. Miller	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
562478b8fecSSam Ravnborg	bge,pt		%xcc, 93b
563478b8fecSSam Ravnborg	 add		%o0, 8, %o0
564478b8fecSSam Ravnborg
	/* 95: skip the byte loop if no bytes remain (%o2 == 0).  The delay
	 * slot always reloads %o1 from %g1, the byte-exact source pointer.
	 */
565478b8fecSSam Ravnborg95:	brz,pt		%o2, 2f
566478b8fecSSam Ravnborg	 mov		%g1, %o1
567478b8fecSSam Ravnborg
	/* Trailing byte loop: copy %o2 bytes from %o1 to %o0. */
568cb736fdbSDavid S. Miller1:	EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
569478b8fecSSam Ravnborg	add		%o1, 1, %o1
570478b8fecSSam Ravnborg	subcc		%o2, 1, %o2
571cb736fdbSDavid S. Miller	EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
572478b8fecSSam Ravnborg	bne,pt		%xcc, 1b
573478b8fecSSam Ravnborg	 add		%o0, 1, %o0
574478b8fecSSam Ravnborg
	/* Order the stores, leave VIS mode and return EX_RETVAL(saved dst). */
575478b8fecSSam Ravnborg2:	membar		#StoreLoad | #StoreStore
576478b8fecSSam Ravnborg	VISExit
577478b8fecSSam Ravnborg	retl
578478b8fecSSam Ravnborg	 mov		EX_RETVAL(%o4), %o0
579478b8fecSSam Ravnborg
580478b8fecSSam Ravnborg	.align		64
581478b8fecSSam Ravnborg70:	/* 16 <= len < (5 * 64) */
	/* Condition codes come from the andcc in the delay slot of the
	 * branch that got here: non-zero means dst or src is not 8-byte
	 * aligned.  The delay slot sets %o3 = dst - src so that stores can
	 * be addressed as [%o1 + %o3].
	 */
582478b8fecSSam Ravnborg	bne,pn		%XCC, 75f
583478b8fecSSam Ravnborg	 sub		%o0, %o1, %o3
584478b8fecSSam Ravnborg
	/* 72: both pointers 8-byte aligned.  GLOBAL_SPARE = len & ~0xf is
	 * copied 16 bytes per iteration; %o2 keeps len & 0xf.
	 */
585478b8fecSSam Ravnborg72:	andn		%o2, 0xf, %GLOBAL_SPARE
586478b8fecSSam Ravnborg	and		%o2, 0xf, %o2
587cb736fdbSDavid S. Miller1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
588cb736fdbSDavid S. Miller	EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
589478b8fecSSam Ravnborg	subcc		%GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
590cb736fdbSDavid S. Miller	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
591478b8fecSSam Ravnborg	add		%o1, 0x8, %o1
592cb736fdbSDavid S. Miller	EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
593478b8fecSSam Ravnborg	bgu,pt		%XCC, 1b
594478b8fecSSam Ravnborg	 add		%o1, 0x8, %o1
	/* 73: copy one 8-byte word if bit 3 of the remainder is set. */
595478b8fecSSam Ravnborg73:	andcc		%o2, 0x8, %g0
596478b8fecSSam Ravnborg	be,pt		%XCC, 1f
597478b8fecSSam Ravnborg	 nop
598cb736fdbSDavid S. Miller	EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
599478b8fecSSam Ravnborg	sub		%o2, 0x8, %o2
600cb736fdbSDavid S. Miller	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
601478b8fecSSam Ravnborg	add		%o1, 0x8, %o1
	/* Copy one 4-byte word if bit 2 of the remainder is set. */
602478b8fecSSam Ravnborg1:	andcc		%o2, 0x4, %g0
603478b8fecSSam Ravnborg	be,pt		%XCC, 1f
604478b8fecSSam Ravnborg	 nop
605cb736fdbSDavid S. Miller	EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
606478b8fecSSam Ravnborg	sub		%o2, 0x4, %o2
607cb736fdbSDavid S. Miller	EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
608478b8fecSSam Ravnborg	add		%o1, 0x4, %o1
	/* Return via 85 if nothing is left, else finish in the byte loop. */
609478b8fecSSam Ravnborg1:	cmp		%o2, 0
610478b8fecSSam Ravnborg	be,pt		%XCC, 85f
611478b8fecSSam Ravnborg	 nop
612478b8fecSSam Ravnborg	ba,pt		%xcc, 90f
613478b8fecSSam Ravnborg	 nop
614478b8fecSSam Ravnborg
	/* 75: align dst to 8 bytes.  The be tests the andcc result
	 * (dst & 7 == 0), since the intervening sub does not touch the
	 * condition codes.  Otherwise %g1 = 8 - (dst & 7) bytes are copied
	 * one at a time and subtracted from len.
	 */
615478b8fecSSam Ravnborg75:	andcc		%o0, 0x7, %g1
616478b8fecSSam Ravnborg	sub		%g1, 0x8, %g1
617478b8fecSSam Ravnborg	be,pn		%icc, 2f
618478b8fecSSam Ravnborg	 sub		%g0, %g1, %g1
619478b8fecSSam Ravnborg	sub		%o2, %g1, %o2
620478b8fecSSam Ravnborg
621cb736fdbSDavid S. Miller1:	EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
622478b8fecSSam Ravnborg	subcc		%g1, 1, %g1
623cb736fdbSDavid S. Miller	EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
624478b8fecSSam Ravnborg	bgu,pt		%icc, 1b
625478b8fecSSam Ravnborg	 add		%o1, 1, %o1
626478b8fecSSam Ravnborg
	/* dst is now 8-byte aligned; recompute it from src.  If src is not
	 * 8-byte aligned go to the shift-and-merge loop at 8 with
	 * %g1 = (src & 7) * 8, the misalignment in bits.  Otherwise rejoin
	 * the aligned path at 72 (len >= 16) or 73.
	 */
627478b8fecSSam Ravnborg2:	add		%o1, %o3, %o0
628478b8fecSSam Ravnborg	andcc		%o1, 0x7, %g1
629478b8fecSSam Ravnborg	bne,pt		%icc, 8f
630478b8fecSSam Ravnborg	 sll		%g1, 3, %g1
631478b8fecSSam Ravnborg
632478b8fecSSam Ravnborg	cmp		%o2, 16
633478b8fecSSam Ravnborg	bgeu,pt		%icc, 72b
634478b8fecSSam Ravnborg	 nop
635478b8fecSSam Ravnborg	ba,a,pt		%xcc, 73b
636478b8fecSSam Ravnborg
	/* 8: src misaligned, dst aligned.  %o3 = 64 - %g1 is the
	 * complementary shift; %o1 is rounded down to 8 bytes; %g2 carries
	 * the left-shifted remainder of the previously loaded word.  Each
	 * iteration loads the next aligned word and stores
	 * (next >> %o3) | carry.  GLOBAL_SPARE = len & ~7 counts the bytes
	 * to store.
	 */
637478b8fecSSam Ravnborg8:	mov		64, %o3
638478b8fecSSam Ravnborg	andn		%o1, 0x7, %o1
639cb736fdbSDavid S. Miller	EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
640478b8fecSSam Ravnborg	sub		%o3, %g1, %o3
641478b8fecSSam Ravnborg	andn		%o2, 0x7, %GLOBAL_SPARE
642478b8fecSSam Ravnborg	sllx		%g2, %g1, %g2
643cb736fdbSDavid S. Miller1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
644478b8fecSSam Ravnborg	subcc		%GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
645478b8fecSSam Ravnborg	add		%o1, 0x8, %o1
646478b8fecSSam Ravnborg	srlx		%g3, %o3, %o5
647478b8fecSSam Ravnborg	or		%o5, %g2, %o5
648cb736fdbSDavid S. Miller	EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
649478b8fecSSam Ravnborg	add		%o0, 0x8, %o0
650478b8fecSSam Ravnborg	bgu,pt		%icc, 1b
651478b8fecSSam Ravnborg	 sllx		%g3, %g1, %g2
652478b8fecSSam Ravnborg
	/* Turn %g1 back into a byte offset and re-apply it to src; keep
	 * len & 7 in %o2.  Return via 85 if that is zero, otherwise finish
	 * in the byte loop with %o3 = dst - src set in the delay slot.
	 */
653478b8fecSSam Ravnborg	srl		%g1, 3, %g1
654478b8fecSSam Ravnborg	andcc		%o2, 0x7, %o2
655478b8fecSSam Ravnborg	be,pn		%icc, 85f
656478b8fecSSam Ravnborg	 add		%o1, %g1, %o1
657478b8fecSSam Ravnborg	ba,pt		%xcc, 90f
658478b8fecSSam Ravnborg	 sub		%o0, %o1, %o3
659478b8fecSSam Ravnborg
660478b8fecSSam Ravnborg	.align		64
661478b8fecSSam Ravnborg80:	/* 0 < len < 16 */
	/* %o3 holds dst | src | len on entry.  If any of the low two bits
	 * is set, use the byte loop at 90; the delay slot sets
	 * %o3 = dst - src either way.
	 */
662478b8fecSSam Ravnborg	andcc		%o3, 0x3, %g0
663478b8fecSSam Ravnborg	bne,pn		%XCC, 90f
664478b8fecSSam Ravnborg	 sub		%o0, %o1, %o3
665478b8fecSSam Ravnborg
	/* Everything is 4-byte aligned: copy one word per iteration. */
666cb736fdbSDavid S. Miller1:	EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
667478b8fecSSam Ravnborg	subcc		%o2, 4, %o2
668cb736fdbSDavid S. Miller	EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
669478b8fecSSam Ravnborg	bgu,pt		%XCC, 1b
670478b8fecSSam Ravnborg	 add		%o1, 4, %o1
671478b8fecSSam Ravnborg
	/* 85: common return for the non-VIS paths. */
672478b8fecSSam Ravnborg85:	retl
673478b8fecSSam Ravnborg	 mov		EX_RETVAL(%o4), %o0
674478b8fecSSam Ravnborg
675478b8fecSSam Ravnborg	.align		32
	/* 90: byte loop; copies %o2 bytes (%o2 must be non-zero on entry)
	 * from [%o1] to [%o1 + %o3], then returns.
	 */
676cb736fdbSDavid S. Miller90:	EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
677478b8fecSSam Ravnborg	subcc		%o2, 1, %o2
678cb736fdbSDavid S. Miller	EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
679478b8fecSSam Ravnborg	bgu,pt		%XCC, 90b
680478b8fecSSam Ravnborg	 add		%o1, 1, %o1
681478b8fecSSam Ravnborg	retl
682478b8fecSSam Ravnborg	 mov		EX_RETVAL(%o4), %o0
683478b8fecSSam Ravnborg
684478b8fecSSam Ravnborg	.size		FUNC_NAME, .-FUNC_NAME
685d3867f04SAl ViroEXPORT_SYMBOL(FUNC_NAME)
686