xref: /titanic_51/usr/src/uts/sun4v/cpu/niagara_copy.s (revision 280575bef0daddd2683e4135171fc01408ae6d57)
17c478bd9Sstevel@tonic-gate/*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5340af271Swh94709 * Common Development and Distribution License (the "License").
6340af271Swh94709 * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate/*
22*280575beSPatrick McGehearty * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate */
247c478bd9Sstevel@tonic-gate
257c478bd9Sstevel@tonic-gate
267c478bd9Sstevel@tonic-gate#include <sys/param.h>
277c478bd9Sstevel@tonic-gate#include <sys/errno.h>
287c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h>
297c478bd9Sstevel@tonic-gate#include <sys/vtrace.h>
307c478bd9Sstevel@tonic-gate#include <sys/machthread.h>
317c478bd9Sstevel@tonic-gate#include <sys/clock.h>
327c478bd9Sstevel@tonic-gate#include <sys/asi.h>
337c478bd9Sstevel@tonic-gate#include <sys/fsr.h>
347c478bd9Sstevel@tonic-gate#include <sys/privregs.h>
357c478bd9Sstevel@tonic-gate#include <sys/machasi.h>
367c478bd9Sstevel@tonic-gate#include <sys/niagaraasi.h>
377c478bd9Sstevel@tonic-gate
387c478bd9Sstevel@tonic-gate#if !defined(lint)
397c478bd9Sstevel@tonic-gate#include "assym.h"
407c478bd9Sstevel@tonic-gate#endif	/* lint */
417c478bd9Sstevel@tonic-gate
427c478bd9Sstevel@tonic-gate
437c478bd9Sstevel@tonic-gate/*
447c478bd9Sstevel@tonic-gate * Pseudo-code to aid in understanding the control flow of the
457c478bd9Sstevel@tonic-gate * bcopy/kcopy routine.
467c478bd9Sstevel@tonic-gate *
47473b13d4Sae112802 *	! WARNING : <Register usage convention>
48473b13d4Sae112802 *	! In kcopy() the %o5, holds previous error handler and a flag
49473b13d4Sae112802 *	! LOFAULT_SET (low bits). The %o5 is null in bcopy().
50473b13d4Sae112802 *	! The %o5 is not available for any other use.
51473b13d4Sae112802 *
52*280575beSPatrick McGehearty * On entry:
53*280575beSPatrick McGehearty *	! Determine whether to use the FP register version or the
54*280575beSPatrick McGehearty *	! leaf routine version depending on the size of the copy.
55*280575beSPatrick McGehearty *	! Set up error handling accordingly.
56*280575beSPatrick McGehearty *	! The transition point depends on FP_COPY
57*280575beSPatrick McGehearty *	! For both versions %o5 is reserved
58*280575beSPatrick McGehearty *
59473b13d4Sae112802 * kcopy():
60*280575beSPatrick McGehearty *	if(length > FP_COPY)
61*280575beSPatrick McGehearty *		go to regular_kcopy
62*280575beSPatrick McGehearty *
63*280575beSPatrick McGehearty *	! Setup_leaf_rtn_error_handler
64*280575beSPatrick McGehearty *	%o5 = curthread->t_lofault;		! save existing handler in %o5
65*280575beSPatrick McGehearty *	%o5 |= LOFAULT_SET;			! ORed with LOFAULT_SET flag
66*280575beSPatrick McGehearty *	curthread->t_lofault = .sm_copyerr;
67*280575beSPatrick McGehearty *	goto small_bcopy();
68*280575beSPatrick McGehearty *
69*280575beSPatrick McGehearty * regular_kcopy:
70*280575beSPatrick McGehearty *	save_registers()
71473b13d4Sae112802 *	%o5 = curthread->t_lofault;		! save existing handler in %o5
72473b13d4Sae112802 *	%o5 |= LOFAULT_SET;			! ORed with LOFAULT_SET flag
737c478bd9Sstevel@tonic-gate *	curthread->t_lofault = .copyerr;
74*280575beSPatrick McGehearty *	goto do_copy();
757c478bd9Sstevel@tonic-gate *
76473b13d4Sae112802 * bcopy():
77*280575beSPatrick McGehearty *	if(length > FP_COPY)
78*280575beSPatrick McGehearty *		go to regular_bcopy
797c478bd9Sstevel@tonic-gate *
80*280575beSPatrick McGehearty *	! Setup_leaf_rtn_error_handler
81*280575beSPatrick McGehearty *	%o5 = curthread->t_lofault;		! save existing handler in %o5
82*280575beSPatrick McGehearty *	curthread->t_lofault = .sm_copyerr;
83*280575beSPatrick McGehearty *	goto small_bcopy();
847c478bd9Sstevel@tonic-gate *
85*280575beSPatrick McGehearty * regular_bcopy:
86*280575beSPatrick McGehearty *	%o5 = curthread->t_lofault;		! save existing handler in %o5
87*280575beSPatrick McGehearty *	curthread->t_lofault = .copyerr;
88*280575beSPatrick McGehearty *	goto do_copy();
89*280575beSPatrick McGehearty *
90*280575beSPatrick McGehearty * small_bcopy:
91*280575beSPatrick McGehearty *	! handle copies smaller than FP_COPY
92*280575beSPatrick McGehearty *	restore t_lofault handler
93*280575beSPatrick McGehearty *	exit
94*280575beSPatrick McGehearty *
95*280575beSPatrick McGehearty * do_copy:
96*280575beSPatrick McGehearty *	! handle copies larger than FP_COPY
97*280575beSPatrick McGehearty *	save fp_regs
98473b13d4Sae112802 * 	blockcopy;
99*280575beSPatrick McGehearty *	restore fp_regs
100473b13d4Sae112802 *	restore t_lofault handler if came from kcopy();
101473b13d4Sae112802 *
1027c478bd9Sstevel@tonic-gate *
103*280575beSPatrick McGehearty * In leaf lofault handler:
104473b13d4Sae112802 *	curthread->t_lofault = (%o5 & ~LOFAULT_SET);	! restore old t_lofault
1057c478bd9Sstevel@tonic-gate *	return (errno)
1067c478bd9Sstevel@tonic-gate *
107*280575beSPatrick McGehearty * In lofault handler:
108*280575beSPatrick McGehearty *	curthread->t_lofault = (%o5 & ~LOFAULT_SET);	! restore old t_lofault
109*280575beSPatrick McGehearty *	restore fp_regs
110*280575beSPatrick McGehearty *	return (errno)
111*280575beSPatrick McGehearty *
112*280575beSPatrick McGehearty *
113*280575beSPatrick McGehearty *
114*280575beSPatrick McGehearty * For all of bcopy/copyin/copyout the copy logic is specialized according
115*280575beSPatrick McGehearty * to how the src and dst is aligned and how much data needs to be moved.
116*280575beSPatrick McGehearty * The following comments apply to the N2/RF code (#if !defined(NIAGARA_IMPL))
117*280575beSPatrick McGehearty *
118*280575beSPatrick McGehearty * N2/RF Flow :
119*280575beSPatrick McGehearty *
120*280575beSPatrick McGehearty * if (count <= FP_COPY) {  (584 bytes)
121*280575beSPatrick McGehearty *   set small fault handler (no register window save/restore)
122*280575beSPatrick McGehearty *   if count < SHORTCOPY  (7 bytes)
123*280575beSPatrick McGehearty *	copy bytes; go to short_exit
124*280575beSPatrick McGehearty *   else
125*280575beSPatrick McGehearty *   determine dst alignment, move minimum bytes/halfwords to
126*280575beSPatrick McGehearty *   get dst aligned on long word boundary
127*280575beSPatrick McGehearty *     if( src is on long word boundary ) {
128*280575beSPatrick McGehearty * medlong:					   src/dst aligned on 8 bytes
129*280575beSPatrick McGehearty *	 copy with ldx/stx in 4-way unrolled loop;
130*280575beSPatrick McGehearty *       copy final 0-31 bytes; go to short_exit
131*280575beSPatrick McGehearty *     } else {					src/dst not aligned on 8 bytes
132*280575beSPatrick McGehearty *     if src is word aligned, ld/st words in 32-byte chunks
133*280575beSPatrick McGehearty *     if src is half word aligned, ld half, ld word, ld half; pack
134*280575beSPatrick McGehearty *		into long word, store long words in 32-byte chunks
135*280575beSPatrick McGehearty *     if src is byte aligned, ld byte,half,word parts;  pack into long
136*280575beSPatrick McGehearty *	   word, store long words in 32-byte chunks
137*280575beSPatrick McGehearty *     move final 0-31 bytes according to src alignment;  go to short_exit
138*280575beSPatrick McGehearty * short_exit:
139*280575beSPatrick McGehearty *     restore trap handler if needed, retl
140*280575beSPatrick McGehearty * else {					   More than FP_COPY bytes
141*280575beSPatrick McGehearty *     set fault handler
142*280575beSPatrick McGehearty *     disable kernel preemption
143*280575beSPatrick McGehearty *     save registers, save FP registers if in use
144*280575beSPatrick McGehearty *     move bytes to align destination register on long word boundary
145*280575beSPatrick McGehearty *     if(src is on long word boundary) {	   src/dst aligned on 8 bytes
146*280575beSPatrick McGehearty *       align dst on 64 byte boundary;  use 8-way test for each of 8 possible
147*280575beSPatrick McGehearty *       src alignments relative to a 64 byte boundary to select the
148*280575beSPatrick McGehearty *       16-way unrolled loop (128 bytes) to use for
149*280575beSPatrick McGehearty *       block load, fmovd, block-init-store, block-store, fmovd operations
150*280575beSPatrick McGehearty *       then go to remain_stuff.
151*280575beSPatrick McGehearty * remain_stuff: move remaining bytes. go to long_exit
152*280575beSPatrick McGehearty *     } else {
153*280575beSPatrick McGehearty *       setup alignaddr for faligndata instructions
154*280575beSPatrick McGehearty *       align dst on 64 byte boundary; use 8-way test for each of 8 possible
155*280575beSPatrick McGehearty *       src alignments to nearest long word relative to 64 byte boundary to
156*280575beSPatrick McGehearty *       select the 8-way unrolled loop (64 bytes) to use for
157*280575beSPatrick McGehearty *       block load, falign, fmovd, block-store loop
158*280575beSPatrick McGehearty *	 (only use block-init-store when src/dst on 8 byte boundaries.)
159*280575beSPatrick McGehearty *       goto unalign_done.
160*280575beSPatrick McGehearty * unalign_done:
161*280575beSPatrick McGehearty *       move remaining bytes for unaligned cases. go to long_exit
162*280575beSPatrick McGehearty * long_exit:
163*280575beSPatrick McGehearty *       restore %gsr, FP regs (either from stack or set to zero),
164*280575beSPatrick McGehearty *       restore trap handler, check for kernel preemption request,
165*280575beSPatrick McGehearty *       handle if needed, ret.
166*280575beSPatrick McGehearty * }
167*280575beSPatrick McGehearty *
168*280575beSPatrick McGehearty * Other platforms include hw_bcopy_limit_[1248] to control the exact
169*280575beSPatrick McGehearty * point where the FP register code is used. On those platforms, the
170*280575beSPatrick McGehearty * FP register code did not leave data in L2 cache, potentially affecting
171*280575beSPatrick McGehearty * performance more than the gain/loss from the algorithm difference.
172*280575beSPatrick McGehearty * For N2/RF, block store places data in the L2 cache, so use or non-use
173*280575beSPatrick McGehearty * of the FP registers has no effect on L2 cache behavior.
174*280575beSPatrick McGehearty * The cost for testing hw_bcopy_limit_* according to different
175*280575beSPatrick McGehearty * alignments exceeds 50 cycles for all cases, even when hw_bcopy_limits
176*280575beSPatrick McGehearty * were not used. That cost was judged too high relative to the benefits,
177*280575beSPatrick McGehearty * so the hw_bcopy_limit option is omitted from this code.
1787c478bd9Sstevel@tonic-gate */
1797c478bd9Sstevel@tonic-gate
1807c478bd9Sstevel@tonic-gate/*
1817c478bd9Sstevel@tonic-gate * Less than or equal to this number of bytes we will always copy byte-for-byte
1827c478bd9Sstevel@tonic-gate */
1837c478bd9Sstevel@tonic-gate#define	SMALL_LIMIT	7
1847c478bd9Sstevel@tonic-gate
1857c478bd9Sstevel@tonic-gate/*
186473b13d4Sae112802 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
187473b13d4Sae112802 * handler was set
1887c478bd9Sstevel@tonic-gate */
1897c478bd9Sstevel@tonic-gate#define	LOFAULT_SET 2
1907c478bd9Sstevel@tonic-gate
1917c478bd9Sstevel@tonic-gate/*
1927c478bd9Sstevel@tonic-gate * This define is to align data for the unaligned source cases.
1937c478bd9Sstevel@tonic-gate * The data1, data2 and data3 is merged into data1 and data2.
1947c478bd9Sstevel@tonic-gate * The data3 is preserved for next merge.
1957c478bd9Sstevel@tonic-gate */
/*
 * ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp) computes, using
 * 64-bit shifts (sllx/srlx):
 *	data1 = (data1 << lshift) | (data2 >> rshift)
 *	data2 = (data2 << lshift) | (data3 >> rshift)
 * data3 is only read; tmp is clobbered.
 * NOTE(review): callers presumably pass lshift + rshift == 64 so the two
 * merged pieces do not overlap -- confirm at the use sites.
 */
1967c478bd9Sstevel@tonic-gate#define	ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp)	\
1977c478bd9Sstevel@tonic-gate	sllx	data1, lshift, data1				;\
1987c478bd9Sstevel@tonic-gate	srlx	data2, rshift, tmp				;\
1997c478bd9Sstevel@tonic-gate	or	data1, tmp, data1				;\
2007c478bd9Sstevel@tonic-gate	sllx	data2, lshift, data2				;\
2017c478bd9Sstevel@tonic-gate	srlx	data3, rshift, tmp				;\
2027c478bd9Sstevel@tonic-gate	or	data2, tmp, data2
2037c478bd9Sstevel@tonic-gate/*
2047c478bd9Sstevel@tonic-gate * This macro is to align the data. Basically it merges
2057c478bd9Sstevel@tonic-gate * data1 and data2 to form double word.
2067c478bd9Sstevel@tonic-gate */
/*
 * ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp) computes, using 64-bit
 * shifts (sllx/srlx):
 *	data1 = (data1 << lshift) | (data2 >> rshift)
 * data2 is only read; tmp is clobbered.
 */
2077c478bd9Sstevel@tonic-gate#define	ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp)	\
2087c478bd9Sstevel@tonic-gate	sllx	data1, lshift, data1				;\
2097c478bd9Sstevel@tonic-gate	srlx	data2, rshift, tmp				;\
2107c478bd9Sstevel@tonic-gate	or	data1, tmp, data1
2117c478bd9Sstevel@tonic-gate
212340af271Swh94709#if !defined(NIAGARA_IMPL)
213340af271Swh94709/*
214340af271Swh94709 * Flags set in the lower bits of the t_lofault address:
215340af271Swh94709 * FPUSED_FLAG: The FP registers were in use and must be restored
216*280575beSPatrick McGehearty * LOFAULT_SET: Set for bcopy calls, cleared for kcopy calls
217340af271Swh94709 * COPY_FLAGS: Both of the above
218340af271Swh94709 *
219340af271Swh94709 * Other flags:
220340af271Swh94709 * KPREEMPT_FLAG: kpreempt needs to be called
221340af271Swh94709 */
/*
 * FPUSED_FLAG and LOFAULT_SET occupy bits 0 and 1 of the saved t_lofault
 * value kept in %o5; COPY_FLAGS is the mask the error handler strips before
 * writing the handler address back to t_lofault.
 * LOFAULT_SET repeats the value (2) of the unconditional definition earlier
 * in this file, so the redefinition is benign.
 * KPREEMPT_FLAG is not part of COPY_FLAGS: the error handler keeps it in a
 * scratch register (%l1), never in the saved t_lofault value.
 */
222340af271Swh94709#define	FPUSED_FLAG	1
223*280575beSPatrick McGehearty#define	LOFAULT_SET	2
224*280575beSPatrick McGehearty#define	COPY_FLAGS	(FPUSED_FLAG | LOFAULT_SET)
225340af271Swh94709#define	KPREEMPT_FLAG	4
226340af271Swh94709
/*
 * ALIGN_OFF_1_7: eight faligndata operations that merge the consecutive
 * register pairs (%d0,%d2) through (%d14,%d16) into %d48 through %d62,
 * i.e. 64 bytes of output.
 * NOTE(review): per the name, presumably used when the source is 1-7 bytes
 * past a 64-byte boundary -- confirm at the use sites.
 */
227340af271Swh94709#define	ALIGN_OFF_1_7			\
228340af271Swh94709	faligndata %d0, %d2, %d48	;\
229340af271Swh94709	faligndata %d2, %d4, %d50	;\
230340af271Swh94709	faligndata %d4, %d6, %d52	;\
231340af271Swh94709	faligndata %d6, %d8, %d54	;\
232340af271Swh94709	faligndata %d8, %d10, %d56	;\
233340af271Swh94709	faligndata %d10, %d12, %d58	;\
234340af271Swh94709	faligndata %d12, %d14, %d60	;\
235340af271Swh94709	faligndata %d14, %d16, %d62
236340af271Swh94709
/*
 * ALIGN_OFF_8_15: eight faligndata operations that merge the consecutive
 * register pairs (%d2,%d4) through (%d16,%d18) into %d48 through %d62,
 * i.e. 64 bytes of output.
 * NOTE(review): per the name, presumably used when the source is 8-15 bytes
 * past a 64-byte boundary -- confirm at the use sites.
 */
237340af271Swh94709#define	ALIGN_OFF_8_15			\
238340af271Swh94709	faligndata %d2, %d4, %d48	;\
239340af271Swh94709	faligndata %d4, %d6, %d50	;\
240340af271Swh94709	faligndata %d6, %d8, %d52	;\
241340af271Swh94709	faligndata %d8, %d10, %d54	;\
242340af271Swh94709	faligndata %d10, %d12, %d56	;\
243340af271Swh94709	faligndata %d12, %d14, %d58	;\
244340af271Swh94709	faligndata %d14, %d16, %d60	;\
245340af271Swh94709	faligndata %d16, %d18, %d62
246340af271Swh94709
/*
 * ALIGN_OFF_16_23: eight faligndata operations that merge the consecutive
 * register pairs (%d4,%d6) through (%d18,%d20) into %d48 through %d62,
 * i.e. 64 bytes of output.
 * NOTE(review): per the name, presumably used when the source is 16-23 bytes
 * past a 64-byte boundary -- confirm at the use sites.
 */
247340af271Swh94709#define	ALIGN_OFF_16_23			\
248340af271Swh94709	faligndata %d4, %d6, %d48	;\
249340af271Swh94709	faligndata %d6, %d8, %d50	;\
250340af271Swh94709	faligndata %d8, %d10, %d52	;\
251340af271Swh94709	faligndata %d10, %d12, %d54	;\
252340af271Swh94709	faligndata %d12, %d14, %d56	;\
253340af271Swh94709	faligndata %d14, %d16, %d58	;\
254340af271Swh94709	faligndata %d16, %d18, %d60	;\
255340af271Swh94709	faligndata %d18, %d20, %d62
256340af271Swh94709
/*
 * ALIGN_OFF_24_31: eight faligndata operations that merge the consecutive
 * register pairs (%d6,%d8) through (%d20,%d22) into %d48 through %d62,
 * i.e. 64 bytes of output.
 * NOTE(review): per the name, presumably used when the source is 24-31 bytes
 * past a 64-byte boundary -- confirm at the use sites.
 */
257340af271Swh94709#define	ALIGN_OFF_24_31			\
258340af271Swh94709	faligndata %d6, %d8, %d48	;\
259340af271Swh94709	faligndata %d8, %d10, %d50	;\
260340af271Swh94709	faligndata %d10, %d12, %d52	;\
261340af271Swh94709	faligndata %d12, %d14, %d54	;\
262340af271Swh94709	faligndata %d14, %d16, %d56	;\
263340af271Swh94709	faligndata %d16, %d18, %d58	;\
264340af271Swh94709	faligndata %d18, %d20, %d60	;\
265340af271Swh94709	faligndata %d20, %d22, %d62
266340af271Swh94709
/*
 * ALIGN_OFF_32_39: eight faligndata operations that merge the consecutive
 * register pairs (%d8,%d10) through (%d22,%d24) into %d48 through %d62,
 * i.e. 64 bytes of output.
 * NOTE(review): per the name, presumably used when the source is 32-39 bytes
 * past a 64-byte boundary -- confirm at the use sites.
 */
267340af271Swh94709#define	ALIGN_OFF_32_39			\
268340af271Swh94709	faligndata %d8, %d10, %d48	;\
269340af271Swh94709	faligndata %d10, %d12, %d50	;\
270340af271Swh94709	faligndata %d12, %d14, %d52	;\
271340af271Swh94709	faligndata %d14, %d16, %d54	;\
272340af271Swh94709	faligndata %d16, %d18, %d56	;\
273340af271Swh94709	faligndata %d18, %d20, %d58	;\
274340af271Swh94709	faligndata %d20, %d22, %d60	;\
275340af271Swh94709	faligndata %d22, %d24, %d62
276340af271Swh94709
/*
 * ALIGN_OFF_40_47: eight faligndata operations that merge the consecutive
 * register pairs (%d10,%d12) through (%d24,%d26) into %d48 through %d62,
 * i.e. 64 bytes of output.
 * NOTE(review): per the name, presumably used when the source is 40-47 bytes
 * past a 64-byte boundary -- confirm at the use sites.
 */
277340af271Swh94709#define	ALIGN_OFF_40_47			\
278340af271Swh94709	faligndata %d10, %d12, %d48	;\
279340af271Swh94709	faligndata %d12, %d14, %d50	;\
280340af271Swh94709	faligndata %d14, %d16, %d52	;\
281340af271Swh94709	faligndata %d16, %d18, %d54	;\
282340af271Swh94709	faligndata %d18, %d20, %d56	;\
283340af271Swh94709	faligndata %d20, %d22, %d58	;\
284340af271Swh94709	faligndata %d22, %d24, %d60	;\
285340af271Swh94709	faligndata %d24, %d26, %d62
286340af271Swh94709
/*
 * ALIGN_OFF_48_55: eight faligndata operations that merge the consecutive
 * register pairs (%d12,%d14) through (%d26,%d28) into %d48 through %d62,
 * i.e. 64 bytes of output.
 * NOTE(review): per the name, presumably used when the source is 48-55 bytes
 * past a 64-byte boundary -- confirm at the use sites.
 */
287340af271Swh94709#define	ALIGN_OFF_48_55			\
288340af271Swh94709	faligndata %d12, %d14, %d48	;\
289340af271Swh94709	faligndata %d14, %d16, %d50	;\
290340af271Swh94709	faligndata %d16, %d18, %d52	;\
291340af271Swh94709	faligndata %d18, %d20, %d54	;\
292340af271Swh94709	faligndata %d20, %d22, %d56	;\
293340af271Swh94709	faligndata %d22, %d24, %d58	;\
294340af271Swh94709	faligndata %d24, %d26, %d60	;\
295340af271Swh94709	faligndata %d26, %d28, %d62
296340af271Swh94709
/*
 * ALIGN_OFF_56_63: eight faligndata operations that merge the consecutive
 * register pairs (%d14,%d16) through (%d28,%d30) into %d48 through %d62,
 * i.e. 64 bytes of output.
 * NOTE(review): per the name, presumably used when the source is 56-63 bytes
 * past a 64-byte boundary -- confirm at the use sites.
 */
297340af271Swh94709#define	ALIGN_OFF_56_63			\
298340af271Swh94709	faligndata %d14, %d16, %d48	;\
299340af271Swh94709	faligndata %d16, %d18, %d50	;\
300340af271Swh94709	faligndata %d18, %d20, %d52	;\
301340af271Swh94709	faligndata %d20, %d22, %d54	;\
302340af271Swh94709	faligndata %d22, %d24, %d56	;\
303340af271Swh94709	faligndata %d24, %d26, %d58	;\
304340af271Swh94709	faligndata %d26, %d28, %d60	;\
305340af271Swh94709	faligndata %d28, %d30, %d62
306340af271Swh94709
307*280575beSPatrick McGehearty/*
308*280575beSPatrick McGehearty * FP_COPY indicates the minimum number of bytes needed
309*280575beSPatrick McGehearty * to justify using FP/VIS-accelerated memory operations.
310*280575beSPatrick McGehearty * The FPBLK code assumes a minimum number of bytes are available
311*280575beSPatrick McGehearty * to be moved on entry.  Check that code carefully before
312*280575beSPatrick McGehearty * reducing FP_COPY below 256.
313*280575beSPatrick McGehearty */
/*
 * FP_COPY		kcopy takes the FP/block-copy path only when the byte
 *			count is greater than this value.
 * SHORTCOPY		byte-count limit of 7; its use is not visible in this
 *			part of the file.
 * ASI_STBI_P,
 * ASI_STBI_AIUS	short aliases for the longer ASI names on the right
 *			(block-init store ASIs, judging by those names).
 * CACHE_LINE,
 * VIS_BLOCKSIZE	both 64 bytes; VIS_BLOCKSIZE sizes and aligns the FP
 *			register save area on the stack.
 */
314*280575beSPatrick McGehearty#define FP_COPY			584
315*280575beSPatrick McGehearty#define SHORTCOPY		7
316*280575beSPatrick McGehearty#define ASI_STBI_P		ASI_BLK_INIT_ST_QUAD_LDD_P
317*280575beSPatrick McGehearty#define ASI_STBI_AIUS		ASI_BLK_INIT_QUAD_LDD_AIUS
318*280575beSPatrick McGehearty#define CACHE_LINE		64
319340af271Swh94709#define	VIS_BLOCKSIZE		64
320340af271Swh94709
321340af271Swh94709/*
322340af271Swh94709 * Size of stack frame in order to accommodate a 64-byte aligned
323340af271Swh94709 * floating-point register save area and 2 64-bit temp locations.
324340af271Swh94709 * All copy functions use three quadrants of fp registers; to assure a
325340af271Swh94709 * block-aligned three block buffer in which to save we must reserve
326340af271Swh94709 * four blocks on stack.
327340af271Swh94709 *
328340af271Swh94709 *    _______________________________________ <-- %fp + STACK_BIAS
329340af271Swh94709 *    | We may need to preserve 3 quadrants |
330340af271Swh94709 *    | of fp regs, but since we do so with |
331340af271Swh94709 *    | BST/BLD we need room in which to    |
332340af271Swh94709 *    | align to VIS_BLOCKSIZE bytes.  So   |
333340af271Swh94709 *    | this area is 4 * VIS_BLOCKSIZE.     | <--  - SAVED_FPREGS_OFFSET
334340af271Swh94709 *    |-------------------------------------|
335340af271Swh94709 *    | 8 bytes to save %fprs		    | <--  - SAVED_FPRS_OFFSET
336340af271Swh94709 *    |-------------------------------------|
337340af271Swh94709 *    | 8 bytes to save %gsr		    | <--  - SAVED_GSR_OFFSET
338340af271Swh94709 *    ---------------------------------------
339340af271Swh94709 */
/*
 * With VIS_BLOCKSIZE == 64:
 *   HWCOPYFRAMESIZE     = 4 * 64 + 2 * 8 = 272 bytes of extra frame.
 *   SAVED_FPREGS_OFFSET = 256, SAVED_FPRS_OFFSET = 264,
 *   SAVED_GSR_OFFSET    = 272 (offsets below %fp + STACK_BIAS).
 *   SAVED_FPREGS_ADJUST = 193.  The save/restore macros compute
 *   (%fp + STACK_BIAS - 193) & -64, which lands between 193 and 256 bytes
 *   below %fp + STACK_BIAS, so the three 64-byte blocks stored from there
 *   stay inside the 4-block area.
 */
340340af271Swh94709#define HWCOPYFRAMESIZE		((VIS_BLOCKSIZE * (3 + 1)) + (2 * 8))
341340af271Swh94709#define SAVED_FPREGS_OFFSET	(VIS_BLOCKSIZE * 4)
342340af271Swh94709#define SAVED_FPREGS_ADJUST	((VIS_BLOCKSIZE * 3) + 1)
343340af271Swh94709#define SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 8)
344340af271Swh94709#define SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 8)
345340af271Swh94709
346340af271Swh94709/*
347340af271Swh94709 * In FP copies if we do not have preserved data to restore over
348340af271Swh94709 * the fp regs we used then we must zero those regs to avoid
349340af271Swh94709 * exposing portions of the data to later threads (data security).
350340af271Swh94709 */
/*
 * FZERO: clear %f0-%f30 and %f48-%f62 (even-numbered registers).  %f0 and
 * %f2 are zeroed directly with fzero; every other register is then written
 * with %f0 + %f2 or %f0 * %f2, both of which are zero.  These are the same
 * three register groups (%f0, %f16, %f48 blocks) that BST_FP_TOSTACK saves.
 * NOTE(review): the faddd/fmuld alternation is presumably a scheduling
 * choice; it does not affect the result.
 */
351340af271Swh94709#define	FZERO				\
352340af271Swh94709	fzero	%f0			;\
353340af271Swh94709	fzero	%f2			;\
354340af271Swh94709	faddd	%f0, %f2, %f4		;\
355340af271Swh94709	fmuld	%f0, %f2, %f6		;\
356340af271Swh94709	faddd	%f0, %f2, %f8		;\
357340af271Swh94709	fmuld	%f0, %f2, %f10		;\
358340af271Swh94709	faddd	%f0, %f2, %f12		;\
359340af271Swh94709	fmuld	%f0, %f2, %f14		;\
360340af271Swh94709	faddd	%f0, %f2, %f16		;\
361340af271Swh94709	fmuld	%f0, %f2, %f18		;\
362340af271Swh94709	faddd	%f0, %f2, %f20		;\
363340af271Swh94709	fmuld	%f0, %f2, %f22		;\
364340af271Swh94709	faddd	%f0, %f2, %f24		;\
365340af271Swh94709	fmuld	%f0, %f2, %f26		;\
366340af271Swh94709	faddd	%f0, %f2, %f28		;\
367340af271Swh94709	fmuld	%f0, %f2, %f30		;\
368340af271Swh94709	faddd	%f0, %f2, %f48		;\
369340af271Swh94709	fmuld	%f0, %f2, %f50		;\
370340af271Swh94709	faddd	%f0, %f2, %f52		;\
371340af271Swh94709	fmuld	%f0, %f2, %f54		;\
372340af271Swh94709	faddd	%f0, %f2, %f56		;\
373340af271Swh94709	fmuld	%f0, %f2, %f58		;\
374340af271Swh94709	faddd	%f0, %f2, %f60		;\
375340af271Swh94709	fmuld	%f0, %f2, %f62
376340af271Swh94709
37759ac0c16Sdavemq#if !defined(lint)
37859ac0c16Sdavemq
379340af271Swh94709/*
380340af271Swh94709 * Macros to save and restore fp registers to/from the stack.
381340af271Swh94709 * Used to save and restore in-use fp registers when we want to use FP.
382340af271Swh94709 */
/*
 * BST_FP_TOSTACK(tmp1): block-store the register groups starting at %f0,
 * %f16 and %f48 to three consecutive VIS_BLOCKSIZE-byte slots in the
 * current frame.  The first slot is at
 *	(%fp + STACK_BIAS - SAVED_FPREGS_ADJUST) & -VIS_BLOCKSIZE
 * tmp1 is clobbered (left pointing at the third slot).  The trailing
 * membar #Sync orders the block stores before whatever follows; the
 * leading membar is intentionally left commented out.
 */
383340af271Swh94709#define BST_FP_TOSTACK(tmp1)					\
384340af271Swh94709	/* membar #Sync	*/					;\
385340af271Swh94709	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
386340af271Swh94709	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
387340af271Swh94709	stda	%f0, [tmp1]ASI_BLK_P				;\
388340af271Swh94709	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
389340af271Swh94709	stda	%f16, [tmp1]ASI_BLK_P				;\
390340af271Swh94709	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
391340af271Swh94709	stda	%f48, [tmp1]ASI_BLK_P				;\
392340af271Swh94709	membar	#Sync
393340af271Swh94709
/*
 * BLD_FP_FROMSTACK(tmp1): inverse of BST_FP_TOSTACK.  Block-load the
 * register groups starting at %f0, %f16 and %f48 from the three
 * VIS_BLOCKSIZE-byte slots beginning at
 *	(%fp + STACK_BIAS - SAVED_FPREGS_ADJUST) & -VIS_BLOCKSIZE
 * tmp1 is clobbered.  The macro ends with membar #Sync; no leading membar
 * is issued here (the existing note says the copy completion provides it).
 */
394340af271Swh94709#define	BLD_FP_FROMSTACK(tmp1)					\
395340af271Swh94709	/* membar #Sync - provided at copy completion */	;\
396340af271Swh94709	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
397340af271Swh94709	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
398340af271Swh94709	ldda	[tmp1]ASI_BLK_P, %f0				;\
399340af271Swh94709	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
400340af271Swh94709	ldda	[tmp1]ASI_BLK_P, %f16				;\
401340af271Swh94709	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
402340af271Swh94709	ldda	[tmp1]ASI_BLK_P, %f48				;\
403340af271Swh94709	membar	#Sync
404340af271Swh94709#endif	/* lint */
405340af271Swh94709
40659ac0c16Sdavemq#endif	/* NIAGARA_IMPL */
4077c478bd9Sstevel@tonic-gate/*
4087c478bd9Sstevel@tonic-gate * Copy a block of storage, returning an error code if `from' or
4097c478bd9Sstevel@tonic-gate * `to' takes a kernel pagefault which cannot be resolved.
4107c478bd9Sstevel@tonic-gate * Returns errno value on pagefault error, 0 if all ok
4117c478bd9Sstevel@tonic-gate */
4127c478bd9Sstevel@tonic-gate
4137c478bd9Sstevel@tonic-gate#if defined(lint)
4147c478bd9Sstevel@tonic-gate
/*
 * lint-only stub for kcopy(): compiled only when lint is defined, so lint
 * has a C definition to check callers against.  It always returns 0; the
 * real implementation is the assembly in the non-lint branch.
 */
4157c478bd9Sstevel@tonic-gate/* ARGSUSED */
4167c478bd9Sstevel@tonic-gateint
4177c478bd9Sstevel@tonic-gatekcopy(const void *from, void *to, size_t count)
4187c478bd9Sstevel@tonic-gate{ return(0); }
4197c478bd9Sstevel@tonic-gate
4207c478bd9Sstevel@tonic-gate#else	/* lint */
4217c478bd9Sstevel@tonic-gate
4227c478bd9Sstevel@tonic-gate	.seg	".text"
4237c478bd9Sstevel@tonic-gate	.align	4
4247c478bd9Sstevel@tonic-gate
4257c478bd9Sstevel@tonic-gate	ENTRY(kcopy)
426340af271Swh94709#if !defined(NIAGARA_IMPL)
	/*
	 * N2/RF entry.  %o2 is the byte count (third argument).  Counts of
	 * at most FP_COPY fall through to .kcopy_small, which runs as a leaf
	 * routine (no save); larger counts go to .kcopy_more, which opens a
	 * register window with HWCOPYFRAMESIZE extra bytes of frame.
	 * NOTE(review): bgt is a signed comparison, so a count with the top
	 * bit set would take the small path -- presumably never a valid
	 * length; confirm.
	 */
427*280575beSPatrick McGehearty	cmp	%o2, FP_COPY			! check for small copy/leaf case
428*280575beSPatrick McGehearty	bgt,pt	%ncc, .kcopy_more		!
429*280575beSPatrick McGehearty	nop
	/*
	 * Small path: %o5 = previous t_lofault with no flag bits added,
	 * t_lofault = .sm_copyerr.  The stn sits in the delay slot of the
	 * branch, so it executes before .sm_do_copy is reached.
	 */
430*280575beSPatrick McGehearty.kcopy_small:					! setup error handler
431*280575beSPatrick McGehearty	sethi	%hi(.sm_copyerr), %o4
432*280575beSPatrick McGehearty	or	%o4, %lo(.sm_copyerr), %o4	! .sm_copyerr is lofault value
433*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
434*280575beSPatrick McGehearty	! Note that we carefully do *not* flag the setting of
435*280575beSPatrick McGehearty	! t_lofault.
436*280575beSPatrick McGehearty	membar	#Sync				! sync error barrier
437*280575beSPatrick McGehearty	b	.sm_do_copy			! common code
438*280575beSPatrick McGehearty	stn	%o4, [THREAD_REG + T_LOFAULT]	! set t_lofault
439*280575beSPatrick McGehearty
440*280575beSPatrick McGehearty
	/*
	 * Large path: same protocol after the save, so %o5 here is the new
	 * window's %o5.  t_lofault = .copyerr is stored in the delay slot of
	 * the branch to .do_copy.
	 */
441*280575beSPatrick McGehearty.kcopy_more:
442340af271Swh94709	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
443340af271Swh94709	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
444340af271Swh94709	or	%l7, %lo(.copyerr), %l7
445340af271Swh94709	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
446340af271Swh94709	! Note that we carefully do *not* flag the setting of
447340af271Swh94709	! t_lofault.
448340af271Swh94709	membar	#Sync				! sync error barrier
449340af271Swh94709	b	.do_copy			! common code
450340af271Swh94709	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
451340af271Swh94709
452340af271Swh94709/*
453*280575beSPatrick McGehearty * We got here because of a fault during a small kcopy, or during a small
454*280575beSPatrick McGehearty * bcopy if a fault handler existed when bcopy was called.
455*280575beSPatrick McGehearty * No floating point registers are used by the small copies.
456*280575beSPatrick McGehearty * Small copies are from a leaf routine
457*280575beSPatrick McGehearty * Errno value is in %g1.
458*280575beSPatrick McGehearty */
/*
 * .sm_copyerr: leaf-routine fault handler.  On entry %o5 holds the saved
 * t_lofault value (possibly with LOFAULT_SET in bit 1) and %g1 the errno.
 * btst sets the condition codes; the following membar and andn leave them
 * unchanged, so bnz still tests LOFAULT_SET.  The stn is in the delay slot
 * of a non-annulled branch and therefore restores t_lofault on both paths.
 *   LOFAULT_SET clear: return to the caller with %o0 = %g1.
 *   LOFAULT_SET set:   jump to the previous handler with %o0 = 0.
 */
459*280575beSPatrick McGehearty.sm_copyerr:
460*280575beSPatrick McGehearty	! The kcopy will always set a t_lofault handler. If it fires,
461*280575beSPatrick McGehearty	! we're expected to just return the error code and not to
462*280575beSPatrick McGehearty	! invoke any existing error handler. As far as bcopy is concerned,
463*280575beSPatrick McGehearty	! we only set t_lofault if there was an existing lofault handler.
464*280575beSPatrick McGehearty	! In that case we're expected to invoke the previously existing
465*280575beSPatrick McGehearty	! handler after resetting the t_lofault value.
466*280575beSPatrick McGehearty	btst	LOFAULT_SET, %o5
467*280575beSPatrick McGehearty	membar	#Sync				! sync error barrier
468*280575beSPatrick McGehearty	andn	%o5, LOFAULT_SET, %o5		! clear fault flag
469*280575beSPatrick McGehearty	bnz,pn	%ncc, 3f
470*280575beSPatrick McGehearty	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
471*280575beSPatrick McGehearty	retl
472*280575beSPatrick McGehearty	mov	%g1, %o0
473*280575beSPatrick McGehearty3:
474*280575beSPatrick McGehearty	! We're here via bcopy. There must have been an error handler
475*280575beSPatrick McGehearty	! in place otherwise we would have died a nasty death already.
476*280575beSPatrick McGehearty	jmp	%o5				! goto real handler
477*280575beSPatrick McGehearty	mov	%g0, %o0
478*280575beSPatrick McGehearty/*
479*280575beSPatrick McGehearty *  end of .sm_copyerr
480*280575beSPatrick McGehearty */
481*280575beSPatrick McGehearty
482*280575beSPatrick McGehearty/*
483340af271Swh94709 * We got here because of a fault during kcopy or bcopy if a fault
484340af271Swh94709 * handler existed when bcopy was called.
485*280575beSPatrick McGehearty * stack and fp registers need to be restored
486340af271Swh94709 * Errno value is in %g1.
487340af271Swh94709 */
/*
 * .copyerr (N2/RF): fault handler for the windowed/FP copy path.
 * On entry %o5 = saved t_lofault value plus flag bits, %g1 = errno.
 *  1. Point t_lofault at .copyerr2, so a fault taken while restoring FP
 *     state ends in a panic.
 *  2. Copy the LOFAULT_SET bit of %o5 into %l1 (delay slot, always
 *     executed).  If FPUSED_FLAG is clear, skip straight to label 1.
 *  3. Otherwise write %l5 to %gsr, then either reload the FP registers
 *     from the stack (FPRS_FEF set in %g5) or zero them with FZERO, and
 *     write %g5 to %fprs.
 *     NOTE(review): %l5 and %g5 presumably hold the %gsr and %fprs values
 *     saved by .do_copy, which is not visible here -- confirm.
 *  4. If the thread has no lwp (t_lwp == 0): decrement t_preempt and, when
 *     it reaches zero and cpu_kprunrun is non-zero, set KPREEMPT_FLAG in
 *     %l1.
 *  5. Strip COPY_FLAGS from %o5 and restore t_lofault; call kpreempt(%pil)
 *     if KPREEMPT_FLAG was set.
 *  6. LOFAULT_SET clear: ret/restore with %o0 = %g1.
 *     LOFAULT_SET set:   jump to the old handler, restoring the window,
 *     with %o0 = 0.
 */
488340af271Swh94709.copyerr:
489340af271Swh94709	sethi	%hi(.copyerr2), %l1
490340af271Swh94709	or	%l1, %lo(.copyerr2), %l1
491340af271Swh94709	membar	#Sync				! sync error barrier
492340af271Swh94709	stn	%l1, [THREAD_REG + T_LOFAULT]	! set t_lofault
493340af271Swh94709	btst	FPUSED_FLAG, %o5
494340af271Swh94709	bz,pt	%xcc, 1f
495*280575beSPatrick McGehearty	and	%o5, LOFAULT_SET, %l1	! copy flag to %l1
496340af271Swh94709
497340af271Swh94709	membar	#Sync				! sync error barrier
498*280575beSPatrick McGehearty	wr	%l5, 0, %gsr
499*280575beSPatrick McGehearty	btst	FPRS_FEF, %g5
500340af271Swh94709	bz,pt	%icc, 4f
501340af271Swh94709	nop
502340af271Swh94709	! restore fpregs from stack
503340af271Swh94709	BLD_FP_FROMSTACK(%o2)
504340af271Swh94709	ba,pt	%ncc, 2f
505*280575beSPatrick McGehearty	wr	%g5, 0, %fprs		! restore fprs
506340af271Swh947094:
507340af271Swh94709	FZERO
508*280575beSPatrick McGehearty	wr	%g5, 0, %fprs		! restore fprs
509340af271Swh947092:
510340af271Swh94709	ldn	[THREAD_REG + T_LWP], %o2
511340af271Swh94709	brnz,pt	%o2, 1f
512340af271Swh94709	nop
513340af271Swh94709
	/* the stb below is in the delay slot: t_preempt is always written back */
514340af271Swh94709	ldsb	[THREAD_REG + T_PREEMPT], %l0
515340af271Swh94709	deccc	%l0
516340af271Swh94709	bnz,pn	%ncc, 1f
517340af271Swh94709	stb	%l0, [THREAD_REG + T_PREEMPT]
518340af271Swh94709
	/* annulled branch: the "or" executes only when the branch is taken */
519340af271Swh94709	! Check for a kernel preemption request
520340af271Swh94709	ldn	[THREAD_REG + T_CPU], %l0
521340af271Swh94709	ldub	[%l0 + CPU_KPRUNRUN], %l0
522340af271Swh94709	brnz,a,pt	%l0, 1f	! Need to call kpreempt?
523340af271Swh94709	or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag
524340af271Swh94709
525340af271Swh94709	! The kcopy will always set a t_lofault handler. If it fires,
526340af271Swh94709	! we're expected to just return the error code and not to
527340af271Swh94709	! invoke any existing error handler. As far as bcopy is concerned,
528340af271Swh94709	! we only set t_lofault if there was an existing lofault handler.
529340af271Swh94709	! In that case we're expected to invoke the previously existing
530*280575beSPatrick McGehearty	! handler after resetting the t_lofault value.
531340af271Swh947091:
532340af271Swh94709	andn	%o5, COPY_FLAGS, %o5	! remove flags from lofault address
533340af271Swh94709	membar	#Sync				! sync error barrier
534340af271Swh94709	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
535340af271Swh94709
536340af271Swh94709	! call kpreempt if necessary
537340af271Swh94709	btst	KPREEMPT_FLAG, %l1
538340af271Swh94709	bz,pt	%icc, 2f
539340af271Swh94709	nop
540340af271Swh94709	call	kpreempt
541340af271Swh94709	rdpr	%pil, %o0	! pass %pil
542340af271Swh947092:
543*280575beSPatrick McGehearty	btst	LOFAULT_SET, %l1
544340af271Swh94709	bnz,pn	%ncc, 3f
545340af271Swh94709	nop
546340af271Swh94709	ret
547340af271Swh94709	restore	%g1, 0, %o0
548340af271Swh947093:
549340af271Swh94709	! We're here via bcopy. There must have been an error handler
550340af271Swh94709	! in place otherwise we would have died a nasty death already.
551340af271Swh94709	jmp	%o5				! goto real handler
552340af271Swh94709	restore	%g0, 0, %o0			! dispose of copy window
553340af271Swh94709
554340af271Swh94709/*
555340af271Swh94709 * We got here because of a fault in .copyerr.  We can't safely restore fp
556340af271Swh94709 * state, so we panic.
557340af271Swh94709 */
/*
 * fp_panic_msg is the NUL-terminated panic string; the .align 4 after it
 * restores instruction alignment.  .copyerr2 is the handler .copyerr
 * installs while it restores FP state: it loads the address of the message
 * into %o0 and calls panic.
 */
558340af271Swh94709fp_panic_msg:
559340af271Swh94709	.asciz	"Unable to restore fp state after copy operation"
560340af271Swh94709
561340af271Swh94709	.align	4
562340af271Swh94709.copyerr2:
563340af271Swh94709	set	fp_panic_msg, %o0
564340af271Swh94709	call	panic
565340af271Swh94709	nop
565340af271Swh94709	nop
566*280575beSPatrick McGehearty/*
567*280575beSPatrick McGehearty *  end of .copyerr
568*280575beSPatrick McGehearty */
569*280575beSPatrick McGehearty
570340af271Swh94709#else	/* NIAGARA_IMPL */
571473b13d4Sae112802	save	%sp, -SA(MINFRAME), %sp
572473b13d4Sae112802	set	.copyerr, %l7			! copyerr is lofault value
573473b13d4Sae112802	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
574473b13d4Sae112802	or	%o5, LOFAULT_SET, %o5
575473b13d4Sae112802	membar	#Sync				! sync error barrier
5767c478bd9Sstevel@tonic-gate	b	.do_copy			! common code
577473b13d4Sae112802	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
5787c478bd9Sstevel@tonic-gate
5797c478bd9Sstevel@tonic-gate/*
5807c478bd9Sstevel@tonic-gate * We got here because of a fault during kcopy.
5817c478bd9Sstevel@tonic-gate * Errno value is in %g1.
5827c478bd9Sstevel@tonic-gate */
5837c478bd9Sstevel@tonic-gate.copyerr:
584473b13d4Sae112802	! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
585473b13d4Sae112802	! into %o5 to indicate it has set t_lofault handler. Need to clear
586473b13d4Sae112802	! LOFAULT_SET flag before restoring the error handler.
587473b13d4Sae112802	andn	%o5, LOFAULT_SET, %o5
5887c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
5897c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
5907c478bd9Sstevel@tonic-gate	ret
5917c478bd9Sstevel@tonic-gate	restore	%g1, 0, %o0
592340af271Swh94709#endif	/* NIAGARA_IMPL */
5937c478bd9Sstevel@tonic-gate
5947c478bd9Sstevel@tonic-gate	SET_SIZE(kcopy)
5957c478bd9Sstevel@tonic-gate#endif	/* lint */
5967c478bd9Sstevel@tonic-gate
5977c478bd9Sstevel@tonic-gate
5987c478bd9Sstevel@tonic-gate/*
5997c478bd9Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
6007c478bd9Sstevel@tonic-gate */
6017c478bd9Sstevel@tonic-gate#if defined(lint)
6027c478bd9Sstevel@tonic-gate
/*
 * Empty definition of bcopy that is only seen when lint is defined; the
 * working implementation is the assembly in the non-lint branch.
 */
6037c478bd9Sstevel@tonic-gate/* ARGSUSED */
6047c478bd9Sstevel@tonic-gatevoid
6057c478bd9Sstevel@tonic-gatebcopy(const void *from, void *to, size_t count)
6067c478bd9Sstevel@tonic-gate{}
6077c478bd9Sstevel@tonic-gate
6087c478bd9Sstevel@tonic-gate#else	/* lint */
6097c478bd9Sstevel@tonic-gate
/*
 * bcopy entry: %o0 = source, %o1 = destination, %o2 = byte count.
 * Counts greater than FP_COPY branch to .bcopy_more; all other counts are
 * copied by the leaf code that follows, which never issues a save and
 * returns through .bc_smallx.
 */
6107c478bd9Sstevel@tonic-gate	ENTRY(bcopy)
611340af271Swh94709#if !defined(NIAGARA_IMPL)
612*280575beSPatrick McGehearty	cmp	%o2, FP_COPY			! check for small copy/leaf case
613*280575beSPatrick McGehearty	bgt,pt	%ncc, .bcopy_more		!
614*280575beSPatrick McGehearty	nop
615*280575beSPatrick McGehearty.bcopy_small:					! setup error handler
616*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
/*
 * %o5 holds a pointer-sized value, so test it with %ncc (as the exit
 * check in .bc_smallx does) rather than with the 32-bit %icc codes.
 * .sm_copyerr is installed only when a handler already existed.
 */
617*280575beSPatrick McGehearty	tst	%o5
618*280575beSPatrick McGehearty	bz,pt	%ncc, .sm_do_copy
619*280575beSPatrick McGehearty	sethi	%hi(.sm_copyerr), %o4
620*280575beSPatrick McGehearty	or	%o4, %lo(.sm_copyerr), %o4	! .sm_copyerr is lofault value
621*280575beSPatrick McGehearty	membar	#Sync				! sync error barrier
622*280575beSPatrick McGehearty	stn	%o4, [THREAD_REG + T_LOFAULT]	! set t_lofault
623*280575beSPatrick McGehearty	or	%o5, LOFAULT_SET, %o5		! Error should trampoline
624*280575beSPatrick McGehearty.sm_do_copy:
/*
 * The andcc in the ble delay slot feeds the bnz that follows it; the
 * andcc in the bnz delay slot feeds the first branch at .bc_align.
 */
625*280575beSPatrick McGehearty	mov	%o0, %g1		! save %o0
626*280575beSPatrick McGehearty	cmp	%o2, SHORTCOPY		! make sure there is enough to align
627*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_smallest
628*280575beSPatrick McGehearty	andcc	%o1, 0x7, %o3		! is dest long aligned
629*280575beSPatrick McGehearty	bnz,pn	%ncc, .bc_align
630*280575beSPatrick McGehearty	andcc	%o1, 1, %o3		! is dest byte aligned
631*280575beSPatrick McGehearty
/*
 * .bc_al_src: the destination (%o1) is 8-byte aligned at this point.  The
 * low three bits of the source (%o0) choose between the aligned loop
 * below and the unaligned-source loops at .bc_src_dst_unal8.
 */
632*280575beSPatrick McGehearty! Destination is long word aligned
633*280575beSPatrick McGehearty.bc_al_src:
634*280575beSPatrick McGehearty	andcc	%o0, 7, %o3
635*280575beSPatrick McGehearty	brnz,pt	%o3, .bc_src_dst_unal8
636*280575beSPatrick McGehearty	nop
637*280575beSPatrick McGehearty/*
638*280575beSPatrick McGehearty * Special case for handling when src and dest are both long word aligned
639*280575beSPatrick McGehearty * and total data to move is less than FP_COPY bytes
640*280575beSPatrick McGehearty * Also handles finish up for large block moves, so may be less than 32 bytes
641*280575beSPatrick McGehearty */
/*
 * The count in %o2 is kept biased (first by -31, then by -7) so that the
 * condition codes left by subcc/addcc can be branched on directly.  The
 * bias is removed again by the addcc at .bc_medl7.
 */
642*280575beSPatrick McGehearty.bc_medlong:
643*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
644*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medl31
645*280575beSPatrick McGehearty	nop
646*280575beSPatrick McGehearty.bc_medl32:
647*280575beSPatrick McGehearty	ldx	[%o0], %o4		! move 32 bytes
648*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count by 32
649*280575beSPatrick McGehearty	stx	%o4, [%o1]
650*280575beSPatrick McGehearty	ldx	[%o0+8], %o4
651*280575beSPatrick McGehearty	stx	%o4, [%o1+8]
652*280575beSPatrick McGehearty	ldx	[%o0+16], %o4
653*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
654*280575beSPatrick McGehearty	stx	%o4, [%o1+16]
655*280575beSPatrick McGehearty	ldx	[%o0-8], %o4
656*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
657*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medl32	! repeat if at least 32 bytes left
658*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
659*280575beSPatrick McGehearty.bc_medl31:
660*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
661*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medl7		! skip if 7 or fewer bytes left
662*280575beSPatrick McGehearty	nop
663*280575beSPatrick McGehearty.bc_medl8:
664*280575beSPatrick McGehearty	ldx	[%o0], %o4		! move 8 bytes
665*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
666*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrease count by 8
667*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
668*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medl8
669*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
/*
 * The bnz below has no instruction written in its delay slot: the
 * tst %o5 at .bc_smallx fills it.  On the taken path .bc_small4 starts
 * with its own cmp, so the condition codes from that tst are not used.
 */
670*280575beSPatrick McGehearty.bc_medl7:
671*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
672*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_small4	! do final bytes if not finished
673*280575beSPatrick McGehearty
/*
 * Common exit for the small-copy paths.  If %o5 is nonzero, the flag
 * bits are stripped and the value is written back to t_lofault.
 * Returns 0 in %o0.
 */
674*280575beSPatrick McGehearty.bc_smallx:				! finish up and exit
675*280575beSPatrick McGehearty	tst	%o5
676*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_sm_done
677*280575beSPatrick McGehearty	andn	%o5, COPY_FLAGS, %o5	! remove flags from lofault address
678*280575beSPatrick McGehearty	membar	#Sync			! sync error barrier
679*280575beSPatrick McGehearty	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
680*280575beSPatrick McGehearty.bc_sm_done:
681*280575beSPatrick McGehearty	retl
682*280575beSPatrick McGehearty	mov	%g0, %o0
683*280575beSPatrick McGehearty
/*
 * Tail copies.  %o0 = src, %o1 = dst, %o2 = bytes remaining, %o4 scratch.
 * .bc_small4 moves one 4-byte word with ld/stw when at least 4 bytes
 * remain and falls into .bc_small3x unless that finished the copy.
 */
684*280575beSPatrick McGehearty.bc_small4:
685*280575beSPatrick McGehearty	cmp	%o2, 4
686*280575beSPatrick McGehearty	blt,pt	%ncc, .bc_small3x	! skip if less than 4 bytes left
687*280575beSPatrick McGehearty	nop				!
688*280575beSPatrick McGehearty	ld	[%o0], %o4		! move 4 bytes
689*280575beSPatrick McGehearty	add	%o0, 4, %o0		! increase src ptr by 4
690*280575beSPatrick McGehearty	add	%o1, 4, %o1		! increase dst ptr by 4
691*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! decrease count by 4
692*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_smallx
693*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
694*280575beSPatrick McGehearty
/*
 * The first byte is loaded and stored unconditionally, so callers must
 * arrive with at least one byte left.  Each stb sits in a branch delay
 * slot and is executed whether or not the branch is taken.
 */
695*280575beSPatrick McGehearty.bc_small3x:				! Exactly 1, 2, or 3 bytes remain
696*280575beSPatrick McGehearty	subcc	%o2, 1, %o2		! reduce count for cc test
697*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load one byte
698*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_smallx
699*280575beSPatrick McGehearty	stb	%o4, [%o1]		! store one byte
700*280575beSPatrick McGehearty	ldub	[%o0+1], %o4		! load second byte
701*280575beSPatrick McGehearty	subcc	%o2, 1, %o2
702*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_smallx
703*280575beSPatrick McGehearty	stb	%o4, [%o1+1]		! store second byte
704*280575beSPatrick McGehearty	ldub	[%o0+2], %o4		! load third byte
705*280575beSPatrick McGehearty	ba	.bc_smallx
706*280575beSPatrick McGehearty	stb	%o4, [%o1+2]		! store third byte
707*280575beSPatrick McGehearty
/*
 * Entered from .sm_do_copy when the count is at most SHORTCOPY.  Uses
 * only byte loads and stores.  The subcc that reduces the count by 4 is
 * issued early; none of the ldub/stb/add instructions after it modify
 * the condition codes, so they are still valid at the closing bnz.
 */
708*280575beSPatrick McGehearty.bc_smallest:				! 7 or fewer bytes remain
709*280575beSPatrick McGehearty	tst	%o2
710*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_smallx
711*280575beSPatrick McGehearty	cmp	%o2, 4
712*280575beSPatrick McGehearty	blt,pt	%ncc, .bc_small3x
713*280575beSPatrick McGehearty	nop
714*280575beSPatrick McGehearty	ldub	[%o0], %o4		! read byte
715*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! reduce count by 4
716*280575beSPatrick McGehearty	stb	%o4, [%o1]		! write byte
717*280575beSPatrick McGehearty	ldub	[%o0+1], %o4		! repeat for total of 4 bytes
718*280575beSPatrick McGehearty	add	%o0, 4, %o0		! advance src by 4
719*280575beSPatrick McGehearty	stb	%o4, [%o1+1]
720*280575beSPatrick McGehearty	ldub	[%o0-2], %o4
721*280575beSPatrick McGehearty	add	%o1, 4, %o1		! advance dst by 4
722*280575beSPatrick McGehearty	stb	%o4, [%o1-2]
723*280575beSPatrick McGehearty	ldub	[%o0-1], %o4
724*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_small3x
725*280575beSPatrick McGehearty	stb	%o4, [%o1-1]
726*280575beSPatrick McGehearty	ba	.bc_smallx
727*280575beSPatrick McGehearty	nop
728*280575beSPatrick McGehearty
/*
 * Destination alignment ladder.  Several branches in this section have
 * no instruction written in their delay slot; the first instruction at
 * the label that follows (an andcc each time) fills the slot and is
 * executed on both the taken and the fall-through path.
 */
729*280575beSPatrick McGehearty/*
730*280575beSPatrick McGehearty * Align destination to long word boundary
731*280575beSPatrick McGehearty */
732*280575beSPatrick McGehearty.bc_align:				! byte align test in prior branch delay
733*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_al_d1
734*280575beSPatrick McGehearty.bc_al_d1f:				! dest is now half word aligned
735*280575beSPatrick McGehearty	andcc	%o1, 2, %o3
736*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_al_d2
737*280575beSPatrick McGehearty.bc_al_d2f:				! dest is now word aligned
738*280575beSPatrick McGehearty	andcc	%o1, 4, %o3		! is dest longword aligned?
739*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_al_src
740*280575beSPatrick McGehearty	nop
/*
 * Build one 32-bit word from four byte loads (most significant byte
 * first) so that no alignment is required of the source.
 */
741*280575beSPatrick McGehearty.bc_al_d4:				! dest is word aligned;  src is unknown
742*280575beSPatrick McGehearty	ldub	[%o0], %o4		! move a word (src align unknown)
743*280575beSPatrick McGehearty	ldub	[%o0+1], %o3
744*280575beSPatrick McGehearty	sll	%o4, 24, %o4		! position
745*280575beSPatrick McGehearty	sll	%o3, 16, %o3		! position
746*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
747*280575beSPatrick McGehearty	ldub	[%o0+2], %o4
748*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
749*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
750*280575beSPatrick McGehearty	ldub	[%o0+3], %o4
751*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
752*280575beSPatrick McGehearty	stw	%o4,[%o1]		! store four bytes
753*280575beSPatrick McGehearty	add	%o0, 4, %o0		! adjust src by 4
754*280575beSPatrick McGehearty	add	%o1, 4, %o1		! adjust dest by 4
755*280575beSPatrick McGehearty	sub	%o2, 4, %o2		! adjust count by 4
756*280575beSPatrick McGehearty	andcc	%o0, 7, %o3		! check for src long word alignment
757*280575beSPatrick McGehearty	brz,pt	%o3, .bc_medlong
758*280575beSPatrick McGehearty.bc_src_dst_unal8:
759*280575beSPatrick McGehearty	! dst is 8-byte aligned, src is not
760*280575beSPatrick McGehearty	! Size is less than FP_COPY
761*280575beSPatrick McGehearty	! Following code is to select for alignment
762*280575beSPatrick McGehearty	andcc	%o0, 0x3, %o3		! test word alignment
763*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_medword
764*280575beSPatrick McGehearty	nop
/*
 * The andcc of bit 1 in the bnz delay slot sets the condition codes
 * that the first branch at .bc_med_byte tests.
 */
765*280575beSPatrick McGehearty	andcc	%o0, 0x1, %o3		! test halfword alignment
766*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_med_byte	! go to byte move if not halfword
767*280575beSPatrick McGehearty	andcc	%o0, 0x2, %o3		! test which byte alignment
768*280575beSPatrick McGehearty	ba	.bc_medhalf
769*280575beSPatrick McGehearty	nop
770*280575beSPatrick McGehearty.bc_al_d1:				! align dest to half word
771*280575beSPatrick McGehearty	ldub	[%o0], %o4		! move a byte
772*280575beSPatrick McGehearty	add	%o0, 1, %o0
773*280575beSPatrick McGehearty	stb	%o4, [%o1]
774*280575beSPatrick McGehearty	add	%o1, 1, %o1
775*280575beSPatrick McGehearty	andcc	%o1, 2, %o3
776*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_al_d2f
777*280575beSPatrick McGehearty	sub	%o2, 1, %o2
778*280575beSPatrick McGehearty.bc_al_d2:				! align dest to word
779*280575beSPatrick McGehearty	ldub	[%o0], %o4		! move a half-word (src align unknown)
780*280575beSPatrick McGehearty	ldub	[%o0+1], %o3
781*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
782*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
783*280575beSPatrick McGehearty	sth	%o4, [%o1]
784*280575beSPatrick McGehearty	add	%o0, 2, %o0
785*280575beSPatrick McGehearty	add	%o1, 2, %o1
786*280575beSPatrick McGehearty	andcc	%o1, 4, %o3		! is dest longword aligned?
787*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_al_src
788*280575beSPatrick McGehearty	sub	%o2, 2, %o2
789*280575beSPatrick McGehearty	ba	.bc_al_d4
790*280575beSPatrick McGehearty	nop
/*
 * .bc_medword is reached from .bc_src_dst_unal8 when the low two bits of
 * the source are zero.  Registers: %o0 = src, %o1 = dst, %o2 = biased
 * count, %o4 = scratch.  All data moves in 4-byte ld/stw pairs.
 */
791*280575beSPatrick McGehearty/*
792*280575beSPatrick McGehearty * Handle all cases where src and dest are aligned on word
793*280575beSPatrick McGehearty * boundaries. Use unrolled loops for better performance.
794*280575beSPatrick McGehearty * This option wins over standard large data move when
795*280575beSPatrick McGehearty * source and destination is in cache for medium
796*280575beSPatrick McGehearty * to short data moves.
797*280575beSPatrick McGehearty */
798*280575beSPatrick McGehearty.bc_medword:
799*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
800*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medw31
801*280575beSPatrick McGehearty	nop
802*280575beSPatrick McGehearty.bc_medw32:
803*280575beSPatrick McGehearty	ld	[%o0], %o4		! move a block of 32 bytes
804*280575beSPatrick McGehearty	stw	%o4, [%o1]
805*280575beSPatrick McGehearty	ld	[%o0+4], %o4
806*280575beSPatrick McGehearty	stw	%o4, [%o1+4]
807*280575beSPatrick McGehearty	ld	[%o0+8], %o4
808*280575beSPatrick McGehearty	stw	%o4, [%o1+8]
809*280575beSPatrick McGehearty	ld	[%o0+12], %o4
810*280575beSPatrick McGehearty	stw	%o4, [%o1+12]
811*280575beSPatrick McGehearty	ld	[%o0+16], %o4
812*280575beSPatrick McGehearty	stw	%o4, [%o1+16]
813*280575beSPatrick McGehearty	ld	[%o0+20], %o4
814*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
815*280575beSPatrick McGehearty	stw	%o4, [%o1+20]
816*280575beSPatrick McGehearty	ld	[%o0+24], %o4
817*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
818*280575beSPatrick McGehearty	stw	%o4, [%o1+24]
819*280575beSPatrick McGehearty	ld	[%o0-4], %o4
820*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
821*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medw32	! repeat if at least 32 bytes left
822*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
823*280575beSPatrick McGehearty.bc_medw31:
824*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
825*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medw7		! skip if 7 or fewer bytes left
826*280575beSPatrick McGehearty	nop				!
827*280575beSPatrick McGehearty.bc_medw15:
828*280575beSPatrick McGehearty	ld	[%o0], %o4		! move a block of 8 bytes
829*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
830*280575beSPatrick McGehearty	stw	%o4, [%o1]
831*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
832*280575beSPatrick McGehearty	ld	[%o0-4], %o4
833*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
834*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medw15
835*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
/*
 * The addcc removes the remaining bias of 7, leaving the true number of
 * bytes still to copy in %o2.  The cmp in the bz delay slot prepares the
 * blt that follows.
 */
836*280575beSPatrick McGehearty.bc_medw7:
837*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
838*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_smallx	! exit if finished
839*280575beSPatrick McGehearty	cmp	%o2, 4
840*280575beSPatrick McGehearty	blt,pt	%ncc, .bc_small3x	! skip if less than 4 bytes left
841*280575beSPatrick McGehearty	nop				!
842*280575beSPatrick McGehearty	ld	[%o0], %o4		! move 4 bytes
843*280575beSPatrick McGehearty	add	%o0, 4, %o0		! increase src ptr by 4
844*280575beSPatrick McGehearty	add	%o1, 4, %o1		! increase dst ptr by 4
845*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! decrease count by 4
846*280575beSPatrick McGehearty	bnz	.bc_small3x
847*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
848*280575beSPatrick McGehearty	ba	.bc_smallx
849*280575beSPatrick McGehearty	nop
850*280575beSPatrick McGehearty
/*
 * .bc_medhalf is reached from .bc_src_dst_unal8 when bit 0 of the source
 * is clear but the source is not word aligned.  Each 8-byte store is
 * assembled from a halfword, a word and a halfword load (shifted left by
 * 48, 16 and 0 bits respectively) and written with one stx.
 *
 * The subcc at the top of the loop sets the condition codes used by the
 * bgu at the bottom; none of the loads, shifts, ors, adds or stores in
 * between modify them.
 */
851*280575beSPatrick McGehearty.bc_medhalf:
852*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
853*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medh31
854*280575beSPatrick McGehearty	nop
855*280575beSPatrick McGehearty.bc_medh32:				! load and store block of 32 bytes
856*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
857*280575beSPatrick McGehearty
858*280575beSPatrick McGehearty	lduh	[%o0], %o4		! move 32 bytes
859*280575beSPatrick McGehearty	lduw	[%o0+2], %o3
860*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
861*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
862*280575beSPatrick McGehearty	or	%o4, %o3, %o3
863*280575beSPatrick McGehearty	lduh	[%o0+6], %o4
864*280575beSPatrick McGehearty	or	%o4, %o3, %o4
865*280575beSPatrick McGehearty	stx	%o4, [%o1]
866*280575beSPatrick McGehearty
867*280575beSPatrick McGehearty	lduh	[%o0+8], %o4
868*280575beSPatrick McGehearty	lduw	[%o0+10], %o3
869*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
870*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
871*280575beSPatrick McGehearty	or	%o4, %o3, %o3
872*280575beSPatrick McGehearty	lduh	[%o0+14], %o4
873*280575beSPatrick McGehearty	or	%o4, %o3, %o4
874*280575beSPatrick McGehearty	stx	%o4, [%o1+8]
875*280575beSPatrick McGehearty
876*280575beSPatrick McGehearty	lduh	[%o0+16], %o4
877*280575beSPatrick McGehearty	lduw	[%o0+18], %o3
878*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
879*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
880*280575beSPatrick McGehearty	or	%o4, %o3, %o3
881*280575beSPatrick McGehearty	lduh	[%o0+22], %o4
882*280575beSPatrick McGehearty	or	%o4, %o3, %o4
883*280575beSPatrick McGehearty	stx	%o4, [%o1+16]
884*280575beSPatrick McGehearty
885*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
886*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
887*280575beSPatrick McGehearty
888*280575beSPatrick McGehearty	lduh	[%o0-8], %o4
889*280575beSPatrick McGehearty	lduw	[%o0-6], %o3
890*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
891*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
892*280575beSPatrick McGehearty	or	%o4, %o3, %o3
893*280575beSPatrick McGehearty	lduh	[%o0-2], %o4
894*280575beSPatrick McGehearty	or	%o3, %o4, %o4
895*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medh32	! repeat if at least 32 bytes left
896*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
897*280575beSPatrick McGehearty
/*
 * Halfword-aligned source, 8 bytes per iteration: one stx is issued and
 * both pointers advance by 8 on each pass through .bc_medh15.
 */
898*280575beSPatrick McGehearty.bc_medh31:
899*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
900*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medh7		! skip if 7 or fewer bytes left
901*280575beSPatrick McGehearty	nop				!
902*280575beSPatrick McGehearty.bc_medh15:
903*280575beSPatrick McGehearty	lduh	[%o0], %o4		! move 8 bytes
904*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
905*280575beSPatrick McGehearty	lduw	[%o0+2], %o3
906*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
907*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
908*280575beSPatrick McGehearty	or	%o4, %o3, %o3
909*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
910*280575beSPatrick McGehearty	lduh	[%o0+6], %o4
911*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
912*280575beSPatrick McGehearty	or	%o4, %o3, %o4
913*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medh15
914*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
/*
 * Finish for the halfword-aligned source path.  The addcc removes the
 * remaining bias of 7 from %o2.  If 4 or more bytes remain, one word is
 * built from two halfword loads and stored with stw; anything left
 * after that goes to .bc_small3x.
 */
915*280575beSPatrick McGehearty.bc_medh7:
916*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
917*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_smallx	! exit if finished
918*280575beSPatrick McGehearty	cmp	%o2, 4
919*280575beSPatrick McGehearty	blt,pt	%ncc, .bc_small3x	! skip if less than 4 bytes left
920*280575beSPatrick McGehearty	nop				!
921*280575beSPatrick McGehearty	lduh	[%o0], %o4
922*280575beSPatrick McGehearty	sll	%o4, 16, %o4
923*280575beSPatrick McGehearty	lduh	[%o0+2], %o3
924*280575beSPatrick McGehearty	or	%o3, %o4, %o4
925*280575beSPatrick McGehearty	subcc	%o2, 4, %o2
926*280575beSPatrick McGehearty	add	%o0, 4, %o0
927*280575beSPatrick McGehearty	add	%o1, 4, %o1
928*280575beSPatrick McGehearty	bnz	.bc_small3x
929*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
930*280575beSPatrick McGehearty	ba	.bc_smallx
931*280575beSPatrick McGehearty	nop
932*280575beSPatrick McGehearty
/*
 * .bc_med_byte is entered with the condition codes set by the
 * "andcc %o0, 0x2" in the delay slot of the branch that jumps here.
 * Nonzero selects .bc_medbh32a (alignment 3 or 7); zero falls through
 * to the alignment 1 or 5 loop.  The subcc in the delay slot of the
 * first bnz runs on both paths, and its condition codes are tested by
 * the ble here and by the ble at .bc_medbh32a.
 *
 * In .bc_medb32 each 8-byte store is assembled from a byte, a halfword,
 * a word and a byte load, shifted left by 56, 40, 8 and 0 bits.
 */
933*280575beSPatrick McGehearty	.align 16
934*280575beSPatrick McGehearty.bc_med_byte:
935*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_medbh32a	! go to correct byte move
936*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
937*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medb31
938*280575beSPatrick McGehearty	nop
939*280575beSPatrick McGehearty.bc_medb32:				! Alignment 1 or 5
940*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
941*280575beSPatrick McGehearty
942*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 32 bytes
943*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
944*280575beSPatrick McGehearty	lduh	[%o0+1], %o4
945*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
946*280575beSPatrick McGehearty	or	%o4, %o3, %o3
947*280575beSPatrick McGehearty	lduw	[%o0+3], %o4
948*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
949*280575beSPatrick McGehearty	or	%o4, %o3, %o3
950*280575beSPatrick McGehearty	ldub	[%o0+7], %o4
951*280575beSPatrick McGehearty	or	%o4, %o3, %o4
952*280575beSPatrick McGehearty	stx	%o4, [%o1]
953*280575beSPatrick McGehearty
954*280575beSPatrick McGehearty	ldub	[%o0+8], %o4
955*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
956*280575beSPatrick McGehearty	lduh	[%o0+9], %o4
957*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
958*280575beSPatrick McGehearty	or	%o4, %o3, %o3
959*280575beSPatrick McGehearty	lduw	[%o0+11], %o4
960*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
961*280575beSPatrick McGehearty	or	%o4, %o3, %o3
962*280575beSPatrick McGehearty	ldub	[%o0+15], %o4
963*280575beSPatrick McGehearty	or	%o4, %o3, %o4
964*280575beSPatrick McGehearty	stx	%o4, [%o1+8]
965*280575beSPatrick McGehearty
966*280575beSPatrick McGehearty	ldub	[%o0+16], %o4
967*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
968*280575beSPatrick McGehearty	lduh	[%o0+17], %o4
969*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
970*280575beSPatrick McGehearty	or	%o4, %o3, %o3
971*280575beSPatrick McGehearty	lduw	[%o0+19], %o4
972*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
973*280575beSPatrick McGehearty	or	%o4, %o3, %o3
974*280575beSPatrick McGehearty	ldub	[%o0+23], %o4
975*280575beSPatrick McGehearty	or	%o4, %o3, %o4
976*280575beSPatrick McGehearty	stx	%o4, [%o1+16]
977*280575beSPatrick McGehearty
978*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
979*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
980*280575beSPatrick McGehearty
981*280575beSPatrick McGehearty	ldub	[%o0-8], %o4
982*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
983*280575beSPatrick McGehearty	lduh	[%o0-7], %o4
984*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
985*280575beSPatrick McGehearty	or	%o4, %o3, %o3
986*280575beSPatrick McGehearty	lduw	[%o0-5], %o4
987*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
988*280575beSPatrick McGehearty	or	%o4, %o3, %o3
989*280575beSPatrick McGehearty	ldub	[%o0-1], %o4
990*280575beSPatrick McGehearty	or	%o4, %o3, %o4
991*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medb32	! repeat if at least 32 bytes left
992*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
993*280575beSPatrick McGehearty
/*
 * Source alignment 1 or 5, 8 bytes per iteration: one stx is issued and
 * both pointers advance by 8 on each pass through .bc_medb15.
 */
994*280575beSPatrick McGehearty.bc_medb31:				! 31 or fewer bytes remaining
995*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
996*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medb7		! skip if 7 or fewer bytes left
997*280575beSPatrick McGehearty	nop				!
998*280575beSPatrick McGehearty.bc_medb15:
999*280575beSPatrick McGehearty
1000*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 8 bytes
1001*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
1002*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
1003*280575beSPatrick McGehearty	lduh	[%o0+1], %o4
1004*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
1005*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1006*280575beSPatrick McGehearty	lduw	[%o0+3], %o4
1007*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
1008*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
1009*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1010*280575beSPatrick McGehearty	ldub	[%o0+7], %o4
1011*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
1012*280575beSPatrick McGehearty	or	%o4, %o3, %o4
1013*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medb15
1014*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
/*
 * Finish for the odd-aligned source paths; both .bc_medb31 and
 * .bc_medbh31 branch here.  The addcc removes the remaining bias of 7
 * from %o2.  If 4 or more bytes remain, one word is assembled from a
 * byte, a halfword and a byte load and stored with stw; anything left
 * after that goes to .bc_small3x.
 * NOTE(review): the lduh at [%o0+1] needs %o0 to be odd, which appears
 * to hold for every path that reaches this label - confirm.
 */
1015*280575beSPatrick McGehearty.bc_medb7:
1016*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
1017*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_smallx	! exit if finished
1018*280575beSPatrick McGehearty	cmp	%o2, 4
1019*280575beSPatrick McGehearty	blt,pt	%ncc, .bc_small3x	! skip if less than 4 bytes left
1020*280575beSPatrick McGehearty	nop				!
1021*280575beSPatrick McGehearty	ldub	[%o0], %o4		! move 4 bytes
1022*280575beSPatrick McGehearty	sll	%o4, 24, %o3
1023*280575beSPatrick McGehearty	lduh	[%o0+1], %o4
1024*280575beSPatrick McGehearty	sll	%o4, 8, %o4
1025*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1026*280575beSPatrick McGehearty	ldub	[%o0+3], %o4
1027*280575beSPatrick McGehearty	or	%o4, %o3, %o4
1028*280575beSPatrick McGehearty	subcc	%o2, 4, %o2
1029*280575beSPatrick McGehearty	add	%o0, 4, %o0
1030*280575beSPatrick McGehearty	add	%o1, 4, %o1
1031*280575beSPatrick McGehearty	bnz	.bc_small3x
1032*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
1033*280575beSPatrick McGehearty	ba	.bc_smallx
1034*280575beSPatrick McGehearty	nop
1035*280575beSPatrick McGehearty
/*
 * .bc_medbh32a is the branch target of the first bnz at .bc_med_byte.
 * The ble below tests the condition codes left by the
 * "subcc %o2, 31, %o2" in that branch's delay slot.
 *
 * Each 8-byte store is assembled from a byte, a word, a halfword and a
 * byte load, shifted left by 56, 24, 8 and 0 bits.
 */
1036*280575beSPatrick McGehearty	.align 16
1037*280575beSPatrick McGehearty.bc_medbh32a:				! Alignment 3 or 7
1038*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medbh31
1039*280575beSPatrick McGehearty	nop
1040*280575beSPatrick McGehearty.bc_medbh32:				! Alignment 3 or 7
1041*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
1042*280575beSPatrick McGehearty
1043*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 32 bytes
1044*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
1045*280575beSPatrick McGehearty	lduw	[%o0+1], %o4
1046*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
1047*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1048*280575beSPatrick McGehearty	lduh	[%o0+5], %o4
1049*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
1050*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1051*280575beSPatrick McGehearty	ldub	[%o0+7], %o4
1052*280575beSPatrick McGehearty	or	%o4, %o3, %o4
1053*280575beSPatrick McGehearty	stx	%o4, [%o1]
1054*280575beSPatrick McGehearty
1055*280575beSPatrick McGehearty	ldub	[%o0+8], %o4
1056*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
1057*280575beSPatrick McGehearty	lduw	[%o0+9], %o4
1058*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
1059*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1060*280575beSPatrick McGehearty	lduh	[%o0+13], %o4
1061*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
1062*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1063*280575beSPatrick McGehearty	ldub	[%o0+15], %o4
1064*280575beSPatrick McGehearty	or	%o4, %o3, %o4
1065*280575beSPatrick McGehearty	stx	%o4, [%o1+8]
1066*280575beSPatrick McGehearty
1067*280575beSPatrick McGehearty	ldub	[%o0+16], %o4
1068*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
1069*280575beSPatrick McGehearty	lduw	[%o0+17], %o4
1070*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
1071*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1072*280575beSPatrick McGehearty	lduh	[%o0+21], %o4
1073*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
1074*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1075*280575beSPatrick McGehearty	ldub	[%o0+23], %o4
1076*280575beSPatrick McGehearty	or	%o4, %o3, %o4
1077*280575beSPatrick McGehearty	stx	%o4, [%o1+16]
1078*280575beSPatrick McGehearty
1079*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
1080*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
1081*280575beSPatrick McGehearty
1082*280575beSPatrick McGehearty	ldub	[%o0-8], %o4
1083*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
1084*280575beSPatrick McGehearty	lduw	[%o0-7], %o4
1085*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
1086*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1087*280575beSPatrick McGehearty	lduh	[%o0-3], %o4
1088*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
1089*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1090*280575beSPatrick McGehearty	ldub	[%o0-1], %o4
1091*280575beSPatrick McGehearty	or	%o4, %o3, %o4
1092*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medbh32	! repeat if at least 32 bytes left
1093*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
1094*280575beSPatrick McGehearty
/*
 * Source alignment 3 or 7, 8 bytes per iteration.  Each 8-byte value is
 * assembled from a byte, a word, a halfword and a byte load (shifted
 * left by 56, 24, 8 and 0 bits) and stored exactly once, by the stx in
 * the delay slot of the loop branch.  That branch is not annulled, so
 * the store is executed on the final iteration as well.
 */
1095*280575beSPatrick McGehearty.bc_medbh31:
1096*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
1097*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_medb7		! skip if 7 or fewer bytes left
1098*280575beSPatrick McGehearty	nop				!
1099*280575beSPatrick McGehearty.bc_medbh15:
1100*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 8 bytes
1101*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
1102*280575beSPatrick McGehearty	lduw	[%o0+1], %o4
1103*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
1104*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1105*280575beSPatrick McGehearty	lduh	[%o0+5], %o4
1106*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
1107*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1108*280575beSPatrick McGehearty	ldub	[%o0+7], %o4
1109*280575beSPatrick McGehearty	or	%o4, %o3, %o4
1111*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
1112*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
1113*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
1114*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_medbh15
1115*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
1116*280575beSPatrick McGehearty	ba	.bc_medb7
1117*280575beSPatrick McGehearty	nop
1118*280575beSPatrick McGehearty
1119*280575beSPatrick McGehearty	SET_SIZE(bcopy)
1120*280575beSPatrick McGehearty/*
1121*280575beSPatrick McGehearty * The _more entry points are not intended to be used directly by
1122*280575beSPatrick McGehearty * any caller from outside this file.  They are provided to allow
1123*280575beSPatrick McGehearty * profiling and dtrace of the portions of the copy code that use
1124*280575beSPatrick McGehearty * the floating point registers.
1125*280575beSPatrick McGehearty */
1126*280575beSPatrick McGehearty	ENTRY(bcopy_more)
1127*280575beSPatrick McGehearty.bcopy_more:
1128340af271Swh94709	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
1129340af271Swh94709	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
1130340af271Swh94709	brz,pt	%o5, .do_copy
1131340af271Swh94709	nop
1132340af271Swh94709	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
1133340af271Swh94709	or	%l7, %lo(.copyerr), %l7
1134340af271Swh94709	membar	#Sync				! sync error barrier
1135340af271Swh94709	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
1136340af271Swh94709	! We've already captured whether t_lofault was zero on entry.
1137340af271Swh94709	! We need to mark ourselves as being from bcopy since both
1138*280575beSPatrick McGehearty	! kcopy and bcopy use the same code path. If LOFAULT_SET is
1139340af271Swh94709	! set and the saved lofault was zero, we won't reset lofault on
1140340af271Swh94709	! returning.
1141*280575beSPatrick McGehearty	or	%o5, LOFAULT_SET, %o5
1142*280575beSPatrick McGehearty.do_copy:
1143*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_LWP], %o3
1144*280575beSPatrick McGehearty	brnz,pt	%o3, 1f
1145*280575beSPatrick McGehearty	nop
1146*280575beSPatrick McGehearty/*
1147*280575beSPatrick McGehearty * kpreempt_disable();
1148*280575beSPatrick McGehearty */
1149*280575beSPatrick McGehearty	ldsb	[THREAD_REG +T_PREEMPT], %o3
1150*280575beSPatrick McGehearty	inc	%o3
1151*280575beSPatrick McGehearty	stb	%o3, [THREAD_REG + T_PREEMPT]
1152*280575beSPatrick McGehearty1:
1153*280575beSPatrick McGehearty/*
1154*280575beSPatrick McGehearty * The following code is for large copies. We know there are at
1155*280575beSPatrick McGehearty * least FP_COPY bytes available. FP regs are used, so
1156*280575beSPatrick McGehearty * we save registers and fp regs before starting.
1157*280575beSPatrick McGehearty */
1158*280575beSPatrick McGehearty	rd	%fprs, %g5		! check for unused fp
1159*280575beSPatrick McGehearty	or	%o5,FPUSED_FLAG,%o5
1160*280575beSPatrick McGehearty	! if fprs.fef == 0, set it.
1161*280575beSPatrick McGehearty	! Setting it when already set costs more than checking
1162*280575beSPatrick McGehearty	andcc	%g5, FPRS_FEF, %g5	! test FEF, fprs.du = fprs.dl = 0
1163*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_fp_unused
1164*280575beSPatrick McGehearty	prefetch [%i0 + (1 * CACHE_LINE)], #one_read
1165*280575beSPatrick McGehearty	BST_FP_TOSTACK(%o3)
1166*280575beSPatrick McGehearty	ba	.bc_fp_ready
1167*280575beSPatrick McGehearty.bc_fp_unused:
1168*280575beSPatrick McGehearty	andcc	%i1, 1, %o3		! is dest byte aligned
1169*280575beSPatrick McGehearty	wr	%g0, FPRS_FEF, %fprs	! fprs.fef = 1
1170*280575beSPatrick McGehearty.bc_fp_ready:
1171*280575beSPatrick McGehearty	rd	%gsr, %l5		! save %gsr value
1172*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_big_d1
1173*280575beSPatrick McGehearty.bc_big_d1f:				! dest is now half word aligned
1174*280575beSPatrick McGehearty	andcc	%i1, 2, %o3
1175*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_big_d2
1176*280575beSPatrick McGehearty.bc_big_d2f:				! dest is now word aligned
1177*280575beSPatrick McGehearty	andcc	%i1, 4, %o3
1178*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_big_d4
1179*280575beSPatrick McGehearty.bc_big_d4f:				! dest is now long word aligned
1180*280575beSPatrick McGehearty	andcc	%i0, 7, %o3		! is src long word aligned
1181*280575beSPatrick McGehearty	brnz,pt	%o3, .bc_big_unal8
1182*280575beSPatrick McGehearty	prefetch [%i0 + (2 * CACHE_LINE)], #one_read
1183*280575beSPatrick McGehearty
1184*280575beSPatrick McGehearty	! Src and dst are long word aligned
1185*280575beSPatrick McGehearty	! align dst to 64 byte boundary
1186*280575beSPatrick McGehearty	andcc	%i1, 0x3f, %o3		! %o3 == 0 means dst is 64 byte aligned
1187*280575beSPatrick McGehearty	brz,pn	%o3, .bc_al_to_64
1188*280575beSPatrick McGehearty	nop
1189*280575beSPatrick McGehearty	sub	%o3, 64, %o3		! %o3 has negative bytes to move
1190*280575beSPatrick McGehearty	add	%i2, %o3, %i2		! adjust remaining count
1191*280575beSPatrick McGehearty	andcc	%o3, 8, %o4		! odd long words to move?
1192*280575beSPatrick McGehearty	brz,pt	%o4, .bc_al_to_16
1193*280575beSPatrick McGehearty	nop
1194*280575beSPatrick McGehearty	add	%o3, 8, %o3
1195*280575beSPatrick McGehearty	ldx	[%i0], %o4
1196*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increment src ptr
1197*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increment dst ptr
1198*280575beSPatrick McGehearty	stx	%o4, [%i1-8]
1199*280575beSPatrick McGehearty! Dest is aligned on 16 bytes, src 8 byte aligned
1200*280575beSPatrick McGehearty.bc_al_to_16:
1201*280575beSPatrick McGehearty	andcc	%o3, 0x30, %o4		! pair of long words to move?
1202*280575beSPatrick McGehearty	brz,pt	%o4, .bc_al_to_64
1203*280575beSPatrick McGehearty	nop
1204*280575beSPatrick McGehearty.bc_al_mv_16:
1205*280575beSPatrick McGehearty	add	%o3, 16, %o3
1206*280575beSPatrick McGehearty	ldx	[%i0], %o4
1207*280575beSPatrick McGehearty	stx	%o4, [%i1]
1208*280575beSPatrick McGehearty	ldx	[%i0+8], %o4
1209*280575beSPatrick McGehearty	add	%i0, 16, %i0		! increment src ptr
1210*280575beSPatrick McGehearty	stx	%o4, [%i1+8]
1211*280575beSPatrick McGehearty	andcc	%o3, 48, %o4
1212*280575beSPatrick McGehearty	brnz,pt	%o4, .bc_al_mv_16
1213*280575beSPatrick McGehearty	add	%i1, 16, %i1		! increment dst ptr
1214*280575beSPatrick McGehearty! Dest is aligned on 64 bytes, src 8 byte aligned
1215*280575beSPatrick McGehearty.bc_al_to_64:
1216*280575beSPatrick McGehearty	! Determine source alignment
1217*280575beSPatrick McGehearty	! to correct 8 byte offset
1218*280575beSPatrick McGehearty	andcc	%i0, 32, %o3
1219*280575beSPatrick McGehearty	brnz,pn	%o3, .bc_aln_1
1220*280575beSPatrick McGehearty	andcc	%i0, 16, %o3
1221*280575beSPatrick McGehearty	brnz,pn	%o3, .bc_aln_01
1222*280575beSPatrick McGehearty	andcc	%i0, 8, %o3
1223*280575beSPatrick McGehearty	brz,pn	%o3, .bc_aln_000
1224*280575beSPatrick McGehearty	prefetch [%i0 + (3 * CACHE_LINE)], #one_read
1225*280575beSPatrick McGehearty	ba	.bc_aln_001
1226*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1227*280575beSPatrick McGehearty
1228*280575beSPatrick McGehearty.bc_aln_01:
1229*280575beSPatrick McGehearty	brnz,pn	%o3, .bc_aln_011
1230*280575beSPatrick McGehearty	prefetch [%i0 + (3 * CACHE_LINE)], #one_read
1231*280575beSPatrick McGehearty	ba	.bc_aln_010
1232*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1233*280575beSPatrick McGehearty.bc_aln_1:
1234*280575beSPatrick McGehearty	andcc	%i0, 16, %o3
1235*280575beSPatrick McGehearty	brnz,pn	%o3, .bc_aln_11
1236*280575beSPatrick McGehearty	andcc	%i0, 8, %o3
1237*280575beSPatrick McGehearty	brnz,pn	%o3, .bc_aln_101
1238*280575beSPatrick McGehearty	prefetch [%i0 + (3 * CACHE_LINE)], #one_read
1239*280575beSPatrick McGehearty	ba	.bc_aln_100
1240*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1241*280575beSPatrick McGehearty.bc_aln_11:
1242*280575beSPatrick McGehearty	brz,pn	%o3, .bc_aln_110
1243*280575beSPatrick McGehearty	prefetch [%i0 + (3 * CACHE_LINE)], #one_read
1244*280575beSPatrick McGehearty
1245*280575beSPatrick McGehearty.bc_aln_111:
1246*280575beSPatrick McGehearty! Alignment off by 8 bytes
1247*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1248*280575beSPatrick McGehearty	ldd	[%i0], %d0
1249*280575beSPatrick McGehearty	add	%i0, 8, %i0
1250*280575beSPatrick McGehearty	sub	%i2, 8, %i2
1251*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
1252*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
1253*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
1254*280575beSPatrick McGehearty.bc_aln_111_loop:
1255*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16		! block load
1256*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
1257*280575beSPatrick McGehearty	fmovd	%d16, %d2
1258*280575beSPatrick McGehearty	fmovd	%d18, %d4
1259*280575beSPatrick McGehearty	fmovd	%d20, %d6
1260*280575beSPatrick McGehearty	fmovd	%d22, %d8
1261*280575beSPatrick McGehearty	fmovd	%d24, %d10
1262*280575beSPatrick McGehearty	fmovd	%d26, %d12
1263*280575beSPatrick McGehearty	fmovd	%d28, %d14
1264*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
1265*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
1266*280575beSPatrick McGehearty	add	%i0, 64, %i0
1267*280575beSPatrick McGehearty	fmovd	%d30, %d0
1268*280575beSPatrick McGehearty	bgt,pt	%ncc, .bc_aln_111_loop
1269*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1270*280575beSPatrick McGehearty	add	%i1, %i0, %i1
1271*280575beSPatrick McGehearty
1272*280575beSPatrick McGehearty	std	%d0, [%i1]
1273*280575beSPatrick McGehearty	ba	.bc_remain_stuff
1274*280575beSPatrick McGehearty	add	%i1, 8, %i1
1275*280575beSPatrick McGehearty	! END OF aln_111
1276*280575beSPatrick McGehearty
1277*280575beSPatrick McGehearty.bc_aln_110:
1278*280575beSPatrick McGehearty! Alignment off by 16 bytes
1279*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1280*280575beSPatrick McGehearty	ldd	[%i0], %d0
1281*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
1282*280575beSPatrick McGehearty	add	%i0, 16, %i0
1283*280575beSPatrick McGehearty	sub	%i2, 16, %i2
1284*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
1285*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
1286*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
1287*280575beSPatrick McGehearty.bc_aln_110_loop:
1288*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16		! block load
1289*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
1290*280575beSPatrick McGehearty	fmovd	%d16, %d4
1291*280575beSPatrick McGehearty	fmovd	%d18, %d6
1292*280575beSPatrick McGehearty	fmovd	%d20, %d8
1293*280575beSPatrick McGehearty	fmovd	%d22, %d10
1294*280575beSPatrick McGehearty	fmovd	%d24, %d12
1295*280575beSPatrick McGehearty	fmovd	%d26, %d14
1296*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
1297*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
1298*280575beSPatrick McGehearty	add	%i0, 64, %i0
1299*280575beSPatrick McGehearty	fmovd	%d28, %d0
1300*280575beSPatrick McGehearty	fmovd	%d30, %d2
1301*280575beSPatrick McGehearty	bgt,pt	%ncc, .bc_aln_110_loop
1302*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1303*280575beSPatrick McGehearty	add	%i1, %i0, %i1
1304*280575beSPatrick McGehearty
1305*280575beSPatrick McGehearty	std	%d0, [%i1]
1306*280575beSPatrick McGehearty	std	%d2, [%i1+8]
1307*280575beSPatrick McGehearty	ba	.bc_remain_stuff
1308*280575beSPatrick McGehearty	add	%i1, 16, %i1
1309*280575beSPatrick McGehearty	! END OF aln_110
1310*280575beSPatrick McGehearty
1311*280575beSPatrick McGehearty.bc_aln_101:
1312*280575beSPatrick McGehearty! Alignment off by 24 bytes
1313*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1314*280575beSPatrick McGehearty	ldd	[%i0], %d0
1315*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
1316*280575beSPatrick McGehearty	ldd	[%i0+16], %d4
1317*280575beSPatrick McGehearty	add	%i0, 24, %i0
1318*280575beSPatrick McGehearty	sub	%i2, 24, %i2
1319*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
1320*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
1321*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
1322*280575beSPatrick McGehearty.bc_aln_101_loop:
1323*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
1324*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
1325*280575beSPatrick McGehearty	fmovd	%d16, %d6
1326*280575beSPatrick McGehearty	fmovd	%d18, %d8
1327*280575beSPatrick McGehearty	fmovd	%d20, %d10
1328*280575beSPatrick McGehearty	fmovd	%d22, %d12
1329*280575beSPatrick McGehearty	fmovd	%d24, %d14
1330*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
1331*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
1332*280575beSPatrick McGehearty	add	%i0, 64, %i0
1333*280575beSPatrick McGehearty	fmovd	%d26, %d0
1334*280575beSPatrick McGehearty	fmovd	%d28, %d2
1335*280575beSPatrick McGehearty	fmovd	%d30, %d4
1336*280575beSPatrick McGehearty	bgt,pt	%ncc, .bc_aln_101_loop
1337*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1338*280575beSPatrick McGehearty	add	%i1, %i0, %i1
1339*280575beSPatrick McGehearty
1340*280575beSPatrick McGehearty	std	%d0, [%i1]
1341*280575beSPatrick McGehearty	std	%d2, [%i1+8]
1342*280575beSPatrick McGehearty	std	%d4, [%i1+16]
1343*280575beSPatrick McGehearty	ba	.bc_remain_stuff
1344*280575beSPatrick McGehearty	add	%i1, 24, %i1
1345*280575beSPatrick McGehearty	! END OF aln_101
1346*280575beSPatrick McGehearty
1347*280575beSPatrick McGehearty.bc_aln_100:
1348*280575beSPatrick McGehearty! Alignment off by 32 bytes
1349*280575beSPatrick McGehearty	ldd	[%i0], %d0
1350*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
1351*280575beSPatrick McGehearty	ldd	[%i0+16],%d4
1352*280575beSPatrick McGehearty	ldd	[%i0+24],%d6
1353*280575beSPatrick McGehearty	add	%i0, 32, %i0
1354*280575beSPatrick McGehearty	sub	%i2, 32, %i2
1355*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
1356*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
1357*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
1358*280575beSPatrick McGehearty.bc_aln_100_loop:
1359*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
1360*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
1361*280575beSPatrick McGehearty	fmovd	%d16, %d8
1362*280575beSPatrick McGehearty	fmovd	%d18, %d10
1363*280575beSPatrick McGehearty	fmovd	%d20, %d12
1364*280575beSPatrick McGehearty	fmovd	%d22, %d14
1365*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
1366*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
1367*280575beSPatrick McGehearty	add	%i0, 64, %i0
1368*280575beSPatrick McGehearty	fmovd	%d24, %d0
1369*280575beSPatrick McGehearty	fmovd	%d26, %d2
1370*280575beSPatrick McGehearty	fmovd	%d28, %d4
1371*280575beSPatrick McGehearty	fmovd	%d30, %d6
1372*280575beSPatrick McGehearty	bgt,pt	%ncc, .bc_aln_100_loop
1373*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1374*280575beSPatrick McGehearty	add	%i1, %i0, %i1
1375*280575beSPatrick McGehearty
1376*280575beSPatrick McGehearty	std	%d0, [%i1]
1377*280575beSPatrick McGehearty	std	%d2, [%i1+8]
1378*280575beSPatrick McGehearty	std	%d4, [%i1+16]
1379*280575beSPatrick McGehearty	std	%d6, [%i1+24]
1380*280575beSPatrick McGehearty	ba	.bc_remain_stuff
1381*280575beSPatrick McGehearty	add	%i1, 32, %i1
1382*280575beSPatrick McGehearty	! END OF aln_100
1383*280575beSPatrick McGehearty
1384*280575beSPatrick McGehearty.bc_aln_011:
1385*280575beSPatrick McGehearty! Alignment off by 40 bytes
1386*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1387*280575beSPatrick McGehearty	ldd	[%i0], %d0
1388*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
1389*280575beSPatrick McGehearty	ldd	[%i0+16], %d4
1390*280575beSPatrick McGehearty	ldd	[%i0+24], %d6
1391*280575beSPatrick McGehearty	ldd	[%i0+32], %d8
1392*280575beSPatrick McGehearty	add	%i0, 40, %i0
1393*280575beSPatrick McGehearty	sub	%i2, 40, %i2
1394*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
1395*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
1396*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
1397*280575beSPatrick McGehearty.bc_aln_011_loop:
1398*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
1399*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
1400*280575beSPatrick McGehearty	fmovd	%d16, %d10
1401*280575beSPatrick McGehearty	fmovd	%d18, %d12
1402*280575beSPatrick McGehearty	fmovd	%d20, %d14
1403*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
1404*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
1405*280575beSPatrick McGehearty	add	%i0, 64, %i0
1406*280575beSPatrick McGehearty	fmovd	%d22, %d0
1407*280575beSPatrick McGehearty	fmovd	%d24, %d2
1408*280575beSPatrick McGehearty	fmovd	%d26, %d4
1409*280575beSPatrick McGehearty	fmovd	%d28, %d6
1410*280575beSPatrick McGehearty	fmovd	%d30, %d8
1411*280575beSPatrick McGehearty	bgt,pt	%ncc, .bc_aln_011_loop
1412*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1413*280575beSPatrick McGehearty	add	%i1, %i0, %i1
1414*280575beSPatrick McGehearty
1415*280575beSPatrick McGehearty	std	%d0, [%i1]
1416*280575beSPatrick McGehearty	std	%d2, [%i1+8]
1417*280575beSPatrick McGehearty	std	%d4, [%i1+16]
1418*280575beSPatrick McGehearty	std	%d6, [%i1+24]
1419*280575beSPatrick McGehearty	std	%d8, [%i1+32]
1420*280575beSPatrick McGehearty	ba	.bc_remain_stuff
1421*280575beSPatrick McGehearty	add	%i1, 40, %i1
1422*280575beSPatrick McGehearty	! END OF aln_011
1423*280575beSPatrick McGehearty
1424*280575beSPatrick McGehearty.bc_aln_010:
1425*280575beSPatrick McGehearty! Alignment off by 48 bytes
1426*280575beSPatrick McGehearty	ldd	[%i0], %d0
1427*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
1428*280575beSPatrick McGehearty	ldd	[%i0+16], %d4
1429*280575beSPatrick McGehearty	ldd	[%i0+24], %d6
1430*280575beSPatrick McGehearty	ldd	[%i0+32], %d8
1431*280575beSPatrick McGehearty	ldd	[%i0+40], %d10
1432*280575beSPatrick McGehearty	add	%i0, 48, %i0
1433*280575beSPatrick McGehearty	sub	%i2, 48, %i2
1434*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
1435*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
1436*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
1437*280575beSPatrick McGehearty.bc_aln_010_loop:
1438*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
1439*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
1440*280575beSPatrick McGehearty	fmovd	%d16, %d12
1441*280575beSPatrick McGehearty	fmovd	%d18, %d14
1442*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
1443*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
1444*280575beSPatrick McGehearty	add	%i0, 64, %i0
1445*280575beSPatrick McGehearty	fmovd	%d20, %d0
1446*280575beSPatrick McGehearty	fmovd	%d22, %d2
1447*280575beSPatrick McGehearty	fmovd	%d24, %d4
1448*280575beSPatrick McGehearty	fmovd	%d26, %d6
1449*280575beSPatrick McGehearty	fmovd	%d28, %d8
1450*280575beSPatrick McGehearty	fmovd	%d30, %d10
1451*280575beSPatrick McGehearty	bgt,pt	%ncc, .bc_aln_010_loop
1452*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1453*280575beSPatrick McGehearty	add	%i1, %i0, %i1
1454*280575beSPatrick McGehearty
1455*280575beSPatrick McGehearty	std	%d0, [%i1]
1456*280575beSPatrick McGehearty	std	%d2, [%i1+8]
1457*280575beSPatrick McGehearty	std	%d4, [%i1+16]
1458*280575beSPatrick McGehearty	std	%d6, [%i1+24]
1459*280575beSPatrick McGehearty	std	%d8, [%i1+32]
1460*280575beSPatrick McGehearty	std	%d10, [%i1+40]
1461*280575beSPatrick McGehearty	ba	.bc_remain_stuff
1462*280575beSPatrick McGehearty	add	%i1, 48, %i1
1463*280575beSPatrick McGehearty	! END OF aln_010
1464*280575beSPatrick McGehearty
1465*280575beSPatrick McGehearty.bc_aln_001:
1466*280575beSPatrick McGehearty! Alignment off by 56 bytes
1467*280575beSPatrick McGehearty	ldd	[%i0], %d0
1468*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
1469*280575beSPatrick McGehearty	ldd	[%i0+16], %d4
1470*280575beSPatrick McGehearty	ldd	[%i0+24], %d6
1471*280575beSPatrick McGehearty	ldd	[%i0+32], %d8
1472*280575beSPatrick McGehearty	ldd	[%i0+40], %d10
1473*280575beSPatrick McGehearty	ldd	[%i0+48], %d12
1474*280575beSPatrick McGehearty	add	%i0, 56, %i0
1475*280575beSPatrick McGehearty	sub	%i2, 56, %i2
1476*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
1477*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
1478*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
1479*280575beSPatrick McGehearty.bc_aln_001_loop:
1480*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
1481*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
1482*280575beSPatrick McGehearty	fmovd	%d16, %d14
1483*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
1484*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
1485*280575beSPatrick McGehearty	add	%i0, 64, %i0
1486*280575beSPatrick McGehearty	fmovd	%d18, %d0
1487*280575beSPatrick McGehearty	fmovd	%d20, %d2
1488*280575beSPatrick McGehearty	fmovd	%d22, %d4
1489*280575beSPatrick McGehearty	fmovd	%d24, %d6
1490*280575beSPatrick McGehearty	fmovd	%d26, %d8
1491*280575beSPatrick McGehearty	fmovd	%d28, %d10
1492*280575beSPatrick McGehearty	fmovd	%d30, %d12
1493*280575beSPatrick McGehearty	bgt,pt	%ncc, .bc_aln_001_loop
1494*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1495*280575beSPatrick McGehearty	add	%i1, %i0, %i1
1496*280575beSPatrick McGehearty
1497*280575beSPatrick McGehearty	std	%d0, [%i1]
1498*280575beSPatrick McGehearty	std	%d2, [%i1+8]
1499*280575beSPatrick McGehearty	std	%d4, [%i1+16]
1500*280575beSPatrick McGehearty	std	%d6, [%i1+24]
1501*280575beSPatrick McGehearty	std	%d8, [%i1+32]
1502*280575beSPatrick McGehearty	std	%d10, [%i1+40]
1503*280575beSPatrick McGehearty	std	%d12, [%i1+48]
1504*280575beSPatrick McGehearty	ba	.bc_remain_stuff
1505*280575beSPatrick McGehearty	add	%i1, 56, %i1
1506*280575beSPatrick McGehearty	! END OF aln_001
1507*280575beSPatrick McGehearty
1508*280575beSPatrick McGehearty.bc_aln_000:
1509*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1510*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
1511*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
1512*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
1513*280575beSPatrick McGehearty.bc_aln_000_loop:
1514*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d0
1515*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
1516*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
1517*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
1518*280575beSPatrick McGehearty	add	%i0, 64, %i0
1519*280575beSPatrick McGehearty	bgt,pt	%ncc, .bc_aln_000_loop
1520*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1521*280575beSPatrick McGehearty	add	%i1, %i0, %i1
1522*280575beSPatrick McGehearty
1523*280575beSPatrick McGehearty	! END OF aln_000
1524*280575beSPatrick McGehearty
1525*280575beSPatrick McGehearty.bc_remain_stuff:
1526*280575beSPatrick McGehearty	subcc	%i2, 31, %i2		! adjust length to allow cc test
1527*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_aln_31
1528*280575beSPatrick McGehearty	nop
1529*280575beSPatrick McGehearty.bc_aln_32:
1530*280575beSPatrick McGehearty	ldx	[%i0], %o4		! move 32 bytes
1531*280575beSPatrick McGehearty	subcc	%i2, 32, %i2		! decrement length count by 32
1532*280575beSPatrick McGehearty	stx	%o4, [%i1]
1533*280575beSPatrick McGehearty	ldx	[%i0+8], %o4
1534*280575beSPatrick McGehearty	stx	%o4, [%i1+8]
1535*280575beSPatrick McGehearty	ldx	[%i0+16], %o4
1536*280575beSPatrick McGehearty	add	%i0, 32, %i0		! increase src ptr by 32
1537*280575beSPatrick McGehearty	stx	%o4, [%i1+16]
1538*280575beSPatrick McGehearty	ldx	[%i0-8], %o4
1539*280575beSPatrick McGehearty	add	%i1, 32, %i1		! increase dst ptr by 32
1540*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_aln_32	! repeat if at least 32 bytes left
1541*280575beSPatrick McGehearty	stx	%o4, [%i1-8]
1542*280575beSPatrick McGehearty.bc_aln_31:
1543*280575beSPatrick McGehearty	addcc	%i2, 24, %i2		! adjust count to be off by 7
1544*280575beSPatrick McGehearty	ble,pt	%ncc, .bc_aln_7		! skip if 7 or fewer bytes left
1545*280575beSPatrick McGehearty	nop				!
1546*280575beSPatrick McGehearty.bc_aln_15:
1547*280575beSPatrick McGehearty	ldx	[%i0], %o4		! move 8 bytes
1548*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increase src ptr by 8
1549*280575beSPatrick McGehearty	subcc	%i2, 8, %i2		! decrease count by 8
1550*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increase dst ptr by 8
1551*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_aln_15
1552*280575beSPatrick McGehearty	stx	%o4, [%i1-8]		!
1553*280575beSPatrick McGehearty.bc_aln_7:
1554*280575beSPatrick McGehearty	addcc	%i2, 7, %i2		! finish adjustment of remaining count
1555*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_exit		! exit if finished
1556*280575beSPatrick McGehearty	cmp	%i2, 4
1557*280575beSPatrick McGehearty	blt,pt	%ncc, .bc_unaln3x	! skip if less than 4 bytes left
1558*280575beSPatrick McGehearty	nop				!
1559*280575beSPatrick McGehearty	ld	[%i0], %o4		! move 4 bytes
1560*280575beSPatrick McGehearty	add	%i0, 4, %i0		! increase src ptr by 4
1561*280575beSPatrick McGehearty	add	%i1, 4, %i1		! increase dst ptr by 4
1562*280575beSPatrick McGehearty	subcc	%i2, 4, %i2		! decrease count by 4
1563*280575beSPatrick McGehearty	bnz	.bc_unaln3x
1564*280575beSPatrick McGehearty	stw	%o4, [%i1-4]
1565*280575beSPatrick McGehearty	ba	.bc_exit
1566*280575beSPatrick McGehearty	nop
1567*280575beSPatrick McGehearty
1568*280575beSPatrick McGehearty	! destination alignment code
1569*280575beSPatrick McGehearty.bc_big_d1:
1570*280575beSPatrick McGehearty	ldub	[%i0], %o4		! move a byte
1571*280575beSPatrick McGehearty	add	%i0, 1, %i0
1572*280575beSPatrick McGehearty	stb	%o4, [%i1]
1573*280575beSPatrick McGehearty	add	%i1, 1, %i1
1574*280575beSPatrick McGehearty	andcc	%i1, 2, %o3
1575*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_big_d2f
1576*280575beSPatrick McGehearty	sub	%i2, 1, %i2
1577*280575beSPatrick McGehearty.bc_big_d2:
1578*280575beSPatrick McGehearty	ldub	[%i0], %o4		! move a half-word (src align unknown)
1579*280575beSPatrick McGehearty	ldub	[%i0+1], %o3
1580*280575beSPatrick McGehearty	add	%i0, 2, %i0
1581*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
1582*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
1583*280575beSPatrick McGehearty	sth	%o4, [%i1]
1584*280575beSPatrick McGehearty	add	%i1, 2, %i1
1585*280575beSPatrick McGehearty	andcc	%i1, 4, %o3
1586*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_big_d4f
1587*280575beSPatrick McGehearty	sub	%i2, 2, %i2
1588*280575beSPatrick McGehearty.bc_big_d4:
1589*280575beSPatrick McGehearty	ldub	[%i0], %o4		! move a word (src align unknown)
1590*280575beSPatrick McGehearty	ldub	[%i0+1], %o3
1591*280575beSPatrick McGehearty	sll	%o4, 24, %o4		! position
1592*280575beSPatrick McGehearty	sll	%o3, 16, %o3		! position
1593*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
1594*280575beSPatrick McGehearty	ldub	[%i0+2], %o4
1595*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
1596*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
1597*280575beSPatrick McGehearty	ldub	[%i0+3], %o4
1598*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
1599*280575beSPatrick McGehearty	stw	%o4,[%i1]		! store four bytes
1600*280575beSPatrick McGehearty	add	%i0, 4, %i0		! adjust src by 4
1601*280575beSPatrick McGehearty	add	%i1, 4, %i1		! adjust dest by 4
1602*280575beSPatrick McGehearty	ba	.bc_big_d4f
1603*280575beSPatrick McGehearty	sub	%i2, 4, %i2		! adjust count by 4
1604*280575beSPatrick McGehearty
1605*280575beSPatrick McGehearty
1606*280575beSPatrick McGehearty	! Dst is on 8 byte boundary; src is not;
1607*280575beSPatrick McGehearty.bc_big_unal8:
1608*280575beSPatrick McGehearty	andcc	%i1, 0x3f, %o3		! is dst 64-byte block aligned?
1609*280575beSPatrick McGehearty	bz	%ncc, .bc_unalnsrc
1610*280575beSPatrick McGehearty	sub	%o3, 64, %o3		! %o3 will be multiple of 8
1611*280575beSPatrick McGehearty	neg	%o3			! bytes until dest is 64 byte aligned
1612*280575beSPatrick McGehearty	sub	%i2, %o3, %i2		! update cnt with bytes to be moved
1613*280575beSPatrick McGehearty	! Move bytes according to source alignment
1614*280575beSPatrick McGehearty	andcc	%i0, 0x1, %o4
1615*280575beSPatrick McGehearty	bnz	%ncc, .bc_unalnbyte	! check for byte alignment
1616*280575beSPatrick McGehearty	nop
1617*280575beSPatrick McGehearty	andcc	%i0, 2, %o4		! check for half word alignment
1618*280575beSPatrick McGehearty	bnz	%ncc, .bc_unalnhalf
1619*280575beSPatrick McGehearty	nop
1620*280575beSPatrick McGehearty	! Src is word aligned, move bytes until dest 64 byte aligned
1621*280575beSPatrick McGehearty.bc_unalnword:
1622*280575beSPatrick McGehearty	ld	[%i0], %o4		! load 4 bytes
1623*280575beSPatrick McGehearty	stw	%o4, [%i1]		! and store 4 bytes
1624*280575beSPatrick McGehearty	ld	[%i0+4], %o4		! load 4 bytes
1625*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increase src ptr by 8
1626*280575beSPatrick McGehearty	stw	%o4, [%i1+4]		! and store 4 bytes
1627*280575beSPatrick McGehearty	subcc	%o3, 8, %o3		! decrease count by 8
1628*280575beSPatrick McGehearty	bnz	%ncc, .bc_unalnword
1629*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increase dst ptr by 8
1630*280575beSPatrick McGehearty	ba	.bc_unalnsrc
1631*280575beSPatrick McGehearty	nop
1632*280575beSPatrick McGehearty
1633*280575beSPatrick McGehearty	! Src is half-word aligned, move bytes until dest 64 byte aligned
1634*280575beSPatrick McGehearty.bc_unalnhalf:
1635*280575beSPatrick McGehearty	lduh	[%i0], %o4		! load 2 bytes
1636*280575beSPatrick McGehearty	sllx	%o4, 32, %i3		! shift left
1637*280575beSPatrick McGehearty	lduw	[%i0+2], %o4
1638*280575beSPatrick McGehearty	or	%o4, %i3, %i3
1639*280575beSPatrick McGehearty	sllx	%i3, 16, %i3
1640*280575beSPatrick McGehearty	lduh	[%i0+6], %o4
1641*280575beSPatrick McGehearty	or	%o4, %i3, %i3
1642*280575beSPatrick McGehearty	stx	%i3, [%i1]
1643*280575beSPatrick McGehearty	add	%i0, 8, %i0
1644*280575beSPatrick McGehearty	subcc	%o3, 8, %o3
1645*280575beSPatrick McGehearty	bnz	%ncc, .bc_unalnhalf
1646*280575beSPatrick McGehearty	add	%i1, 8, %i1
1647*280575beSPatrick McGehearty	ba	.bc_unalnsrc
1648*280575beSPatrick McGehearty	nop
1649*280575beSPatrick McGehearty
1650*280575beSPatrick McGehearty	! Src is Byte aligned, move bytes until dest 64 byte aligned
1651*280575beSPatrick McGehearty.bc_unalnbyte:
1652*280575beSPatrick McGehearty	sub	%i1, %i0, %i1		! share pointer advance
1653*280575beSPatrick McGehearty.bc_unalnbyte_loop:
1654*280575beSPatrick McGehearty	ldub	[%i0], %o4
1655*280575beSPatrick McGehearty	sllx	%o4, 56, %i3
1656*280575beSPatrick McGehearty	lduh	[%i0+1], %o4
1657*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
1658*280575beSPatrick McGehearty	or	%o4, %i3, %i3
1659*280575beSPatrick McGehearty	lduh	[%i0+3], %o4
1660*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
1661*280575beSPatrick McGehearty	or	%o4, %i3, %i3
1662*280575beSPatrick McGehearty	lduh	[%i0+5], %o4
1663*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
1664*280575beSPatrick McGehearty	or	%o4, %i3, %i3
1665*280575beSPatrick McGehearty	ldub	[%i0+7], %o4
1666*280575beSPatrick McGehearty	or	%o4, %i3, %i3
1667*280575beSPatrick McGehearty	stx	%i3, [%i1+%i0]
1668*280575beSPatrick McGehearty	subcc	%o3, 8, %o3
1669*280575beSPatrick McGehearty	bnz	%ncc, .bc_unalnbyte_loop
1670*280575beSPatrick McGehearty	add	%i0, 8, %i0
1671*280575beSPatrick McGehearty	add	%i1,%i0, %i1		! restore pointer
1672*280575beSPatrick McGehearty
1673*280575beSPatrick McGehearty	! Destination is now block (64 byte) aligned, src is not 8 byte aligned
1674*280575beSPatrick McGehearty.bc_unalnsrc:
1675*280575beSPatrick McGehearty	andn	%i2, 0x3f, %i3		! %i3 is multiple of block size
1676*280575beSPatrick McGehearty	and	%i2, 0x3f, %i2		! residue bytes in %i2
1677*280575beSPatrick McGehearty	add	%i2, 64, %i2		! Insure we don't load beyond
1678*280575beSPatrick McGehearty	sub	%i3, 64, %i3		! end of source buffer
1679*280575beSPatrick McGehearty
1680*280575beSPatrick McGehearty	andn	%i0, 0x3f, %o4		! %o4 has block aligned src address
1681*280575beSPatrick McGehearty	prefetch [%o4 + (3 * CACHE_LINE)], #one_read
1682*280575beSPatrick McGehearty	alignaddr %i0, %g0, %g0		! generate %gsr
1683*280575beSPatrick McGehearty	add	%i0, %i3, %i0		! advance %i0 to after blocks
1684*280575beSPatrick McGehearty	!
1685*280575beSPatrick McGehearty	! Determine source alignment to correct 8 byte offset
1686*280575beSPatrick McGehearty	andcc	%i0, 0x20, %o3
1687*280575beSPatrick McGehearty	brnz,pn	%o3, .bc_unaln_1
1688*280575beSPatrick McGehearty	andcc	%i0, 0x10, %o3
1689*280575beSPatrick McGehearty	brnz,pn	%o3, .bc_unaln_01
1690*280575beSPatrick McGehearty	andcc	%i0, 0x08, %o3
1691*280575beSPatrick McGehearty	brz,a	%o3, .bc_unaln_000
1692*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1693*280575beSPatrick McGehearty	ba	.bc_unaln_001
1694*280575beSPatrick McGehearty	nop
1695*280575beSPatrick McGehearty.bc_unaln_01:
1696*280575beSPatrick McGehearty	brnz,a	%o3, .bc_unaln_011
1697*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1698*280575beSPatrick McGehearty	ba	.bc_unaln_010
1699*280575beSPatrick McGehearty	nop
1700*280575beSPatrick McGehearty.bc_unaln_1:
1701*280575beSPatrick McGehearty	brnz,pn	%o3, .bc_unaln_11
1702*280575beSPatrick McGehearty	andcc	%i0, 0x08, %o3
1703*280575beSPatrick McGehearty	brnz,a	%o3, .bc_unaln_101
1704*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1705*280575beSPatrick McGehearty	ba	.bc_unaln_100
1706*280575beSPatrick McGehearty	nop
1707*280575beSPatrick McGehearty.bc_unaln_11:
1708*280575beSPatrick McGehearty	brz,pn	%o3, .bc_unaln_110
1709*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
1710*280575beSPatrick McGehearty
/*
 * Block loop for a source that starts in doubleword 7 (byte offset 56)
 * of its 64-byte block.  That doubleword is preloaded into %d14.  Each
 * pass block-loads the next 64 source bytes into %d16-%d30, combines
 * adjacent doublewords with faligndata into %d48-%d62, carries %d30 over
 * to %d14 for the next pass, and block-stores the 64 result bytes to the
 * destination at %i1.  %i3 is the remaining block byte count; the loop
 * repeats while it is still above zero after subtracting 64.  The
 * prefetch occupies the loop branch's delay slot.
 */
1711*280575beSPatrick McGehearty.bc_unaln_111:
1712*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
1713*280575beSPatrick McGehearty.bc_unaln_111_loop:
1714*280575beSPatrick McGehearty	add	%o4, 64, %o4
1715*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
1716*280575beSPatrick McGehearty	faligndata %d14, %d16, %d48
1717*280575beSPatrick McGehearty	faligndata %d16, %d18, %d50
1718*280575beSPatrick McGehearty	faligndata %d18, %d20, %d52
1719*280575beSPatrick McGehearty	faligndata %d20, %d22, %d54
1720*280575beSPatrick McGehearty	faligndata %d22, %d24, %d56
1721*280575beSPatrick McGehearty	faligndata %d24, %d26, %d58
1722*280575beSPatrick McGehearty	faligndata %d26, %d28, %d60
1723*280575beSPatrick McGehearty	faligndata %d28, %d30, %d62
1724*280575beSPatrick McGehearty	fmovd	%d30, %d14
1725*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
1726*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
1727*280575beSPatrick McGehearty	add	%i1, 64, %i1
1728*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_111_loop
1729*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1730*280575beSPatrick McGehearty	ba	.bc_unaln_done
1731*280575beSPatrick McGehearty	nop
1732*280575beSPatrick McGehearty
/*
 * Block loop for a source that starts in doubleword 6 (byte offset 48)
 * of its 64-byte block.  Doublewords 6 and 7 are preloaded into %d12 and
 * %d14.  Each pass block-loads the next 64 source bytes into %d16-%d30,
 * builds eight aligned doublewords in %d48-%d62 with faligndata, carries
 * %d28/%d30 over to %d12/%d14, and block-stores 64 bytes to %i1.  The
 * loop repeats while %i3 is still above zero after subtracting 64; the
 * prefetch occupies the branch delay slot.
 */
1733*280575beSPatrick McGehearty.bc_unaln_110:
1734*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
1735*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
1736*280575beSPatrick McGehearty.bc_unaln_110_loop:
1737*280575beSPatrick McGehearty	add	%o4, 64, %o4
1738*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
1739*280575beSPatrick McGehearty	faligndata %d12, %d14, %d48
1740*280575beSPatrick McGehearty	faligndata %d14, %d16, %d50
1741*280575beSPatrick McGehearty	faligndata %d16, %d18, %d52
1742*280575beSPatrick McGehearty	faligndata %d18, %d20, %d54
1743*280575beSPatrick McGehearty	faligndata %d20, %d22, %d56
1744*280575beSPatrick McGehearty	faligndata %d22, %d24, %d58
1745*280575beSPatrick McGehearty	faligndata %d24, %d26, %d60
1746*280575beSPatrick McGehearty	faligndata %d26, %d28, %d62
1747*280575beSPatrick McGehearty	fmovd	%d28, %d12
1748*280575beSPatrick McGehearty	fmovd	%d30, %d14
1749*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
1750*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
1751*280575beSPatrick McGehearty	add	%i1, 64, %i1
1752*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_110_loop
1753*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1754*280575beSPatrick McGehearty	ba	.bc_unaln_done
1755*280575beSPatrick McGehearty	nop
1756*280575beSPatrick McGehearty
/*
 * Block loop for a source that starts in doubleword 5 (byte offset 40)
 * of its 64-byte block.  Doublewords 5-7 are preloaded into %d10-%d14.
 * Each pass block-loads the next 64 source bytes into %d16-%d30, builds
 * eight aligned doublewords in %d48-%d62 with faligndata, carries
 * %d26-%d30 over to %d10-%d14, and block-stores 64 bytes to %i1.  The
 * loop repeats while %i3 is still above zero after subtracting 64; the
 * prefetch occupies the branch delay slot.
 */
1757*280575beSPatrick McGehearty.bc_unaln_101:
1758*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
1759*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
1760*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
1761*280575beSPatrick McGehearty.bc_unaln_101_loop:
1762*280575beSPatrick McGehearty	add	%o4, 64, %o4
1763*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
1764*280575beSPatrick McGehearty	faligndata %d10, %d12, %d48
1765*280575beSPatrick McGehearty	faligndata %d12, %d14, %d50
1766*280575beSPatrick McGehearty	faligndata %d14, %d16, %d52
1767*280575beSPatrick McGehearty	faligndata %d16, %d18, %d54
1768*280575beSPatrick McGehearty	faligndata %d18, %d20, %d56
1769*280575beSPatrick McGehearty	faligndata %d20, %d22, %d58
1770*280575beSPatrick McGehearty	faligndata %d22, %d24, %d60
1771*280575beSPatrick McGehearty	faligndata %d24, %d26, %d62
1772*280575beSPatrick McGehearty	fmovd	%d26, %d10
1773*280575beSPatrick McGehearty	fmovd	%d28, %d12
1774*280575beSPatrick McGehearty	fmovd	%d30, %d14
1775*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
1776*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
1777*280575beSPatrick McGehearty	add	%i1, 64, %i1
1778*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_101_loop
1779*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1780*280575beSPatrick McGehearty	ba	.bc_unaln_done
1781*280575beSPatrick McGehearty	nop
1782*280575beSPatrick McGehearty
/*
 * Block loop for a source that starts in doubleword 4 (byte offset 32)
 * of its 64-byte block.  Doublewords 4-7 are preloaded into %d8-%d14.
 * Each pass block-loads the next 64 source bytes into %d16-%d30, builds
 * eight aligned doublewords in %d48-%d62 with faligndata, carries
 * %d24-%d30 over to %d8-%d14, and block-stores 64 bytes to %i1.  The
 * loop repeats while %i3 is still above zero after subtracting 64; the
 * prefetch occupies the branch delay slot.
 */
1783*280575beSPatrick McGehearty.bc_unaln_100:
1784*280575beSPatrick McGehearty	ldd	[%o4+32], %d8
1785*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
1786*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
1787*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
1788*280575beSPatrick McGehearty.bc_unaln_100_loop:
1789*280575beSPatrick McGehearty	add	%o4, 64, %o4
1790*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
1791*280575beSPatrick McGehearty	faligndata %d8, %d10, %d48
1792*280575beSPatrick McGehearty	faligndata %d10, %d12, %d50
1793*280575beSPatrick McGehearty	faligndata %d12, %d14, %d52
1794*280575beSPatrick McGehearty	faligndata %d14, %d16, %d54
1795*280575beSPatrick McGehearty	faligndata %d16, %d18, %d56
1796*280575beSPatrick McGehearty	faligndata %d18, %d20, %d58
1797*280575beSPatrick McGehearty	faligndata %d20, %d22, %d60
1798*280575beSPatrick McGehearty	faligndata %d22, %d24, %d62
1799*280575beSPatrick McGehearty	fmovd	%d24, %d8
1800*280575beSPatrick McGehearty	fmovd	%d26, %d10
1801*280575beSPatrick McGehearty	fmovd	%d28, %d12
1802*280575beSPatrick McGehearty	fmovd	%d30, %d14
1803*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
1804*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
1805*280575beSPatrick McGehearty	add	%i1, 64, %i1
1806*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_100_loop
1807*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1808*280575beSPatrick McGehearty	ba	.bc_unaln_done
1809*280575beSPatrick McGehearty	nop
1810*280575beSPatrick McGehearty
/*
 * Block loop for a source that starts in doubleword 3 (byte offset 24)
 * of its 64-byte block.  Doublewords 3-7 are preloaded into %d6-%d14.
 * Each pass block-loads the next 64 source bytes into %d16-%d30, builds
 * eight aligned doublewords in %d48-%d62 with faligndata, carries
 * %d22-%d30 over to %d6-%d14, and block-stores 64 bytes to %i1.  The
 * loop repeats while %i3 is still above zero after subtracting 64; the
 * prefetch occupies the branch delay slot.
 */
1811*280575beSPatrick McGehearty.bc_unaln_011:
1812*280575beSPatrick McGehearty	ldd	[%o4+24], %d6
1813*280575beSPatrick McGehearty	ldd	[%o4+32], %d8
1814*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
1815*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
1816*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
1817*280575beSPatrick McGehearty.bc_unaln_011_loop:
1818*280575beSPatrick McGehearty	add	%o4, 64, %o4
1819*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
1820*280575beSPatrick McGehearty	faligndata %d6, %d8, %d48
1821*280575beSPatrick McGehearty	faligndata %d8, %d10, %d50
1822*280575beSPatrick McGehearty	faligndata %d10, %d12, %d52
1823*280575beSPatrick McGehearty	faligndata %d12, %d14, %d54
1824*280575beSPatrick McGehearty	faligndata %d14, %d16, %d56
1825*280575beSPatrick McGehearty	faligndata %d16, %d18, %d58
1826*280575beSPatrick McGehearty	faligndata %d18, %d20, %d60
1827*280575beSPatrick McGehearty	faligndata %d20, %d22, %d62
1828*280575beSPatrick McGehearty	fmovd	%d22, %d6
1829*280575beSPatrick McGehearty	fmovd	%d24, %d8
1830*280575beSPatrick McGehearty	fmovd	%d26, %d10
1831*280575beSPatrick McGehearty	fmovd	%d28, %d12
1832*280575beSPatrick McGehearty	fmovd	%d30, %d14
1833*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
1834*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
1835*280575beSPatrick McGehearty	add	%i1, 64, %i1
1836*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_011_loop
1837*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1838*280575beSPatrick McGehearty	ba	.bc_unaln_done
1839*280575beSPatrick McGehearty	nop
1840*280575beSPatrick McGehearty
/*
 * Block loop for a source that starts in doubleword 2 (byte offset 16)
 * of its 64-byte block.  Doublewords 2-7 are preloaded into %d4-%d14.
 * Each pass block-loads the next 64 source bytes into %d16-%d30, builds
 * eight aligned doublewords in %d48-%d62 with faligndata, carries
 * %d20-%d30 over to %d4-%d14, and block-stores 64 bytes to %i1.  The
 * loop repeats while %i3 is still above zero after subtracting 64; the
 * prefetch occupies the branch delay slot.
 */
1841*280575beSPatrick McGehearty.bc_unaln_010:
1842*280575beSPatrick McGehearty	ldd	[%o4+16], %d4
1843*280575beSPatrick McGehearty	ldd	[%o4+24], %d6
1844*280575beSPatrick McGehearty	ldd	[%o4+32], %d8
1845*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
1846*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
1847*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
1848*280575beSPatrick McGehearty.bc_unaln_010_loop:
1849*280575beSPatrick McGehearty	add	%o4, 64, %o4
1850*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
1851*280575beSPatrick McGehearty	faligndata %d4, %d6, %d48
1852*280575beSPatrick McGehearty	faligndata %d6, %d8, %d50
1853*280575beSPatrick McGehearty	faligndata %d8, %d10, %d52
1854*280575beSPatrick McGehearty	faligndata %d10, %d12, %d54
1855*280575beSPatrick McGehearty	faligndata %d12, %d14, %d56
1856*280575beSPatrick McGehearty	faligndata %d14, %d16, %d58
1857*280575beSPatrick McGehearty	faligndata %d16, %d18, %d60
1858*280575beSPatrick McGehearty	faligndata %d18, %d20, %d62
1859*280575beSPatrick McGehearty	fmovd	%d20, %d4
1860*280575beSPatrick McGehearty	fmovd	%d22, %d6
1861*280575beSPatrick McGehearty	fmovd	%d24, %d8
1862*280575beSPatrick McGehearty	fmovd	%d26, %d10
1863*280575beSPatrick McGehearty	fmovd	%d28, %d12
1864*280575beSPatrick McGehearty	fmovd	%d30, %d14
1865*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
1866*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
1867*280575beSPatrick McGehearty	add	%i1, 64, %i1
1868*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_010_loop
1869*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1870*280575beSPatrick McGehearty	ba	.bc_unaln_done
1871*280575beSPatrick McGehearty	nop
1872*280575beSPatrick McGehearty
/*
 * Block loop for a source that starts in doubleword 1 (byte offset 8)
 * of its 64-byte block.  Doublewords 1-7 are preloaded into %d2-%d14.
 * Each pass block-loads the next 64 source bytes into %d16-%d30, builds
 * eight aligned doublewords in %d48-%d62 with faligndata, carries
 * %d18-%d30 over to %d2-%d14, and block-stores 64 bytes to %i1.  The
 * loop repeats while %i3 is still above zero after subtracting 64; the
 * prefetch occupies the branch delay slot.
 */
1873*280575beSPatrick McGehearty.bc_unaln_001:
1874*280575beSPatrick McGehearty	ldd	[%o4+8], %d2
1875*280575beSPatrick McGehearty	ldd	[%o4+16], %d4
1876*280575beSPatrick McGehearty	ldd	[%o4+24], %d6
1877*280575beSPatrick McGehearty	ldd	[%o4+32], %d8
1878*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
1879*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
1880*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
1881*280575beSPatrick McGehearty.bc_unaln_001_loop:
1882*280575beSPatrick McGehearty	add	%o4, 64, %o4
1883*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
1884*280575beSPatrick McGehearty	faligndata %d2, %d4, %d48
1885*280575beSPatrick McGehearty	faligndata %d4, %d6, %d50
1886*280575beSPatrick McGehearty	faligndata %d6, %d8, %d52
1887*280575beSPatrick McGehearty	faligndata %d8, %d10, %d54
1888*280575beSPatrick McGehearty	faligndata %d10, %d12, %d56
1889*280575beSPatrick McGehearty	faligndata %d12, %d14, %d58
1890*280575beSPatrick McGehearty	faligndata %d14, %d16, %d60
1891*280575beSPatrick McGehearty	faligndata %d16, %d18, %d62
1892*280575beSPatrick McGehearty	fmovd	%d18, %d2
1893*280575beSPatrick McGehearty	fmovd	%d20, %d4
1894*280575beSPatrick McGehearty	fmovd	%d22, %d6
1895*280575beSPatrick McGehearty	fmovd	%d24, %d8
1896*280575beSPatrick McGehearty	fmovd	%d26, %d10
1897*280575beSPatrick McGehearty	fmovd	%d28, %d12
1898*280575beSPatrick McGehearty	fmovd	%d30, %d14
1899*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
1900*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
1901*280575beSPatrick McGehearty	add	%i1, 64, %i1
1902*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_001_loop
1903*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1904*280575beSPatrick McGehearty	ba	.bc_unaln_done
1905*280575beSPatrick McGehearty	nop
1906*280575beSPatrick McGehearty
/*
 * Block loop for a source that starts in doubleword 0 of its 64-byte
 * block.  The whole first block is block-loaded into %d0-%d14.  Each
 * pass block-loads the next 64 source bytes into %d16-%d30, builds eight
 * aligned doublewords in %d48-%d62 with faligndata, copies %d16-%d30
 * down to %d0-%d14 for the next pass, and block-stores 64 bytes to %i1.
 * The loop repeats while %i3 is still above zero after subtracting 64;
 * the prefetch occupies the branch delay slot.  Unlike the other seven
 * variants there is no closing branch: control falls through to
 * .bc_unaln_done.
 */
1907*280575beSPatrick McGehearty.bc_unaln_000:
1908*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d0
1909*280575beSPatrick McGehearty.bc_unaln_000_loop:
1910*280575beSPatrick McGehearty	add	%o4, 64, %o4
1911*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
1912*280575beSPatrick McGehearty	faligndata %d0, %d2, %d48
1913*280575beSPatrick McGehearty	faligndata %d2, %d4, %d50
1914*280575beSPatrick McGehearty	faligndata %d4, %d6, %d52
1915*280575beSPatrick McGehearty	faligndata %d6, %d8, %d54
1916*280575beSPatrick McGehearty	faligndata %d8, %d10, %d56
1917*280575beSPatrick McGehearty	faligndata %d10, %d12, %d58
1918*280575beSPatrick McGehearty	faligndata %d12, %d14, %d60
1919*280575beSPatrick McGehearty	faligndata %d14, %d16, %d62
1920*280575beSPatrick McGehearty	fmovd	%d16, %d0
1921*280575beSPatrick McGehearty	fmovd	%d18, %d2
1922*280575beSPatrick McGehearty	fmovd	%d20, %d4
1923*280575beSPatrick McGehearty	fmovd	%d22, %d6
1924*280575beSPatrick McGehearty	fmovd	%d24, %d8
1925*280575beSPatrick McGehearty	fmovd	%d26, %d10
1926*280575beSPatrick McGehearty	fmovd	%d28, %d12
1927*280575beSPatrick McGehearty	fmovd	%d30, %d14
1928*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
1929*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
1930*280575beSPatrick McGehearty	add	%i1, 64, %i1
1931*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_000_loop
1932*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
1933*280575beSPatrick McGehearty
/*
 * Tail of the unaligned-source block copy.  %i2 is the leftover byte
 * count, %i0 the source position and %i1 the destination.  The bulk of
 * the tail is copied 8 bytes at a time with faligndata, which still uses
 * the alignment placed in %gsr before the block loops.  Eight bytes are
 * held back (moved from %i3 to %i2) because each pass loads the
 * doubleword *after* the one being completed.
 */
1934*280575beSPatrick McGehearty.bc_unaln_done:
1935*280575beSPatrick McGehearty	! Handle trailing bytes, 64 to 127
1936*280575beSPatrick McGehearty	! Dest long word aligned, Src not long word aligned
1937*280575beSPatrick McGehearty	cmp	%i2, 15
1938*280575beSPatrick McGehearty	bleu	%ncc, .bc_unaln_short
	/*
	 * No nop follows the branch: the andn below is its delay slot
	 * and runs even when the branch is taken.  It only overwrites
	 * %i3, which the code from .bc_unaln_short through .bc_unaln3x
	 * does not read.
	 */
1939*280575beSPatrick McGehearty
1940*280575beSPatrick McGehearty	andn	%i2, 0x7, %i3		! %i3 is multiple of 8
1941*280575beSPatrick McGehearty	and	%i2, 0x7, %i2		! residue bytes in %i2
1942*280575beSPatrick McGehearty	add	%i2, 8, %i2
1943*280575beSPatrick McGehearty	sub	%i3, 8, %i3		! insure we don't load past end of src
1944*280575beSPatrick McGehearty	andn	%i0, 0x7, %o4		! %o4 has long word aligned src address
1945*280575beSPatrick McGehearty	add	%i0, %i3, %i0		! advance %i0 to after multiple of 8
1946*280575beSPatrick McGehearty	ldd	[%o4], %d0		! fetch partial word
	/*
	 * Per pass: load the next source doubleword into %d2, merge it
	 * with the previous one (%d0) into %d16, store that to the
	 * destination, and keep %d2 as the new %d0.  The destination
	 * pointer is advanced in the branch delay slot.
	 */
1947*280575beSPatrick McGehearty.bc_unaln_by8:
1948*280575beSPatrick McGehearty	ldd	[%o4+8], %d2
1949*280575beSPatrick McGehearty	add	%o4, 8, %o4
1950*280575beSPatrick McGehearty	faligndata %d0, %d2, %d16
1951*280575beSPatrick McGehearty	subcc	%i3, 8, %i3
1952*280575beSPatrick McGehearty	std	%d16, [%i1]
1953*280575beSPatrick McGehearty	fmovd	%d2, %d0
1954*280575beSPatrick McGehearty	bgu,pt	%ncc, .bc_unaln_by8
1955*280575beSPatrick McGehearty	add	%i1, 8, %i1
1956*280575beSPatrick McGehearty
/*
 * Final bytes of the unaligned-source copy, done with byte loads so no
 * source alignment is required.  %i2 is the remaining count, %i0 the
 * source and %i1 the destination.  Three stages:
 *   .bc_unaln_short  - if at least 8 bytes remain, copy 8 as two words
 *   .bc_unalnfin     - if at least 4 bytes remain, copy 4 as one word
 *   .bc_unaln3x      - copy the last 1, 2 or 3 bytes one at a time
 * Each word is assembled with the first byte in the most significant
 * position (shift 24, 16, 8, 0) and written with stw.
 */
1957*280575beSPatrick McGehearty.bc_unaln_short:
1958*280575beSPatrick McGehearty	cmp	%i2, 8
1959*280575beSPatrick McGehearty	blt,pt	%ncc, .bc_unalnfin
1960*280575beSPatrick McGehearty	nop
1961*280575beSPatrick McGehearty	ldub	[%i0], %o4
1962*280575beSPatrick McGehearty	sll	%o4, 24, %o3
1963*280575beSPatrick McGehearty	ldub	[%i0+1], %o4
1964*280575beSPatrick McGehearty	sll	%o4, 16, %o4
1965*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1966*280575beSPatrick McGehearty	ldub	[%i0+2], %o4
1967*280575beSPatrick McGehearty	sll	%o4, 8, %o4
1968*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1969*280575beSPatrick McGehearty	ldub	[%i0+3], %o4
1970*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1971*280575beSPatrick McGehearty	stw	%o3, [%i1]
1972*280575beSPatrick McGehearty	ldub	[%i0+4], %o4
1973*280575beSPatrick McGehearty	sll	%o4, 24, %o3
1974*280575beSPatrick McGehearty	ldub	[%i0+5], %o4
1975*280575beSPatrick McGehearty	sll	%o4, 16, %o4
1976*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1977*280575beSPatrick McGehearty	ldub	[%i0+6], %o4
1978*280575beSPatrick McGehearty	sll	%o4, 8, %o4
1979*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1980*280575beSPatrick McGehearty	ldub	[%i0+7], %o4
1981*280575beSPatrick McGehearty	or	%o4, %o3, %o3
1982*280575beSPatrick McGehearty	stw	%o3, [%i1+4]
1983*280575beSPatrick McGehearty	add	%i0, 8, %i0
1984*280575beSPatrick McGehearty	add	%i1, 8, %i1
1985*280575beSPatrick McGehearty	sub	%i2, 8, %i2
	/*
	 * Fewer than 8 bytes remain.  The tst in the delay slot of the
	 * blt sets the condition codes that .bc_unalnz tests when fewer
	 * than 4 bytes are left.
	 */
1986*280575beSPatrick McGehearty.bc_unalnfin:
1987*280575beSPatrick McGehearty	cmp	%i2, 4
1988*280575beSPatrick McGehearty	blt,pt	%ncc, .bc_unalnz
1989*280575beSPatrick McGehearty	tst	%i2
1990*280575beSPatrick McGehearty	ldub	[%i0], %o3		! read byte
1991*280575beSPatrick McGehearty	subcc	%i2, 4, %i2		! reduce count by 4
1992*280575beSPatrick McGehearty	sll	%o3, 24, %o3		! position
1993*280575beSPatrick McGehearty	ldub	[%i0+1], %o4
1994*280575beSPatrick McGehearty	sll	%o4, 16, %o4		! position
1995*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
1996*280575beSPatrick McGehearty	ldub	[%i0+2], %o4
1997*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
1998*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
1999*280575beSPatrick McGehearty	add	%i1, 4, %i1		! advance dst by 4
2000*280575beSPatrick McGehearty	ldub	[%i0+3], %o4
2001*280575beSPatrick McGehearty	add	%i0, 4, %i0		! advance src by 4
2002*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
	/*
	 * The branch tests the subcc result above (count - 4).  The stw
	 * in its delay slot always executes; it uses %i1-4 because the
	 * destination pointer was already advanced.
	 */
2003*280575beSPatrick McGehearty	bnz,pt	%ncc, .bc_unaln3x
2004*280575beSPatrick McGehearty	stw	%o4, [%i1-4]
2005*280575beSPatrick McGehearty	ba	.bc_exit
2006*280575beSPatrick McGehearty	nop
	/*
	 * Zero to three bytes remain.  The bz has no nop: the subcc at
	 * .bc_unaln3x is its delay slot, so it also runs when the branch
	 * to .bc_exit is taken.  The exit code visible in this file does
	 * not read %i2 or these condition codes.
	 */
2007*280575beSPatrick McGehearty.bc_unalnz:
2008*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_exit
2009*280575beSPatrick McGehearty.bc_unaln3x:				! Exactly 1, 2, or 3 bytes remain
2010*280575beSPatrick McGehearty	subcc	%i2, 1, %i2		! reduce count for cc test
2011*280575beSPatrick McGehearty	ldub	[%i0], %o4		! load one byte
2012*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_exit
2013*280575beSPatrick McGehearty	stb	%o4, [%i1]		! store one byte
2014*280575beSPatrick McGehearty	ldub	[%i0+1], %o4		! load second byte
2015*280575beSPatrick McGehearty	subcc	%i2, 1, %i2
2016*280575beSPatrick McGehearty	bz,pt	%ncc, .bc_exit
2017*280575beSPatrick McGehearty	stb	%o4, [%i1+1]		! store second byte
2018*280575beSPatrick McGehearty	ldub	[%i0+2], %o4		! load third byte
2019*280575beSPatrick McGehearty	stb	%o4, [%i1+2]		! store third byte
/*
 * Common exit of bcopy_more.  Steps, in order:
 *   - write %l5 back to %gsr (presumably the value saved on entry,
 *     outside this excerpt -- confirm);
 *   - if %g5 is non-zero run BLD_FP_FROMSTACK, otherwise run FZERO and
 *     write %g5 (zero) to %fprs.  Both macros are defined elsewhere;
 *     %g5 appears to hold the caller's %fprs state -- confirm;
 *   - when t_lwp is NULL, decrement t_preempt and, if it reaches zero
 *     and cpu_kprunrun is set, record KPREEMPT_FLAG in %l1;
 *   - when LOFAULT_SET is present in the flags taken from %o5, strip
 *     the flag bits from %o5 and, if what remains is non-zero, store it
 *     to t_lofault; then call kpreempt(%pil) if KPREEMPT_FLAG was
 *     recorded;
 *   - return 0.
 * When LOFAULT_SET is clear the code branches straight to the return at
 * label 3 without testing KPREEMPT_FLAG.
 */
2020*280575beSPatrick McGehearty.bc_exit:
2021*280575beSPatrick McGehearty	wr	%l5, %g0, %gsr		! restore %gsr
2022*280575beSPatrick McGehearty	brnz	%g5, .bc_fp_restore
2023*280575beSPatrick McGehearty	and	%o5, COPY_FLAGS, %l1	! save flags in %l1
2024*280575beSPatrick McGehearty	FZERO
2025*280575beSPatrick McGehearty	wr	%g5, %g0, %fprs
2026*280575beSPatrick McGehearty	ba,pt	%ncc, .bc_ex2
2027*280575beSPatrick McGehearty	nop
2028*280575beSPatrick McGehearty.bc_fp_restore:
2029*280575beSPatrick McGehearty	BLD_FP_FROMSTACK(%o4)
2030*280575beSPatrick McGehearty.bc_ex2:
2031*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_LWP], %o2
2032*280575beSPatrick McGehearty	brnz,pt	%o2, 1f
2033*280575beSPatrick McGehearty	nop
2034*280575beSPatrick McGehearty
	/*
	 * The stb sits in the delay slot of the bnz, so the decremented
	 * t_preempt value is written back whether or not the branch is
	 * taken.
	 */
2035*280575beSPatrick McGehearty	ldsb	[THREAD_REG + T_PREEMPT], %l0
2036*280575beSPatrick McGehearty	deccc	%l0
2037*280575beSPatrick McGehearty	bnz,pn	%ncc, 1f
2038*280575beSPatrick McGehearty	stb	%l0, [THREAD_REG + T_PREEMPT]
2039*280575beSPatrick McGehearty
2040*280575beSPatrick McGehearty	! Check for a kernel preemption request
2041*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_CPU], %l0
2042*280575beSPatrick McGehearty	ldub	[%l0 + CPU_KPRUNRUN], %l0
2043*280575beSPatrick McGehearty	brnz,a,pt	%l0, 1f	! Need to call kpreempt?
2044*280575beSPatrick McGehearty	or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag
2045*280575beSPatrick McGehearty1:
	/*
	 * The andncc in the delay slot always executes: it removes the
	 * flag bits from %o5 and sets the condition codes tested by the
	 * "bz,pn %ncc, 2f" below.
	 */
2046*280575beSPatrick McGehearty	btst	LOFAULT_SET, %l1
2047*280575beSPatrick McGehearty	bz,pn	%icc, 3f
2048*280575beSPatrick McGehearty	andncc	%o5, COPY_FLAGS, %o5
2049*280575beSPatrick McGehearty	! Here via bcopy. Check to see if the handler was NULL.
2050*280575beSPatrick McGehearty	! If so, just return quietly. Otherwise, reset the
2051*280575beSPatrick McGehearty	! handler and return.
2052*280575beSPatrick McGehearty	bz,pn %ncc, 2f
2053*280575beSPatrick McGehearty	nop
2054*280575beSPatrick McGehearty	membar	#Sync
2055*280575beSPatrick McGehearty	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
2056*280575beSPatrick McGehearty2:
2057*280575beSPatrick McGehearty	btst	KPREEMPT_FLAG, %l1
2058*280575beSPatrick McGehearty	bz,pt	%icc, 3f
2059*280575beSPatrick McGehearty	nop
2060*280575beSPatrick McGehearty	call	kpreempt
2061*280575beSPatrick McGehearty	rdpr	%pil, %o0		! pass %pil
2062*280575beSPatrick McGehearty3:
	/* the restore in the delay slot puts 0 in the caller's %o0 */
2063*280575beSPatrick McGehearty	ret
2064*280575beSPatrick McGehearty	restore	%g0, 0, %o0
2065*280575beSPatrick McGehearty
2066*280575beSPatrick McGehearty	SET_SIZE(bcopy_more)
2067*280575beSPatrick McGehearty
2068*280575beSPatrick McGehearty
2069340af271Swh94709#else	/* NIAGARA_IMPL */
/*
 * bcopy body for the non-NIAGARA_IMPL build.  After opening a register
 * window, %i0 is the source, %i1 the destination and %i2 the byte count;
 * %o5 is cleared so that LOFAULT_SET is not set.
 *
 * .do_copy chooses a strategy.  Control leaves for:
 *   .bytecp    when count < 12;
 *   .bcb_punt  when count < 128, when use_hw_bcopy is zero, or when the
 *              absolute distance between source and destination is
 *              below 256 bytes.
 * Otherwise it falls into the block copy at .do_blockcopy.
 */
2070473b13d4Sae112802	save	%sp, -SA(MINFRAME), %sp
2071473b13d4Sae112802	clr	%o5			! flag LOFAULT_SET is not set for bcopy
20727c478bd9Sstevel@tonic-gate.do_copy:
20737c478bd9Sstevel@tonic-gate	cmp	%i2, 12			! for small counts
20747c478bd9Sstevel@tonic-gate	blu	%ncc, .bytecp		! just copy bytes
	/*
	 * No nop here: the "cmp %i2, 128" below occupies the delay slot.
	 * .empty presumably tells the assembler this is intentional --
	 * confirm against the assembler manual.
	 */
20757c478bd9Sstevel@tonic-gate	.empty
20767c478bd9Sstevel@tonic-gate
20777c478bd9Sstevel@tonic-gate	cmp	%i2, 128		! for less than 128 bytes
20787c478bd9Sstevel@tonic-gate	blu,pn	%ncc, .bcb_punt		! no block st/quad ld
20797c478bd9Sstevel@tonic-gate	nop
20807c478bd9Sstevel@tonic-gate
20817c478bd9Sstevel@tonic-gate	set	use_hw_bcopy, %o2
20827c478bd9Sstevel@tonic-gate	ld	[%o2], %o2
2083340af271Swh94709	brz,pn	%o2, .bcb_punt
20847c478bd9Sstevel@tonic-gate	nop
20857c478bd9Sstevel@tonic-gate
	/*
	 * %i3 = |dst - src|: the annulled delay slot negates the
	 * difference only when the subtraction came out negative.
	 */
20867c478bd9Sstevel@tonic-gate	subcc	%i1, %i0, %i3
20877c478bd9Sstevel@tonic-gate	bneg,a,pn %ncc, 1f
20887c478bd9Sstevel@tonic-gate	neg	%i3
20897c478bd9Sstevel@tonic-gate1:
20907c478bd9Sstevel@tonic-gate	/*
20917c478bd9Sstevel@tonic-gate	 * Compare against 256 since we should be checking block addresses
20927c478bd9Sstevel@tonic-gate	 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
20937c478bd9Sstevel@tonic-gate	 * src = dest + (64 * 3) + 63.
20947c478bd9Sstevel@tonic-gate	 */
20957c478bd9Sstevel@tonic-gate	cmp	%i3, 256
20967c478bd9Sstevel@tonic-gate	blu,pn	%ncc, .bcb_punt
20977c478bd9Sstevel@tonic-gate	nop
20987c478bd9Sstevel@tonic-gate
/*
 * Start of the block copy: swap the pointer registers so that %i0 is the
 * destination and %i1 the source, then copy just enough leading bytes to
 * bring the destination to a 64-byte boundary.
 *
 * %i3 = 64 - (dst & 0x3f) is the number of leading bytes, and %i2 is
 * reduced by that amount.  The copy width is the widest of 8, 4, 2 or 1
 * bytes to which both pointers are aligned (tested on dst | src).  Since
 * the destination is aligned to the chosen width, its distance to the
 * next 64-byte boundary is a multiple of that width, so each loop counts
 * %i3 down exactly to zero.  All four loops end at .chksrc; the 8-byte
 * loop reaches it by falling through.
 */
20997c478bd9Sstevel@tonic-gate	/*
21007c478bd9Sstevel@tonic-gate	 * Copies that reach here have at least 2 blocks of data to copy.
21017c478bd9Sstevel@tonic-gate	 */
21027c478bd9Sstevel@tonic-gate.do_blockcopy:
21037c478bd9Sstevel@tonic-gate	! Swap src/dst since the code below is memcpy code
21047c478bd9Sstevel@tonic-gate	! and memcpy/bcopy have different calling sequences
21057c478bd9Sstevel@tonic-gate	mov	%i1, %i5
21067c478bd9Sstevel@tonic-gate	mov	%i0, %i1
21077c478bd9Sstevel@tonic-gate	mov	%i5, %i0
21087c478bd9Sstevel@tonic-gate
	/*
	 * The bz is taken when dst is already 64-byte aligned (the
	 * "double aligned" remark on that line understates the test).
	 * The sub in its delay slot executes either way; on the taken
	 * path .chksrc recomputes %i3, so the value is discarded.
	 */
2109340af271Swh94709	! Block (64 bytes) align the destination.
21107c478bd9Sstevel@tonic-gate	andcc	%i0, 0x3f, %i3		! is dst aligned on a 64 bytes
21117c478bd9Sstevel@tonic-gate	bz	%xcc, .chksrc		! dst is already double aligned
21127c478bd9Sstevel@tonic-gate	sub	%i3, 0x40, %i3
21137c478bd9Sstevel@tonic-gate	neg	%i3			! bytes till dst 64 bytes aligned
21147c478bd9Sstevel@tonic-gate	sub	%i2, %i3, %i2		! update i2 with new count
21157c478bd9Sstevel@tonic-gate
2116340af271Swh94709	! Based on source and destination alignment do
2117340af271Swh94709	! either 8 bytes, 4 bytes, 2 bytes or byte copy.
2118340af271Swh94709
2119340af271Swh94709	! Is dst & src 8B aligned
2120340af271Swh94709	or	%i0, %i1, %o2
2121340af271Swh94709	andcc	%o2, 0x7, %g0
2122340af271Swh94709	bz	%ncc, .alewdcp
2123340af271Swh94709	nop
2124340af271Swh94709
2125340af271Swh94709	! Is dst & src 4B aligned
2126340af271Swh94709	andcc	%o2, 0x3, %g0
2127340af271Swh94709	bz	%ncc, .alwdcp
2128340af271Swh94709	nop
2129340af271Swh94709
2130340af271Swh94709	! Is dst & src 2B aligned
2131340af271Swh94709	andcc	%o2, 0x1, %g0
2132340af271Swh94709	bz	%ncc, .alhlfwdcp
2133340af271Swh94709	nop
2134340af271Swh94709
	/* in each loop the destination pointer advances in the delay slot */
2135340af271Swh94709	! 1B aligned
2136340af271Swh947091:	ldub	[%i1], %o2
2137340af271Swh94709	stb	%o2, [%i0]
21387c478bd9Sstevel@tonic-gate	inc	%i1
21397c478bd9Sstevel@tonic-gate	deccc	%i3
2140340af271Swh94709	bgu,pt	%ncc, 1b
21417c478bd9Sstevel@tonic-gate	inc	%i0
21427c478bd9Sstevel@tonic-gate
2143340af271Swh94709	ba	.chksrc
2144340af271Swh94709	nop
2145340af271Swh94709
2146340af271Swh94709	! dst & src 4B aligned
2147340af271Swh94709.alwdcp:
2148340af271Swh94709	ld	[%i1], %o2
2149340af271Swh94709	st	%o2, [%i0]
2150340af271Swh94709	add	%i1, 0x4, %i1
2151340af271Swh94709	subcc	%i3, 0x4, %i3
2152340af271Swh94709	bgu,pt	%ncc, .alwdcp
2153340af271Swh94709	add	%i0, 0x4, %i0
2154340af271Swh94709
2155340af271Swh94709	ba	.chksrc
2156340af271Swh94709	nop
2157340af271Swh94709
2158340af271Swh94709	! dst & src 2B aligned
2159340af271Swh94709.alhlfwdcp:
2160340af271Swh94709	lduh	[%i1], %o2
2161340af271Swh94709	stuh	%o2, [%i0]
2162340af271Swh94709	add	%i1, 0x2, %i1
2163340af271Swh94709	subcc	%i3, 0x2, %i3
2164340af271Swh94709	bgu,pt	%ncc, .alhlfwdcp
2165340af271Swh94709	add	%i0, 0x2, %i0
2166340af271Swh94709
2167340af271Swh94709	ba	.chksrc
2168340af271Swh94709	nop
2169340af271Swh94709
2170340af271Swh94709	! dst & src 8B aligned
2171340af271Swh94709.alewdcp:
2172340af271Swh94709	ldx	[%i1], %o2
2173340af271Swh94709	stx	%o2, [%i0]
2174340af271Swh94709	add	%i1, 0x8, %i1
2175340af271Swh94709	subcc	%i3, 0x8, %i3
2176340af271Swh94709	bgu,pt	%ncc, .alewdcp
2177340af271Swh94709	add	%i0, 0x8, %i0
2178340af271Swh94709
/*
 * Destination (%i0) is now 64-byte aligned.  Split the count into %i3
 * (whole 64-byte blocks) and %i2 (residue), select
 * ASI_BLK_INIT_ST_QUAD_LDD_P for the %asi-relative loads and stores, and
 * dispatch on the source offset within a 16-byte quadword (%o2):
 *   0      -> .blkcpy            (no merging needed)
 *   1..7   -> .cpy_lower_double
 *   9..15  -> .cpy_upper_double
 *   8      -> falls through to loop0 below
 *
 * loop0 handles a source that is 8-byte but not 16-byte aligned.  The
 * source pointer is rounded down to 16 bytes and each 16-byte ldda fills
 * an even/odd register pair (%l2/%l3 or %l4/%l5).  The wanted data is the
 * second register of one pair followed by the first register of the
 * next, so the registers are stored in the order %l3, %l4, %l5, %l2 with
 * no shifting.  The load at +0x40 primes %l2/%l3 for the next pass.  On
 * exit the delay slot of "ba .blkdone" adds the offset %o2 back to the
 * source pointer.
 */
21797c478bd9Sstevel@tonic-gate	! Now Destination is block (64 bytes) aligned
21807c478bd9Sstevel@tonic-gate.chksrc:
21817c478bd9Sstevel@tonic-gate	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
21827c478bd9Sstevel@tonic-gate	sub	%i2, %i3, %i2		! Residue bytes in %i2
21837c478bd9Sstevel@tonic-gate
21847c478bd9Sstevel@tonic-gate	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
21857c478bd9Sstevel@tonic-gate
21867c478bd9Sstevel@tonic-gate	andcc	%i1, 0xf, %o2		! is src quadword aligned
21877c478bd9Sstevel@tonic-gate	bz,pn	%xcc, .blkcpy		! src offset in %o2
21887c478bd9Sstevel@tonic-gate	nop
21897c478bd9Sstevel@tonic-gate	cmp	%o2, 0x8
21907c478bd9Sstevel@tonic-gate	bg	.cpy_upper_double
21917c478bd9Sstevel@tonic-gate	nop
21927c478bd9Sstevel@tonic-gate	bl	.cpy_lower_double
21937c478bd9Sstevel@tonic-gate	nop
21947c478bd9Sstevel@tonic-gate
21957c478bd9Sstevel@tonic-gate	! Falls through when source offset is equal to 8 i.e.
21967c478bd9Sstevel@tonic-gate	! source is double word aligned.
21977c478bd9Sstevel@tonic-gate	! In this case no shift/merge of data is required
21987c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
21997c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
22007c478bd9Sstevel@tonic-gate	prefetch [%l0+0x0], #one_read
22017c478bd9Sstevel@tonic-gate	ldda	[%i1+0x0]%asi, %l2
22027c478bd9Sstevel@tonic-gateloop0:
22037c478bd9Sstevel@tonic-gate	ldda	[%i1+0x10]%asi, %l4
22047c478bd9Sstevel@tonic-gate	prefetch [%l0+0x40], #one_read
22057c478bd9Sstevel@tonic-gate
22067c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x0]%asi
22077c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x8]%asi
22087c478bd9Sstevel@tonic-gate
22097c478bd9Sstevel@tonic-gate	ldda	[%i1+0x20]%asi, %l2
22107c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x10]%asi
22117c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x18]%asi
22127c478bd9Sstevel@tonic-gate
22137c478bd9Sstevel@tonic-gate	ldda	[%i1+0x30]%asi, %l4
22147c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x20]%asi
22157c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x28]%asi
22167c478bd9Sstevel@tonic-gate
22177c478bd9Sstevel@tonic-gate	ldda	[%i1+0x40]%asi, %l2
22187c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x30]%asi
22197c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x38]%asi
22207c478bd9Sstevel@tonic-gate
22217c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
22227c478bd9Sstevel@tonic-gate	add	%i1, 0x40, %i1
22237c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
22247c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, loop0
22257c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
22267c478bd9Sstevel@tonic-gate	ba	.blkdone
22277c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
22287c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
22297c478bd9Sstevel@tonic-gate
/*
 * Block loop for a source whose offset within its 16-byte quadword (%o2)
 * is 1..7, i.e. the data starts inside the first doubleword of the quad.
 * The source pointer is rounded down to 16 bytes; %o0 = offset * 8 is
 * the left-shift bit count and %o1 = 64 - %o0 the matching right shift.
 *
 * Each pass moves 64 bytes: four 16-byte ldda loads alternate between the
 * register pairs %l2/%l3 and %l4/%l5, and after each load ALIGN_DATA
 * (a macro defined elsewhere in the file; per the comments below it
 * merges three registers into the first two, with %l6 as its last
 * argument, presumably scratch) produces two aligned doublewords that
 * are stored to the destination.  %l0 tracks the block-aligned source
 * for prefetching.  On exit the delay slot of "ba .blkdone" adds the
 * offset %o2 back to the source pointer.
 */
22307c478bd9Sstevel@tonic-gate.cpy_lower_double:
22317c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
22327c478bd9Sstevel@tonic-gate	sll	%o2, 3, %o0		! %o0 left shift
22337c478bd9Sstevel@tonic-gate	mov	0x40, %o1
22347c478bd9Sstevel@tonic-gate	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
22357c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
22367c478bd9Sstevel@tonic-gate	prefetch [%l0+0x0], #one_read
22377c478bd9Sstevel@tonic-gate	ldda	[%i1+0x0]%asi, %l2	! partial data in %l2 and %l3 has
22387c478bd9Sstevel@tonic-gate					! complete data
22397c478bd9Sstevel@tonic-gateloop1:
22407c478bd9Sstevel@tonic-gate	ldda	[%i1+0x10]%asi, %l4	! %l4 has partial data for this read.
22417c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
22427c478bd9Sstevel@tonic-gate							! into %l2 and %l3
22437c478bd9Sstevel@tonic-gate	prefetch [%l0+0x40], #one_read
22447c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x0]%asi
22457c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x8]%asi
22467c478bd9Sstevel@tonic-gate
22477c478bd9Sstevel@tonic-gate	ldda	[%i1+0x20]%asi, %l2
22487c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)	! merge %l2 with %l5 and
22497c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x10]%asi			! %l4 from previous read
22507c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x18]%asi			! into %l4 and %l5
22517c478bd9Sstevel@tonic-gate
22527c478bd9Sstevel@tonic-gate	! Repeat the same for next 32 bytes.
22537c478bd9Sstevel@tonic-gate
22547c478bd9Sstevel@tonic-gate	ldda	[%i1+0x30]%asi, %l4
22557c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)
22567c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x20]%asi
22577c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x28]%asi
22587c478bd9Sstevel@tonic-gate
22597c478bd9Sstevel@tonic-gate	ldda	[%i1+0x40]%asi, %l2
22607c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)
22617c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x30]%asi
22627c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x38]%asi
22637c478bd9Sstevel@tonic-gate
22647c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
22657c478bd9Sstevel@tonic-gate	add	%i1, 0x40, %i1
22667c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
22677c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, loop1
22687c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
22697c478bd9Sstevel@tonic-gate	ba	.blkdone
22707c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
22717c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
22727c478bd9Sstevel@tonic-gate
/*
 * Block loop for a source whose offset within its 16-byte quadword (%o2)
 * is 9..15, i.e. the data starts inside the second doubleword of the
 * quad.  The source pointer is rounded down to 16 bytes;
 * %o0 = (offset - 8) * 8 is the left-shift bit count and %o1 = 64 - %o0
 * the matching right shift.
 *
 * Each pass moves 64 bytes: four 16-byte ldda loads alternate between the
 * register pairs %l2/%l3 and %l4/%l5, and after each load ALIGN_DATA
 * (a macro defined elsewhere in the file; per the comments below it
 * merges three registers into the first two, with %l6 as its last
 * argument, presumably scratch) produces two aligned doublewords that
 * are stored to the destination.  Compared with .cpy_lower_double the
 * register window is shifted by one: the merge starts at the second
 * register of a pair.  On exit the delay slot of "ba .blkdone" adds the
 * offset %o2 back to the source pointer.
 */
22737c478bd9Sstevel@tonic-gate.cpy_upper_double:
22747c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
22757c478bd9Sstevel@tonic-gate	mov	0x8, %o0
22767c478bd9Sstevel@tonic-gate	sub	%o2, %o0, %o0
22777c478bd9Sstevel@tonic-gate	sll	%o0, 3, %o0		! %o0 left shift
22787c478bd9Sstevel@tonic-gate	mov	0x40, %o1
22797c478bd9Sstevel@tonic-gate	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
22807c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
22817c478bd9Sstevel@tonic-gate	prefetch [%l0+0x0], #one_read
22827c478bd9Sstevel@tonic-gate	ldda	[%i1+0x0]%asi, %l2	! partial data in %l3 for this read and
22837c478bd9Sstevel@tonic-gate					! no data in %l2
22847c478bd9Sstevel@tonic-gateloop2:
22857c478bd9Sstevel@tonic-gate	ldda	[%i1+0x10]%asi, %l4	! %l4 has complete data and %l5 has
22867c478bd9Sstevel@tonic-gate					! partial
22877c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
22887c478bd9Sstevel@tonic-gate							! into %l3 and %l4
22897c478bd9Sstevel@tonic-gate	prefetch [%l0+0x40], #one_read
22907c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x0]%asi
22917c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x8]%asi
22927c478bd9Sstevel@tonic-gate
22937c478bd9Sstevel@tonic-gate	ldda	[%i1+0x20]%asi, %l2
22947c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)	! merge %l2 and %l3 with
22957c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x10]%asi			! %l5 from previous read
22967c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x18]%asi			! into %l5 and %l2
22977c478bd9Sstevel@tonic-gate
22987c478bd9Sstevel@tonic-gate	! Repeat the same for next 32 bytes.
22997c478bd9Sstevel@tonic-gate
23007c478bd9Sstevel@tonic-gate	ldda	[%i1+0x30]%asi, %l4
23017c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)
23027c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x20]%asi
23037c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x28]%asi
23047c478bd9Sstevel@tonic-gate
23057c478bd9Sstevel@tonic-gate	ldda	[%i1+0x40]%asi, %l2
23067c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)
23077c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x30]%asi
23087c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x38]%asi
23097c478bd9Sstevel@tonic-gate
23107c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
23117c478bd9Sstevel@tonic-gate	add	%i1, 0x40, %i1
23127c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
23137c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, loop2
23147c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
23157c478bd9Sstevel@tonic-gate	ba	.blkdone
23167c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
23177c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
23187c478bd9Sstevel@tonic-gate
23197c478bd9Sstevel@tonic-gate
23207c478bd9Sstevel@tonic-gate	! Both Source and Destination are block aligned.
23217c478bd9Sstevel@tonic-gate	! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P
23227c478bd9Sstevel@tonic-gate.blkcpy:
23237c478bd9Sstevel@tonic-gate	prefetch [%i1+0x0], #one_read
23247c478bd9Sstevel@tonic-gate1:
23257c478bd9Sstevel@tonic-gate	ldda	[%i1+0x0]%asi, %l0
23267c478bd9Sstevel@tonic-gate	ldda	[%i1+0x10]%asi, %l2
23277c478bd9Sstevel@tonic-gate	prefetch [%i1+0x40], #one_read
23287c478bd9Sstevel@tonic-gate
23297c478bd9Sstevel@tonic-gate	stxa	%l0, [%i0+0x0]%asi
23307c478bd9Sstevel@tonic-gate	ldda	[%i1+0x20]%asi, %l4
23317c478bd9Sstevel@tonic-gate	ldda	[%i1+0x30]%asi, %l6
23327c478bd9Sstevel@tonic-gate
23337c478bd9Sstevel@tonic-gate	stxa	%l1, [%i0+0x8]%asi
23347c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x10]%asi
23357c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x18]%asi
23367c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x20]%asi
23377c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x28]%asi
23387c478bd9Sstevel@tonic-gate	stxa	%l6, [%i0+0x30]%asi
23397c478bd9Sstevel@tonic-gate	stxa	%l7, [%i0+0x38]%asi
23407c478bd9Sstevel@tonic-gate
23417c478bd9Sstevel@tonic-gate	add	%i1, 0x40, %i1
23427c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
23437c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, 1b
23447c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
23457c478bd9Sstevel@tonic-gate
23467c478bd9Sstevel@tonic-gate.blkdone:
2347340af271Swh94709	membar	#Sync
2348340af271Swh94709
2349340af271Swh94709	brz,pt	%i2, .blkexit
2350340af271Swh94709	nop
2351340af271Swh94709
2352340af271Swh94709	! Handle trailing bytes
2353340af271Swh94709	cmp	%i2, 0x8
2354340af271Swh94709	blu,pt	%ncc, .residue
2355340af271Swh94709	nop
2356340af271Swh94709
2357340af271Swh94709	! Can we do some 8B ops
2358340af271Swh94709	or	%i1, %i0, %o2
2359340af271Swh94709	andcc	%o2, 0x7, %g0
2360340af271Swh94709	bnz	%ncc, .last4
2361340af271Swh94709	nop
2362340af271Swh94709
2363340af271Swh94709	! Do 8byte ops as long as possible
2364340af271Swh94709.last8:
2365340af271Swh94709	ldx	[%i1], %o2
2366340af271Swh94709	stx	%o2, [%i0]
2367340af271Swh94709	add	%i1, 0x8, %i1
2368340af271Swh94709	sub	%i2, 0x8, %i2
2369340af271Swh94709	cmp	%i2, 0x8
2370340af271Swh94709	bgu,pt	%ncc, .last8
2371340af271Swh94709	add	%i0, 0x8, %i0
2372340af271Swh94709
2373340af271Swh94709	brz,pt	%i2, .blkexit
2374340af271Swh94709	nop
2375340af271Swh94709
2376340af271Swh94709	ba	.residue
2377340af271Swh94709	nop
2378340af271Swh94709
2379340af271Swh94709.last4:
2380340af271Swh94709	! Can we do 4B ops
2381340af271Swh94709	andcc	%o2, 0x3, %g0
2382340af271Swh94709	bnz	%ncc, .last2
2383340af271Swh94709	nop
2384340af271Swh947091:
2385340af271Swh94709	ld	[%i1], %o2
2386340af271Swh94709	st	%o2, [%i0]
2387340af271Swh94709	add	%i1, 0x4, %i1
2388340af271Swh94709	sub	%i2, 0x4, %i2
2389340af271Swh94709	cmp	%i2, 0x4
2390340af271Swh94709	bgu,pt	%ncc, 1b
2391340af271Swh94709	add	%i0, 0x4, %i0
2392340af271Swh94709
2393340af271Swh94709	brz,pt	%i2, .blkexit
2394340af271Swh94709	nop
2395340af271Swh94709
2396340af271Swh94709	ba	.residue
2397340af271Swh94709	nop
2398340af271Swh94709
2399340af271Swh94709.last2:
2400340af271Swh94709	! Can we do 2B ops
2401340af271Swh94709	andcc	%o2, 0x1, %g0
2402340af271Swh94709	bnz	%ncc, .residue
2403340af271Swh94709	nop
2404340af271Swh94709
2405340af271Swh947091:
2406340af271Swh94709	lduh	[%i1], %o2
2407340af271Swh94709	stuh	%o2, [%i0]
2408340af271Swh94709	add	%i1, 0x2, %i1
2409340af271Swh94709	sub	%i2, 0x2, %i2
2410340af271Swh94709	cmp	%i2, 0x2
2411340af271Swh94709	bgu,pt	%ncc, 1b
2412340af271Swh94709	add	%i0, 0x2, %i0
2413340af271Swh94709
2414340af271Swh94709	brz,pt	%i2, .blkexit
24157c478bd9Sstevel@tonic-gate	nop
24167c478bd9Sstevel@tonic-gate
24177c478bd9Sstevel@tonic-gate.residue:
2418340af271Swh94709	ldub	[%i1], %o2
2419340af271Swh94709	stb	%o2, [%i0]
24207c478bd9Sstevel@tonic-gate	inc	%i1
24217c478bd9Sstevel@tonic-gate	deccc	%i2
2422340af271Swh94709	bgu,pt	%ncc, .residue
24237c478bd9Sstevel@tonic-gate	inc	%i0
24247c478bd9Sstevel@tonic-gate
24257c478bd9Sstevel@tonic-gate.blkexit:
2426340af271Swh94709
2427473b13d4Sae112802	membar	#Sync				! sync error barrier
2428473b13d4Sae112802	! Restore t_lofault handler, if came here from kcopy().
2429473b13d4Sae112802	tst	%o5
2430473b13d4Sae112802	bz	%ncc, 1f
2431473b13d4Sae112802	andn	%o5, LOFAULT_SET, %o5
2432473b13d4Sae112802	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
2433473b13d4Sae1128021:
24347c478bd9Sstevel@tonic-gate	ret
24357c478bd9Sstevel@tonic-gate	restore	%g0, 0, %o0
24367c478bd9Sstevel@tonic-gate
2437*280575beSPatrick McGehearty
24387c478bd9Sstevel@tonic-gate.bcb_punt:
24397c478bd9Sstevel@tonic-gate	!
24407c478bd9Sstevel@tonic-gate	! use aligned transfers where possible
24417c478bd9Sstevel@tonic-gate	!
24427c478bd9Sstevel@tonic-gate	xor	%i0, %i1, %o4		! xor from and to address
24437c478bd9Sstevel@tonic-gate	btst	7, %o4			! if lower three bits zero
24447c478bd9Sstevel@tonic-gate	bz	.aldoubcp		! can align on double boundary
24457c478bd9Sstevel@tonic-gate	.empty	! assembler complaints about label
24467c478bd9Sstevel@tonic-gate
24477c478bd9Sstevel@tonic-gate	xor	%i0, %i1, %o4		! xor from and to address
24487c478bd9Sstevel@tonic-gate	btst	3, %o4			! if lower two bits zero
24497c478bd9Sstevel@tonic-gate	bz	.alwordcp		! can align on word boundary
24507c478bd9Sstevel@tonic-gate	btst	3, %i0			! delay slot, from address unaligned?
24517c478bd9Sstevel@tonic-gate	!
24527c478bd9Sstevel@tonic-gate	! use aligned reads and writes where possible
24537c478bd9Sstevel@tonic-gate	! this differs from wordcp in that it copes
24547c478bd9Sstevel@tonic-gate	! with odd alignment between source and destination
24557c478bd9Sstevel@tonic-gate	! using word reads and writes with the proper shifts
24567c478bd9Sstevel@tonic-gate	! in between to align transfers to and from memory
24577c478bd9Sstevel@tonic-gate	! i0 - src address, i1 - dest address, i2 - count
24587c478bd9Sstevel@tonic-gate	! i3, i4 - tmps used for generating complete word
24597c478bd9Sstevel@tonic-gate	! i5 (word to write)
24607c478bd9Sstevel@tonic-gate	! l0 size in bits of upper part of source word (US)
24617c478bd9Sstevel@tonic-gate	! l1 size in bits of lower part of source word (LS = 32 - US)
24627c478bd9Sstevel@tonic-gate	! l2 size in bits of upper part of destination word (UD)
24637c478bd9Sstevel@tonic-gate	! l3 size in bits of lower part of destination word (LD = 32 - UD)
24647c478bd9Sstevel@tonic-gate	! l4 number of bytes leftover after aligned transfers complete
24657c478bd9Sstevel@tonic-gate	! l5 the number 32
24667c478bd9Sstevel@tonic-gate	!
24677c478bd9Sstevel@tonic-gate	mov	32, %l5			! load an oft-needed constant
24687c478bd9Sstevel@tonic-gate	bz	.align_dst_only
24697c478bd9Sstevel@tonic-gate	btst	3, %i1			! is destnation address aligned?
24707c478bd9Sstevel@tonic-gate	clr	%i4			! clear registers used in either case
24717c478bd9Sstevel@tonic-gate	bz	.align_src_only
24727c478bd9Sstevel@tonic-gate	clr	%l0
24737c478bd9Sstevel@tonic-gate	!
24747c478bd9Sstevel@tonic-gate	! both source and destination addresses are unaligned
24757c478bd9Sstevel@tonic-gate	!
24767c478bd9Sstevel@tonic-gate1:					! align source
24777c478bd9Sstevel@tonic-gate	ldub	[%i0], %i3		! read a byte from source address
24787c478bd9Sstevel@tonic-gate	add	%i0, 1, %i0		! increment source address
24797c478bd9Sstevel@tonic-gate	or	%i4, %i3, %i4		! or in with previous bytes (if any)
24807c478bd9Sstevel@tonic-gate	btst	3, %i0			! is source aligned?
24817c478bd9Sstevel@tonic-gate	add	%l0, 8, %l0		! increment size of upper source (US)
24827c478bd9Sstevel@tonic-gate	bnz,a	1b
24837c478bd9Sstevel@tonic-gate	sll	%i4, 8, %i4		! make room for next byte
24847c478bd9Sstevel@tonic-gate
24857c478bd9Sstevel@tonic-gate	sub	%l5, %l0, %l1		! generate shift left count (LS)
24867c478bd9Sstevel@tonic-gate	sll	%i4, %l1, %i4		! prepare to get rest
24877c478bd9Sstevel@tonic-gate	ld	[%i0], %i3		! read a word
24887c478bd9Sstevel@tonic-gate	add	%i0, 4, %i0		! increment source address
24897c478bd9Sstevel@tonic-gate	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
24907c478bd9Sstevel@tonic-gate	or	%i4, %i5, %i5		! merge
24917c478bd9Sstevel@tonic-gate	mov	24, %l3			! align destination
24927c478bd9Sstevel@tonic-gate1:
24937c478bd9Sstevel@tonic-gate	srl	%i5, %l3, %i4		! prepare to write a single byte
24947c478bd9Sstevel@tonic-gate	stb	%i4, [%i1]		! write a byte
24957c478bd9Sstevel@tonic-gate	add	%i1, 1, %i1		! increment destination address
24967c478bd9Sstevel@tonic-gate	sub	%i2, 1, %i2		! decrement count
24977c478bd9Sstevel@tonic-gate	btst	3, %i1			! is destination aligned?
24987c478bd9Sstevel@tonic-gate	bnz,a	1b
24997c478bd9Sstevel@tonic-gate	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
25007c478bd9Sstevel@tonic-gate	sub	%l5, %l3, %l2		! generate shift left count (UD)
25017c478bd9Sstevel@tonic-gate	sll	%i5, %l2, %i5		! move leftover into upper bytes
25027c478bd9Sstevel@tonic-gate	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
25037c478bd9Sstevel@tonic-gate	bgu	%ncc, .more_needed	! need more to fill than we have
25047c478bd9Sstevel@tonic-gate	nop
25057c478bd9Sstevel@tonic-gate
25067c478bd9Sstevel@tonic-gate	sll	%i3, %l1, %i3		! clear upper used byte(s)
25077c478bd9Sstevel@tonic-gate	srl	%i3, %l1, %i3
25087c478bd9Sstevel@tonic-gate	! get the odd bytes between alignments
25097c478bd9Sstevel@tonic-gate	sub	%l0, %l2, %l0		! regenerate shift count
25107c478bd9Sstevel@tonic-gate	sub	%l5, %l0, %l1		! generate new shift left count (LS)
25117c478bd9Sstevel@tonic-gate	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
25127c478bd9Sstevel@tonic-gate	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
25137c478bd9Sstevel@tonic-gate	srl	%i3, %l0, %i4
25147c478bd9Sstevel@tonic-gate	or	%i5, %i4, %i5
25157c478bd9Sstevel@tonic-gate	st	%i5, [%i1]		! write a word
25167c478bd9Sstevel@tonic-gate	subcc	%i2, 4, %i2		! decrement count
25177c478bd9Sstevel@tonic-gate	bz	%ncc, .unalign_out
25187c478bd9Sstevel@tonic-gate	add	%i1, 4, %i1		! increment destination address
25197c478bd9Sstevel@tonic-gate
25207c478bd9Sstevel@tonic-gate	b	2f
25217c478bd9Sstevel@tonic-gate	sll	%i3, %l1, %i5		! get leftover into upper bits
25227c478bd9Sstevel@tonic-gate.more_needed:
25237c478bd9Sstevel@tonic-gate	sll	%i3, %l0, %i3		! save remaining byte(s)
25247c478bd9Sstevel@tonic-gate	srl	%i3, %l0, %i3
25257c478bd9Sstevel@tonic-gate	sub	%l2, %l0, %l1		! regenerate shift count
25267c478bd9Sstevel@tonic-gate	sub	%l5, %l1, %l0		! generate new shift left count
25277c478bd9Sstevel@tonic-gate	sll	%i3, %l1, %i4		! move to fill empty space
25287c478bd9Sstevel@tonic-gate	b	3f
25297c478bd9Sstevel@tonic-gate	or	%i5, %i4, %i5		! merge to complete word
25307c478bd9Sstevel@tonic-gate	!
25317c478bd9Sstevel@tonic-gate	! the source address is aligned and destination is not
25327c478bd9Sstevel@tonic-gate	!
25337c478bd9Sstevel@tonic-gate.align_dst_only:
25347c478bd9Sstevel@tonic-gate	ld	[%i0], %i4		! read a word
25357c478bd9Sstevel@tonic-gate	add	%i0, 4, %i0		! increment source address
25367c478bd9Sstevel@tonic-gate	mov	24, %l0			! initial shift alignment count
25377c478bd9Sstevel@tonic-gate1:
25387c478bd9Sstevel@tonic-gate	srl	%i4, %l0, %i3		! prepare to write a single byte
25397c478bd9Sstevel@tonic-gate	stb	%i3, [%i1]		! write a byte
25407c478bd9Sstevel@tonic-gate	add	%i1, 1, %i1		! increment destination address
25417c478bd9Sstevel@tonic-gate	sub	%i2, 1, %i2		! decrement count
25427c478bd9Sstevel@tonic-gate	btst	3, %i1			! is destination aligned?
25437c478bd9Sstevel@tonic-gate	bnz,a	1b
25447c478bd9Sstevel@tonic-gate	sub	%l0, 8, %l0		! delay slot, decrement shift count
25457c478bd9Sstevel@tonic-gate.xfer:
25467c478bd9Sstevel@tonic-gate	sub	%l5, %l0, %l1		! generate shift left count
25477c478bd9Sstevel@tonic-gate	sll	%i4, %l1, %i5		! get leftover
25487c478bd9Sstevel@tonic-gate3:
25497c478bd9Sstevel@tonic-gate	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
25507c478bd9Sstevel@tonic-gate	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
25517c478bd9Sstevel@tonic-gate2:
25527c478bd9Sstevel@tonic-gate	ld	[%i0], %i3		! read a source word
25537c478bd9Sstevel@tonic-gate	add	%i0, 4, %i0		! increment source address
25547c478bd9Sstevel@tonic-gate	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
25557c478bd9Sstevel@tonic-gate	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
25567c478bd9Sstevel@tonic-gate	st	%i5, [%i1]		! write a destination word
25577c478bd9Sstevel@tonic-gate	subcc	%i2, 4, %i2		! decrement count
25587c478bd9Sstevel@tonic-gate	bz	%ncc, .unalign_out	! check if done
25597c478bd9Sstevel@tonic-gate	add	%i1, 4, %i1		! increment destination address
25607c478bd9Sstevel@tonic-gate	b	2b			! loop
25617c478bd9Sstevel@tonic-gate	sll	%i3, %l1, %i5		! get leftover
25627c478bd9Sstevel@tonic-gate.unalign_out:
25637c478bd9Sstevel@tonic-gate	tst	%l4			! any bytes leftover?
25647c478bd9Sstevel@tonic-gate	bz	%ncc, .cpdone
25657c478bd9Sstevel@tonic-gate	.empty				! allow next instruction in delay slot
25667c478bd9Sstevel@tonic-gate1:
25677c478bd9Sstevel@tonic-gate	sub	%l0, 8, %l0		! decrement shift
25687c478bd9Sstevel@tonic-gate	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
25697c478bd9Sstevel@tonic-gate	stb	%i4, [%i1]		! write a byte
25707c478bd9Sstevel@tonic-gate	subcc	%l4, 1, %l4		! decrement count
25717c478bd9Sstevel@tonic-gate	bz	%ncc, .cpdone		! done?
25727c478bd9Sstevel@tonic-gate	add	%i1, 1, %i1		! increment destination
25737c478bd9Sstevel@tonic-gate	tst	%l0			! any more previously read bytes
25747c478bd9Sstevel@tonic-gate	bnz	%ncc, 1b		! we have leftover bytes
25757c478bd9Sstevel@tonic-gate	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
25767c478bd9Sstevel@tonic-gate	b	.dbytecp		! let dbytecp do the rest
25777c478bd9Sstevel@tonic-gate	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
25787c478bd9Sstevel@tonic-gate	!
25797c478bd9Sstevel@tonic-gate	! the destination address is aligned and the source is not
25807c478bd9Sstevel@tonic-gate	!
25817c478bd9Sstevel@tonic-gate.align_src_only:
25827c478bd9Sstevel@tonic-gate	ldub	[%i0], %i3		! read a byte from source address
25837c478bd9Sstevel@tonic-gate	add	%i0, 1, %i0		! increment source address
25847c478bd9Sstevel@tonic-gate	or	%i4, %i3, %i4		! or in with previous bytes (if any)
25857c478bd9Sstevel@tonic-gate	btst	3, %i0			! is source aligned?
25867c478bd9Sstevel@tonic-gate	add	%l0, 8, %l0		! increment shift count (US)
25877c478bd9Sstevel@tonic-gate	bnz,a	.align_src_only
25887c478bd9Sstevel@tonic-gate	sll	%i4, 8, %i4		! make room for next byte
25897c478bd9Sstevel@tonic-gate	b,a	.xfer
25907c478bd9Sstevel@tonic-gate	!
25917c478bd9Sstevel@tonic-gate	! if from address unaligned for double-word moves,
25927c478bd9Sstevel@tonic-gate	! move bytes till it is, if count is < 56 it could take
25937c478bd9Sstevel@tonic-gate	! longer to align the thing than to do the transfer
25947c478bd9Sstevel@tonic-gate	! in word size chunks right away
25957c478bd9Sstevel@tonic-gate	!
25967c478bd9Sstevel@tonic-gate.aldoubcp:
25977c478bd9Sstevel@tonic-gate	cmp	%i2, 56			! if count < 56, use wordcp, it takes
25987c478bd9Sstevel@tonic-gate	blu,a	%ncc, .alwordcp		! longer to align doubles than words
25997c478bd9Sstevel@tonic-gate	mov	3, %o0			! mask for word alignment
26007c478bd9Sstevel@tonic-gate	call	.alignit		! copy bytes until aligned
26017c478bd9Sstevel@tonic-gate	mov	7, %o0			! mask for double alignment
26027c478bd9Sstevel@tonic-gate	!
26037c478bd9Sstevel@tonic-gate	! source and destination are now double-word aligned
26047c478bd9Sstevel@tonic-gate	! i3 has aligned count returned by alignit
26057c478bd9Sstevel@tonic-gate	!
26067c478bd9Sstevel@tonic-gate	and	%i2, 7, %i2		! unaligned leftover count
26077c478bd9Sstevel@tonic-gate	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
26087c478bd9Sstevel@tonic-gate5:
26097c478bd9Sstevel@tonic-gate	ldx	[%i0+%i1], %o4		! read from address
26107c478bd9Sstevel@tonic-gate	stx	%o4, [%i1]		! write at destination address
26117c478bd9Sstevel@tonic-gate	subcc	%i3, 8, %i3		! dec count
26127c478bd9Sstevel@tonic-gate	bgu	%ncc, 5b
26137c478bd9Sstevel@tonic-gate	add	%i1, 8, %i1		! delay slot, inc to address
26147c478bd9Sstevel@tonic-gate	cmp	%i2, 4			! see if we can copy a word
26157c478bd9Sstevel@tonic-gate	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
26167c478bd9Sstevel@tonic-gate	.empty
26177c478bd9Sstevel@tonic-gate	!
26187c478bd9Sstevel@tonic-gate	! for leftover bytes we fall into wordcp, if needed
26197c478bd9Sstevel@tonic-gate	!
26207c478bd9Sstevel@tonic-gate.wordcp:
26217c478bd9Sstevel@tonic-gate	and	%i2, 3, %i2		! unaligned leftover count
26227c478bd9Sstevel@tonic-gate5:
26237c478bd9Sstevel@tonic-gate	ld	[%i0+%i1], %o4		! read from address
26247c478bd9Sstevel@tonic-gate	st	%o4, [%i1]		! write at destination address
26257c478bd9Sstevel@tonic-gate	subcc	%i3, 4, %i3		! dec count
26267c478bd9Sstevel@tonic-gate	bgu	%ncc, 5b
26277c478bd9Sstevel@tonic-gate	add	%i1, 4, %i1		! delay slot, inc to address
26287c478bd9Sstevel@tonic-gate	b,a	.dbytecp
26297c478bd9Sstevel@tonic-gate
26307c478bd9Sstevel@tonic-gate	! we come here to align copies on word boundaries
26317c478bd9Sstevel@tonic-gate.alwordcp:
26327c478bd9Sstevel@tonic-gate	call	.alignit		! go word-align it
26337c478bd9Sstevel@tonic-gate	mov	3, %o0			! bits that must be zero to be aligned
26347c478bd9Sstevel@tonic-gate	b	.wordcp
26357c478bd9Sstevel@tonic-gate	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
26367c478bd9Sstevel@tonic-gate
26377c478bd9Sstevel@tonic-gate	!
26387c478bd9Sstevel@tonic-gate	! byte copy, works with any alignment
26397c478bd9Sstevel@tonic-gate	!
26407c478bd9Sstevel@tonic-gate.bytecp:
26417c478bd9Sstevel@tonic-gate	b	.dbytecp
26427c478bd9Sstevel@tonic-gate	sub	%i0, %i1, %i0		! i0 gets difference of src and dst
26437c478bd9Sstevel@tonic-gate
26447c478bd9Sstevel@tonic-gate	!
26457c478bd9Sstevel@tonic-gate	! differenced byte copy, works with any alignment
26467c478bd9Sstevel@tonic-gate	! assumes dest in %i1 and (source - dest) in %i0
26477c478bd9Sstevel@tonic-gate	!
26487c478bd9Sstevel@tonic-gate1:
26497c478bd9Sstevel@tonic-gate	stb	%o4, [%i1]		! write to address
26507c478bd9Sstevel@tonic-gate	inc	%i1			! inc to address
26517c478bd9Sstevel@tonic-gate.dbytecp:
26527c478bd9Sstevel@tonic-gate	deccc	%i2			! dec count
26537c478bd9Sstevel@tonic-gate	bgeu,a	%ncc, 1b		! loop till done
26547c478bd9Sstevel@tonic-gate	ldub	[%i0+%i1], %o4		! read from address
26557c478bd9Sstevel@tonic-gate.cpdone:
2656*280575beSPatrick McGehearty
26577c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
2658473b13d4Sae112802	! Restore t_lofault handler, if came here from kcopy().
2659473b13d4Sae112802	tst	%o5
2660473b13d4Sae112802	bz	%ncc, 1f
2661473b13d4Sae112802	andn	%o5, LOFAULT_SET, %o5
2662473b13d4Sae112802	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
2663473b13d4Sae1128021:
26647c478bd9Sstevel@tonic-gate	ret
26657c478bd9Sstevel@tonic-gate	restore %g0, 0, %o0		! return (0)
26667c478bd9Sstevel@tonic-gate
26677c478bd9Sstevel@tonic-gate/*
26687c478bd9Sstevel@tonic-gate * Common code used to align transfers on word and doubleword
2669*280575beSPatrick McGehearty * boundaries.  Aligns source and destination and returns a count
26707c478bd9Sstevel@tonic-gate * of aligned bytes to transfer in %i3
26717c478bd9Sstevel@tonic-gate */
26727c478bd9Sstevel@tonic-gate1:
26737c478bd9Sstevel@tonic-gate	inc	%i0			! inc from
26747c478bd9Sstevel@tonic-gate	stb	%o4, [%i1]		! write a byte
26757c478bd9Sstevel@tonic-gate	inc	%i1			! inc to
26767c478bd9Sstevel@tonic-gate	dec	%i2			! dec count
26777c478bd9Sstevel@tonic-gate.alignit:
26787c478bd9Sstevel@tonic-gate	btst	%o0, %i0		! %o0 is bit mask to check for alignment
26797c478bd9Sstevel@tonic-gate	bnz,a	1b
26807c478bd9Sstevel@tonic-gate	ldub	[%i0], %o4		! read next byte
26817c478bd9Sstevel@tonic-gate
26827c478bd9Sstevel@tonic-gate	retl
26837c478bd9Sstevel@tonic-gate	andn	%i2, %o0, %i3		! return size of aligned bytes
2684*280575beSPatrick McGehearty
26857c478bd9Sstevel@tonic-gate	SET_SIZE(bcopy)
26867c478bd9Sstevel@tonic-gate
2687*280575beSPatrick McGehearty#endif	/* NIAGARA_IMPL */
2688*280575beSPatrick McGehearty
26897c478bd9Sstevel@tonic-gate#endif	/* lint */
26907c478bd9Sstevel@tonic-gate
26917c478bd9Sstevel@tonic-gate/*
26927c478bd9Sstevel@tonic-gate * Block copy with possibly overlapped operands.
26937c478bd9Sstevel@tonic-gate */
26947c478bd9Sstevel@tonic-gate
26957c478bd9Sstevel@tonic-gate#if defined(lint)
26967c478bd9Sstevel@tonic-gate
/*
 * Lint-only stub for ovbcopy(): it supplies a C prototype and an empty
 * body so lint can check callers.  It is compiled only when "lint" is
 * defined; the executable routine is the assembly ENTRY(ovbcopy) in the
 * matching #else branch.
 */
26977c478bd9Sstevel@tonic-gate/*ARGSUSED*/
26987c478bd9Sstevel@tonic-gatevoid
26997c478bd9Sstevel@tonic-gateovbcopy(const void *from, void *to, size_t count)
27007c478bd9Sstevel@tonic-gate{}
27017c478bd9Sstevel@tonic-gate
27027c478bd9Sstevel@tonic-gate#else	/* lint */
27037c478bd9Sstevel@tonic-gate
/*
 * ovbcopy(from, to, count): byte copy that tolerates overlapping
 * operands.  No register window is taken (the routine returns with
 * retl), so the arguments stay in %o0 (from), %o1 (to) and %o2 (count);
 * %o3 is the only scratch register.
 *
 *   count == 0                 -> return immediately
 *   count <= abs(from - to)    -> the regions do not overlap; branch to
 *                                 bcopy with %o0-%o2 still unmodified
 *   from < to                  -> copy from the last byte down (.ov_bkwd)
 *   otherwise                  -> copy from the first byte up (.ov_fwd)
 *
 * NOTE(review): %ncc is not defined in this part of the file; presumably
 * it comes from one of the included headers -- confirm which
 * condition-code set it names.
 */
27047c478bd9Sstevel@tonic-gate	ENTRY(ovbcopy)
27057c478bd9Sstevel@tonic-gate	tst	%o2			! check count
	/*
	 * The branch is taken when count != 0.  Its delay slot is annulled
	 * (",a"), so the subcc runs only on the taken path; count == 0
	 * falls through to the retl below.
	 */
27067c478bd9Sstevel@tonic-gate	bgu,a	%ncc, 1f		! nothing to do or bad arguments
27077c478bd9Sstevel@tonic-gate	subcc	%o0, %o1, %o3		! difference of from and to address
27087c478bd9Sstevel@tonic-gate
27097c478bd9Sstevel@tonic-gate	retl				! return
27107c478bd9Sstevel@tonic-gate	nop
27117c478bd9Sstevel@tonic-gate1:
	/*
	 * Both outcomes of this branch continue at 2:.  The annulled delay
	 * slot negates %o3 only when the subtraction result was negative.
	 */
27127c478bd9Sstevel@tonic-gate	bneg,a	%ncc, 2f
27137c478bd9Sstevel@tonic-gate	neg	%o3			! if < 0, make it positive
27147c478bd9Sstevel@tonic-gate2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	/*
	 * Branch (not call) to bcopy, so bcopy returns to ovbcopy's caller.
	 * The .empty directive is followed by the cmp below, which sits in
	 * this branch's delay slot; it only sets condition codes, so
	 * executing it on the way to bcopy does not disturb the arguments.
	 */
27157c478bd9Sstevel@tonic-gate	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
27167c478bd9Sstevel@tonic-gate	.empty				!   no overlap
27177c478bd9Sstevel@tonic-gate	cmp	%o0, %o1		! compare from and to addresses
27187c478bd9Sstevel@tonic-gate	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
27197c478bd9Sstevel@tonic-gate	nop
27207c478bd9Sstevel@tonic-gate	!
27217c478bd9Sstevel@tonic-gate	! Copy forwards.
27227c478bd9Sstevel@tonic-gate	!
	/*
	 * One byte per pass, lowest address first.  The destination pointer
	 * is advanced in the loop branch's delay slot.
	 */
27237c478bd9Sstevel@tonic-gate.ov_fwd:
27247c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! read from address
27257c478bd9Sstevel@tonic-gate	inc	%o0			! inc from address
27267c478bd9Sstevel@tonic-gate	stb	%o3, [%o1]		! write to address
27277c478bd9Sstevel@tonic-gate	deccc	%o2			! dec count
27287c478bd9Sstevel@tonic-gate	bgu	%ncc, .ov_fwd		! loop till done
27297c478bd9Sstevel@tonic-gate	inc	%o1			! inc to address
27307c478bd9Sstevel@tonic-gate
27317c478bd9Sstevel@tonic-gate	retl				! return
27327c478bd9Sstevel@tonic-gate	nop
27337c478bd9Sstevel@tonic-gate	!
27347c478bd9Sstevel@tonic-gate	! Copy backwards.
27357c478bd9Sstevel@tonic-gate	!
	/*
	 * %o2 is decremented first and then used as the byte index, so the
	 * first pass moves byte count-1.  The pointers themselves are never
	 * changed.  The store is in the (non-annulled) delay slot, so on the
	 * final pass, when %o2 reaches 0 and the branch falls through,
	 * byte 0 is still stored.
	 */
27367c478bd9Sstevel@tonic-gate.ov_bkwd:
27377c478bd9Sstevel@tonic-gate	deccc	%o2			! dec count
27387c478bd9Sstevel@tonic-gate	ldub	[%o0 + %o2], %o3	! get byte at end of src
27397c478bd9Sstevel@tonic-gate	bgu	%ncc, .ov_bkwd		! loop till done
27407c478bd9Sstevel@tonic-gate	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst
27417c478bd9Sstevel@tonic-gate
27427c478bd9Sstevel@tonic-gate	retl				! return
27437c478bd9Sstevel@tonic-gate	nop
27447c478bd9Sstevel@tonic-gate	SET_SIZE(ovbcopy)
27457c478bd9Sstevel@tonic-gate
27467c478bd9Sstevel@tonic-gate#endif	/* lint */
27477c478bd9Sstevel@tonic-gate
27487c478bd9Sstevel@tonic-gate/*
27497c478bd9Sstevel@tonic-gate * hwblkpagecopy()
27507c478bd9Sstevel@tonic-gate *
27517c478bd9Sstevel@tonic-gate * Copies exactly one page.  This routine assumes the caller (ppcopy)
27527c478bd9Sstevel@tonic-gate * has already disabled kernel preemption and has checked
27537c478bd9Sstevel@tonic-gate * use_hw_bcopy.
27547c478bd9Sstevel@tonic-gate */
27557c478bd9Sstevel@tonic-gate#ifdef lint
/*
 * Lint-only stub for hwblkpagecopy(): prototype plus empty body,
 * compiled only when "lint" is defined.  The executable routine is the
 * assembly ENTRY(hwblkpagecopy) in the matching #else branch.
 */
27567c478bd9Sstevel@tonic-gate/*ARGSUSED*/
27577c478bd9Sstevel@tonic-gatevoid
27587c478bd9Sstevel@tonic-gatehwblkpagecopy(const void *src, void *dst)
27597c478bd9Sstevel@tonic-gate{ }
27607c478bd9Sstevel@tonic-gate#else /* lint */
/*
 * hwblkpagecopy(src, dst): copy PAGESIZE bytes from src to dst using
 * ldda/stxa through %asi, which is set to ASI_BLK_INIT_ST_QUAD_LDD_P.
 *
 * A register window is taken with save, so the arguments are seen as
 * %i0 (src) and %i1 (dst) and %l0-%l7 are free to stage data.  Each loop
 * pass moves 0x80 bytes as two 0x40-byte halves: four ldda instructions
 * fill %l0-%l7 (each ldda targets an even/odd register pair and the
 * offsets advance by 0x10), then eight 8-byte stxa instructions write
 * those registers out.
 *
 * NOTE(review): nothing here checks the alignment of src or dst;
 * presumably the caller passes page-aligned addresses -- confirm.
 */
27617c478bd9Sstevel@tonic-gate	ENTRY(hwblkpagecopy)
2762340af271Swh94709	save	%sp, -SA(MINFRAME), %sp
27637c478bd9Sstevel@tonic-gate
27647c478bd9Sstevel@tonic-gate	! %i0 - source address (arg)
27657c478bd9Sstevel@tonic-gate	! %i1 - destination address (arg)
27667c478bd9Sstevel@tonic-gate	! %i2 - length of region (not arg)
27677c478bd9Sstevel@tonic-gate
27687c478bd9Sstevel@tonic-gate	set	PAGESIZE, %i2
27697c478bd9Sstevel@tonic-gate
27707c478bd9Sstevel@tonic-gate	/*
27717c478bd9Sstevel@tonic-gate	 * Copying exactly one page and PAGESIZE is a multiple of 0x80.
27727c478bd9Sstevel@tonic-gate	 */
27737c478bd9Sstevel@tonic-gate	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
	/*
	 * Prefetch the first 0x80 bytes up front.  Inside the loop each pass
	 * prefetches src+0x80 and src+0xc0, i.e. the 0x80 bytes the next
	 * pass will load.
	 * NOTE(review): on the final pass those two addresses lie beyond the
	 * PAGESIZE bytes being copied; presumably acceptable for a prefetch
	 * -- confirm.
	 */
27747c478bd9Sstevel@tonic-gate	prefetch [%i0+0x0], #one_read
27757c478bd9Sstevel@tonic-gate	prefetch [%i0+0x40], #one_read
27767c478bd9Sstevel@tonic-gate1:
27777c478bd9Sstevel@tonic-gate	prefetch [%i0+0x80], #one_read
27787c478bd9Sstevel@tonic-gate	prefetch [%i0+0xc0], #one_read
	/* First half: source bytes 0x00-0x3f into %l0-%l7, then out to dst. */
27797c478bd9Sstevel@tonic-gate	ldda	[%i0+0x0]%asi, %l0
27807c478bd9Sstevel@tonic-gate	ldda	[%i0+0x10]%asi, %l2
27817c478bd9Sstevel@tonic-gate	ldda	[%i0+0x20]%asi, %l4
27827c478bd9Sstevel@tonic-gate	ldda	[%i0+0x30]%asi, %l6
27837c478bd9Sstevel@tonic-gate	stxa	%l0, [%i1+0x0]%asi
27847c478bd9Sstevel@tonic-gate	stxa	%l1, [%i1+0x8]%asi
27857c478bd9Sstevel@tonic-gate	stxa	%l2, [%i1+0x10]%asi
27867c478bd9Sstevel@tonic-gate	stxa	%l3, [%i1+0x18]%asi
27877c478bd9Sstevel@tonic-gate	stxa	%l4, [%i1+0x20]%asi
27887c478bd9Sstevel@tonic-gate	stxa	%l5, [%i1+0x28]%asi
27897c478bd9Sstevel@tonic-gate	stxa	%l6, [%i1+0x30]%asi
27907c478bd9Sstevel@tonic-gate	stxa	%l7, [%i1+0x38]%asi
	/* Second half: source bytes 0x40-0x7f, reusing %l0-%l7. */
27917c478bd9Sstevel@tonic-gate	ldda	[%i0+0x40]%asi, %l0
27927c478bd9Sstevel@tonic-gate	ldda	[%i0+0x50]%asi, %l2
27937c478bd9Sstevel@tonic-gate	ldda	[%i0+0x60]%asi, %l4
27947c478bd9Sstevel@tonic-gate	ldda	[%i0+0x70]%asi, %l6
27957c478bd9Sstevel@tonic-gate	stxa	%l0, [%i1+0x40]%asi
27967c478bd9Sstevel@tonic-gate	stxa	%l1, [%i1+0x48]%asi
27977c478bd9Sstevel@tonic-gate	stxa	%l2, [%i1+0x50]%asi
27987c478bd9Sstevel@tonic-gate	stxa	%l3, [%i1+0x58]%asi
27997c478bd9Sstevel@tonic-gate	stxa	%l4, [%i1+0x60]%asi
28007c478bd9Sstevel@tonic-gate	stxa	%l5, [%i1+0x68]%asi
28017c478bd9Sstevel@tonic-gate	stxa	%l6, [%i1+0x70]%asi
28027c478bd9Sstevel@tonic-gate	stxa	%l7, [%i1+0x78]%asi
28037c478bd9Sstevel@tonic-gate
	/*
	 * Advance src, count down the remaining length, and loop while it is
	 * still non-zero.  The dst pointer is advanced in the branch's
	 * delay slot, so it is updated on every pass including the last.
	 */
28047c478bd9Sstevel@tonic-gate	add	%i0, 0x80, %i0
28057c478bd9Sstevel@tonic-gate	subcc	%i2, 0x80, %i2
28067c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, 1b
28077c478bd9Sstevel@tonic-gate	add	%i1, 0x80, %i1
28087c478bd9Sstevel@tonic-gate
	/*
	 * Barrier after the last store, then return.  The restore in the
	 * delay slot of ret also writes 0 into the caller's %o0, although
	 * the lint prototype declares the routine void.
	 */
28097c478bd9Sstevel@tonic-gate	membar #Sync
28107c478bd9Sstevel@tonic-gate	ret
28117c478bd9Sstevel@tonic-gate	restore	%g0, 0, %o0
28127c478bd9Sstevel@tonic-gate	SET_SIZE(hwblkpagecopy)
28137c478bd9Sstevel@tonic-gate#endif	/* lint */
28147c478bd9Sstevel@tonic-gate
28157c478bd9Sstevel@tonic-gate
28167c478bd9Sstevel@tonic-gate/*
28177c478bd9Sstevel@tonic-gate * Transfer data to and from user space -
28187c478bd9Sstevel@tonic-gate * Note that these routines can cause faults
28197c478bd9Sstevel@tonic-gate * It is assumed that the kernel has nothing at
28207c478bd9Sstevel@tonic-gate * less than KERNELBASE in the virtual address space.
28217c478bd9Sstevel@tonic-gate *
28227c478bd9Sstevel@tonic-gate * Note that copyin(9F) and copyout(9F) are part of the
28237c478bd9Sstevel@tonic-gate * DDI/DKI which specifies that they return '-1' on "errors."
28247c478bd9Sstevel@tonic-gate *
28257c478bd9Sstevel@tonic-gate * Sigh.
28267c478bd9Sstevel@tonic-gate *
28277c478bd9Sstevel@tonic-gate * So there's two extremely similar routines - xcopyin() and xcopyout()
28287c478bd9Sstevel@tonic-gate * which return the errno that we've faithfully computed.  This
28297c478bd9Sstevel@tonic-gate * allows other callers (e.g. uiomove(9F)) to work correctly.
28307c478bd9Sstevel@tonic-gate * Given that these are used pretty heavily, we expand the calling
28317c478bd9Sstevel@tonic-gate * sequences inline for all flavours (rather than making wrappers).
28327c478bd9Sstevel@tonic-gate *
28337c478bd9Sstevel@tonic-gate * There are also stub routines for xcopyout_little and xcopyin_little,
28347c478bd9Sstevel@tonic-gate * which currently are intended to handle requests of <= 16 bytes from
28357c478bd9Sstevel@tonic-gate * do_unaligned. Future enhancement to make them handle 8k pages efficiently
28367c478bd9Sstevel@tonic-gate * is left as an exercise...
28377c478bd9Sstevel@tonic-gate */
28387c478bd9Sstevel@tonic-gate
28397c478bd9Sstevel@tonic-gate/*
28407c478bd9Sstevel@tonic-gate * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
28417c478bd9Sstevel@tonic-gate *
28427c478bd9Sstevel@tonic-gate * General theory of operation:
28437c478bd9Sstevel@tonic-gate *
28447c478bd9Sstevel@tonic-gate * None of the copyops routines grab a window until it's decided that
28457c478bd9Sstevel@tonic-gate * we need to do a HW block copy operation. This saves a window
28467c478bd9Sstevel@tonic-gate * spill/fill when we're called during socket ops. The typical IO
28477c478bd9Sstevel@tonic-gate * path won't cause spill/fill traps.
28487c478bd9Sstevel@tonic-gate *
28497c478bd9Sstevel@tonic-gate * This code uses a set of 4 limits for the maximum size that will
28507c478bd9Sstevel@tonic-gate * be copied given a particular input/output address alignment.
28517c478bd9Sstevel@tonic-gate * The default limits are:
28527c478bd9Sstevel@tonic-gate *
28537c478bd9Sstevel@tonic-gate * single byte aligned - 256 (hw_copy_limit_1)
28547c478bd9Sstevel@tonic-gate * two byte aligned - 512 (hw_copy_limit_2)
28557c478bd9Sstevel@tonic-gate * four byte aligned - 1024 (hw_copy_limit_4)
28567c478bd9Sstevel@tonic-gate * eight byte aligned - 1024 (hw_copy_limit_8)
28577c478bd9Sstevel@tonic-gate *
28587c478bd9Sstevel@tonic-gate * If the value for a particular limit is zero, the copy will be done
28597c478bd9Sstevel@tonic-gate * via the copy loops rather than block store/quad load instructions.
28607c478bd9Sstevel@tonic-gate *
28617c478bd9Sstevel@tonic-gate * Flow:
28627c478bd9Sstevel@tonic-gate *
28637c478bd9Sstevel@tonic-gate * If count == zero return zero.
28647c478bd9Sstevel@tonic-gate *
28657c478bd9Sstevel@tonic-gate * Store the previous lo_fault handler into %g6.
28667c478bd9Sstevel@tonic-gate * Place our secondary lofault handler into %g5.
28677c478bd9Sstevel@tonic-gate * Place the address of our nowindow fault handler into %o3.
28687c478bd9Sstevel@tonic-gate * Place the address of the windowed fault handler into %o4.
28697c478bd9Sstevel@tonic-gate * --> We'll use this handler if we end up grabbing a window
28707c478bd9Sstevel@tonic-gate * --> before we use block initializing store and quad load ASIs
28717c478bd9Sstevel@tonic-gate *
28727c478bd9Sstevel@tonic-gate * If count is less than or equal to SMALL_LIMIT (7) we
28737c478bd9Sstevel@tonic-gate * always do a byte for byte copy.
28747c478bd9Sstevel@tonic-gate *
28757c478bd9Sstevel@tonic-gate * If count is > SMALL_LIMIT, we check the alignment of the input
28767c478bd9Sstevel@tonic-gate * and output pointers. Based on the alignment we check count
28777c478bd9Sstevel@tonic-gate * against a limit based on detected alignment.  If we exceed the
28787c478bd9Sstevel@tonic-gate * alignment value we copy via block initializing store and quad
28797c478bd9Sstevel@tonic-gate * load instructions.
28807c478bd9Sstevel@tonic-gate *
28817c478bd9Sstevel@tonic-gate * If we don't exceed one of the limits, we store -count in %o3,
28827c478bd9Sstevel@tonic-gate * we store the number of chunks (8, 4, 2 or 1 byte) operated
28837c478bd9Sstevel@tonic-gate * on in our basic copy loop in %o2. Following this we branch
28847c478bd9Sstevel@tonic-gate * to the appropriate copy loop and copy that many chunks.
28857c478bd9Sstevel@tonic-gate * Since we've been adding the chunk size to %o3 each time through
28867c478bd9Sstevel@tonic-gate * as well as decrementing %o2, we can tell if any data is
28877c478bd9Sstevel@tonic-gate * left to be copied by examining %o3. If that is zero, we're
28887c478bd9Sstevel@tonic-gate * done and can go home. If not, we figure out what the largest
28897c478bd9Sstevel@tonic-gate * chunk size left to be copied is and branch to that copy loop
28907c478bd9Sstevel@tonic-gate * unless there's only one byte left. We load that as we're
28917c478bd9Sstevel@tonic-gate * branching to code that stores it just before we return.
28927c478bd9Sstevel@tonic-gate *
28937c478bd9Sstevel@tonic-gate * Fault handlers are invoked if we reference memory that has no
28947c478bd9Sstevel@tonic-gate * current mapping.  All forms share the same copyio_fault handler.
28957c478bd9Sstevel@tonic-gate * This routine handles fixing up the stack and general housecleaning.
28967c478bd9Sstevel@tonic-gate * Each copy operation has a simple fault handler that is then called
28977c478bd9Sstevel@tonic-gate * to do the work specific to the individual operation.  The handlers
28987c478bd9Sstevel@tonic-gate * for copyOP and xcopyOP are found at the end of each individual function.
28997c478bd9Sstevel@tonic-gate * The handlers for xcopyOP_little are found at the end of xcopyin_little.
29007c478bd9Sstevel@tonic-gate * The handlers for copyOP_noerr are found at the end of copyin_noerr.
29017c478bd9Sstevel@tonic-gate */
29027c478bd9Sstevel@tonic-gate
29037c478bd9Sstevel@tonic-gate/*
29047c478bd9Sstevel@tonic-gate * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
29057c478bd9Sstevel@tonic-gate */
29067c478bd9Sstevel@tonic-gate
29077c478bd9Sstevel@tonic-gate#if defined(lint)
29087c478bd9Sstevel@tonic-gate
/*
 * Lint-only C stub for copyout.  It is compiled only when "lint" is
 * defined and exists to give lint a C-level signature to check callers
 * against; it does nothing and always returns 0.  The working
 * implementation is the assembly ENTRY(copyout) in the non-lint branch.
 *
 *	kaddr - kernel source address
 *	uaddr - user destination address
 *	count - number of bytes to copy
 */
29097c478bd9Sstevel@tonic-gate/*ARGSUSED*/
29107c478bd9Sstevel@tonic-gateint
29117c478bd9Sstevel@tonic-gatecopyout(const void *kaddr, void *uaddr, size_t count)
29127c478bd9Sstevel@tonic-gate{ return (0); }
29137c478bd9Sstevel@tonic-gate
29147c478bd9Sstevel@tonic-gate#else	/* lint */
29157c478bd9Sstevel@tonic-gate
29167c478bd9Sstevel@tonic-gate/*
29177c478bd9Sstevel@tonic-gate * We save the arguments in the following registers in case of a fault:
29187c478bd9Sstevel@tonic-gate * 	kaddr - %g2
29197c478bd9Sstevel@tonic-gate * 	uaddr - %g3
29207c478bd9Sstevel@tonic-gate * 	count - %g4
29217c478bd9Sstevel@tonic-gate */
29227c478bd9Sstevel@tonic-gate#define	SAVE_SRC	%g2
29237c478bd9Sstevel@tonic-gate#define	SAVE_DST	%g3
29247c478bd9Sstevel@tonic-gate#define	SAVE_COUNT	%g4
29257c478bd9Sstevel@tonic-gate
/*
 * REAL_LOFAULT holds the address of the operation-specific fault handler;
 * copyio_fault and copyio_fault_nowindow finish by jumping to it (copyout,
 * for example, loads it with .copyout_err).
 *
 * SAVED_LOFAULT holds the t_lofault value that was current when the copy
 * routine was entered, so it can be written back to the thread when the
 * copy finishes or faults.  In the !NIAGARA_IMPL build copyio_fault also
 * tests and clears FPUSED_FLAG in this register.
 */
29267c478bd9Sstevel@tonic-gate#define	REAL_LOFAULT		%g5
29277c478bd9Sstevel@tonic-gate#define	SAVED_LOFAULT		%g6
29287c478bd9Sstevel@tonic-gate
29297c478bd9Sstevel@tonic-gate/*
29307c478bd9Sstevel@tonic-gate * Generic copyio fault handler.  This is the first line of defense when a
29317c478bd9Sstevel@tonic-gate * fault occurs in (x)copyin/(x)copyout.  In order for this to function
29327c478bd9Sstevel@tonic-gate * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
29337c478bd9Sstevel@tonic-gate * This allows us to share common code for all the flavors of the copy
29347c478bd9Sstevel@tonic-gate * operations, including the _noerr versions.
29357c478bd9Sstevel@tonic-gate *
29367c478bd9Sstevel@tonic-gate * Note that this function will restore the original input parameters before
29377c478bd9Sstevel@tonic-gate * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
29387c478bd9Sstevel@tonic-gate * member of the t_copyop structure, if needed.
 *
 * Registers read by this routine:
 *	SAVE_SRC, SAVE_DST, SAVE_COUNT	original arguments; copied into
 *					%o0-%o2 after the "restore"
 *	REAL_LOFAULT			address jumped to on exit
 *	SAVED_LOFAULT			previous t_lofault value; in the
 *					!NIAGARA_IMPL build it also carries
 *					FPUSED_FLAG
 *	%l5, %g1 (!NIAGARA_IMPL only)	written to %gsr and %fprs; presumably
 *					the values the copy routine saved
 *					before using the FPU -- confirm
 *					against the code that sets them
 *
 * This routine executes a "restore"; copyio_fault_nowindow is the variant
 * that does not.
29397c478bd9Sstevel@tonic-gate */
29407c478bd9Sstevel@tonic-gate	ENTRY(copyio_fault)
2941340af271Swh94709#if !defined(NIAGARA_IMPL)
	/*
	 * If FPUSED_FLAG is clear in SAVED_LOFAULT, skip the FP state
	 * restore.  The andn sits in the delay slot of a non-annulled
	 * branch, so the flag is stripped from SAVED_LOFAULT on both paths.
	 */
2942340af271Swh94709	btst	FPUSED_FLAG, SAVED_LOFAULT
2943340af271Swh94709	bz	1f
2944340af271Swh94709	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
2945340af271Swh94709
2946*280575beSPatrick McGehearty	wr	%l5, 0, %gsr		! restore gsr
2947340af271Swh94709
	/*
	 * FPRS_FEF set in %g1: reload the FP registers from the stack.
	 * FPRS_FEF clear: zero the FP registers and write %g1 to %fprs.
	 */
2948*280575beSPatrick McGehearty	btst	FPRS_FEF, %g1
2949340af271Swh94709	bz	%icc, 4f
2950340af271Swh94709	nop
2951340af271Swh94709
2952340af271Swh94709	! restore fpregs from stack
2953340af271Swh94709	BLD_FP_FROMSTACK(%o2)
2954340af271Swh94709
2955340af271Swh94709	ba,pt	%ncc, 1f
2956*280575beSPatrick McGehearty	nop
2957340af271Swh947094:
2958340af271Swh94709	FZERO				! zero all of the fpregs
2959*280575beSPatrick McGehearty	wr	%g1, %g0, %fprs		! restore fprs
2960340af271Swh947091:
	/*
	 * Drop the register window, then reload the argument registers
	 * from the globals.  The last mov executes in the delay slot of
	 * the jmp.
	 *
	 * NOTE(review): this path does not store SAVED_LOFAULT back to
	 * t_lofault, whereas the NIAGARA_IMPL path below does; presumably
	 * the code reached through REAL_LOFAULT takes care of it -- confirm.
	 */
29617c478bd9Sstevel@tonic-gate	restore
29627c478bd9Sstevel@tonic-gate	mov	SAVE_SRC, %o0
29637c478bd9Sstevel@tonic-gate	mov	SAVE_DST, %o1
29647c478bd9Sstevel@tonic-gate	jmp	REAL_LOFAULT
29657c478bd9Sstevel@tonic-gate	mov	SAVE_COUNT, %o2
2966*280575beSPatrick McGehearty
2967*280575beSPatrick McGehearty#else	/* NIAGARA_IMPL */
	/*
	 * No FP state is handled in this build: put the previous handler
	 * back in t_lofault, drop the register window, reload the original
	 * arguments and jump to the operation-specific handler (the last
	 * mov executes in the delay slot of the jmp).
	 */
2968*280575beSPatrick McGehearty	membar	#Sync
2969*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
2970*280575beSPatrick McGehearty	restore
2971*280575beSPatrick McGehearty	mov	SAVE_SRC, %o0
2972*280575beSPatrick McGehearty	mov	SAVE_DST, %o1
2973*280575beSPatrick McGehearty	jmp	REAL_LOFAULT
2974*280575beSPatrick McGehearty	mov	SAVE_COUNT, %o2
2975*280575beSPatrick McGehearty
2976*280575beSPatrick McGehearty#endif	/* NIAGARA_IMPL */
2977*280575beSPatrick McGehearty
29787c478bd9Sstevel@tonic-gate	SET_SIZE(copyio_fault)
29797c478bd9Sstevel@tonic-gate
	/*
	 * Variant of copyio_fault that performs no "restore" and touches no
	 * FP state.  The small-copy path of copyout installs it in t_lofault.
	 * It writes SAVED_LOFAULT back to t_lofault, reloads the original
	 * arguments from SAVE_SRC/SAVE_DST/SAVE_COUNT into %o0-%o2 and jumps
	 * to REAL_LOFAULT; the last mov executes in the delay slot of the jmp.
	 */
29807c478bd9Sstevel@tonic-gate	ENTRY(copyio_fault_nowindow)
29817c478bd9Sstevel@tonic-gate	membar	#Sync
29827c478bd9Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
29837c478bd9Sstevel@tonic-gate
29847c478bd9Sstevel@tonic-gate	mov	SAVE_SRC, %o0
29857c478bd9Sstevel@tonic-gate	mov	SAVE_DST, %o1
29867c478bd9Sstevel@tonic-gate	jmp	REAL_LOFAULT
29877c478bd9Sstevel@tonic-gate	mov	SAVE_COUNT, %o2
29887c478bd9Sstevel@tonic-gate	SET_SIZE(copyio_fault_nowindow)
29897c478bd9Sstevel@tonic-gate
29907c478bd9Sstevel@tonic-gate	ENTRY(copyout)
29917c478bd9Sstevel@tonic-gate	sethi	%hi(.copyout_err), REAL_LOFAULT
29927c478bd9Sstevel@tonic-gate	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
29937c478bd9Sstevel@tonic-gate
2994*280575beSPatrick McGehearty#if !defined(NIAGARA_IMPL)
2995*280575beSPatrick McGehearty.do_copyout:
2996*280575beSPatrick McGehearty	tst	%o2			! check for zero count;  quick exit
2997*280575beSPatrick McGehearty	bz,pt	%ncc, .co_smallqx
2998*280575beSPatrick McGehearty	mov	%o0, SAVE_SRC
2999*280575beSPatrick McGehearty	mov	%o1, SAVE_DST
3000*280575beSPatrick McGehearty	mov	%o2, SAVE_COUNT
3001*280575beSPatrick McGehearty	cmp	%o2, FP_COPY		! check for small copy/leaf case
3002*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_copy_more
3003*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
3004*280575beSPatrick McGehearty/*
3005*280575beSPatrick McGehearty * Small copy out code
3006*280575beSPatrick McGehearty *
3007*280575beSPatrick McGehearty */
3008*280575beSPatrick McGehearty	sethi	%hi(copyio_fault_nowindow), %o3
3009*280575beSPatrick McGehearty	or	%o3, %lo(copyio_fault_nowindow), %o3
3010*280575beSPatrick McGehearty	membar	#Sync
3011*280575beSPatrick McGehearty	stn	%o3, [THREAD_REG + T_LOFAULT]
3012*280575beSPatrick McGehearty
3013*280575beSPatrick McGehearty	mov	ASI_USER, %asi
3014*280575beSPatrick McGehearty	cmp	%o2, SHORTCOPY		! make sure there is enough to align
3015*280575beSPatrick McGehearty	ble,pt	%ncc, .co_smallest
3016*280575beSPatrick McGehearty	andcc	%o1, 0x7, %o3		! is dest long word aligned
3017*280575beSPatrick McGehearty	bnz,pn	%ncc, .co_align
3018*280575beSPatrick McGehearty	andcc	%o1, 1, %o3		! is dest byte aligned
3019*280575beSPatrick McGehearty
3020*280575beSPatrick McGehearty! Destination is long word aligned
3021*280575beSPatrick McGehearty! 8 cases for src alignment; load parts, store long words
3022*280575beSPatrick McGehearty.co_al_src:
3023*280575beSPatrick McGehearty	andcc	%o0, 7, %o3
3024*280575beSPatrick McGehearty	brnz,pt	%o3, .co_src_dst_unal8
3025*280575beSPatrick McGehearty	nop
3026*280575beSPatrick McGehearty/*
3027*280575beSPatrick McGehearty * Special case for handling when src and dest are both long word aligned
3028*280575beSPatrick McGehearty * and total data to move is less than FP_COPY bytes
3029*280575beSPatrick McGehearty * Also handles finish up for large block moves, so may be less than 32 bytes
3030*280575beSPatrick McGehearty */
3031*280575beSPatrick McGehearty.co_medlong:
3032*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
3033*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medl31
3034*280575beSPatrick McGehearty	nop
3035*280575beSPatrick McGehearty.co_medl32:
3036*280575beSPatrick McGehearty	ldx	[%o0], %o4		! move 32 bytes
3037*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count by 32
3038*280575beSPatrick McGehearty	stxa	%o4, [%o1]%asi
3039*280575beSPatrick McGehearty	ldx	[%o0+8], %o4
3040*280575beSPatrick McGehearty	stxa	%o4, [%o1+8]%asi
3041*280575beSPatrick McGehearty	ldx	[%o0+16], %o4
3042*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
3043*280575beSPatrick McGehearty	stxa	%o4, [%o1+16]%asi
3044*280575beSPatrick McGehearty	ldx	[%o0-8], %o4
3045*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
3046*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medl32	! repeat if at least 32 bytes left
3047*280575beSPatrick McGehearty	stxa	%o4, [%o1-8]%asi
3048*280575beSPatrick McGehearty.co_medl31:
3049*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
3050*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medl7		! skip if 7 or fewer bytes left
3051*280575beSPatrick McGehearty	nop
3052*280575beSPatrick McGehearty.co_medl8:
3053*280575beSPatrick McGehearty	ldx	[%o0], %o4		! move 8 bytes
3054*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
3055*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrease count by 8
3056*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
3057*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medl8
3058*280575beSPatrick McGehearty	stxa	%o4, [%o1-8]%asi
3059*280575beSPatrick McGehearty.co_medl7:
3060*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
3061*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_small4	! do final bytes if not finished
3062*280575beSPatrick McGehearty
3063*280575beSPatrick McGehearty.co_smallx:				! finish up and exit
3064*280575beSPatrick McGehearty	membar	#Sync
3065*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3066*280575beSPatrick McGehearty.co_smallqx:
3067*280575beSPatrick McGehearty	retl
3068*280575beSPatrick McGehearty	mov	%g0, %o0
3069*280575beSPatrick McGehearty
3070*280575beSPatrick McGehearty.co_small4:
3071*280575beSPatrick McGehearty	cmp	%o2, 4
3072*280575beSPatrick McGehearty	blt,pt	%ncc, .co_small3x	! skip if less than 4 bytes left
3073*280575beSPatrick McGehearty	nop				!
3074*280575beSPatrick McGehearty	ld	[%o0], %o4		! move 4 bytes
3075*280575beSPatrick McGehearty	add	%o0, 4, %o0		! increase src ptr by 4
3076*280575beSPatrick McGehearty	add	%o1, 4, %o1		! increase dst ptr by 4
3077*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! decrease count by 4
3078*280575beSPatrick McGehearty	bz,pt	%ncc, .co_smallx
3079*280575beSPatrick McGehearty	stwa	%o4, [%o1-4]%asi
3080*280575beSPatrick McGehearty
3081*280575beSPatrick McGehearty.co_small3x:				! Exactly 1, 2, or 3 bytes remain
3082*280575beSPatrick McGehearty	subcc	%o2, 1, %o2		! reduce count for cc test
3083*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load one byte
3084*280575beSPatrick McGehearty	bz,pt	%ncc, .co_smallx
3085*280575beSPatrick McGehearty	stba	%o4, [%o1]%asi		! store one byte
3086*280575beSPatrick McGehearty	ldub	[%o0+1], %o4		! load second byte
3087*280575beSPatrick McGehearty	subcc	%o2, 1, %o2
3088*280575beSPatrick McGehearty	bz,pt	%ncc, .co_smallx
3089*280575beSPatrick McGehearty	stba	%o4, [%o1+1]%asi	! store second byte
3090*280575beSPatrick McGehearty	ldub	[%o0+2], %o4		! load third byte
3091*280575beSPatrick McGehearty	ba	.co_smallx
3092*280575beSPatrick McGehearty	stba	%o4, [%o1+2]%asi	! store third byte
3093*280575beSPatrick McGehearty
3094*280575beSPatrick McGehearty.co_smallest:				! 7 or fewer bytes remain
3095*280575beSPatrick McGehearty	cmp	%o2, 4
3096*280575beSPatrick McGehearty	blt,pt	%ncc, .co_small3x
3097*280575beSPatrick McGehearty	nop
3098*280575beSPatrick McGehearty	ldub	[%o0], %o4		! read byte
3099*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! reduce count by 4
3100*280575beSPatrick McGehearty	stba	%o4, [%o1]%asi		! write byte
3101*280575beSPatrick McGehearty	ldub	[%o0+1], %o4		! repeat for total of 4 bytes
3102*280575beSPatrick McGehearty	add	%o0, 4, %o0		! advance src by 4
3103*280575beSPatrick McGehearty	stba	%o4, [%o1+1]%asi
3104*280575beSPatrick McGehearty	ldub	[%o0-2], %o4
3105*280575beSPatrick McGehearty	add	%o1, 4, %o1		! advance dst by 4
3106*280575beSPatrick McGehearty	stba	%o4, [%o1-2]%asi
3107*280575beSPatrick McGehearty	ldub	[%o0-1], %o4
3108*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_small3x
3109*280575beSPatrick McGehearty	stba	%o4, [%o1-1]%asi
3110*280575beSPatrick McGehearty	membar	#Sync
3111*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3112*280575beSPatrick McGehearty	retl
3113*280575beSPatrick McGehearty	mov	%g0, %o0
3114*280575beSPatrick McGehearty
3115*280575beSPatrick McGehearty.co_align:				! byte align test in prior branch delay
3116*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_al_d1
3117*280575beSPatrick McGehearty.co_al_d1f:				! dest is now half word aligned
3118*280575beSPatrick McGehearty	andcc	%o1, 2, %o3
3119*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_al_d2
3120*280575beSPatrick McGehearty.co_al_d2f:				! dest is now word aligned
3121*280575beSPatrick McGehearty	andcc	%o1, 4, %o3		! is dest longword aligned?
3122*280575beSPatrick McGehearty	bz,pt	%ncc, .co_al_src
3123*280575beSPatrick McGehearty	nop
3124*280575beSPatrick McGehearty.co_al_d4:				! dest is word aligned;  src is unknown
3125*280575beSPatrick McGehearty	ldub	[%o0], %o4		! move a word (src align unknown)
3126*280575beSPatrick McGehearty	ldub	[%o0+1], %o3
3127*280575beSPatrick McGehearty	sll	%o4, 24, %o4		! position
3128*280575beSPatrick McGehearty	sll	%o3, 16, %o3		! position
3129*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
3130*280575beSPatrick McGehearty	ldub	[%o0+2], %o4
3131*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
3132*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
3133*280575beSPatrick McGehearty	ldub	[%o0+3], %o4
3134*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
3135*280575beSPatrick McGehearty	stwa	%o4,[%o1]%asi		! store four bytes
3136*280575beSPatrick McGehearty	add	%o0, 4, %o0		! adjust src by 4
3137*280575beSPatrick McGehearty	add	%o1, 4, %o1		! adjust dest by 4
3138*280575beSPatrick McGehearty	sub	%o2, 4, %o2		! adjust count by 4
3139*280575beSPatrick McGehearty	andcc	%o0, 7, %o3		! check for src long word alignment
3140*280575beSPatrick McGehearty	brz,pt	%o3, .co_medlong
3141*280575beSPatrick McGehearty.co_src_dst_unal8:
3142*280575beSPatrick McGehearty	! dst is 8-byte aligned, src is not
3143*280575beSPatrick McGehearty	! Size is less than FP_COPY
3144*280575beSPatrick McGehearty	! Following code is to select for alignment
3145*280575beSPatrick McGehearty	andcc	%o0, 0x3, %o3		! test word alignment
3146*280575beSPatrick McGehearty	bz,pt	%ncc, .co_medword
3147*280575beSPatrick McGehearty	nop
3148*280575beSPatrick McGehearty	andcc	%o0, 0x1, %o3		! test halfword alignment
3149*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_med_byte	! go to byte move if not halfword
3150*280575beSPatrick McGehearty	andcc	%o0, 0x2, %o3		! test which byte alignment
3151*280575beSPatrick McGehearty	ba	.co_medhalf
3152*280575beSPatrick McGehearty	nop
3153*280575beSPatrick McGehearty.co_al_d1:				! align dest to half word
3154*280575beSPatrick McGehearty	ldub	[%o0], %o4		! move a byte
3155*280575beSPatrick McGehearty	add	%o0, 1, %o0
3156*280575beSPatrick McGehearty	stba	%o4, [%o1]%asi
3157*280575beSPatrick McGehearty	add	%o1, 1, %o1
3158*280575beSPatrick McGehearty	andcc	%o1, 2, %o3
3159*280575beSPatrick McGehearty	bz,pt	%ncc, .co_al_d2f
3160*280575beSPatrick McGehearty	sub	%o2, 1, %o2
3161*280575beSPatrick McGehearty.co_al_d2:				! align dest to word
3162*280575beSPatrick McGehearty	ldub	[%o0], %o4		! move a half-word (src align unknown)
3163*280575beSPatrick McGehearty	ldub	[%o0+1], %o3
3164*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
3165*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
3166*280575beSPatrick McGehearty	stha	%o4, [%o1]%asi
3167*280575beSPatrick McGehearty	add	%o0, 2, %o0
3168*280575beSPatrick McGehearty	add	%o1, 2, %o1
3169*280575beSPatrick McGehearty	andcc	%o1, 4, %o3		! is dest longword aligned?
3170*280575beSPatrick McGehearty	bz,pt	%ncc, .co_al_src
3171*280575beSPatrick McGehearty	sub	%o2, 2, %o2
3172*280575beSPatrick McGehearty	ba	.co_al_d4
3173*280575beSPatrick McGehearty	nop
3174*280575beSPatrick McGehearty/*
3175*280575beSPatrick McGehearty * Handle all cases where src and dest are aligned on word
3176*280575beSPatrick McGehearty * boundaries. Use unrolled loops for better performance.
3177*280575beSPatrick McGehearty * This option wins over standard large data move when
3178*280575beSPatrick McGehearty * source and destination is in cache for medium
3179*280575beSPatrick McGehearty * to short data moves.
3180*280575beSPatrick McGehearty */
3181*280575beSPatrick McGehearty.co_medword:
3182*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
3183*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medw31
3184*280575beSPatrick McGehearty	nop
3185*280575beSPatrick McGehearty.co_medw32:
3186*280575beSPatrick McGehearty	ld	[%o0], %o4		! move a block of 32 bytes
3187*280575beSPatrick McGehearty	stwa	%o4, [%o1]%asi
3188*280575beSPatrick McGehearty	ld	[%o0+4], %o4
3189*280575beSPatrick McGehearty	stwa	%o4, [%o1+4]%asi
3190*280575beSPatrick McGehearty	ld	[%o0+8], %o4
3191*280575beSPatrick McGehearty	stwa	%o4, [%o1+8]%asi
3192*280575beSPatrick McGehearty	ld	[%o0+12], %o4
3193*280575beSPatrick McGehearty	stwa	%o4, [%o1+12]%asi
3194*280575beSPatrick McGehearty	ld	[%o0+16], %o4
3195*280575beSPatrick McGehearty	stwa	%o4, [%o1+16]%asi
3196*280575beSPatrick McGehearty	ld	[%o0+20], %o4
3197*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
3198*280575beSPatrick McGehearty	stwa	%o4, [%o1+20]%asi
3199*280575beSPatrick McGehearty	ld	[%o0+24], %o4
3200*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
3201*280575beSPatrick McGehearty	stwa	%o4, [%o1+24]%asi
3202*280575beSPatrick McGehearty	ld	[%o0-4], %o4
3203*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
3204*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medw32	! repeat if at least 32 bytes left
3205*280575beSPatrick McGehearty	stwa	%o4, [%o1-4]%asi
3206*280575beSPatrick McGehearty.co_medw31:
3207*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
3208*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medw7		! skip if 7 or fewer bytes left
3209*280575beSPatrick McGehearty	nop				!
3210*280575beSPatrick McGehearty.co_medw15:
3211*280575beSPatrick McGehearty	ld	[%o0], %o4		! move a block of 8 bytes
3212*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
3213*280575beSPatrick McGehearty	stwa	%o4, [%o1]%asi
3214*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
3215*280575beSPatrick McGehearty	ld	[%o0-4], %o4
3216*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
3217*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medw15
3218*280575beSPatrick McGehearty	stwa	%o4, [%o1-4]%asi
3219*280575beSPatrick McGehearty.co_medw7:
3220*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
3221*280575beSPatrick McGehearty	bz,pt	%ncc, .co_smallx	! exit if finished
3222*280575beSPatrick McGehearty	cmp	%o2, 4
3223*280575beSPatrick McGehearty	blt,pt	%ncc, .co_small3x	! skip if less than 4 bytes left
3224*280575beSPatrick McGehearty	nop				!
3225*280575beSPatrick McGehearty	ld	[%o0], %o4		! move 4 bytes
3226*280575beSPatrick McGehearty	add	%o0, 4, %o0		! increase src ptr by 4
3227*280575beSPatrick McGehearty	add	%o1, 4, %o1		! increase dst ptr by 4
3228*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! decrease count by 4
3229*280575beSPatrick McGehearty	bnz	.co_small3x
3230*280575beSPatrick McGehearty	stwa	%o4, [%o1-4]%asi
3231*280575beSPatrick McGehearty	membar	#Sync
3232*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3233*280575beSPatrick McGehearty	retl
3234*280575beSPatrick McGehearty	mov	%g0, %o0
3235*280575beSPatrick McGehearty
3236*280575beSPatrick McGehearty.co_medhalf:
3237*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
3238*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medh31
3239*280575beSPatrick McGehearty	nop
3240*280575beSPatrick McGehearty.co_medh32:				! load and store block of 32 bytes
3241*280575beSPatrick McGehearty
3242*280575beSPatrick McGehearty	lduh	[%o0], %o4		! move 32 bytes
3243*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
3244*280575beSPatrick McGehearty	lduw	[%o0+2], %o3
3245*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
3246*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
3247*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3248*280575beSPatrick McGehearty	lduh	[%o0+6], %o4
3249*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3250*280575beSPatrick McGehearty	stxa	%o4, [%o1]%asi
3251*280575beSPatrick McGehearty
3252*280575beSPatrick McGehearty	lduh	[%o0+8], %o4
3253*280575beSPatrick McGehearty	lduw	[%o0+10], %o3
3254*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
3255*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
3256*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3257*280575beSPatrick McGehearty	lduh	[%o0+14], %o4
3258*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3259*280575beSPatrick McGehearty	stxa	%o4, [%o1+8]%asi
3260*280575beSPatrick McGehearty
3261*280575beSPatrick McGehearty	lduh	[%o0+16], %o4
3262*280575beSPatrick McGehearty	lduw	[%o0+18], %o3
3263*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
3264*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
3265*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3266*280575beSPatrick McGehearty	lduh	[%o0+22], %o4
3267*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3268*280575beSPatrick McGehearty	stxa	%o4, [%o1+16]%asi
3269*280575beSPatrick McGehearty
3270*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
3271*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
3272*280575beSPatrick McGehearty
3273*280575beSPatrick McGehearty	lduh	[%o0-8], %o4
3274*280575beSPatrick McGehearty	lduw	[%o0-6], %o3
3275*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
3276*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
3277*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3278*280575beSPatrick McGehearty	lduh	[%o0-2], %o4
3279*280575beSPatrick McGehearty	or	%o3, %o4, %o4
3280*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medh32	! repeat if at least 32 bytes left
3281*280575beSPatrick McGehearty	stxa	%o4, [%o1-8]%asi
3282*280575beSPatrick McGehearty
3283*280575beSPatrick McGehearty.co_medh31:
3284*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
3285*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medh7		! skip if 7 or fewer bytes left
3286*280575beSPatrick McGehearty	nop				!
3287*280575beSPatrick McGehearty.co_medh15:
3288*280575beSPatrick McGehearty	lduh	[%o0], %o4		! move 16 bytes
3289*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
3290*280575beSPatrick McGehearty	lduw	[%o0+2], %o3
3291*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
3292*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
3293*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3294*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
3295*280575beSPatrick McGehearty	lduh	[%o0+6], %o4
3296*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
3297*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3298*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medh15
3299*280575beSPatrick McGehearty	stxa	%o4, [%o1-8]%asi
3300*280575beSPatrick McGehearty.co_medh7:
3301*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
3302*280575beSPatrick McGehearty	bz,pt	%ncc, .co_smallx	! exit if finished
3303*280575beSPatrick McGehearty	cmp	%o2, 4
3304*280575beSPatrick McGehearty	blt,pt	%ncc, .co_small3x	! skip if less than 4 bytes left
3305*280575beSPatrick McGehearty	nop				!
3306*280575beSPatrick McGehearty	lduh	[%o0], %o4
3307*280575beSPatrick McGehearty	sll	%o4, 16, %o4
3308*280575beSPatrick McGehearty	lduh	[%o0+2], %o3
3309*280575beSPatrick McGehearty	or	%o3, %o4, %o4
3310*280575beSPatrick McGehearty	subcc	%o2, 4, %o2
3311*280575beSPatrick McGehearty	add	%o0, 4, %o0
3312*280575beSPatrick McGehearty	add	%o1, 4, %o1
3313*280575beSPatrick McGehearty	bnz	.co_small3x
3314*280575beSPatrick McGehearty	stwa	%o4, [%o1-4]%asi
3315*280575beSPatrick McGehearty	membar	#Sync
3316*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3317*280575beSPatrick McGehearty	retl
3318*280575beSPatrick McGehearty	mov	%g0, %o0
3319*280575beSPatrick McGehearty
3320*280575beSPatrick McGehearty	.align 16
3321*280575beSPatrick McGehearty.co_med_byte:
3322*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_medbh32a	! go to correct byte move
3323*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
3324*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medb31
3325*280575beSPatrick McGehearty	nop
3326*280575beSPatrick McGehearty.co_medb32:				! Alignment 1 or 5
3327*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
3328*280575beSPatrick McGehearty
3329*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 32 bytes
3330*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3331*280575beSPatrick McGehearty	lduh	[%o0+1], %o4
3332*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
3333*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3334*280575beSPatrick McGehearty	lduw	[%o0+3], %o4
3335*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3336*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3337*280575beSPatrick McGehearty	ldub	[%o0+7], %o4
3338*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3339*280575beSPatrick McGehearty	stxa	%o4, [%o1]%asi
3340*280575beSPatrick McGehearty
3341*280575beSPatrick McGehearty	ldub	[%o0+8], %o4
3342*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3343*280575beSPatrick McGehearty	lduh	[%o0+9], %o4
3344*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
3345*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3346*280575beSPatrick McGehearty	lduw	[%o0+11], %o4
3347*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3348*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3349*280575beSPatrick McGehearty	ldub	[%o0+15], %o4
3350*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3351*280575beSPatrick McGehearty	stxa	%o4, [%o1+8]%asi
3352*280575beSPatrick McGehearty
3353*280575beSPatrick McGehearty	ldub	[%o0+16], %o4
3354*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3355*280575beSPatrick McGehearty	lduh	[%o0+17], %o4
3356*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
3357*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3358*280575beSPatrick McGehearty	lduw	[%o0+19], %o4
3359*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3360*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3361*280575beSPatrick McGehearty	ldub	[%o0+23], %o4
3362*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3363*280575beSPatrick McGehearty	stxa	%o4, [%o1+16]%asi
3364*280575beSPatrick McGehearty
3365*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
3366*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
3367*280575beSPatrick McGehearty
3368*280575beSPatrick McGehearty	ldub	[%o0-8], %o4
3369*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3370*280575beSPatrick McGehearty	lduh	[%o0-7], %o4
3371*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
3372*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3373*280575beSPatrick McGehearty	lduw	[%o0-5], %o4
3374*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3375*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3376*280575beSPatrick McGehearty	ldub	[%o0-1], %o4
3377*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3378*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medb32	! repeat if at least 32 bytes left
3379*280575beSPatrick McGehearty	stxa	%o4, [%o1-8]%asi
3380*280575beSPatrick McGehearty
3381*280575beSPatrick McGehearty.co_medb31:				! 31 or fewer bytes remaining
3382*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
3383*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medb7		! skip if 7 or fewer bytes left
3384*280575beSPatrick McGehearty	nop				!
3385*280575beSPatrick McGehearty.co_medb15:
3386*280575beSPatrick McGehearty
3387*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 8 bytes
3388*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
3389*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3390*280575beSPatrick McGehearty	lduh	[%o0+1], %o4
3391*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
3392*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3393*280575beSPatrick McGehearty	lduw	[%o0+3], %o4
3394*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 16
3395*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3396*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3397*280575beSPatrick McGehearty	ldub	[%o0+7], %o4
3398*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 16
3399*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3400*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medb15
3401*280575beSPatrick McGehearty	stxa	%o4, [%o1-8]%asi
3402*280575beSPatrick McGehearty.co_medb7:
3403*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
3404*280575beSPatrick McGehearty	bz,pt	%ncc, .co_smallx	! exit if finished
3405*280575beSPatrick McGehearty	cmp	%o2, 4
3406*280575beSPatrick McGehearty	blt,pt	%ncc, .co_small3x	! skip if less than 4 bytes left
3407*280575beSPatrick McGehearty	nop				!
3408*280575beSPatrick McGehearty	ldub	[%o0], %o4		! move 4 bytes
3409*280575beSPatrick McGehearty	sll	%o4, 24, %o3
3410*280575beSPatrick McGehearty	lduh	[%o0+1], %o4
3411*280575beSPatrick McGehearty	sll	%o4, 8, %o4
3412*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3413*280575beSPatrick McGehearty	ldub	[%o0+3], %o4
3414*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3415*280575beSPatrick McGehearty	subcc	%o2, 4, %o2
3416*280575beSPatrick McGehearty	add	%o0, 4, %o0
3417*280575beSPatrick McGehearty	add	%o1, 4, %o1
3418*280575beSPatrick McGehearty	bnz	.co_small3x
3419*280575beSPatrick McGehearty	stwa	%o4, [%o1-4]%asi
3420*280575beSPatrick McGehearty	membar	#Sync
3421*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3422*280575beSPatrick McGehearty	retl
3423*280575beSPatrick McGehearty	mov	%g0, %o0
3424*280575beSPatrick McGehearty
3425*280575beSPatrick McGehearty	.align 16
/*
 * .co_medbh32a / .co_medbh32: copy loop moving 32 bytes per pass for a
 * source whose alignment is 3 or 7.  Each 8-byte destination word is
 * assembled in %o3/%o4 from a byte, a word, a half-word and a byte
 * loaded at offsets +0, +1, +5 and +7, then written with stxa through
 * %asi.  %o0 is the source pointer, %o1 the destination pointer and
 * %o2 the length counter.
 *
 * .co_medbh32a acts on condition codes set by the code that branched
 * to it and goes to .co_medbh31 on "le"; otherwise it falls into the
 * loop.
 */
3426*280575beSPatrick McGehearty.co_medbh32a:
3427*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medbh31
3428*280575beSPatrick McGehearty	nop
3429*280575beSPatrick McGehearty.co_medbh32:				! Alignment 3 or 7
3430*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
3431*280575beSPatrick McGehearty
3432*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 32 bytes
3433*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3434*280575beSPatrick McGehearty	lduw	[%o0+1], %o4
3435*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
3436*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3437*280575beSPatrick McGehearty	lduh	[%o0+5], %o4
3438*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3439*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3440*280575beSPatrick McGehearty	ldub	[%o0+7], %o4
3441*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3442*280575beSPatrick McGehearty	stxa	%o4, [%o1]%asi
3443*280575beSPatrick McGehearty
3444*280575beSPatrick McGehearty	ldub	[%o0+8], %o4
3445*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3446*280575beSPatrick McGehearty	lduw	[%o0+9], %o4
3447*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
3448*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3449*280575beSPatrick McGehearty	lduh	[%o0+13], %o4
3450*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3451*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3452*280575beSPatrick McGehearty	ldub	[%o0+15], %o4
3453*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3454*280575beSPatrick McGehearty	stxa	%o4, [%o1+8]%asi
3455*280575beSPatrick McGehearty
3456*280575beSPatrick McGehearty	ldub	[%o0+16], %o4
3457*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3458*280575beSPatrick McGehearty	lduw	[%o0+17], %o4
3459*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
3460*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3461*280575beSPatrick McGehearty	lduh	[%o0+21], %o4
3462*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3463*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3464*280575beSPatrick McGehearty	ldub	[%o0+23], %o4
3465*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3466*280575beSPatrick McGehearty	stxa	%o4, [%o1+16]%asi
3467*280575beSPatrick McGehearty
	/* Pointers are advanced here, so the fourth word uses negative offsets. */
3468*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
3469*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
3470*280575beSPatrick McGehearty
3471*280575beSPatrick McGehearty	ldub	[%o0-8], %o4
3472*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3473*280575beSPatrick McGehearty	lduw	[%o0-7], %o4
3474*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
3475*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3476*280575beSPatrick McGehearty	lduh	[%o0-3], %o4
3477*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3478*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3479*280575beSPatrick McGehearty	ldub	[%o0-1], %o4
3480*280575beSPatrick McGehearty	or	%o4, %o3, %o4
	/*
	 * The branch tests the condition codes of the subcc at the loop top
	 * (nothing in between sets them); the fourth store fills the delay
	 * slot and executes whether or not the branch is taken.
	 */
3481*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medbh32	! repeat if at least 32 bytes left
3482*280575beSPatrick McGehearty	stxa	%o4, [%o1-8]%asi
3483*280575beSPatrick McGehearty
/*
 * .co_medbh31 / .co_medbh15: continue the alignment-3-or-7 copy eight
 * bytes at a time.
 *
 * %o0 = source pointer, %o1 = destination pointer (written through
 * %asi), %o2 = length counter, %o3/%o4 = scratch.  Adding 24 to %o2
 * leaves the counter off by 7, so "le" means 7 or fewer bytes remain
 * and control goes straight to .co_medb7.
 *
 * Each pass builds one 8-byte word from loads at +0 (byte), +1 (word),
 * +5 (half-word) and +7 (byte) and stores it once, from the branch
 * delay slot, after both pointers have been advanced.
 *
 * The word was previously stored twice per pass: at [%o1] before the
 * pointer update and again at [%o1-8] in the delay slot, i.e. the same
 * value to the same address.  Only the delay-slot store is kept, which
 * is also how the 32-byte loop above issues its last store.
 */
3484*280575beSPatrick McGehearty.co_medbh31:
3485*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
3486*280575beSPatrick McGehearty	ble,pt	%ncc, .co_medb7		! skip if 7 or fewer bytes left
3487*280575beSPatrick McGehearty	nop				!
3488*280575beSPatrick McGehearty.co_medbh15:
3489*280575beSPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 8 bytes
3490*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
3491*280575beSPatrick McGehearty	lduw	[%o0+1], %o4
3492*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
3493*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3494*280575beSPatrick McGehearty	lduh	[%o0+5], %o4
3495*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
3496*280575beSPatrick McGehearty	or	%o4, %o3, %o3
3497*280575beSPatrick McGehearty	ldub	[%o0+7], %o4
3498*280575beSPatrick McGehearty	or	%o4, %o3, %o4
3500*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
3501*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
3502*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
3503*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_medbh15
	/* Delay slot: the single store of this pass; %o1 was already advanced. */
3504*280575beSPatrick McGehearty	stxa	%o4, [%o1-8]%asi
3505*280575beSPatrick McGehearty	ba	.co_medb7
3506*280575beSPatrick McGehearty	nop
3507*280575beSPatrick McGehearty/*
3508*280575beSPatrick McGehearty * End of small copy (no window) code
3509*280575beSPatrick McGehearty */
3510*280575beSPatrick McGehearty
3511*280575beSPatrick McGehearty/*
3512*280575beSPatrick McGehearty * Long copy code
3513*280575beSPatrick McGehearty */
/*
 * .co_copy_more: entry to the long-copy path.
 *
 * Stores the address of copyio_fault into the thread's T_LOFAULT slot
 * (after a membar #Sync), opens a new register window with room for
 * HWCOPYFRAMESIZE, and sets FPUSED_FLAG in SAVED_LOFAULT.  It then
 * reads %fprs: if FPRS_FEF is clear, control goes to .co_fp_unused,
 * which sets it; otherwise BST_FP_TOSTACK(%o3) runs first (macro
 * defined elsewhere; presumably it spills the FP registers this path
 * uses to the frame just allocated -- confirm against its definition).
 * Both paths meet at .co_fp_ready.
 */
3514*280575beSPatrick McGehearty.co_copy_more:
3515*280575beSPatrick McGehearty	sethi	%hi(copyio_fault), %o3
3516*280575beSPatrick McGehearty	or	%o3, %lo(copyio_fault), %o3
3517*280575beSPatrick McGehearty	membar	#Sync
3518*280575beSPatrick McGehearty	stn	%o3, [THREAD_REG + T_LOFAULT]
3519*280575beSPatrick McGehearty
3520*280575beSPatrick McGehearty/*
3521*280575beSPatrick McGehearty * Following code is for large copies. We know there is at
3522*280575beSPatrick McGehearty * least FP_COPY bytes available. FP regs are used, so
3523*280575beSPatrick McGehearty *  we save registers and fp regs before starting
3524*280575beSPatrick McGehearty */
3525*280575beSPatrick McGehearty	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3526*280575beSPatrick McGehearty	or	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3527*280575beSPatrick McGehearty	rd	%fprs, %g1		! check for unused fp
3528*280575beSPatrick McGehearty	! if fprs.fef == 0, set it.
3529*280575beSPatrick McGehearty	! Setting it when already set costs more than checking
3530*280575beSPatrick McGehearty	andcc	%g1, FPRS_FEF, %g1	! test FEF, fprs.du = fprs.dl = 0
3531*280575beSPatrick McGehearty	bz,pt	%ncc, .co_fp_unused
	/* Delay slot: %asi is set to ASI_USER on both paths. */
3532*280575beSPatrick McGehearty	mov	ASI_USER, %asi
3533*280575beSPatrick McGehearty	BST_FP_TOSTACK(%o3)
	/*
	 * The prefetch below is both the delay slot of this "ba" and the
	 * first instruction of .co_fp_unused, so it executes on either path;
	 * only the "wr" is skipped when FP was already enabled.
	 */
3534*280575beSPatrick McGehearty	ba	.co_fp_ready
3535*280575beSPatrick McGehearty.co_fp_unused:
3536*280575beSPatrick McGehearty	prefetch [%i0 + (1 * CACHE_LINE)], #one_read
3537*280575beSPatrick McGehearty	wr	%g0, FPRS_FEF, %fprs	! fprs.fef = 1
/*
 * .co_fp_ready: save %gsr in %l5, then test bits 0, 1 and 2 of the
 * destination pointer (%i1) and branch to .co_big_d1 / .co_big_d2 /
 * .co_big_d4 to copy 1, 2 or 4 bytes respectively.  The .co_big_dNf
 * labels are where those handlers re-enter this sequence.
 *
 * None of the three bnz branches has a nop in its delay slot: the
 * delay slot is the next test's andcc.  A handler is therefore entered
 * with %o3 and the condition codes already overwritten by the
 * following test; .co_big_d1 and .co_big_d2 issue their own andcc
 * before branching, and .co_big_d4 ends with an unconditional branch.
 *
 * Finally, if the source (%i0) is not 8-byte aligned, control goes to
 * .co_big_unal8; the prefetch is that branch's delay slot.
 */
3538*280575beSPatrick McGehearty.co_fp_ready:
3539*280575beSPatrick McGehearty	rd	%gsr, %l5		! save %gsr value
3540*280575beSPatrick McGehearty	andcc	%i1, 1, %o3		! is dest byte aligned
3541*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_big_d1
3542*280575beSPatrick McGehearty.co_big_d1f:				! dest is now half word aligned
3543*280575beSPatrick McGehearty	andcc	%i1, 2, %o3
3544*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_big_d2
3545*280575beSPatrick McGehearty.co_big_d2f:				! dest is now word aligned
3546*280575beSPatrick McGehearty	andcc	%i1, 4, %o3		! is dest longword aligned
3547*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_big_d4
3548*280575beSPatrick McGehearty.co_big_d4f:				! dest is now long word aligned
3549*280575beSPatrick McGehearty	andcc	%i0, 7, %o3		! is src long word aligned
3550*280575beSPatrick McGehearty	brnz,pt	%o3, .co_big_unal8
3551*280575beSPatrick McGehearty	prefetch [%i0 + (2 * CACHE_LINE)], #one_read
/*
 * Source and destination are both 8-byte aligned here.  If the
 * destination (%i1) is already 64-byte aligned, go to .co_al_to_64.
 * Otherwise %o3 becomes (dst & 0x3f) - 64, a negative multiple of 8
 * whose magnitude is the number of bytes needed to reach the next
 * 64-byte boundary, and that amount is taken off the remaining count
 * in %i2 up front.  If bit 3 of %o3 is set, a single 8-byte word is
 * copied (ldx / stxa to ASI_USER) and %o3 moves 8 closer to zero;
 * execution then continues at .co_al_to_16.
 */
3552*280575beSPatrick McGehearty	! Src and dst are long word aligned
3553*280575beSPatrick McGehearty	! align dst to 64 byte boundary
3554*280575beSPatrick McGehearty	andcc	%i1, 0x3f, %o3		! %o3 == 0 means dst is 64 byte aligned
3555*280575beSPatrick McGehearty	brz,pn	%o3, .co_al_to_64
3556*280575beSPatrick McGehearty	nop
3557*280575beSPatrick McGehearty	sub	%o3, 64, %o3		! %o3 has negative bytes to move
3558*280575beSPatrick McGehearty	add	%i2, %o3, %i2		! adjust remaining count
3559*280575beSPatrick McGehearty	andcc	%o3, 8, %o4		! odd long words to move?
3560*280575beSPatrick McGehearty	brz,pt	%o4, .co_al_to_16
3561*280575beSPatrick McGehearty	nop
3562*280575beSPatrick McGehearty	add	%o3, 8, %o3
3563*280575beSPatrick McGehearty	ldx	[%i0], %o4
3564*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increment src ptr
3565*280575beSPatrick McGehearty	stxa	%o4, [%i1]ASI_USER
3566*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increment dst ptr
/*
 * .co_al_to_16 / .co_al_mv_16: copy 16 bytes per pass (two ldx / stxa
 * pairs to ASI_USER).  %o3 is a negative byte count that is raised by
 * 16 each pass; the loop runs while bits 4-5 of %o3 (mask 0x30) are
 * non-zero and then continues at .co_al_to_64.
 * %i0 = source pointer, %i1 = destination pointer, %o4 = scratch.
 */
3567*280575beSPatrick McGehearty! Dest is aligned on 16 bytes, src 8 byte aligned
3568*280575beSPatrick McGehearty.co_al_to_16:
3569*280575beSPatrick McGehearty	andcc	%o3, 0x30, %o4		! more to move?
3570*280575beSPatrick McGehearty	brz,pt	%o4, .co_al_to_64
3571*280575beSPatrick McGehearty	nop
3572*280575beSPatrick McGehearty.co_al_mv_16:
3573*280575beSPatrick McGehearty	add	%o3, 16, %o3
3574*280575beSPatrick McGehearty	ldx	[%i0], %o4
3575*280575beSPatrick McGehearty	stxa	%o4, [%i1]ASI_USER
3576*280575beSPatrick McGehearty	add	%i0, 16, %i0		! increment src ptr
3577*280575beSPatrick McGehearty	ldx	[%i0-8], %o4
3578*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increment dst ptr
3579*280575beSPatrick McGehearty	stxa	%o4, [%i1]ASI_USER
3580*280575beSPatrick McGehearty	andcc	%o3, 0x30, %o4
3581*280575beSPatrick McGehearty	brnz,pt	%o4, .co_al_mv_16
3582*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increment dst ptr
/*
 * .co_al_to_64: eight-way dispatch on bits 5, 4 and 3 of the source
 * pointer (%i0 & 32, & 16, & 8).  The three digits in each target name
 * .co_aln_XYZ are those bits in that order, so .co_aln_000 is a source
 * that is 64-byte aligned and .co_aln_111 falls through at the end.
 *
 * brz/brnz test the register value as it is at the branch; the andcc in
 * each delay slot then loads %o3 with the NEXT bit to test.  As a
 * result .co_aln_01 and .co_aln_11 are entered with %o3 = %i0 & 8 and
 * test it directly, while .co_aln_1 recomputes %i0 & 16 itself.
 * The remaining delay slots issue prefetches for later source lines.
 */
3583*280575beSPatrick McGehearty! Dest is aligned on 64 bytes, src 8 byte aligned
3584*280575beSPatrick McGehearty.co_al_to_64:
3585*280575beSPatrick McGehearty	! Determine source alignment
3586*280575beSPatrick McGehearty	! to correct 8 byte offset
3587*280575beSPatrick McGehearty	andcc	%i0, 32, %o3
3588*280575beSPatrick McGehearty	brnz,pn	%o3, .co_aln_1
3589*280575beSPatrick McGehearty	andcc	%i0, 16, %o3
3590*280575beSPatrick McGehearty	brnz,pn	%o3, .co_aln_01
3591*280575beSPatrick McGehearty	andcc	%i0, 8, %o3
3592*280575beSPatrick McGehearty	brz,pn	%o3, .co_aln_000
3593*280575beSPatrick McGehearty	prefetch [%i0 + (3 * CACHE_LINE)], #one_read
3594*280575beSPatrick McGehearty	ba	.co_aln_001
3595*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3596*280575beSPatrick McGehearty.co_aln_01:
3597*280575beSPatrick McGehearty	brnz,pn	%o3, .co_aln_011
3598*280575beSPatrick McGehearty	prefetch [%i0 + (3 * CACHE_LINE)], #one_read
3599*280575beSPatrick McGehearty	ba	.co_aln_010
3600*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3601*280575beSPatrick McGehearty.co_aln_1:
3602*280575beSPatrick McGehearty	andcc	%i0, 16, %o3
3603*280575beSPatrick McGehearty	brnz,pn	%o3, .co_aln_11
3604*280575beSPatrick McGehearty	andcc	%i0, 8, %o3
3605*280575beSPatrick McGehearty	brnz,pn	%o3, .co_aln_101
3606*280575beSPatrick McGehearty	prefetch [%i0 + (3 * CACHE_LINE)], #one_read
3607*280575beSPatrick McGehearty	ba	.co_aln_100
3608*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3609*280575beSPatrick McGehearty.co_aln_11:
3610*280575beSPatrick McGehearty	brz,pn	%o3, .co_aln_110
3611*280575beSPatrick McGehearty	prefetch [%i0 + (3 * CACHE_LINE)], #one_read
3612*280575beSPatrick McGehearty
/*
 * .co_aln_111: source is 8 bytes below a 64-byte boundary.
 *
 * One doubleword is preloaded into %d0 and the source pointer and count
 * are adjusted by 8.  %o3 receives the count rounded down to a multiple
 * of 128 and %i2 keeps the remainder.  %i1 is converted to (dst - src)
 * so that [%i0+%i1] addresses the destination while only %i0 advances;
 * it is converted back after the loop.
 *
 * Each pass block-loads 64 source bytes into %d16-%d30, copies
 * %d16-%d28 into %d2-%d14 to complete a block in %d0-%d14, stores that
 * block to user space, and carries %d30 into %d0 for the next pass.
 * After the loop the carried doubleword is stored and control goes to
 * .co_remain_stuff with the destination advanced by 8 in the delay
 * slot.
 */
3613*280575beSPatrick McGehearty.co_aln_111:
3614*280575beSPatrick McGehearty! Alignment off by 8 bytes
3615*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3616*280575beSPatrick McGehearty	ldd	[%i0], %d0
3617*280575beSPatrick McGehearty	add	%i0, 8, %i0
3618*280575beSPatrick McGehearty	sub	%i2, 8, %i2
3619*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
3620*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
3621*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
3622*280575beSPatrick McGehearty.co_aln_111_loop:
3623*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16		! block load
3624*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
3625*280575beSPatrick McGehearty	fmovd	%d16, %d2
3626*280575beSPatrick McGehearty	fmovd	%d18, %d4
3627*280575beSPatrick McGehearty	fmovd	%d20, %d6
3628*280575beSPatrick McGehearty	fmovd	%d22, %d8
3629*280575beSPatrick McGehearty	fmovd	%d24, %d10
3630*280575beSPatrick McGehearty	fmovd	%d26, %d12
3631*280575beSPatrick McGehearty	fmovd	%d28, %d14
3632*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_AIUS	! block initializing store
3633*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_AIUS
3634*280575beSPatrick McGehearty	add	%i0, 64, %i0
3635*280575beSPatrick McGehearty	fmovd	%d30, %d0
	/* Branch uses the condition codes from the subcc near the loop top. */
3636*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_aln_111_loop
3637*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3638*280575beSPatrick McGehearty	add	%i1, %i0, %i1
3639*280575beSPatrick McGehearty
3640*280575beSPatrick McGehearty	stda	%d0, [%i1]ASI_USER
3641*280575beSPatrick McGehearty	ba	.co_remain_stuff
3642*280575beSPatrick McGehearty	add	%i1, 8, %i1
3643*280575beSPatrick McGehearty	! END OF aln_111
3644*280575beSPatrick McGehearty
/*
 * .co_aln_110: source is 16 bytes below a 64-byte boundary.
 *
 * Two doublewords are preloaded into %d0/%d2 and the source pointer and
 * count are adjusted by 16.  %o3 receives the count rounded down to a
 * multiple of 128, %i2 keeps the remainder, and %i1 holds (dst - src)
 * for the duration of the loop.
 *
 * Each pass block-loads 64 source bytes into %d16-%d30, copies
 * %d16-%d26 into %d4-%d14, block-stores %d0-%d14 to user space, and
 * carries %d28/%d30 into %d0/%d2.  The two carried doublewords are
 * stored after the loop and the destination is advanced by 16 in the
 * delay slot of the branch to .co_remain_stuff.
 */
3645*280575beSPatrick McGehearty.co_aln_110:
3646*280575beSPatrick McGehearty! Alignment off by 16 bytes
3647*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3648*280575beSPatrick McGehearty	ldd	[%i0], %d0
3649*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
3650*280575beSPatrick McGehearty	add	%i0, 16, %i0
3651*280575beSPatrick McGehearty	sub	%i2, 16, %i2
3652*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
3653*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
3654*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
3655*280575beSPatrick McGehearty.co_aln_110_loop:
3656*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16		! block load
3657*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
3658*280575beSPatrick McGehearty	fmovd	%d16, %d4
3659*280575beSPatrick McGehearty	fmovd	%d18, %d6
3660*280575beSPatrick McGehearty	fmovd	%d20, %d8
3661*280575beSPatrick McGehearty	fmovd	%d22, %d10
3662*280575beSPatrick McGehearty	fmovd	%d24, %d12
3663*280575beSPatrick McGehearty	fmovd	%d26, %d14
3664*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_AIUS	! block initializing store
3665*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_AIUS
3666*280575beSPatrick McGehearty	add	%i0, 64, %i0
3667*280575beSPatrick McGehearty	fmovd	%d28, %d0
3668*280575beSPatrick McGehearty	fmovd	%d30, %d2
3669*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_aln_110_loop
3670*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3671*280575beSPatrick McGehearty	add	%i1, %i0, %i1
3672*280575beSPatrick McGehearty
3673*280575beSPatrick McGehearty	stda	%d0, [%i1]%asi
3674*280575beSPatrick McGehearty	stda	%d2, [%i1+8]%asi
3675*280575beSPatrick McGehearty	ba	.co_remain_stuff
3676*280575beSPatrick McGehearty	add	%i1, 16, %i1
3677*280575beSPatrick McGehearty	! END OF aln_110
3678*280575beSPatrick McGehearty
/*
 * .co_aln_101: source is 24 bytes below a 64-byte boundary.
 *
 * Three doublewords are preloaded into %d0-%d4 and the source pointer
 * and count are adjusted by 24.  %o3 receives the count rounded down to
 * a multiple of 128, %i2 keeps the remainder, and %i1 holds (dst - src)
 * for the duration of the loop.
 *
 * Each pass block-loads 64 source bytes into %d16-%d30, copies
 * %d16-%d24 into %d6-%d14, block-stores %d0-%d14 to user space, and
 * carries %d26-%d30 into %d0-%d4.  The three carried doublewords are
 * stored after the loop and the destination is advanced by 24 in the
 * delay slot of the branch to .co_remain_stuff.
 */
3679*280575beSPatrick McGehearty.co_aln_101:
3680*280575beSPatrick McGehearty! Alignment off by 24 bytes
3681*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3682*280575beSPatrick McGehearty	ldd	[%i0], %d0
3683*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
3684*280575beSPatrick McGehearty	ldd	[%i0+16], %d4
3685*280575beSPatrick McGehearty	add	%i0, 24, %i0
3686*280575beSPatrick McGehearty	sub	%i2, 24, %i2
3687*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
3688*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
3689*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
3690*280575beSPatrick McGehearty.co_aln_101_loop:
3691*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
3692*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
3693*280575beSPatrick McGehearty	fmovd	%d16, %d6
3694*280575beSPatrick McGehearty	fmovd	%d18, %d8
3695*280575beSPatrick McGehearty	fmovd	%d20, %d10
3696*280575beSPatrick McGehearty	fmovd	%d22, %d12
3697*280575beSPatrick McGehearty	fmovd	%d24, %d14
3698*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_AIUS	! block initializing store
3699*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_AIUS
3700*280575beSPatrick McGehearty	add	%i0, 64, %i0
3701*280575beSPatrick McGehearty	fmovd	%d26, %d0
3702*280575beSPatrick McGehearty	fmovd	%d28, %d2
3703*280575beSPatrick McGehearty	fmovd	%d30, %d4
3704*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_aln_101_loop
3705*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3706*280575beSPatrick McGehearty	add	%i1, %i0, %i1
3707*280575beSPatrick McGehearty
3708*280575beSPatrick McGehearty	stda	%d0, [%i1]%asi
3709*280575beSPatrick McGehearty	stda	%d2, [%i1+8]%asi
3710*280575beSPatrick McGehearty	stda	%d4, [%i1+16]%asi
3711*280575beSPatrick McGehearty	ba	.co_remain_stuff
3712*280575beSPatrick McGehearty	add	%i1, 24, %i1
3713*280575beSPatrick McGehearty	! END OF aln_101
3714*280575beSPatrick McGehearty
/*
 * .co_aln_100: source is 32 bytes below a 64-byte boundary.
 *
 * Four doublewords are preloaded into %d0-%d6 and the source pointer
 * and count are adjusted by 32.  %o3 receives the count rounded down to
 * a multiple of 128, %i2 keeps the remainder, and %i1 holds (dst - src)
 * for the duration of the loop.
 *
 * Each pass block-loads 64 source bytes into %d16-%d30, copies
 * %d16-%d22 into %d8-%d14, block-stores %d0-%d14 to user space, and
 * carries %d24-%d30 into %d0-%d6.  The four carried doublewords are
 * stored after the loop and the destination is advanced by 32 in the
 * delay slot of the branch to .co_remain_stuff.
 */
3715*280575beSPatrick McGehearty.co_aln_100:
3716*280575beSPatrick McGehearty! Alignment off by 32 bytes
3717*280575beSPatrick McGehearty	ldd	[%i0], %d0
3718*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
3719*280575beSPatrick McGehearty	ldd	[%i0+16],%d4
3720*280575beSPatrick McGehearty	ldd	[%i0+24],%d6
3721*280575beSPatrick McGehearty	add	%i0, 32, %i0
3722*280575beSPatrick McGehearty	sub	%i2, 32, %i2
3723*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
3724*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
3725*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
3726*280575beSPatrick McGehearty.co_aln_100_loop:
3727*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
3728*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
3729*280575beSPatrick McGehearty	fmovd	%d16, %d8
3730*280575beSPatrick McGehearty	fmovd	%d18, %d10
3731*280575beSPatrick McGehearty	fmovd	%d20, %d12
3732*280575beSPatrick McGehearty	fmovd	%d22, %d14
3733*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_AIUS	! block initializing store
3734*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_AIUS
3735*280575beSPatrick McGehearty	add	%i0, 64, %i0
3736*280575beSPatrick McGehearty	fmovd	%d24, %d0
3737*280575beSPatrick McGehearty	fmovd	%d26, %d2
3738*280575beSPatrick McGehearty	fmovd	%d28, %d4
3739*280575beSPatrick McGehearty	fmovd	%d30, %d6
3740*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_aln_100_loop
3741*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3742*280575beSPatrick McGehearty	add	%i1, %i0, %i1
3743*280575beSPatrick McGehearty
3744*280575beSPatrick McGehearty	stda	%d0, [%i1]%asi
3745*280575beSPatrick McGehearty	stda	%d2, [%i1+8]%asi
3746*280575beSPatrick McGehearty	stda	%d4, [%i1+16]%asi
3747*280575beSPatrick McGehearty	stda	%d6, [%i1+24]%asi
3748*280575beSPatrick McGehearty	ba	.co_remain_stuff
3749*280575beSPatrick McGehearty	add	%i1, 32, %i1
3750*280575beSPatrick McGehearty	! END OF aln_100
3751*280575beSPatrick McGehearty
/*
 * .co_aln_011: source is 40 bytes below a 64-byte boundary.
 *
 * Five doublewords are preloaded into %d0-%d8 and the source pointer
 * and count are adjusted by 40.  %o3 receives the count rounded down to
 * a multiple of 128, %i2 keeps the remainder, and %i1 holds (dst - src)
 * for the duration of the loop.
 *
 * Each pass block-loads 64 source bytes into %d16-%d30, copies
 * %d16-%d20 into %d10-%d14, block-stores %d0-%d14 to user space, and
 * carries %d22-%d30 into %d0-%d8.  The five carried doublewords are
 * stored after the loop and the destination is advanced by 40 in the
 * delay slot of the branch to .co_remain_stuff.
 */
3752*280575beSPatrick McGehearty.co_aln_011:
3753*280575beSPatrick McGehearty! Alignment off by 40 bytes
3754*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3755*280575beSPatrick McGehearty	ldd	[%i0], %d0
3756*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
3757*280575beSPatrick McGehearty	ldd	[%i0+16], %d4
3758*280575beSPatrick McGehearty	ldd	[%i0+24], %d6
3759*280575beSPatrick McGehearty	ldd	[%i0+32], %d8
3760*280575beSPatrick McGehearty	add	%i0, 40, %i0
3761*280575beSPatrick McGehearty	sub	%i2, 40, %i2
3762*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
3763*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
3764*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
3765*280575beSPatrick McGehearty.co_aln_011_loop:
3766*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
3767*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
3768*280575beSPatrick McGehearty	fmovd	%d16, %d10
3769*280575beSPatrick McGehearty	fmovd	%d18, %d12
3770*280575beSPatrick McGehearty	fmovd	%d20, %d14
3771*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_AIUS	! block initializing store
3772*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_AIUS
3773*280575beSPatrick McGehearty	add	%i0, 64, %i0
3774*280575beSPatrick McGehearty	fmovd	%d22, %d0
3775*280575beSPatrick McGehearty	fmovd	%d24, %d2
3776*280575beSPatrick McGehearty	fmovd	%d26, %d4
3777*280575beSPatrick McGehearty	fmovd	%d28, %d6
3778*280575beSPatrick McGehearty	fmovd	%d30, %d8
3779*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_aln_011_loop
3780*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3781*280575beSPatrick McGehearty	add	%i1, %i0, %i1
3782*280575beSPatrick McGehearty
3783*280575beSPatrick McGehearty	stda	%d0, [%i1]%asi
3784*280575beSPatrick McGehearty	stda	%d2, [%i1+8]%asi
3785*280575beSPatrick McGehearty	stda	%d4, [%i1+16]%asi
3786*280575beSPatrick McGehearty	stda	%d6, [%i1+24]%asi
3787*280575beSPatrick McGehearty	stda	%d8, [%i1+32]%asi
3788*280575beSPatrick McGehearty	ba	.co_remain_stuff
3789*280575beSPatrick McGehearty	add	%i1, 40, %i1
3790*280575beSPatrick McGehearty	! END OF aln_011
3791*280575beSPatrick McGehearty
/*
 * .co_aln_010: source is 48 bytes below a 64-byte boundary.
 *
 * Six doublewords are preloaded into %d0-%d10 and the source pointer
 * and count are adjusted by 48.  %o3 receives the count rounded down to
 * a multiple of 128, %i2 keeps the remainder, and %i1 holds (dst - src)
 * for the duration of the loop.
 *
 * Each pass block-loads 64 source bytes into %d16-%d30, copies
 * %d16/%d18 into %d12/%d14, block-stores %d0-%d14 to user space, and
 * carries %d20-%d30 into %d0-%d10.  The six carried doublewords are
 * stored after the loop and the destination is advanced by 48 in the
 * delay slot of the branch to .co_remain_stuff.
 */
3792*280575beSPatrick McGehearty.co_aln_010:
3793*280575beSPatrick McGehearty! Alignment off by 48 bytes
3794*280575beSPatrick McGehearty	ldd	[%i0], %d0
3795*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
3796*280575beSPatrick McGehearty	ldd	[%i0+16], %d4
3797*280575beSPatrick McGehearty	ldd	[%i0+24], %d6
3798*280575beSPatrick McGehearty	ldd	[%i0+32], %d8
3799*280575beSPatrick McGehearty	ldd	[%i0+40], %d10
3800*280575beSPatrick McGehearty	add	%i0, 48, %i0
3801*280575beSPatrick McGehearty	sub	%i2, 48, %i2
3802*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
3803*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
3804*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
3805*280575beSPatrick McGehearty.co_aln_010_loop:
3806*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
3807*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
3808*280575beSPatrick McGehearty	fmovd	%d16, %d12
3809*280575beSPatrick McGehearty	fmovd	%d18, %d14
3810*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_AIUS	! block initializing store
3811*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_AIUS
3812*280575beSPatrick McGehearty	add	%i0, 64, %i0
3813*280575beSPatrick McGehearty	fmovd	%d20, %d0
3814*280575beSPatrick McGehearty	fmovd	%d22, %d2
3815*280575beSPatrick McGehearty	fmovd	%d24, %d4
3816*280575beSPatrick McGehearty	fmovd	%d26, %d6
3817*280575beSPatrick McGehearty	fmovd	%d28, %d8
3818*280575beSPatrick McGehearty	fmovd	%d30, %d10
3819*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_aln_010_loop
3820*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3821*280575beSPatrick McGehearty	add	%i1, %i0, %i1
3822*280575beSPatrick McGehearty
3823*280575beSPatrick McGehearty	stda	%d0, [%i1]%asi
3824*280575beSPatrick McGehearty	stda	%d2, [%i1+8]%asi
3825*280575beSPatrick McGehearty	stda	%d4, [%i1+16]%asi
3826*280575beSPatrick McGehearty	stda	%d6, [%i1+24]%asi
3827*280575beSPatrick McGehearty	stda	%d8, [%i1+32]%asi
3828*280575beSPatrick McGehearty	stda	%d10, [%i1+40]%asi
3829*280575beSPatrick McGehearty	ba	.co_remain_stuff
3830*280575beSPatrick McGehearty	add	%i1, 48, %i1
3831*280575beSPatrick McGehearty	! END OF aln_010
3832*280575beSPatrick McGehearty
/*
 * .co_aln_001: source is 56 bytes below a 64-byte boundary.
 *
 * Seven doublewords are preloaded into %d0-%d12 and the source pointer
 * and count are adjusted by 56.  %o3 receives the count rounded down to
 * a multiple of 128, %i2 keeps the remainder, and %i1 holds (dst - src)
 * for the duration of the loop.
 *
 * Each pass block-loads 64 source bytes into %d16-%d30, copies %d16
 * into %d14, block-stores %d0-%d14 to user space, and carries
 * %d18-%d30 into %d0-%d12.  The seven carried doublewords are stored
 * after the loop and the destination is advanced by 56 in the delay
 * slot of the branch to .co_remain_stuff.
 */
3833*280575beSPatrick McGehearty.co_aln_001:
3834*280575beSPatrick McGehearty! Alignment off by 56 bytes
3835*280575beSPatrick McGehearty	ldd	[%i0], %d0
3836*280575beSPatrick McGehearty	ldd	[%i0+8], %d2
3837*280575beSPatrick McGehearty	ldd	[%i0+16], %d4
3838*280575beSPatrick McGehearty	ldd	[%i0+24], %d6
3839*280575beSPatrick McGehearty	ldd	[%i0+32], %d8
3840*280575beSPatrick McGehearty	ldd	[%i0+40], %d10
3841*280575beSPatrick McGehearty	ldd	[%i0+48], %d12
3842*280575beSPatrick McGehearty	add	%i0, 56, %i0
3843*280575beSPatrick McGehearty	sub	%i2, 56, %i2
3844*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
3845*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
3846*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
3847*280575beSPatrick McGehearty.co_aln_001_loop:
3848*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d16	! block load
3849*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
3850*280575beSPatrick McGehearty	fmovd	%d16, %d14
3851*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_AIUS	! block initializing store
3852*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_AIUS
3853*280575beSPatrick McGehearty	add	%i0, 64, %i0
3854*280575beSPatrick McGehearty	fmovd	%d18, %d0
3855*280575beSPatrick McGehearty	fmovd	%d20, %d2
3856*280575beSPatrick McGehearty	fmovd	%d22, %d4
3857*280575beSPatrick McGehearty	fmovd	%d24, %d6
3858*280575beSPatrick McGehearty	fmovd	%d26, %d8
3859*280575beSPatrick McGehearty	fmovd	%d28, %d10
3860*280575beSPatrick McGehearty	fmovd	%d30, %d12
3861*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_aln_001_loop
3862*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3863*280575beSPatrick McGehearty	add	%i1, %i0, %i1
3864*280575beSPatrick McGehearty
3865*280575beSPatrick McGehearty	stda	%d0, [%i1]%asi
3866*280575beSPatrick McGehearty	stda	%d2, [%i1+8]%asi
3867*280575beSPatrick McGehearty	stda	%d4, [%i1+16]%asi
3868*280575beSPatrick McGehearty	stda	%d6, [%i1+24]%asi
3869*280575beSPatrick McGehearty	stda	%d8, [%i1+32]%asi
3870*280575beSPatrick McGehearty	stda	%d10, [%i1+40]%asi
3871*280575beSPatrick McGehearty	stda	%d12, [%i1+48]%asi
3872*280575beSPatrick McGehearty	ba	.co_remain_stuff
3873*280575beSPatrick McGehearty	add	%i1, 56, %i1
3874*280575beSPatrick McGehearty	! END OF aln_001
3875*280575beSPatrick McGehearty
/*
 * .co_aln_000: source needs no realignment relative to the block
 * boundary, so nothing is preloaded or carried.  %o3 receives the count
 * rounded down to a multiple of 128, %i2 keeps the remainder, and %i1
 * holds (dst - src) while the loop runs.  Each pass block-loads 64
 * bytes straight into %d0-%d14 and block-stores them to user space.
 * After %i1 is turned back into a pointer, execution falls through into
 * .co_remain_stuff.
 */
3876*280575beSPatrick McGehearty.co_aln_000:
3877*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3878*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
3879*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
3880*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
3881*280575beSPatrick McGehearty.co_aln_000_loop:
3882*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_P,%d0
3883*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
3884*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_AIUS	! block initializing store
3885*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_AIUS
3886*280575beSPatrick McGehearty	add	%i0, 64, %i0
3887*280575beSPatrick McGehearty	bgt,pt	%ncc, .co_aln_000_loop
3888*280575beSPatrick McGehearty	prefetch [%i0 + (4 * CACHE_LINE)], #one_read
3889*280575beSPatrick McGehearty	add	%i1, %i0, %i1
3890*280575beSPatrick McGehearty
3891*280575beSPatrick McGehearty	! END OF aln_000
3892*280575beSPatrick McGehearty
/*
 * .co_remain_stuff: copy the residue counted in %i2 from %i0 to %i1
 * (stores go through %asi) in three stages:
 *   .co_aln_32  32 bytes per pass, four ldx / stxa pairs;
 *   .co_aln_15   8 bytes per pass;
 *   .co_aln_7   at most one 4-byte word.
 * The count is kept biased so each loop can branch on the condition
 * codes of its own subcc: 31 is subtracted first, 24 is added back
 * before the 8-byte loop (leaving it off by 7), and the last 7 is added
 * at .co_aln_7.  A zero result exits via .co_exit; anything still left
 * after the optional word copy is handed to .co_unaln3x.
 */
3893*280575beSPatrick McGehearty.co_remain_stuff:
3894*280575beSPatrick McGehearty	subcc	%i2, 31, %i2		! adjust length to allow cc test
3895*280575beSPatrick McGehearty	ble,pt	%ncc, .co_aln_31
3896*280575beSPatrick McGehearty	nop
3897*280575beSPatrick McGehearty.co_aln_32:
3898*280575beSPatrick McGehearty	ldx	[%i0], %o4		! move 32 bytes
3899*280575beSPatrick McGehearty	subcc	%i2, 32, %i2		! decrement length count by 32
3900*280575beSPatrick McGehearty	stxa	%o4, [%i1]%asi
3901*280575beSPatrick McGehearty	ldx	[%i0+8], %o4
3902*280575beSPatrick McGehearty	stxa	%o4, [%i1+8]%asi
3903*280575beSPatrick McGehearty	ldx	[%i0+16], %o4
3904*280575beSPatrick McGehearty	add	%i0, 32, %i0		! increase src ptr by 32
3905*280575beSPatrick McGehearty	stxa	%o4, [%i1+16]%asi
3906*280575beSPatrick McGehearty	ldx	[%i0-8], %o4
3907*280575beSPatrick McGehearty	add	%i1, 32, %i1		! increase dst ptr by 32
3908*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_aln_32	! repeat if at least 32 bytes left
3909*280575beSPatrick McGehearty	stxa	%o4, [%i1-8]%asi
3910*280575beSPatrick McGehearty.co_aln_31:
3911*280575beSPatrick McGehearty	addcc	%i2, 24, %i2		! adjust count to be off by 7
3912*280575beSPatrick McGehearty	ble,pt	%ncc, .co_aln_7		! skip if 7 or fewer bytes left
3913*280575beSPatrick McGehearty	nop				!
3914*280575beSPatrick McGehearty.co_aln_15:
3915*280575beSPatrick McGehearty	ldx	[%i0], %o4		! move 8 bytes
3916*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increase src ptr by 8
3917*280575beSPatrick McGehearty	subcc	%i2, 8, %i2		! decrease count by 8
3918*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increase dst ptr by 8
3919*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_aln_15
3920*280575beSPatrick McGehearty	stxa	%o4, [%i1-8]%asi
3921*280575beSPatrick McGehearty.co_aln_7:
3922*280575beSPatrick McGehearty	addcc	%i2, 7, %i2		! finish adjustment of remaining count
3923*280575beSPatrick McGehearty	bz,pt	%ncc, .co_exit		! exit if finished
	/* Delay slot of the bz: sets the condition codes for the blt below. */
3924*280575beSPatrick McGehearty	cmp	%i2, 4
3925*280575beSPatrick McGehearty	blt,pt	%ncc, .co_unaln3x	! skip if less than 4 bytes left
3926*280575beSPatrick McGehearty	nop				!
3927*280575beSPatrick McGehearty	ld	[%i0], %o4		! move 4 bytes
3928*280575beSPatrick McGehearty	add	%i0, 4, %i0		! increase src ptr by 4
3929*280575beSPatrick McGehearty	add	%i1, 4, %i1		! increase dst ptr by 4
3930*280575beSPatrick McGehearty	subcc	%i2, 4, %i2		! decrease count by 4
3931*280575beSPatrick McGehearty	bnz	.co_unaln3x
3932*280575beSPatrick McGehearty	stwa	%o4, [%i1-4]%asi
3933*280575beSPatrick McGehearty	ba	.co_exit
3934*280575beSPatrick McGehearty	nop
3935*280575beSPatrick McGehearty
/*
 * .co_big_d1 / .co_big_d2 / .co_big_d4: bring the destination (%i1) up
 * to 8-byte alignment by copying 1, 2 and 4 bytes as required.  The
 * source (%i0) is read one byte at a time and the bytes are merged in
 * %o3/%o4 before a single stba / stha / stwa to ASI_USER; %i2 is
 * reduced by the number of bytes moved (in the delay slot of each
 * closing branch).
 *
 * .co_big_d1 tests bit 1 of the new destination: clear goes to
 * .co_big_d2f, set falls into .co_big_d2.  .co_big_d2 tests bit 2 the
 * same way (.co_big_d4f or fall into .co_big_d4).  .co_big_d4 always
 * returns to .co_big_d4f.
 */
3936*280575beSPatrick McGehearty	! destination alignment code
3937*280575beSPatrick McGehearty.co_big_d1:
3938*280575beSPatrick McGehearty	ldub	[%i0], %o4		! move a byte
3939*280575beSPatrick McGehearty	add	%i0, 1, %i0
3940*280575beSPatrick McGehearty	stba	%o4, [%i1]ASI_USER
3941*280575beSPatrick McGehearty	add	%i1, 1, %i1
3942*280575beSPatrick McGehearty	andcc	%i1, 2, %o3
3943*280575beSPatrick McGehearty	bz,pt	%ncc, .co_big_d2f
3944*280575beSPatrick McGehearty	sub	%i2, 1, %i2
3945*280575beSPatrick McGehearty.co_big_d2:
3946*280575beSPatrick McGehearty	ldub	[%i0], %o4		! move a half-word (src align unknown)
3947*280575beSPatrick McGehearty	ldub	[%i0+1], %o3
3948*280575beSPatrick McGehearty	add	%i0, 2, %i0
3949*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
3950*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
3951*280575beSPatrick McGehearty	stha	%o4, [%i1]ASI_USER
3952*280575beSPatrick McGehearty	add	%i1, 2, %i1
3953*280575beSPatrick McGehearty	andcc	%i1, 4, %o3		! is dest longword aligned
3954*280575beSPatrick McGehearty	bz,pt	%ncc, .co_big_d4f
3955*280575beSPatrick McGehearty	sub	%i2, 2, %i2
3956*280575beSPatrick McGehearty.co_big_d4:				! dest is at least word aligned
3957*280575beSPatrick McGehearty	nop
3958*280575beSPatrick McGehearty	ldub	[%i0], %o4		! move a word (src align unknown)
3959*280575beSPatrick McGehearty	ldub	[%i0+1], %o3
3960*280575beSPatrick McGehearty	sll	%o4, 24, %o4		! position
3961*280575beSPatrick McGehearty	sll	%o3, 16, %o3		! position
3962*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
3963*280575beSPatrick McGehearty	ldub	[%i0+2], %o4
3964*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
3965*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
3966*280575beSPatrick McGehearty	ldub	[%i0+3], %o4
3967*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
3968*280575beSPatrick McGehearty	stwa	%o4,[%i1]ASI_USER	! store four bytes
3969*280575beSPatrick McGehearty	add	%i0, 4, %i0		! adjust src by 4
3970*280575beSPatrick McGehearty	add	%i1, 4, %i1		! adjust dest by 4
3971*280575beSPatrick McGehearty	ba	.co_big_d4f
3972*280575beSPatrick McGehearty	sub	%i2, 4, %i2		! adjust count by 4
3973*280575beSPatrick McGehearty
3974*280575beSPatrick McGehearty
/*
 * .co_big_unal8: destination is 8-byte aligned, source is not.
 *
 * If the destination is already on a 64-byte boundary, go directly to
 * .co_unalnsrc.  Otherwise %o3 is set to 64 - (dst & 0x3f), the number
 * of bytes (a multiple of 8) to copy before the destination reaches a
 * 64-byte boundary, and %i2 is reduced by that amount.  The copy loop
 * is picked from the low two bits of the source: bit 0 set ->
 * .co_unalnbyte, else bit 1 set -> .co_unalnhalf, else .co_unalnword.
 *
 * .co_unalnword moves 8 bytes per pass as two 4-byte ld / stwa pairs
 * and ends at .co_unalnsrc when %o3 reaches zero.
 */
3975*280575beSPatrick McGehearty	! Dst is on 8 byte boundary; src is not;
3976*280575beSPatrick McGehearty.co_big_unal8:
3977*280575beSPatrick McGehearty	andcc	%i1, 0x3f, %o3		! is dst 64-byte block aligned?
3978*280575beSPatrick McGehearty	bz	%ncc, .co_unalnsrc
	/* Delay slot: executes on the taken path too, leaving %o3 = -64 there. */
3979*280575beSPatrick McGehearty	sub	%o3, 64, %o3		! %o3 will be multiple of 8
3980*280575beSPatrick McGehearty	neg	%o3			! bytes until dest is 64 byte aligned
3981*280575beSPatrick McGehearty	sub	%i2, %o3, %i2		! update cnt with bytes to be moved
3982*280575beSPatrick McGehearty	! Move bytes according to source alignment
3983*280575beSPatrick McGehearty	andcc	%i0, 0x1, %o4
3984*280575beSPatrick McGehearty	bnz	%ncc, .co_unalnbyte	! check for byte alignment
3985*280575beSPatrick McGehearty	nop
3986*280575beSPatrick McGehearty	andcc	%i0, 2, %o4		! check for half word alignment
3987*280575beSPatrick McGehearty	bnz	%ncc, .co_unalnhalf
3988*280575beSPatrick McGehearty	nop
3989*280575beSPatrick McGehearty	! Src is word aligned, move bytes until dest 64 byte aligned
3990*280575beSPatrick McGehearty.co_unalnword:
3991*280575beSPatrick McGehearty	ld	[%i0], %o4		! load 4 bytes
3992*280575beSPatrick McGehearty	stwa	%o4, [%i1]%asi		! and store 4 bytes
3993*280575beSPatrick McGehearty	ld	[%i0+4], %o4		! load 4 bytes
3994*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increase src ptr by 8
3995*280575beSPatrick McGehearty	stwa	%o4, [%i1+4]%asi	! and store 4 bytes
3996*280575beSPatrick McGehearty	subcc	%o3, 8, %o3		! decrease count by 8
3997*280575beSPatrick McGehearty	bnz	%ncc, .co_unalnword
3998*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increase dst ptr by 8
3999*280575beSPatrick McGehearty	ba	.co_unalnsrc
4000*280575beSPatrick McGehearty	nop
4001*280575beSPatrick McGehearty
/*
 * .co_unalnhalf: source is 2-byte but not 4-byte aligned.  Each pass
 * builds one 8-byte word in %i3 from a half-word at +0, a word at +2
 * and a half-word at +6, stores it to the destination with stxa to
 * ASI_USER, and advances both pointers by 8.  %o3 is the number of
 * bytes still to move; when it reaches zero control goes to
 * .co_unalnsrc.
 */
4002*280575beSPatrick McGehearty	! Src is half-word aligned, move bytes until dest 64 byte aligned
4003*280575beSPatrick McGehearty.co_unalnhalf:
4004*280575beSPatrick McGehearty	lduh	[%i0], %o4		! load 2 bytes
4005*280575beSPatrick McGehearty	sllx	%o4, 32, %i3		! shift left
4006*280575beSPatrick McGehearty	lduw	[%i0+2], %o4
4007*280575beSPatrick McGehearty	or	%o4, %i3, %i3
4008*280575beSPatrick McGehearty	sllx	%i3, 16, %i3
4009*280575beSPatrick McGehearty	lduh	[%i0+6], %o4
4010*280575beSPatrick McGehearty	or	%o4, %i3, %i3
4011*280575beSPatrick McGehearty	stxa	%i3, [%i1]ASI_USER
4012*280575beSPatrick McGehearty	add	%i0, 8, %i0
4013*280575beSPatrick McGehearty	subcc	%o3, 8, %o3
4014*280575beSPatrick McGehearty	bnz	%ncc, .co_unalnhalf
4015*280575beSPatrick McGehearty	add	%i1, 8, %i1
4016*280575beSPatrick McGehearty	ba	.co_unalnsrc
4017*280575beSPatrick McGehearty	nop
4018*280575beSPatrick McGehearty
/*
 * .co_unalnbyte: source address is odd.  Each 8-byte destination word is
 * assembled in %i3 from 1 + 2 + 2 + 2 + 1 byte loads (most significant
 * part first) and written with one 8-byte store to ASI_USER.
 *
 * %i1 is temporarily turned into (dst - src) so that [%i1 + %i0]
 * addresses the destination and only %i0 has to be advanced (in the loop
 * branch's delay slot).  %i1 is rebuilt after the loop, and execution
 * then continues with the code that follows this block.
 */
4019*280575beSPatrick McGehearty	! Src is Byte aligned, move bytes until dest 64 byte aligned
4020*280575beSPatrick McGehearty.co_unalnbyte:
4021*280575beSPatrick McGehearty	sub	%i1, %i0, %i1		! share pointer advance
4022*280575beSPatrick McGehearty.co_unalnbyte_loop:
4023*280575beSPatrick McGehearty	ldub	[%i0], %o4
4024*280575beSPatrick McGehearty	sllx	%o4, 56, %i3
4025*280575beSPatrick McGehearty	lduh	[%i0+1], %o4
4026*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
4027*280575beSPatrick McGehearty	or	%o4, %i3, %i3
4028*280575beSPatrick McGehearty	lduh	[%i0+3], %o4
4029*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
4030*280575beSPatrick McGehearty	or	%o4, %i3, %i3
4031*280575beSPatrick McGehearty	lduh	[%i0+5], %o4
4032*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
4033*280575beSPatrick McGehearty	or	%o4, %i3, %i3
4034*280575beSPatrick McGehearty	ldub	[%i0+7], %o4
4035*280575beSPatrick McGehearty	or	%o4, %i3, %i3
4036*280575beSPatrick McGehearty	stxa	%i3, [%i1+%i0]ASI_USER
4037*280575beSPatrick McGehearty	subcc	%o3, 8, %o3
4038*280575beSPatrick McGehearty	bnz	%ncc, .co_unalnbyte_loop
4039*280575beSPatrick McGehearty	add	%i0, 8, %i0
4040*280575beSPatrick McGehearty	add	%i1,%i0, %i1		! restore pointer
4041*280575beSPatrick McGehearty
/*
 * .co_unalnsrc: destination is 64-byte aligned, source is not 8-byte
 * aligned.  The count is split into %i3 = bytes to move with 64-byte
 * block stores and %i2 = trailing bytes, with one whole block held back
 * in the trailing count so the block loops do not load beyond the end of
 * the source.  %o4 is the source address rounded down to 64 bytes,
 * alignaddr sets up %gsr for the faligndata instructions in the loops,
 * and %i0 is advanced past the block region for use by the tail code.
 *
 * Bits 0x20, 0x10 and 0x08 of the source address then select one of eight
 * loops, .co_unaln_000 through .co_unaln_111, named after those three
 * bits.  Each andcc placed in a delay slot pre-computes in %o3 the bit
 * tested by the next brz/brnz.  Every leaf issues a prefetch four cache
 * lines ahead of the block-aligned source base in %o4.
 */
4042*280575beSPatrick McGehearty	! Destination is now block (64 byte aligned), src is not 8 byte aligned
4043*280575beSPatrick McGehearty.co_unalnsrc:
4044*280575beSPatrick McGehearty	andn	%i2, 0x3f, %i3		! %i3 is multiple of block size
4045*280575beSPatrick McGehearty	and	%i2, 0x3f, %i2		! residue bytes in %i2
4046*280575beSPatrick McGehearty	add	%i2, 64, %i2		! Insure we don't load beyond
4047*280575beSPatrick McGehearty	sub	%i3, 64, %i3		! end of source buffer
4048*280575beSPatrick McGehearty
4049*280575beSPatrick McGehearty	andn	%i0, 0x3f, %o4		! %o4 has block aligned src address
4050*280575beSPatrick McGehearty	prefetch [%o4 + (3 * CACHE_LINE)], #one_read
4051*280575beSPatrick McGehearty	alignaddr %i0, %g0, %g0		! generate %gsr
4052*280575beSPatrick McGehearty	add	%i0, %i3, %i0		! advance %i0 to after blocks
4053*280575beSPatrick McGehearty	!
4054*280575beSPatrick McGehearty	! Determine source alignment to correct 8 byte offset
4055*280575beSPatrick McGehearty	andcc	%i0, 0x20, %o3
4056*280575beSPatrick McGehearty	brnz,pn	%o3, .co_unaln_1
4057*280575beSPatrick McGehearty	andcc	%i0, 0x10, %o3
4058*280575beSPatrick McGehearty	brnz,pn	%o3, .co_unaln_01
4059*280575beSPatrick McGehearty	andcc	%i0, 0x08, %o3
4060*280575beSPatrick McGehearty	brz,a	%o3, .co_unaln_000
4061*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4062*280575beSPatrick McGehearty	ba	.co_unaln_001
4063*280575beSPatrick McGehearty	nop
4064*280575beSPatrick McGehearty.co_unaln_01:
4065*280575beSPatrick McGehearty	brnz,a	%o3, .co_unaln_011
4066*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4067*280575beSPatrick McGehearty	ba	.co_unaln_010
4068*280575beSPatrick McGehearty	nop
4069*280575beSPatrick McGehearty.co_unaln_1:
4070*280575beSPatrick McGehearty	brnz,pn	%o3, .co_unaln_11
4071*280575beSPatrick McGehearty	andcc	%i0, 0x08, %o3
4072*280575beSPatrick McGehearty	brnz,a	%o3, .co_unaln_101
4073*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4074*280575beSPatrick McGehearty	ba	.co_unaln_100
4075*280575beSPatrick McGehearty	nop
4076*280575beSPatrick McGehearty.co_unaln_11:
4077*280575beSPatrick McGehearty	brz,pn	%o3, .co_unaln_110
/*
 * Prefetch relative to the block-aligned base in %o4, as the other leaves
 * do; %i0 has already been advanced past the block region at this point.
 * This delay slot is not annulled, so it runs for both the 110 and the
 * 111 case.
 */
4078*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4079*280575beSPatrick McGehearty
/*
 * .co_unaln_111: block loop for a source address with bits 0x20, 0x10 and
 * 0x08 all set; only the last doubleword (%d14) of the first 64-byte
 * source block is needed.  Each iteration block-loads the next 64 source
 * bytes into %d16-%d30, merges adjacent doublewords with faligndata into
 * %d48-%d62, carries %d30 over as the next %d14, and block-stores 64
 * bytes to the user destination.  %i3 counts down the bytes still to be
 * block-stored; the prefetch is in the loop branch's delay slot.
 */
4080*280575beSPatrick McGehearty.co_unaln_111:
4081*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
4082*280575beSPatrick McGehearty.co_unaln_111_loop:
4083*280575beSPatrick McGehearty	add	%o4, 64, %o4
4084*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
4085*280575beSPatrick McGehearty	faligndata %d14, %d16, %d48
4086*280575beSPatrick McGehearty	faligndata %d16, %d18, %d50
4087*280575beSPatrick McGehearty	faligndata %d18, %d20, %d52
4088*280575beSPatrick McGehearty	faligndata %d20, %d22, %d54
4089*280575beSPatrick McGehearty	faligndata %d22, %d24, %d56
4090*280575beSPatrick McGehearty	faligndata %d24, %d26, %d58
4091*280575beSPatrick McGehearty	faligndata %d26, %d28, %d60
4092*280575beSPatrick McGehearty	faligndata %d28, %d30, %d62
4093*280575beSPatrick McGehearty	fmovd	%d30, %d14
4094*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_AIUS
4095*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
4096*280575beSPatrick McGehearty	add	%i1, 64, %i1
4097*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_111_loop
4098*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4099*280575beSPatrick McGehearty	ba	.co_unaln_done
4100*280575beSPatrick McGehearty	nop
4101*280575beSPatrick McGehearty
/*
 * .co_unaln_110: block loop for a source address with bits 0x20 and 0x10
 * set and 0x08 clear; the last two doublewords (%d12, %d14) of the first
 * 64-byte source block are needed.  Each iteration block-loads the next
 * 64 source bytes into %d16-%d30, merges adjacent doublewords with
 * faligndata into %d48-%d62, carries %d28/%d30 over as the next
 * %d12/%d14, and block-stores 64 bytes to the user destination.
 */
4102*280575beSPatrick McGehearty.co_unaln_110:
4103*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
4104*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
4105*280575beSPatrick McGehearty.co_unaln_110_loop:
4106*280575beSPatrick McGehearty	add	%o4, 64, %o4
4107*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
4108*280575beSPatrick McGehearty	faligndata %d12, %d14, %d48
4109*280575beSPatrick McGehearty	faligndata %d14, %d16, %d50
4110*280575beSPatrick McGehearty	faligndata %d16, %d18, %d52
4111*280575beSPatrick McGehearty	faligndata %d18, %d20, %d54
4112*280575beSPatrick McGehearty	faligndata %d20, %d22, %d56
4113*280575beSPatrick McGehearty	faligndata %d22, %d24, %d58
4114*280575beSPatrick McGehearty	faligndata %d24, %d26, %d60
4115*280575beSPatrick McGehearty	faligndata %d26, %d28, %d62
4116*280575beSPatrick McGehearty	fmovd	%d28, %d12
4117*280575beSPatrick McGehearty	fmovd	%d30, %d14
4118*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_AIUS
4119*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
4120*280575beSPatrick McGehearty	add	%i1, 64, %i1
4121*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_110_loop
4122*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4123*280575beSPatrick McGehearty	ba	.co_unaln_done
4124*280575beSPatrick McGehearty	nop
4125*280575beSPatrick McGehearty
/*
 * .co_unaln_101: block loop for a source address with bits 0x20 and 0x08
 * set and 0x10 clear; the last three doublewords (%d10-%d14) of the first
 * 64-byte source block are needed.  Each iteration block-loads the next
 * 64 source bytes into %d16-%d30, merges adjacent doublewords with
 * faligndata into %d48-%d62, carries %d26-%d30 over as the next
 * %d10-%d14, and block-stores 64 bytes to the user destination.
 */
4126*280575beSPatrick McGehearty.co_unaln_101:
4127*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
4128*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
4129*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
4130*280575beSPatrick McGehearty.co_unaln_101_loop:
4131*280575beSPatrick McGehearty	add	%o4, 64, %o4
4132*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
4133*280575beSPatrick McGehearty	faligndata %d10, %d12, %d48
4134*280575beSPatrick McGehearty	faligndata %d12, %d14, %d50
4135*280575beSPatrick McGehearty	faligndata %d14, %d16, %d52
4136*280575beSPatrick McGehearty	faligndata %d16, %d18, %d54
4137*280575beSPatrick McGehearty	faligndata %d18, %d20, %d56
4138*280575beSPatrick McGehearty	faligndata %d20, %d22, %d58
4139*280575beSPatrick McGehearty	faligndata %d22, %d24, %d60
4140*280575beSPatrick McGehearty	faligndata %d24, %d26, %d62
4141*280575beSPatrick McGehearty	fmovd	%d26, %d10
4142*280575beSPatrick McGehearty	fmovd	%d28, %d12
4143*280575beSPatrick McGehearty	fmovd	%d30, %d14
4144*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_AIUS
4145*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
4146*280575beSPatrick McGehearty	add	%i1, 64, %i1
4147*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_101_loop
4148*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4149*280575beSPatrick McGehearty	ba	.co_unaln_done
4150*280575beSPatrick McGehearty	nop
4151*280575beSPatrick McGehearty
/*
 * .co_unaln_100: block loop for a source address with bit 0x20 set and
 * bits 0x10 and 0x08 clear; the last four doublewords (%d8-%d14) of the
 * first 64-byte source block are needed.  Each iteration block-loads the
 * next 64 source bytes into %d16-%d30, merges adjacent doublewords with
 * faligndata into %d48-%d62, carries %d24-%d30 over as the next
 * %d8-%d14, and block-stores 64 bytes to the user destination.
 */
4152*280575beSPatrick McGehearty.co_unaln_100:
4153*280575beSPatrick McGehearty	ldd	[%o4+32], %d8
4154*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
4155*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
4156*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
4157*280575beSPatrick McGehearty.co_unaln_100_loop:
4158*280575beSPatrick McGehearty	add	%o4, 64, %o4
4159*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
4160*280575beSPatrick McGehearty	faligndata %d8, %d10, %d48
4161*280575beSPatrick McGehearty	faligndata %d10, %d12, %d50
4162*280575beSPatrick McGehearty	faligndata %d12, %d14, %d52
4163*280575beSPatrick McGehearty	faligndata %d14, %d16, %d54
4164*280575beSPatrick McGehearty	faligndata %d16, %d18, %d56
4165*280575beSPatrick McGehearty	faligndata %d18, %d20, %d58
4166*280575beSPatrick McGehearty	faligndata %d20, %d22, %d60
4167*280575beSPatrick McGehearty	faligndata %d22, %d24, %d62
4168*280575beSPatrick McGehearty	fmovd	%d24, %d8
4169*280575beSPatrick McGehearty	fmovd	%d26, %d10
4170*280575beSPatrick McGehearty	fmovd	%d28, %d12
4171*280575beSPatrick McGehearty	fmovd	%d30, %d14
4172*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_AIUS
4173*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
4174*280575beSPatrick McGehearty	add	%i1, 64, %i1
4175*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_100_loop
4176*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4177*280575beSPatrick McGehearty	ba	.co_unaln_done
4178*280575beSPatrick McGehearty	nop
4179*280575beSPatrick McGehearty
/*
 * .co_unaln_011: block loop for a source address with bit 0x20 clear and
 * bits 0x10 and 0x08 set; the last five doublewords (%d6-%d14) of the
 * first 64-byte source block are needed.  Each iteration block-loads the
 * next 64 source bytes into %d16-%d30, merges adjacent doublewords with
 * faligndata into %d48-%d62, carries %d22-%d30 over as the next
 * %d6-%d14, and block-stores 64 bytes to the user destination.
 */
4180*280575beSPatrick McGehearty.co_unaln_011:
4181*280575beSPatrick McGehearty	ldd	[%o4+24], %d6
4182*280575beSPatrick McGehearty	ldd	[%o4+32], %d8
4183*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
4184*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
4185*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
4186*280575beSPatrick McGehearty.co_unaln_011_loop:
4187*280575beSPatrick McGehearty	add	%o4, 64, %o4
4188*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
4189*280575beSPatrick McGehearty	faligndata %d6, %d8, %d48
4190*280575beSPatrick McGehearty	faligndata %d8, %d10, %d50
4191*280575beSPatrick McGehearty	faligndata %d10, %d12, %d52
4192*280575beSPatrick McGehearty	faligndata %d12, %d14, %d54
4193*280575beSPatrick McGehearty	faligndata %d14, %d16, %d56
4194*280575beSPatrick McGehearty	faligndata %d16, %d18, %d58
4195*280575beSPatrick McGehearty	faligndata %d18, %d20, %d60
4196*280575beSPatrick McGehearty	faligndata %d20, %d22, %d62
4197*280575beSPatrick McGehearty	fmovd	%d22, %d6
4198*280575beSPatrick McGehearty	fmovd	%d24, %d8
4199*280575beSPatrick McGehearty	fmovd	%d26, %d10
4200*280575beSPatrick McGehearty	fmovd	%d28, %d12
4201*280575beSPatrick McGehearty	fmovd	%d30, %d14
4202*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_AIUS
4203*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
4204*280575beSPatrick McGehearty	add	%i1, 64, %i1
4205*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_011_loop
4206*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4207*280575beSPatrick McGehearty	ba	.co_unaln_done
4208*280575beSPatrick McGehearty	nop
4209*280575beSPatrick McGehearty
/*
 * .co_unaln_010: block loop for a source address with bit 0x10 set and
 * bits 0x20 and 0x08 clear; the last six doublewords (%d4-%d14) of the
 * first 64-byte source block are needed.  Each iteration block-loads the
 * next 64 source bytes into %d16-%d30, merges adjacent doublewords with
 * faligndata into %d48-%d62, carries %d20-%d30 over as the next
 * %d4-%d14, and block-stores 64 bytes to the user destination.
 */
4210*280575beSPatrick McGehearty.co_unaln_010:
4211*280575beSPatrick McGehearty	ldd	[%o4+16], %d4
4212*280575beSPatrick McGehearty	ldd	[%o4+24], %d6
4213*280575beSPatrick McGehearty	ldd	[%o4+32], %d8
4214*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
4215*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
4216*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
4217*280575beSPatrick McGehearty.co_unaln_010_loop:
4218*280575beSPatrick McGehearty	add	%o4, 64, %o4
4219*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
4220*280575beSPatrick McGehearty	faligndata %d4, %d6, %d48
4221*280575beSPatrick McGehearty	faligndata %d6, %d8, %d50
4222*280575beSPatrick McGehearty	faligndata %d8, %d10, %d52
4223*280575beSPatrick McGehearty	faligndata %d10, %d12, %d54
4224*280575beSPatrick McGehearty	faligndata %d12, %d14, %d56
4225*280575beSPatrick McGehearty	faligndata %d14, %d16, %d58
4226*280575beSPatrick McGehearty	faligndata %d16, %d18, %d60
4227*280575beSPatrick McGehearty	faligndata %d18, %d20, %d62
4228*280575beSPatrick McGehearty	fmovd	%d20, %d4
4229*280575beSPatrick McGehearty	fmovd	%d22, %d6
4230*280575beSPatrick McGehearty	fmovd	%d24, %d8
4231*280575beSPatrick McGehearty	fmovd	%d26, %d10
4232*280575beSPatrick McGehearty	fmovd	%d28, %d12
4233*280575beSPatrick McGehearty	fmovd	%d30, %d14
4234*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_AIUS
4235*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
4236*280575beSPatrick McGehearty	add	%i1, 64, %i1
4237*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_010_loop
4238*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4239*280575beSPatrick McGehearty	ba	.co_unaln_done
4240*280575beSPatrick McGehearty	nop
4241*280575beSPatrick McGehearty
/*
 * .co_unaln_001: block loop for a source address with bit 0x08 set and
 * bits 0x20 and 0x10 clear; the last seven doublewords (%d2-%d14) of the
 * first 64-byte source block are needed.  Each iteration block-loads the
 * next 64 source bytes into %d16-%d30, merges adjacent doublewords with
 * faligndata into %d48-%d62, carries %d18-%d30 over as the next
 * %d2-%d14, and block-stores 64 bytes to the user destination.
 */
4242*280575beSPatrick McGehearty.co_unaln_001:
4243*280575beSPatrick McGehearty	ldd	[%o4+8], %d2
4244*280575beSPatrick McGehearty	ldd	[%o4+16], %d4
4245*280575beSPatrick McGehearty	ldd	[%o4+24], %d6
4246*280575beSPatrick McGehearty	ldd	[%o4+32], %d8
4247*280575beSPatrick McGehearty	ldd	[%o4+40], %d10
4248*280575beSPatrick McGehearty	ldd	[%o4+48], %d12
4249*280575beSPatrick McGehearty	ldd	[%o4+56], %d14
4250*280575beSPatrick McGehearty.co_unaln_001_loop:
4251*280575beSPatrick McGehearty	add	%o4, 64, %o4
4252*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
4253*280575beSPatrick McGehearty	faligndata %d2, %d4, %d48
4254*280575beSPatrick McGehearty	faligndata %d4, %d6, %d50
4255*280575beSPatrick McGehearty	faligndata %d6, %d8, %d52
4256*280575beSPatrick McGehearty	faligndata %d8, %d10, %d54
4257*280575beSPatrick McGehearty	faligndata %d10, %d12, %d56
4258*280575beSPatrick McGehearty	faligndata %d12, %d14, %d58
4259*280575beSPatrick McGehearty	faligndata %d14, %d16, %d60
4260*280575beSPatrick McGehearty	faligndata %d16, %d18, %d62
4261*280575beSPatrick McGehearty	fmovd	%d18, %d2
4262*280575beSPatrick McGehearty	fmovd	%d20, %d4
4263*280575beSPatrick McGehearty	fmovd	%d22, %d6
4264*280575beSPatrick McGehearty	fmovd	%d24, %d8
4265*280575beSPatrick McGehearty	fmovd	%d26, %d10
4266*280575beSPatrick McGehearty	fmovd	%d28, %d12
4267*280575beSPatrick McGehearty	fmovd	%d30, %d14
4268*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_AIUS
4269*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
4270*280575beSPatrick McGehearty	add	%i1, 64, %i1
4271*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_001_loop
4272*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4273*280575beSPatrick McGehearty	ba	.co_unaln_done
4274*280575beSPatrick McGehearty	nop
4275*280575beSPatrick McGehearty
/*
 * .co_unaln_000: block loop for a source address with bits 0x20, 0x10 and
 * 0x08 all clear; the whole first 64-byte source block is block-loaded
 * into %d0-%d14.  Each iteration block-loads the next 64 source bytes
 * into %d16-%d30, merges adjacent doublewords with faligndata into
 * %d48-%d62, copies %d16-%d30 down to %d0-%d14 for the next pass, and
 * block-stores 64 bytes to the user destination.  When the loop ends,
 * execution continues with the code that follows this block.
 */
4276*280575beSPatrick McGehearty.co_unaln_000:
4277*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d0
4278*280575beSPatrick McGehearty.co_unaln_000_loop:
4279*280575beSPatrick McGehearty	add	%o4, 64, %o4
4280*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_P, %d16
4281*280575beSPatrick McGehearty	faligndata %d0, %d2, %d48
4282*280575beSPatrick McGehearty	faligndata %d2, %d4, %d50
4283*280575beSPatrick McGehearty	faligndata %d4, %d6, %d52
4284*280575beSPatrick McGehearty	faligndata %d6, %d8, %d54
4285*280575beSPatrick McGehearty	faligndata %d8, %d10, %d56
4286*280575beSPatrick McGehearty	faligndata %d10, %d12, %d58
4287*280575beSPatrick McGehearty	faligndata %d12, %d14, %d60
4288*280575beSPatrick McGehearty	faligndata %d14, %d16, %d62
4289*280575beSPatrick McGehearty	fmovd	%d16, %d0
4290*280575beSPatrick McGehearty	fmovd	%d18, %d2
4291*280575beSPatrick McGehearty	fmovd	%d20, %d4
4292*280575beSPatrick McGehearty	fmovd	%d22, %d6
4293*280575beSPatrick McGehearty	fmovd	%d24, %d8
4294*280575beSPatrick McGehearty	fmovd	%d26, %d10
4295*280575beSPatrick McGehearty	fmovd	%d28, %d12
4296*280575beSPatrick McGehearty	fmovd	%d30, %d14
4297*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_AIUS
4298*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
4299*280575beSPatrick McGehearty	add	%i1, 64, %i1
4300*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_000_loop
4301*280575beSPatrick McGehearty	prefetch [%o4 + (4 * CACHE_LINE)], #one_read
4302*280575beSPatrick McGehearty
/*
 * .co_unaln_done: the block loops are finished and %i2 holds the trailing
 * byte count.  With more than 15 bytes left, copy 8 bytes per iteration:
 * 8-byte aligned source loads walk %o4, faligndata merges each pair into
 * %d16, and the result is stored through %asi at %i1.  One doubleword is
 * held back (8 is added to %i2 and taken off %i3) so the look-ahead load
 * at [%o4+8] does not go past the end of the source.  %i0 is advanced
 * past the bytes handled here before the loop starts.
 *
 * The andn after the blank line is the delay slot of the bleu, so it also
 * executes when branching to .co_unaln_short; the tail code visible in
 * this part of the file does not read %i3 again.
 */
4303*280575beSPatrick McGehearty.co_unaln_done:
4304*280575beSPatrick McGehearty	! Handle trailing bytes, 64 to 127
4305*280575beSPatrick McGehearty	! Dest long word aligned, Src not long word aligned
4306*280575beSPatrick McGehearty	cmp	%i2, 15
4307*280575beSPatrick McGehearty	bleu	%ncc, .co_unaln_short
4308*280575beSPatrick McGehearty
4309*280575beSPatrick McGehearty	andn	%i2, 0x7, %i3		! %i3 is multiple of 8
4310*280575beSPatrick McGehearty	and	%i2, 0x7, %i2		! residue bytes in %i2
4311*280575beSPatrick McGehearty	add	%i2, 8, %i2
4312*280575beSPatrick McGehearty	sub	%i3, 8, %i3		! insure we don't load past end of src
4313*280575beSPatrick McGehearty	andn	%i0, 0x7, %o4		! %o4 has long word aligned src address
4314*280575beSPatrick McGehearty	add	%i0, %i3, %i0		! advance %i0 to after multiple of 8
4315*280575beSPatrick McGehearty	ldd	[%o4], %d0		! fetch partial word
4316*280575beSPatrick McGehearty.co_unaln_by8:
4317*280575beSPatrick McGehearty	ldd	[%o4+8], %d2
4318*280575beSPatrick McGehearty	add	%o4, 8, %o4
4319*280575beSPatrick McGehearty	faligndata %d0, %d2, %d16
4320*280575beSPatrick McGehearty	subcc	%i3, 8, %i3
4321*280575beSPatrick McGehearty	stda	%d16, [%i1]%asi
4322*280575beSPatrick McGehearty	fmovd	%d2, %d0
4323*280575beSPatrick McGehearty	bgu,pt	%ncc, .co_unaln_by8
4324*280575beSPatrick McGehearty	add	%i1, 8, %i1
4325*280575beSPatrick McGehearty
/*
 * .co_unaln_short: if at least 8 bytes remain, copy exactly 8 of them.
 * Each 4-byte half is built in %o3 from four single-byte loads (most
 * significant byte first) and written with one 32-bit store through
 * %asi.  Afterwards %i0 and %i1 are advanced by 8 and %i2 is reduced by
 * 8.  With fewer than 8 bytes the whole sequence is skipped.
 */
4326*280575beSPatrick McGehearty.co_unaln_short:
4327*280575beSPatrick McGehearty	cmp	%i2, 8
4328*280575beSPatrick McGehearty	blt,pt	%ncc, .co_unalnfin
4329*280575beSPatrick McGehearty	nop
4330*280575beSPatrick McGehearty	ldub	[%i0], %o4
4331*280575beSPatrick McGehearty	sll	%o4, 24, %o3
4332*280575beSPatrick McGehearty	ldub	[%i0+1], %o4
4333*280575beSPatrick McGehearty	sll	%o4, 16, %o4
4334*280575beSPatrick McGehearty	or	%o4, %o3, %o3
4335*280575beSPatrick McGehearty	ldub	[%i0+2], %o4
4336*280575beSPatrick McGehearty	sll	%o4, 8, %o4
4337*280575beSPatrick McGehearty	or	%o4, %o3, %o3
4338*280575beSPatrick McGehearty	ldub	[%i0+3], %o4
4339*280575beSPatrick McGehearty	or	%o4, %o3, %o3
4340*280575beSPatrick McGehearty	stwa	%o3, [%i1]%asi
4341*280575beSPatrick McGehearty	ldub	[%i0+4], %o4
4342*280575beSPatrick McGehearty	sll	%o4, 24, %o3
4343*280575beSPatrick McGehearty	ldub	[%i0+5], %o4
4344*280575beSPatrick McGehearty	sll	%o4, 16, %o4
4345*280575beSPatrick McGehearty	or	%o4, %o3, %o3
4346*280575beSPatrick McGehearty	ldub	[%i0+6], %o4
4347*280575beSPatrick McGehearty	sll	%o4, 8, %o4
4348*280575beSPatrick McGehearty	or	%o4, %o3, %o3
4349*280575beSPatrick McGehearty	ldub	[%i0+7], %o4
4350*280575beSPatrick McGehearty	or	%o4, %o3, %o3
4351*280575beSPatrick McGehearty	stwa	%o3, [%i1+4]%asi
4352*280575beSPatrick McGehearty	add	%i0, 8, %i0
4353*280575beSPatrick McGehearty	add	%i1, 8, %i1
4354*280575beSPatrick McGehearty	sub	%i2, 8, %i2
/*
 * .co_unalnfin: with 4 or more bytes left, copy one 32-bit word assembled
 * from four single-byte loads.  The subcc near the top sets the condition
 * codes consumed by the bnz near the bottom (nothing in between modifies
 * them): a non-zero remainder goes to .co_unaln3x, zero goes to .co_exit.
 * The word store is in the delay slot of that bnz and uses [%i1-4]
 * because %i1 has already been advanced.
 *
 * With fewer than 4 bytes, branch to .co_unalnz; the "tst %i2" in that
 * branch's delay slot supplies the condition codes tested there.
 */
4355*280575beSPatrick McGehearty.co_unalnfin:
4356*280575beSPatrick McGehearty	cmp	%i2, 4
4357*280575beSPatrick McGehearty	blt,pt	%ncc, .co_unalnz
4358*280575beSPatrick McGehearty	tst	%i2
4359*280575beSPatrick McGehearty	ldub	[%i0], %o3		! read byte
4360*280575beSPatrick McGehearty	subcc	%i2, 4, %i2		! reduce count by 4
4361*280575beSPatrick McGehearty	sll	%o3, 24, %o3		! position
4362*280575beSPatrick McGehearty	ldub	[%i0+1], %o4
4363*280575beSPatrick McGehearty	sll	%o4, 16, %o4		! position
4364*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
4365*280575beSPatrick McGehearty	ldub	[%i0+2], %o4
4366*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
4367*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
4368*280575beSPatrick McGehearty	add	%i1, 4, %i1		! advance dst by 4
4369*280575beSPatrick McGehearty	ldub	[%i0+3], %o4
4370*280575beSPatrick McGehearty	add	%i0, 4, %i0		! advance src by 4
4371*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
4372*280575beSPatrick McGehearty	bnz,pt	%ncc, .co_unaln3x
4373*280575beSPatrick McGehearty	stwa	%o4, [%i1-4]%asi
4374*280575beSPatrick McGehearty	ba	.co_exit
4375*280575beSPatrick McGehearty	nop
/*
 * .co_unalnz: reached with fewer than 4 bytes left and condition codes
 * from "tst %i2"; exits if nothing remains.  The write of %l5 to %gsr is
 * in the (non-annulled) delay slot and so executes whether or not the
 * branch is taken.
 * NOTE(review): the 4-byte path just above reaches .co_unaln3x and
 * .co_exit without passing through this %gsr write - confirm that this
 * is intended.
 *
 * .co_unaln3x: copies the last 1, 2 or 3 bytes one at a time.  The first
 * two stores sit in the delay slots of the branches that test the
 * decremented count, so each byte is stored before the exit is taken.
 */
4376*280575beSPatrick McGehearty.co_unalnz:
4377*280575beSPatrick McGehearty	bz,pt	%ncc, .co_exit
4378*280575beSPatrick McGehearty	wr	%l5, %g0, %gsr		! restore %gsr
4379*280575beSPatrick McGehearty.co_unaln3x:				! Exactly 1, 2, or 3 bytes remain
4380*280575beSPatrick McGehearty	subcc	%i2, 1, %i2		! reduce count for cc test
4381*280575beSPatrick McGehearty	ldub	[%i0], %o4		! load one byte
4382*280575beSPatrick McGehearty	bz,pt	%ncc, .co_exit
4383*280575beSPatrick McGehearty	stba	%o4, [%i1]%asi		! store one byte
4384*280575beSPatrick McGehearty	ldub	[%i0+1], %o4		! load second byte
4385*280575beSPatrick McGehearty	subcc	%i2, 1, %i2
4386*280575beSPatrick McGehearty	bz,pt	%ncc, .co_exit
4387*280575beSPatrick McGehearty	stba	%o4, [%i1+1]%asi	! store second byte
4388*280575beSPatrick McGehearty	ldub	[%i0+2], %o4		! load third byte
4389*280575beSPatrick McGehearty	stba	%o4, [%i1+2]%asi	! store third byte
/*
 * .co_exit: common successful exit of the FP copy path.  If %g1 is
 * non-zero, invoke BLD_FP_FROMSTACK with %o4 as its argument; otherwise
 * invoke FZERO, write %g1 (zero on this path) to %fprs, and issue a
 * membar #Sync in the branch delay slot.  Both paths then clear
 * FPUSED_FLAG in SAVED_LOFAULT, store the result back to the thread's
 * t_lofault, and return 0 in the caller's %o0 via ret/restore.
 * NOTE(review): %g1 presumably holds the %fprs value captured when the
 * FP path was entered, and the two macros presumably reload/clear the FP
 * registers - both are defined outside this view; confirm.
 */
4390*280575beSPatrick McGehearty.co_exit:
4391*280575beSPatrick McGehearty	brnz	%g1, .co_fp_restore
4392*280575beSPatrick McGehearty	nop
4393*280575beSPatrick McGehearty	FZERO
4394*280575beSPatrick McGehearty	wr	%g1, %g0, %fprs
4395*280575beSPatrick McGehearty	ba,pt	%ncc, .co_ex2
4396*280575beSPatrick McGehearty	membar	#Sync
4397*280575beSPatrick McGehearty.co_fp_restore:
4398*280575beSPatrick McGehearty	BLD_FP_FROMSTACK(%o4)
4399*280575beSPatrick McGehearty.co_ex2:
4400*280575beSPatrick McGehearty	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
4401*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4402*280575beSPatrick McGehearty	ret
4403*280575beSPatrick McGehearty	restore %g0, 0, %o0
4404*280575beSPatrick McGehearty
/*
 * .copyout_err: error exit.  Loads the thread's T_COPYOPS pointer; if it
 * is non-NULL, jumps to the routine stored at its CP_COPYOUT slot,
 * otherwise returns -1 in %o0.  Runs as a leaf (retl) and uses %o4 and
 * %g2 as scratch.
 * NOTE(review): presumably reached through t_lofault after the fault
 * handler has put the original arguments back in %o0-%o2 - the code that
 * installs this handler is outside this view; confirm.
 */
4405*280575beSPatrick McGehearty.copyout_err:
4406*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_COPYOPS], %o4
4407*280575beSPatrick McGehearty	brz	%o4, 2f
4408*280575beSPatrick McGehearty	nop
4409*280575beSPatrick McGehearty	ldn	[%o4 + CP_COPYOUT], %g2
4410*280575beSPatrick McGehearty	jmp	%g2
4411*280575beSPatrick McGehearty	nop
4412*280575beSPatrick McGehearty2:
4413*280575beSPatrick McGehearty	retl
4414*280575beSPatrick McGehearty	mov	-1, %o0
4415*280575beSPatrick McGehearty
4416*280575beSPatrick McGehearty#else	/* NIAGARA_IMPL */
/*
 * .do_copyout (NIAGARA_IMPL variant): leaf-mode entry of the copy, with
 * %o0 = source, %o1 = destination and %o2 = length.  Returns 0 at once
 * for a zero length.  Otherwise it saves the current t_lofault in
 * SAVED_LOFAULT, installs copyio_fault_nowindow as the thread's fault
 * handler (leaving the address of copyio_fault in %o4), records the three
 * arguments in SAVE_SRC/SAVE_DST/SAVE_COUNT, and dispatches on size:
 * a length above SMALL_LIMIT goes to .dco_ns with %o3 = %o0 | %o1 (set in
 * the annulled delay slot, so only when the branch is taken); anything
 * smaller falls through to the byte-for-byte copy.
 */
44177c478bd9Sstevel@tonic-gate.do_copyout:
44187c478bd9Sstevel@tonic-gate	!
44197c478bd9Sstevel@tonic-gate	! Check the length and bail if zero.
44207c478bd9Sstevel@tonic-gate	!
44217c478bd9Sstevel@tonic-gate	tst	%o2
44227c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, 1f
44237c478bd9Sstevel@tonic-gate	nop
44247c478bd9Sstevel@tonic-gate	retl
44257c478bd9Sstevel@tonic-gate	clr	%o0
44267c478bd9Sstevel@tonic-gate1:
44277c478bd9Sstevel@tonic-gate	sethi	%hi(copyio_fault), %o4
44287c478bd9Sstevel@tonic-gate	or	%o4, %lo(copyio_fault), %o4
44297c478bd9Sstevel@tonic-gate	sethi	%hi(copyio_fault_nowindow), %o3
44307c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
44317c478bd9Sstevel@tonic-gate	or	%o3, %lo(copyio_fault_nowindow), %o3
44327c478bd9Sstevel@tonic-gate	membar	#Sync
44337c478bd9Sstevel@tonic-gate	stn	%o3, [THREAD_REG + T_LOFAULT]
44347c478bd9Sstevel@tonic-gate
44357c478bd9Sstevel@tonic-gate	mov	%o0, SAVE_SRC
44367c478bd9Sstevel@tonic-gate	mov	%o1, SAVE_DST
44377c478bd9Sstevel@tonic-gate	mov	%o2, SAVE_COUNT
44387c478bd9Sstevel@tonic-gate
44397c478bd9Sstevel@tonic-gate	!
44407c478bd9Sstevel@tonic-gate	! Check to see if we're more than SMALL_LIMIT (7 bytes).
44417c478bd9Sstevel@tonic-gate	! Run in leaf mode, using the %o regs as our input regs.
44427c478bd9Sstevel@tonic-gate	!
44437c478bd9Sstevel@tonic-gate	subcc	%o2, SMALL_LIMIT, %o3
44447c478bd9Sstevel@tonic-gate	bgu,a,pt %ncc, .dco_ns
44457c478bd9Sstevel@tonic-gate	or	%o0, %o1, %o3
/*
 * .dcobcp: set-up for the byte-at-a-time copy.  %o0 and %o1 are moved to
 * the ends of the source and destination buffers and %o3 becomes the
 * negated length, so that [pointer + %o3] walks forward through each
 * buffer as %o3 counts up toward zero.  The first source byte is loaded
 * into %o4 in the delay slot of the branch to .dcocl.
 */
44467c478bd9Sstevel@tonic-gate	!
44477c478bd9Sstevel@tonic-gate	! What was previously ".small_copyout"
44487c478bd9Sstevel@tonic-gate	! Do full differenced copy.
44497c478bd9Sstevel@tonic-gate	!
44507c478bd9Sstevel@tonic-gate.dcobcp:
44517c478bd9Sstevel@tonic-gate	sub	%g0, %o2, %o3		! negate count
44527c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0		! make %o0 point at the end
44537c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1		! make %o1 point at the end
44547c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dcocl
44557c478bd9Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4	! load first byte
44567c478bd9Sstevel@tonic-gate	!
44577c478bd9Sstevel@tonic-gate	! %o0 and %o1 point at the end and remain pointing at the end
44587c478bd9Sstevel@tonic-gate	! of their buffers. We pull things out by adding %o3 (which is
44597c478bd9Sstevel@tonic-gate	! the negation of the length) to the buffer end which gives us
44607c478bd9Sstevel@tonic-gate	! the current location in the buffers. By incrementing %o3 we walk
44617c478bd9Sstevel@tonic-gate	! through both buffers without having to bump each buffer's
44627c478bd9Sstevel@tonic-gate	! pointer. A very fast 4 instruction loop.
44637c478bd9Sstevel@tonic-gate	!
/*
 * .dcocl: byte copy loop.  Stores the byte held in %o4 to the user
 * destination at [%o1 + %o3], increments the negative index %o3, and
 * while it is still negative loads the next source byte in the annulled
 * delay slot (skipped on the final pass).  When the index reaches zero it
 * issues a membar #Sync, puts SAVED_LOFAULT back into t_lofault and
 * returns 0.
 */
44647c478bd9Sstevel@tonic-gate	.align 16
44657c478bd9Sstevel@tonic-gate.dcocl:
44667c478bd9Sstevel@tonic-gate	stba	%o4, [%o1 + %o3]ASI_USER
44677c478bd9Sstevel@tonic-gate	inccc	%o3
44687c478bd9Sstevel@tonic-gate	bl,a,pt	%ncc, .dcocl
44697c478bd9Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4
44707c478bd9Sstevel@tonic-gate	!
44717c478bd9Sstevel@tonic-gate	! We're done. Go home.
44727c478bd9Sstevel@tonic-gate	!
44737c478bd9Sstevel@tonic-gate	membar	#Sync
44747c478bd9Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
44757c478bd9Sstevel@tonic-gate	retl
44767c478bd9Sstevel@tonic-gate	clr	%o0
44777c478bd9Sstevel@tonic-gate	!
44787c478bd9Sstevel@tonic-gate	! Try aligned copies from here.
44797c478bd9Sstevel@tonic-gate	!
44807c478bd9Sstevel@tonic-gate.dco_ns:
44817c478bd9Sstevel@tonic-gate	! %o0 = kernel addr (to be copied from)
44827c478bd9Sstevel@tonic-gate	! %o1 = user addr (to be copied to)
44837c478bd9Sstevel@tonic-gate	! %o2 = length
44847c478bd9Sstevel@tonic-gate	! %o3 = %o0 | %o1 (used for alignment checking)
44857c478bd9Sstevel@tonic-gate	! %o4 is alternate lo_fault
44867c478bd9Sstevel@tonic-gate	! %o5 is original lo_fault
44877c478bd9Sstevel@tonic-gate	!
44887c478bd9Sstevel@tonic-gate	! See if we're single byte aligned. If we are, check the
44897c478bd9Sstevel@tonic-gate	! limit for single byte copies. If we're smaller or equal,
44907c478bd9Sstevel@tonic-gate	! bounce to the byte for byte copy loop. Otherwise do it in
44917c478bd9Sstevel@tonic-gate	! HW (if enabled).
44927c478bd9Sstevel@tonic-gate	!
/*
 * Odd-alignment test on %o3 (source | destination).  If bit 0 is clear,
 * continue at .dcoh8; the "btst 7" in the delay slot supplies the
 * condition codes tested there.  Otherwise load hw_copy_limit_1: a zero
 * limit, or a limit greater than or equal to the length, sends the copy
 * to the byte loop set-up at .dcobcp; a longer copy goes to .big_copyout.
 * The subcc in the delay slot of the bz computes (limit - length) for the
 * bge that follows.
 */
44937c478bd9Sstevel@tonic-gate	btst	1, %o3
44947c478bd9Sstevel@tonic-gate	bz,pt	%icc, .dcoh8
44957c478bd9Sstevel@tonic-gate	btst	7, %o3
44967c478bd9Sstevel@tonic-gate	!
44977c478bd9Sstevel@tonic-gate	! Single byte aligned. Do we do it via HW or via
44987c478bd9Sstevel@tonic-gate	! byte for byte? Do a quick no memory reference
44997c478bd9Sstevel@tonic-gate	! check to pick up small copies.
45007c478bd9Sstevel@tonic-gate	!
45017c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3
45027c478bd9Sstevel@tonic-gate	!
45037c478bd9Sstevel@tonic-gate	! Big enough that we need to check the HW limit for
45047c478bd9Sstevel@tonic-gate	! this size copy.
45057c478bd9Sstevel@tonic-gate	!
45067c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
45077c478bd9Sstevel@tonic-gate	!
45087c478bd9Sstevel@tonic-gate	! Is HW copy on? If not, do everything byte for byte.
45097c478bd9Sstevel@tonic-gate	!
45107c478bd9Sstevel@tonic-gate	tst	%o3
45117c478bd9Sstevel@tonic-gate	bz,pn	%icc, .dcobcp
45127c478bd9Sstevel@tonic-gate	subcc	%o3, %o2, %o3
45137c478bd9Sstevel@tonic-gate	!
45147c478bd9Sstevel@tonic-gate	! If we're less than or equal to the single byte copy limit,
45157c478bd9Sstevel@tonic-gate	! bop to the copy loop.
45167c478bd9Sstevel@tonic-gate	!
45177c478bd9Sstevel@tonic-gate	bge,pt	%ncc, .dcobcp
45187c478bd9Sstevel@tonic-gate	nop
45197c478bd9Sstevel@tonic-gate	!
45207c478bd9Sstevel@tonic-gate	! We're big enough and copy is on. Do it with HW.
45217c478bd9Sstevel@tonic-gate	!
45227c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .big_copyout
45237c478bd9Sstevel@tonic-gate	nop
/*
 * .dcoh8: entered with condition codes from "btst 7, %o3".  If
 * source | destination is not 8-byte aligned, go to .dcoh4, issuing the
 * 4-byte test in the annulled delay slot.  Otherwise load
 * hw_copy_limit_8: a zero limit, or a limit greater than or equal to the
 * length, uses the software loop prepared at .dcos8; a longer copy goes
 * to .big_copyout.
 *
 * .dcos8: moves %o0/%o1 to the ends of their buffers and puts the negated
 * length in %o3, then branches to .dodebc with %o2 converted (in the
 * delay slot) to the number of 8-byte chunks.
 */
45247c478bd9Sstevel@tonic-gate.dcoh8:
45257c478bd9Sstevel@tonic-gate	!
45267c478bd9Sstevel@tonic-gate	! 8 byte aligned?
45277c478bd9Sstevel@tonic-gate	!
45287c478bd9Sstevel@tonic-gate	bnz,a	%ncc, .dcoh4
45297c478bd9Sstevel@tonic-gate	btst	3, %o3
45307c478bd9Sstevel@tonic-gate	!
45317c478bd9Sstevel@tonic-gate	! See if we're in the "small range".
45327c478bd9Sstevel@tonic-gate	! If so, go off and do the copy.
45337c478bd9Sstevel@tonic-gate	! If not, load the hard limit. %o3 is
45347c478bd9Sstevel@tonic-gate	! available for reuse.
45357c478bd9Sstevel@tonic-gate	!
45367c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3
45377c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
45387c478bd9Sstevel@tonic-gate	!
45397c478bd9Sstevel@tonic-gate	! If it's zero, there's no HW bcopy.
45407c478bd9Sstevel@tonic-gate	! Bop off to the aligned copy.
45417c478bd9Sstevel@tonic-gate	!
45427c478bd9Sstevel@tonic-gate	tst	%o3
45437c478bd9Sstevel@tonic-gate	bz,pn	%icc, .dcos8
45447c478bd9Sstevel@tonic-gate	subcc	%o3, %o2, %o3
45457c478bd9Sstevel@tonic-gate	!
45467c478bd9Sstevel@tonic-gate	! We're negative if our size is larger than hw_copy_limit_8.
45477c478bd9Sstevel@tonic-gate	!
45487c478bd9Sstevel@tonic-gate	bge,pt	%ncc, .dcos8
45497c478bd9Sstevel@tonic-gate	nop
45507c478bd9Sstevel@tonic-gate	!
45517c478bd9Sstevel@tonic-gate	! HW assist is on and we're large enough. Do it.
45527c478bd9Sstevel@tonic-gate	!
45537c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .big_copyout
45547c478bd9Sstevel@tonic-gate	nop
45557c478bd9Sstevel@tonic-gate.dcos8:
45567c478bd9Sstevel@tonic-gate	!
45577c478bd9Sstevel@tonic-gate	! Housekeeping for copy loops. Uses same idea as in the byte for
45587c478bd9Sstevel@tonic-gate	! byte copy loop above.
45597c478bd9Sstevel@tonic-gate	!
45607c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
45617c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
45627c478bd9Sstevel@tonic-gate	sub	%g0, %o2, %o3
45637c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dodebc
45647c478bd9Sstevel@tonic-gate	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
45657c478bd9Sstevel@tonic-gate	!
45667c478bd9Sstevel@tonic-gate	! 4 byte aligned?
45677c478bd9Sstevel@tonic-gate	!
/*
 * .dcoh4: entered with condition codes from "btst 3, %o3".  If
 * source | destination is not 4-byte aligned, go to .dcoh2.  The sethi
 * that follows the comment block is the delay slot of that branch and
 * executes on both paths.  Otherwise load hw_copy_limit_4: a zero limit,
 * or a limit greater than or equal to the length, uses the software loop
 * prepared at .dcos4; a longer copy goes to .big_copyout.
 *
 * .dcos4: moves %o0/%o1 to the ends of their buffers and puts the negated
 * length in %o3, then branches to .dodfbc with %o2 converted (in the
 * delay slot) to the number of 4-byte chunks.
 */
45687c478bd9Sstevel@tonic-gate.dcoh4:
45697c478bd9Sstevel@tonic-gate	bnz,pn	%ncc, .dcoh2
45707c478bd9Sstevel@tonic-gate	!
45717c478bd9Sstevel@tonic-gate	! See if we're in the "small range".
45727c478bd9Sstevel@tonic-gate	! If so, go off an do the copy.
45737c478bd9Sstevel@tonic-gate	! If not, load the hard limit. %o3 is
45747c478bd9Sstevel@tonic-gate	! available for reuse.
45757c478bd9Sstevel@tonic-gate	!
45767c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3
45777c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
45787c478bd9Sstevel@tonic-gate	!
45797c478bd9Sstevel@tonic-gate	! If it's zero, there's no HW bcopy.
45807c478bd9Sstevel@tonic-gate	! Bop off to the aligned copy.
45817c478bd9Sstevel@tonic-gate	!
45827c478bd9Sstevel@tonic-gate	tst	%o3
45837c478bd9Sstevel@tonic-gate	bz,pn	%icc, .dcos4
45847c478bd9Sstevel@tonic-gate	subcc	%o3, %o2, %o3
45857c478bd9Sstevel@tonic-gate	!
45867c478bd9Sstevel@tonic-gate	! We're negative if our size is larger than hw_copy_limit_4.
45877c478bd9Sstevel@tonic-gate	!
45887c478bd9Sstevel@tonic-gate	bge,pt	%ncc, .dcos4
45897c478bd9Sstevel@tonic-gate	nop
45907c478bd9Sstevel@tonic-gate	!
45917c478bd9Sstevel@tonic-gate	! HW assist is on and we're large enough. Do it.
45927c478bd9Sstevel@tonic-gate	!
45937c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .big_copyout
45947c478bd9Sstevel@tonic-gate	nop
45957c478bd9Sstevel@tonic-gate.dcos4:
45967c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
45977c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
45987c478bd9Sstevel@tonic-gate	sub	%g0, %o2, %o3
45997c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dodfbc
46007c478bd9Sstevel@tonic-gate	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
46017c478bd9Sstevel@tonic-gate	!
46027c478bd9Sstevel@tonic-gate	! We must be 2 byte aligned. Off we go.
46037c478bd9Sstevel@tonic-gate	! The check for small copies was done in the
46047c478bd9Sstevel@tonic-gate	! delay at .dcoh4
46057c478bd9Sstevel@tonic-gate	!
46067c478bd9Sstevel@tonic-gate.dcoh2:
46077c478bd9Sstevel@tonic-gate	ble	%ncc, .dcos2
46087c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3
46097c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
46107c478bd9Sstevel@tonic-gate	tst	%o3
46117c478bd9Sstevel@tonic-gate	bz,pn	%icc, .dcos2
46127c478bd9Sstevel@tonic-gate	subcc	%o3, %o2, %o3
46137c478bd9Sstevel@tonic-gate	bge,pt	%ncc, .dcos2
46147c478bd9Sstevel@tonic-gate	nop
46157c478bd9Sstevel@tonic-gate	!
46167c478bd9Sstevel@tonic-gate	! HW is on and we're big enough. Do it.
46177c478bd9Sstevel@tonic-gate	!
46187c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .big_copyout
46197c478bd9Sstevel@tonic-gate	nop
46207c478bd9Sstevel@tonic-gate.dcos2:
46217c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
46227c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
46237c478bd9Sstevel@tonic-gate	sub	%g0, %o2, %o3
46247c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dodtbc
46257c478bd9Sstevel@tonic-gate	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
46267c478bd9Sstevel@tonic-gate.small_copyout:
46277c478bd9Sstevel@tonic-gate	!
46287c478bd9Sstevel@tonic-gate	! Why are we doing this AGAIN? There are certain conditions in
46297c478bd9Sstevel@tonic-gate	! big_copyout that will cause us to forego the HW assisted copies
46307c478bd9Sstevel@tonic-gate	! and bounce back to a non-HW assisted copy. This dispatches those
46317c478bd9Sstevel@tonic-gate	! copies. Note that we branch around this in the main line code.
46327c478bd9Sstevel@tonic-gate	!
46337c478bd9Sstevel@tonic-gate	! We make no check for limits or HW enablement here. We've
46347c478bd9Sstevel@tonic-gate	! already been told that we're a poster child so just go off
46357c478bd9Sstevel@tonic-gate	! and do it.
46367c478bd9Sstevel@tonic-gate	!
46377c478bd9Sstevel@tonic-gate	or	%o0, %o1, %o3
46387c478bd9Sstevel@tonic-gate	btst	1, %o3
46397c478bd9Sstevel@tonic-gate	bnz	%icc, .dcobcp		! Most likely
46407c478bd9Sstevel@tonic-gate	btst	7, %o3
46417c478bd9Sstevel@tonic-gate	bz	%icc, .dcos8
46427c478bd9Sstevel@tonic-gate	btst	3, %o3
46437c478bd9Sstevel@tonic-gate	bz	%icc, .dcos4
46447c478bd9Sstevel@tonic-gate	nop
46457c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dcos2
46467c478bd9Sstevel@tonic-gate	nop
46477c478bd9Sstevel@tonic-gate	.align 32
46487c478bd9Sstevel@tonic-gate.dodebc:
46497c478bd9Sstevel@tonic-gate	ldx	[%o0 + %o3], %o4
46507c478bd9Sstevel@tonic-gate	deccc	%o2
46517c478bd9Sstevel@tonic-gate	stxa	%o4, [%o1 + %o3]ASI_USER
46527c478bd9Sstevel@tonic-gate	bg,pt	%ncc, .dodebc
46537c478bd9Sstevel@tonic-gate	addcc	%o3, 8, %o3
46547c478bd9Sstevel@tonic-gate	!
46557c478bd9Sstevel@tonic-gate	! End of copy loop. Check to see if we're done. Most
46567c478bd9Sstevel@tonic-gate	! eight byte aligned copies end here.
46577c478bd9Sstevel@tonic-gate	!
46587c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .dcofh
46597c478bd9Sstevel@tonic-gate	nop
46607c478bd9Sstevel@tonic-gate	!
46617c478bd9Sstevel@tonic-gate	! Something is left - do it byte for byte.
46627c478bd9Sstevel@tonic-gate	!
46637c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dcocl
46647c478bd9Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4	! load next byte
46657c478bd9Sstevel@tonic-gate	!
46667c478bd9Sstevel@tonic-gate	! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
46677c478bd9Sstevel@tonic-gate	!
46687c478bd9Sstevel@tonic-gate	.align 32
46697c478bd9Sstevel@tonic-gate.dodfbc:
46707c478bd9Sstevel@tonic-gate	lduw	[%o0 + %o3], %o4
46717c478bd9Sstevel@tonic-gate	deccc	%o2
46727c478bd9Sstevel@tonic-gate	sta	%o4, [%o1 + %o3]ASI_USER
46737c478bd9Sstevel@tonic-gate	bg,pt	%ncc, .dodfbc
46747c478bd9Sstevel@tonic-gate	addcc	%o3, 4, %o3
46757c478bd9Sstevel@tonic-gate	!
46767c478bd9Sstevel@tonic-gate	! End of copy loop. Check to see if we're done. Most
46777c478bd9Sstevel@tonic-gate	! four byte aligned copies end here.
46787c478bd9Sstevel@tonic-gate	!
46797c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .dcofh
46807c478bd9Sstevel@tonic-gate	nop
46817c478bd9Sstevel@tonic-gate	!
46827c478bd9Sstevel@tonic-gate	! Something is left. Do it byte for byte.
46837c478bd9Sstevel@tonic-gate	!
46847c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dcocl
46857c478bd9Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4	! load next byte
46867c478bd9Sstevel@tonic-gate	!
46877c478bd9Sstevel@tonic-gate	! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
46887c478bd9Sstevel@tonic-gate	! copy.
46897c478bd9Sstevel@tonic-gate	!
46907c478bd9Sstevel@tonic-gate	.align 32
46917c478bd9Sstevel@tonic-gate.dodtbc:
46927c478bd9Sstevel@tonic-gate	lduh	[%o0 + %o3], %o4
46937c478bd9Sstevel@tonic-gate	deccc	%o2
46947c478bd9Sstevel@tonic-gate	stha	%o4, [%o1 + %o3]ASI_USER
46957c478bd9Sstevel@tonic-gate	bg,pt	%ncc, .dodtbc
46967c478bd9Sstevel@tonic-gate	addcc	%o3, 2, %o3
46977c478bd9Sstevel@tonic-gate	!
46987c478bd9Sstevel@tonic-gate	! End of copy loop. Anything left?
46997c478bd9Sstevel@tonic-gate	!
47007c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .dcofh
47017c478bd9Sstevel@tonic-gate	nop
47027c478bd9Sstevel@tonic-gate	!
47037c478bd9Sstevel@tonic-gate	! Deal with the last byte
47047c478bd9Sstevel@tonic-gate	!
47057c478bd9Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4
47067c478bd9Sstevel@tonic-gate	stba	%o4, [%o1 + %o3]ASI_USER
47077c478bd9Sstevel@tonic-gate.dcofh:
47087c478bd9Sstevel@tonic-gate	membar	#Sync
47097c478bd9Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
47107c478bd9Sstevel@tonic-gate	retl
47117c478bd9Sstevel@tonic-gate	clr	%o0
47127c478bd9Sstevel@tonic-gate
47137c478bd9Sstevel@tonic-gate.big_copyout:
47147c478bd9Sstevel@tonic-gate	! We're going to go off and do a block copy.
47157c478bd9Sstevel@tonic-gate	! Switch fault handlers and grab a window. We
47167c478bd9Sstevel@tonic-gate	! don't do a membar #Sync since we've touched only
47177c478bd9Sstevel@tonic-gate	! kernel data to this point.
47187c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
47197c478bd9Sstevel@tonic-gate
47207c478bd9Sstevel@tonic-gate	! Copyouts that reach here are larger than 256 bytes. The
47217c478bd9Sstevel@tonic-gate	! hw_copy_limit_1 is set to 256. Never set this limit to less
47227c478bd9Sstevel@tonic-gate	! than 128 bytes.
4723340af271Swh94709	save	%sp, -SA(MINFRAME), %sp
47247c478bd9Sstevel@tonic-gate.do_block_copyout:
47257c478bd9Sstevel@tonic-gate
47267c478bd9Sstevel@tonic-gate	! Swap src/dst since the code below is memcpy code
47277c478bd9Sstevel@tonic-gate	! and memcpy/bcopy have different calling sequences
47287c478bd9Sstevel@tonic-gate	mov	%i1, %i5
47297c478bd9Sstevel@tonic-gate	mov	%i0, %i1
47307c478bd9Sstevel@tonic-gate	mov	%i5, %i0
47317c478bd9Sstevel@tonic-gate
4732340af271Swh94709	! Block (64 bytes) align the destination.
4733340af271Swh94709	andcc	%i0, 0x3f, %i3		! is dst block aligned
4734340af271Swh94709	bz	%ncc, copyout_blalign	! dst already block aligned
4735340af271Swh94709	sub	%i3, 0x40, %i3
4736340af271Swh94709	neg	%i3			! bytes till dst 64 bytes aligned
4737340af271Swh94709	sub	%i2, %i3, %i2		! update i2 with new count
47387c478bd9Sstevel@tonic-gate
4739340af271Swh94709	! Based on source and destination alignment do
4740340af271Swh94709	! either 8 bytes, 4 bytes, 2 bytes or byte copy.
47417c478bd9Sstevel@tonic-gate
4742340af271Swh94709	! Is dst & src 8B aligned
4743340af271Swh94709	or	%i0, %i1, %o2
4744340af271Swh94709	andcc	%o2, 0x7, %g0
4745340af271Swh94709	bz	%ncc, .co_alewdcp
4746340af271Swh94709	nop
4747340af271Swh94709
4748340af271Swh94709	! Is dst & src 4B aligned
4749340af271Swh94709	andcc	%o2, 0x3, %g0
4750340af271Swh94709	bz	%ncc, .co_alwdcp
4751340af271Swh94709	nop
4752340af271Swh94709
4753340af271Swh94709	! Is dst & src 2B aligned
4754340af271Swh94709	andcc	%o2, 0x1, %g0
4755340af271Swh94709	bz	%ncc, .co_alhlfwdcp
4756340af271Swh94709	nop
4757340af271Swh94709
4758340af271Swh94709	! 1B aligned
4759340af271Swh947091:	ldub	[%i1], %o2
4760340af271Swh94709	stba	%o2, [%i0]ASI_USER
47617c478bd9Sstevel@tonic-gate	inc	%i1
47627c478bd9Sstevel@tonic-gate	deccc	%i3
4763340af271Swh94709	bgu,pt	%ncc, 1b
47647c478bd9Sstevel@tonic-gate	inc	%i0
47657c478bd9Sstevel@tonic-gate
47667c478bd9Sstevel@tonic-gate	ba	copyout_blalign
4767340af271Swh94709	nop
47687c478bd9Sstevel@tonic-gate
4769340af271Swh94709	! dst & src 4B aligned
4770340af271Swh94709.co_alwdcp:
4771340af271Swh94709	ld	[%i1], %o2
4772340af271Swh94709	sta	%o2, [%i0]ASI_USER
4773340af271Swh94709	add	%i1, 0x4, %i1
4774340af271Swh94709	subcc	%i3, 0x4, %i3
4775340af271Swh94709	bgu,pt	%ncc, .co_alwdcp
4776340af271Swh94709	add	%i0, 0x4, %i0
4777340af271Swh94709
4778340af271Swh94709	ba	copyout_blalign
4779340af271Swh94709	nop
4780340af271Swh94709
4781340af271Swh94709	! dst & src 2B aligned
4782340af271Swh94709.co_alhlfwdcp:
4783340af271Swh94709	lduh	[%i1], %o2
4784340af271Swh94709	stuha	%o2, [%i0]ASI_USER
4785340af271Swh94709	add	%i1, 0x2, %i1
4786340af271Swh94709	subcc	%i3, 0x2, %i3
4787340af271Swh94709	bgu,pt	%ncc, .co_alhlfwdcp
4788340af271Swh94709	add	%i0, 0x2, %i0
4789340af271Swh94709
4790340af271Swh94709	ba	copyout_blalign
4791340af271Swh94709	nop
4792340af271Swh94709
4793340af271Swh94709	! dst & src 8B aligned
4794340af271Swh94709.co_alewdcp:
47957c478bd9Sstevel@tonic-gate	ldx	[%i1], %o2
47967c478bd9Sstevel@tonic-gate	stxa	%o2, [%i0]ASI_USER
47977c478bd9Sstevel@tonic-gate	add	%i1, 0x8, %i1
47987c478bd9Sstevel@tonic-gate	subcc	%i3, 0x8, %i3
4799340af271Swh94709	bgu,pt	%ncc, .co_alewdcp
48007c478bd9Sstevel@tonic-gate	add	%i0, 0x8, %i0
48017c478bd9Sstevel@tonic-gate
4802340af271Swh94709	! Now Destination is block (64 bytes) aligned
48037c478bd9Sstevel@tonic-gatecopyout_blalign:
48047c478bd9Sstevel@tonic-gate	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
48057c478bd9Sstevel@tonic-gate	sub	%i2, %i3, %i2		! Residue bytes in %i2
48067c478bd9Sstevel@tonic-gate
48077c478bd9Sstevel@tonic-gate	mov	ASI_BLK_INIT_QUAD_LDD_AIUS, %asi
48087c478bd9Sstevel@tonic-gate
48097c478bd9Sstevel@tonic-gate	andcc	%i1, 0xf, %o2		! is src quadword aligned
48107c478bd9Sstevel@tonic-gate	bz,pn	%xcc, .co_blkcpy	! src offset in %o2 (last 4-bits)
48117c478bd9Sstevel@tonic-gate	nop
48127c478bd9Sstevel@tonic-gate	cmp	%o2, 0x8
48137c478bd9Sstevel@tonic-gate	bg	.co_upper_double
48147c478bd9Sstevel@tonic-gate	nop
48157c478bd9Sstevel@tonic-gate	bl	.co_lower_double
48167c478bd9Sstevel@tonic-gate	nop
48177c478bd9Sstevel@tonic-gate
48187c478bd9Sstevel@tonic-gate	! Falls through when source offset is equal to 8 i.e.
48197c478bd9Sstevel@tonic-gate	! source is double word aligned.
48207c478bd9Sstevel@tonic-gate	! In this case no shift/merge of data is required
48217c478bd9Sstevel@tonic-gate
48227c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
48237c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
48247c478bd9Sstevel@tonic-gate	prefetch [%l0+0x0], #one_read
48257c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
48267c478bd9Sstevel@tonic-gate.co_loop0:
48277c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
48287c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
48297c478bd9Sstevel@tonic-gate	prefetch [%l0+0x40], #one_read
48307c478bd9Sstevel@tonic-gate
48317c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x0]%asi
48327c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x8]%asi
48337c478bd9Sstevel@tonic-gate
48347c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
48357c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
48367c478bd9Sstevel@tonic-gate
48377c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x10]%asi
48387c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x18]%asi
48397c478bd9Sstevel@tonic-gate
48407c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
48417c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
48427c478bd9Sstevel@tonic-gate
48437c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x20]%asi
48447c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x28]%asi
48457c478bd9Sstevel@tonic-gate
48467c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
48477c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
48487c478bd9Sstevel@tonic-gate
48497c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x30]%asi
48507c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x38]%asi
48517c478bd9Sstevel@tonic-gate
48527c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
48537c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
48547c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, .co_loop0
48557c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
48567c478bd9Sstevel@tonic-gate	ba	.co_blkdone
48577c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
48587c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
48597c478bd9Sstevel@tonic-gate
48607c478bd9Sstevel@tonic-gate.co_lower_double:
48617c478bd9Sstevel@tonic-gate
48627c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
48637c478bd9Sstevel@tonic-gate	sll	%o2, 3, %o0		! %o0 left shift
48647c478bd9Sstevel@tonic-gate	mov	0x40, %o1
48657c478bd9Sstevel@tonic-gate	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
48667c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
48677c478bd9Sstevel@tonic-gate	prefetch [%l0+0x0], #one_read
48687c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2	! partial data in %l2 and %l3 has
48697c478bd9Sstevel@tonic-gate					! complete data
48707c478bd9Sstevel@tonic-gate.co_loop1:
48717c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
48727c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4	! %l4 has partial data
48737c478bd9Sstevel@tonic-gate							! for this read.
48747c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
48757c478bd9Sstevel@tonic-gate							! into %l2 and %l3
48767c478bd9Sstevel@tonic-gate	prefetch [%l0+0x40], #one_read
48777c478bd9Sstevel@tonic-gate
48787c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x0]%asi
48797c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x8]%asi
48807c478bd9Sstevel@tonic-gate
48817c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
48827c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
48837c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)	! merge %l2 with %l5 and
48847c478bd9Sstevel@tonic-gate							! %l4 from previous read
48857c478bd9Sstevel@tonic-gate							! into %l4 and %l5
48867c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x10]%asi
48877c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x18]%asi
48887c478bd9Sstevel@tonic-gate
48897c478bd9Sstevel@tonic-gate	! Repeat the same for next 32 bytes.
48907c478bd9Sstevel@tonic-gate
48917c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
48927c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
48937c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)
48947c478bd9Sstevel@tonic-gate
48957c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x20]%asi
48967c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x28]%asi
48977c478bd9Sstevel@tonic-gate
48987c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
48997c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
49007c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)
49017c478bd9Sstevel@tonic-gate
49027c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x30]%asi
49037c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x38]%asi
49047c478bd9Sstevel@tonic-gate
49057c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
49067c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
49077c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, .co_loop1
49087c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
49097c478bd9Sstevel@tonic-gate	ba	.co_blkdone
49107c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
49117c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
49127c478bd9Sstevel@tonic-gate
49137c478bd9Sstevel@tonic-gate.co_upper_double:
49147c478bd9Sstevel@tonic-gate
49157c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
49167c478bd9Sstevel@tonic-gate	sub	%o2, 0x8, %o0
49177c478bd9Sstevel@tonic-gate	sll	%o0, 3, %o0		! %o0 left shift
49187c478bd9Sstevel@tonic-gate	mov	0x40, %o1
49197c478bd9Sstevel@tonic-gate	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
49207c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
49217c478bd9Sstevel@tonic-gate	prefetch [%l0+0x0], #one_read
49227c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2	! partial data in %l3
49237c478bd9Sstevel@tonic-gate							! for this read and
49247c478bd9Sstevel@tonic-gate							! no data in %l2
49257c478bd9Sstevel@tonic-gate.co_loop2:
49267c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
49277c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4	! %l4 has complete data
49287c478bd9Sstevel@tonic-gate							! and %l5 has partial
49297c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
49307c478bd9Sstevel@tonic-gate							! into %l3 and %l4
49317c478bd9Sstevel@tonic-gate	prefetch [%l0+0x40], #one_read
49327c478bd9Sstevel@tonic-gate
49337c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x0]%asi
49347c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x8]%asi
49357c478bd9Sstevel@tonic-gate
49367c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
49377c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
49387c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)	! merge %l2 and %l3 with
49397c478bd9Sstevel@tonic-gate							! %l5 from previous read
49407c478bd9Sstevel@tonic-gate							! into %l5 and %l2
49417c478bd9Sstevel@tonic-gate
49427c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x10]%asi
49437c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x18]%asi
49447c478bd9Sstevel@tonic-gate
49457c478bd9Sstevel@tonic-gate	! Repeat the same for next 32 bytes.
49467c478bd9Sstevel@tonic-gate
49477c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
49487c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
49497c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)
49507c478bd9Sstevel@tonic-gate
49517c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x20]%asi
49527c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x28]%asi
49537c478bd9Sstevel@tonic-gate
49547c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
49557c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
49567c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)
49577c478bd9Sstevel@tonic-gate
49587c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x30]%asi
49597c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x38]%asi
49607c478bd9Sstevel@tonic-gate
49617c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
49627c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
49637c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, .co_loop2
49647c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
49657c478bd9Sstevel@tonic-gate	ba	.co_blkdone
49667c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
49677c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
49687c478bd9Sstevel@tonic-gate
49697c478bd9Sstevel@tonic-gate
49707c478bd9Sstevel@tonic-gate	! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P
49717c478bd9Sstevel@tonic-gate.co_blkcpy:
49727c478bd9Sstevel@tonic-gate
49737c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %o0		! %o0 has block aligned source
49747c478bd9Sstevel@tonic-gate	prefetch [%o0+0x0], #one_read
49757c478bd9Sstevel@tonic-gate1:
49767c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l0
49777c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
49787c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
49797c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
49807c478bd9Sstevel@tonic-gate
49817c478bd9Sstevel@tonic-gate	prefetch [%o0+0x40], #one_read
49827c478bd9Sstevel@tonic-gate
49837c478bd9Sstevel@tonic-gate	stxa	%l0, [%i0+0x0]%asi
49847c478bd9Sstevel@tonic-gate
49857c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
49867c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
49877c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l6
49887c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
49897c478bd9Sstevel@tonic-gate
49907c478bd9Sstevel@tonic-gate	stxa	%l1, [%i0+0x8]%asi
49917c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x10]%asi
49927c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x18]%asi
49937c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x20]%asi
49947c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x28]%asi
49957c478bd9Sstevel@tonic-gate	stxa	%l6, [%i0+0x30]%asi
49967c478bd9Sstevel@tonic-gate	stxa	%l7, [%i0+0x38]%asi
49977c478bd9Sstevel@tonic-gate
49987c478bd9Sstevel@tonic-gate	add	%o0, 0x40, %o0
49997c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
50007c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, 1b
50017c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
50027c478bd9Sstevel@tonic-gate
50037c478bd9Sstevel@tonic-gate.co_blkdone:
50047c478bd9Sstevel@tonic-gate	membar	#Sync
50057c478bd9Sstevel@tonic-gate
5006340af271Swh94709	brz,pt	%i2, .copyout_exit
50077c478bd9Sstevel@tonic-gate	nop
50087c478bd9Sstevel@tonic-gate
5009340af271Swh94709	! Handle trailing bytes
5010340af271Swh94709	cmp	%i2, 0x8
5011340af271Swh94709	blu,pt	%ncc, .co_residue
50127c478bd9Sstevel@tonic-gate	nop
50137c478bd9Sstevel@tonic-gate
5014340af271Swh94709	! Can we do some 8B ops
5015340af271Swh94709	or	%i1, %i0, %o2
5016340af271Swh94709	andcc	%o2, 0x7, %g0
5017340af271Swh94709	bnz	%ncc, .co_last4
5018340af271Swh94709	nop
50197c478bd9Sstevel@tonic-gate
5020340af271Swh94709	! Do 8byte ops as long as possible
5021340af271Swh94709.co_last8:
50227c478bd9Sstevel@tonic-gate	ldx	[%i1], %o2
50237c478bd9Sstevel@tonic-gate	stxa	%o2, [%i0]ASI_USER
50247c478bd9Sstevel@tonic-gate	add	%i1, 0x8, %i1
5025340af271Swh94709	sub	%i2, 0x8, %i2
5026340af271Swh94709	cmp	%i2, 0x8
5027340af271Swh94709	bgu,pt	%ncc, .co_last8
50287c478bd9Sstevel@tonic-gate	add	%i0, 0x8, %i0
50297c478bd9Sstevel@tonic-gate
5030340af271Swh94709	brz,pt	%i2, .copyout_exit
5031340af271Swh94709	nop
5032340af271Swh94709
5033340af271Swh94709	ba	.co_residue
5034340af271Swh94709	nop
5035340af271Swh94709
5036340af271Swh94709.co_last4:
5037340af271Swh94709	! Can we do 4B ops
5038340af271Swh94709	andcc	%o2, 0x3, %g0
5039340af271Swh94709	bnz	%ncc, .co_last2
5040340af271Swh94709	nop
5041340af271Swh947091:
5042340af271Swh94709	ld	[%i1], %o2
5043340af271Swh94709	sta	%o2, [%i0]ASI_USER
5044340af271Swh94709	add	%i1, 0x4, %i1
5045340af271Swh94709	sub	%i2, 0x4, %i2
5046340af271Swh94709	cmp	%i2, 0x4
5047340af271Swh94709	bgu,pt	%ncc, 1b
5048340af271Swh94709	add	%i0, 0x4, %i0
5049340af271Swh94709
5050340af271Swh94709	brz,pt	%i2, .copyout_exit
5051340af271Swh94709	nop
5052340af271Swh94709
5053340af271Swh94709	ba	.co_residue
5054340af271Swh94709	nop
5055340af271Swh94709
5056340af271Swh94709.co_last2:
5057340af271Swh94709	! Can we do 2B ops
5058340af271Swh94709	andcc	%o2, 0x1, %g0
5059340af271Swh94709	bnz	%ncc, .co_residue
5060340af271Swh94709	nop
5061340af271Swh94709
5062340af271Swh947091:
5063340af271Swh94709	lduh	[%i1], %o2
5064340af271Swh94709	stuha	%o2, [%i0]ASI_USER
5065340af271Swh94709	add	%i1, 0x2, %i1
5066340af271Swh94709	sub	%i2, 0x2, %i2
5067340af271Swh94709	cmp	%i2, 0x2
5068340af271Swh94709	bgu,pt	%ncc, 1b
5069340af271Swh94709	add	%i0, 0x2, %i0
5070340af271Swh94709
5071340af271Swh94709	brz,pt	%i2, .copyout_exit
50727c478bd9Sstevel@tonic-gate	nop
50737c478bd9Sstevel@tonic-gate
50747c478bd9Sstevel@tonic-gate	! Copy the residue as byte copy
50757c478bd9Sstevel@tonic-gate.co_residue:
50767c478bd9Sstevel@tonic-gate	ldub	[%i1], %i4
50777c478bd9Sstevel@tonic-gate	stba	%i4, [%i0]ASI_USER
50787c478bd9Sstevel@tonic-gate	inc	%i1
50797c478bd9Sstevel@tonic-gate	deccc	%i2
5080340af271Swh94709	bgu,pt	%xcc, .co_residue
50817c478bd9Sstevel@tonic-gate	inc	%i0
50827c478bd9Sstevel@tonic-gate
50837c478bd9Sstevel@tonic-gate.copyout_exit:
50847c478bd9Sstevel@tonic-gate	membar	#Sync
50857c478bd9Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
50867c478bd9Sstevel@tonic-gate	ret
50877c478bd9Sstevel@tonic-gate	restore	%g0, 0, %o0
50887c478bd9Sstevel@tonic-gate
50897c478bd9Sstevel@tonic-gate.copyout_err:
50907c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4
50917c478bd9Sstevel@tonic-gate	brz	%o4, 2f
50927c478bd9Sstevel@tonic-gate	nop
50937c478bd9Sstevel@tonic-gate	ldn	[%o4 + CP_COPYOUT], %g2
50947c478bd9Sstevel@tonic-gate	jmp	%g2
50957c478bd9Sstevel@tonic-gate	nop
50967c478bd9Sstevel@tonic-gate2:
50977c478bd9Sstevel@tonic-gate	retl
50987c478bd9Sstevel@tonic-gate	mov	-1, %o0
5099*280575beSPatrick McGehearty#endif	/* NIAGARA_IMPL */
51007c478bd9Sstevel@tonic-gate	SET_SIZE(copyout)
51017c478bd9Sstevel@tonic-gate
51027c478bd9Sstevel@tonic-gate#endif	/* lint */
51037c478bd9Sstevel@tonic-gate
51047c478bd9Sstevel@tonic-gate
51057c478bd9Sstevel@tonic-gate#ifdef	lint
51067c478bd9Sstevel@tonic-gate
/*
 * Lint-only C definition of xcopyout().  It sits in the `#ifdef lint`
 * branch and simply returns 0; the assembly ENTRY(xcopyout) in the
 * matching #else branch is the code used for non-lint builds.
 */
51077c478bd9Sstevel@tonic-gate/*ARGSUSED*/
51087c478bd9Sstevel@tonic-gateint
51097c478bd9Sstevel@tonic-gatexcopyout(const void *kaddr, void *uaddr, size_t count)
51107c478bd9Sstevel@tonic-gate{ return (0); }
51117c478bd9Sstevel@tonic-gate
51127c478bd9Sstevel@tonic-gate#else	/* lint */
51137c478bd9Sstevel@tonic-gate
/*
 * xcopyout: thin entry point that reuses the copyout body.
 *
 * It builds the address of .xcopyout_err in REAL_LOFAULT and branches to
 * .do_copyout (a label outside this block); the low half of the address
 * is or'ed in from the branch delay slot, so REAL_LOFAULT is complete by
 * the time .do_copyout starts executing.
 *
 * .xcopyout_err: loads the thread's T_COPYOPS pointer.  When it is
 * non-NULL, control is transferred with a plain jmp (no link) to the
 * routine stored at offset CP_XCOPYOUT.  When it is NULL, the routine
 * returns with %o0 = %g1.
 * NOTE(review): %g1 is presumably the error code left by the fault
 * path before it vectors here -- confirm against the lofault handler.
 */
51147c478bd9Sstevel@tonic-gate	ENTRY(xcopyout)
51157c478bd9Sstevel@tonic-gate	sethi	%hi(.xcopyout_err), REAL_LOFAULT
	/* The or below is the delay slot of the branch and always executes. */
51167c478bd9Sstevel@tonic-gate	b	.do_copyout
51177c478bd9Sstevel@tonic-gate	or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
51187c478bd9Sstevel@tonic-gate.xcopyout_err:
51197c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4
	/* No copyops vector installed: skip to the plain error return. */
51207c478bd9Sstevel@tonic-gate	brz	%o4, 2f
51217c478bd9Sstevel@tonic-gate	nop
	/* Hand off to the CP_XCOPYOUT routine from the copyops vector. */
51227c478bd9Sstevel@tonic-gate	ldn	[%o4 + CP_XCOPYOUT], %g2
51237c478bd9Sstevel@tonic-gate	jmp	%g2
51247c478bd9Sstevel@tonic-gate	nop
51257c478bd9Sstevel@tonic-gate2:
	/* Return value is whatever %g1 holds (moved in the retl delay slot). */
51267c478bd9Sstevel@tonic-gate	retl
51277c478bd9Sstevel@tonic-gate	mov	%g1, %o0
51287c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyout)
51297c478bd9Sstevel@tonic-gate
51307c478bd9Sstevel@tonic-gate#endif	/* lint */
51317c478bd9Sstevel@tonic-gate
51327c478bd9Sstevel@tonic-gate#ifdef	lint
51337c478bd9Sstevel@tonic-gate
/*
 * Lint-only C definition of xcopyout_little().  It sits in the
 * `#ifdef lint` branch and simply returns 0; the assembly
 * ENTRY(xcopyout_little) in the matching #else branch is the code used
 * for non-lint builds.
 */
51347c478bd9Sstevel@tonic-gate/*ARGSUSED*/
51357c478bd9Sstevel@tonic-gateint
51367c478bd9Sstevel@tonic-gatexcopyout_little(const void *kaddr, void *uaddr, size_t count)
51377c478bd9Sstevel@tonic-gate{ return (0); }
51387c478bd9Sstevel@tonic-gate
51397c478bd9Sstevel@tonic-gate#else	/* lint */
51407c478bd9Sstevel@tonic-gate
/*
 * xcopyout_little: byte-at-a-time copy of %o2 bytes from the buffer at
 * %o0 to the buffer at %o1, with every store issued through ASI_AIUSL.
 *
 * The source is read from its last byte downwards while the destination
 * is written from its first byte upwards, so the bytes land in reversed
 * order: dst[i] = src[count - 1 - i].
 *
 * On entry the address of .little_err (a label outside this block) is
 * installed in the thread's t_lofault after a membar #Sync; the previous
 * t_lofault value is kept in %o5 and written back before returning.
 * The normal path returns 0 in %o0.  A zero count skips the copy loop
 * but still restores t_lofault.
 */
51417c478bd9Sstevel@tonic-gate	ENTRY(xcopyout_little)
51427c478bd9Sstevel@tonic-gate	sethi	%hi(.little_err), %o4
51437c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o5
51447c478bd9Sstevel@tonic-gate	or	%o4, %lo(.little_err), %o4
51457c478bd9Sstevel@tonic-gate	membar	#Sync			! sync error barrier
51467c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
51477c478bd9Sstevel@tonic-gate
	/*
	 * %o3 = -count.  It serves both as the zero-count test (condition
	 * codes set here are consumed by the bz below) and as the loop
	 * index that counts up towards zero.
	 */
51487c478bd9Sstevel@tonic-gate	subcc	%g0, %o2, %o3
51497c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
	/* The sub is a non-annulled delay slot: %o4 = count - 1 on both paths. */
51507c478bd9Sstevel@tonic-gate	bz,pn	%ncc, 2f		! check for zero bytes
51517c478bd9Sstevel@tonic-gate	sub	%o2, 1, %o4
	/*
	 * After these two adds, %o0 + %o3 addresses the last source byte
	 * and %o1 + %o3 addresses the first destination byte.
	 */
51527c478bd9Sstevel@tonic-gate	add	%o0, %o4, %o0		! start w/last byte
51537c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
51547c478bd9Sstevel@tonic-gate	ldub	[%o0+%o3], %o4
51557c478bd9Sstevel@tonic-gate
	/*
	 * Each pass stores one byte, bumps the index by 1 and pulls the
	 * source base back by 2, so the effective source address moves
	 * down by 1 while the destination address moves up by 1.
	 * inccc sets carry only when %o3 wraps from -1 to 0, which makes
	 * bcc fall through; the annulled delay-slot load runs only when
	 * the branch is taken, so no byte is read past the end of the loop.
	 */
51567c478bd9Sstevel@tonic-gate1:	stba	%o4, [%o1+%o3]ASI_AIUSL
51577c478bd9Sstevel@tonic-gate	inccc	%o3
51587c478bd9Sstevel@tonic-gate	sub	%o0, 2, %o0		! get next byte
51597c478bd9Sstevel@tonic-gate	bcc,a,pt %ncc, 1b
51607c478bd9Sstevel@tonic-gate	ldub	[%o0+%o3], %o4
51617c478bd9Sstevel@tonic-gate
51627c478bd9Sstevel@tonic-gate2:	membar	#Sync			! sync error barrier
51637c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
51647c478bd9Sstevel@tonic-gate	retl
51657c478bd9Sstevel@tonic-gate	mov	%g0, %o0		! return (0)
51667c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyout_little)
51677c478bd9Sstevel@tonic-gate
51687c478bd9Sstevel@tonic-gate#endif	/* lint */
51697c478bd9Sstevel@tonic-gate
51707c478bd9Sstevel@tonic-gate/*
51717c478bd9Sstevel@tonic-gate * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
51727c478bd9Sstevel@tonic-gate */
51737c478bd9Sstevel@tonic-gate
51747c478bd9Sstevel@tonic-gate#if defined(lint)
51757c478bd9Sstevel@tonic-gate
/*
 * Lint-only C definition of copyin().  It sits in the
 * `#if defined(lint)` branch and simply returns 0; the assembly
 * ENTRY(copyin) in the matching #else branch is the code used for
 * non-lint builds.
 */
51767c478bd9Sstevel@tonic-gate/*ARGSUSED*/
51777c478bd9Sstevel@tonic-gateint
51787c478bd9Sstevel@tonic-gatecopyin(const void *uaddr, void *kaddr, size_t count)
51797c478bd9Sstevel@tonic-gate{ return (0); }
51807c478bd9Sstevel@tonic-gate
51817c478bd9Sstevel@tonic-gate#else	/* lint */
51827c478bd9Sstevel@tonic-gate
51837c478bd9Sstevel@tonic-gate	ENTRY(copyin)
51847c478bd9Sstevel@tonic-gate	sethi	%hi(.copyin_err), REAL_LOFAULT
51857c478bd9Sstevel@tonic-gate	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
51867c478bd9Sstevel@tonic-gate
5187*280575beSPatrick McGehearty#if !defined(NIAGARA_IMPL)
5188*280575beSPatrick McGehearty.do_copyin:
5189*280575beSPatrick McGehearty	tst	%o2			! check for zero count;  quick exit
5190*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_smallqx
5191*280575beSPatrick McGehearty	mov	%o0, SAVE_SRC
5192*280575beSPatrick McGehearty	mov	%o1, SAVE_DST
5193*280575beSPatrick McGehearty	mov	%o2, SAVE_COUNT
5194*280575beSPatrick McGehearty	cmp	%o2, FP_COPY		! check for small copy/leaf case
5195*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_copy_more
5196*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
5197*280575beSPatrick McGehearty/*
5198*280575beSPatrick McGehearty * Small copy in code
5199*280575beSPatrick McGehearty *
5200*280575beSPatrick McGehearty */
5201*280575beSPatrick McGehearty	sethi	%hi(copyio_fault_nowindow), %o3
5202*280575beSPatrick McGehearty	or	%o3, %lo(copyio_fault_nowindow), %o3
5203*280575beSPatrick McGehearty	membar	#Sync
5204*280575beSPatrick McGehearty	stn	%o3, [THREAD_REG + T_LOFAULT]
5205*280575beSPatrick McGehearty
5206*280575beSPatrick McGehearty	mov	ASI_USER, %asi
5207*280575beSPatrick McGehearty	cmp	%o2, SHORTCOPY		! make sure there is enough to align
5208*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_smallest
5209*280575beSPatrick McGehearty	andcc	%o1, 0x7, %o3		! is dest long word aligned
5210*280575beSPatrick McGehearty	bnz,pn	%ncc, .ci_align
5211*280575beSPatrick McGehearty	andcc	%o1, 1, %o3		! is dest byte aligned
5212*280575beSPatrick McGehearty
5213*280575beSPatrick McGehearty! Destination is long word aligned
5214*280575beSPatrick McGehearty.ci_al_src:
5215*280575beSPatrick McGehearty	andcc	%o0, 7, %o3
5216*280575beSPatrick McGehearty	brnz,pt	%o3, .ci_src_dst_unal8
5217*280575beSPatrick McGehearty	nop
5218*280575beSPatrick McGehearty/*
5219*280575beSPatrick McGehearty * Special case for handling when src and dest are both long word aligned
5220*280575beSPatrick McGehearty * and total data to move is less than FP_COPY bytes
5221*280575beSPatrick McGehearty * Also handles finish up for large block moves, so may be less than 32 bytes
5222*280575beSPatrick McGehearty */
5223*280575beSPatrick McGehearty.ci_medlong:
5224*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
5225*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medl31
5226*280575beSPatrick McGehearty	nop
5227*280575beSPatrick McGehearty.ci_medl32:
5228*280575beSPatrick McGehearty	ldxa	[%o0]%asi, %o4		! move 32 bytes
5229*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count by 32
5230*280575beSPatrick McGehearty	stx	%o4, [%o1]
5231*280575beSPatrick McGehearty	ldxa	[%o0+8]%asi, %o4
5232*280575beSPatrick McGehearty	stx	%o4, [%o1+8]
5233*280575beSPatrick McGehearty	ldxa	[%o0+16]%asi, %o4
5234*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
5235*280575beSPatrick McGehearty	stx	%o4, [%o1+16]
5236*280575beSPatrick McGehearty	ldxa	[%o0-8]%asi, %o4
5237*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
5238*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medl32	! repeat if at least 32 bytes left
5239*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
5240*280575beSPatrick McGehearty.ci_medl31:
5241*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
5242*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medl7		! skip if 7 or fewer bytes left
5243*280575beSPatrick McGehearty	nop
5244*280575beSPatrick McGehearty.ci_medl8:
5245*280575beSPatrick McGehearty	ldxa	[%o0]%asi, %o4		! move 8 bytes
5246*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
5247*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrease count by 8
5248*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
5249*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medl8
5250*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
5251*280575beSPatrick McGehearty.ci_medl7:
5252*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
5253*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_small4	! do final bytes if not finished
5254*280575beSPatrick McGehearty	nop
5255*280575beSPatrick McGehearty.ci_smallx:				! finish up and exit
5256*280575beSPatrick McGehearty	membar	#Sync
5257*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
5258*280575beSPatrick McGehearty.ci_smallqx:
5259*280575beSPatrick McGehearty	retl
5260*280575beSPatrick McGehearty	mov	%g0, %o0
5261*280575beSPatrick McGehearty
5262*280575beSPatrick McGehearty.ci_small4:
5263*280575beSPatrick McGehearty	cmp	%o2, 4
5264*280575beSPatrick McGehearty	blt,pt	%ncc, .ci_small3x	! skip if less than 4 bytes left
5265*280575beSPatrick McGehearty	nop				!
5266*280575beSPatrick McGehearty	lda	[%o0]%asi, %o4		! move 4 bytes
5267*280575beSPatrick McGehearty	add	%o0, 4, %o0		! increase src ptr by 4
5268*280575beSPatrick McGehearty	add	%o1, 4, %o1		! increase dst ptr by 4
5269*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! decrease count by 4
5270*280575beSPatrick McGehearty	bz	%ncc, .ci_smallx
5271*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
5272*280575beSPatrick McGehearty
5273*280575beSPatrick McGehearty.ci_small3x:				! Exactly 1, 2, or 3 bytes remain
5274*280575beSPatrick McGehearty	subcc	%o2, 1, %o2		! reduce count for cc test
5275*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! load one byte
5276*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_smallx
5277*280575beSPatrick McGehearty	stb	%o4, [%o1]		! store one byte
5278*280575beSPatrick McGehearty	lduba	[%o0+1]%asi, %o4	! load second byte
5279*280575beSPatrick McGehearty	subcc	%o2, 1, %o2
5280*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_smallx
5281*280575beSPatrick McGehearty	stb	%o4, [%o1+1]		! store second byte
5282*280575beSPatrick McGehearty	lduba	[%o0+2]%asi, %o4	! load third byte
5283*280575beSPatrick McGehearty	ba	.ci_smallx
5284*280575beSPatrick McGehearty	stb	%o4, [%o1+2]		! store third byte
5285*280575beSPatrick McGehearty
5286*280575beSPatrick McGehearty.ci_smallest:				! 7 or fewer bytes remain
5287*280575beSPatrick McGehearty	cmp	%o2, 4
5288*280575beSPatrick McGehearty	blt,pt	%ncc, .ci_small3x
5289*280575beSPatrick McGehearty	nop
5290*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! read byte
5291*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! reduce count by 4
5292*280575beSPatrick McGehearty	stb	%o4, [%o1]		! write byte
5293*280575beSPatrick McGehearty	lduba	[%o0+1]%asi, %o4	! repeat for total of 4 bytes
5294*280575beSPatrick McGehearty	add	%o0, 4, %o0		! advance src by 4
5295*280575beSPatrick McGehearty	stb	%o4, [%o1+1]
5296*280575beSPatrick McGehearty	lduba	[%o0-2]%asi, %o4
5297*280575beSPatrick McGehearty	add	%o1, 4, %o1		! advance dst by 4
5298*280575beSPatrick McGehearty	stb	%o4, [%o1-2]
5299*280575beSPatrick McGehearty	lduba	[%o0-1]%asi, %o4
5300*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_small3x
5301*280575beSPatrick McGehearty	stb	%o4, [%o1-1]
5302*280575beSPatrick McGehearty	membar	#Sync
5303*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
5304*280575beSPatrick McGehearty	retl
5305*280575beSPatrick McGehearty	mov	%g0, %o0
5306*280575beSPatrick McGehearty
5307*280575beSPatrick McGehearty.ci_align:				! cc here come from andcc %o1, 1
5308*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_al_d1		! dest is odd; next insn is delay slot
5309*280575beSPatrick McGehearty.ci_al_d1f:				! dest is now half word aligned
5310*280575beSPatrick McGehearty	andcc	%o1, 2, %o3		! is dest word aligned
5311*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_al_d2		! no; next insn is delay slot
5312*280575beSPatrick McGehearty.ci_al_d2f:				! dest is now word aligned
5313*280575beSPatrick McGehearty	andcc	%o1, 4, %o3		! is dest longword aligned?
5314*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_al_src	! yes, go check src alignment
5315*280575beSPatrick McGehearty	nop				! else fall through to .ci_al_d4
5316*280575beSPatrick McGehearty.ci_al_d4:				! dest is word aligned;  src is unknown
5317*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! move a word (src align unknown)
5318*280575beSPatrick McGehearty	lduba	[%o0+1]%asi, %o3
5319*280575beSPatrick McGehearty	sll	%o4, 24, %o4		! position
5320*280575beSPatrick McGehearty	sll	%o3, 16, %o3		! position
5321*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
5322*280575beSPatrick McGehearty	lduba	[%o0+2]%asi, %o4
5323*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
5324*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
5325*280575beSPatrick McGehearty	lduba	[%o0+3]%asi, %o4
5326*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
5327*280575beSPatrick McGehearty	stw	%o4,[%o1]		! store four bytes
5328*280575beSPatrick McGehearty	add	%o0, 4, %o0		! adjust src by 4
5329*280575beSPatrick McGehearty	add	%o1, 4, %o1		! adjust dest by 4
5330*280575beSPatrick McGehearty	sub	%o2, 4, %o2		! adjust count by 4
5331*280575beSPatrick McGehearty	andcc	%o0, 7, %o3		! check for src long word alignment
5332*280575beSPatrick McGehearty	brz,pt	%o3, .ci_medlong
5333*280575beSPatrick McGehearty.ci_src_dst_unal8:
5334*280575beSPatrick McGehearty	! dst is 8-byte aligned, src is not
5335*280575beSPatrick McGehearty	! Size is less than FP_COPY
5336*280575beSPatrick McGehearty	! Following code is to select for alignment
5337*280575beSPatrick McGehearty	andcc	%o0, 0x3, %o3		! test word alignment
5338*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_medword
5339*280575beSPatrick McGehearty	nop
5340*280575beSPatrick McGehearty	andcc	%o0, 0x1, %o3		! test halfword alignment
5341*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_med_byte	! go to byte move if not halfword
5342*280575beSPatrick McGehearty	andcc	%o0, 0x2, %o3		! test which byte alignment
5343*280575beSPatrick McGehearty	ba	.ci_medhalf
5344*280575beSPatrick McGehearty	nop
5345*280575beSPatrick McGehearty.ci_al_d1:				! align dest to half word
5346*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! move a byte
5347*280575beSPatrick McGehearty	add	%o0, 1, %o0
5348*280575beSPatrick McGehearty	stb	%o4, [%o1]
5349*280575beSPatrick McGehearty	add	%o1, 1, %o1
5350*280575beSPatrick McGehearty	andcc	%o1, 2, %o3		! is dest word aligned
5351*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_al_d2f
5352*280575beSPatrick McGehearty	sub	%o2, 1, %o2
5353*280575beSPatrick McGehearty.ci_al_d2:				! align dest to word
5354*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! move a half-word (src align unknown)
5355*280575beSPatrick McGehearty	lduba	[%o0+1]%asi, %o3
5356*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
5357*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
5358*280575beSPatrick McGehearty	sth	%o4, [%o1]
5359*280575beSPatrick McGehearty	add	%o0, 2, %o0
5360*280575beSPatrick McGehearty	add	%o1, 2, %o1
5361*280575beSPatrick McGehearty	andcc	%o1, 4, %o3		! is dest longword aligned?
5362*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_al_src
5363*280575beSPatrick McGehearty	sub	%o2, 2, %o2
5364*280575beSPatrick McGehearty	ba	.ci_al_d4
5365*280575beSPatrick McGehearty	nop
5366*280575beSPatrick McGehearty/*
5367*280575beSPatrick McGehearty * Handle all cases where src and dest are aligned on word
5368*280575beSPatrick McGehearty * boundaries. Use unrolled loops for better performance.
5369*280575beSPatrick McGehearty * This option wins over standard large data move when
5370*280575beSPatrick McGehearty * source and destination are in cache for medium
5371*280575beSPatrick McGehearty * to short data moves.
5372*280575beSPatrick McGehearty */
5373*280575beSPatrick McGehearty.ci_medword:
5374*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
5375*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medw31
5376*280575beSPatrick McGehearty	nop
5377*280575beSPatrick McGehearty.ci_medw32:
5378*280575beSPatrick McGehearty	lda	[%o0]%asi, %o4		! move a block of 32 bytes
5379*280575beSPatrick McGehearty	stw	%o4, [%o1]
5380*280575beSPatrick McGehearty	lda	[%o0+4]%asi, %o4
5381*280575beSPatrick McGehearty	stw	%o4, [%o1+4]
5382*280575beSPatrick McGehearty	lda	[%o0+8]%asi, %o4
5383*280575beSPatrick McGehearty	stw	%o4, [%o1+8]
5384*280575beSPatrick McGehearty	lda	[%o0+12]%asi, %o4
5385*280575beSPatrick McGehearty	stw	%o4, [%o1+12]
5386*280575beSPatrick McGehearty	lda	[%o0+16]%asi, %o4
5387*280575beSPatrick McGehearty	stw	%o4, [%o1+16]
5388*280575beSPatrick McGehearty	lda	[%o0+20]%asi, %o4
5389*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
5390*280575beSPatrick McGehearty	stw	%o4, [%o1+20]
5391*280575beSPatrick McGehearty	lda	[%o0+24]%asi, %o4
5392*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
5393*280575beSPatrick McGehearty	stw	%o4, [%o1+24]
5394*280575beSPatrick McGehearty	lda	[%o0-4]%asi, %o4
5395*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
5396*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medw32	! repeat if at least 32 bytes left
5397*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
5398*280575beSPatrick McGehearty.ci_medw31:
5399*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
5400*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medw7		! skip if 7 or fewer bytes left
5401*280575beSPatrick McGehearty	nop				!
5402*280575beSPatrick McGehearty.ci_medw15:
5403*280575beSPatrick McGehearty	lda	[%o0]%asi, %o4		! move a block of 8 bytes
5404*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
5405*280575beSPatrick McGehearty	stw	%o4, [%o1]
5406*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
5407*280575beSPatrick McGehearty	lda	[%o0-4]%asi, %o4
5408*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
5409*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medw15
5410*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
5411*280575beSPatrick McGehearty.ci_medw7:
5412*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
5413*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_smallx	! exit if finished
5414*280575beSPatrick McGehearty	cmp	%o2, 4
5415*280575beSPatrick McGehearty	blt,pt	%ncc, .ci_small3x	! skip if less than 4 bytes left
5416*280575beSPatrick McGehearty	nop				!
5417*280575beSPatrick McGehearty	lda	[%o0]%asi, %o4		! move 4 bytes
5418*280575beSPatrick McGehearty	add	%o0, 4, %o0		! increase src ptr by 4
5419*280575beSPatrick McGehearty	add	%o1, 4, %o1		! increase dst ptr by 4
5420*280575beSPatrick McGehearty	subcc	%o2, 4, %o2		! decrease count by 4
5421*280575beSPatrick McGehearty	bnz	.ci_small3x
5422*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
5423*280575beSPatrick McGehearty	membar	#Sync
5424*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
5425*280575beSPatrick McGehearty	retl
5426*280575beSPatrick McGehearty	mov	%g0, %o0
5427*280575beSPatrick McGehearty
5428*280575beSPatrick McGehearty.ci_medhalf:
5429*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
5430*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medh31
5431*280575beSPatrick McGehearty	nop
5432*280575beSPatrick McGehearty.ci_medh32:				! load and store block of 32 bytes
5433*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
5434*280575beSPatrick McGehearty
5435*280575beSPatrick McGehearty	lduha	[%o0]%asi, %o4		! move 32 bytes
5436*280575beSPatrick McGehearty	lduwa	[%o0+2]%asi, %o3
5437*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
5438*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
5439*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5440*280575beSPatrick McGehearty	lduha	[%o0+6]%asi, %o4
5441*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5442*280575beSPatrick McGehearty	stx	%o4, [%o1]
5443*280575beSPatrick McGehearty
5444*280575beSPatrick McGehearty	lduha	[%o0+8]%asi, %o4
5445*280575beSPatrick McGehearty	lduwa	[%o0+10]%asi, %o3
5446*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
5447*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
5448*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5449*280575beSPatrick McGehearty	lduha	[%o0+14]%asi, %o4
5450*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5451*280575beSPatrick McGehearty	stx	%o4, [%o1+8]
5452*280575beSPatrick McGehearty
5453*280575beSPatrick McGehearty	lduha	[%o0+16]%asi, %o4
5454*280575beSPatrick McGehearty	lduwa	[%o0+18]%asi, %o3
5455*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
5456*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
5457*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5458*280575beSPatrick McGehearty	lduha	[%o0+22]%asi, %o4
5459*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5460*280575beSPatrick McGehearty	stx	%o4, [%o1+16]
5461*280575beSPatrick McGehearty
5462*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
5463*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
5464*280575beSPatrick McGehearty
5465*280575beSPatrick McGehearty	lduha	[%o0-8]%asi, %o4
5466*280575beSPatrick McGehearty	lduwa	[%o0-6]%asi, %o3
5467*280575beSPatrick McGehearty	sllx	%o4, 48, %o4
5468*280575beSPatrick McGehearty	sllx	%o3, 16, %o3
5469*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5470*280575beSPatrick McGehearty	lduha	[%o0-2]%asi, %o4
5471*280575beSPatrick McGehearty	or	%o3, %o4, %o4
5472*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medh32	! repeat if at least 32 bytes left
5473*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
5474*280575beSPatrick McGehearty
5475*280575beSPatrick McGehearty.ci_medh31:
5476*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
5477*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medh7		! skip if 7 or fewer bytes left
5478*280575beSPatrick McGehearty	nop				!
5479*280575beSPatrick McGehearty.ci_medh15:				! 8 bytes per pass, src read as 2+4+2
5480*280575beSPatrick McGehearty	lduha	[%o0]%asi, %o4		! move 8 bytes: first half word
5481*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
5482*280575beSPatrick McGehearty	lduwa	[%o0+2]%asi, %o3	! middle word
5483*280575beSPatrick McGehearty	sllx	%o4, 48, %o4		! position half word in bits 63:48
5484*280575beSPatrick McGehearty	sllx	%o3, 16, %o3		! position word in bits 47:16
5485*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
5486*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
5487*280575beSPatrick McGehearty	lduha	[%o0+6]%asi, %o4	! last half word, bits 15:0
5488*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
5489*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
5490*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medh15	! repeat if at least 8 bytes left
5491*280575beSPatrick McGehearty	stx	%o4, [%o1-8]		! delay slot: store the 8 bytes
5492*280575beSPatrick McGehearty.ci_medh7:
5493*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
5494*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_smallx	! exit if finished
5495*280575beSPatrick McGehearty	cmp	%o2, 4
5496*280575beSPatrick McGehearty	blt,pt	%ncc, .ci_small3x	! skip if less than 4 bytes left
5497*280575beSPatrick McGehearty	nop				!
5498*280575beSPatrick McGehearty	lduha	[%o0]%asi, %o4
5499*280575beSPatrick McGehearty	sll	%o4, 16, %o4
5500*280575beSPatrick McGehearty	lduha	[%o0+2]%asi, %o3
5501*280575beSPatrick McGehearty	or	%o3, %o4, %o4
5502*280575beSPatrick McGehearty	subcc	%o2, 4, %o2
5503*280575beSPatrick McGehearty	add	%o0, 4, %o0
5504*280575beSPatrick McGehearty	add	%o1, 4, %o1
5505*280575beSPatrick McGehearty	bnz	.ci_small3x
5506*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
5507*280575beSPatrick McGehearty	membar	#Sync
5508*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
5509*280575beSPatrick McGehearty	retl
5510*280575beSPatrick McGehearty	mov	%g0, %o0
5511*280575beSPatrick McGehearty
5512*280575beSPatrick McGehearty	.align 16
5513*280575beSPatrick McGehearty.ci_med_byte:
5514*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_medbh32a	! go to correct byte move
5515*280575beSPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
5516*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medb31
5517*280575beSPatrick McGehearty	nop
5518*280575beSPatrick McGehearty.ci_medb32:				! Alignment 1 or 5
5519*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
5520*280575beSPatrick McGehearty
5521*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! load and store a block of 32 bytes
5522*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
5523*280575beSPatrick McGehearty	lduha	[%o0+1]%asi, %o4
5524*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
5525*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5526*280575beSPatrick McGehearty	lduwa	[%o0+3]%asi, %o4
5527*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
5528*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5529*280575beSPatrick McGehearty	lduba	[%o0+7]%asi, %o4
5530*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5531*280575beSPatrick McGehearty	stx	%o4, [%o1]
5532*280575beSPatrick McGehearty
5533*280575beSPatrick McGehearty	lduba	[%o0+8]%asi, %o4
5534*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
5535*280575beSPatrick McGehearty	lduha	[%o0+9]%asi, %o4
5536*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
5537*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5538*280575beSPatrick McGehearty	lduwa	[%o0+11]%asi, %o4
5539*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
5540*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5541*280575beSPatrick McGehearty	lduba	[%o0+15]%asi, %o4
5542*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5543*280575beSPatrick McGehearty	stx	%o4, [%o1+8]
5544*280575beSPatrick McGehearty
5545*280575beSPatrick McGehearty	lduba	[%o0+16]%asi, %o4
5546*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
5547*280575beSPatrick McGehearty	lduha	[%o0+17]%asi, %o4
5548*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
5549*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5550*280575beSPatrick McGehearty	lduwa	[%o0+19]%asi, %o4
5551*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
5552*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5553*280575beSPatrick McGehearty	lduba	[%o0+23]%asi, %o4
5554*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5555*280575beSPatrick McGehearty	stx	%o4, [%o1+16]
5556*280575beSPatrick McGehearty
5557*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
5558*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
5559*280575beSPatrick McGehearty
5560*280575beSPatrick McGehearty	lduba	[%o0-8]%asi, %o4
5561*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
5562*280575beSPatrick McGehearty	lduha	[%o0-7]%asi, %o4
5563*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
5564*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5565*280575beSPatrick McGehearty	lduwa	[%o0-5]%asi, %o4
5566*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
5567*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5568*280575beSPatrick McGehearty	lduba	[%o0-1]%asi, %o4
5569*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5570*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medb32	! repeat if at least 32 bytes left
5571*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
5572*280575beSPatrick McGehearty
5573*280575beSPatrick McGehearty.ci_medb31:				! 31 or fewer bytes remaining
5574*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
5575*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medb7		! skip if 7 or fewer bytes left
5576*280575beSPatrick McGehearty	nop				!
5577*280575beSPatrick McGehearty.ci_medb15:				! 8 bytes per pass, src read as 1+2+4+1
5578*280575beSPatrick McGehearty
5579*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! load and store a block of 8 bytes
5580*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
5581*280575beSPatrick McGehearty	sllx	%o4, 56, %o3		! position byte in bits 63:56
5582*280575beSPatrick McGehearty	lduha	[%o0+1]%asi, %o4
5583*280575beSPatrick McGehearty	sllx	%o4, 40, %o4		! position half word in bits 55:40
5584*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
5585*280575beSPatrick McGehearty	lduwa	[%o0+3]%asi, %o4
5586*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
5587*280575beSPatrick McGehearty	sllx	%o4, 8, %o4		! position word in bits 39:8
5588*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
5589*280575beSPatrick McGehearty	lduba	[%o0+7]%asi, %o4	! last byte, bits 7:0
5590*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
5591*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
5592*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medb15	! repeat if at least 8 bytes left
5593*280575beSPatrick McGehearty	stx	%o4, [%o1-8]		! delay slot: store the 8 bytes
5594*280575beSPatrick McGehearty.ci_medb7:
5595*280575beSPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
5596*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_smallx	! exit if finished
5597*280575beSPatrick McGehearty	cmp	%o2, 4
5598*280575beSPatrick McGehearty	blt,pt	%ncc, .ci_small3x	! skip if less than 4 bytes left
5599*280575beSPatrick McGehearty	nop				!
5600*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! move 4 bytes
5601*280575beSPatrick McGehearty	sll	%o4, 24, %o3
5602*280575beSPatrick McGehearty	lduha	[%o0+1]%asi, %o4
5603*280575beSPatrick McGehearty	sll	%o4, 8, %o4
5604*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5605*280575beSPatrick McGehearty	lduba	[%o0+3]%asi, %o4
5606*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5607*280575beSPatrick McGehearty	subcc	%o2, 4, %o2
5608*280575beSPatrick McGehearty	add	%o0, 4, %o0
5609*280575beSPatrick McGehearty	add	%o1, 4, %o1
5610*280575beSPatrick McGehearty	bnz	.ci_small3x
5611*280575beSPatrick McGehearty	stw	%o4, [%o1-4]
5612*280575beSPatrick McGehearty	membar	#Sync
5613*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
5614*280575beSPatrick McGehearty	retl
5615*280575beSPatrick McGehearty	mov	%g0, %o0
5616*280575beSPatrick McGehearty
5617*280575beSPatrick McGehearty	.align 16
5618*280575beSPatrick McGehearty.ci_medbh32a:				! Alignment 3 or 7
5619*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medbh31
5620*280575beSPatrick McGehearty	nop
5621*280575beSPatrick McGehearty.ci_medbh32:				! Alignment 3 or 7
5622*280575beSPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
5623*280575beSPatrick McGehearty
5624*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! load and store a block of 32 bytes
5625*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
5626*280575beSPatrick McGehearty	lduwa	[%o0+1]%asi, %o4
5627*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
5628*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5629*280575beSPatrick McGehearty	lduha	[%o0+5]%asi, %o4
5630*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
5631*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5632*280575beSPatrick McGehearty	lduba	[%o0+7]%asi, %o4
5633*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5634*280575beSPatrick McGehearty	stx	%o4, [%o1]
5635*280575beSPatrick McGehearty
5636*280575beSPatrick McGehearty	lduba	[%o0+8]%asi, %o4
5637*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
5638*280575beSPatrick McGehearty	lduwa	[%o0+9]%asi, %o4
5639*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
5640*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5641*280575beSPatrick McGehearty	lduha	[%o0+13]%asi, %o4
5642*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
5643*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5644*280575beSPatrick McGehearty	lduba	[%o0+15]%asi, %o4
5645*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5646*280575beSPatrick McGehearty	stx	%o4, [%o1+8]
5647*280575beSPatrick McGehearty
5648*280575beSPatrick McGehearty	lduba	[%o0+16]%asi, %o4
5649*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
5650*280575beSPatrick McGehearty	lduwa	[%o0+17]%asi, %o4
5651*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
5652*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5653*280575beSPatrick McGehearty	lduha	[%o0+21]%asi, %o4
5654*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
5655*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5656*280575beSPatrick McGehearty	lduba	[%o0+23]%asi, %o4
5657*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5658*280575beSPatrick McGehearty	stx	%o4, [%o1+16]
5659*280575beSPatrick McGehearty
5660*280575beSPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
5661*280575beSPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
5662*280575beSPatrick McGehearty
5663*280575beSPatrick McGehearty	lduba	[%o0-8]%asi, %o4
5664*280575beSPatrick McGehearty	sllx	%o4, 56, %o3
5665*280575beSPatrick McGehearty	lduwa	[%o0-7]%asi, %o4
5666*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
5667*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5668*280575beSPatrick McGehearty	lduha	[%o0-3]%asi, %o4
5669*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
5670*280575beSPatrick McGehearty	or	%o4, %o3, %o3
5671*280575beSPatrick McGehearty	lduba	[%o0-1]%asi, %o4
5672*280575beSPatrick McGehearty	or	%o4, %o3, %o4
5673*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medbh32	! repeat if at least 32 bytes left
5674*280575beSPatrick McGehearty	stx	%o4, [%o1-8]
5675*280575beSPatrick McGehearty
5676*280575beSPatrick McGehearty.ci_medbh31:
5677*280575beSPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
5678*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_medb7		! skip if 7 or fewer bytes left
5679*280575beSPatrick McGehearty	nop				!
5680*280575beSPatrick McGehearty.ci_medbh15:				! 8 bytes per pass, src read as 1+4+2+1
5681*280575beSPatrick McGehearty	lduba	[%o0]%asi, %o4		! load and store a block of 8 bytes
5682*280575beSPatrick McGehearty	sllx	%o4, 56, %o3		! position byte in bits 63:56
5683*280575beSPatrick McGehearty	lduwa	[%o0+1]%asi, %o4
5684*280575beSPatrick McGehearty	sllx	%o4, 24, %o4		! position word in bits 55:24
5685*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
5686*280575beSPatrick McGehearty	lduha	[%o0+5]%asi, %o4
5687*280575beSPatrick McGehearty	sllx	%o4, 8, %o4		! position half word in bits 23:8
5688*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
5689*280575beSPatrick McGehearty	lduba	[%o0+7]%asi, %o4	! last byte, bits 7:0
5690*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
5691*280575beSPatrick McGehearty	stx	%o4, [%o1]		! NOTE(review): repeated in delay slot
5692*280575beSPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
5693*280575beSPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
5694*280575beSPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
5695*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_medbh15	! repeat if at least 8 bytes left
5696*280575beSPatrick McGehearty	stx	%o4, [%o1-8]		! same value and address as stx above
5697*280575beSPatrick McGehearty	ba	.ci_medb7		! finish the last 7 or fewer bytes
5698*280575beSPatrick McGehearty	nop
5699*280575beSPatrick McGehearty
5700*280575beSPatrick McGehearty/*
5701*280575beSPatrick McGehearty * End of small copy in code (no window)
5702*280575beSPatrick McGehearty *
5703*280575beSPatrick McGehearty */
5704*280575beSPatrick McGehearty
5705*280575beSPatrick McGehearty/*
5706*280575beSPatrick McGehearty * Long copy in code (using register window and fp regs)
5707*280575beSPatrick McGehearty *
5708*280575beSPatrick McGehearty */
5709*280575beSPatrick McGehearty
5710*280575beSPatrick McGehearty.ci_copy_more:
5711*280575beSPatrick McGehearty	sethi	%hi(copyio_fault), %o3
5712*280575beSPatrick McGehearty	or	%o3, %lo(copyio_fault), %o3
5713*280575beSPatrick McGehearty	membar	#Sync
5714*280575beSPatrick McGehearty	stn	%o3, [THREAD_REG + T_LOFAULT]
5715*280575beSPatrick McGehearty/*
5716*280575beSPatrick McGehearty * Following code is for large copies. We know there are at
5717*280575beSPatrick McGehearty * least FP_COPY bytes available. FP regs are used, so
5718*280575beSPatrick McGehearty * we save registers and fp regs before starting.
5719*280575beSPatrick McGehearty */
5720*280575beSPatrick McGehearty	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
5721*280575beSPatrick McGehearty	or	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
5722*280575beSPatrick McGehearty	rd	%fprs, %g1		! check for unused fp
5723*280575beSPatrick McGehearty	! if fprs.fef == 0, set it.
5724*280575beSPatrick McGehearty	! Setting it when already set costs more than checking
5725*280575beSPatrick McGehearty	andcc	%g1, FPRS_FEF, %g1	! test FEF, fprs.du = fprs.dl = 0
5726*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_fp_unused
5727*280575beSPatrick McGehearty	mov	ASI_USER, %asi
5728*280575beSPatrick McGehearty	BST_FP_TOSTACK(%o3)
5729*280575beSPatrick McGehearty	ba	.ci_fp_ready
5730*280575beSPatrick McGehearty.ci_fp_unused:
5731*280575beSPatrick McGehearty	prefetcha [%i0 + (1 * CACHE_LINE)]%asi, #one_read
5732*280575beSPatrick McGehearty	wr	%g0, FPRS_FEF, %fprs	! fprs.fef = 1
5733*280575beSPatrick McGehearty.ci_fp_ready:
5734*280575beSPatrick McGehearty	rd	%gsr, %l5		! save %gsr value
5735*280575beSPatrick McGehearty	andcc	%i1, 1, %o3		! is dest byte aligned
5736*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_big_d1
5737*280575beSPatrick McGehearty.ci_big_d1f:				! dest is now half word aligned
5738*280575beSPatrick McGehearty	andcc	%i1, 2, %o3
5739*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_big_d2
5740*280575beSPatrick McGehearty.ci_big_d2f:				! dest is now word aligned
5741*280575beSPatrick McGehearty	andcc	%i1, 4, %o3
5742*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_big_d4
5743*280575beSPatrick McGehearty.ci_big_d4f:				! dest is long word aligned
5744*280575beSPatrick McGehearty	andcc	%i0, 7, %o3		! is src long word aligned
5745*280575beSPatrick McGehearty	brnz,pt	%o3, .ci_big_unal8
5746*280575beSPatrick McGehearty	prefetcha [%i0 + (2 * CACHE_LINE)]%asi, #one_read
5747*280575beSPatrick McGehearty	! Src and dst are long word aligned
5748*280575beSPatrick McGehearty	! align dst to 64 byte boundary
5749*280575beSPatrick McGehearty	andcc	%i1, 0x3f, %o3		! %o3 == 0 means dst is 64 byte aligned
5750*280575beSPatrick McGehearty	brz,pn	%o3, .ci_al_to_64
5751*280575beSPatrick McGehearty	nop
5752*280575beSPatrick McGehearty	sub	%o3, 64, %o3		! %o3 has negative bytes to move
5753*280575beSPatrick McGehearty	add	%i2, %o3, %i2		! adjust remaining count
5754*280575beSPatrick McGehearty	andcc	%o3, 8, %o4		! odd long words to move?
5755*280575beSPatrick McGehearty	brz,pt	%o4, .ci_al_to_16
5756*280575beSPatrick McGehearty	nop
5757*280575beSPatrick McGehearty	add	%o3, 8, %o3
5758*280575beSPatrick McGehearty	ldxa	[%i0]%asi, %o4
5759*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increment src ptr
5760*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increment dst ptr
5761*280575beSPatrick McGehearty	stx	%o4, [%i1-8]
5762*280575beSPatrick McGehearty! Dest is aligned on 16 bytes, src 8 byte aligned
5763*280575beSPatrick McGehearty.ci_al_to_16:
5764*280575beSPatrick McGehearty	andcc	%o3, 0x30, %o4		! pair of long words to move?
5765*280575beSPatrick McGehearty	brz,pt	%o4, .ci_al_to_64
5766*280575beSPatrick McGehearty	nop
5767*280575beSPatrick McGehearty.ci_al_mv_16:
5768*280575beSPatrick McGehearty	add	%o3, 16, %o3
5769*280575beSPatrick McGehearty	ldxa	[%i0]%asi, %o4
5770*280575beSPatrick McGehearty	stx	%o4, [%i1]
5771*280575beSPatrick McGehearty	add	%i0, 16, %i0		! increment src ptr
5772*280575beSPatrick McGehearty	ldxa	[%i0-8]%asi, %o4
5773*280575beSPatrick McGehearty	stx	%o4, [%i1+8]
5774*280575beSPatrick McGehearty	andcc	%o3, 0x30, %o4
5775*280575beSPatrick McGehearty	brnz,pt	%o4, .ci_al_mv_16
5776*280575beSPatrick McGehearty	add	%i1, 16, %i1		! increment dst ptr
5777*280575beSPatrick McGehearty! Dest is aligned on 64 bytes, src 8 byte aligned
5778*280575beSPatrick McGehearty.ci_al_to_64:
5779*280575beSPatrick McGehearty	! Determine source alignment
5780*280575beSPatrick McGehearty	! to correct 8 byte offset
	! Bits 5, 4 and 3 of the source address (%i0) select one of the
	! eight .ci_aln_xyz handlers; the label suffix xyz is those three
	! bits.  The test for the next bit is issued in the delay slot of
	! the previous branch, so %o3 already holds it at the branch target.
	! The remaining delay slots are used to issue source prefetches.
5781*280575beSPatrick McGehearty	andcc	%i0, 32, %o3
5782*280575beSPatrick McGehearty	brnz,pn	%o3, .ci_aln_1
5783*280575beSPatrick McGehearty	andcc	%i0, 16, %o3
5784*280575beSPatrick McGehearty	brnz,pn	%o3, .ci_aln_01
5785*280575beSPatrick McGehearty	andcc	%i0, 8, %o3
5786*280575beSPatrick McGehearty	brz,pn	%o3, .ci_aln_000
5787*280575beSPatrick McGehearty	prefetcha [%i0 + (3 * CACHE_LINE)]%asi, #one_read
5788*280575beSPatrick McGehearty	ba	.ci_aln_001
5789*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
	! Bit 5 clear, bit 4 set; %o3 holds the bit 3 test.
5790*280575beSPatrick McGehearty.ci_aln_01:
5791*280575beSPatrick McGehearty	brnz,pn	%o3, .ci_aln_011
5792*280575beSPatrick McGehearty	prefetcha [%i0 + (3 * CACHE_LINE)]%asi, #one_read
5793*280575beSPatrick McGehearty	ba	.ci_aln_010
5794*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
	! Bit 5 set.
5795*280575beSPatrick McGehearty.ci_aln_1:
5796*280575beSPatrick McGehearty	andcc	%i0, 16, %o3		! repeats the bit 4 test made in the delay slot above
5797*280575beSPatrick McGehearty	brnz,pn	%o3, .ci_aln_11
5798*280575beSPatrick McGehearty	andcc	%i0, 8, %o3
5799*280575beSPatrick McGehearty	brnz,pn	%o3, .ci_aln_101
5800*280575beSPatrick McGehearty	prefetcha [%i0 + (3 * CACHE_LINE)]%asi, #one_read
5801*280575beSPatrick McGehearty	ba	.ci_aln_100
5802*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
	! Bits 5 and 4 set; %o3 holds the bit 3 test.  When bit 3 is also
	! set, execution falls through into .ci_aln_111.
5803*280575beSPatrick McGehearty.ci_aln_11:
5804*280575beSPatrick McGehearty	brz,pn	%o3, .ci_aln_110
5805*280575beSPatrick McGehearty	prefetcha [%i0 + (3 * CACHE_LINE)]%asi, #one_read
5806*280575beSPatrick McGehearty
5807*280575beSPatrick McGehearty.ci_aln_111:
5808*280575beSPatrick McGehearty! Alignment off by 8 bytes
! Source address bits 5:3 are 111.  One leading double (8 bytes) is loaded
! into %d0, which brings %i0 to a 64-byte boundary.  Each pass block-loads
! 64 bytes into %d16-%d30, copies the first seven doubles into %d2-%d14 to
! complete the destination block %d0-%d14, stores that block, and carries
! %d30 forward in %d0.  The carried 8 bytes are stored after the loop.
! While looping, %i1 holds (dst - src) so that [%i0+%i1] addresses dst
! and only %i0 has to be advanced.  The loop is bottom-tested.
5809*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5810*280575beSPatrick McGehearty	ldda	[%i0]%asi, %d0
5811*280575beSPatrick McGehearty	add	%i0, 8, %i0
5812*280575beSPatrick McGehearty	sub	%i2, 8, %i2
5813*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
5814*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
5815*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
5816*280575beSPatrick McGehearty.ci_aln_111_loop:
5817*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_AIUS,%d16		! block load
5818*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
5819*280575beSPatrick McGehearty	fmovd	%d16, %d2
5820*280575beSPatrick McGehearty	fmovd	%d18, %d4
5821*280575beSPatrick McGehearty	fmovd	%d20, %d6
5822*280575beSPatrick McGehearty	fmovd	%d22, %d8
5823*280575beSPatrick McGehearty	fmovd	%d24, %d10
5824*280575beSPatrick McGehearty	fmovd	%d26, %d12
5825*280575beSPatrick McGehearty	fmovd	%d28, %d14
5826*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
5827*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
5828*280575beSPatrick McGehearty	add	%i0, 64, %i0
5829*280575beSPatrick McGehearty	fmovd	%d30, %d0
5830*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_aln_111_loop
5831*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5832*280575beSPatrick McGehearty	add	%i1, %i0, %i1
5833*280575beSPatrick McGehearty
5834*280575beSPatrick McGehearty	std	%d0, [%i1]
5835*280575beSPatrick McGehearty	ba	.ci_remain_stuff
5836*280575beSPatrick McGehearty	add	%i1, 8, %i1
5837*280575beSPatrick McGehearty	! END OF aln_111
5838*280575beSPatrick McGehearty
5839*280575beSPatrick McGehearty.ci_aln_110:
5840*280575beSPatrick McGehearty! Alignment off by 16 bytes
! Source address bits 5:3 are 110.  Two leading doubles (16 bytes) are
! loaded into %d0-%d2, which brings %i0 to a 64-byte boundary.  Each pass
! block-loads 64 bytes into %d16-%d30, copies the first six doubles into
! %d4-%d14 to complete the destination block %d0-%d14, stores that block,
! and carries %d28-%d30 forward in %d0-%d2.  The carried 16 bytes are
! stored after the loop.  While looping, %i1 holds (dst - src) so that
! [%i0+%i1] addresses dst.  The loop is bottom-tested.
5841*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5842*280575beSPatrick McGehearty	ldda	[%i0]%asi, %d0
5843*280575beSPatrick McGehearty	ldda	[%i0+8]%asi, %d2
5844*280575beSPatrick McGehearty	add	%i0, 16, %i0
5845*280575beSPatrick McGehearty	sub	%i2, 16, %i2
5846*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
5847*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
5848*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
5849*280575beSPatrick McGehearty.ci_aln_110_loop:
5850*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_AIUS,%d16		! block load
5851*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
5852*280575beSPatrick McGehearty	fmovd	%d16, %d4
5853*280575beSPatrick McGehearty	fmovd	%d18, %d6
5854*280575beSPatrick McGehearty	fmovd	%d20, %d8
5855*280575beSPatrick McGehearty	fmovd	%d22, %d10
5856*280575beSPatrick McGehearty	fmovd	%d24, %d12
5857*280575beSPatrick McGehearty	fmovd	%d26, %d14
5858*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
5859*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
5860*280575beSPatrick McGehearty	add	%i0, 64, %i0
5861*280575beSPatrick McGehearty	fmovd	%d28, %d0
5862*280575beSPatrick McGehearty	fmovd	%d30, %d2
5863*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_aln_110_loop
5864*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5865*280575beSPatrick McGehearty	add	%i1, %i0, %i1
5866*280575beSPatrick McGehearty
5867*280575beSPatrick McGehearty	std	%d0, [%i1]
5868*280575beSPatrick McGehearty	std	%d2, [%i1+8]
5869*280575beSPatrick McGehearty	ba	.ci_remain_stuff
5870*280575beSPatrick McGehearty	add	%i1, 16, %i1
5871*280575beSPatrick McGehearty	! END OF aln_110
5872*280575beSPatrick McGehearty
5873*280575beSPatrick McGehearty.ci_aln_101:
5874*280575beSPatrick McGehearty! Alignment off by 24 bytes
! Source address bits 5:3 are 101.  Three leading doubles (24 bytes) are
! loaded into %d0-%d4, which brings %i0 to a 64-byte boundary.  Each pass
! block-loads 64 bytes into %d16-%d30, copies the first five doubles into
! %d6-%d14 to complete the destination block %d0-%d14, stores that block,
! and carries %d26-%d30 forward in %d0-%d4.  The carried 24 bytes are
! stored after the loop.  While looping, %i1 holds (dst - src) so that
! [%i0+%i1] addresses dst.  The loop is bottom-tested.
5875*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5876*280575beSPatrick McGehearty	ldda	[%i0]%asi, %d0
5877*280575beSPatrick McGehearty	ldda	[%i0+8]%asi, %d2
5878*280575beSPatrick McGehearty	ldda	[%i0+16]%asi, %d4
5879*280575beSPatrick McGehearty	add	%i0, 24, %i0
5880*280575beSPatrick McGehearty	sub	%i2, 24, %i2
5881*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
5882*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
5883*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
5884*280575beSPatrick McGehearty.ci_aln_101_loop:
5885*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_AIUS,%d16	! block load
5886*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
5887*280575beSPatrick McGehearty	fmovd	%d16, %d6
5888*280575beSPatrick McGehearty	fmovd	%d18, %d8
5889*280575beSPatrick McGehearty	fmovd	%d20, %d10
5890*280575beSPatrick McGehearty	fmovd	%d22, %d12
5891*280575beSPatrick McGehearty	fmovd	%d24, %d14
5892*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
5893*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
5894*280575beSPatrick McGehearty	add	%i0, 64, %i0
5895*280575beSPatrick McGehearty	fmovd	%d26, %d0
5896*280575beSPatrick McGehearty	fmovd	%d28, %d2
5897*280575beSPatrick McGehearty	fmovd	%d30, %d4
5898*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_aln_101_loop
5899*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5900*280575beSPatrick McGehearty	add	%i1, %i0, %i1
5901*280575beSPatrick McGehearty
5902*280575beSPatrick McGehearty	std	%d0, [%i1]
5903*280575beSPatrick McGehearty	std	%d2, [%i1+8]
5904*280575beSPatrick McGehearty	std	%d4, [%i1+16]
5905*280575beSPatrick McGehearty	ba	.ci_remain_stuff
5906*280575beSPatrick McGehearty	add	%i1, 24, %i1
5907*280575beSPatrick McGehearty	! END OF aln_101
5908*280575beSPatrick McGehearty
5909*280575beSPatrick McGehearty.ci_aln_100:
5910*280575beSPatrick McGehearty! Alignment off by 32 bytes
! Source address bits 5:3 are 100.  Four leading doubles (32 bytes) are
! loaded into %d0-%d6, which brings %i0 to a 64-byte boundary.  Each pass
! block-loads 64 bytes into %d16-%d30, copies the first four doubles into
! %d8-%d14 to complete the destination block %d0-%d14, stores that block,
! and carries %d24-%d30 forward in %d0-%d6.  The carried 32 bytes are
! stored after the loop.  While looping, %i1 holds (dst - src) so that
! [%i0+%i1] addresses dst.  The loop is bottom-tested.
5911*280575beSPatrick McGehearty	ldda	[%i0]%asi, %d0
5912*280575beSPatrick McGehearty	ldda	[%i0+8]%asi, %d2
5913*280575beSPatrick McGehearty	ldda	[%i0+16]%asi,%d4
5914*280575beSPatrick McGehearty	ldda	[%i0+24]%asi,%d6
5915*280575beSPatrick McGehearty	add	%i0, 32, %i0
5916*280575beSPatrick McGehearty	sub	%i2, 32, %i2
5917*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
5918*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
5919*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
5920*280575beSPatrick McGehearty.ci_aln_100_loop:
5921*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_AIUS,%d16	! block load
5922*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
5923*280575beSPatrick McGehearty	fmovd	%d16, %d8
5924*280575beSPatrick McGehearty	fmovd	%d18, %d10
5925*280575beSPatrick McGehearty	fmovd	%d20, %d12
5926*280575beSPatrick McGehearty	fmovd	%d22, %d14
5927*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
5928*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
5929*280575beSPatrick McGehearty	add	%i0, 64, %i0
5930*280575beSPatrick McGehearty	fmovd	%d24, %d0
5931*280575beSPatrick McGehearty	fmovd	%d26, %d2
5932*280575beSPatrick McGehearty	fmovd	%d28, %d4
5933*280575beSPatrick McGehearty	fmovd	%d30, %d6
5934*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_aln_100_loop
5935*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5936*280575beSPatrick McGehearty	add	%i1, %i0, %i1
5937*280575beSPatrick McGehearty
5938*280575beSPatrick McGehearty	std	%d0, [%i1]
5939*280575beSPatrick McGehearty	std	%d2, [%i1+8]
5940*280575beSPatrick McGehearty	std	%d4, [%i1+16]
5941*280575beSPatrick McGehearty	std	%d6, [%i1+24]
5942*280575beSPatrick McGehearty	ba	.ci_remain_stuff
5943*280575beSPatrick McGehearty	add	%i1, 32, %i1
5944*280575beSPatrick McGehearty	! END OF aln_100
5945*280575beSPatrick McGehearty
5946*280575beSPatrick McGehearty.ci_aln_011:
5947*280575beSPatrick McGehearty! Alignment off by 40 bytes
! Source address bits 5:3 are 011.  Five leading doubles (40 bytes) are
! loaded into %d0-%d8, which brings %i0 to a 64-byte boundary.  Each pass
! block-loads 64 bytes into %d16-%d30, copies the first three doubles into
! %d10-%d14 to complete the destination block %d0-%d14, stores that block,
! and carries %d22-%d30 forward in %d0-%d8.  The carried 40 bytes are
! stored after the loop.  While looping, %i1 holds (dst - src) so that
! [%i0+%i1] addresses dst.  The loop is bottom-tested.
5948*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5949*280575beSPatrick McGehearty	ldda	[%i0]%asi, %d0
5950*280575beSPatrick McGehearty	ldda	[%i0+8]%asi, %d2
5951*280575beSPatrick McGehearty	ldda	[%i0+16]%asi, %d4
5952*280575beSPatrick McGehearty	ldda	[%i0+24]%asi, %d6
5953*280575beSPatrick McGehearty	ldda	[%i0+32]%asi, %d8
5954*280575beSPatrick McGehearty	add	%i0, 40, %i0
5955*280575beSPatrick McGehearty	sub	%i2, 40, %i2
5956*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
5957*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
5958*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
5959*280575beSPatrick McGehearty.ci_aln_011_loop:
5960*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_AIUS,%d16	! block load
5961*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
5962*280575beSPatrick McGehearty	fmovd	%d16, %d10
5963*280575beSPatrick McGehearty	fmovd	%d18, %d12
5964*280575beSPatrick McGehearty	fmovd	%d20, %d14
5965*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
5966*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
5967*280575beSPatrick McGehearty	add	%i0, 64, %i0
5968*280575beSPatrick McGehearty	fmovd	%d22, %d0
5969*280575beSPatrick McGehearty	fmovd	%d24, %d2
5970*280575beSPatrick McGehearty	fmovd	%d26, %d4
5971*280575beSPatrick McGehearty	fmovd	%d28, %d6
5972*280575beSPatrick McGehearty	fmovd	%d30, %d8
5973*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_aln_011_loop
5974*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
5975*280575beSPatrick McGehearty	add	%i1, %i0, %i1
5976*280575beSPatrick McGehearty
5977*280575beSPatrick McGehearty	std	%d0, [%i1]
5978*280575beSPatrick McGehearty	std	%d2, [%i1+8]
5979*280575beSPatrick McGehearty	std	%d4, [%i1+16]
5980*280575beSPatrick McGehearty	std	%d6, [%i1+24]
5981*280575beSPatrick McGehearty	std	%d8, [%i1+32]
5982*280575beSPatrick McGehearty	ba	.ci_remain_stuff
5983*280575beSPatrick McGehearty	add	%i1, 40, %i1
5984*280575beSPatrick McGehearty	! END OF aln_011
5985*280575beSPatrick McGehearty
5986*280575beSPatrick McGehearty.ci_aln_010:
5987*280575beSPatrick McGehearty! Alignment off by 48 bytes
! Source address bits 5:3 are 010.  Six leading doubles (48 bytes) are
! loaded into %d0-%d10, which brings %i0 to a 64-byte boundary.  Each pass
! block-loads 64 bytes into %d16-%d30, copies the first two doubles into
! %d12-%d14 to complete the destination block %d0-%d14, stores that block,
! and carries %d20-%d30 forward in %d0-%d10.  The carried 48 bytes are
! stored after the loop.  While looping, %i1 holds (dst - src) so that
! [%i0+%i1] addresses dst.  The loop is bottom-tested.
5988*280575beSPatrick McGehearty	ldda	[%i0]%asi, %d0
5989*280575beSPatrick McGehearty	ldda	[%i0+8]%asi, %d2
5990*280575beSPatrick McGehearty	ldda	[%i0+16]%asi, %d4
5991*280575beSPatrick McGehearty	ldda	[%i0+24]%asi, %d6
5992*280575beSPatrick McGehearty	ldda	[%i0+32]%asi, %d8
5993*280575beSPatrick McGehearty	ldda	[%i0+40]%asi, %d10
5994*280575beSPatrick McGehearty	add	%i0, 48, %i0
5995*280575beSPatrick McGehearty	sub	%i2, 48, %i2
5996*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
5997*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
5998*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
5999*280575beSPatrick McGehearty.ci_aln_010_loop:
6000*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_AIUS,%d16	! block load
6001*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
6002*280575beSPatrick McGehearty	fmovd	%d16, %d12
6003*280575beSPatrick McGehearty	fmovd	%d18, %d14
6004*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
6005*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
6006*280575beSPatrick McGehearty	add	%i0, 64, %i0
6007*280575beSPatrick McGehearty	fmovd	%d20, %d0
6008*280575beSPatrick McGehearty	fmovd	%d22, %d2
6009*280575beSPatrick McGehearty	fmovd	%d24, %d4
6010*280575beSPatrick McGehearty	fmovd	%d26, %d6
6011*280575beSPatrick McGehearty	fmovd	%d28, %d8
6012*280575beSPatrick McGehearty	fmovd	%d30, %d10
6013*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_aln_010_loop
6014*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
6015*280575beSPatrick McGehearty	add	%i1, %i0, %i1
6016*280575beSPatrick McGehearty
6017*280575beSPatrick McGehearty	std	%d0, [%i1]
6018*280575beSPatrick McGehearty	std	%d2, [%i1+8]
6019*280575beSPatrick McGehearty	std	%d4, [%i1+16]
6020*280575beSPatrick McGehearty	std	%d6, [%i1+24]
6021*280575beSPatrick McGehearty	std	%d8, [%i1+32]
6022*280575beSPatrick McGehearty	std	%d10, [%i1+40]
6023*280575beSPatrick McGehearty	ba	.ci_remain_stuff
6024*280575beSPatrick McGehearty	add	%i1, 48, %i1
6025*280575beSPatrick McGehearty	! END OF aln_010
6026*280575beSPatrick McGehearty
6027*280575beSPatrick McGehearty.ci_aln_001:
6028*280575beSPatrick McGehearty! Alignment off by 56 bytes
! Source address bits 5:3 are 001.  Seven leading doubles (56 bytes) are
! loaded into %d0-%d12, which brings %i0 to a 64-byte boundary.  Each pass
! block-loads 64 bytes into %d16-%d30, copies the first double into %d14
! to complete the destination block %d0-%d14, stores that block, and
! carries %d18-%d30 forward in %d0-%d12.  The carried 56 bytes are
! stored after the loop.  While looping, %i1 holds (dst - src) so that
! [%i0+%i1] addresses dst.  The loop is bottom-tested.
6029*280575beSPatrick McGehearty	ldda	[%i0]%asi, %d0
6030*280575beSPatrick McGehearty	ldda	[%i0+8]%asi, %d2
6031*280575beSPatrick McGehearty	ldda	[%i0+16]%asi, %d4
6032*280575beSPatrick McGehearty	ldda	[%i0+24]%asi, %d6
6033*280575beSPatrick McGehearty	ldda	[%i0+32]%asi, %d8
6034*280575beSPatrick McGehearty	ldda	[%i0+40]%asi, %d10
6035*280575beSPatrick McGehearty	ldda	[%i0+48]%asi, %d12
6036*280575beSPatrick McGehearty	add	%i0, 56, %i0
6037*280575beSPatrick McGehearty	sub	%i2, 56, %i2
6038*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
6039*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
6040*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
6041*280575beSPatrick McGehearty.ci_aln_001_loop:
6042*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_AIUS,%d16	! block load
6043*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
6044*280575beSPatrick McGehearty	fmovd	%d16, %d14
6045*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
6046*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
6047*280575beSPatrick McGehearty	add	%i0, 64, %i0
6048*280575beSPatrick McGehearty	fmovd	%d18, %d0
6049*280575beSPatrick McGehearty	fmovd	%d20, %d2
6050*280575beSPatrick McGehearty	fmovd	%d22, %d4
6051*280575beSPatrick McGehearty	fmovd	%d24, %d6
6052*280575beSPatrick McGehearty	fmovd	%d26, %d8
6053*280575beSPatrick McGehearty	fmovd	%d28, %d10
6054*280575beSPatrick McGehearty	fmovd	%d30, %d12
6055*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_aln_001_loop
6056*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
6057*280575beSPatrick McGehearty	add	%i1, %i0, %i1
6058*280575beSPatrick McGehearty
6059*280575beSPatrick McGehearty	std	%d0, [%i1]
6060*280575beSPatrick McGehearty	std	%d2, [%i1+8]
6061*280575beSPatrick McGehearty	std	%d4, [%i1+16]
6062*280575beSPatrick McGehearty	std	%d6, [%i1+24]
6063*280575beSPatrick McGehearty	std	%d8, [%i1+32]
6064*280575beSPatrick McGehearty	std	%d10, [%i1+40]
6065*280575beSPatrick McGehearty	std	%d12, [%i1+48]
6066*280575beSPatrick McGehearty	ba	.ci_remain_stuff
6067*280575beSPatrick McGehearty	add	%i1, 56, %i1
6068*280575beSPatrick McGehearty	! END OF aln_001
6069*280575beSPatrick McGehearty
6070*280575beSPatrick McGehearty.ci_aln_000:
! Source address bits 5:3 are 000, so no leading doubles are peeled and
! no register shuffle is needed: each pass block-loads 64 bytes straight
! into %d0 and block-stores them to dst.  While looping, %i1 holds
! (dst - src) so that [%i0+%i1] addresses dst.  The loop is bottom-tested.
! Execution then falls through into .ci_remain_stuff.
6071*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
6072*280575beSPatrick McGehearty	andn	%i2, 0x7f, %o3		! %o3 is multiple of 2*block size
6073*280575beSPatrick McGehearty	and	%i2, 0x7f, %i2		! residue bytes in %i2
6074*280575beSPatrick McGehearty	sub	%i1, %i0, %i1
6075*280575beSPatrick McGehearty.ci_aln_000_loop:
6076*280575beSPatrick McGehearty	ldda	[%i0]ASI_BLK_AIUS,%d0
6077*280575beSPatrick McGehearty	subcc	%o3, 64, %o3
6078*280575beSPatrick McGehearty	stxa	%g0,[%i0+%i1]ASI_STBI_P	! block initializing store
6079*280575beSPatrick McGehearty	stda	%d0,[%i0+%i1]ASI_BLK_P
6080*280575beSPatrick McGehearty	add	%i0, 64, %i0
6081*280575beSPatrick McGehearty	bgt,pt	%ncc, .ci_aln_000_loop
6082*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
6083*280575beSPatrick McGehearty	add	%i1, %i0, %i1
6084*280575beSPatrick McGehearty
6085*280575beSPatrick McGehearty	! END OF aln_000
6086*280575beSPatrick McGehearty
6087*280575beSPatrick McGehearty.ci_remain_stuff:
	! Copy the residue count in %i2 left over by the block loops:
	! 32 bytes per pass, then 8 bytes per pass, then one 4-byte word.
	! Anything still left (fewer than 4 bytes) is handed to
	! .ci_unaln3x, which is defined outside this section.  The count
	! is kept biased (by 31, then by 7) so that the subcc in each loop
	! also yields the loop condition.
6088*280575beSPatrick McGehearty	subcc	%i2, 31, %i2		! adjust length to allow cc test
6089*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_aln_31
6090*280575beSPatrick McGehearty	nop
6091*280575beSPatrick McGehearty.ci_aln_32:
6092*280575beSPatrick McGehearty	ldxa	[%i0]%asi, %o4		! move 32 bytes
6093*280575beSPatrick McGehearty	subcc	%i2, 32, %i2		! decrement length count by 32
6094*280575beSPatrick McGehearty	stx	%o4, [%i1]
6095*280575beSPatrick McGehearty	ldxa	[%i0+8]%asi, %o4
6096*280575beSPatrick McGehearty	stx	%o4, [%i1+8]
6097*280575beSPatrick McGehearty	ldxa	[%i0+16]%asi, %o4
6098*280575beSPatrick McGehearty	add	%i0, 32, %i0		! increase src ptr by 32
6099*280575beSPatrick McGehearty	stx	%o4, [%i1+16]
6100*280575beSPatrick McGehearty	ldxa	[%i0-8]%asi, %o4
6101*280575beSPatrick McGehearty	add	%i1, 32, %i1		! increase dst ptr by 32
6102*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_aln_32	! repeat if at least 32 bytes left
6103*280575beSPatrick McGehearty	stx	%o4, [%i1-8]
6104*280575beSPatrick McGehearty.ci_aln_31:
6105*280575beSPatrick McGehearty	addcc	%i2, 24, %i2		! adjust count to be off by 7
6106*280575beSPatrick McGehearty	ble,pt	%ncc, .ci_aln_7		! skip if 7 or fewer bytes left
6107*280575beSPatrick McGehearty	nop				!
6108*280575beSPatrick McGehearty.ci_aln_15:
6109*280575beSPatrick McGehearty	ldxa	[%i0]%asi, %o4		! move 8 bytes
6110*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increase src ptr by 8
6111*280575beSPatrick McGehearty	subcc	%i2, 8, %i2		! decrease count by 8
6112*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increase dst ptr by 8
6113*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_aln_15
6114*280575beSPatrick McGehearty	stx	%o4, [%i1-8]		!
6115*280575beSPatrick McGehearty.ci_aln_7:
6116*280575beSPatrick McGehearty	addcc	%i2, 7, %i2		! finish adjustment of remaining count
6117*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_exit		! exit if finished
6118*280575beSPatrick McGehearty	cmp	%i2, 4			! delay slot: sets cc for the blt below
6119*280575beSPatrick McGehearty	blt,pt	%ncc, .ci_unaln3x	! skip if less than 4 bytes left
6120*280575beSPatrick McGehearty	nop				!
6121*280575beSPatrick McGehearty	lda	[%i0]%asi, %o4		! move 4 bytes
6122*280575beSPatrick McGehearty	add	%i0, 4, %i0		! increase src ptr by 4
6123*280575beSPatrick McGehearty	add	%i1, 4, %i1		! increase dst ptr by 4
6124*280575beSPatrick McGehearty	subcc	%i2, 4, %i2		! decrease count by 4
6125*280575beSPatrick McGehearty	bnz	.ci_unaln3x
6126*280575beSPatrick McGehearty	stw	%o4, [%i1-4]
6127*280575beSPatrick McGehearty	ba	.ci_exit
6128*280575beSPatrick McGehearty	nop
6129*280575beSPatrick McGehearty
6130*280575beSPatrick McGehearty	! destination alignment code
	! Each step copies 1, 2 or 4 bytes using byte loads only (nothing
	! is assumed about source alignment), assembles them big-endian
	! into one register, and stores them with a single store.  After
	! the 1- and 2-byte steps the next alignment bit of the destination
	! (%i1) is tested.  The continuation labels .ci_big_d2f and
	! .ci_big_d4f are defined outside this section.
6131*280575beSPatrick McGehearty.ci_big_d1:
6132*280575beSPatrick McGehearty	lduba	[%i0]%asi, %o4		! move a byte
6133*280575beSPatrick McGehearty	add	%i0, 1, %i0
6134*280575beSPatrick McGehearty	stb	%o4, [%i1]
6135*280575beSPatrick McGehearty	add	%i1, 1, %i1
6136*280575beSPatrick McGehearty	andcc	%i1, 2, %o3
6137*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_big_d2f
6138*280575beSPatrick McGehearty	sub	%i2, 1, %i2
6139*280575beSPatrick McGehearty.ci_big_d2:				! dest is now at least half word aligned
6140*280575beSPatrick McGehearty	lduba	[%i0]%asi, %o4		! move a half-word (src align unknown)
6141*280575beSPatrick McGehearty	lduba	[%i0+1]%asi, %o3
6142*280575beSPatrick McGehearty	add	%i0, 2, %i0
6143*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
6144*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
6145*280575beSPatrick McGehearty	sth	%o4, [%i1]
6146*280575beSPatrick McGehearty	add	%i1, 2, %i1
6147*280575beSPatrick McGehearty	andcc	%i1, 4, %o3
6148*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_big_d4f
6149*280575beSPatrick McGehearty	sub	%i2, 2, %i2
6150*280575beSPatrick McGehearty.ci_big_d4:				! dest is at least word aligned
6151*280575beSPatrick McGehearty	nop
6152*280575beSPatrick McGehearty	lduba	[%i0]%asi, %o4		! move a word (src align unknown)
6153*280575beSPatrick McGehearty	lduba	[%i0+1]%asi, %o3
6154*280575beSPatrick McGehearty	sll	%o4, 24, %o4		! position
6155*280575beSPatrick McGehearty	sll	%o3, 16, %o3		! position
6156*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
6157*280575beSPatrick McGehearty	lduba	[%i0+2]%asi, %o4
6158*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
6159*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
6160*280575beSPatrick McGehearty	lduba	[%i0+3]%asi, %o4
6161*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
6162*280575beSPatrick McGehearty	stw	%o4,[%i1]		! store four bytes
6163*280575beSPatrick McGehearty	add	%i0, 4, %i0		! adjust src by 4
6164*280575beSPatrick McGehearty	add	%i1, 4, %i1		! adjust dest by 4
6165*280575beSPatrick McGehearty	ba	.ci_big_d4f
6166*280575beSPatrick McGehearty	sub	%i2, 4, %i2		! adjust count by 4
6167*280575beSPatrick McGehearty
6168*280575beSPatrick McGehearty
6169*280575beSPatrick McGehearty	! Dst is on 8 byte boundary; src is not;
	! If dst is not yet 64-byte aligned, compute in %o3 the number of
	! bytes up to the next 64-byte boundary, take that amount off the
	! remaining count in %i2, and copy it 8 bytes per pass using the
	! loop that matches the source alignment (byte, half-word or word).
	! All three loops end up at .ci_unalnsrc.
6170*280575beSPatrick McGehearty.ci_big_unal8:
6171*280575beSPatrick McGehearty	andcc	%i1, 0x3f, %o3		! is dst 64-byte block aligned?
6172*280575beSPatrick McGehearty	bz	%ncc, .ci_unalnsrc
6173*280575beSPatrick McGehearty	sub	%o3, 64, %o3		! %o3 will be multiple of 8
6174*280575beSPatrick McGehearty	neg	%o3			! bytes until dest is 64 byte aligned
6175*280575beSPatrick McGehearty	sub	%i2, %o3, %i2		! update cnt with bytes to be moved
6176*280575beSPatrick McGehearty	! Move bytes according to source alignment
6177*280575beSPatrick McGehearty	andcc	%i0, 0x1, %o4
6178*280575beSPatrick McGehearty	bnz	%ncc, .ci_unalnbyte	! check for byte alignment
6179*280575beSPatrick McGehearty	nop
6180*280575beSPatrick McGehearty	andcc	%i0, 2, %o4		! check for half word alignment
6181*280575beSPatrick McGehearty	bnz	%ncc, .ci_unalnhalf
6182*280575beSPatrick McGehearty	nop
6183*280575beSPatrick McGehearty	! Src is word aligned, move bytes until dest 64 byte aligned
6184*280575beSPatrick McGehearty.ci_unalnword:
6185*280575beSPatrick McGehearty	lda	[%i0]%asi, %o4		! load 4 bytes
6186*280575beSPatrick McGehearty	stw	%o4, [%i1]		! and store 4 bytes
6187*280575beSPatrick McGehearty	lda	[%i0+4]%asi, %o4	! load 4 bytes
6188*280575beSPatrick McGehearty	add	%i0, 8, %i0		! increase src ptr by 8
6189*280575beSPatrick McGehearty	stw	%o4, [%i1+4]		! and store 4 bytes
6190*280575beSPatrick McGehearty	subcc	%o3, 8, %o3		! decrease count by 8
6191*280575beSPatrick McGehearty	bnz	%ncc, .ci_unalnword
6192*280575beSPatrick McGehearty	add	%i1, 8, %i1		! increase dst ptr by 8
6193*280575beSPatrick McGehearty	ba	.ci_unalnsrc
6194*280575beSPatrick McGehearty	nop
6195*280575beSPatrick McGehearty
6196*280575beSPatrick McGehearty	! Src is half-word aligned, move bytes until dest 64 byte aligned
	! Each pass builds one 8-byte value in %i3 from a 2-byte, a 4-byte
	! and a 2-byte load (source offsets 0, 2 and 6) and writes it with
	! a single 8-byte store.  %o3 counts down the bytes left to move.
6197*280575beSPatrick McGehearty.ci_unalnhalf:
6198*280575beSPatrick McGehearty	lduha	[%i0]%asi, %o4		! load 2 bytes
6199*280575beSPatrick McGehearty	sllx	%o4, 32, %i3		! shift left
6200*280575beSPatrick McGehearty	lduwa	[%i0+2]%asi, %o4	! load 4 bytes
6201*280575beSPatrick McGehearty	or	%o4, %i3, %i3
6202*280575beSPatrick McGehearty	sllx	%i3, 16, %i3		! make room for the last 2 bytes
6203*280575beSPatrick McGehearty	lduha	[%i0+6]%asi, %o4	! load 2 bytes
6204*280575beSPatrick McGehearty	or	%o4, %i3, %i3
6205*280575beSPatrick McGehearty	stx	%i3, [%i1]		! store 8 bytes
6206*280575beSPatrick McGehearty	add	%i0, 8, %i0
6207*280575beSPatrick McGehearty	subcc	%o3, 8, %o3
6208*280575beSPatrick McGehearty	bnz	%ncc, .ci_unalnhalf
6209*280575beSPatrick McGehearty	add	%i1, 8, %i1
6210*280575beSPatrick McGehearty	ba	.ci_unalnsrc
6211*280575beSPatrick McGehearty	nop
6212*280575beSPatrick McGehearty
6213*280575beSPatrick McGehearty	! Src is Byte aligned, move bytes until dest 64 byte aligned
	! Each pass builds one 8-byte value in %i3 from a 1-byte load,
	! three 2-byte loads and a final 1-byte load (source offsets
	! 0, 1, 3, 5 and 7) and writes it with a single 8-byte store.
	! While looping, %i1 holds (dst - src) so that [%i1+%i0] addresses
	! dst and only %i0 is advanced.  Execution falls through into
	! .ci_unalnsrc when %o3 reaches zero.
6214*280575beSPatrick McGehearty.ci_unalnbyte:
6215*280575beSPatrick McGehearty	sub	%i1, %i0, %i1		! share pointer advance
6216*280575beSPatrick McGehearty.ci_unalnbyte_loop:
6217*280575beSPatrick McGehearty	lduba	[%i0]%asi, %o4
6218*280575beSPatrick McGehearty	sllx	%o4, 56, %i3
6219*280575beSPatrick McGehearty	lduha	[%i0+1]%asi, %o4
6220*280575beSPatrick McGehearty	sllx	%o4, 40, %o4
6221*280575beSPatrick McGehearty	or	%o4, %i3, %i3
6222*280575beSPatrick McGehearty	lduha	[%i0+3]%asi, %o4
6223*280575beSPatrick McGehearty	sllx	%o4, 24, %o4
6224*280575beSPatrick McGehearty	or	%o4, %i3, %i3
6225*280575beSPatrick McGehearty	lduha	[%i0+5]%asi, %o4
6226*280575beSPatrick McGehearty	sllx	%o4, 8, %o4
6227*280575beSPatrick McGehearty	or	%o4, %i3, %i3
6228*280575beSPatrick McGehearty	lduba	[%i0+7]%asi, %o4
6229*280575beSPatrick McGehearty	or	%o4, %i3, %i3
6230*280575beSPatrick McGehearty	stx	%i3, [%i1+%i0]
6231*280575beSPatrick McGehearty	subcc	%o3, 8, %o3
6232*280575beSPatrick McGehearty	bnz	%ncc, .ci_unalnbyte_loop
6233*280575beSPatrick McGehearty	add	%i0, 8, %i0
6234*280575beSPatrick McGehearty	add	%i1,%i0, %i1		! restore pointer
6235*280575beSPatrick McGehearty
6236*280575beSPatrick McGehearty	! Destination is now block (64 byte aligned), src is not 8 byte aligned
	! Split the count into whole 64-byte blocks (%i3) and residue (%i2),
	! then move one block's worth from %i3 to %i2 so the block loops
	! stop one block early.  %o4 walks the source in 64-byte-aligned
	! steps while %i0 is advanced past the blocks up front.  Because
	! %i3 is a multiple of 64, bits 5:3 of %i0 are unchanged by that
	! advance and select one of the eight .ci_unaln_xyz handlers.
6237*280575beSPatrick McGehearty.ci_unalnsrc:
6238*280575beSPatrick McGehearty	andn	%i2, 0x3f, %i3		! %i3 is multiple of block size
6239*280575beSPatrick McGehearty	and	%i2, 0x3f, %i2		! residue bytes in %i2
6240*280575beSPatrick McGehearty	add	%i2, 64, %i2		! Ensure we don't load beyond
6241*280575beSPatrick McGehearty	sub	%i3, 64, %i3		! end of source buffer
6242*280575beSPatrick McGehearty
6243*280575beSPatrick McGehearty	andn	%i0, 0x3f, %o4		! %o4 has block aligned src address
6244*280575beSPatrick McGehearty	prefetcha [%o4 + (3 * CACHE_LINE)]%asi, #one_read
6245*280575beSPatrick McGehearty	alignaddr %i0, %g0, %g0		! generate %gsr
6246*280575beSPatrick McGehearty	add	%i0, %i3, %i0		! advance %i0 to after blocks
6247*280575beSPatrick McGehearty	!
6248*280575beSPatrick McGehearty	! Determine source alignment to correct 8 byte offset
	! The test for the next bit is issued in the delay slot of the
	! previous branch.  The annulled (",a") branches execute their
	! delay-slot prefetch only when the branch is taken.
6249*280575beSPatrick McGehearty	andcc	%i0, 0x20, %o3
6250*280575beSPatrick McGehearty	brnz,pn	%o3, .ci_unaln_1
6251*280575beSPatrick McGehearty	andcc	%i0, 0x10, %o3
6252*280575beSPatrick McGehearty	brnz,pn	%o3, .ci_unaln_01
6253*280575beSPatrick McGehearty	andcc	%i0, 0x08, %o3
6254*280575beSPatrick McGehearty	brz,a	%o3, .ci_unaln_000
6255*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6256*280575beSPatrick McGehearty	ba	.ci_unaln_001
6257*280575beSPatrick McGehearty	nop
	! Bit 5 clear, bit 4 set; %o3 holds the bit 3 test.
6258*280575beSPatrick McGehearty.ci_unaln_01:
6259*280575beSPatrick McGehearty	brnz,a	%o3, .ci_unaln_011
6260*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6261*280575beSPatrick McGehearty	ba	.ci_unaln_010
6262*280575beSPatrick McGehearty	nop
	! Bit 5 set; %o3 holds the bit 4 test.
6263*280575beSPatrick McGehearty.ci_unaln_1:
6264*280575beSPatrick McGehearty	brnz,pn	%o3, .ci_unaln_11
6265*280575beSPatrick McGehearty	andcc	%i0, 0x08, %o3
6266*280575beSPatrick McGehearty	brnz,a	%o3, .ci_unaln_101
6267*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6268*280575beSPatrick McGehearty	ba	.ci_unaln_100
6269*280575beSPatrick McGehearty	nop
	! Bits 5 and 4 set; %o3 holds the bit 3 test.  When bit 3 is also
	! set, execution falls through into .ci_unaln_111.
	! NOTE(review): this prefetch is based on %i0 (already advanced past
	! the blocks) while the sibling paths use %o4 - confirm intended.
6270*280575beSPatrick McGehearty.ci_unaln_11:
6271*280575beSPatrick McGehearty	brz,pn	%o3, .ci_unaln_110
6272*280575beSPatrick McGehearty	prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read
6273*280575beSPatrick McGehearty
6274*280575beSPatrick McGehearty.ci_unaln_111:
! Source address bits 5:3 are 111.  The last double of the current
! 64-byte source block (offset 56 from %o4) is preloaded into %d14.
! Each pass advances %o4 by 64, block-loads the next source block into
! %d16-%d30, uses faligndata on adjacent double pairs starting at %d14 to
! form the eight output doubles %d48-%d62, carries %d30 forward in %d14,
! and block-stores %d48-%d62 to dst (%i1).  %i3 counts down the block
! bytes; the loop is bottom-tested.
6275*280575beSPatrick McGehearty	ldda	[%o4+56]%asi, %d14
6276*280575beSPatrick McGehearty.ci_unaln_111_loop:
6277*280575beSPatrick McGehearty	add	%o4, 64, %o4
6278*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d16
6279*280575beSPatrick McGehearty	faligndata %d14, %d16, %d48
6280*280575beSPatrick McGehearty	faligndata %d16, %d18, %d50
6281*280575beSPatrick McGehearty	faligndata %d18, %d20, %d52
6282*280575beSPatrick McGehearty	faligndata %d20, %d22, %d54
6283*280575beSPatrick McGehearty	faligndata %d22, %d24, %d56
6284*280575beSPatrick McGehearty	faligndata %d24, %d26, %d58
6285*280575beSPatrick McGehearty	faligndata %d26, %d28, %d60
6286*280575beSPatrick McGehearty	faligndata %d28, %d30, %d62
6287*280575beSPatrick McGehearty	fmovd	%d30, %d14
6288*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
6289*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
6290*280575beSPatrick McGehearty	add	%i1, 64, %i1
6291*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_111_loop
6292*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6293*280575beSPatrick McGehearty	ba	.ci_unaln_done
6294*280575beSPatrick McGehearty	nop
6295*280575beSPatrick McGehearty
6296*280575beSPatrick McGehearty.ci_unaln_110:
! Source address bits 5:3 are 110.  The last two doubles of the current
! 64-byte source block (offsets 48 and 56 from %o4) are preloaded into
! %d12-%d14.  Each pass advances %o4 by 64, block-loads the next source
! block into %d16-%d30, uses faligndata on adjacent double pairs starting
! at %d12 to form the eight output doubles %d48-%d62, carries %d28-%d30
! forward in %d12-%d14, and block-stores %d48-%d62 to dst (%i1).  %i3
! counts down the block bytes; the loop is bottom-tested.
6297*280575beSPatrick McGehearty	ldda	[%o4+48]%asi, %d12
6298*280575beSPatrick McGehearty	ldda	[%o4+56]%asi, %d14
6299*280575beSPatrick McGehearty.ci_unaln_110_loop:
6300*280575beSPatrick McGehearty	add	%o4, 64, %o4
6301*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d16
6302*280575beSPatrick McGehearty	faligndata %d12, %d14, %d48
6303*280575beSPatrick McGehearty	faligndata %d14, %d16, %d50
6304*280575beSPatrick McGehearty	faligndata %d16, %d18, %d52
6305*280575beSPatrick McGehearty	faligndata %d18, %d20, %d54
6306*280575beSPatrick McGehearty	faligndata %d20, %d22, %d56
6307*280575beSPatrick McGehearty	faligndata %d22, %d24, %d58
6308*280575beSPatrick McGehearty	faligndata %d24, %d26, %d60
6309*280575beSPatrick McGehearty	faligndata %d26, %d28, %d62
6310*280575beSPatrick McGehearty	fmovd	%d28, %d12
6311*280575beSPatrick McGehearty	fmovd	%d30, %d14
6312*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
6313*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
6314*280575beSPatrick McGehearty	add	%i1, 64, %i1
6315*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_110_loop
6316*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6317*280575beSPatrick McGehearty	ba	.ci_unaln_done
6318*280575beSPatrick McGehearty	nop
6319*280575beSPatrick McGehearty
6320*280575beSPatrick McGehearty.ci_unaln_101:
! Source address bits 5:3 are 101.  The last three doubles of the current
! 64-byte source block (offsets 40, 48 and 56 from %o4) are preloaded
! into %d10-%d14.  Each pass advances %o4 by 64, block-loads the next
! source block into %d16-%d30, uses faligndata on adjacent double pairs
! starting at %d10 to form the eight output doubles %d48-%d62, carries
! %d26-%d30 forward in %d10-%d14, and block-stores %d48-%d62 to dst
! (%i1).  %i3 counts down the block bytes; the loop is bottom-tested.
6321*280575beSPatrick McGehearty	ldda	[%o4+40]%asi, %d10
6322*280575beSPatrick McGehearty	ldda	[%o4+48]%asi, %d12
6323*280575beSPatrick McGehearty	ldda	[%o4+56]%asi, %d14
6324*280575beSPatrick McGehearty.ci_unaln_101_loop:
6325*280575beSPatrick McGehearty	add	%o4, 64, %o4
6326*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d16
6327*280575beSPatrick McGehearty	faligndata %d10, %d12, %d48
6328*280575beSPatrick McGehearty	faligndata %d12, %d14, %d50
6329*280575beSPatrick McGehearty	faligndata %d14, %d16, %d52
6330*280575beSPatrick McGehearty	faligndata %d16, %d18, %d54
6331*280575beSPatrick McGehearty	faligndata %d18, %d20, %d56
6332*280575beSPatrick McGehearty	faligndata %d20, %d22, %d58
6333*280575beSPatrick McGehearty	faligndata %d22, %d24, %d60
6334*280575beSPatrick McGehearty	faligndata %d24, %d26, %d62
6335*280575beSPatrick McGehearty	fmovd	%d26, %d10
6336*280575beSPatrick McGehearty	fmovd	%d28, %d12
6337*280575beSPatrick McGehearty	fmovd	%d30, %d14
6338*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
6339*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
6340*280575beSPatrick McGehearty	add	%i1, 64, %i1
6341*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_101_loop
6342*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6343*280575beSPatrick McGehearty	ba	.ci_unaln_done
6344*280575beSPatrick McGehearty	nop
6345*280575beSPatrick McGehearty
6346*280575beSPatrick McGehearty.ci_unaln_100:
! Source address bits 5:3 are 100.  The last four doubles of the current
! 64-byte source block (offsets 32 through 56 from %o4) are preloaded
! into %d8-%d14.  Each pass advances %o4 by 64, block-loads the next
! source block into %d16-%d30, uses faligndata on adjacent double pairs
! starting at %d8 to form the eight output doubles %d48-%d62, carries
! %d24-%d30 forward in %d8-%d14, and block-stores %d48-%d62 to dst
! (%i1).  %i3 counts down the block bytes; the loop is bottom-tested.
6347*280575beSPatrick McGehearty	ldda	[%o4+32]%asi, %d8
6348*280575beSPatrick McGehearty	ldda	[%o4+40]%asi, %d10
6349*280575beSPatrick McGehearty	ldda	[%o4+48]%asi, %d12
6350*280575beSPatrick McGehearty	ldda	[%o4+56]%asi, %d14
6351*280575beSPatrick McGehearty.ci_unaln_100_loop:
6352*280575beSPatrick McGehearty	add	%o4, 64, %o4
6353*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d16
6354*280575beSPatrick McGehearty	faligndata %d8, %d10, %d48
6355*280575beSPatrick McGehearty	faligndata %d10, %d12, %d50
6356*280575beSPatrick McGehearty	faligndata %d12, %d14, %d52
6357*280575beSPatrick McGehearty	faligndata %d14, %d16, %d54
6358*280575beSPatrick McGehearty	faligndata %d16, %d18, %d56
6359*280575beSPatrick McGehearty	faligndata %d18, %d20, %d58
6360*280575beSPatrick McGehearty	faligndata %d20, %d22, %d60
6361*280575beSPatrick McGehearty	faligndata %d22, %d24, %d62
6362*280575beSPatrick McGehearty	fmovd	%d24, %d8
6363*280575beSPatrick McGehearty	fmovd	%d26, %d10
6364*280575beSPatrick McGehearty	fmovd	%d28, %d12
6365*280575beSPatrick McGehearty	fmovd	%d30, %d14
6366*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
6367*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
6368*280575beSPatrick McGehearty	add	%i1, 64, %i1
6369*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_100_loop
6370*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6371*280575beSPatrick McGehearty	ba	.ci_unaln_done
6372*280575beSPatrick McGehearty	nop
6373*280575beSPatrick McGehearty
/*
 * Block-copy loop for an unaligned source, variant 011.
 *
 * Entry preloads the doublewords at %o4+24 .. %o4+56 into %d6-%d14
 * (loads go through %asi, whose value is set outside this view).
 * Each pass advances %o4 by 64, block-loads the next 64 source bytes
 * into %d16-%d30 with ASI_BLK_AIUS, runs faligndata over adjacent
 * register pairs starting at %d6 to form eight output doublewords in
 * %d48-%d62, moves %d22-%d30 down to %d6-%d14 for the next pass, and
 * block-stores %d48-%d62 to [%i1] before advancing %i1 by 64.
 * %i3 is reduced by 64 per pass; bgu repeats the loop as long as that
 * subtraction leaves a non-zero result without borrow.  The prefetcha is
 * in the non-annulled delay slot of bgu, so it runs on every pass.
 */
6374*280575beSPatrick McGehearty.ci_unaln_011:
6375*280575beSPatrick McGehearty	ldda	[%o4+24]%asi, %d6
6376*280575beSPatrick McGehearty	ldda	[%o4+32]%asi, %d8
6377*280575beSPatrick McGehearty	ldda	[%o4+40]%asi, %d10
6378*280575beSPatrick McGehearty	ldda	[%o4+48]%asi, %d12
6379*280575beSPatrick McGehearty	ldda	[%o4+56]%asi, %d14
6380*280575beSPatrick McGehearty.ci_unaln_011_loop:
6381*280575beSPatrick McGehearty	add	%o4, 64, %o4
6382*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d16
6383*280575beSPatrick McGehearty	faligndata %d6, %d8, %d48
6384*280575beSPatrick McGehearty	faligndata %d8, %d10, %d50
6385*280575beSPatrick McGehearty	faligndata %d10, %d12, %d52
6386*280575beSPatrick McGehearty	faligndata %d12, %d14, %d54
6387*280575beSPatrick McGehearty	faligndata %d14, %d16, %d56
6388*280575beSPatrick McGehearty	faligndata %d16, %d18, %d58
6389*280575beSPatrick McGehearty	faligndata %d18, %d20, %d60
6390*280575beSPatrick McGehearty	faligndata %d20, %d22, %d62
6391*280575beSPatrick McGehearty	fmovd	%d22, %d6
6392*280575beSPatrick McGehearty	fmovd	%d24, %d8
6393*280575beSPatrick McGehearty	fmovd	%d26, %d10
6394*280575beSPatrick McGehearty	fmovd	%d28, %d12
6395*280575beSPatrick McGehearty	fmovd	%d30, %d14
6396*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
6397*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
6398*280575beSPatrick McGehearty	add	%i1, 64, %i1
6399*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_011_loop
6400*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6401*280575beSPatrick McGehearty	ba	.ci_unaln_done
6402*280575beSPatrick McGehearty	nop
6403*280575beSPatrick McGehearty
/*
 * Block-copy loop for an unaligned source, variant 010.
 *
 * Entry preloads the doublewords at %o4+16 .. %o4+56 into %d4-%d14
 * (loads go through %asi, whose value is set outside this view).
 * Each pass advances %o4 by 64, block-loads the next 64 source bytes
 * into %d16-%d30 with ASI_BLK_AIUS, runs faligndata over adjacent
 * register pairs starting at %d4 to form eight output doublewords in
 * %d48-%d62, moves %d20-%d30 down to %d4-%d14 for the next pass, and
 * block-stores %d48-%d62 to [%i1] before advancing %i1 by 64.
 * %i3 is reduced by 64 per pass; bgu repeats the loop as long as that
 * subtraction leaves a non-zero result without borrow.  The prefetcha is
 * in the non-annulled delay slot of bgu, so it runs on every pass.
 */
6404*280575beSPatrick McGehearty.ci_unaln_010:
6405*280575beSPatrick McGehearty	ldda	[%o4+16]%asi, %d4
6406*280575beSPatrick McGehearty	ldda	[%o4+24]%asi, %d6
6407*280575beSPatrick McGehearty	ldda	[%o4+32]%asi, %d8
6408*280575beSPatrick McGehearty	ldda	[%o4+40]%asi, %d10
6409*280575beSPatrick McGehearty	ldda	[%o4+48]%asi, %d12
6410*280575beSPatrick McGehearty	ldda	[%o4+56]%asi, %d14
6411*280575beSPatrick McGehearty.ci_unaln_010_loop:
6412*280575beSPatrick McGehearty	add	%o4, 64, %o4
6413*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d16
6414*280575beSPatrick McGehearty	faligndata %d4, %d6, %d48
6415*280575beSPatrick McGehearty	faligndata %d6, %d8, %d50
6416*280575beSPatrick McGehearty	faligndata %d8, %d10, %d52
6417*280575beSPatrick McGehearty	faligndata %d10, %d12, %d54
6418*280575beSPatrick McGehearty	faligndata %d12, %d14, %d56
6419*280575beSPatrick McGehearty	faligndata %d14, %d16, %d58
6420*280575beSPatrick McGehearty	faligndata %d16, %d18, %d60
6421*280575beSPatrick McGehearty	faligndata %d18, %d20, %d62
6422*280575beSPatrick McGehearty	fmovd	%d20, %d4
6423*280575beSPatrick McGehearty	fmovd	%d22, %d6
6424*280575beSPatrick McGehearty	fmovd	%d24, %d8
6425*280575beSPatrick McGehearty	fmovd	%d26, %d10
6426*280575beSPatrick McGehearty	fmovd	%d28, %d12
6427*280575beSPatrick McGehearty	fmovd	%d30, %d14
6428*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
6429*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
6430*280575beSPatrick McGehearty	add	%i1, 64, %i1
6431*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_010_loop
6432*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6433*280575beSPatrick McGehearty	ba	.ci_unaln_done
6434*280575beSPatrick McGehearty	nop
6435*280575beSPatrick McGehearty
/*
 * Block-copy loop for an unaligned source, variant 001.
 *
 * Entry preloads the doublewords at %o4+8 .. %o4+56 into %d2-%d14
 * (loads go through %asi, whose value is set outside this view).
 * Each pass advances %o4 by 64, block-loads the next 64 source bytes
 * into %d16-%d30 with ASI_BLK_AIUS, runs faligndata over adjacent
 * register pairs starting at %d2 to form eight output doublewords in
 * %d48-%d62, moves %d18-%d30 down to %d2-%d14 for the next pass, and
 * block-stores %d48-%d62 to [%i1] before advancing %i1 by 64.
 * %i3 is reduced by 64 per pass; bgu repeats the loop as long as that
 * subtraction leaves a non-zero result without borrow.  The prefetcha is
 * in the non-annulled delay slot of bgu, so it runs on every pass.
 */
6436*280575beSPatrick McGehearty.ci_unaln_001:
6437*280575beSPatrick McGehearty	ldda	[%o4+8]%asi, %d2
6438*280575beSPatrick McGehearty	ldda	[%o4+16]%asi, %d4
6439*280575beSPatrick McGehearty	ldda	[%o4+24]%asi, %d6
6440*280575beSPatrick McGehearty	ldda	[%o4+32]%asi, %d8
6441*280575beSPatrick McGehearty	ldda	[%o4+40]%asi, %d10
6442*280575beSPatrick McGehearty	ldda	[%o4+48]%asi, %d12
6443*280575beSPatrick McGehearty	ldda	[%o4+56]%asi, %d14
6444*280575beSPatrick McGehearty.ci_unaln_001_loop:
6445*280575beSPatrick McGehearty	add	%o4, 64, %o4
6446*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d16
6447*280575beSPatrick McGehearty	faligndata %d2, %d4, %d48
6448*280575beSPatrick McGehearty	faligndata %d4, %d6, %d50
6449*280575beSPatrick McGehearty	faligndata %d6, %d8, %d52
6450*280575beSPatrick McGehearty	faligndata %d8, %d10, %d54
6451*280575beSPatrick McGehearty	faligndata %d10, %d12, %d56
6452*280575beSPatrick McGehearty	faligndata %d12, %d14, %d58
6453*280575beSPatrick McGehearty	faligndata %d14, %d16, %d60
6454*280575beSPatrick McGehearty	faligndata %d16, %d18, %d62
6455*280575beSPatrick McGehearty	fmovd	%d18, %d2
6456*280575beSPatrick McGehearty	fmovd	%d20, %d4
6457*280575beSPatrick McGehearty	fmovd	%d22, %d6
6458*280575beSPatrick McGehearty	fmovd	%d24, %d8
6459*280575beSPatrick McGehearty	fmovd	%d26, %d10
6460*280575beSPatrick McGehearty	fmovd	%d28, %d12
6461*280575beSPatrick McGehearty	fmovd	%d30, %d14
6462*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
6463*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
6464*280575beSPatrick McGehearty	add	%i1, 64, %i1
6465*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_001_loop
6466*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6467*280575beSPatrick McGehearty	ba	.ci_unaln_done
6468*280575beSPatrick McGehearty	nop
6469*280575beSPatrick McGehearty
/*
 * Block-copy loop for an unaligned source, variant 000.
 *
 * Entry block-loads the 64 bytes at [%o4] into the registers starting at
 * %d0 with ASI_BLK_AIUS.  Each pass advances %o4 by 64, block-loads the
 * next 64 source bytes into %d16-%d30, runs faligndata over adjacent
 * register pairs starting at %d0 to form eight output doublewords in
 * %d48-%d62 (byte offset taken from %gsr, set up outside this view),
 * moves %d16-%d30 down to %d0-%d14 for the next pass, and block-stores
 * %d48-%d62 to [%i1] before advancing %i1 by 64.
 * %i3 is reduced by 64 per pass; bgu repeats the loop as long as that
 * subtraction leaves a non-zero result without borrow.  The prefetcha is
 * in the non-annulled delay slot of bgu, so it runs on every pass.
 * Unlike the other variants, this one falls through into .ci_unaln_done
 * rather than branching to it.
 */
6470*280575beSPatrick McGehearty.ci_unaln_000:
6471*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d0
6472*280575beSPatrick McGehearty.ci_unaln_000_loop:
6473*280575beSPatrick McGehearty	add	%o4, 64, %o4
6474*280575beSPatrick McGehearty	ldda	[%o4]ASI_BLK_AIUS, %d16
6475*280575beSPatrick McGehearty	faligndata %d0, %d2, %d48
6476*280575beSPatrick McGehearty	faligndata %d2, %d4, %d50
6477*280575beSPatrick McGehearty	faligndata %d4, %d6, %d52
6478*280575beSPatrick McGehearty	faligndata %d6, %d8, %d54
6479*280575beSPatrick McGehearty	faligndata %d8, %d10, %d56
6480*280575beSPatrick McGehearty	faligndata %d10, %d12, %d58
6481*280575beSPatrick McGehearty	faligndata %d12, %d14, %d60
6482*280575beSPatrick McGehearty	faligndata %d14, %d16, %d62
6483*280575beSPatrick McGehearty	fmovd	%d16, %d0
6484*280575beSPatrick McGehearty	fmovd	%d18, %d2
6485*280575beSPatrick McGehearty	fmovd	%d20, %d4
6486*280575beSPatrick McGehearty	fmovd	%d22, %d6
6487*280575beSPatrick McGehearty	fmovd	%d24, %d8
6488*280575beSPatrick McGehearty	fmovd	%d26, %d10
6489*280575beSPatrick McGehearty	fmovd	%d28, %d12
6490*280575beSPatrick McGehearty	fmovd	%d30, %d14
6491*280575beSPatrick McGehearty	stda	%d48, [%i1]ASI_BLK_P
6492*280575beSPatrick McGehearty	subcc	%i3, 64, %i3
6493*280575beSPatrick McGehearty	add	%i1, 64, %i1
6494*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_000_loop
6495*280575beSPatrick McGehearty	prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read
6496*280575beSPatrick McGehearty
/*
 * Tail handling after the 64-byte block loops; %i2 holds the bytes still
 * to be copied.
 *
 * With 15 or fewer bytes left, control goes straight to .ci_unaln_short.
 * The andn that follows the blank line sits in the (non-annulled) delay
 * slot of bleu, so %i3 is computed on both paths.
 *
 * Otherwise the count is split: %i3 becomes the doubleword-multiple part
 * less 8, and %i2 becomes the 0-7 byte remainder plus 8, which is left
 * for the byte-wise code below.  .ci_unaln_by8 keeps the previous
 * aligned source doubleword in %d0, loads the following one from
 * [%o4+8] into %d2, merges the pair with faligndata (byte offset from
 * %gsr, set up outside this view) and stores 8 bytes to [%i1] per pass.
 * %i0 is advanced past the bytes this loop copies so the byte-wise code
 * resumes at the right source address.
 */
6497*280575beSPatrick McGehearty.ci_unaln_done:
6498*280575beSPatrick McGehearty	! Handle trailing bytes, 64 to 127
6499*280575beSPatrick McGehearty	! Dest long word aligned, Src not long word aligned
6500*280575beSPatrick McGehearty	cmp	%i2, 15
6501*280575beSPatrick McGehearty	bleu	%ncc, .ci_unaln_short
6502*280575beSPatrick McGehearty
6503*280575beSPatrick McGehearty	andn	%i2, 0x7, %i3		! %i3 is multiple of 8
6504*280575beSPatrick McGehearty	and	%i2, 0x7, %i2		! residue bytes in %i2
6505*280575beSPatrick McGehearty	add	%i2, 8, %i2
6506*280575beSPatrick McGehearty	sub	%i3, 8, %i3		! insure we don't load past end of src
6507*280575beSPatrick McGehearty	andn	%i0, 0x7, %o4		! %o4 has long word aligned src address
6508*280575beSPatrick McGehearty	add	%i0, %i3, %i0		! advance %i0 to after multiple of 8
6509*280575beSPatrick McGehearty	ldda	[%o4]%asi, %d0		! fetch partial word
6510*280575beSPatrick McGehearty.ci_unaln_by8:
6511*280575beSPatrick McGehearty	ldda	[%o4+8]%asi, %d2
6512*280575beSPatrick McGehearty	add	%o4, 8, %o4
6513*280575beSPatrick McGehearty	faligndata %d0, %d2, %d16
6514*280575beSPatrick McGehearty	subcc	%i3, 8, %i3
6515*280575beSPatrick McGehearty	std	%d16, [%i1]
6516*280575beSPatrick McGehearty	fmovd	%d2, %d0
6517*280575beSPatrick McGehearty	bgu,pt	%ncc, .ci_unaln_by8
6518*280575beSPatrick McGehearty	add	%i1, 8, %i1
6519*280575beSPatrick McGehearty
/*
 * Copy 8 bytes one byte at a time when at least 8 remain in %i2;
 * otherwise skip to .ci_unalnfin.
 *
 * Source bytes are read individually with lduba through %asi and
 * assembled into two 32-bit words, first byte in bits 31:24, then stored
 * with stw at [%i1] and [%i1+4].  Afterwards %i0 and %i1 advance by 8
 * and %i2 drops by 8, and control falls through into .ci_unalnfin.
 * %o3 and %o4 are used as scratch.
 */
6520*280575beSPatrick McGehearty.ci_unaln_short:
6521*280575beSPatrick McGehearty	cmp	%i2, 8
6522*280575beSPatrick McGehearty	blt,pt	%ncc, .ci_unalnfin
6523*280575beSPatrick McGehearty	nop
6524*280575beSPatrick McGehearty	lduba	[%i0]%asi, %o4
6525*280575beSPatrick McGehearty	sll	%o4, 24, %o3
6526*280575beSPatrick McGehearty	lduba	[%i0+1]%asi, %o4
6527*280575beSPatrick McGehearty	sll	%o4, 16, %o4
6528*280575beSPatrick McGehearty	or	%o4, %o3, %o3
6529*280575beSPatrick McGehearty	lduba	[%i0+2]%asi, %o4
6530*280575beSPatrick McGehearty	sll	%o4, 8, %o4
6531*280575beSPatrick McGehearty	or	%o4, %o3, %o3
6532*280575beSPatrick McGehearty	lduba	[%i0+3]%asi, %o4
6533*280575beSPatrick McGehearty	or	%o4, %o3, %o3
6534*280575beSPatrick McGehearty	stw	%o3, [%i1]
6535*280575beSPatrick McGehearty	lduba	[%i0+4]%asi, %o4
6536*280575beSPatrick McGehearty	sll	%o4, 24, %o3
6537*280575beSPatrick McGehearty	lduba	[%i0+5]%asi, %o4
6538*280575beSPatrick McGehearty	sll	%o4, 16, %o4
6539*280575beSPatrick McGehearty	or	%o4, %o3, %o3
6540*280575beSPatrick McGehearty	lduba	[%i0+6]%asi, %o4
6541*280575beSPatrick McGehearty	sll	%o4, 8, %o4
6542*280575beSPatrick McGehearty	or	%o4, %o3, %o3
6543*280575beSPatrick McGehearty	lduba	[%i0+7]%asi, %o4
6544*280575beSPatrick McGehearty	or	%o4, %o3, %o3
6545*280575beSPatrick McGehearty	stw	%o3, [%i1+4]
6546*280575beSPatrick McGehearty	add	%i0, 8, %i0
6547*280575beSPatrick McGehearty	add	%i1, 8, %i1
6548*280575beSPatrick McGehearty	sub	%i2, 8, %i2
/*
 * Copy the final 0-7 bytes counted by %i2.
 *
 * .ci_unalnfin: with fewer than 4 bytes left, go to .ci_unalnz; the tst
 * in the delay slot of blt sets the condition codes that the bz at
 * .ci_unalnz examines.  Otherwise four bytes are read with lduba,
 * assembled into one word (first byte in bits 31:24) and stored with
 * stw.  The subcc near the top of that sequence sets the condition
 * codes tested by bnz: a non-zero remainder continues at .ci_unaln3x,
 * a zero remainder goes to .ci_exit.
 *
 * .ci_unalnz: nothing left -> .ci_exit.  The write to %gsr is in the
 * non-annulled delay slot of bz, so it runs whether or not the branch
 * is taken.
 * NOTE(review): within this view that %gsr write is reached only via
 * .ci_unalnz; the paths leaving through bnz or "ba .ci_exit" above do
 * not pass it -- confirm %gsr is restored elsewhere on those paths.
 *
 * .ci_unaln3x: copies one, two or three bytes, leaving for .ci_exit as
 * soon as the count reaches zero.  Each stb sits in the delay slot of
 * the preceding bz and therefore executes before the branch resolves.
 */
6549*280575beSPatrick McGehearty.ci_unalnfin:
6550*280575beSPatrick McGehearty	cmp	%i2, 4
6551*280575beSPatrick McGehearty	blt,pt	%ncc, .ci_unalnz
6552*280575beSPatrick McGehearty	tst	%i2
6553*280575beSPatrick McGehearty	lduba	[%i0]%asi, %o3		! read byte
6554*280575beSPatrick McGehearty	subcc	%i2, 4, %i2		! reduce count by 4
6555*280575beSPatrick McGehearty	sll	%o3, 24, %o3		! position
6556*280575beSPatrick McGehearty	lduba	[%i0+1]%asi, %o4
6557*280575beSPatrick McGehearty	sll	%o4, 16, %o4		! position
6558*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
6559*280575beSPatrick McGehearty	lduba	[%i0+2]%asi, %o4
6560*280575beSPatrick McGehearty	sll	%o4, 8, %o4		! position
6561*280575beSPatrick McGehearty	or	%o4, %o3, %o3		! merge
6562*280575beSPatrick McGehearty	add	%i1, 4, %i1		! advance dst by 4
6563*280575beSPatrick McGehearty	lduba	[%i0+3]%asi, %o4
6564*280575beSPatrick McGehearty	add	%i0, 4, %i0		! advance src by 4
6565*280575beSPatrick McGehearty	or	%o4, %o3, %o4		! merge
6566*280575beSPatrick McGehearty	bnz,pt	%ncc, .ci_unaln3x
6567*280575beSPatrick McGehearty	stw	%o4, [%i1-4]
6568*280575beSPatrick McGehearty	ba	.ci_exit
6569*280575beSPatrick McGehearty	nop
6570*280575beSPatrick McGehearty.ci_unalnz:
6571*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_exit
6572*280575beSPatrick McGehearty	wr	%l5, %g0, %gsr		! restore %gsr
6573*280575beSPatrick McGehearty.ci_unaln3x:				! Exactly 1, 2, or 3 bytes remain
6574*280575beSPatrick McGehearty	subcc	%i2, 1, %i2		! reduce count for cc test
6575*280575beSPatrick McGehearty	lduba	[%i0]%asi, %o4		! load one byte
6576*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_exit
6577*280575beSPatrick McGehearty	stb	%o4, [%i1]		! store one byte
6578*280575beSPatrick McGehearty	lduba	[%i0+1]%asi, %o4	! load second byte
6579*280575beSPatrick McGehearty	subcc	%i2, 1, %i2
6580*280575beSPatrick McGehearty	bz,pt	%ncc, .ci_exit
6581*280575beSPatrick McGehearty	stb	%o4, [%i1+1]		! store second byte
6582*280575beSPatrick McGehearty	lduba	[%i0+2]%asi, %o4	! load third byte
6583*280575beSPatrick McGehearty	stb	%o4, [%i1+2]		! store third byte
/*
 * Common successful exit of the FP-based copy path.
 *
 * If %g1 is non-zero, branch to .ci_fp_restore, which expands
 * BLD_FP_FROMSTACK(%o4) (macro defined outside this view).  Otherwise
 * expand FZERO, write %g1 (zero on this path) to %fprs and continue at
 * .ci_ex2 with a membar #Sync in the branch delay slot.
 * NOTE(review): %g1 presumably holds the %fprs value saved on entry --
 * confirm against the setup code outside this view.
 *
 * .ci_ex2 clears FPUSED_FLAG from SAVED_LOFAULT, stores the result back
 * to the thread's t_lofault, and returns 0 in the caller's %o0 via
 * "restore %g0, 0, %o0" in the delay slot of ret.
 */
6584*280575beSPatrick McGehearty.ci_exit:
6585*280575beSPatrick McGehearty	brnz	%g1, .ci_fp_restore
6586*280575beSPatrick McGehearty	nop
6587*280575beSPatrick McGehearty	FZERO
6588*280575beSPatrick McGehearty	wr	%g1, %g0, %fprs
6589*280575beSPatrick McGehearty	ba,pt	%ncc, .ci_ex2
6590*280575beSPatrick McGehearty	membar	#Sync
6591*280575beSPatrick McGehearty.ci_fp_restore:
6592*280575beSPatrick McGehearty	BLD_FP_FROMSTACK(%o4)
6593*280575beSPatrick McGehearty.ci_ex2:
6594*280575beSPatrick McGehearty	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
6595*280575beSPatrick McGehearty	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
6596*280575beSPatrick McGehearty	ret
6597*280575beSPatrick McGehearty	restore %g0, 0, %o0
6598*280575beSPatrick McGehearty
/*
 * Error exit for copyin.
 *
 * Loads the current thread's T_COPYOPS pointer.  If it is non-NULL, the
 * CP_COPYIN entry of that vector is loaded into %g2 and jumped to; jmp
 * does not write %o7, so that handler returns directly to our caller.
 * If the pointer is NULL, return -1 in %o0.
 * Uses retl, so this code expects to run in the caller's register window.
 */
6599*280575beSPatrick McGehearty.copyin_err:
6600*280575beSPatrick McGehearty	ldn	[THREAD_REG + T_COPYOPS], %o4
6601*280575beSPatrick McGehearty	brz	%o4, 2f
6602*280575beSPatrick McGehearty	nop
6603*280575beSPatrick McGehearty	ldn	[%o4 + CP_COPYIN], %g2
6604*280575beSPatrick McGehearty	jmp	%g2
6605*280575beSPatrick McGehearty	nop
6606*280575beSPatrick McGehearty2:
6607*280575beSPatrick McGehearty	retl
6608*280575beSPatrick McGehearty	mov	-1, %o0
6609*280575beSPatrick McGehearty
6610*280575beSPatrick McGehearty#else	/* NIAGARA_IMPL */
66117c478bd9Sstevel@tonic-gate.do_copyin:
66127c478bd9Sstevel@tonic-gate	!
66137c478bd9Sstevel@tonic-gate	! Check the length and bail if zero.
66147c478bd9Sstevel@tonic-gate	!
66157c478bd9Sstevel@tonic-gate	tst	%o2
66167c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, 1f
66177c478bd9Sstevel@tonic-gate	nop
66187c478bd9Sstevel@tonic-gate	retl
66197c478bd9Sstevel@tonic-gate	clr	%o0
66207c478bd9Sstevel@tonic-gate1:
66217c478bd9Sstevel@tonic-gate	sethi	%hi(copyio_fault), %o4
66227c478bd9Sstevel@tonic-gate	or	%o4, %lo(copyio_fault), %o4
66237c478bd9Sstevel@tonic-gate	sethi	%hi(copyio_fault_nowindow), %o3
66247c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
66257c478bd9Sstevel@tonic-gate	or	%o3, %lo(copyio_fault_nowindow), %o3
66267c478bd9Sstevel@tonic-gate	membar	#Sync
66277c478bd9Sstevel@tonic-gate	stn	%o3, [THREAD_REG + T_LOFAULT]
66287c478bd9Sstevel@tonic-gate
66297c478bd9Sstevel@tonic-gate	mov	%o0, SAVE_SRC
66307c478bd9Sstevel@tonic-gate	mov	%o1, SAVE_DST
66317c478bd9Sstevel@tonic-gate	mov	%o2, SAVE_COUNT
66327c478bd9Sstevel@tonic-gate
66337c478bd9Sstevel@tonic-gate	!
66347c478bd9Sstevel@tonic-gate	! Check to see if we're more than SMALL_LIMIT.
66357c478bd9Sstevel@tonic-gate	!
66367c478bd9Sstevel@tonic-gate	subcc	%o2, SMALL_LIMIT, %o3
66377c478bd9Sstevel@tonic-gate	bgu,a,pt %ncc, .dci_ns
66387c478bd9Sstevel@tonic-gate	or	%o0, %o1, %o3
66397c478bd9Sstevel@tonic-gate	!
66407c478bd9Sstevel@tonic-gate	! What was previously ".small_copyin"
66417c478bd9Sstevel@tonic-gate	!
66427c478bd9Sstevel@tonic-gate.dcibcp:
66437c478bd9Sstevel@tonic-gate	sub	%g0, %o2, %o3		! setup for copy loop
66447c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
66457c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
66467c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dcicl
66477c478bd9Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
66487c478bd9Sstevel@tonic-gate	!
66497c478bd9Sstevel@tonic-gate	! %o0 and %o1 point at the end and remain pointing at the end
66507c478bd9Sstevel@tonic-gate	! of their buffers. We pull things out by adding %o3 (which is
66517c478bd9Sstevel@tonic-gate	! the negation of the length) to the buffer end which gives us
66527c478bd9Sstevel@tonic-gate	! the current location in the buffers. By incrementing %o3 we walk
66537c478bd9Sstevel@tonic-gate	! through both buffers without having to bump each buffer's
66547c478bd9Sstevel@tonic-gate	! pointer. A very fast 4 instruction loop.
66557c478bd9Sstevel@tonic-gate	!
66567c478bd9Sstevel@tonic-gate	.align 16
66577c478bd9Sstevel@tonic-gate.dcicl:
66587c478bd9Sstevel@tonic-gate	stb	%o4, [%o1 + %o3]
66597c478bd9Sstevel@tonic-gate	inccc	%o3
66607c478bd9Sstevel@tonic-gate	bl,a,pt %ncc, .dcicl
66617c478bd9Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
66627c478bd9Sstevel@tonic-gate	!
66637c478bd9Sstevel@tonic-gate	! We're done. Go home.
66647c478bd9Sstevel@tonic-gate	!
66657c478bd9Sstevel@tonic-gate	membar	#Sync
66667c478bd9Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
66677c478bd9Sstevel@tonic-gate	retl
66687c478bd9Sstevel@tonic-gate	clr	%o0
66697c478bd9Sstevel@tonic-gate	!
66707c478bd9Sstevel@tonic-gate	! Try aligned copies from here.
66717c478bd9Sstevel@tonic-gate	!
66727c478bd9Sstevel@tonic-gate.dci_ns:
66737c478bd9Sstevel@tonic-gate	!
66747c478bd9Sstevel@tonic-gate	! See if we're single byte aligned. If we are, check the
66757c478bd9Sstevel@tonic-gate	! limit for single byte copies. If we're smaller, or equal,
66767c478bd9Sstevel@tonic-gate	! bounce to the byte for byte copy loop. Otherwise do it in
66777c478bd9Sstevel@tonic-gate	! HW (if enabled).
66787c478bd9Sstevel@tonic-gate	!
66797c478bd9Sstevel@tonic-gate	btst	1, %o3
66807c478bd9Sstevel@tonic-gate	bz,a,pt	%icc, .dcih8
66817c478bd9Sstevel@tonic-gate	btst	7, %o3
66827c478bd9Sstevel@tonic-gate	!
66837c478bd9Sstevel@tonic-gate	! We're single byte aligned.
66847c478bd9Sstevel@tonic-gate	!
66857c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3
66867c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
66877c478bd9Sstevel@tonic-gate	!
66887c478bd9Sstevel@tonic-gate	! Is HW copy on? If not do everything byte for byte.
66897c478bd9Sstevel@tonic-gate	!
66907c478bd9Sstevel@tonic-gate	tst	%o3
66917c478bd9Sstevel@tonic-gate	bz,pn	%icc, .dcibcp
66927c478bd9Sstevel@tonic-gate	subcc	%o3, %o2, %o3
66937c478bd9Sstevel@tonic-gate	!
66947c478bd9Sstevel@tonic-gate	! Are we bigger than the HW limit? If not
66957c478bd9Sstevel@tonic-gate	! go to byte for byte.
66967c478bd9Sstevel@tonic-gate	!
66977c478bd9Sstevel@tonic-gate	bge,pt	%ncc, .dcibcp
66987c478bd9Sstevel@tonic-gate	nop
66997c478bd9Sstevel@tonic-gate	!
67007c478bd9Sstevel@tonic-gate	! We're big enough and copy is on. Do it with HW.
67017c478bd9Sstevel@tonic-gate	!
67027c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .big_copyin
67037c478bd9Sstevel@tonic-gate	nop
67047c478bd9Sstevel@tonic-gate.dcih8:
67057c478bd9Sstevel@tonic-gate	!
67067c478bd9Sstevel@tonic-gate	! 8 byte aligned?
67077c478bd9Sstevel@tonic-gate	!
67087c478bd9Sstevel@tonic-gate	bnz,a	%ncc, .dcih4
67097c478bd9Sstevel@tonic-gate	btst	3, %o3
67107c478bd9Sstevel@tonic-gate	!
67117c478bd9Sstevel@tonic-gate	! We're eight byte aligned.
67127c478bd9Sstevel@tonic-gate	!
67137c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3
67147c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
67157c478bd9Sstevel@tonic-gate	!
67167c478bd9Sstevel@tonic-gate	! Is HW assist on? If not, do it with the aligned copy.
67177c478bd9Sstevel@tonic-gate	!
67187c478bd9Sstevel@tonic-gate	tst	%o3
67197c478bd9Sstevel@tonic-gate	bz,pn	%icc, .dcis8
67207c478bd9Sstevel@tonic-gate	subcc	%o3, %o2, %o3
67217c478bd9Sstevel@tonic-gate	bge	%ncc, .dcis8
67227c478bd9Sstevel@tonic-gate	nop
67237c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .big_copyin
67247c478bd9Sstevel@tonic-gate	nop
67257c478bd9Sstevel@tonic-gate.dcis8:
67267c478bd9Sstevel@tonic-gate	!
67277c478bd9Sstevel@tonic-gate	! Housekeeping for copy loops. Uses same idea as in the byte for
67287c478bd9Sstevel@tonic-gate	! byte copy loop above.
67297c478bd9Sstevel@tonic-gate	!
67307c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
67317c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
67327c478bd9Sstevel@tonic-gate	sub	%g0, %o2, %o3
67337c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .didebc
67347c478bd9Sstevel@tonic-gate	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
67357c478bd9Sstevel@tonic-gate	!
67367c478bd9Sstevel@tonic-gate	! 4 byte aligned?
67377c478bd9Sstevel@tonic-gate	!
67387c478bd9Sstevel@tonic-gate.dcih4:
67397c478bd9Sstevel@tonic-gate	bnz	%ncc, .dcih2
67407c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3
67417c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
67427c478bd9Sstevel@tonic-gate	!
67437c478bd9Sstevel@tonic-gate	! Is HW assist on? If not, do it with the aligned copy.
67447c478bd9Sstevel@tonic-gate	!
67457c478bd9Sstevel@tonic-gate	tst	%o3
67467c478bd9Sstevel@tonic-gate	bz,pn	%icc, .dcis4
67477c478bd9Sstevel@tonic-gate	subcc	%o3, %o2, %o3
67487c478bd9Sstevel@tonic-gate	!
67497c478bd9Sstevel@tonic-gate	! We're negative if our size is larger than hw_copy_limit_4.
67507c478bd9Sstevel@tonic-gate	!
67517c478bd9Sstevel@tonic-gate	bge	%ncc, .dcis4
67527c478bd9Sstevel@tonic-gate	nop
67537c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .big_copyin
67547c478bd9Sstevel@tonic-gate	nop
67557c478bd9Sstevel@tonic-gate.dcis4:
67567c478bd9Sstevel@tonic-gate	!
67577c478bd9Sstevel@tonic-gate	! Housekeeping for copy loops. Uses same idea as in the byte
67587c478bd9Sstevel@tonic-gate	! for byte copy loop above.
67597c478bd9Sstevel@tonic-gate	!
67607c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
67617c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
67627c478bd9Sstevel@tonic-gate	sub	%g0, %o2, %o3
67637c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .didfbc
67647c478bd9Sstevel@tonic-gate	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
/*
 * Two-byte-aligned case: decide between the halfword copy loop (.dcis2)
 * and the hardware-assisted block copy (.big_copyin).
 *
 * hw_copy_limit_2 is loaded into %o3.  A zero limit, or a limit greater
 * than or equal to the length in %o2 (subcc limit - length, then bge),
 * selects .dcis2; otherwise control goes to .big_copyin.
 *
 * NOTE(review): the delay slot of the branch at .dcih4 that leads here
 * is a sethi, which does not set condition codes, so the bleu below
 * appears to test the codes left by the earlier "btst 3, %o3" rather
 * than a size comparison.  Confirm whether the "smallness" check
 * mentioned in the comment below is still performed anywhere.
 */
67657c478bd9Sstevel@tonic-gate.dcih2:
67667c478bd9Sstevel@tonic-gate	!
67677c478bd9Sstevel@tonic-gate	! We're two byte aligned. Check for "smallness"
67687c478bd9Sstevel@tonic-gate	! done in delay at .dcih4
67697c478bd9Sstevel@tonic-gate	!
67707c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .dcis2
67717c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3
67727c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
67737c478bd9Sstevel@tonic-gate	!
67747c478bd9Sstevel@tonic-gate	! Is HW assist on? If not, do it with the aligned copy.
67757c478bd9Sstevel@tonic-gate	!
67767c478bd9Sstevel@tonic-gate	tst	%o3
67777c478bd9Sstevel@tonic-gate	bz,pn	%icc, .dcis2
67787c478bd9Sstevel@tonic-gate	subcc	%o3, %o2, %o3
67797c478bd9Sstevel@tonic-gate	!
67807c478bd9Sstevel@tonic-gate	! Are we larger than the HW limit?
67817c478bd9Sstevel@tonic-gate	!
67827c478bd9Sstevel@tonic-gate	bge	%ncc, .dcis2
67837c478bd9Sstevel@tonic-gate	nop
67847c478bd9Sstevel@tonic-gate	!
67857c478bd9Sstevel@tonic-gate	! HW assist is on and we're large enough to use it.
67867c478bd9Sstevel@tonic-gate	!
67877c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .big_copyin
67887c478bd9Sstevel@tonic-gate	nop
67897c478bd9Sstevel@tonic-gate	!
67907c478bd9Sstevel@tonic-gate	! Housekeeping for copy loops. Uses same idea as in the byte
67917c478bd9Sstevel@tonic-gate	! for byte copy loop above.
67927c478bd9Sstevel@tonic-gate	!
67937c478bd9Sstevel@tonic-gate.dcis2:
67947c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
67957c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
67967c478bd9Sstevel@tonic-gate	sub	%g0, %o2, %o3
67977c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .didtbc
67987c478bd9Sstevel@tonic-gate	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
67997c478bd9Sstevel@tonic-gate	!
68007c478bd9Sstevel@tonic-gate.small_copyin:
68017c478bd9Sstevel@tonic-gate	!
68027c478bd9Sstevel@tonic-gate	! Why are we doing this AGAIN? There are certain conditions in
68037c478bd9Sstevel@tonic-gate	! big copyin that will cause us to forgo the HW assisted copies
68047c478bd9Sstevel@tonic-gate	! and bounce back to a non-hw assisted copy. This dispatches
68057c478bd9Sstevel@tonic-gate	! those copies. Note that we branch around this in the main line
68067c478bd9Sstevel@tonic-gate	! code.
68077c478bd9Sstevel@tonic-gate	!
68087c478bd9Sstevel@tonic-gate	! We make no check for limits or HW enablement here. We've
68097c478bd9Sstevel@tonic-gate	! already been told that we're a poster child so just go off
68107c478bd9Sstevel@tonic-gate	! and do it.
68117c478bd9Sstevel@tonic-gate	!
68127c478bd9Sstevel@tonic-gate	or	%o0, %o1, %o3
68137c478bd9Sstevel@tonic-gate	btst	1, %o3
68147c478bd9Sstevel@tonic-gate	bnz	%icc, .dcibcp		! Most likely
68157c478bd9Sstevel@tonic-gate	btst	7, %o3
68167c478bd9Sstevel@tonic-gate	bz	%icc, .dcis8
68177c478bd9Sstevel@tonic-gate	btst	3, %o3
68187c478bd9Sstevel@tonic-gate	bz	%icc, .dcis4
68197c478bd9Sstevel@tonic-gate	nop
68207c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dcis2
68217c478bd9Sstevel@tonic-gate	nop
68227c478bd9Sstevel@tonic-gate	!
68237c478bd9Sstevel@tonic-gate	! Eight byte aligned copies. A steal from the original .small_copyin
68247c478bd9Sstevel@tonic-gate	! with modifications. %o2 is number of 8 byte chunks to copy. When
68257c478bd9Sstevel@tonic-gate	! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
68267c478bd9Sstevel@tonic-gate	! to copy.
68277c478bd9Sstevel@tonic-gate	!
68287c478bd9Sstevel@tonic-gate	.align 32
68297c478bd9Sstevel@tonic-gate.didebc:
68307c478bd9Sstevel@tonic-gate	ldxa	[%o0 + %o3]ASI_USER, %o4
68317c478bd9Sstevel@tonic-gate	deccc	%o2
68327c478bd9Sstevel@tonic-gate	stx	%o4, [%o1 + %o3]
68337c478bd9Sstevel@tonic-gate	bg,pt	%ncc, .didebc
68347c478bd9Sstevel@tonic-gate	addcc	%o3, 8, %o3
68357c478bd9Sstevel@tonic-gate	!
68367c478bd9Sstevel@tonic-gate	! End of copy loop. Most 8 byte aligned copies end here.
68377c478bd9Sstevel@tonic-gate	!
68387c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .dcifh
68397c478bd9Sstevel@tonic-gate	nop
68407c478bd9Sstevel@tonic-gate	!
68417c478bd9Sstevel@tonic-gate	! Something is left. Do it byte for byte.
68427c478bd9Sstevel@tonic-gate	!
68437c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dcicl
68447c478bd9Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
68457c478bd9Sstevel@tonic-gate	!
68467c478bd9Sstevel@tonic-gate	! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
68477c478bd9Sstevel@tonic-gate	!
68487c478bd9Sstevel@tonic-gate	.align 32
68497c478bd9Sstevel@tonic-gate.didfbc:
68507c478bd9Sstevel@tonic-gate	lduwa	[%o0 + %o3]ASI_USER, %o4
68517c478bd9Sstevel@tonic-gate	deccc	%o2
68527c478bd9Sstevel@tonic-gate	st	%o4, [%o1 + %o3]
68537c478bd9Sstevel@tonic-gate	bg,pt	%ncc, .didfbc
68547c478bd9Sstevel@tonic-gate	addcc	%o3, 4, %o3
68557c478bd9Sstevel@tonic-gate	!
68567c478bd9Sstevel@tonic-gate	! End of copy loop. Most 4 byte aligned copies end here.
68577c478bd9Sstevel@tonic-gate	!
68587c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .dcifh
68597c478bd9Sstevel@tonic-gate	nop
68607c478bd9Sstevel@tonic-gate	!
68617c478bd9Sstevel@tonic-gate	! Something is left. Do it byte for byte.
68627c478bd9Sstevel@tonic-gate	!
68637c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .dcicl
68647c478bd9Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
68657c478bd9Sstevel@tonic-gate	!
68667c478bd9Sstevel@tonic-gate	! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
68677c478bd9Sstevel@tonic-gate	! copy.
68687c478bd9Sstevel@tonic-gate	!
68697c478bd9Sstevel@tonic-gate	.align 32
68707c478bd9Sstevel@tonic-gate.didtbc:
68717c478bd9Sstevel@tonic-gate	lduha	[%o0 + %o3]ASI_USER, %o4
68727c478bd9Sstevel@tonic-gate	deccc	%o2
68737c478bd9Sstevel@tonic-gate	sth	%o4, [%o1 + %o3]
68747c478bd9Sstevel@tonic-gate	bg,pt	%ncc, .didtbc
68757c478bd9Sstevel@tonic-gate	addcc	%o3, 2, %o3
68767c478bd9Sstevel@tonic-gate	!
68777c478bd9Sstevel@tonic-gate	! End of copy loop. Most 2 byte aligned copies end here.
68787c478bd9Sstevel@tonic-gate	!
68797c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .dcifh
68807c478bd9Sstevel@tonic-gate	nop
68817c478bd9Sstevel@tonic-gate	!
68827c478bd9Sstevel@tonic-gate	! Deal with the last byte
68837c478bd9Sstevel@tonic-gate	!
68847c478bd9Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
68857c478bd9Sstevel@tonic-gate	stb	%o4, [%o1 + %o3]
68867c478bd9Sstevel@tonic-gate.dcifh:
68877c478bd9Sstevel@tonic-gate	membar	#Sync
68887c478bd9Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
68897c478bd9Sstevel@tonic-gate	retl
68907c478bd9Sstevel@tonic-gate	clr	%o0
68917c478bd9Sstevel@tonic-gate
68927c478bd9Sstevel@tonic-gate.big_copyin:
68937c478bd9Sstevel@tonic-gate	! We're going off to do a block copy.
68947c478bd9Sstevel@tonic-gate	! Switch fault handlers and grab a window. We
68957c478bd9Sstevel@tonic-gate	! don't do a membar #Sync since we've done only
68967c478bd9Sstevel@tonic-gate	! kernel data to this point.
68977c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
68987c478bd9Sstevel@tonic-gate
68997c478bd9Sstevel@tonic-gate	! Copyins that reach here are larger than 256 bytes. The
69007c478bd9Sstevel@tonic-gate	! hw_copy_limit_1 is set to 256. Never set this limit to less
69017c478bd9Sstevel@tonic-gate	! than 128 bytes.
6902340af271Swh94709	save	%sp, -SA(MINFRAME), %sp
/*
 * Start of the block-copy path: swap the pointer registers and copy
 * enough leading bytes to bring the destination to a 64-byte boundary.
 *
 * After the swap the code loads from [%i1] with ASI_USER and stores to
 * [%i0], i.e. %i1 is the user source and %i0 the destination; %i5 is
 * scratch.  %i2 is the byte count.
 *
 * If the destination is already 64-byte aligned, control goes directly
 * to copyin_blalign.  The "sub %i3, 0x40, %i3" sits in the non-annulled
 * delay slot of that bz and so runs on both paths; copyin_blalign
 * recomputes %i3 from %i2 before using it.  Otherwise %i3 becomes the
 * number of bytes needed to reach alignment and %i2 is reduced by it.
 *
 * The low bits of (dst | src) then select the widest access both
 * pointers allow: 8-byte (.ci_alewdcp), 4-byte (.ci_alwdcp), 2-byte
 * (.ci_alhlfwdcp) or single-byte (local label 1).  Each loop runs until
 * %i3 reaches zero, advancing the destination pointer in the branch
 * delay slot, and then continues at copyin_blalign (the 8-byte loop
 * falls through into it).
 */
69037c478bd9Sstevel@tonic-gate.do_blockcopyin:
69047c478bd9Sstevel@tonic-gate
69057c478bd9Sstevel@tonic-gate	! Swap src/dst since the code below is memcpy code
69067c478bd9Sstevel@tonic-gate	! and memcpy/bcopy have different calling sequences
69077c478bd9Sstevel@tonic-gate	mov	%i1, %i5
69087c478bd9Sstevel@tonic-gate	mov	%i0, %i1
69097c478bd9Sstevel@tonic-gate	mov	%i5, %i0
69107c478bd9Sstevel@tonic-gate
6911340af271Swh94709	! Block (64 bytes) align the destination.
6912340af271Swh94709	andcc	%i0, 0x3f, %i3		! is dst block aligned
6913340af271Swh94709	bz	%ncc, copyin_blalign	! dst already block aligned
6914340af271Swh94709	sub	%i3, 0x40, %i3
6915340af271Swh94709	neg	%i3			! bytes till dst 64 bytes aligned
6916340af271Swh94709	sub	%i2, %i3, %i2		! update i2 with new count
69177c478bd9Sstevel@tonic-gate
6918340af271Swh94709	! Based on source and destination alignment do
6919340af271Swh94709	! either 8 bytes, 4 bytes, 2 bytes or byte copy.
69207c478bd9Sstevel@tonic-gate
6921340af271Swh94709	! Is dst & src 8B aligned
6922340af271Swh94709	or	%i0, %i1, %o2
6923340af271Swh94709	andcc	%o2, 0x7, %g0
6924340af271Swh94709	bz	%ncc, .ci_alewdcp
6925340af271Swh94709	nop
6926340af271Swh94709
6927340af271Swh94709	! Is dst & src 4B aligned
6928340af271Swh94709	andcc	%o2, 0x3, %g0
6929340af271Swh94709	bz	%ncc, .ci_alwdcp
6930340af271Swh94709	nop
6931340af271Swh94709
6932340af271Swh94709	! Is dst & src 2B aligned
6933340af271Swh94709	andcc	%o2, 0x1, %g0
6934340af271Swh94709	bz	%ncc, .ci_alhlfwdcp
6935340af271Swh94709	nop
6936340af271Swh94709
6937340af271Swh94709	! 1B aligned
6938340af271Swh947091:	lduba	[%i1]ASI_USER, %o2
6939340af271Swh94709	stb	%o2, [%i0]
69407c478bd9Sstevel@tonic-gate	inc	%i1
69417c478bd9Sstevel@tonic-gate	deccc	%i3
6942340af271Swh94709	bgu,pt	%ncc, 1b
69437c478bd9Sstevel@tonic-gate	inc	%i0
69447c478bd9Sstevel@tonic-gate
69457c478bd9Sstevel@tonic-gate	ba	copyin_blalign
6946340af271Swh94709	nop
69477c478bd9Sstevel@tonic-gate
6948340af271Swh94709	! dst & src 4B aligned
6949340af271Swh94709.ci_alwdcp:
6950340af271Swh94709	lda	[%i1]ASI_USER, %o2
6951340af271Swh94709	st	%o2, [%i0]
6952340af271Swh94709	add	%i1, 0x4, %i1
6953340af271Swh94709	subcc	%i3, 0x4, %i3
6954340af271Swh94709	bgu,pt	%ncc, .ci_alwdcp
6955340af271Swh94709	add	%i0, 0x4, %i0
6956340af271Swh94709
6957340af271Swh94709	ba	copyin_blalign
6958340af271Swh94709	nop
6959340af271Swh94709
6960340af271Swh94709	! dst & src 2B aligned
6961340af271Swh94709.ci_alhlfwdcp:
6962340af271Swh94709	lduha	[%i1]ASI_USER, %o2
6963340af271Swh94709	stuh	%o2, [%i0]
6964340af271Swh94709	add	%i1, 0x2, %i1
6965340af271Swh94709	subcc	%i3, 0x2, %i3
6966340af271Swh94709	bgu,pt	%ncc, .ci_alhlfwdcp
6967340af271Swh94709	add	%i0, 0x2, %i0
6968340af271Swh94709
6969340af271Swh94709	ba	copyin_blalign
6970340af271Swh94709	nop
6971340af271Swh94709
6972340af271Swh94709	! dst & src 8B aligned
6973340af271Swh94709.ci_alewdcp:
69747c478bd9Sstevel@tonic-gate	ldxa	[%i1]ASI_USER, %o2
69757c478bd9Sstevel@tonic-gate	stx	%o2, [%i0]
69767c478bd9Sstevel@tonic-gate	add	%i1, 0x8, %i1
69777c478bd9Sstevel@tonic-gate	subcc	%i3, 0x8, %i3
6978340af271Swh94709	bgu,pt	%ncc, .ci_alewdcp
69797c478bd9Sstevel@tonic-gate	add	%i0, 0x8, %i0
69807c478bd9Sstevel@tonic-gate
69817c478bd9Sstevel@tonic-gatecopyin_blalign:
69827c478bd9Sstevel@tonic-gate	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
69837c478bd9Sstevel@tonic-gate	sub	%i2, %i3, %i2		! Residue bytes in %i2
69847c478bd9Sstevel@tonic-gate
69857c478bd9Sstevel@tonic-gate	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
69867c478bd9Sstevel@tonic-gate
69877c478bd9Sstevel@tonic-gate	andcc	%i1, 0xf, %o2		! is src quadword aligned
69887c478bd9Sstevel@tonic-gate	bz,pn	%xcc, .ci_blkcpy	! src offset in %o2 (last 4-bits)
69897c478bd9Sstevel@tonic-gate	nop
69907c478bd9Sstevel@tonic-gate	cmp	%o2, 0x8
69917c478bd9Sstevel@tonic-gate	bg	.ci_upper_double
69927c478bd9Sstevel@tonic-gate	nop
69937c478bd9Sstevel@tonic-gate	bl	.ci_lower_double
69947c478bd9Sstevel@tonic-gate	nop
69957c478bd9Sstevel@tonic-gate
69967c478bd9Sstevel@tonic-gate	! Falls through when source offset is equal to 8 i.e.
69977c478bd9Sstevel@tonic-gate	! source is double word aligned.
69987c478bd9Sstevel@tonic-gate	! In this case no shift/merge of data is required
69997c478bd9Sstevel@tonic-gate
70007c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
70017c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
7002d142717dSae112802	prefetcha [%l0]ASI_USER, #one_read
70037c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
7004d142717dSae112802	add	%l0, 0x40, %l0
70057c478bd9Sstevel@tonic-gate.ci_loop0:
70067c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
70077c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
70087c478bd9Sstevel@tonic-gate
7009d142717dSae112802	prefetcha [%l0]ASI_USER, #one_read
70107c478bd9Sstevel@tonic-gate
70117c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x0]%asi
70127c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x8]%asi
70137c478bd9Sstevel@tonic-gate
70147c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
70157c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
70167c478bd9Sstevel@tonic-gate
70177c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x10]%asi
70187c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x18]%asi
70197c478bd9Sstevel@tonic-gate
70207c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
70217c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
70227c478bd9Sstevel@tonic-gate
70237c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x20]%asi
70247c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x28]%asi
70257c478bd9Sstevel@tonic-gate
70267c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
70277c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
70287c478bd9Sstevel@tonic-gate
70297c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x30]%asi
70307c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x38]%asi
70317c478bd9Sstevel@tonic-gate
70327c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
70337c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
70347c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, .ci_loop0
70357c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
70367c478bd9Sstevel@tonic-gate	ba	.ci_blkdone
70377c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
70387c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
70397c478bd9Sstevel@tonic-gate
70407c478bd9Sstevel@tonic-gate.ci_lower_double:
70417c478bd9Sstevel@tonic-gate
70427c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
70437c478bd9Sstevel@tonic-gate	sll	%o2, 3, %o0		! %o0 left shift
70447c478bd9Sstevel@tonic-gate	mov	0x40, %o1
70457c478bd9Sstevel@tonic-gate	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
70467c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
7047d142717dSae112802	prefetcha [%l0]ASI_USER, #one_read
70487c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2	! partial data in %l2
70497c478bd9Sstevel@tonic-gate							! and %l3 has complete
70507c478bd9Sstevel@tonic-gate							! data
7051d142717dSae112802	add	%l0, 0x40, %l0
70527c478bd9Sstevel@tonic-gate.ci_loop1:
70537c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
70547c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4	! %l4 has partial data
70557c478bd9Sstevel@tonic-gate							! for this read.
70567c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
70577c478bd9Sstevel@tonic-gate							! into %l2 and %l3
70587c478bd9Sstevel@tonic-gate
7059d142717dSae112802	prefetcha [%l0]ASI_USER, #one_read
70607c478bd9Sstevel@tonic-gate
70617c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x0]%asi
70627c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x8]%asi
70637c478bd9Sstevel@tonic-gate
70647c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
70657c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
70667c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)	! merge %l2 with %l5 and
70677c478bd9Sstevel@tonic-gate							! %l4 from previous read
70687c478bd9Sstevel@tonic-gate							! into %l4 and %l5
70697c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x10]%asi
70707c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x18]%asi
70717c478bd9Sstevel@tonic-gate
70727c478bd9Sstevel@tonic-gate	! Repeat the same for next 32 bytes.
70737c478bd9Sstevel@tonic-gate
70747c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
70757c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
70767c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)
70777c478bd9Sstevel@tonic-gate
70787c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x20]%asi
70797c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x28]%asi
70807c478bd9Sstevel@tonic-gate
70817c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
70827c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
70837c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)
70847c478bd9Sstevel@tonic-gate
70857c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x30]%asi
70867c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x38]%asi
70877c478bd9Sstevel@tonic-gate
70887c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
70897c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
70907c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, .ci_loop1
70917c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
70927c478bd9Sstevel@tonic-gate	ba	.ci_blkdone
70937c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
70947c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
70957c478bd9Sstevel@tonic-gate
70967c478bd9Sstevel@tonic-gate.ci_upper_double:
70977c478bd9Sstevel@tonic-gate
70987c478bd9Sstevel@tonic-gate	sub	%i1, %o2, %i1		! align the src at 16 bytes.
70997c478bd9Sstevel@tonic-gate	sub	%o2, 0x8, %o0
71007c478bd9Sstevel@tonic-gate	sll	%o0, 3, %o0		! %o0 left shift
71017c478bd9Sstevel@tonic-gate	mov	0x40, %o1
71027c478bd9Sstevel@tonic-gate	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
71037c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
7104d142717dSae112802	prefetcha [%l0]ASI_USER, #one_read
71057c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2	! partial data in %l3
71067c478bd9Sstevel@tonic-gate							! for this read and
71077c478bd9Sstevel@tonic-gate							! no data in %l2
7108d142717dSae112802	add	%l0, 0x40, %l0
71097c478bd9Sstevel@tonic-gate.ci_loop2:
71107c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
71117c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4	! %l4 has complete data
71127c478bd9Sstevel@tonic-gate							! and %l5 has partial
71137c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
71147c478bd9Sstevel@tonic-gate							! into %l3 and %l4
7115d142717dSae112802	prefetcha [%l0]ASI_USER, #one_read
71167c478bd9Sstevel@tonic-gate
71177c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x0]%asi
71187c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x8]%asi
71197c478bd9Sstevel@tonic-gate
71207c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
71217c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
71227c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)	! merge %l2 and %l3 with
71237c478bd9Sstevel@tonic-gate							! %l5 from previous read
71247c478bd9Sstevel@tonic-gate							! into %l5 and %l2
71257c478bd9Sstevel@tonic-gate
71267c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x10]%asi
71277c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x18]%asi
71287c478bd9Sstevel@tonic-gate
71297c478bd9Sstevel@tonic-gate	! Repeat the same for next 32 bytes.
71307c478bd9Sstevel@tonic-gate
71317c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
71327c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
71337c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)
71347c478bd9Sstevel@tonic-gate
71357c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x20]%asi
71367c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x28]%asi
71377c478bd9Sstevel@tonic-gate
71387c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
71397c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
71407c478bd9Sstevel@tonic-gate	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)
71417c478bd9Sstevel@tonic-gate
71427c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x30]%asi
71437c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x38]%asi
71447c478bd9Sstevel@tonic-gate
71457c478bd9Sstevel@tonic-gate	add	%l0, 0x40, %l0
71467c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
71477c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, .ci_loop2
71487c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
71497c478bd9Sstevel@tonic-gate	ba	.ci_blkdone
71507c478bd9Sstevel@tonic-gate	add	%i1, %o2, %i1		! increment the source by src offset
71517c478bd9Sstevel@tonic-gate					! the src offset was stored in %o2
71527c478bd9Sstevel@tonic-gate
71537c478bd9Sstevel@tonic-gate
71547c478bd9Sstevel@tonic-gate	! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P
71557c478bd9Sstevel@tonic-gate.ci_blkcpy:
71567c478bd9Sstevel@tonic-gate
71577c478bd9Sstevel@tonic-gate	andn	%i1, 0x3f, %o0		! %o0 has block aligned source
7158d142717dSae112802	prefetcha [%o0]ASI_USER, #one_read
7159d142717dSae112802	add	%o0, 0x40, %o0
71607c478bd9Sstevel@tonic-gate1:
71617c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l0
71627c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
71637c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
71647c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
71657c478bd9Sstevel@tonic-gate
7166d142717dSae112802	prefetcha [%o0]ASI_USER, #one_read
71677c478bd9Sstevel@tonic-gate
71687c478bd9Sstevel@tonic-gate	stxa	%l0, [%i0+0x0]%asi
71697c478bd9Sstevel@tonic-gate
71707c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
71717c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
71727c478bd9Sstevel@tonic-gate	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l6
71737c478bd9Sstevel@tonic-gate	add	%i1, 0x10, %i1
71747c478bd9Sstevel@tonic-gate
71757c478bd9Sstevel@tonic-gate	stxa	%l1, [%i0+0x8]%asi
71767c478bd9Sstevel@tonic-gate	stxa	%l2, [%i0+0x10]%asi
71777c478bd9Sstevel@tonic-gate	stxa	%l3, [%i0+0x18]%asi
71787c478bd9Sstevel@tonic-gate	stxa	%l4, [%i0+0x20]%asi
71797c478bd9Sstevel@tonic-gate	stxa	%l5, [%i0+0x28]%asi
71807c478bd9Sstevel@tonic-gate	stxa	%l6, [%i0+0x30]%asi
71817c478bd9Sstevel@tonic-gate	stxa	%l7, [%i0+0x38]%asi
71827c478bd9Sstevel@tonic-gate
71837c478bd9Sstevel@tonic-gate	add	%o0, 0x40, %o0
71847c478bd9Sstevel@tonic-gate	subcc	%i3, 0x40, %i3
71857c478bd9Sstevel@tonic-gate	bgu,pt	%xcc, 1b
71867c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
71877c478bd9Sstevel@tonic-gate
71887c478bd9Sstevel@tonic-gate.ci_blkdone:
71897c478bd9Sstevel@tonic-gate	membar	#Sync
71907c478bd9Sstevel@tonic-gate
7191340af271Swh94709	brz,pt	%i2, .copyin_exit
71927c478bd9Sstevel@tonic-gate	nop
71937c478bd9Sstevel@tonic-gate
7194340af271Swh94709	! Handle trailing bytes
7195340af271Swh94709	cmp	%i2, 0x8
7196340af271Swh94709	blu,pt	%ncc, .ci_residue
71977c478bd9Sstevel@tonic-gate	nop
71987c478bd9Sstevel@tonic-gate
7199340af271Swh94709	! Can we do some 8B ops
7200340af271Swh94709	or	%i1, %i0, %o2
7201340af271Swh94709	andcc	%o2, 0x7, %g0
7202340af271Swh94709	bnz	%ncc, .ci_last4
7203340af271Swh94709	nop
72047c478bd9Sstevel@tonic-gate
7205340af271Swh94709	! Do 8byte ops as long as possible
7206340af271Swh94709.ci_last8:
72077c478bd9Sstevel@tonic-gate	ldxa	[%i1]ASI_USER, %o2
72087c478bd9Sstevel@tonic-gate	stx	%o2, [%i0]
72097c478bd9Sstevel@tonic-gate	add	%i1, 0x8, %i1
7210340af271Swh94709	sub	%i2, 0x8, %i2
7211340af271Swh94709	cmp	%i2, 0x8
7212340af271Swh94709	bgu,pt	%ncc, .ci_last8
72137c478bd9Sstevel@tonic-gate	add	%i0, 0x8, %i0
72147c478bd9Sstevel@tonic-gate
7215340af271Swh94709	brz,pt	%i2, .copyin_exit
7216340af271Swh94709	nop
7217340af271Swh94709
7218340af271Swh94709	ba	.ci_residue
7219340af271Swh94709	nop
7220340af271Swh94709
7221340af271Swh94709.ci_last4:
7222340af271Swh94709	! Can we do 4B ops
7223340af271Swh94709	andcc	%o2, 0x3, %g0
7224340af271Swh94709	bnz	%ncc, .ci_last2
7225340af271Swh94709	nop
7226340af271Swh947091:
7227340af271Swh94709	lda	[%i1]ASI_USER, %o2
7228340af271Swh94709	st	%o2, [%i0]
7229340af271Swh94709	add	%i1, 0x4, %i1
7230340af271Swh94709	sub	%i2, 0x4, %i2
7231340af271Swh94709	cmp	%i2, 0x4
7232340af271Swh94709	bgu,pt	%ncc, 1b
7233340af271Swh94709	add	%i0, 0x4, %i0
7234340af271Swh94709
7235340af271Swh94709	brz,pt	%i2, .copyin_exit
7236340af271Swh94709	nop
7237340af271Swh94709
7238340af271Swh94709	ba	.ci_residue
7239340af271Swh94709	nop
7240340af271Swh94709
7241340af271Swh94709.ci_last2:
7242340af271Swh94709	! Can we do 2B ops
7243340af271Swh94709	andcc	%o2, 0x1, %g0
7244340af271Swh94709	bnz	%ncc, .ci_residue
7245340af271Swh94709	nop
7246340af271Swh94709
7247340af271Swh947091:
7248340af271Swh94709	lduha	[%i1]ASI_USER, %o2
7249340af271Swh94709	stuh	%o2, [%i0]
7250340af271Swh94709	add	%i1, 0x2, %i1
7251340af271Swh94709	sub	%i2, 0x2, %i2
7252340af271Swh94709	cmp	%i2, 0x2
7253340af271Swh94709	bgu,pt	%ncc, 1b
7254340af271Swh94709	add	%i0, 0x2, %i0
7255340af271Swh94709
7256340af271Swh94709	brz,pt	%i2, .copyin_exit
72577c478bd9Sstevel@tonic-gate	nop
72587c478bd9Sstevel@tonic-gate
72597c478bd9Sstevel@tonic-gate	! Copy the residue as byte copy
72607c478bd9Sstevel@tonic-gate.ci_residue:
72617c478bd9Sstevel@tonic-gate	lduba	[%i1]ASI_USER, %i4
72627c478bd9Sstevel@tonic-gate	stb	%i4, [%i0]
72637c478bd9Sstevel@tonic-gate	inc	%i1
72647c478bd9Sstevel@tonic-gate	deccc	%i2
7265340af271Swh94709	bgu,pt	%xcc, .ci_residue
72667c478bd9Sstevel@tonic-gate	inc	%i0
72677c478bd9Sstevel@tonic-gate
72687c478bd9Sstevel@tonic-gate.copyin_exit:
72697c478bd9Sstevel@tonic-gate	membar	#Sync
72707c478bd9Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
72717c478bd9Sstevel@tonic-gate	ret
72727c478bd9Sstevel@tonic-gate	restore	%g0, 0, %o0
72737c478bd9Sstevel@tonic-gate.copyin_err:
72747c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4
72757c478bd9Sstevel@tonic-gate	brz	%o4, 2f
72767c478bd9Sstevel@tonic-gate	nop
72777c478bd9Sstevel@tonic-gate	ldn	[%o4 + CP_COPYIN], %g2
72787c478bd9Sstevel@tonic-gate	jmp	%g2
72797c478bd9Sstevel@tonic-gate	nop
72807c478bd9Sstevel@tonic-gate2:
72817c478bd9Sstevel@tonic-gate	retl
72827c478bd9Sstevel@tonic-gate	mov	-1, %o0
7283*280575beSPatrick McGehearty#endif	/* NIAGARA_IMPL */
72847c478bd9Sstevel@tonic-gate	SET_SIZE(copyin)
72857c478bd9Sstevel@tonic-gate
72867c478bd9Sstevel@tonic-gate#endif	/* lint */
72877c478bd9Sstevel@tonic-gate
72887c478bd9Sstevel@tonic-gate#ifdef	lint
72897c478bd9Sstevel@tonic-gate
72907c478bd9Sstevel@tonic-gate/*ARGSUSED*/
72917c478bd9Sstevel@tonic-gateint
72927c478bd9Sstevel@tonic-gatexcopyin(const void *uaddr, void *kaddr, size_t count)
72937c478bd9Sstevel@tonic-gate{ return (0); }
72947c478bd9Sstevel@tonic-gate
72957c478bd9Sstevel@tonic-gate#else	/* lint */
72967c478bd9Sstevel@tonic-gate
/*
 * xcopyin - entry point that hands the copy itself to .do_copyin, which
 * is defined outside this block.
 *
 * The address of .xcopyin_err is assembled in REAL_LOFAULT: sethi supplies
 * the high bits and the or, sitting in the delay slot of the branch,
 * supplies the low bits before .do_copyin starts executing.
 *
 * .xcopyin_err: when the thread's T_COPYOPS pointer is non-zero, control
 * is passed to the routine stored at CP_XCOPYIN in that vector.  When it
 * is zero, the routine returns to its caller with %o0 set from %g1.
 * NOTE(review): %g1 is presumably an error code left by the fault path;
 * that is not visible in this block -- confirm.
 */
72977c478bd9Sstevel@tonic-gate	ENTRY(xcopyin)
72987c478bd9Sstevel@tonic-gate	sethi	%hi(.xcopyin_err), REAL_LOFAULT
72997c478bd9Sstevel@tonic-gate	b	.do_copyin
73007c478bd9Sstevel@tonic-gate	or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
73017c478bd9Sstevel@tonic-gate.xcopyin_err:
73027c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4
	/* no copyops vector: take the plain return at 2 below */
73037c478bd9Sstevel@tonic-gate	brz	%o4, 2f
73047c478bd9Sstevel@tonic-gate	nop
	/* copyops vector present: jump to its CP_XCOPYIN entry */
73057c478bd9Sstevel@tonic-gate	ldn	[%o4 + CP_XCOPYIN], %g2
73067c478bd9Sstevel@tonic-gate	jmp	%g2
73077c478bd9Sstevel@tonic-gate	nop
73087c478bd9Sstevel@tonic-gate2:
	/* the mov in retl's delay slot sets the return value from %g1 */
73097c478bd9Sstevel@tonic-gate	retl
73107c478bd9Sstevel@tonic-gate	mov	%g1, %o0
73117c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyin)
73127c478bd9Sstevel@tonic-gate
73137c478bd9Sstevel@tonic-gate#endif	/* lint */
73147c478bd9Sstevel@tonic-gate
73157c478bd9Sstevel@tonic-gate#ifdef	lint
73167c478bd9Sstevel@tonic-gate
73177c478bd9Sstevel@tonic-gate/*ARGSUSED*/
73187c478bd9Sstevel@tonic-gateint
73197c478bd9Sstevel@tonic-gatexcopyin_little(const void *uaddr, void *kaddr, size_t count)
73207c478bd9Sstevel@tonic-gate{ return (0); }
73217c478bd9Sstevel@tonic-gate
73227c478bd9Sstevel@tonic-gate#else	/* lint */
73237c478bd9Sstevel@tonic-gate
/*
 * xcopyin_little - byte-at-a-time copy that reverses the byte order.
 *
 * In:	%o0 = uaddr (source, read with lduba through ASI_AIUSL)
 *	%o1 = kaddr (destination, written with plain stb)
 *	%o2 = count
 * Out:	%o0 = 0 when the copy completes, or the value of %g1 when
 *	.little_err runs.
 *
 * The previous t_lofault value is held in %o5 while .little_err is
 * installed in its place; both exits store %o5 back.
 *
 * Addressing: %o3 starts at -count and is incremented once per byte until
 * it reaches zero.  %o1 is biased to kaddr + count, so [%o1 + %o3] walks
 * the destination from its first byte to its last.  %o0 is biased to
 * uaddr + 2*count - 1 and reduced by 2 per byte, so [%o0 + %o3] starts at
 * the last source byte and moves down by one on each iteration.
 *
 * NOTE(review): %g1 is presumably an error code supplied by the fault
 * path; that is not visible in this block -- confirm.
 */
73247c478bd9Sstevel@tonic-gate	ENTRY(xcopyin_little)
73257c478bd9Sstevel@tonic-gate	sethi	%hi(.little_err), %o4
73267c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o5
73277c478bd9Sstevel@tonic-gate	or	%o4, %lo(.little_err), %o4
73287c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
73297c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
73307c478bd9Sstevel@tonic-gate
	/*
	 * %o3 = -count.  The condition codes written by subcc are the ones
	 * tested by bz below; the intervening add leaves them untouched.
	 * The sub in bz's delay slot is not annulled, so it executes
	 * whether or not the branch is taken.
	 */
73317c478bd9Sstevel@tonic-gate	subcc	%g0, %o2, %o3
73327c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
73337c478bd9Sstevel@tonic-gate	bz,pn	%ncc, 2f		! check for zero bytes
73347c478bd9Sstevel@tonic-gate	sub	%o2, 1, %o4
73357c478bd9Sstevel@tonic-gate	add	%o0, %o4, %o0		! start w/last byte
73367c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
73377c478bd9Sstevel@tonic-gate	lduba	[%o0+%o3]ASI_AIUSL, %o4
73387c478bd9Sstevel@tonic-gate
	/*
	 * Store the byte already loaded, then step the index.  bcc keeps
	 * looping while inccc produces no carry, i.e. until %o3 goes from
	 * -1 to 0.  The branch is annulled (",a"), so the load in its delay
	 * slot is performed only when the branch is taken.
	 */
73397c478bd9Sstevel@tonic-gate1:	stb	%o4, [%o1+%o3]
73407c478bd9Sstevel@tonic-gate	inccc	%o3
73417c478bd9Sstevel@tonic-gate	sub	%o0, 2, %o0		! get next byte
73427c478bd9Sstevel@tonic-gate	bcc,a,pt %ncc, 1b
73437c478bd9Sstevel@tonic-gate	lduba	[%o0+%o3]ASI_AIUSL, %o4
73447c478bd9Sstevel@tonic-gate
73457c478bd9Sstevel@tonic-gate2:	membar	#Sync				! sync error barrier
73467c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
73477c478bd9Sstevel@tonic-gate	retl
73487c478bd9Sstevel@tonic-gate	mov	%g0, %o0		! return (0)
73497c478bd9Sstevel@tonic-gate
	/* Handler installed above: put t_lofault back and return %g1. */
73507c478bd9Sstevel@tonic-gate.little_err:
73517c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
73527c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
73537c478bd9Sstevel@tonic-gate	retl
73547c478bd9Sstevel@tonic-gate	mov	%g1, %o0
73557c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyin_little)
73567c478bd9Sstevel@tonic-gate
73577c478bd9Sstevel@tonic-gate#endif	/* lint */
73587c478bd9Sstevel@tonic-gate
73597c478bd9Sstevel@tonic-gate
73607c478bd9Sstevel@tonic-gate/*
73617c478bd9Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
73627c478bd9Sstevel@tonic-gate * No fault handler installed (to be called under on_fault())
73637c478bd9Sstevel@tonic-gate */
73647c478bd9Sstevel@tonic-gate#if defined(lint)
73657c478bd9Sstevel@tonic-gate
73667c478bd9Sstevel@tonic-gate/* ARGSUSED */
73677c478bd9Sstevel@tonic-gatevoid
73687c478bd9Sstevel@tonic-gatecopyin_noerr(const void *ufrom, void *kto, size_t count)
73697c478bd9Sstevel@tonic-gate{}
73707c478bd9Sstevel@tonic-gate
73717c478bd9Sstevel@tonic-gate#else	/* lint */
73727c478bd9Sstevel@tonic-gate
/*
 * copyin_noerr - entry point that hands the copy itself to .do_copyin,
 * which is defined outside this block.
 *
 * REAL_LOFAULT is loaded with the address of .copyio_noerr: sethi gives
 * the high bits and the or in the branch delay slot gives the low bits.
 *
 * .copyio_noerr does nothing but jump to the address held in
 * SAVED_LOFAULT.  The label lives inside this function's ENTRY/SET_SIZE
 * range and is also referenced by copyout_noerr.
 * NOTE(review): SAVED_LOFAULT is presumably the t_lofault value that was
 * in effect on entry (set up in .do_copyin) -- confirm.
 */
73737c478bd9Sstevel@tonic-gate	ENTRY(copyin_noerr)
73747c478bd9Sstevel@tonic-gate	sethi	%hi(.copyio_noerr), REAL_LOFAULT
73757c478bd9Sstevel@tonic-gate	b	.do_copyin
73767c478bd9Sstevel@tonic-gate	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
73777c478bd9Sstevel@tonic-gate.copyio_noerr:
73787c478bd9Sstevel@tonic-gate	jmp	SAVED_LOFAULT
73797c478bd9Sstevel@tonic-gate	nop
73807c478bd9Sstevel@tonic-gate	SET_SIZE(copyin_noerr)
73817c478bd9Sstevel@tonic-gate
73827c478bd9Sstevel@tonic-gate#endif /* lint */
73837c478bd9Sstevel@tonic-gate
73847c478bd9Sstevel@tonic-gate/*
73857c478bd9Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
73867c478bd9Sstevel@tonic-gate * No fault handler installed (to be called under on_fault())
73877c478bd9Sstevel@tonic-gate */
73887c478bd9Sstevel@tonic-gate
73897c478bd9Sstevel@tonic-gate#if defined(lint)
73907c478bd9Sstevel@tonic-gate
73917c478bd9Sstevel@tonic-gate/* ARGSUSED */
73927c478bd9Sstevel@tonic-gatevoid
73937c478bd9Sstevel@tonic-gatecopyout_noerr(const void *kfrom, void *uto, size_t count)
73947c478bd9Sstevel@tonic-gate{}
73957c478bd9Sstevel@tonic-gate
73967c478bd9Sstevel@tonic-gate#else	/* lint */
73977c478bd9Sstevel@tonic-gate
/*
 * copyout_noerr - entry point that hands the copy itself to .do_copyout,
 * which is defined outside this block.
 *
 * REAL_LOFAULT is loaded with the address of .copyio_noerr (sethi for the
 * high bits, the or in the branch delay slot for the low bits).  The
 * .copyio_noerr label is not defined here; it is the one placed inside
 * copyin_noerr.
 */
73987c478bd9Sstevel@tonic-gate	ENTRY(copyout_noerr)
73997c478bd9Sstevel@tonic-gate	sethi	%hi(.copyio_noerr), REAL_LOFAULT
74007c478bd9Sstevel@tonic-gate	b	.do_copyout
74017c478bd9Sstevel@tonic-gate	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
74027c478bd9Sstevel@tonic-gate	SET_SIZE(copyout_noerr)
74037c478bd9Sstevel@tonic-gate
74047c478bd9Sstevel@tonic-gate#endif /* lint */
74057c478bd9Sstevel@tonic-gate
74067c478bd9Sstevel@tonic-gate#if defined(lint)
74077c478bd9Sstevel@tonic-gate
/*
 * C-visible definitions for lint.  Each initial value matches the .word
 * emitted for the same symbol in the assembly branch below.
 */
74087c478bd9Sstevel@tonic-gateint use_hw_bcopy = 1;
74097c478bd9Sstevel@tonic-gateint use_hw_bzero = 1;
74107c478bd9Sstevel@tonic-gateuint_t hw_copy_limit_1 = 0x100;
74117c478bd9Sstevel@tonic-gateuint_t hw_copy_limit_2 = 0x200;
74127c478bd9Sstevel@tonic-gateuint_t hw_copy_limit_4 = 0x400;
74137c478bd9Sstevel@tonic-gateuint_t hw_copy_limit_8 = 0x400;
74147c478bd9Sstevel@tonic-gate
74157c478bd9Sstevel@tonic-gate#else /* !lint */
74167c478bd9Sstevel@tonic-gate
/*
 * One 32-bit word per symbol, 4-byte aligned.
 * NOTE(review): DGDEF presumably declares a global data symbol, and the
 * hw_copy_limit_* words are presumably size thresholds read by the copy
 * routines; neither the macro nor the readers are visible here -- confirm.
 */
74177c478bd9Sstevel@tonic-gate	.align	4
74187c478bd9Sstevel@tonic-gate	DGDEF(use_hw_bcopy)
74197c478bd9Sstevel@tonic-gate	.word	1
74207c478bd9Sstevel@tonic-gate	DGDEF(use_hw_bzero)
74217c478bd9Sstevel@tonic-gate	.word	1
74227c478bd9Sstevel@tonic-gate	DGDEF(hw_copy_limit_1)
74237c478bd9Sstevel@tonic-gate	.word	0x100
74247c478bd9Sstevel@tonic-gate	DGDEF(hw_copy_limit_2)
74257c478bd9Sstevel@tonic-gate	.word	0x200
74267c478bd9Sstevel@tonic-gate	DGDEF(hw_copy_limit_4)
74277c478bd9Sstevel@tonic-gate	.word	0x400
74287c478bd9Sstevel@tonic-gate	DGDEF(hw_copy_limit_8)
74297c478bd9Sstevel@tonic-gate	.word	0x400
74307c478bd9Sstevel@tonic-gate
	/* Select the text section again for the code that follows. */
74317c478bd9Sstevel@tonic-gate	.align	64
74327c478bd9Sstevel@tonic-gate	.section ".text"
74337c478bd9Sstevel@tonic-gate#endif /* !lint */
74347c478bd9Sstevel@tonic-gate
74357c478bd9Sstevel@tonic-gate/*
74367c478bd9Sstevel@tonic-gate * hwblkclr - clears block-aligned, block-multiple-sized regions that are
74377c478bd9Sstevel@tonic-gate * at least 256 bytes in length using Niagara's block stores/quad store.
74387c478bd9Sstevel@tonic-gate * If the criteria for using this routine are not met then it calls bzero
74397c478bd9Sstevel@tonic-gate * and returns 1.  Otherwise 0 is returned indicating success.
74407c478bd9Sstevel@tonic-gate * Caller is responsible for ensuring use_hw_bzero is true and that
74417c478bd9Sstevel@tonic-gate * kpreempt_disable() has been called.
74427c478bd9Sstevel@tonic-gate */
74437c478bd9Sstevel@tonic-gate#ifdef lint
74447c478bd9Sstevel@tonic-gate/*ARGSUSED*/
74457c478bd9Sstevel@tonic-gateint
74467c478bd9Sstevel@tonic-gatehwblkclr(void *addr, size_t len)
74477c478bd9Sstevel@tonic-gate{
74487c478bd9Sstevel@tonic-gate	return(0);
74497c478bd9Sstevel@tonic-gate}
74507c478bd9Sstevel@tonic-gate#else /* lint */
/*
 * Register use after the save:
 *	%i0 = addr, advanced as blocks are cleared
 *	%i1 = len, counted down as blocks are cleared
 * Return value (placed in the caller's %o0 by the restore):
 *	1 when the request was passed to bzero,
 *	0 when the region was cleared here with stxa through %asi.
 */
74517c478bd9Sstevel@tonic-gate	! %i0 - start address
74527c478bd9Sstevel@tonic-gate	! %i1 - length of region (multiple of 64)
74537c478bd9Sstevel@tonic-gate
74547c478bd9Sstevel@tonic-gate	ENTRY(hwblkclr)
74557c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME), %sp
74567c478bd9Sstevel@tonic-gate
74577c478bd9Sstevel@tonic-gate	! Must be block-aligned
74587c478bd9Sstevel@tonic-gate	andcc	%i0, 0x3f, %g0
74597c478bd9Sstevel@tonic-gate	bnz,pn	%ncc, 1f
74607c478bd9Sstevel@tonic-gate	nop
74617c478bd9Sstevel@tonic-gate
74627c478bd9Sstevel@tonic-gate	! ... and must be 256 bytes or more
74637c478bd9Sstevel@tonic-gate	cmp	%i1, 0x100
74647c478bd9Sstevel@tonic-gate	blu,pn	%ncc, 1f
74657c478bd9Sstevel@tonic-gate	nop
74667c478bd9Sstevel@tonic-gate
	/*
	 * The %asi write below sits in a non-annulled delay slot, so it is
	 * performed both when the branch to .pz_doblock is taken and when
	 * execution falls through to the bzero punt path at 1.
	 */
74677c478bd9Sstevel@tonic-gate	! ... and length must be a multiple of 64
74687c478bd9Sstevel@tonic-gate	andcc	%i1, 0x3f, %g0
74697c478bd9Sstevel@tonic-gate	bz,pn	%ncc, .pz_doblock
74707c478bd9Sstevel@tonic-gate	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
74717c478bd9Sstevel@tonic-gate
	/* bzero(addr, len): the second argument is set in call's delay slot. */
74727c478bd9Sstevel@tonic-gate1:	! punt, call bzero but notify the caller that bzero was used
74737c478bd9Sstevel@tonic-gate	mov	%i0, %o0
74747c478bd9Sstevel@tonic-gate	call	bzero
74757c478bd9Sstevel@tonic-gate	mov	%i1, %o1
74767c478bd9Sstevel@tonic-gate	ret
74777c478bd9Sstevel@tonic-gate	restore	%g0, 1, %o0	! return (1) - did not use block operations
74787c478bd9Sstevel@tonic-gate
	/*
	 * Clear 256 bytes per pass.  The first doubleword of each of the
	 * four 64-byte blocks (offsets 0x0, 0x40, 0x80, 0xc0) is stored
	 * before the remaining doublewords of any block.
	 * NOTE(review): presumably this ordering is chosen for the benefit
	 * of the block-init store ASI -- confirm against the processor
	 * documentation.
	 */
74797c478bd9Sstevel@tonic-gate	! Already verified that there are at least 256 bytes to set
74807c478bd9Sstevel@tonic-gate.pz_doblock:
74817c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x0]%asi
74827c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x40]%asi
74837c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x80]%asi
74847c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xc0]%asi
74857c478bd9Sstevel@tonic-gate
74867c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x8]%asi
74877c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x10]%asi
74887c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x18]%asi
74897c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x20]%asi
74907c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x28]%asi
74917c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x30]%asi
74927c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x38]%asi
74937c478bd9Sstevel@tonic-gate
74947c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x48]%asi
74957c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x50]%asi
74967c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x58]%asi
74977c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x60]%asi
74987c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x68]%asi
74997c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x70]%asi
75007c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x78]%asi
75017c478bd9Sstevel@tonic-gate
75027c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x88]%asi
75037c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x90]%asi
75047c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x98]%asi
75057c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xa0]%asi
75067c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xa8]%asi
75077c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xb0]%asi
75087c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xb8]%asi
75097c478bd9Sstevel@tonic-gate
75107c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xc8]%asi
75117c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xd0]%asi
75127c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xd8]%asi
75137c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xe0]%asi
75147c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xe8]%asi
75157c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xf0]%asi
75167c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0xf8]%asi
75177c478bd9Sstevel@tonic-gate
	/*
	 * Go round again only while more than 256 bytes remain after this
	 * pass.  The add in the delay slot advances %i0 on every pass,
	 * including the last one.
	 */
75187c478bd9Sstevel@tonic-gate	sub	%i1, 0x100, %i1
75197c478bd9Sstevel@tonic-gate	cmp	%i1, 0x100
75207c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, .pz_doblock
75217c478bd9Sstevel@tonic-gate	add	%i0, 0x100, %i0
75227c478bd9Sstevel@tonic-gate
	/*
	 * blu leaves for .pz_finish only when fewer than 64 bytes remain;
	 * a remainder of exactly 64 bytes is still cleared by the loop at 3.
	 */
75237c478bd9Sstevel@tonic-gate2:
75247c478bd9Sstevel@tonic-gate	! Check if more than 64 bytes to set
75257c478bd9Sstevel@tonic-gate	cmp	%i1,0x40
75267c478bd9Sstevel@tonic-gate	blu	%ncc, .pz_finish
75277c478bd9Sstevel@tonic-gate	nop
75287c478bd9Sstevel@tonic-gate
	/* Clear 64 bytes per pass until the remaining count reaches zero. */
75297c478bd9Sstevel@tonic-gate3:
75307c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x0]%asi
75317c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x8]%asi
75327c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x10]%asi
75337c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x18]%asi
75347c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x20]%asi
75357c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x28]%asi
75367c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x30]%asi
75377c478bd9Sstevel@tonic-gate	stxa	%g0, [%i0+0x38]%asi
75387c478bd9Sstevel@tonic-gate
75397c478bd9Sstevel@tonic-gate	subcc	%i1, 0x40, %i1
75407c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 3b
75417c478bd9Sstevel@tonic-gate	add	%i0, 0x40, %i0
75427c478bd9Sstevel@tonic-gate
	/* Every path that reaches this label returns 0 (bzero was not used). */
75437c478bd9Sstevel@tonic-gate.pz_finish:
75447c478bd9Sstevel@tonic-gate	membar	#Sync
75457c478bd9Sstevel@tonic-gate	ret
75467c478bd9Sstevel@tonic-gate	restore	%g0, 0, %o0		! return (bzero or not)
75477c478bd9Sstevel@tonic-gate	SET_SIZE(hwblkclr)
75487c478bd9Sstevel@tonic-gate#endif	/* lint */
75497c478bd9Sstevel@tonic-gate
75507c478bd9Sstevel@tonic-gate#ifdef	lint
75517c478bd9Sstevel@tonic-gate/* Copy 32 bytes of data from src to dst using physical addresses */
75527c478bd9Sstevel@tonic-gate/*ARGSUSED*/
75537c478bd9Sstevel@tonic-gatevoid
75547c478bd9Sstevel@tonic-gatehw_pa_bcopy32(uint64_t src, uint64_t dst)
75557c478bd9Sstevel@tonic-gate{}
75567c478bd9Sstevel@tonic-gate#else	/*!lint */
75577c478bd9Sstevel@tonic-gate
75587c478bd9Sstevel@tonic-gate	/*
75597c478bd9Sstevel@tonic-gate	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
75607c478bd9Sstevel@tonic-gate	 * using physical addresses.
75617c478bd9Sstevel@tonic-gate	 */
75627c478bd9Sstevel@tonic-gate	ENTRY_NP(hw_pa_bcopy32)
	/*
	 * Keep the incoming %pstate in %g1 and run the copy with the
	 * PSTATE_IE (interrupt enable) bit cleared.
	 */
75637c478bd9Sstevel@tonic-gate	rdpr	%pstate, %g1
75647c478bd9Sstevel@tonic-gate	andn	%g1, PSTATE_IE, %g2
75657c478bd9Sstevel@tonic-gate	wrpr	%g0, %g2, %pstate
75667c478bd9Sstevel@tonic-gate
	/*
	 * All four doublewords are loaded into %o2-%o5 through ASI_MEM
	 * before any of them is stored.  %o0 and %o1 are stepped by 8
	 * between accesses and are not restored afterwards.
	 */
75677c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_MEM, %o2
75687c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0
75697c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_MEM, %o3
75707c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0
75717c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_MEM, %o4
75727c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0
75737c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_MEM, %o5
75747c478bd9Sstevel@tonic-gate	stxa	%o2, [%o1]ASI_MEM
75757c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1
75767c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_MEM
75777c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1
75787c478bd9Sstevel@tonic-gate	stxa	%o4, [%o1]ASI_MEM
75797c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1
75807c478bd9Sstevel@tonic-gate	stxa	%o5, [%o1]ASI_MEM
75817c478bd9Sstevel@tonic-gate
	/* The wrpr in retl's delay slot writes the saved %pstate back. */
75827c478bd9Sstevel@tonic-gate	membar	#Sync
75837c478bd9Sstevel@tonic-gate	retl
75847c478bd9Sstevel@tonic-gate	wrpr	%g0, %g1, %pstate
75857c478bd9Sstevel@tonic-gate	SET_SIZE(hw_pa_bcopy32)
75867c478bd9Sstevel@tonic-gate#endif /* lint */
75877c478bd9Sstevel@tonic-gate
75887c478bd9Sstevel@tonic-gate/*
75897c478bd9Sstevel@tonic-gate * Zero a block of storage.
75907c478bd9Sstevel@tonic-gate *
75917c478bd9Sstevel@tonic-gate * uzero is used by the kernel to zero a block in user address space.
75927c478bd9Sstevel@tonic-gate */
75937c478bd9Sstevel@tonic-gate
75947c478bd9Sstevel@tonic-gate/*
75957c478bd9Sstevel@tonic-gate * Control flow of the bzero/kzero/uzero routine.
75967c478bd9Sstevel@tonic-gate *
75977c478bd9Sstevel@tonic-gate *	For stores of fewer than 7 bytes, individual bytes are zeroed.
75987c478bd9Sstevel@tonic-gate *
75997c478bd9Sstevel@tonic-gate *	For stores of fewer than 15 bytes, align the address on a 4-byte
76007c478bd9Sstevel@tonic-gate *	boundary, then store 4-byte chunks followed by the trailing bytes.
76017c478bd9Sstevel@tonic-gate *
76027c478bd9Sstevel@tonic-gate *	For sizes greater than 15 bytes, align the address on 8 byte boundary.
76037c478bd9Sstevel@tonic-gate *	if (count > 128) {
76047c478bd9Sstevel@tonic-gate *		store as many 8-bytes chunks to block align the address
76057c478bd9Sstevel@tonic-gate *		store using ASI_BLK_INIT_ST_QUAD_LDD_P (bzero/kzero) OR
76067c478bd9Sstevel@tonic-gate *		store using ASI_BLK_INIT_QUAD_LDD_AIUS (uzero)
76077c478bd9Sstevel@tonic-gate *	}
76087c478bd9Sstevel@tonic-gate *	Store as many 8-byte chunks, followed by trailing bytes.
76097c478bd9Sstevel@tonic-gate */
76107c478bd9Sstevel@tonic-gate
76117c478bd9Sstevel@tonic-gate#if defined(lint)
76127c478bd9Sstevel@tonic-gate
/*
 * C stubs compiled only when lint is defined. The real kzero and uzero
 * are the assembly routines in the #else branch.
 */
76137c478bd9Sstevel@tonic-gate/* ARGSUSED */
76147c478bd9Sstevel@tonic-gateint
76157c478bd9Sstevel@tonic-gatekzero(void *addr, size_t count)
76167c478bd9Sstevel@tonic-gate{ return(0); }
76177c478bd9Sstevel@tonic-gate
76187c478bd9Sstevel@tonic-gate/* ARGSUSED */
76197c478bd9Sstevel@tonic-gatevoid
76207c478bd9Sstevel@tonic-gateuzero(void *addr, size_t count)
76217c478bd9Sstevel@tonic-gate{}
76227c478bd9Sstevel@tonic-gate
76237c478bd9Sstevel@tonic-gate#else	/* lint */
76247c478bd9Sstevel@tonic-gate
/*
 * uzero(addr, count)
 *
 * Register usage, as seen in .do_zero and in the fault path below:
 *	%o0	address of the next byte to clear
 *	%o1	number of bytes left to clear
 *	%o5	t_lofault value read on entry
 *
 * Stores go through %asi, which is set to ASI_USER here. .zeroerr is
 * installed in t_lofault only when t_lofault was already non-zero;
 * otherwise the bz goes straight to .do_zero. The sethi sits in the
 * delay slot of that bz, so it runs on both paths.
 */
76257c478bd9Sstevel@tonic-gate	ENTRY(uzero)
76267c478bd9Sstevel@tonic-gate	!
76277c478bd9Sstevel@tonic-gate	! Set a new lo_fault handler only if we came in with one
76287c478bd9Sstevel@tonic-gate	! already specified.
76297c478bd9Sstevel@tonic-gate	!
76307c478bd9Sstevel@tonic-gate	wr	%g0, ASI_USER, %asi
76317c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o5
76327c478bd9Sstevel@tonic-gate	tst	%o5
76337c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .do_zero
76347c478bd9Sstevel@tonic-gate	sethi	%hi(.zeroerr), %o2
76357c478bd9Sstevel@tonic-gate	or	%o2, %lo(.zeroerr), %o2
76367c478bd9Sstevel@tonic-gate	membar	#Sync
76377c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .do_zero
76387c478bd9Sstevel@tonic-gate	stn	%o2, [THREAD_REG + T_LOFAULT]
76397c478bd9Sstevel@tonic-gate
/*
 * kzero(addr, count)
 *
 * Same register usage as uzero, but %asi is ASI_P and .zeroerr is
 * installed unconditionally. LOFAULT_SET is or'ed into the saved
 * t_lofault value in %o5 so the exit paths can tell that a handler was
 * installed even when the saved value was zero. The stn in the delay
 * slot of the ba installs the handler before .do_zero starts.
 */
76407c478bd9Sstevel@tonic-gate	ENTRY(kzero)
76417c478bd9Sstevel@tonic-gate	!
76427c478bd9Sstevel@tonic-gate	! Always set a lo_fault handler
76437c478bd9Sstevel@tonic-gate	!
76447c478bd9Sstevel@tonic-gate	wr	%g0, ASI_P, %asi
76457c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o5
76467c478bd9Sstevel@tonic-gate	sethi	%hi(.zeroerr), %o2
76477c478bd9Sstevel@tonic-gate	or	%o5, LOFAULT_SET, %o5
76487c478bd9Sstevel@tonic-gate	or	%o2, %lo(.zeroerr), %o2
76497c478bd9Sstevel@tonic-gate	membar	#Sync
76507c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .do_zero
76517c478bd9Sstevel@tonic-gate	stn	%o2, [THREAD_REG + T_LOFAULT]
76527c478bd9Sstevel@tonic-gate
76537c478bd9Sstevel@tonic-gate/*
76547c478bd9Sstevel@tonic-gate * We got here because of a fault during kzero or if
76557c478bd9Sstevel@tonic-gate * uzero or bzero was called with t_lofault non-zero.
76567c478bd9Sstevel@tonic-gate * Otherwise we've already run screaming from the room.
76577c478bd9Sstevel@tonic-gate * Errno value is in %g1. Note that we're here iff
76587c478bd9Sstevel@tonic-gate * we did set t_lofault.
76597c478bd9Sstevel@tonic-gate */
76607c478bd9Sstevel@tonic-gate.zeroerr:
76617c478bd9Sstevel@tonic-gate	!
76627c478bd9Sstevel@tonic-gate	! Undo asi register setting. Just set it to be the
76637c478bd9Sstevel@tonic-gate	! kernel default without checking.
76647c478bd9Sstevel@tonic-gate	!
76657c478bd9Sstevel@tonic-gate	wr	%g0, ASI_P, %asi
76667c478bd9Sstevel@tonic-gate
76677c478bd9Sstevel@tonic-gate	!
76687c478bd9Sstevel@tonic-gate	! We did set t_lofault. It may well have been zero coming in.
76697c478bd9Sstevel@tonic-gate	!
76707c478bd9Sstevel@tonic-gate1:
	/*
	 * tst sets the condition codes that the bne tests. The andncc in
	 * the delay slot runs whether or not the branch is taken: it
	 * strips LOFAULT_SET from %o5 and sets the condition codes that
	 * the be at 3: tests.
	 */
76717c478bd9Sstevel@tonic-gate	tst	%o5
76727c478bd9Sstevel@tonic-gate	membar #Sync
76737c478bd9Sstevel@tonic-gate	bne,pn	%ncc, 3f
76747c478bd9Sstevel@tonic-gate	andncc	%o5, LOFAULT_SET, %o5
76757c478bd9Sstevel@tonic-gate2:
76767c478bd9Sstevel@tonic-gate	!
76777c478bd9Sstevel@tonic-gate	! Old handler was zero. Just return the error.
76787c478bd9Sstevel@tonic-gate	!
76797c478bd9Sstevel@tonic-gate	retl				! return
76807c478bd9Sstevel@tonic-gate	mov	%g1, %o0		! error code from %g1
76817c478bd9Sstevel@tonic-gate3:
76827c478bd9Sstevel@tonic-gate	!
76837c478bd9Sstevel@tonic-gate	! We're here because %o5 was non-zero. It was non-zero
76847c478bd9Sstevel@tonic-gate	! because either LOFAULT_SET was present, a previous fault
76857c478bd9Sstevel@tonic-gate	! handler was present or both. In all cases we need to reset
76867c478bd9Sstevel@tonic-gate	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
76877c478bd9Sstevel@tonic-gate	! before we either simply return the error or we invoke the
76887c478bd9Sstevel@tonic-gate	! previously specified handler.
76897c478bd9Sstevel@tonic-gate	!
	/*
	 * The condition codes here come from the andncc above. Zero means
	 * only LOFAULT_SET was present, so there is no previous handler
	 * to call and we return the error through 2:. The stn in the
	 * delay slot restores t_lofault on both paths.
	 */
76907c478bd9Sstevel@tonic-gate	be	%ncc, 2b
76917c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]
76927c478bd9Sstevel@tonic-gate	jmp	%o5			! goto real handler
76937c478bd9Sstevel@tonic-gate	nop
76947c478bd9Sstevel@tonic-gate	SET_SIZE(kzero)
76957c478bd9Sstevel@tonic-gate	SET_SIZE(uzero)
76967c478bd9Sstevel@tonic-gate
76977c478bd9Sstevel@tonic-gate#endif	/* lint */
76987c478bd9Sstevel@tonic-gate
76997c478bd9Sstevel@tonic-gate/*
77007c478bd9Sstevel@tonic-gate * Zero a block of storage.
77017c478bd9Sstevel@tonic-gate */
77027c478bd9Sstevel@tonic-gate
77037c478bd9Sstevel@tonic-gate#if defined(lint)
77047c478bd9Sstevel@tonic-gate
/*
 * C stub compiled only when lint is defined. The real bzero is the
 * assembly routine in the #else branch.
 */
77057c478bd9Sstevel@tonic-gate/* ARGSUSED */
77067c478bd9Sstevel@tonic-gatevoid
77077c478bd9Sstevel@tonic-gatebzero(void *addr, size_t count)
77087c478bd9Sstevel@tonic-gate{}
77097c478bd9Sstevel@tonic-gate
77107c478bd9Sstevel@tonic-gate#else	/* lint */
77117c478bd9Sstevel@tonic-gate
/*
 * bzero(addr, count)
 *
 * Stores go through %asi, which is set to ASI_P here. .zeroerr is
 * installed in t_lofault only when t_lofault was non-zero on entry,
 * and the old value is kept in %o5. Execution then falls through into
 * .do_zero, which uzero and kzero also branch to.
 */
77127c478bd9Sstevel@tonic-gate	ENTRY(bzero)
77137c478bd9Sstevel@tonic-gate	wr	%g0, ASI_P, %asi
77147c478bd9Sstevel@tonic-gate
77157c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
77167c478bd9Sstevel@tonic-gate	tst	%o5
77177c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .do_zero
77187c478bd9Sstevel@tonic-gate	sethi	%hi(.zeroerr), %o2
77197c478bd9Sstevel@tonic-gate	or	%o2, %lo(.zeroerr), %o2
77207c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
77217c478bd9Sstevel@tonic-gate	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector
77227c478bd9Sstevel@tonic-gate
/*
 * Common zeroing code.
 *	%o0	address of the next byte to clear
 *	%o1	number of bytes left to clear
 *	%o5	t_lofault value saved by the entry point
 *	%asi	ASI used for every store
 *	%o2, %o3, %o4 are scratch.
 *
 * Counts below 7 go to the byte loop and counts below 15 to the word
 * path. Everything else is first aligned to an 8 byte boundary.
 */
77237c478bd9Sstevel@tonic-gate.do_zero:
77247c478bd9Sstevel@tonic-gate	cmp	%o1, 7
77257c478bd9Sstevel@tonic-gate	blu,pn	%ncc, .byteclr
77267c478bd9Sstevel@tonic-gate	nop
77277c478bd9Sstevel@tonic-gate
77287c478bd9Sstevel@tonic-gate	cmp	%o1, 15
77297c478bd9Sstevel@tonic-gate	blu,pn	%ncc, .wdalign
77307c478bd9Sstevel@tonic-gate	nop
77317c478bd9Sstevel@tonic-gate
77327c478bd9Sstevel@tonic-gate	andcc	%o0, 7, %o3		! is add aligned on a 8 byte bound
77337c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .blkalign		! already double aligned
77347c478bd9Sstevel@tonic-gate	sub	%o3, 8, %o3		! -(bytes till double aligned)
77357c478bd9Sstevel@tonic-gate	add	%o1, %o3, %o1		! update o1 with new count
77367c478bd9Sstevel@tonic-gate
	/*
	 * %o3 is negative here: (addr & 7) - 8. Store single bytes and
	 * count %o3 up until it reaches zero.
	 */
77377c478bd9Sstevel@tonic-gate1:
77387c478bd9Sstevel@tonic-gate	stba	%g0, [%o0]%asi
77397c478bd9Sstevel@tonic-gate	inccc	%o3
77407c478bd9Sstevel@tonic-gate	bl,pt	%ncc, 1b
77417c478bd9Sstevel@tonic-gate	inc	%o0
77427c478bd9Sstevel@tonic-gate
77437c478bd9Sstevel@tonic-gate	! Now address is double aligned
77447c478bd9Sstevel@tonic-gate.blkalign:
77457c478bd9Sstevel@tonic-gate	cmp	%o1, 0x80		! check if there are 128 bytes to set
77467c478bd9Sstevel@tonic-gate	blu,pn	%ncc, .bzero_small
77477c478bd9Sstevel@tonic-gate	mov	%o1, %o3
77487c478bd9Sstevel@tonic-gate
	/*
	 * Block stores are used only when use_hw_bzero is non-zero.
	 * Both branches to .bzero_small copy the count into %o3 in the
	 * delay slot, which is where .bzero_small expects it.
	 */
77497c478bd9Sstevel@tonic-gate	sethi	%hi(use_hw_bzero), %o2
77507c478bd9Sstevel@tonic-gate	ld	[%o2 + %lo(use_hw_bzero)], %o2
77517c478bd9Sstevel@tonic-gate	tst	%o2
77527c478bd9Sstevel@tonic-gate	bz	%ncc, .bzero_small
77537c478bd9Sstevel@tonic-gate	mov	%o1, %o3
77547c478bd9Sstevel@tonic-gate
	/*
	 * Pick the block-init ASI that matches the current one. The bne
	 * is annulled, so the wr in its delay slot runs only when %asi
	 * was not ASI_P, replacing the _P variant with the AIUS one.
	 */
77557c478bd9Sstevel@tonic-gate	rd	%asi, %o3
77567c478bd9Sstevel@tonic-gate	wr	%g0, ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
77577c478bd9Sstevel@tonic-gate	cmp	%o3, ASI_P
77587c478bd9Sstevel@tonic-gate	bne,a	%ncc, .algnblk
77597c478bd9Sstevel@tonic-gate	wr	%g0, ASI_BLK_INIT_QUAD_LDD_AIUS, %asi
77607c478bd9Sstevel@tonic-gate
77617c478bd9Sstevel@tonic-gate.algnblk:
77627c478bd9Sstevel@tonic-gate	andcc	%o0, 0x3f, %o3		! is block aligned?
77637c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bzero_blk
77647c478bd9Sstevel@tonic-gate	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
77657c478bd9Sstevel@tonic-gate	add	%o1, %o3, %o1		! o1 is the remainder
77667c478bd9Sstevel@tonic-gate
77677c478bd9Sstevel@tonic-gate	! Clear -(%o3) bytes till block aligned
77687c478bd9Sstevel@tonic-gate1:
77697c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0]%asi
77707c478bd9Sstevel@tonic-gate	addcc	%o3, 8, %o3
77717c478bd9Sstevel@tonic-gate	bl,pt	%ncc, 1b
77727c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0
77737c478bd9Sstevel@tonic-gate
77747c478bd9Sstevel@tonic-gate.bzero_blk:
77757c478bd9Sstevel@tonic-gate	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
77767c478bd9Sstevel@tonic-gate	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes
77777c478bd9Sstevel@tonic-gate
77787c478bd9Sstevel@tonic-gate	cmp	%o4, 0x100		! 256 bytes or more
77797c478bd9Sstevel@tonic-gate	blu,pn	%ncc, 3f
77807c478bd9Sstevel@tonic-gate	nop
77817c478bd9Sstevel@tonic-gate
	/*
	 * 256 bytes per iteration. The first doubleword of each of the
	 * four 64-byte blocks is stored before the rest.
	 * NOTE(review): presumably this ordering matters for the
	 * block-init ASIs - confirm against the processor documentation.
	 */
77827c478bd9Sstevel@tonic-gate2:
77837c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x0]%asi
77847c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x40]%asi
77857c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x80]%asi
77867c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xc0]%asi
77877c478bd9Sstevel@tonic-gate
77887c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x8]%asi
77897c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x10]%asi
77907c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x18]%asi
77917c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x20]%asi
77927c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x28]%asi
77937c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x30]%asi
77947c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x38]%asi
77957c478bd9Sstevel@tonic-gate
77967c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x48]%asi
77977c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x50]%asi
77987c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x58]%asi
77997c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x60]%asi
78007c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x68]%asi
78017c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x70]%asi
78027c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x78]%asi
78037c478bd9Sstevel@tonic-gate
78047c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x88]%asi
78057c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x90]%asi
78067c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x98]%asi
78077c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xa0]%asi
78087c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xa8]%asi
78097c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xb0]%asi
78107c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xb8]%asi
78117c478bd9Sstevel@tonic-gate
78127c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xc8]%asi
78137c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xd0]%asi
78147c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xd8]%asi
78157c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xe0]%asi
78167c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xe8]%asi
78177c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xf0]%asi
78187c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0xf8]%asi
78197c478bd9Sstevel@tonic-gate
	/*
	 * bgu loops again only while more than 256 bytes of blocks
	 * remain. Exactly 256 bytes left falls through to the 64-byte
	 * loop at 3:.
	 */
78207c478bd9Sstevel@tonic-gate	sub	%o4, 0x100, %o4
78217c478bd9Sstevel@tonic-gate	cmp	%o4, 0x100
78227c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 2b
78237c478bd9Sstevel@tonic-gate	add	%o0, 0x100, %o0
78247c478bd9Sstevel@tonic-gate
78257c478bd9Sstevel@tonic-gate3:
78267c478bd9Sstevel@tonic-gate	! ... check if 64 bytes to set
78277c478bd9Sstevel@tonic-gate	cmp	%o4, 0x40
78287c478bd9Sstevel@tonic-gate	blu	%ncc, .bzero_blk_done
78297c478bd9Sstevel@tonic-gate	nop
78307c478bd9Sstevel@tonic-gate
78317c478bd9Sstevel@tonic-gate4:
78327c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x0]%asi
78337c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x8]%asi
78347c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x10]%asi
78357c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x18]%asi
78367c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x20]%asi
78377c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x28]%asi
78387c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x30]%asi
78397c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0+0x38]%asi
78407c478bd9Sstevel@tonic-gate
78417c478bd9Sstevel@tonic-gate	subcc	%o4, 0x40, %o4
78427c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 3b
78437c478bd9Sstevel@tonic-gate	add	%o0, 0x40, %o0
78447c478bd9Sstevel@tonic-gate
78457c478bd9Sstevel@tonic-gate.bzero_blk_done:
78467c478bd9Sstevel@tonic-gate	membar	#Sync
78477c478bd9Sstevel@tonic-gate	!
78487c478bd9Sstevel@tonic-gate	! Undo asi register setting.
78497c478bd9Sstevel@tonic-gate	!
	/*
	 * Annulled branch: the wr of ASI_USER runs only when the block
	 * ASI was not ASI_BLK_INIT_ST_QUAD_LDD_P. Otherwise %asi stays
	 * at the ASI_P written just before the cmp.
	 */
78507c478bd9Sstevel@tonic-gate	rd	%asi, %o4
78517c478bd9Sstevel@tonic-gate	wr	%g0, ASI_P, %asi
78527c478bd9Sstevel@tonic-gate	cmp	%o4, ASI_BLK_INIT_ST_QUAD_LDD_P
78537c478bd9Sstevel@tonic-gate	bne,a	%ncc, .bzero_small
78547c478bd9Sstevel@tonic-gate	wr	%g0, ASI_USER, %asi
78557c478bd9Sstevel@tonic-gate
	/*
	 * On entry %o3 holds the bytes still to clear (the full count, or
	 * count & 0x3f after the block loops). The and in the delay slot
	 * of the blu leaves the trailing byte count in %o1 on both paths.
	 */
78567c478bd9Sstevel@tonic-gate.bzero_small:
78577c478bd9Sstevel@tonic-gate	! Set the remaining doubles
78587c478bd9Sstevel@tonic-gate	subcc	%o3, 8, %o3		! Can we store any doubles?
78597c478bd9Sstevel@tonic-gate	blu,pn	%ncc, .byteclr
78607c478bd9Sstevel@tonic-gate	and	%o1, 7, %o1		! calc bytes left after doubles
78617c478bd9Sstevel@tonic-gate
78627c478bd9Sstevel@tonic-gate.dbclr:
78637c478bd9Sstevel@tonic-gate	stxa	%g0, [%o0]%asi		! Clear the doubles
78647c478bd9Sstevel@tonic-gate	subcc	%o3, 8, %o3
78657c478bd9Sstevel@tonic-gate	bgeu,pt	%ncc, .dbclr
78667c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0
78677c478bd9Sstevel@tonic-gate
78687c478bd9Sstevel@tonic-gate	ba	.byteclr
78697c478bd9Sstevel@tonic-gate	nop
78707c478bd9Sstevel@tonic-gate
	/*
	 * Clear single bytes until the address is 4 byte aligned. The
	 * andn in the delay slot of the bz runs on every pass, so %o3
	 * holds the current count rounded down to a multiple of 4 when
	 * .wdclr is reached.
	 */
78717c478bd9Sstevel@tonic-gate.wdalign:
78727c478bd9Sstevel@tonic-gate	andcc	%o0, 3, %o3		! is add aligned on a word boundary
78737c478bd9Sstevel@tonic-gate	bz,pn	%ncc, .wdclr
78747c478bd9Sstevel@tonic-gate	andn	%o1, 3, %o3		! create word sized count in %o3
78757c478bd9Sstevel@tonic-gate
78767c478bd9Sstevel@tonic-gate	dec	%o1			! decrement count
78777c478bd9Sstevel@tonic-gate	stba	%g0, [%o0]%asi		! clear a byte
78787c478bd9Sstevel@tonic-gate	ba	.wdalign
78797c478bd9Sstevel@tonic-gate	inc	%o0			! next byte
78807c478bd9Sstevel@tonic-gate
	/*
	 * The loop tests %o3 after the store, so it relies on %o3 being
	 * at least 4 on entry. From .do_zero, .wdalign is reached only
	 * with a count of 7 or more, and it clears at most 3 bytes
	 * before branching here.
	 */
78817c478bd9Sstevel@tonic-gate.wdclr:
78827c478bd9Sstevel@tonic-gate	sta	%g0, [%o0]%asi		! 4-byte clearing loop
78837c478bd9Sstevel@tonic-gate	subcc	%o3, 4, %o3
78847c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .wdclr
78857c478bd9Sstevel@tonic-gate	inc	4, %o0
78867c478bd9Sstevel@tonic-gate
78877c478bd9Sstevel@tonic-gate	and	%o1, 3, %o1		! leftover count, if any
78887c478bd9Sstevel@tonic-gate
78897c478bd9Sstevel@tonic-gate.byteclr:
78907c478bd9Sstevel@tonic-gate	! Set the leftover bytes
78917c478bd9Sstevel@tonic-gate	brz	%o1, .bzero_exit
78927c478bd9Sstevel@tonic-gate	nop
78937c478bd9Sstevel@tonic-gate
78947c478bd9Sstevel@tonic-gate7:
78957c478bd9Sstevel@tonic-gate	deccc	%o1			! byte clearing loop
78967c478bd9Sstevel@tonic-gate	stba	%g0, [%o0]%asi
78977c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 7b
78987c478bd9Sstevel@tonic-gate	inc	%o0
78997c478bd9Sstevel@tonic-gate
79007c478bd9Sstevel@tonic-gate.bzero_exit:
79017c478bd9Sstevel@tonic-gate	!
79027c478bd9Sstevel@tonic-gate	! We're just concerned with whether t_lofault was set
79037c478bd9Sstevel@tonic-gate	! when we came in. We end up here from either kzero()
79047c478bd9Sstevel@tonic-gate	! or bzero(). kzero() *always* sets a lofault handler.
79057c478bd9Sstevel@tonic-gate	! It ors LOFAULT_SET into %o5 to indicate it has done
79067c478bd9Sstevel@tonic-gate	! this even if the value of %o5 is otherwise zero.
79077c478bd9Sstevel@tonic-gate	! bzero() sets a lofault handler *only* if one was
79087c478bd9Sstevel@tonic-gate	! previously set. Accordingly we need to examine
79097c478bd9Sstevel@tonic-gate	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
79107c478bd9Sstevel@tonic-gate	! before resetting the error handler.
79117c478bd9Sstevel@tonic-gate	!
	/*
	 * uzero() also finishes here. Like bzero(), it leaves %o5 zero
	 * unless it installed a handler. The andn in the delay slot of
	 * the bz runs on both paths.
	 */
79127c478bd9Sstevel@tonic-gate	tst	%o5
79137c478bd9Sstevel@tonic-gate	bz	%ncc, 1f
79147c478bd9Sstevel@tonic-gate	andn	%o5, LOFAULT_SET, %o5
79157c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
79167c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
79177c478bd9Sstevel@tonic-gate1:
79187c478bd9Sstevel@tonic-gate	retl
79197c478bd9Sstevel@tonic-gate	clr	%o0			! return (0)
79207c478bd9Sstevel@tonic-gate
79217c478bd9Sstevel@tonic-gate	SET_SIZE(bzero)
79227c478bd9Sstevel@tonic-gate#endif	/* lint */
7923