xref: /titanic_41/usr/src/uts/sun4v/cpu/generic_copy.s (revision 59ac0c1669407488b67ae9e273667a340dccc611)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/param.h>
29#include <sys/errno.h>
30#include <sys/asm_linkage.h>
31#include <sys/vtrace.h>
32#include <sys/machthread.h>
33#include <sys/clock.h>
34#include <sys/asi.h>
35#include <sys/fsr.h>
36#include <sys/privregs.h>
37
38#if !defined(lint)
39#include "assym.h"
40#endif	/* lint */
41
42
43/*
44 * Less then or equal this number of bytes we will always copy byte-for-byte
45 */
46#define	SMALL_LIMIT	7
47
48/*
49 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
50 * handler was set
51 */
52#define	LOFAULT_SET 2
53
54
55/*
56 * Copy a block of storage, returning an error code if `from' or
57 * `to' takes a kernel pagefault which cannot be resolved.
58 * Returns errno value on pagefault error, 0 if all ok
59 */
60
61
62
63#if defined(lint)
64
65/* ARGSUSED */
66int
67kcopy(const void *from, void *to, size_t count)
68{ return(0); }
69
70#else	/* lint */
71
72	.seg	".text"
73	.align	4
74
75	ENTRY(kcopy)
76
77	save	%sp, -SA(MINFRAME), %sp
78	set	.copyerr, %l7			! copyerr is lofault value
79	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
80	or	%o5, LOFAULT_SET, %o5
81	membar	#Sync				! sync error barrier
82	b	.do_copy			! common code
83	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
84
85/*
86 * We got here because of a fault during kcopy.
87 * Errno value is in %g1.
88 */
89.copyerr:
90	! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
91	! into %o5 to indicate it has set t_lofault handler. Need to clear
92	! LOFAULT_SET flag before restoring the error handler.
93	andn	%o5, LOFAULT_SET, %o5
94	membar	#Sync			! sync error barrier
95	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
96	ret
97	restore	%g1, 0, %o0
98
99	SET_SIZE(kcopy)
100#endif	/* lint */
101
102
103/*
104 * Copy a block of storage - must not overlap (from + len <= to).
105 */
106#if defined(lint)
107
108/* ARGSUSED */
109void
110bcopy(const void *from, void *to, size_t count)
111{}
112
113#else	/* lint */
114
115	ENTRY(bcopy)
116
117	save	%sp, -SA(MINFRAME), %sp
118	clr	%o5			! flag LOFAULT_SET is not set for bcopy
119
120.do_copy:
121        mov     %i1, %g5                ! save dest addr start
122
123        mov     %i2, %l6                ! save size
124
125	cmp	%i2, 12			! for small counts
126	blu	%ncc, .bytecp		! just copy bytes
127	  .empty
128
129	!
130	! use aligned transfers where possible
131	!
132	xor	%i0, %i1, %o4		! xor from and to address
133	btst	7, %o4			! if lower three bits zero
134	bz	.aldoubcp		! can align on double boundary
135	.empty	! assembler complaints about label
136
137	xor	%i0, %i1, %o4		! xor from and to address
138	btst	3, %o4			! if lower two bits zero
139	bz	.alwordcp		! can align on word boundary
140	btst	3, %i0			! delay slot, from address unaligned?
141	!
142	! use aligned reads and writes where possible
143	! this differs from wordcp in that it copes
144	! with odd alignment between source and destnation
145	! using word reads and writes with the proper shifts
146	! in between to align transfers to and from memory
147	! i0 - src address, i1 - dest address, i2 - count
148	! i3, i4 - tmps for used generating complete word
149	! i5 (word to write)
150	! l0 size in bits of upper part of source word (US)
151	! l1 size in bits of lower part of source word (LS = 32 - US)
152	! l2 size in bits of upper part of destination word (UD)
153	! l3 size in bits of lower part of destination word (LD = 32 - UD)
154	! l4 number of bytes leftover after aligned transfers complete
155	! l5 the number 32
156	!
157	mov	32, %l5			! load an oft-needed constant
158	bz	.align_dst_only
159	btst	3, %i1			! is destnation address aligned?
160	clr	%i4			! clear registers used in either case
161	bz	.align_src_only
162	clr	%l0
163	!
164	! both source and destination addresses are unaligned
165	!
1661:					! align source
167	ldub	[%i0], %i3		! read a byte from source address
168	add	%i0, 1, %i0		! increment source address
169	or	%i4, %i3, %i4		! or in with previous bytes (if any)
170	btst	3, %i0			! is source aligned?
171	add	%l0, 8, %l0		! increment size of upper source (US)
172	bnz,a	1b
173	sll	%i4, 8, %i4		! make room for next byte
174
175	sub	%l5, %l0, %l1		! generate shift left count (LS)
176	sll	%i4, %l1, %i4		! prepare to get rest
177	ld	[%i0], %i3		! read a word
178	add	%i0, 4, %i0		! increment source address
179	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
180	or	%i4, %i5, %i5		! merge
181	mov	24, %l3			! align destination
1821:
183	srl	%i5, %l3, %i4		! prepare to write a single byte
184	stb	%i4, [%i1]		! write a byte
185	add	%i1, 1, %i1		! increment destination address
186	sub	%i2, 1, %i2		! decrement count
187	btst	3, %i1			! is destination aligned?
188	bnz,a	1b
189	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
190	sub	%l5, %l3, %l2		! generate shift left count (UD)
191	sll	%i5, %l2, %i5		! move leftover into upper bytes
192	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
193	bgu	%ncc, .more_needed	! need more to fill than we have
194	nop
195
196	sll	%i3, %l1, %i3		! clear upper used byte(s)
197	srl	%i3, %l1, %i3
198	! get the odd bytes between alignments
199	sub	%l0, %l2, %l0		! regenerate shift count
200	sub	%l5, %l0, %l1		! generate new shift left count (LS)
201	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
202	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
203	srl	%i3, %l0, %i4
204	or	%i5, %i4, %i5
205	st	%i5, [%i1]		! write a word
206	subcc	%i2, 4, %i2		! decrement count
207	bz	%ncc, .unalign_out
208	add	%i1, 4, %i1		! increment destination address
209
210	b	2f
211	sll	%i3, %l1, %i5		! get leftover into upper bits
212.more_needed:
213	sll	%i3, %l0, %i3		! save remaining byte(s)
214	srl	%i3, %l0, %i3
215	sub	%l2, %l0, %l1		! regenerate shift count
216	sub	%l5, %l1, %l0		! generate new shift left count
217	sll	%i3, %l1, %i4		! move to fill empty space
218	b	3f
219	or	%i5, %i4, %i5		! merge to complete word
220	!
221	! the source address is aligned and destination is not
222	!
223.align_dst_only:
224	ld	[%i0], %i4		! read a word
225	add	%i0, 4, %i0		! increment source address
226	mov	24, %l0			! initial shift alignment count
2271:
228	srl	%i4, %l0, %i3		! prepare to write a single byte
229	stb	%i3, [%i1]		! write a byte
230	add	%i1, 1, %i1		! increment destination address
231	sub	%i2, 1, %i2		! decrement count
232	btst	3, %i1			! is destination aligned?
233	bnz,a	1b
234	sub	%l0, 8, %l0		! delay slot, decrement shift count
235.xfer:
236	sub	%l5, %l0, %l1		! generate shift left count
237	sll	%i4, %l1, %i5		! get leftover
2383:
239	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
240	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2412:
242	ld	[%i0], %i3		! read a source word
243	add	%i0, 4, %i0		! increment source address
244	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
245	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
246	st	%i5, [%i1]		! write a destination word
247	subcc	%i2, 4, %i2		! decrement count
248	bz	%ncc, .unalign_out	! check if done
249	add	%i1, 4, %i1		! increment destination address
250	b	2b			! loop
251	sll	%i3, %l1, %i5		! get leftover
252.unalign_out:
253	tst	%l4			! any bytes leftover?
254	bz	%ncc, .cpdone
255	.empty				! allow next instruction in delay slot
2561:
257	sub	%l0, 8, %l0		! decrement shift
258	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
259	stb	%i4, [%i1]		! write a byte
260	subcc	%l4, 1, %l4		! decrement count
261	bz	%ncc, .cpdone		! done?
262	add	%i1, 1, %i1		! increment destination
263	tst	%l0			! any more previously read bytes
264	bnz	%ncc, 1b		! we have leftover bytes
265	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
266	b	.dbytecp		! let dbytecp do the rest
267	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
268	!
269	! the destination address is aligned and the source is not
270	!
271.align_src_only:
272	ldub	[%i0], %i3		! read a byte from source address
273	add	%i0, 1, %i0		! increment source address
274	or	%i4, %i3, %i4		! or in with previous bytes (if any)
275	btst	3, %i0			! is source aligned?
276	add	%l0, 8, %l0		! increment shift count (US)
277	bnz,a	.align_src_only
278	sll	%i4, 8, %i4		! make room for next byte
279	b,a	.xfer
280	!
281	! if from address unaligned for double-word moves,
282	! move bytes till it is, if count is < 56 it could take
283	! longer to align the thing than to do the transfer
284	! in word size chunks right away
285	!
286.aldoubcp:
287	cmp	%i2, 56			! if count < 56, use wordcp, it takes
288	blu,a	%ncc, .alwordcp		! longer to align doubles than words
289	mov	3, %o0			! mask for word alignment
290	call	.alignit		! copy bytes until aligned
291	mov	7, %o0			! mask for double alignment
292	!
293	! source and destination are now double-word aligned
294	! i3 has aligned count returned by alignit
295	!
296	and	%i2, 7, %i2		! unaligned leftover count
297	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
2985:
299	ldx	[%i0+%i1], %o4		! read from address
300	stx	%o4, [%i1]		! write at destination address
301	subcc	%i3, 8, %i3		! dec count
302	bgu	%ncc, 5b
303	add	%i1, 8, %i1		! delay slot, inc to address
304	cmp	%i2, 4			! see if we can copy a word
305	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
306	.empty
307	!
308	! for leftover bytes we fall into wordcp, if needed
309	!
310.wordcp:
311	and	%i2, 3, %i2		! unaligned leftover count
3125:
313	ld	[%i0+%i1], %o4		! read from address
314	st	%o4, [%i1]		! write at destination address
315	subcc	%i3, 4, %i3		! dec count
316	bgu	%ncc, 5b
317	add	%i1, 4, %i1		! delay slot, inc to address
318	b,a	.dbytecp
319
320	! we come here to align copies on word boundaries
321.alwordcp:
322	call	.alignit		! go word-align it
323	mov	3, %o0			! bits that must be zero to be aligned
324	b	.wordcp
325	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
326
327	!
328	! byte copy, works with any alignment
329	!
330.bytecp:
331	b	.dbytecp
332	sub	%i0, %i1, %i0		! i0 gets difference of src and dst
333
334	!
335	! differenced byte copy, works with any alignment
336	! assumes dest in %i1 and (source - dest) in %i0
337	!
3381:
339	stb	%o4, [%i1]		! write to address
340	inc	%i1			! inc to address
341.dbytecp:
342	deccc	%i2			! dec count
343	bgeu,a	%ncc, 1b		! loop till done
344	ldub	[%i0+%i1], %o4		! read from address
345.cpdone:
346	membar	#Sync				! sync error barrier
347	! Restore t_lofault handler, if came here from kcopy().
348	tst	%o5
349	bz	%ncc, 1f
350	andn	%o5, LOFAULT_SET, %o5
351	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
3521:
353        mov     %g5, %o0                ! copy dest address
354        call    sync_icache
355        mov     %l6, %o1                ! saved size
356	ret
357	restore %g0, 0, %o0		! return (0)
358
359/*
360 * Common code used to align transfers on word and doubleword
361 * boudaries.  Aligns source and destination and returns a count
362 * of aligned bytes to transfer in %i3
363 */
3641:
365	inc	%i0			! inc from
366	stb	%o4, [%i1]		! write a byte
367	inc	%i1			! inc to
368	dec	%i2			! dec count
369.alignit:
370	btst	%o0, %i0		! %o0 is bit mask to check for alignment
371	bnz,a	1b
372	ldub	[%i0], %o4		! read next byte
373
374	retl
375	andn	%i2, %o0, %i3		! return size of aligned bytes
376	SET_SIZE(bcopy)
377
378#endif	/* lint */
379
380/*
381 * Block copy with possibly overlapped operands.
382 */
383
384#if defined(lint)
385
386/*ARGSUSED*/
387void
388ovbcopy(const void *from, void *to, size_t count)
389{}
390
391#else	/* lint */
392
393	ENTRY(ovbcopy)
394	tst	%o2			! check count
395	bgu,a	%ncc, 1f		! nothing to do or bad arguments
396	subcc	%o0, %o1, %o3		! difference of from and to address
397
398	retl				! return
399	nop
4001:
401	bneg,a	%ncc, 2f
402	neg	%o3			! if < 0, make it positive
4032:	cmp	%o2, %o3		! cmp size and abs(from - to)
404	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
405	.empty				!   no overlap
406	cmp	%o0, %o1		! compare from and to addresses
407	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
408	nop
409	!
410	! Copy forwards.
411	!
412.ov_fwd:
413	ldub	[%o0], %o3		! read from address
414	inc	%o0			! inc from address
415	stb	%o3, [%o1]		! write to address
416	deccc	%o2			! dec count
417	bgu	%ncc, .ov_fwd		! loop till done
418	inc	%o1			! inc to address
419
420	retl				! return
421	nop
422	!
423	! Copy backwards.
424	!
425.ov_bkwd:
426	deccc	%o2			! dec count
427	ldub	[%o0 + %o2], %o3	! get byte at end of src
428	bgu	%ncc, .ov_bkwd		! loop till done
429	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst
430
431	retl				! return
432	nop
433	SET_SIZE(ovbcopy)
434
435#endif	/* lint */
436
437/*
438 * hwblkpagecopy()
439 *
440 * Copies exactly one page.  This routine assumes the caller (ppcopy)
441 * has already disabled kernel preemption and has checked
442 * use_hw_bcopy.
443 */
444#ifdef lint
445/*ARGSUSED*/
446void
447hwblkpagecopy(const void *src, void *dst)
448{ }
449#else /* lint */
450	ENTRY(hwblkpagecopy)
451	save	%sp, -SA(MINFRAME), %sp
452
453	! %i0 - source address (arg)
454	! %i1 - destination address (arg)
455	! %i2 - length of region (not arg)
456
457	set	PAGESIZE, %i2
458	mov     %i1,    %o0     ! store destination address for flushing
459
460	/*
461	 * Copying exactly one page and PAGESIZE is in mutliple of 0x80.
462	 */
4631:
464	ldx	[%i0+0x0], %l0
465	ldx	[%i0+0x8], %l1
466	ldx	[%i0+0x10], %l2
467	ldx	[%i0+0x18], %l3
468	ldx	[%i0+0x20], %l4
469	ldx	[%i0+0x28], %l5
470	ldx	[%i0+0x30], %l6
471	ldx	[%i0+0x38], %l7
472	stx	%l0, [%i1+0x0]
473	stx	%l1, [%i1+0x8]
474	stx	%l2, [%i1+0x10]
475	stx	%l3, [%i1+0x18]
476	stx	%l4, [%i1+0x20]
477	stx	%l5, [%i1+0x28]
478	stx	%l6, [%i1+0x30]
479	stx	%l7, [%i1+0x38]
480
481	ldx	[%i0+0x40], %l0
482	ldx	[%i0+0x48], %l1
483	ldx	[%i0+0x50], %l2
484	ldx	[%i0+0x58], %l3
485	ldx	[%i0+0x60], %l4
486	ldx	[%i0+0x68], %l5
487	ldx	[%i0+0x70], %l6
488	ldx	[%i0+0x78], %l7
489	stx	%l0, [%i1+0x40]
490	stx	%l1, [%i1+0x48]
491	stx	%l2, [%i1+0x50]
492	stx	%l3, [%i1+0x58]
493	stx	%l4, [%i1+0x60]
494	stx	%l5, [%i1+0x68]
495	stx	%l6, [%i1+0x70]
496	stx	%l7, [%i1+0x78]
497
498	add	%i0, 0x80, %i0
499	subcc	%i2, 0x80, %i2
500	bgu,pt	%xcc, 1b
501	add	%i1, 0x80, %i1
502
503	! %o0 contains the dest. address
504	set	PAGESIZE, %o1
505	call	sync_icache
506	nop
507
508	membar #Sync
509	ret
510	restore	%g0, 0, %o0
511	SET_SIZE(hwblkpagecopy)
512#endif	/* lint */
513
514
515/*
516 * Transfer data to and from user space -
517 * Note that these routines can cause faults
518 * It is assumed that the kernel has nothing at
519 * less than KERNELBASE in the virtual address space.
520 *
521 * Note that copyin(9F) and copyout(9F) are part of the
522 * DDI/DKI which specifies that they return '-1' on "errors."
523 *
524 * Sigh.
525 *
526 * So there's two extremely similar routines - xcopyin() and xcopyout()
527 * which return the errno that we've faithfully computed.  This
528 * allows other callers (e.g. uiomove(9F)) to work correctly.
529 * Given that these are used pretty heavily, we expand the calling
530 * sequences inline for all flavours (rather than making wrappers).
531 *
532 * There are also stub routines for xcopyout_little and xcopyin_little,
533 * which currently are intended to handle requests of <= 16 bytes from
534 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
535 * is left as an exercise...
536 */
537
538/*
539 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
540 *
541 * General theory of operation:
542 *
543 * None of the copyops routines grab a window.
544 *
545 * Flow:
546 *
547 * If count == zero return zero.
548 *
549 * Store the previous lo_fault handler into %g6.
550 * Place our secondary lofault handler into %g5.
551 * Place the address of our fault handler into %o3.
552 *
553 * If count is less than or equal to SMALL_LIMIT (7) we
554 * always do a byte for byte copy.
555 *
556 * If count is > SMALL_LIMIT, we check the alignment of the input
557 * and output pointers.  We store -count in %o3, we store the number
558 * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop
559 * in %o2. Following this we branch to the appropriate copy loop and
560 * copy that many chunks.  Since we've been adding the chunk size
561 * to %o3 each time through as well as decrementing %o2, we can tell
562 * if any data is is left to be copied by examining %o3. If that is
563 * zero, we're done and can go home. If not, we figure out what the
564 * largest chunk size left to be copied is and branch to that copy
565 * loop unless there's only one byte left. We load that as we're
566 * branching to code that stores it just before we return.
567 *
568 * Fault handlers are invoked if we reference memory that has no
569 * current mapping.  All forms share the same copyio_fault handler.
570 * This routine handles fixing up the stack and general housecleaning.
571 * Each copy operation has a simple fault handler that is then called
572 * to do the work specific to the invidual operation.  The handler
573 * for copyOP and xcopyOP are found at the end of individual function.
574 * The handlers for xcopyOP_little are found at the end of xcopyin_little.
575 * The handlers for copyOP_noerr are found at the end of copyin_noerr.
576 */
577
578/*
579 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
580 */
581
582#if defined(lint)
583
584/*ARGSUSED*/
585int
586copyout(const void *kaddr, void *uaddr, size_t count)
587{ return (0); }
588
589#else	/* lint */
590
591/*
592 * We save the arguments in the following registers in case of a fault:
593 * 	kaddr - %g2
594 * 	uaddr - %g3
595 * 	count - %g4
596 */
597#define	SAVE_SRC	%g2
598#define	SAVE_DST	%g3
599#define	SAVE_COUNT	%g4
600
601#define	REAL_LOFAULT		%g5
602#define	SAVED_LOFAULT		%g6
603
604/*
605 * Generic copyio fault handler.  This is the first line of defense when a
606 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
607 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
608 * This allows us to share common code for all the flavors of the copy
609 * operations, including the _noerr versions.
610 *
611 * Note that this function will restore the original input parameters before
612 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
613 * member of the t_copyop structure, if needed.
614 */
615	ENTRY(copyio_fault)
616	membar	#Sync
617	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
618
619	mov	SAVE_SRC, %o0
620	mov	SAVE_DST, %o1
621	jmp	REAL_LOFAULT
622	  mov	SAVE_COUNT, %o2
623	SET_SIZE(copyio_fault)
624
625	ENTRY(copyout)
626	sethi	%hi(.copyout_err), REAL_LOFAULT
627	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
628
629.do_copyout:
630	!
631	! Check the length and bail if zero.
632	!
633	tst	%o2
634	bnz,pt	%ncc, 1f
635	  nop
636	retl
637	  clr	%o0
6381:
639	sethi	%hi(copyio_fault), %o3
640	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
641	or	%o3, %lo(copyio_fault), %o3
642	membar	#Sync
643	stn	%o3, [THREAD_REG + T_LOFAULT]
644
645	mov	%o0, SAVE_SRC
646	mov	%o1, SAVE_DST
647	mov	%o2, SAVE_COUNT
648
649	!
650	! Check to see if we're more than SMALL_LIMIT (7 bytes).
651	! Run in leaf mode, using the %o regs as our input regs.
652	!
653	subcc	%o2, SMALL_LIMIT, %o3
654	bgu,a,pt %ncc, .dco_ns
655	or	%o0, %o1, %o3
656
657.dcobcp:
658	sub	%g0, %o2, %o3		! negate count
659	add	%o0, %o2, %o0		! make %o0 point at the end
660	add	%o1, %o2, %o1		! make %o1 point at the end
661	ba,pt	%ncc, .dcocl
662	ldub	[%o0 + %o3], %o4	! load first byte
663	!
664	! %o0 and %o2 point at the end and remain pointing at the end
665	! of their buffers. We pull things out by adding %o3 (which is
666	! the negation of the length) to the buffer end which gives us
667	! the curent location in the buffers. By incrementing %o3 we walk
668	! through both buffers without having to bump each buffer's
669	! pointer. A very fast 4 instruction loop.
670	!
671	.align 16
672.dcocl:
673	stba	%o4, [%o1 + %o3]ASI_USER
674	inccc	%o3
675	bl,a,pt	%ncc, .dcocl
676	ldub	[%o0 + %o3], %o4
677	!
678	! We're done. Go home.
679	!
680	membar	#Sync
681	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
682	retl
683	clr	%o0
684	!
685	! Try aligned copies from here.
686	!
687.dco_ns:
688	! %o0 = kernel addr (to be copied from)
689	! %o1 = user addr (to be copied to)
690	! %o2 = length
691	! %o3 = %o1 | %o2 (used for alignment checking)
692	! %o4 is alternate lo_fault
693	! %o5 is original lo_fault
694	!
695	! See if we're single byte aligned. If we are, check the
696	! limit for single byte copies. If we're smaller or equal,
697	! bounce to the byte for byte copy loop. Otherwise do it in
698	! HW (if enabled).
699	!
700	btst	1, %o3
701	bz,pt	%icc, .dcoh8
702	btst	7, %o3
703
704	ba	.dcobcp
705	nop
706.dcoh8:
707	!
708	! 8 byte aligned?
709	!
710	bnz,a	%ncc, .dcoh4
711	btst	3, %o3
712.dcos8:
713	!
714	! Housekeeping for copy loops. Uses same idea as in the byte for
715	! byte copy loop above.
716	!
717	add	%o0, %o2, %o0
718	add	%o1, %o2, %o1
719	sub	%g0, %o2, %o3
720	ba,pt	%ncc, .dodebc
721	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
722	!
723	! 4 byte aligned?
724	!
725.dcoh4:
726	bnz,pn	%ncc, .dcoh2
727	nop
728.dcos4:
729	add	%o0, %o2, %o0
730	add	%o1, %o2, %o1
731	sub	%g0, %o2, %o3
732	ba,pt	%ncc, .dodfbc
733	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
734	!
735	! We must be 2 byte aligned. Off we go.
736	! The check for small copies was done in the
737	! delay at .dcoh4
738	!
739.dcoh2:
740.dcos2:
741	add	%o0, %o2, %o0
742	add	%o1, %o2, %o1
743	sub	%g0, %o2, %o3
744	ba,pt	%ncc, .dodtbc
745	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
746
747.dodebc:
748	ldx	[%o0 + %o3], %o4
749	deccc	%o2
750	stxa	%o4, [%o1 + %o3]ASI_USER
751	bg,pt	%ncc, .dodebc
752	addcc	%o3, 8, %o3
753	!
754	! End of copy loop. Check to see if we're done. Most
755	! eight byte aligned copies end here.
756	!
757	bz,pt	%ncc, .dcofh
758	nop
759	!
760	! Something is left - do it byte for byte.
761	!
762	ba,pt	%ncc, .dcocl
763	ldub	[%o0 + %o3], %o4	! load next byte
764	!
765	! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
766	!
767	.align 32
768.dodfbc:
769	lduw	[%o0 + %o3], %o4
770	deccc	%o2
771	sta	%o4, [%o1 + %o3]ASI_USER
772	bg,pt	%ncc, .dodfbc
773	addcc	%o3, 4, %o3
774	!
775	! End of copy loop. Check to see if we're done. Most
776	! four byte aligned copies end here.
777	!
778	bz,pt	%ncc, .dcofh
779	nop
780	!
781	! Something is left. Do it byte for byte.
782	!
783	ba,pt	%ncc, .dcocl
784	ldub	[%o0 + %o3], %o4	! load next byte
785	!
786	! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
787	! copy.
788	!
789	.align 32
790.dodtbc:
791	lduh	[%o0 + %o3], %o4
792	deccc	%o2
793	stha	%o4, [%o1 + %o3]ASI_USER
794	bg,pt	%ncc, .dodtbc
795	addcc	%o3, 2, %o3
796	!
797	! End of copy loop. Anything left?
798	!
799	bz,pt	%ncc, .dcofh
800	nop
801	!
802	! Deal with the last byte
803	!
804	ldub	[%o0 + %o3], %o4
805	stba	%o4, [%o1 + %o3]ASI_USER
806.dcofh:
807	membar	#Sync
808	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
809	retl
810	clr	%o0
811
812.copyout_err:
813	ldn	[THREAD_REG + T_COPYOPS], %o4
814	brz	%o4, 2f
815	nop
816	ldn	[%o4 + CP_COPYOUT], %g2
817	jmp	%g2
818	nop
8192:
820	retl
821	mov	-1, %o0
822	SET_SIZE(copyout)
823
824#endif	/* lint */
825
826
827#ifdef	lint
828
829/*ARGSUSED*/
830int
831xcopyout(const void *kaddr, void *uaddr, size_t count)
832{ return (0); }
833
834#else	/* lint */
835
836	ENTRY(xcopyout)
837	sethi	%hi(.xcopyout_err), REAL_LOFAULT
838	b	.do_copyout
839	  or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
840.xcopyout_err:
841	ldn	[THREAD_REG + T_COPYOPS], %o4
842	brz	%o4, 2f
843	nop
844	ldn	[%o4 + CP_XCOPYOUT], %g2
845	jmp	%g2
846	nop
8472:
848	retl
849	mov	%g1, %o0
850	SET_SIZE(xcopyout)
851
852#endif	/* lint */
853
854#ifdef	lint
855
856/*ARGSUSED*/
857int
858xcopyout_little(const void *kaddr, void *uaddr, size_t count)
859{ return (0); }
860
861#else	/* lint */
862
863	ENTRY(xcopyout_little)
864	sethi	%hi(.little_err), %o4
865	ldn	[THREAD_REG + T_LOFAULT], %o5
866	or	%o4, %lo(.little_err), %o4
867	membar	#Sync			! sync error barrier
868	stn	%o4, [THREAD_REG + T_LOFAULT]
869
870	subcc	%g0, %o2, %o3
871	add	%o0, %o2, %o0
872	bz,pn	%ncc, 2f		! check for zero bytes
873	sub	%o2, 1, %o4
874	add	%o0, %o4, %o0		! start w/last byte
875	add	%o1, %o2, %o1
876	ldub	[%o0+%o3], %o4
877
8781:	stba	%o4, [%o1+%o3]ASI_AIUSL
879	inccc	%o3
880	sub	%o0, 2, %o0		! get next byte
881	bcc,a,pt %ncc, 1b
882	  ldub	[%o0+%o3], %o4
883
8842:	membar	#Sync			! sync error barrier
885	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
886	retl
887	mov	%g0, %o0		! return (0)
888	SET_SIZE(xcopyout_little)
889
890#endif	/* lint */
891
892/*
893 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
894 */
895
896#if defined(lint)
897
898/*ARGSUSED*/
899int
900copyin(const void *uaddr, void *kaddr, size_t count)
901{ return (0); }
902
903#else	/* lint */
904
905	ENTRY(copyin)
906	sethi	%hi(.copyin_err), REAL_LOFAULT
907	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
908
909.do_copyin:
910	!
911	! Check the length and bail if zero.
912	!
913	tst	%o2
914	bnz,pt	%ncc, 1f
915	  nop
916	retl
917	  clr	%o0
9181:
919	sethi	%hi(copyio_fault), %o3
920	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
921	or	%o3, %lo(copyio_fault), %o3
922	membar	#Sync
923	stn	%o3, [THREAD_REG + T_LOFAULT]
924
925	mov	%o0, SAVE_SRC
926	mov	%o1, SAVE_DST
927	mov	%o2, SAVE_COUNT
928
929	!
930	! Check to see if we're more than SMALL_LIMIT.
931	!
932	subcc	%o2, SMALL_LIMIT, %o3
933	bgu,a,pt %ncc, .dci_ns
934	or	%o0, %o1, %o3
935
936.dcibcp:
937	sub	%g0, %o2, %o3		! setup for copy loop
938	add	%o0, %o2, %o0
939	add	%o1, %o2, %o1
940	ba,pt	%ncc, .dcicl
941	lduba	[%o0 + %o3]ASI_USER, %o4
942	!
943	! %o0 and %o1 point at the end and remain pointing at the end
944	! of their buffers. We pull things out by adding %o3 (which is
945	! the negation of the length) to the buffer end which gives us
946	! the curent location in the buffers. By incrementing %o3 we walk
947	! through both buffers without having to bump each buffer's
948	! pointer. A very fast 4 instruction loop.
949	!
950	.align 16
951.dcicl:
952	stb	%o4, [%o1 + %o3]
953	inccc	%o3
954	bl,a,pt %ncc, .dcicl
955	lduba	[%o0 + %o3]ASI_USER, %o4
956	!
957	! We're done. Go home.
958	!
959	membar	#Sync
960	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
961	retl
962	clr	%o0
963	!
964	! Try aligned copies from here.
965	!
966.dci_ns:
967	!
968	! See if we're single byte aligned. If we are, check the
969	! limit for single byte copies. If we're smaller, or equal,
970	! bounce to the byte for byte copy loop. Otherwise do it in
971	! HW (if enabled).
972	!
973	btst	1, %o3
974	bz,a,pt	%icc, .dcih8
975	btst	7, %o3
976	ba	.dcibcp
977	nop
978
979.dcih8:
980	!
981	! 8 byte aligned?
982	!
983	bnz,a	%ncc, .dcih4
984	btst	3, %o3
985.dcis8:
986	!
987	! Housekeeping for copy loops. Uses same idea as in the byte for
988	! byte copy loop above.
989	!
990	add	%o0, %o2, %o0
991	add	%o1, %o2, %o1
992	sub	%g0, %o2, %o3
993	ba,pt	%ncc, .didebc
994	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
995	!
996	! 4 byte aligned?
997	!
998.dcih4:
999	bnz	%ncc, .dcih2
1000	nop
1001.dcis4:
1002	!
1003	! Housekeeping for copy loops. Uses same idea as in the byte
1004	! for byte copy loop above.
1005	!
1006	add	%o0, %o2, %o0
1007	add	%o1, %o2, %o1
1008	sub	%g0, %o2, %o3
1009	ba,pt	%ncc, .didfbc
1010	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
1011.dcih2:
1012.dcis2:
1013	add	%o0, %o2, %o0
1014	add	%o1, %o2, %o1
1015	sub	%g0, %o2, %o3
1016	ba,pt	%ncc, .didtbc
1017	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
1018
1019.didebc:
1020	ldxa	[%o0 + %o3]ASI_USER, %o4
1021	deccc	%o2
1022	stx	%o4, [%o1 + %o3]
1023	bg,pt	%ncc, .didebc
1024	addcc	%o3, 8, %o3
1025	!
1026	! End of copy loop. Most 8 byte aligned copies end here.
1027	!
1028	bz,pt	%ncc, .dcifh
1029	nop
1030	!
1031	! Something is left. Do it byte for byte.
1032	!
1033	ba,pt	%ncc, .dcicl
1034	lduba	[%o0 + %o3]ASI_USER, %o4
1035	!
1036	! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
1037	!
1038	.align 32
1039.didfbc:
1040	lduwa	[%o0 + %o3]ASI_USER, %o4
1041	deccc	%o2
1042	st	%o4, [%o1 + %o3]
1043	bg,pt	%ncc, .didfbc
1044	addcc	%o3, 4, %o3
1045	!
1046	! End of copy loop. Most 4 byte aligned copies end here.
1047	!
1048	bz,pt	%ncc, .dcifh
1049	nop
1050	!
1051	! Something is left. Do it byte for byte.
1052	!
1053	ba,pt	%ncc, .dcicl
1054	lduba	[%o0 + %o3]ASI_USER, %o4
1055	!
1056	! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
1057	! copy.
1058	!
1059	.align 32
1060.didtbc:
1061	lduha	[%o0 + %o3]ASI_USER, %o4
1062	deccc	%o2
1063	sth	%o4, [%o1 + %o3]
1064	bg,pt	%ncc, .didtbc
1065	addcc	%o3, 2, %o3
1066	!
1067	! End of copy loop. Most 2 byte aligned copies end here.
1068	!
1069	bz,pt	%ncc, .dcifh
1070	nop
1071	!
1072	! Deal with the last byte
1073	!
1074	lduba	[%o0 + %o3]ASI_USER, %o4
1075	stb	%o4, [%o1 + %o3]
1076.dcifh:
1077	membar	#Sync
1078	stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1079	retl
1080	clr	%o0
1081
1082.copyin_err:
1083	ldn	[THREAD_REG + T_COPYOPS], %o4
1084	brz	%o4, 2f
1085	nop
1086	ldn	[%o4 + CP_COPYIN], %g2
1087	jmp	%g2
1088	nop
10892:
1090	retl
1091	mov	-1, %o0
1092	SET_SIZE(copyin)
1093
1094#endif	/* lint */
1095
1096#ifdef	lint
1097
1098/*ARGSUSED*/
1099int
1100xcopyin(const void *uaddr, void *kaddr, size_t count)
1101{ return (0); }
1102
1103#else	/* lint */
1104
1105	ENTRY(xcopyin)
1106	sethi	%hi(.xcopyin_err), REAL_LOFAULT
1107	b	.do_copyin
1108	  or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
1109.xcopyin_err:
1110	ldn	[THREAD_REG + T_COPYOPS], %o4
1111	brz	%o4, 2f
1112	nop
1113	ldn	[%o4 + CP_XCOPYIN], %g2
1114	jmp	%g2
1115	nop
11162:
1117	retl
1118	mov	%g1, %o0
1119	SET_SIZE(xcopyin)
1120
1121#endif	/* lint */
1122
1123#ifdef	lint
1124
1125/*ARGSUSED*/
1126int
1127xcopyin_little(const void *uaddr, void *kaddr, size_t count)
1128{ return (0); }
1129
1130#else	/* lint */
1131
1132	ENTRY(xcopyin_little)
1133	sethi	%hi(.little_err), %o4
1134	ldn	[THREAD_REG + T_LOFAULT], %o5
1135	or	%o4, %lo(.little_err), %o4
1136	membar	#Sync				! sync error barrier
1137	stn	%o4, [THREAD_REG + T_LOFAULT]
1138
1139	subcc	%g0, %o2, %o3
1140	add	%o0, %o2, %o0
1141	bz,pn	%ncc, 2f		! check for zero bytes
1142	sub	%o2, 1, %o4
1143	add	%o0, %o4, %o0		! start w/last byte
1144	add	%o1, %o2, %o1
1145	lduba	[%o0+%o3]ASI_AIUSL, %o4
1146
11471:	stb	%o4, [%o1+%o3]
1148	inccc	%o3
1149	sub	%o0, 2, %o0		! get next byte
1150	bcc,a,pt %ncc, 1b
1151	  lduba	[%o0+%o3]ASI_AIUSL, %o4
1152
11532:	membar	#Sync				! sync error barrier
1154	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1155	retl
1156	mov	%g0, %o0		! return (0)
1157
1158.little_err:
1159	membar	#Sync				! sync error barrier
1160	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1161	retl
1162	mov	%g1, %o0
1163	SET_SIZE(xcopyin_little)
1164
1165#endif	/* lint */
1166
1167
1168/*
1169 * Copy a block of storage - must not overlap (from + len <= to).
1170 * No fault handler installed (to be called under on_fault())
1171 */
1172#if defined(lint)
1173
1174/* ARGSUSED */
1175void
1176copyin_noerr(const void *ufrom, void *kto, size_t count)
1177{}
1178
1179#else	/* lint */
1180
1181	ENTRY(copyin_noerr)
1182	sethi	%hi(.copyio_noerr), REAL_LOFAULT
1183	b	.do_copyin
1184	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
1185.copyio_noerr:
1186	jmp	SAVED_LOFAULT
1187	  nop
1188	SET_SIZE(copyin_noerr)
1189
1190#endif /* lint */
1191
1192/*
1193 * Copy a block of storage - must not overlap (from + len <= to).
1194 * No fault handler installed (to be called under on_fault())
1195 */
1196
1197#if defined(lint)
1198
1199/* ARGSUSED */
1200void
1201copyout_noerr(const void *kfrom, void *uto, size_t count)
1202{}
1203
1204#else	/* lint */
1205
1206	ENTRY(copyout_noerr)
1207	sethi	%hi(.copyio_noerr), REAL_LOFAULT
1208	b	.do_copyout
1209	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
1210	SET_SIZE(copyout_noerr)
1211
1212#endif /* lint */
1213
1214#if defined(lint)
1215
1216int use_hw_bcopy = 1;
1217int use_hw_bzero = 1;
1218
1219#else /* !lint */
1220
1221	.align	4
1222	DGDEF(use_hw_bcopy)
1223	.word	1
1224	DGDEF(use_hw_bzero)
1225	.word	1
1226
1227	.align	64
1228	.section ".text"
1229#endif /* !lint */
1230
1231
1232/*
1233 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
1234 * longer than 256 bytes in length using load/stores. For the generic
1235 * module, we will return 1 to ensure that the pages in cache should be
1236 * flushed to ensure integrity.
1237 * Caller is responsible for ensuring use_hw_bzero is true and that
1238 * kpreempt_disable() has been called.
1239 */
1240#ifdef lint
1241/*ARGSUSED*/
1242int
1243hwblkclr(void *addr, size_t len)
1244{
1245	return(0);
1246}
1247#else /* lint */
1248	! %i0 - start address
1249	! %i1 - length of region (multiple of 64)
1250
1251	ENTRY(hwblkclr)
1252	save	%sp, -SA(MINFRAME), %sp
1253
1254	! Must be block-aligned
1255	andcc	%i0, 0x3f, %g0
1256	bnz,pn	%ncc, 1f
1257	  nop
1258
1259	! ... and must be 256 bytes or more
1260	cmp	%i1, 0x100
1261	blu,pn	%ncc, 1f
1262	  nop
1263
1264	! ... and length must be a multiple of 64
1265	andcc	%i1, 0x3f, %g0
1266	bz,pn	%ncc, .pz_doblock
1267	nop
1268
12691:	! punt, call bzero but notify the caller that bzero was used
1270	mov	%i0, %o0
1271	call	bzero
1272	  mov	%i1, %o1
1273	ret
1274	restore	%g0, 1, %o0	! return (1) - did not use block operations
1275
1276	! Already verified that there are at least 256 bytes to set
1277.pz_doblock:
1278	stx	%g0, [%i0+0x0]
1279	stx	%g0, [%i0+0x40]
1280	stx	%g0, [%i0+0x80]
1281	stx	%g0, [%i0+0xc0]
1282
1283	stx	%g0, [%i0+0x8]
1284	stx	%g0, [%i0+0x10]
1285	stx	%g0, [%i0+0x18]
1286	stx	%g0, [%i0+0x20]
1287	stx	%g0, [%i0+0x28]
1288	stx	%g0, [%i0+0x30]
1289	stx	%g0, [%i0+0x38]
1290
1291	stx	%g0, [%i0+0x48]
1292	stx	%g0, [%i0+0x50]
1293	stx	%g0, [%i0+0x58]
1294	stx	%g0, [%i0+0x60]
1295	stx	%g0, [%i0+0x68]
1296	stx	%g0, [%i0+0x70]
1297	stx	%g0, [%i0+0x78]
1298
1299	stx	%g0, [%i0+0x88]
1300	stx	%g0, [%i0+0x90]
1301	stx	%g0, [%i0+0x98]
1302	stx	%g0, [%i0+0xa0]
1303	stx	%g0, [%i0+0xa8]
1304	stx	%g0, [%i0+0xb0]
1305	stx	%g0, [%i0+0xb8]
1306
1307	stx	%g0, [%i0+0xc8]
1308	stx	%g0, [%i0+0xd0]
1309	stx	%g0, [%i0+0xd8]
1310	stx	%g0, [%i0+0xe0]
1311	stx	%g0, [%i0+0xe8]
1312	stx	%g0, [%i0+0xf0]
1313	stx	%g0, [%i0+0xf8]
1314
1315	sub	%i1, 0x100, %i1
1316	cmp	%i1, 0x100
1317	bgu,pt	%ncc, .pz_doblock
1318	add	%i0, 0x100, %i0
1319
13202:
1321	! Check if more than 64 bytes to set
1322	cmp	%i1,0x40
1323	blu	%ncc, .pz_finish
1324	nop
1325
13263:
1327	stx	%g0, [%i0+0x0]
1328	stx	%g0, [%i0+0x8]
1329	stx	%g0, [%i0+0x10]
1330	stx	%g0, [%i0+0x18]
1331	stx	%g0, [%i0+0x20]
1332	stx	%g0, [%i0+0x28]
1333	stx	%g0, [%i0+0x30]
1334	stx	%g0, [%i0+0x38]
1335
1336	subcc	%i1, 0x40, %i1
1337	bgu,pt	%ncc, 3b
1338	add	%i0, 0x40, %i0
1339
1340.pz_finish:
1341	membar	#Sync
1342	ret
1343	restore	%g0, 1, %o0	! return (1) - did not use block operations
1344	SET_SIZE(hwblkclr)
1345#endif	/* lint */
1346
1347#ifdef	lint
1348/* Copy 32 bytes of data from src to dst using physical addresses */
1349/*ARGSUSED*/
1350void
1351hw_pa_bcopy32(uint64_t src, uint64_t dst)
1352{}
1353#else	/*!lint */
1354
1355	/*
1356	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
1357	 * using physical addresses.
1358	 */
1359	ENTRY_NP(hw_pa_bcopy32)
1360	rdpr    %pstate, %g1
1361	andn    %g1, PSTATE_IE, %g2
1362	wrpr    %g0, %g2, %pstate
1363
1364	ldxa    [%o0]ASI_MEM, %o2
1365	add     %o0, 8, %o0
1366	ldxa    [%o0]ASI_MEM, %o3
1367	add     %o0, 8, %o0
1368	ldxa    [%o0]ASI_MEM, %o4
1369	add     %o0, 8, %o0
1370	ldxa    [%o0]ASI_MEM, %o5
1371	stxa    %o2, [%o1]ASI_MEM
1372	add     %o1, 8, %o1
1373	stxa    %o3, [%o1]ASI_MEM
1374	add     %o1, 8, %o1
1375	stxa    %o4, [%o1]ASI_MEM
1376	add     %o1, 8, %o1
1377	stxa    %o5, [%o1]ASI_MEM
1378
1379	membar	#Sync
1380	retl
1381	  wrpr    %g0, %g1, %pstate
1382	SET_SIZE(hw_pa_bcopy32)
1383#endif /* lint */
1384
1385/*
1386 * Zero a block of storage.
1387 *
1388 * uzero is used by the kernel to zero a block in user address space.
1389 */
1390
1391
1392#if defined(lint)
1393
1394/* ARGSUSED */
1395int
1396kzero(void *addr, size_t count)
1397{ return(0); }
1398
1399/* ARGSUSED */
1400void
1401uzero(void *addr, size_t count)
1402{}
1403
1404#else	/* lint */
1405
1406	ENTRY(uzero)
1407	!
1408	! Set a new lo_fault handler only if we came in with one
1409	! already specified.
1410	!
1411	wr	%g0, ASI_USER, %asi
1412	ldn	[THREAD_REG + T_LOFAULT], %o5
1413	tst	%o5
1414	bz,pt	%ncc, .do_zero
1415	sethi	%hi(.zeroerr), %o2
1416	or	%o2, %lo(.zeroerr), %o2
1417	membar	#Sync
1418	ba,pt	%ncc, .do_zero
1419	stn	%o2, [THREAD_REG + T_LOFAULT]
1420
1421	ENTRY(kzero)
1422	!
1423	! Always set a lo_fault handler
1424	!
1425	wr	%g0, ASI_P, %asi
1426	ldn	[THREAD_REG + T_LOFAULT], %o5
1427	sethi	%hi(.zeroerr), %o2
1428	or	%o5, LOFAULT_SET, %o5
1429	or	%o2, %lo(.zeroerr), %o2
1430	membar	#Sync
1431	ba,pt	%ncc, .do_zero
1432	stn	%o2, [THREAD_REG + T_LOFAULT]
1433
1434/*
1435 * We got here because of a fault during kzero or if
1436 * uzero or bzero was called with t_lofault non-zero.
1437 * Otherwise we've already run screaming from the room.
1438 * Errno value is in %g1. Note that we're here iff
1439 * we did set t_lofault.
1440 */
1441.zeroerr:
1442	!
1443	! Undo asi register setting. Just set it to be the
1444        ! kernel default without checking.
1445	!
1446	wr	%g0, ASI_P, %asi
1447
1448	!
1449	! We did set t_lofault. It may well have been zero coming in.
1450	!
14511:
1452	tst	%o5
1453	membar #Sync
1454	bne,pn	%ncc, 3f
1455	andncc	%o5, LOFAULT_SET, %o5
14562:
1457	!
1458	! Old handler was zero. Just return the error.
1459	!
1460	retl				! return
1461	mov	%g1, %o0		! error code from %g1
14623:
1463	!
1464	! We're here because %o5 was non-zero. It was non-zero
1465	! because either LOFAULT_SET was present, a previous fault
1466	! handler was present or both. In all cases we need to reset
1467	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
1468	! before we either simply return the error or we invoke the
1469	! previously specified handler.
1470	!
1471	be	%ncc, 2b
1472	stn	%o5, [THREAD_REG + T_LOFAULT]
1473	jmp	%o5			! goto real handler
1474	  nop
1475	SET_SIZE(kzero)
1476	SET_SIZE(uzero)
1477
1478#endif	/* lint */
1479
1480/*
1481 * Zero a block of storage.
1482 */
1483
1484#if defined(lint)
1485
1486/* ARGSUSED */
1487void
1488bzero(void *addr, size_t count)
1489{}
1490
1491#else	/* lint */
1492
1493	ENTRY(bzero)
1494	wr	%g0, ASI_P, %asi
1495
1496	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
1497	tst	%o5
1498	bz,pt	%ncc, .do_zero
1499	sethi	%hi(.zeroerr), %o2
1500	or	%o2, %lo(.zeroerr), %o2
1501	membar	#Sync				! sync error barrier
1502	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector
1503
1504.do_zero:
1505	cmp	%o1, 7
1506	blu,pn	%ncc, .byteclr
1507	nop
1508
1509	cmp	%o1, 15
1510	blu,pn	%ncc, .wdalign
1511	nop
1512
1513	andcc	%o0, 7, %o3		! is add aligned on a 8 byte bound
1514	bz,pt	%ncc, .blkalign		! already double aligned
1515	sub	%o3, 8, %o3		! -(bytes till double aligned)
1516	add	%o1, %o3, %o1		! update o1 with new count
1517
15181:
1519	stba	%g0, [%o0]%asi
1520	inccc	%o3
1521	bl,pt	%ncc, 1b
1522	inc	%o0
1523
1524	! Now address is double aligned
1525.blkalign:
1526	cmp	%o1, 0x80		! check if there are 128 bytes to set
1527	blu,pn	%ncc, .bzero_small
1528	mov	%o1, %o3
1529
1530	andcc	%o0, 0x3f, %o3		! is block aligned?
1531	bz,pt	%ncc, .bzero_blk
1532	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
1533	add	%o1, %o3, %o1		! o1 is the remainder
1534
1535	! Clear -(%o3) bytes till block aligned
15361:
1537	stxa	%g0, [%o0]%asi
1538	addcc	%o3, 8, %o3
1539	bl,pt	%ncc, 1b
1540	add	%o0, 8, %o0
1541
1542.bzero_blk:
1543	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
1544	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes
1545
1546	cmp	%o4, 0x100		! 256 bytes or more
1547	blu,pn	%ncc, 3f
1548	nop
1549
15502:
1551	stxa	%g0, [%o0+0x0]%asi
1552	stxa	%g0, [%o0+0x40]%asi
1553	stxa	%g0, [%o0+0x80]%asi
1554	stxa	%g0, [%o0+0xc0]%asi
1555
1556	stxa	%g0, [%o0+0x8]%asi
1557	stxa	%g0, [%o0+0x10]%asi
1558	stxa	%g0, [%o0+0x18]%asi
1559	stxa	%g0, [%o0+0x20]%asi
1560	stxa	%g0, [%o0+0x28]%asi
1561	stxa	%g0, [%o0+0x30]%asi
1562	stxa	%g0, [%o0+0x38]%asi
1563
1564	stxa	%g0, [%o0+0x48]%asi
1565	stxa	%g0, [%o0+0x50]%asi
1566	stxa	%g0, [%o0+0x58]%asi
1567	stxa	%g0, [%o0+0x60]%asi
1568	stxa	%g0, [%o0+0x68]%asi
1569	stxa	%g0, [%o0+0x70]%asi
1570	stxa	%g0, [%o0+0x78]%asi
1571
1572	stxa	%g0, [%o0+0x88]%asi
1573	stxa	%g0, [%o0+0x90]%asi
1574	stxa	%g0, [%o0+0x98]%asi
1575	stxa	%g0, [%o0+0xa0]%asi
1576	stxa	%g0, [%o0+0xa8]%asi
1577	stxa	%g0, [%o0+0xb0]%asi
1578	stxa	%g0, [%o0+0xb8]%asi
1579
1580	stxa	%g0, [%o0+0xc8]%asi
1581	stxa	%g0, [%o0+0xd0]%asi
1582	stxa	%g0, [%o0+0xd8]%asi
1583	stxa	%g0, [%o0+0xe0]%asi
1584	stxa	%g0, [%o0+0xe8]%asi
1585	stxa	%g0, [%o0+0xf0]%asi
1586	stxa	%g0, [%o0+0xf8]%asi
1587
1588	sub	%o4, 0x100, %o4
1589	cmp	%o4, 0x100
1590	bgu,pt	%ncc, 2b
1591	add	%o0, 0x100, %o0
1592
15933:
1594	! ... check if 64 bytes to set
1595	cmp	%o4, 0x40
1596	blu	%ncc, .bzero_blk_done
1597	nop
1598
15994:
1600	stxa	%g0, [%o0+0x0]%asi
1601	stxa	%g0, [%o0+0x8]%asi
1602	stxa	%g0, [%o0+0x10]%asi
1603	stxa	%g0, [%o0+0x18]%asi
1604	stxa	%g0, [%o0+0x20]%asi
1605	stxa	%g0, [%o0+0x28]%asi
1606	stxa	%g0, [%o0+0x30]%asi
1607	stxa	%g0, [%o0+0x38]%asi
1608
1609	subcc	%o4, 0x40, %o4
1610	bgu,pt	%ncc, 3b
1611	add	%o0, 0x40, %o0
1612
1613.bzero_blk_done:
1614	membar	#Sync
1615
1616.bzero_small:
1617	! Set the remaining doubles
1618	subcc	%o3, 8, %o3		! Can we store any doubles?
1619	blu,pn	%ncc, .byteclr
1620	and	%o1, 7, %o1		! calc bytes left after doubles
1621
1622.dbclr:
1623	stxa	%g0, [%o0]%asi		! Clear the doubles
1624	subcc	%o3, 8, %o3
1625	bgeu,pt	%ncc, .dbclr
1626	add	%o0, 8, %o0
1627
1628	ba	.byteclr
1629	nop
1630
1631.wdalign:
1632	andcc	%o0, 3, %o3		! is add aligned on a word boundary
1633	bz,pn	%ncc, .wdclr
1634	andn	%o1, 3, %o3		! create word sized count in %o3
1635
1636	dec	%o1			! decrement count
1637	stba	%g0, [%o0]%asi		! clear a byte
1638	ba	.wdalign
1639	inc	%o0			! next byte
1640
1641.wdclr:
1642	sta	%g0, [%o0]%asi		! 4-byte clearing loop
1643	subcc	%o3, 4, %o3
1644	bnz,pt	%ncc, .wdclr
1645	inc	4, %o0
1646
1647	and	%o1, 3, %o1		! leftover count, if any
1648
1649.byteclr:
1650	! Set the leftover bytes
1651	brz	%o1, .bzero_exit
1652	nop
1653
16547:
1655	deccc	%o1			! byte clearing loop
1656	stba	%g0, [%o0]%asi
1657	bgu,pt	%ncc, 7b
1658	inc	%o0
1659
1660.bzero_exit:
1661	!
1662	! We're just concerned with whether t_lofault was set
1663	! when we came in. We end up here from either kzero()
1664	! or bzero(). kzero() *always* sets a lofault handler.
1665	! It ors LOFAULT_SET into %o5 to indicate it has done
1666	! this even if the value of %o5 is otherwise zero.
1667	! bzero() sets a lofault handler *only* if one was
1668	! previously set. Accordingly we need to examine
1669	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
1670	! before resetting the error handler.
1671	!
1672	tst	%o5
1673	bz	%ncc, 1f
1674	andn	%o5, LOFAULT_SET, %o5
1675	membar	#Sync				! sync error barrier
1676	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
16771:
1678	retl
1679	clr	%o0			! return (0)
1680
1681	SET_SIZE(bzero)
1682#endif	/* lint */
1683