/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #if !defined(lint) #include "assym.h" #endif /* lint */ /* * Less then or equal this number of bytes we will always copy byte-for-byte */ #define SMALL_LIMIT 7 /* * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault * handler was set */ #define LOFAULT_SET 2 /* * Copy a block of storage, returning an error code if `from' or * `to' takes a kernel pagefault which cannot be resolved. * Returns errno value on pagefault error, 0 if all ok */ #if defined(lint) /* ARGSUSED */ int kcopy(const void *from, void *to, size_t count) { return(0); } #else /* lint */ .seg ".text" .align 4 ENTRY(kcopy) save %sp, -SA(MINFRAME), %sp set .copyerr, %l7 ! copyerr is lofault value ldn [THREAD_REG + T_LOFAULT], %o5 ! save existing handler or %o5, LOFAULT_SET, %o5 membar #Sync ! sync error barrier b .do_copy ! common code stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault /* * We got here because of a fault during kcopy. * Errno value is in %g1. */ .copyerr: ! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET ! into %o5 to indicate it has set t_lofault handler. Need to clear ! LOFAULT_SET flag before restoring the error handler. andn %o5, LOFAULT_SET, %o5 membar #Sync ! sync error barrier stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault ret restore %g1, 0, %o0 SET_SIZE(kcopy) #endif /* lint */ /* * Copy a block of storage - must not overlap (from + len <= to). */ #if defined(lint) /* ARGSUSED */ void bcopy(const void *from, void *to, size_t count) {} #else /* lint */ ENTRY(bcopy) save %sp, -SA(MINFRAME), %sp clr %o5 ! flag LOFAULT_SET is not set for bcopy .do_copy: cmp %i2, 12 ! for small counts blu %ncc, .bytecp ! just copy bytes .empty ! ! use aligned transfers where possible ! xor %i0, %i1, %o4 ! xor from and to address btst 7, %o4 ! if lower three bits zero bz .aldoubcp ! can align on double boundary .empty ! assembler complaints about label xor %i0, %i1, %o4 ! xor from and to address btst 3, %o4 ! if lower two bits zero bz .alwordcp ! can align on word boundary btst 3, %i0 ! delay slot, from address unaligned? ! ! use aligned reads and writes where possible ! this differs from wordcp in that it copes ! with odd alignment between source and destnation ! using word reads and writes with the proper shifts ! in between to align transfers to and from memory ! i0 - src address, i1 - dest address, i2 - count ! i3, i4 - tmps for used generating complete word ! i5 (word to write) ! l0 size in bits of upper part of source word (US) ! l1 size in bits of lower part of source word (LS = 32 - US) ! l2 size in bits of upper part of destination word (UD) ! l3 size in bits of lower part of destination word (LD = 32 - UD) ! l4 number of bytes leftover after aligned transfers complete ! l5 the number 32 ! mov 32, %l5 ! load an oft-needed constant bz .align_dst_only btst 3, %i1 ! is destnation address aligned? clr %i4 ! clear registers used in either case bz .align_src_only clr %l0 ! ! both source and destination addresses are unaligned ! 1: ! align source ldub [%i0], %i3 ! read a byte from source address add %i0, 1, %i0 ! increment source address or %i4, %i3, %i4 ! or in with previous bytes (if any) btst 3, %i0 ! is source aligned? add %l0, 8, %l0 ! increment size of upper source (US) bnz,a 1b sll %i4, 8, %i4 ! make room for next byte sub %l5, %l0, %l1 ! generate shift left count (LS) sll %i4, %l1, %i4 ! prepare to get rest ld [%i0], %i3 ! read a word add %i0, 4, %i0 ! increment source address srl %i3, %l0, %i5 ! upper src bits into lower dst bits or %i4, %i5, %i5 ! merge mov 24, %l3 ! align destination 1: srl %i5, %l3, %i4 ! prepare to write a single byte stb %i4, [%i1] ! write a byte add %i1, 1, %i1 ! increment destination address sub %i2, 1, %i2 ! decrement count btst 3, %i1 ! is destination aligned? bnz,a 1b sub %l3, 8, %l3 ! delay slot, decrement shift count (LD) sub %l5, %l3, %l2 ! generate shift left count (UD) sll %i5, %l2, %i5 ! move leftover into upper bytes cmp %l2, %l0 ! cmp # reqd to fill dst w old src left bgu %ncc, .more_needed ! need more to fill than we have nop sll %i3, %l1, %i3 ! clear upper used byte(s) srl %i3, %l1, %i3 ! get the odd bytes between alignments sub %l0, %l2, %l0 ! regenerate shift count sub %l5, %l0, %l1 ! generate new shift left count (LS) and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0 andn %i2, 3, %i2 ! # of aligned bytes that can be moved srl %i3, %l0, %i4 or %i5, %i4, %i5 st %i5, [%i1] ! write a word subcc %i2, 4, %i2 ! decrement count bz %ncc, .unalign_out add %i1, 4, %i1 ! increment destination address b 2f sll %i3, %l1, %i5 ! get leftover into upper bits .more_needed: sll %i3, %l0, %i3 ! save remaining byte(s) srl %i3, %l0, %i3 sub %l2, %l0, %l1 ! regenerate shift count sub %l5, %l1, %l0 ! generate new shift left count sll %i3, %l1, %i4 ! move to fill empty space b 3f or %i5, %i4, %i5 ! merge to complete word ! ! the source address is aligned and destination is not ! .align_dst_only: ld [%i0], %i4 ! read a word add %i0, 4, %i0 ! increment source address mov 24, %l0 ! initial shift alignment count 1: srl %i4, %l0, %i3 ! prepare to write a single byte stb %i3, [%i1] ! write a byte add %i1, 1, %i1 ! increment destination address sub %i2, 1, %i2 ! decrement count btst 3, %i1 ! is destination aligned? bnz,a 1b sub %l0, 8, %l0 ! delay slot, decrement shift count .xfer: sub %l5, %l0, %l1 ! generate shift left count sll %i4, %l1, %i5 ! get leftover 3: and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0 andn %i2, 3, %i2 ! # of aligned bytes that can be moved 2: ld [%i0], %i3 ! read a source word add %i0, 4, %i0 ! increment source address srl %i3, %l0, %i4 ! upper src bits into lower dst bits or %i5, %i4, %i5 ! merge with upper dest bits (leftover) st %i5, [%i1] ! write a destination word subcc %i2, 4, %i2 ! decrement count bz %ncc, .unalign_out ! check if done add %i1, 4, %i1 ! increment destination address b 2b ! loop sll %i3, %l1, %i5 ! get leftover .unalign_out: tst %l4 ! any bytes leftover? bz %ncc, .cpdone .empty ! allow next instruction in delay slot 1: sub %l0, 8, %l0 ! decrement shift srl %i3, %l0, %i4 ! upper src byte into lower dst byte stb %i4, [%i1] ! write a byte subcc %l4, 1, %l4 ! decrement count bz %ncc, .cpdone ! done? add %i1, 1, %i1 ! increment destination tst %l0 ! any more previously read bytes bnz %ncc, 1b ! we have leftover bytes mov %l4, %i2 ! delay slot, mv cnt where dbytecp wants b .dbytecp ! let dbytecp do the rest sub %i0, %i1, %i0 ! i0 gets the difference of src and dst ! ! the destination address is aligned and the source is not ! .align_src_only: ldub [%i0], %i3 ! read a byte from source address add %i0, 1, %i0 ! increment source address or %i4, %i3, %i4 ! or in with previous bytes (if any) btst 3, %i0 ! is source aligned? add %l0, 8, %l0 ! increment shift count (US) bnz,a .align_src_only sll %i4, 8, %i4 ! make room for next byte b,a .xfer ! ! if from address unaligned for double-word moves, ! move bytes till it is, if count is < 56 it could take ! longer to align the thing than to do the transfer ! in word size chunks right away ! .aldoubcp: cmp %i2, 56 ! if count < 56, use wordcp, it takes blu,a %ncc, .alwordcp ! longer to align doubles than words mov 3, %o0 ! mask for word alignment call .alignit ! copy bytes until aligned mov 7, %o0 ! mask for double alignment ! ! source and destination are now double-word aligned ! i3 has aligned count returned by alignit ! and %i2, 7, %i2 ! unaligned leftover count sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 5: ldx [%i0+%i1], %o4 ! read from address stx %o4, [%i1] ! write at destination address subcc %i3, 8, %i3 ! dec count bgu %ncc, 5b add %i1, 8, %i1 ! delay slot, inc to address cmp %i2, 4 ! see if we can copy a word blu %ncc, .dbytecp ! if 3 or less bytes use bytecp .empty ! ! for leftover bytes we fall into wordcp, if needed ! .wordcp: and %i2, 3, %i2 ! unaligned leftover count 5: ld [%i0+%i1], %o4 ! read from address st %o4, [%i1] ! write at destination address subcc %i3, 4, %i3 ! dec count bgu %ncc, 5b add %i1, 4, %i1 ! delay slot, inc to address b,a .dbytecp ! we come here to align copies on word boundaries .alwordcp: call .alignit ! go word-align it mov 3, %o0 ! bits that must be zero to be aligned b .wordcp sub %i0, %i1, %i0 ! i0 gets the difference of src and dst ! ! byte copy, works with any alignment ! .bytecp: b .dbytecp sub %i0, %i1, %i0 ! i0 gets difference of src and dst ! ! differenced byte copy, works with any alignment ! assumes dest in %i1 and (source - dest) in %i0 ! 1: stb %o4, [%i1] ! write to address inc %i1 ! inc to address .dbytecp: deccc %i2 ! dec count bgeu,a %ncc, 1b ! loop till done ldub [%i0+%i1], %o4 ! read from address .cpdone: membar #Sync ! sync error barrier ! Restore t_lofault handler, if came here from kcopy(). tst %o5 bz %ncc, 1f andn %o5, LOFAULT_SET, %o5 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1: ret restore %g0, 0, %o0 ! return (0) /* * Common code used to align transfers on word and doubleword * boudaries. Aligns source and destination and returns a count * of aligned bytes to transfer in %i3 */ 1: inc %i0 ! inc from stb %o4, [%i1] ! write a byte inc %i1 ! inc to dec %i2 ! dec count .alignit: btst %o0, %i0 ! %o0 is bit mask to check for alignment bnz,a 1b ldub [%i0], %o4 ! read next byte retl andn %i2, %o0, %i3 ! return size of aligned bytes SET_SIZE(bcopy) #endif /* lint */ /* * Block copy with possibly overlapped operands. */ #if defined(lint) /*ARGSUSED*/ void ovbcopy(const void *from, void *to, size_t count) {} #else /* lint */ ENTRY(ovbcopy) tst %o2 ! check count bgu,a %ncc, 1f ! nothing to do or bad arguments subcc %o0, %o1, %o3 ! difference of from and to address retl ! return nop 1: bneg,a %ncc, 2f neg %o3 ! if < 0, make it positive 2: cmp %o2, %o3 ! cmp size and abs(from - to) bleu %ncc, bcopy ! if size <= abs(diff): use bcopy, .empty ! no overlap cmp %o0, %o1 ! compare from and to addresses blu %ncc, .ov_bkwd ! if from < to, copy backwards nop ! ! Copy forwards. ! .ov_fwd: ldub [%o0], %o3 ! read from address inc %o0 ! inc from address stb %o3, [%o1] ! write to address deccc %o2 ! dec count bgu %ncc, .ov_fwd ! loop till done inc %o1 ! inc to address retl ! return nop ! ! Copy backwards. ! .ov_bkwd: deccc %o2 ! dec count ldub [%o0 + %o2], %o3 ! get byte at end of src bgu %ncc, .ov_bkwd ! loop till done stb %o3, [%o1 + %o2] ! delay slot, store at end of dst retl ! return nop SET_SIZE(ovbcopy) #endif /* lint */ /* * hwblkpagecopy() * * Copies exactly one page. This routine assumes the caller (ppcopy) * has already disabled kernel preemption and has checked * use_hw_bcopy. */ #ifdef lint /*ARGSUSED*/ void hwblkpagecopy(const void *src, void *dst) { } #else /* lint */ ENTRY(hwblkpagecopy) save %sp, -SA(MINFRAME), %sp ! %i0 - source address (arg) ! %i1 - destination address (arg) ! %i2 - length of region (not arg) set PAGESIZE, %i2 /* * Copying exactly one page and PAGESIZE is in mutliple of 0x80. */ 1: ldx [%i0+0x0], %l0 ldx [%i0+0x8], %l1 ldx [%i0+0x10], %l2 ldx [%i0+0x18], %l3 ldx [%i0+0x20], %l4 ldx [%i0+0x28], %l5 ldx [%i0+0x30], %l6 ldx [%i0+0x38], %l7 stx %l0, [%i1+0x0] stx %l1, [%i1+0x8] stx %l2, [%i1+0x10] stx %l3, [%i1+0x18] stx %l4, [%i1+0x20] stx %l5, [%i1+0x28] stx %l6, [%i1+0x30] stx %l7, [%i1+0x38] ldx [%i0+0x40], %l0 ldx [%i0+0x48], %l1 ldx [%i0+0x50], %l2 ldx [%i0+0x58], %l3 ldx [%i0+0x60], %l4 ldx [%i0+0x68], %l5 ldx [%i0+0x70], %l6 ldx [%i0+0x78], %l7 stx %l0, [%i1+0x40] stx %l1, [%i1+0x48] stx %l2, [%i1+0x50] stx %l3, [%i1+0x58] stx %l4, [%i1+0x60] stx %l5, [%i1+0x68] stx %l6, [%i1+0x70] stx %l7, [%i1+0x78] add %i0, 0x80, %i0 subcc %i2, 0x80, %i2 bgu,pt %xcc, 1b add %i1, 0x80, %i1 membar #Sync ret restore %g0, 0, %o0 SET_SIZE(hwblkpagecopy) #endif /* lint */ /* * Transfer data to and from user space - * Note that these routines can cause faults * It is assumed that the kernel has nothing at * less than KERNELBASE in the virtual address space. * * Note that copyin(9F) and copyout(9F) are part of the * DDI/DKI which specifies that they return '-1' on "errors." * * Sigh. * * So there's two extremely similar routines - xcopyin() and xcopyout() * which return the errno that we've faithfully computed. This * allows other callers (e.g. uiomove(9F)) to work correctly. * Given that these are used pretty heavily, we expand the calling * sequences inline for all flavours (rather than making wrappers). * * There are also stub routines for xcopyout_little and xcopyin_little, * which currently are intended to handle requests of <= 16 bytes from * do_unaligned. Future enhancement to make them handle 8k pages efficiently * is left as an exercise... */ /* * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr) * * General theory of operation: * * None of the copyops routines grab a window. * * Flow: * * If count == zero return zero. * * Store the previous lo_fault handler into %g6. * Place our secondary lofault handler into %g5. * Place the address of our fault handler into %o3. * * If count is less than or equal to SMALL_LIMIT (7) we * always do a byte for byte copy. * * If count is > SMALL_LIMIT, we check the alignment of the input * and output pointers. We store -count in %o3, we store the number * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop * in %o2. Following this we branch to the appropriate copy loop and * copy that many chunks. Since we've been adding the chunk size * to %o3 each time through as well as decrementing %o2, we can tell * if any data is is left to be copied by examining %o3. If that is * zero, we're done and can go home. If not, we figure out what the * largest chunk size left to be copied is and branch to that copy * loop unless there's only one byte left. We load that as we're * branching to code that stores it just before we return. * * Fault handlers are invoked if we reference memory that has no * current mapping. All forms share the same copyio_fault handler. * This routine handles fixing up the stack and general housecleaning. * Each copy operation has a simple fault handler that is then called * to do the work specific to the invidual operation. The handler * for copyOP and xcopyOP are found at the end of individual function. * The handlers for xcopyOP_little are found at the end of xcopyin_little. * The handlers for copyOP_noerr are found at the end of copyin_noerr. */ /* * Copy kernel data to user space (copyout/xcopyout/xcopyout_little). */ #if defined(lint) /*ARGSUSED*/ int copyout(const void *kaddr, void *uaddr, size_t count) { return (0); } #else /* lint */ /* * We save the arguments in the following registers in case of a fault: * kaddr - %g2 * uaddr - %g3 * count - %g4 */ #define SAVE_SRC %g2 #define SAVE_DST %g3 #define SAVE_COUNT %g4 #define REAL_LOFAULT %g5 #define SAVED_LOFAULT %g6 /* * Generic copyio fault handler. This is the first line of defense when a * fault occurs in (x)copyin/(x)copyout. In order for this to function * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT. * This allows us to share common code for all the flavors of the copy * operations, including the _noerr versions. * * Note that this function will restore the original input parameters before * calling REAL_LOFAULT. So the real handler can vector to the appropriate * member of the t_copyop structure, if needed. */ ENTRY(copyio_fault) membar #Sync stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault mov SAVE_SRC, %o0 mov SAVE_DST, %o1 jmp REAL_LOFAULT mov SAVE_COUNT, %o2 SET_SIZE(copyio_fault) ENTRY(copyout) sethi %hi(.copyout_err), REAL_LOFAULT or REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT .do_copyout: ! ! Check the length and bail if zero. ! tst %o2 bnz,pt %ncc, 1f nop retl clr %o0 1: sethi %hi(copyio_fault), %o3 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT or %o3, %lo(copyio_fault), %o3 membar #Sync stn %o3, [THREAD_REG + T_LOFAULT] mov %o0, SAVE_SRC mov %o1, SAVE_DST mov %o2, SAVE_COUNT ! ! Check to see if we're more than SMALL_LIMIT (7 bytes). ! Run in leaf mode, using the %o regs as our input regs. ! subcc %o2, SMALL_LIMIT, %o3 bgu,a,pt %ncc, .dco_ns or %o0, %o1, %o3 .dcobcp: sub %g0, %o2, %o3 ! negate count add %o0, %o2, %o0 ! make %o0 point at the end add %o1, %o2, %o1 ! make %o1 point at the end ba,pt %ncc, .dcocl ldub [%o0 + %o3], %o4 ! load first byte ! ! %o0 and %o2 point at the end and remain pointing at the end ! of their buffers. We pull things out by adding %o3 (which is ! the negation of the length) to the buffer end which gives us ! the curent location in the buffers. By incrementing %o3 we walk ! through both buffers without having to bump each buffer's ! pointer. A very fast 4 instruction loop. ! .align 16 .dcocl: stba %o4, [%o1 + %o3]ASI_USER inccc %o3 bl,a,pt %ncc, .dcocl ldub [%o0 + %o3], %o4 ! ! We're done. Go home. ! membar #Sync stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] retl clr %o0 ! ! Try aligned copies from here. ! .dco_ns: ! %o0 = kernel addr (to be copied from) ! %o1 = user addr (to be copied to) ! %o2 = length ! %o3 = %o1 | %o2 (used for alignment checking) ! %o4 is alternate lo_fault ! %o5 is original lo_fault ! ! See if we're single byte aligned. If we are, check the ! limit for single byte copies. If we're smaller or equal, ! bounce to the byte for byte copy loop. Otherwise do it in ! HW (if enabled). ! btst 1, %o3 bz,pt %icc, .dcoh8 btst 7, %o3 ba .dcobcp nop .dcoh8: ! ! 8 byte aligned? ! bnz,a %ncc, .dcoh4 btst 3, %o3 .dcos8: ! ! Housekeeping for copy loops. Uses same idea as in the byte for ! byte copy loop above. ! add %o0, %o2, %o0 add %o1, %o2, %o1 sub %g0, %o2, %o3 ba,pt %ncc, .dodebc srl %o2, 3, %o2 ! Number of 8 byte chunks to copy ! ! 4 byte aligned? ! .dcoh4: bnz,pn %ncc, .dcoh2 nop .dcos4: add %o0, %o2, %o0 add %o1, %o2, %o1 sub %g0, %o2, %o3 ba,pt %ncc, .dodfbc srl %o2, 2, %o2 ! Number of 4 byte chunks to copy ! ! We must be 2 byte aligned. Off we go. ! The check for small copies was done in the ! delay at .dcoh4 ! .dcoh2: .dcos2: add %o0, %o2, %o0 add %o1, %o2, %o1 sub %g0, %o2, %o3 ba,pt %ncc, .dodtbc srl %o2, 1, %o2 ! Number of 2 byte chunks to copy .dodebc: ldx [%o0 + %o3], %o4 deccc %o2 stxa %o4, [%o1 + %o3]ASI_USER bg,pt %ncc, .dodebc addcc %o3, 8, %o3 ! ! End of copy loop. Check to see if we're done. Most ! eight byte aligned copies end here. ! bz,pt %ncc, .dcofh nop ! ! Something is left - do it byte for byte. ! ba,pt %ncc, .dcocl ldub [%o0 + %o3], %o4 ! load next byte ! ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy. ! .align 32 .dodfbc: lduw [%o0 + %o3], %o4 deccc %o2 sta %o4, [%o1 + %o3]ASI_USER bg,pt %ncc, .dodfbc addcc %o3, 4, %o3 ! ! End of copy loop. Check to see if we're done. Most ! four byte aligned copies end here. ! bz,pt %ncc, .dcofh nop ! ! Something is left. Do it byte for byte. ! ba,pt %ncc, .dcocl ldub [%o0 + %o3], %o4 ! load next byte ! ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to ! copy. ! .align 32 .dodtbc: lduh [%o0 + %o3], %o4 deccc %o2 stha %o4, [%o1 + %o3]ASI_USER bg,pt %ncc, .dodtbc addcc %o3, 2, %o3 ! ! End of copy loop. Anything left? ! bz,pt %ncc, .dcofh nop ! ! Deal with the last byte ! ldub [%o0 + %o3], %o4 stba %o4, [%o1 + %o3]ASI_USER .dcofh: membar #Sync stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault retl clr %o0 .copyout_err: ldn [THREAD_REG + T_COPYOPS], %o4 brz %o4, 2f nop ldn [%o4 + CP_COPYOUT], %g2 jmp %g2 nop 2: retl mov -1, %o0 SET_SIZE(copyout) #endif /* lint */ #ifdef lint /*ARGSUSED*/ int xcopyout(const void *kaddr, void *uaddr, size_t count) { return (0); } #else /* lint */ ENTRY(xcopyout) sethi %hi(.xcopyout_err), REAL_LOFAULT b .do_copyout or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT .xcopyout_err: ldn [THREAD_REG + T_COPYOPS], %o4 brz %o4, 2f nop ldn [%o4 + CP_XCOPYOUT], %g2 jmp %g2 nop 2: retl mov %g1, %o0 SET_SIZE(xcopyout) #endif /* lint */ #ifdef lint /*ARGSUSED*/ int xcopyout_little(const void *kaddr, void *uaddr, size_t count) { return (0); } #else /* lint */ ENTRY(xcopyout_little) sethi %hi(.little_err), %o4 ldn [THREAD_REG + T_LOFAULT], %o5 or %o4, %lo(.little_err), %o4 membar #Sync ! sync error barrier stn %o4, [THREAD_REG + T_LOFAULT] subcc %g0, %o2, %o3 add %o0, %o2, %o0 bz,pn %ncc, 2f ! check for zero bytes sub %o2, 1, %o4 add %o0, %o4, %o0 ! start w/last byte add %o1, %o2, %o1 ldub [%o0+%o3], %o4 1: stba %o4, [%o1+%o3]ASI_AIUSL inccc %o3 sub %o0, 2, %o0 ! get next byte bcc,a,pt %ncc, 1b ldub [%o0+%o3], %o4 2: membar #Sync ! sync error barrier stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault retl mov %g0, %o0 ! return (0) SET_SIZE(xcopyout_little) #endif /* lint */ /* * Copy user data to kernel space (copyin/xcopyin/xcopyin_little) */ #if defined(lint) /*ARGSUSED*/ int copyin(const void *uaddr, void *kaddr, size_t count) { return (0); } #else /* lint */ ENTRY(copyin) sethi %hi(.copyin_err), REAL_LOFAULT or REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT .do_copyin: ! ! Check the length and bail if zero. ! tst %o2 bnz,pt %ncc, 1f nop retl clr %o0 1: sethi %hi(copyio_fault), %o3 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT or %o3, %lo(copyio_fault), %o3 membar #Sync stn %o3, [THREAD_REG + T_LOFAULT] mov %o0, SAVE_SRC mov %o1, SAVE_DST mov %o2, SAVE_COUNT ! ! Check to see if we're more than SMALL_LIMIT. ! subcc %o2, SMALL_LIMIT, %o3 bgu,a,pt %ncc, .dci_ns or %o0, %o1, %o3 .dcibcp: sub %g0, %o2, %o3 ! setup for copy loop add %o0, %o2, %o0 add %o1, %o2, %o1 ba,pt %ncc, .dcicl lduba [%o0 + %o3]ASI_USER, %o4 ! ! %o0 and %o1 point at the end and remain pointing at the end ! of their buffers. We pull things out by adding %o3 (which is ! the negation of the length) to the buffer end which gives us ! the curent location in the buffers. By incrementing %o3 we walk ! through both buffers without having to bump each buffer's ! pointer. A very fast 4 instruction loop. ! .align 16 .dcicl: stb %o4, [%o1 + %o3] inccc %o3 bl,a,pt %ncc, .dcicl lduba [%o0 + %o3]ASI_USER, %o4 ! ! We're done. Go home. ! membar #Sync stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] retl clr %o0 ! ! Try aligned copies from here. ! .dci_ns: ! ! See if we're single byte aligned. If we are, check the ! limit for single byte copies. If we're smaller, or equal, ! bounce to the byte for byte copy loop. Otherwise do it in ! HW (if enabled). ! btst 1, %o3 bz,a,pt %icc, .dcih8 btst 7, %o3 ba .dcibcp nop .dcih8: ! ! 8 byte aligned? ! bnz,a %ncc, .dcih4 btst 3, %o3 .dcis8: ! ! Housekeeping for copy loops. Uses same idea as in the byte for ! byte copy loop above. ! add %o0, %o2, %o0 add %o1, %o2, %o1 sub %g0, %o2, %o3 ba,pt %ncc, .didebc srl %o2, 3, %o2 ! Number of 8 byte chunks to copy ! ! 4 byte aligned? ! .dcih4: bnz %ncc, .dcih2 nop .dcis4: ! ! Housekeeping for copy loops. Uses same idea as in the byte ! for byte copy loop above. ! add %o0, %o2, %o0 add %o1, %o2, %o1 sub %g0, %o2, %o3 ba,pt %ncc, .didfbc srl %o2, 2, %o2 ! Number of 4 byte chunks to copy .dcih2: .dcis2: add %o0, %o2, %o0 add %o1, %o2, %o1 sub %g0, %o2, %o3 ba,pt %ncc, .didtbc srl %o2, 1, %o2 ! Number of 2 byte chunks to copy .didebc: ldxa [%o0 + %o3]ASI_USER, %o4 deccc %o2 stx %o4, [%o1 + %o3] bg,pt %ncc, .didebc addcc %o3, 8, %o3 ! ! End of copy loop. Most 8 byte aligned copies end here. ! bz,pt %ncc, .dcifh nop ! ! Something is left. Do it byte for byte. ! ba,pt %ncc, .dcicl lduba [%o0 + %o3]ASI_USER, %o4 ! ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy. ! .align 32 .didfbc: lduwa [%o0 + %o3]ASI_USER, %o4 deccc %o2 st %o4, [%o1 + %o3] bg,pt %ncc, .didfbc addcc %o3, 4, %o3 ! ! End of copy loop. Most 4 byte aligned copies end here. ! bz,pt %ncc, .dcifh nop ! ! Something is left. Do it byte for byte. ! ba,pt %ncc, .dcicl lduba [%o0 + %o3]ASI_USER, %o4 ! ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to ! copy. ! .align 32 .didtbc: lduha [%o0 + %o3]ASI_USER, %o4 deccc %o2 sth %o4, [%o1 + %o3] bg,pt %ncc, .didtbc addcc %o3, 2, %o3 ! ! End of copy loop. Most 2 byte aligned copies end here. ! bz,pt %ncc, .dcifh nop ! ! Deal with the last byte ! lduba [%o0 + %o3]ASI_USER, %o4 stb %o4, [%o1 + %o3] .dcifh: membar #Sync stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault retl clr %o0 .copyin_err: ldn [THREAD_REG + T_COPYOPS], %o4 brz %o4, 2f nop ldn [%o4 + CP_COPYIN], %g2 jmp %g2 nop 2: retl mov -1, %o0 SET_SIZE(copyin) #endif /* lint */ #ifdef lint /*ARGSUSED*/ int xcopyin(const void *uaddr, void *kaddr, size_t count) { return (0); } #else /* lint */ ENTRY(xcopyin) sethi %hi(.xcopyin_err), REAL_LOFAULT b .do_copyin or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT .xcopyin_err: ldn [THREAD_REG + T_COPYOPS], %o4 brz %o4, 2f nop ldn [%o4 + CP_XCOPYIN], %g2 jmp %g2 nop 2: retl mov %g1, %o0 SET_SIZE(xcopyin) #endif /* lint */ #ifdef lint /*ARGSUSED*/ int xcopyin_little(const void *uaddr, void *kaddr, size_t count) { return (0); } #else /* lint */ ENTRY(xcopyin_little) sethi %hi(.little_err), %o4 ldn [THREAD_REG + T_LOFAULT], %o5 or %o4, %lo(.little_err), %o4 membar #Sync ! sync error barrier stn %o4, [THREAD_REG + T_LOFAULT] subcc %g0, %o2, %o3 add %o0, %o2, %o0 bz,pn %ncc, 2f ! check for zero bytes sub %o2, 1, %o4 add %o0, %o4, %o0 ! start w/last byte add %o1, %o2, %o1 lduba [%o0+%o3]ASI_AIUSL, %o4 1: stb %o4, [%o1+%o3] inccc %o3 sub %o0, 2, %o0 ! get next byte bcc,a,pt %ncc, 1b lduba [%o0+%o3]ASI_AIUSL, %o4 2: membar #Sync ! sync error barrier stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault retl mov %g0, %o0 ! return (0) .little_err: membar #Sync ! sync error barrier stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault retl mov %g1, %o0 SET_SIZE(xcopyin_little) #endif /* lint */ /* * Copy a block of storage - must not overlap (from + len <= to). * No fault handler installed (to be called under on_fault()) */ #if defined(lint) /* ARGSUSED */ void copyin_noerr(const void *ufrom, void *kto, size_t count) {} #else /* lint */ ENTRY(copyin_noerr) sethi %hi(.copyio_noerr), REAL_LOFAULT b .do_copyin or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT .copyio_noerr: jmp SAVED_LOFAULT nop SET_SIZE(copyin_noerr) #endif /* lint */ /* * Copy a block of storage - must not overlap (from + len <= to). * No fault handler installed (to be called under on_fault()) */ #if defined(lint) /* ARGSUSED */ void copyout_noerr(const void *kfrom, void *uto, size_t count) {} #else /* lint */ ENTRY(copyout_noerr) sethi %hi(.copyio_noerr), REAL_LOFAULT b .do_copyout or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT SET_SIZE(copyout_noerr) #endif /* lint */ #if defined(lint) int use_hw_bcopy = 1; int use_hw_bzero = 1; #else /* !lint */ .align 4 DGDEF(use_hw_bcopy) .word 1 DGDEF(use_hw_bzero) .word 1 .align 64 .section ".text" #endif /* !lint */ /* * hwblkclr - clears block-aligned, block-multiple-sized regions that are * longer than 256 bytes in length using load/stores. If * the criteria for using this routine are not met then it calls bzero * and returns 1. Otherwise 0 is returned indicating success. * Caller is responsible for ensuring use_hw_bzero is true and that * kpreempt_disable() has been called. */ #ifdef lint /*ARGSUSED*/ int hwblkclr(void *addr, size_t len) { return(0); } #else /* lint */ ! %i0 - start address ! %i1 - length of region (multiple of 64) ENTRY(hwblkclr) save %sp, -SA(MINFRAME), %sp ! Must be block-aligned andcc %i0, 0x3f, %g0 bnz,pn %ncc, 1f nop ! ... and must be 256 bytes or more cmp %i1, 0x100 blu,pn %ncc, 1f nop ! ... and length must be a multiple of 64 andcc %i1, 0x3f, %g0 bz,pn %ncc, .pz_doblock nop 1: ! punt, call bzero but notify the caller that bzero was used mov %i0, %o0 call bzero mov %i1, %o1 ret restore %g0, 1, %o0 ! return (1) - did not use block operations ! Already verified that there are at least 256 bytes to set .pz_doblock: stx %g0, [%i0+0x0] stx %g0, [%i0+0x40] stx %g0, [%i0+0x80] stx %g0, [%i0+0xc0] stx %g0, [%i0+0x8] stx %g0, [%i0+0x10] stx %g0, [%i0+0x18] stx %g0, [%i0+0x20] stx %g0, [%i0+0x28] stx %g0, [%i0+0x30] stx %g0, [%i0+0x38] stx %g0, [%i0+0x48] stx %g0, [%i0+0x50] stx %g0, [%i0+0x58] stx %g0, [%i0+0x60] stx %g0, [%i0+0x68] stx %g0, [%i0+0x70] stx %g0, [%i0+0x78] stx %g0, [%i0+0x88] stx %g0, [%i0+0x90] stx %g0, [%i0+0x98] stx %g0, [%i0+0xa0] stx %g0, [%i0+0xa8] stx %g0, [%i0+0xb0] stx %g0, [%i0+0xb8] stx %g0, [%i0+0xc8] stx %g0, [%i0+0xd0] stx %g0, [%i0+0xd8] stx %g0, [%i0+0xe0] stx %g0, [%i0+0xe8] stx %g0, [%i0+0xf0] stx %g0, [%i0+0xf8] sub %i1, 0x100, %i1 cmp %i1, 0x100 bgu,pt %ncc, .pz_doblock add %i0, 0x100, %i0 2: ! Check if more than 64 bytes to set cmp %i1,0x40 blu %ncc, .pz_finish nop 3: stx %g0, [%i0+0x0] stx %g0, [%i0+0x8] stx %g0, [%i0+0x10] stx %g0, [%i0+0x18] stx %g0, [%i0+0x20] stx %g0, [%i0+0x28] stx %g0, [%i0+0x30] stx %g0, [%i0+0x38] subcc %i1, 0x40, %i1 bgu,pt %ncc, 3b add %i0, 0x40, %i0 .pz_finish: membar #Sync ret restore %g0, 0, %o0 ! return (bzero or not) SET_SIZE(hwblkclr) #endif /* lint */ #ifdef lint /* Copy 32 bytes of data from src to dst using physical addresses */ /*ARGSUSED*/ void hw_pa_bcopy32(uint64_t src, uint64_t dst) {} #else /*!lint */ /* * Copy 32 bytes of data from src (%o0) to dst (%o1) * using physical addresses. */ ENTRY_NP(hw_pa_bcopy32) rdpr %pstate, %g1 andn %g1, PSTATE_IE, %g2 wrpr %g0, %g2, %pstate ldxa [%o0]ASI_MEM, %o2 add %o0, 8, %o0 ldxa [%o0]ASI_MEM, %o3 add %o0, 8, %o0 ldxa [%o0]ASI_MEM, %o4 add %o0, 8, %o0 ldxa [%o0]ASI_MEM, %o5 stxa %o2, [%o1]ASI_MEM add %o1, 8, %o1 stxa %o3, [%o1]ASI_MEM add %o1, 8, %o1 stxa %o4, [%o1]ASI_MEM add %o1, 8, %o1 stxa %o5, [%o1]ASI_MEM membar #Sync retl wrpr %g0, %g1, %pstate SET_SIZE(hw_pa_bcopy32) #endif /* lint */ /* * Zero a block of storage. * * uzero is used by the kernel to zero a block in user address space. */ #if defined(lint) /* ARGSUSED */ int kzero(void *addr, size_t count) { return(0); } /* ARGSUSED */ void uzero(void *addr, size_t count) {} #else /* lint */ ENTRY(uzero) ! ! Set a new lo_fault handler only if we came in with one ! already specified. ! wr %g0, ASI_USER, %asi ldn [THREAD_REG + T_LOFAULT], %o5 tst %o5 bz,pt %ncc, .do_zero sethi %hi(.zeroerr), %o2 or %o2, %lo(.zeroerr), %o2 membar #Sync ba,pt %ncc, .do_zero stn %o2, [THREAD_REG + T_LOFAULT] ENTRY(kzero) ! ! Always set a lo_fault handler ! wr %g0, ASI_P, %asi ldn [THREAD_REG + T_LOFAULT], %o5 sethi %hi(.zeroerr), %o2 or %o5, LOFAULT_SET, %o5 or %o2, %lo(.zeroerr), %o2 membar #Sync ba,pt %ncc, .do_zero stn %o2, [THREAD_REG + T_LOFAULT] /* * We got here because of a fault during kzero or if * uzero or bzero was called with t_lofault non-zero. * Otherwise we've already run screaming from the room. * Errno value is in %g1. Note that we're here iff * we did set t_lofault. */ .zeroerr: ! ! Undo asi register setting. Just set it to be the ! kernel default without checking. ! wr %g0, ASI_P, %asi ! ! We did set t_lofault. It may well have been zero coming in. ! 1: tst %o5 membar #Sync bne,pn %ncc, 3f andncc %o5, LOFAULT_SET, %o5 2: ! ! Old handler was zero. Just return the error. ! retl ! return mov %g1, %o0 ! error code from %g1 3: ! ! We're here because %o5 was non-zero. It was non-zero ! because either LOFAULT_SET was present, a previous fault ! handler was present or both. In all cases we need to reset ! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET ! before we either simply return the error or we invoke the ! previously specified handler. ! be %ncc, 2b stn %o5, [THREAD_REG + T_LOFAULT] jmp %o5 ! goto real handler nop SET_SIZE(kzero) SET_SIZE(uzero) #endif /* lint */ /* * Zero a block of storage. */ #if defined(lint) /* ARGSUSED */ void bzero(void *addr, size_t count) {} #else /* lint */ ENTRY(bzero) wr %g0, ASI_P, %asi ldn [THREAD_REG + T_LOFAULT], %o5 ! save old vector tst %o5 bz,pt %ncc, .do_zero sethi %hi(.zeroerr), %o2 or %o2, %lo(.zeroerr), %o2 membar #Sync ! sync error barrier stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector .do_zero: cmp %o1, 7 blu,pn %ncc, .byteclr nop cmp %o1, 15 blu,pn %ncc, .wdalign nop andcc %o0, 7, %o3 ! is add aligned on a 8 byte bound bz,pt %ncc, .blkalign ! already double aligned sub %o3, 8, %o3 ! -(bytes till double aligned) add %o1, %o3, %o1 ! update o1 with new count 1: stba %g0, [%o0]%asi inccc %o3 bl,pt %ncc, 1b inc %o0 ! Now address is double aligned .blkalign: cmp %o1, 0x80 ! check if there are 128 bytes to set blu,pn %ncc, .bzero_small mov %o1, %o3 andcc %o0, 0x3f, %o3 ! is block aligned? bz,pt %ncc, .bzero_blk sub %o3, 0x40, %o3 ! -(bytes till block aligned) add %o1, %o3, %o1 ! o1 is the remainder ! Clear -(%o3) bytes till block aligned 1: stxa %g0, [%o0]%asi addcc %o3, 8, %o3 bl,pt %ncc, 1b add %o0, 8, %o0 .bzero_blk: and %o1, 0x3f, %o3 ! calc bytes left after blk clear andn %o1, 0x3f, %o4 ! calc size of blocks in bytes cmp %o4, 0x100 ! 256 bytes or more blu,pn %ncc, 3f nop 2: stxa %g0, [%o0+0x0]%asi stxa %g0, [%o0+0x40]%asi stxa %g0, [%o0+0x80]%asi stxa %g0, [%o0+0xc0]%asi stxa %g0, [%o0+0x8]%asi stxa %g0, [%o0+0x10]%asi stxa %g0, [%o0+0x18]%asi stxa %g0, [%o0+0x20]%asi stxa %g0, [%o0+0x28]%asi stxa %g0, [%o0+0x30]%asi stxa %g0, [%o0+0x38]%asi stxa %g0, [%o0+0x48]%asi stxa %g0, [%o0+0x50]%asi stxa %g0, [%o0+0x58]%asi stxa %g0, [%o0+0x60]%asi stxa %g0, [%o0+0x68]%asi stxa %g0, [%o0+0x70]%asi stxa %g0, [%o0+0x78]%asi stxa %g0, [%o0+0x88]%asi stxa %g0, [%o0+0x90]%asi stxa %g0, [%o0+0x98]%asi stxa %g0, [%o0+0xa0]%asi stxa %g0, [%o0+0xa8]%asi stxa %g0, [%o0+0xb0]%asi stxa %g0, [%o0+0xb8]%asi stxa %g0, [%o0+0xc8]%asi stxa %g0, [%o0+0xd0]%asi stxa %g0, [%o0+0xd8]%asi stxa %g0, [%o0+0xe0]%asi stxa %g0, [%o0+0xe8]%asi stxa %g0, [%o0+0xf0]%asi stxa %g0, [%o0+0xf8]%asi sub %o4, 0x100, %o4 cmp %o4, 0x100 bgu,pt %ncc, 2b add %o0, 0x100, %o0 3: ! ... check if 64 bytes to set cmp %o4, 0x40 blu %ncc, .bzero_blk_done nop 4: stxa %g0, [%o0+0x0]%asi stxa %g0, [%o0+0x8]%asi stxa %g0, [%o0+0x10]%asi stxa %g0, [%o0+0x18]%asi stxa %g0, [%o0+0x20]%asi stxa %g0, [%o0+0x28]%asi stxa %g0, [%o0+0x30]%asi stxa %g0, [%o0+0x38]%asi subcc %o4, 0x40, %o4 bgu,pt %ncc, 3b add %o0, 0x40, %o0 .bzero_blk_done: membar #Sync .bzero_small: ! Set the remaining doubles subcc %o3, 8, %o3 ! Can we store any doubles? blu,pn %ncc, .byteclr and %o1, 7, %o1 ! calc bytes left after doubles .dbclr: stxa %g0, [%o0]%asi ! Clear the doubles subcc %o3, 8, %o3 bgeu,pt %ncc, .dbclr add %o0, 8, %o0 ba .byteclr nop .wdalign: andcc %o0, 3, %o3 ! is add aligned on a word boundary bz,pn %ncc, .wdclr andn %o1, 3, %o3 ! create word sized count in %o3 dec %o1 ! decrement count stba %g0, [%o0]%asi ! clear a byte ba .wdalign inc %o0 ! next byte .wdclr: sta %g0, [%o0]%asi ! 4-byte clearing loop subcc %o3, 4, %o3 bnz,pt %ncc, .wdclr inc 4, %o0 and %o1, 3, %o1 ! leftover count, if any .byteclr: ! Set the leftover bytes brz %o1, .bzero_exit nop 7: deccc %o1 ! byte clearing loop stba %g0, [%o0]%asi bgu,pt %ncc, 7b inc %o0 .bzero_exit: ! ! We're just concerned with whether t_lofault was set ! when we came in. We end up here from either kzero() ! or bzero(). kzero() *always* sets a lofault handler. ! It ors LOFAULT_SET into %o5 to indicate it has done ! this even if the value of %o5 is otherwise zero. ! bzero() sets a lofault handler *only* if one was ! previously set. Accordingly we need to examine ! %o5 and if it is non-zero be sure to clear LOFAULT_SET ! before resetting the error handler. ! tst %o5 bz %ncc, 1f andn %o5, LOFAULT_SET, %o5 membar #Sync ! sync error barrier stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1: retl clr %o0 ! return (0) SET_SIZE(bzero) #endif /* lint */