1*7c478bd9Sstevel@tonic-gate/* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate/* 23*7c478bd9Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate#pragma ident "%Z%%M% %I% %E% SMI" 28*7c478bd9Sstevel@tonic-gate 29*7c478bd9Sstevel@tonic-gate#include <sys/param.h> 30*7c478bd9Sstevel@tonic-gate#include <sys/errno.h> 31*7c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h> 32*7c478bd9Sstevel@tonic-gate#include <sys/vtrace.h> 33*7c478bd9Sstevel@tonic-gate#include <sys/machthread.h> 34*7c478bd9Sstevel@tonic-gate#include <sys/clock.h> 35*7c478bd9Sstevel@tonic-gate#include <sys/asi.h> 36*7c478bd9Sstevel@tonic-gate#include <sys/fsr.h> 37*7c478bd9Sstevel@tonic-gate#include <sys/privregs.h> 38*7c478bd9Sstevel@tonic-gate#include <sys/fpras_impl.h> 39*7c478bd9Sstevel@tonic-gate 40*7c478bd9Sstevel@tonic-gate#if !defined(lint) 41*7c478bd9Sstevel@tonic-gate#include "assym.h" 42*7c478bd9Sstevel@tonic-gate#endif /* lint */ 43*7c478bd9Sstevel@tonic-gate 44*7c478bd9Sstevel@tonic-gate/* 45*7c478bd9Sstevel@tonic-gate * Pseudo-code to aid in understanding the control flow of the 46*7c478bd9Sstevel@tonic-gate * bcopy/copyin/copyout routines. 47*7c478bd9Sstevel@tonic-gate * 48*7c478bd9Sstevel@tonic-gate * On entry: 49*7c478bd9Sstevel@tonic-gate * 50*7c478bd9Sstevel@tonic-gate * ! Determine whether to use the FP register version 51*7c478bd9Sstevel@tonic-gate * ! or the leaf routine version depending on size 52*7c478bd9Sstevel@tonic-gate * ! of copy and flags. Set up error handling accordingly. 53*7c478bd9Sstevel@tonic-gate * ! The transition point depends on whether the src and 54*7c478bd9Sstevel@tonic-gate * ! dst addresses can be aligned to long word, word, 55*7c478bd9Sstevel@tonic-gate * ! half word, or byte boundaries. 56*7c478bd9Sstevel@tonic-gate * ! 57*7c478bd9Sstevel@tonic-gate * ! WARNING: <Register usage convention> 58*7c478bd9Sstevel@tonic-gate * ! For FP version, %l6 holds previous error handling and 59*7c478bd9Sstevel@tonic-gate * ! a flag: TRAMP_FLAG (low bits) 60*7c478bd9Sstevel@tonic-gate * ! 
for leaf routine version, %o4 holds those values. 61*7c478bd9Sstevel@tonic-gate * ! So either %l6 or %o4 is reserved and not available for 62*7c478bd9Sstevel@tonic-gate * ! any other use. 63*7c478bd9Sstevel@tonic-gate * 64*7c478bd9Sstevel@tonic-gate * if (length <= VIS_COPY_THRESHOLD) ! start with a quick test 65*7c478bd9Sstevel@tonic-gate * go to small_copy; ! to speed short copies 66*7c478bd9Sstevel@tonic-gate * 67*7c478bd9Sstevel@tonic-gate * ! src, dst long word alignable 68*7c478bd9Sstevel@tonic-gate * if (hw_copy_limit_8 == 0) ! hw_copy disabled 69*7c478bd9Sstevel@tonic-gate * go to small_copy; 70*7c478bd9Sstevel@tonic-gate * if (length <= hw_copy_limit_8) 71*7c478bd9Sstevel@tonic-gate * go to small_copy; 72*7c478bd9Sstevel@tonic-gate * go to FPBLK_copy; 73*7c478bd9Sstevel@tonic-gate * } 74*7c478bd9Sstevel@tonic-gate * if (src,dst not alignable) { 75*7c478bd9Sstevel@tonic-gate * if (hw_copy_limit_1 == 0) ! hw_copy disabled 76*7c478bd9Sstevel@tonic-gate * go to small_copy; 77*7c478bd9Sstevel@tonic-gate * if (length <= hw_copy_limit_1) 78*7c478bd9Sstevel@tonic-gate * go to small_copy; 79*7c478bd9Sstevel@tonic-gate * go to FPBLK_copy; 80*7c478bd9Sstevel@tonic-gate * } 81*7c478bd9Sstevel@tonic-gate * if (src,dst halfword alignable) { 82*7c478bd9Sstevel@tonic-gate * if (hw_copy_limit_2 == 0) ! hw_copy disabled 83*7c478bd9Sstevel@tonic-gate * go to small_copy; 84*7c478bd9Sstevel@tonic-gate * if (length <= hw_copy_limit_2) 85*7c478bd9Sstevel@tonic-gate * go to small_copy; 86*7c478bd9Sstevel@tonic-gate * go to FPBLK_copy; 87*7c478bd9Sstevel@tonic-gate * } 88*7c478bd9Sstevel@tonic-gate * if (src,dst word alignable) { 89*7c478bd9Sstevel@tonic-gate * if (hw_copy_limit_4 == 0) ! 
hw_copy disabled 90*7c478bd9Sstevel@tonic-gate * go to small_copy; 91*7c478bd9Sstevel@tonic-gate * if (length <= hw_copy_limit_4) 92*7c478bd9Sstevel@tonic-gate * go to small_copy; 93*7c478bd9Sstevel@tonic-gate * go to FPBLK_copy; 94*7c478bd9Sstevel@tonic-gate * } 95*7c478bd9Sstevel@tonic-gate * 96*7c478bd9Sstevel@tonic-gate * small_copy: 97*7c478bd9Sstevel@tonic-gate * Setup_leaf_rtn_error_handler; ! diffs for each entry point 98*7c478bd9Sstevel@tonic-gate * 99*7c478bd9Sstevel@tonic-gate * if (count <= 3) ! fast path for tiny copies 100*7c478bd9Sstevel@tonic-gate * go to sm_left; ! special finish up code 101*7c478bd9Sstevel@tonic-gate * else 102*7c478bd9Sstevel@tonic-gate * if (count > CHKSIZE) ! medium sized copies 103*7c478bd9Sstevel@tonic-gate * go to sm_med ! tuned by alignment 104*7c478bd9Sstevel@tonic-gate * if(src&dst not both word aligned) { 105*7c478bd9Sstevel@tonic-gate * sm_movebytes: 106*7c478bd9Sstevel@tonic-gate * move byte by byte in 4-way unrolled loop 107*7c478bd9Sstevel@tonic-gate * fall into sm_left; 108*7c478bd9Sstevel@tonic-gate * sm_left: 109*7c478bd9Sstevel@tonic-gate * move 0-3 bytes byte at a time as needed. 110*7c478bd9Sstevel@tonic-gate * restore error handler and exit. 111*7c478bd9Sstevel@tonic-gate * 112*7c478bd9Sstevel@tonic-gate * } else { ! src&dst are word aligned 113*7c478bd9Sstevel@tonic-gate * check for at least 8 bytes left, 114*7c478bd9Sstevel@tonic-gate * move word at a time, unrolled by 2 115*7c478bd9Sstevel@tonic-gate * when fewer than 8 bytes left, 116*7c478bd9Sstevel@tonic-gate * sm_half: move half word at a time while 2 or more bytes left 117*7c478bd9Sstevel@tonic-gate * sm_byte: move final byte if necessary 118*7c478bd9Sstevel@tonic-gate * sm_exit: 119*7c478bd9Sstevel@tonic-gate * restore error handler and exit. 120*7c478bd9Sstevel@tonic-gate * } 121*7c478bd9Sstevel@tonic-gate * 122*7c478bd9Sstevel@tonic-gate * ! Medium length cases with at least CHKSIZE bytes available 123*7c478bd9Sstevel@tonic-gate * ! 
method: line up src and dst as best possible, then 124*7c478bd9Sstevel@tonic-gate * ! move data in 4-way unrolled loops. 125*7c478bd9Sstevel@tonic-gate * 126*7c478bd9Sstevel@tonic-gate * sm_med: 127*7c478bd9Sstevel@tonic-gate * if(src&dst unalignable) 128*7c478bd9Sstevel@tonic-gate * go to sm_movebytes 129*7c478bd9Sstevel@tonic-gate * if(src&dst halfword alignable) 130*7c478bd9Sstevel@tonic-gate * go to sm_movehalf 131*7c478bd9Sstevel@tonic-gate * if(src&dst word alignable) 132*7c478bd9Sstevel@tonic-gate * go to sm_moveword 133*7c478bd9Sstevel@tonic-gate * ! fall into long word movement 134*7c478bd9Sstevel@tonic-gate * move bytes until src is word aligned 135*7c478bd9Sstevel@tonic-gate * if not long word aligned, move a word 136*7c478bd9Sstevel@tonic-gate * move long words in 4-way unrolled loop until < 32 bytes left 137*7c478bd9Sstevel@tonic-gate * move long words in 1-way unrolled loop until < 8 bytes left 138*7c478bd9Sstevel@tonic-gate * if zero bytes left, goto sm_exit 139*7c478bd9Sstevel@tonic-gate * if one byte left, go to sm_byte 140*7c478bd9Sstevel@tonic-gate * else go to sm_half 141*7c478bd9Sstevel@tonic-gate * 142*7c478bd9Sstevel@tonic-gate * sm_moveword: 143*7c478bd9Sstevel@tonic-gate * move bytes until src is word aligned 144*7c478bd9Sstevel@tonic-gate * move words in 4-way unrolled loop until < 16 bytes left 145*7c478bd9Sstevel@tonic-gate * move words in 1-way unrolled loop until < 4 bytes left 146*7c478bd9Sstevel@tonic-gate * if zero bytes left, goto sm_exit 147*7c478bd9Sstevel@tonic-gate * if one byte left, go to sm_byte 148*7c478bd9Sstevel@tonic-gate * else go to sm_half 149*7c478bd9Sstevel@tonic-gate * 150*7c478bd9Sstevel@tonic-gate * sm_movehalf: 151*7c478bd9Sstevel@tonic-gate * move a byte if needed to align src on halfword 152*7c478bd9Sstevel@tonic-gate * move halfwords in 4-way unrolled loop until < 8 bytes left 153*7c478bd9Sstevel@tonic-gate * if zero bytes left, goto sm_exit 154*7c478bd9Sstevel@tonic-gate * if one byte left, go to sm_byte 
155*7c478bd9Sstevel@tonic-gate * else go to sm_half 156*7c478bd9Sstevel@tonic-gate * 157*7c478bd9Sstevel@tonic-gate * 158*7c478bd9Sstevel@tonic-gate * FPBLK_copy: 159*7c478bd9Sstevel@tonic-gate * %l6 = curthread->t_lofault; 160*7c478bd9Sstevel@tonic-gate * if (%l6 != NULL) { 161*7c478bd9Sstevel@tonic-gate * membar #Sync 162*7c478bd9Sstevel@tonic-gate * curthread->t_lofault = .copyerr; 163*7c478bd9Sstevel@tonic-gate * caller_error_handler = TRUE ! %l6 |= 2 164*7c478bd9Sstevel@tonic-gate * } 165*7c478bd9Sstevel@tonic-gate * 166*7c478bd9Sstevel@tonic-gate * ! for FPU testing we must not migrate cpus 167*7c478bd9Sstevel@tonic-gate * if (curthread->t_lwp == NULL) { 168*7c478bd9Sstevel@tonic-gate * ! Kernel threads do not have pcb's in which to store 169*7c478bd9Sstevel@tonic-gate * ! the floating point state, so disallow preemption during 170*7c478bd9Sstevel@tonic-gate * ! the copy. This also prevents cpu migration. 171*7c478bd9Sstevel@tonic-gate * kpreempt_disable(curthread); 172*7c478bd9Sstevel@tonic-gate * } else { 173*7c478bd9Sstevel@tonic-gate * thread_nomigrate(); 174*7c478bd9Sstevel@tonic-gate * } 175*7c478bd9Sstevel@tonic-gate * 176*7c478bd9Sstevel@tonic-gate * old_fprs = %fprs; 177*7c478bd9Sstevel@tonic-gate * old_gsr = %gsr; 178*7c478bd9Sstevel@tonic-gate * if (%fprs.fef) { 179*7c478bd9Sstevel@tonic-gate * %fprs.fef = 1; 180*7c478bd9Sstevel@tonic-gate * save current fpregs on stack using blockstore 181*7c478bd9Sstevel@tonic-gate * } else { 182*7c478bd9Sstevel@tonic-gate * %fprs.fef = 1; 183*7c478bd9Sstevel@tonic-gate * } 184*7c478bd9Sstevel@tonic-gate * 185*7c478bd9Sstevel@tonic-gate * 186*7c478bd9Sstevel@tonic-gate * do_blockcopy_here; 187*7c478bd9Sstevel@tonic-gate * 188*7c478bd9Sstevel@tonic-gate * In lofault handler: 189*7c478bd9Sstevel@tonic-gate * curthread->t_lofault = .copyerr2; 190*7c478bd9Sstevel@tonic-gate * Continue on with the normal exit handler 191*7c478bd9Sstevel@tonic-gate * 192*7c478bd9Sstevel@tonic-gate * On normal exit: 
193*7c478bd9Sstevel@tonic-gate * %gsr = old_gsr; 194*7c478bd9Sstevel@tonic-gate * if (old_fprs & FPRS_FEF) 195*7c478bd9Sstevel@tonic-gate * restore fpregs from stack using blockload 196*7c478bd9Sstevel@tonic-gate * else 197*7c478bd9Sstevel@tonic-gate * zero fpregs 198*7c478bd9Sstevel@tonic-gate * %fprs = old_fprs; 199*7c478bd9Sstevel@tonic-gate * membar #Sync 200*7c478bd9Sstevel@tonic-gate * curthread->t_lofault = (%l6 & ~3); 201*7c478bd9Sstevel@tonic-gate * ! following test omitted from copyin/copyout as they 202*7c478bd9Sstevel@tonic-gate * ! will always have a current thread 203*7c478bd9Sstevel@tonic-gate * if (curthread->t_lwp == NULL) 204*7c478bd9Sstevel@tonic-gate * kpreempt_enable(curthread); 205*7c478bd9Sstevel@tonic-gate * else 206*7c478bd9Sstevel@tonic-gate * thread_allowmigrate(); 207*7c478bd9Sstevel@tonic-gate * return (0) 208*7c478bd9Sstevel@tonic-gate * 209*7c478bd9Sstevel@tonic-gate * In second lofault handler (.copyerr2): 210*7c478bd9Sstevel@tonic-gate * We've tried to restore fp state from the stack and failed. To 211*7c478bd9Sstevel@tonic-gate * prevent from returning with a corrupted fp state, we will panic. 212*7c478bd9Sstevel@tonic-gate */ 213*7c478bd9Sstevel@tonic-gate 214*7c478bd9Sstevel@tonic-gate/* 215*7c478bd9Sstevel@tonic-gate * Comments about optimization choices 216*7c478bd9Sstevel@tonic-gate * 217*7c478bd9Sstevel@tonic-gate * The initial optimization decision in this code is to determine 218*7c478bd9Sstevel@tonic-gate * whether to use the FP registers for a copy or not. If we don't 219*7c478bd9Sstevel@tonic-gate * use the FP registers, we can execute the copy as a leaf routine, 220*7c478bd9Sstevel@tonic-gate * saving a register save and restore. Also, less elaborate setup 221*7c478bd9Sstevel@tonic-gate * is required, allowing short copies to be completed more quickly. 
222*7c478bd9Sstevel@tonic-gate * For longer copies, especially unaligned ones (where the src and 223*7c478bd9Sstevel@tonic-gate * dst do not align to allow simple ldx,stx operation), the FP 224*7c478bd9Sstevel@tonic-gate * registers allow much faster copy operations. 225*7c478bd9Sstevel@tonic-gate * 226*7c478bd9Sstevel@tonic-gate * The estimated extra cost of the FP path will vary depending on 227*7c478bd9Sstevel@tonic-gate * src/dst alignment, dst offset from the next 64 byte FPblock store 228*7c478bd9Sstevel@tonic-gate * boundary, remaining src data after the last full dst cache line is 229*7c478bd9Sstevel@tonic-gate * moved whether the FP registers need to be saved, and some other 230*7c478bd9Sstevel@tonic-gate * minor issues. The average additional overhead is estimated to be 231*7c478bd9Sstevel@tonic-gate * 400 clocks. Since each non-repeated/predicted tst and branch costs 232*7c478bd9Sstevel@tonic-gate * around 10 clocks, elaborate calculation would slow down to all 233*7c478bd9Sstevel@tonic-gate * longer copies and only benefit a small portion of medium sized 234*7c478bd9Sstevel@tonic-gate * copies. Rather than incur such cost, we chose fixed transition 235*7c478bd9Sstevel@tonic-gate * points for each of the alignment choices. 236*7c478bd9Sstevel@tonic-gate * 237*7c478bd9Sstevel@tonic-gate * For the inner loop, here is a comparison of the per cache line 238*7c478bd9Sstevel@tonic-gate * costs for each alignment when src&dst are in cache: 239*7c478bd9Sstevel@tonic-gate * 240*7c478bd9Sstevel@tonic-gate * byte aligned: 108 clocks slower for non-FPBLK 241*7c478bd9Sstevel@tonic-gate * half aligned: 44 clocks slower for non-FPBLK 242*7c478bd9Sstevel@tonic-gate * word aligned: 12 clocks slower for non-FPBLK 243*7c478bd9Sstevel@tonic-gate * long aligned: 4 clocks >>faster<< for non-FPBLK 244*7c478bd9Sstevel@tonic-gate * 245*7c478bd9Sstevel@tonic-gate * The long aligned loop runs faster because it does no prefetching. 
246*7c478bd9Sstevel@tonic-gate * That wins if the data is not in cache or there is too little 247*7c478bd9Sstevel@tonic-gate * data to gain much benefit from prefetching. But when there 248*7c478bd9Sstevel@tonic-gate * is more data and that data is not in cache, failing to prefetch 249*7c478bd9Sstevel@tonic-gate * can run much slower. In addition, there is a 2 Kbyte store queue 250*7c478bd9Sstevel@tonic-gate * which will cause the non-FPBLK inner loop to slow for larger copies. 251*7c478bd9Sstevel@tonic-gate * The exact tradeoff is strongly load and application dependent, with 252*7c478bd9Sstevel@tonic-gate * increasing risk of a customer visible performance regression if the 253*7c478bd9Sstevel@tonic-gate * non-FPBLK code is used for larger copies. Studies of synthetic in-cache 254*7c478bd9Sstevel@tonic-gate * vs out-of-cache copy tests in user space suggest 1024 bytes as a safe 255*7c478bd9Sstevel@tonic-gate * upper limit for the non-FPBLK code. To minimize performance regression 256*7c478bd9Sstevel@tonic-gate * risk while still gaining the primary benefits of the improvements to 257*7c478bd9Sstevel@tonic-gate * the non-FPBLK code, we set an upper bound of 1024 bytes for the various 258*7c478bd9Sstevel@tonic-gate * hw_copy_limit_*. Later experimental studies using different values 259*7c478bd9Sstevel@tonic-gate * of hw_copy_limit_* can be used to make further adjustments if 260*7c478bd9Sstevel@tonic-gate * appropriate. 
261*7c478bd9Sstevel@tonic-gate * 262*7c478bd9Sstevel@tonic-gate * hw_copy_limit_1 = src and dst are byte aligned but not halfword aligned 263*7c478bd9Sstevel@tonic-gate * hw_copy_limit_2 = src and dst are halfword aligned but not word aligned 264*7c478bd9Sstevel@tonic-gate * hw_copy_limit_4 = src and dst are word aligned but not longword aligned 265*7c478bd9Sstevel@tonic-gate * hw_copy_limit_8 = src and dst are longword aligned 266*7c478bd9Sstevel@tonic-gate * 267*7c478bd9Sstevel@tonic-gate * To say that src and dst are word aligned means that after 268*7c478bd9Sstevel@tonic-gate * some initial alignment activity of moving 0 to 3 bytes, 269*7c478bd9Sstevel@tonic-gate * both the src and dst will be on word boundaries so that 270*7c478bd9Sstevel@tonic-gate * word loads and stores may be used. 271*7c478bd9Sstevel@tonic-gate * 272*7c478bd9Sstevel@tonic-gate * Recommended initial values as of Mar 2004, includes testing 273*7c478bd9Sstevel@tonic-gate * on Cheetah+ (900MHz), Cheetah++ (1200MHz), and Jaguar(1050MHz): 274*7c478bd9Sstevel@tonic-gate * hw_copy_limit_1 = 256 275*7c478bd9Sstevel@tonic-gate * hw_copy_limit_2 = 512 276*7c478bd9Sstevel@tonic-gate * hw_copy_limit_4 = 1024 277*7c478bd9Sstevel@tonic-gate * hw_copy_limit_8 = 1024 (or 1536 on some systems) 278*7c478bd9Sstevel@tonic-gate * 279*7c478bd9Sstevel@tonic-gate * 280*7c478bd9Sstevel@tonic-gate * If hw_copy_limit_? is set to zero, then use of FPBLK copy is 281*7c478bd9Sstevel@tonic-gate * disabled for that alignment choice. 282*7c478bd9Sstevel@tonic-gate * If hw_copy_limit_? is set to a value between 1 and VIS_COPY_THRESHOLD (256) 283*7c478bd9Sstevel@tonic-gate * the value of VIS_COPY_THRESHOLD is used. 284*7c478bd9Sstevel@tonic-gate * It is not envisioned that hw_copy_limit_? 
will be changed in the field 285*7c478bd9Sstevel@tonic-gate * It is provided to allow for disabling FPBLK copies and to allow 286*7c478bd9Sstevel@tonic-gate * easy testing of alternate values on future HW implementations 287*7c478bd9Sstevel@tonic-gate * that might have different cache sizes, clock rates or instruction 288*7c478bd9Sstevel@tonic-gate * timing rules. 289*7c478bd9Sstevel@tonic-gate * 290*7c478bd9Sstevel@tonic-gate * Our first test for FPBLK copies vs non-FPBLK copies checks a minimum 291*7c478bd9Sstevel@tonic-gate * threshold to speedup all shorter copies (less than 256). That 292*7c478bd9Sstevel@tonic-gate * saves an alignment test, memory reference, and enabling test 293*7c478bd9Sstevel@tonic-gate * for all short copies, or an estimated 24 clocks. 294*7c478bd9Sstevel@tonic-gate * 295*7c478bd9Sstevel@tonic-gate * The order in which these limits are checked does matter since each 296*7c478bd9Sstevel@tonic-gate * non-predicted tst and branch costs around 10 clocks. 297*7c478bd9Sstevel@tonic-gate * If src and dst are randomly selected addresses, 298*7c478bd9Sstevel@tonic-gate * 4 of 8 will not be alignable. 299*7c478bd9Sstevel@tonic-gate * 2 of 8 will be half word alignable. 300*7c478bd9Sstevel@tonic-gate * 1 of 8 will be word alignable. 301*7c478bd9Sstevel@tonic-gate * 1 of 8 will be long word alignable. 302*7c478bd9Sstevel@tonic-gate * But, tests on running kernels show that src and dst to copy code 303*7c478bd9Sstevel@tonic-gate * are typically not on random alignments. Structure copies and 304*7c478bd9Sstevel@tonic-gate * copies of larger data sizes are often on long word boundaries. 305*7c478bd9Sstevel@tonic-gate * So we test the long word alignment case first, then 306*7c478bd9Sstevel@tonic-gate * the byte alignment, then halfword, then word alignment. 307*7c478bd9Sstevel@tonic-gate * 308*7c478bd9Sstevel@tonic-gate * Several times, tests for length are made to split the code 309*7c478bd9Sstevel@tonic-gate * into subcases. 
These tests often allow later tests to be 310*7c478bd9Sstevel@tonic-gate * avoided. For example, within the non-FPBLK copy, we first 311*7c478bd9Sstevel@tonic-gate * check for tiny copies of 3 bytes or less. That allows us 312*7c478bd9Sstevel@tonic-gate * to use a 4-way unrolled loop for the general byte copy case 313*7c478bd9Sstevel@tonic-gate * without a test on loop entry. 314*7c478bd9Sstevel@tonic-gate * We subdivide the non-FPBLK case further into CHKSIZE bytes and less 315*7c478bd9Sstevel@tonic-gate * vs longer cases. For the really short case, we don't attempt 316*7c478bd9Sstevel@tonic-gate * align src and dst. We try to minimize special case tests in 317*7c478bd9Sstevel@tonic-gate * the shortest loops as each test adds a significant percentage 318*7c478bd9Sstevel@tonic-gate * to the total time. 319*7c478bd9Sstevel@tonic-gate * 320*7c478bd9Sstevel@tonic-gate * For the medium sized cases, we allow ourselves to adjust the 321*7c478bd9Sstevel@tonic-gate * src and dst alignment and provide special cases for each of 322*7c478bd9Sstevel@tonic-gate * the four adjusted alignment cases. The CHKSIZE that was used 323*7c478bd9Sstevel@tonic-gate * to decide between short and medium size was chosen to be 39 324*7c478bd9Sstevel@tonic-gate * as that allows for the worst case of 7 bytes of alignment 325*7c478bd9Sstevel@tonic-gate * shift and 4 times 8 bytes for the first long word unrolling. 326*7c478bd9Sstevel@tonic-gate * That knowledge saves an initial test for length on entry into 327*7c478bd9Sstevel@tonic-gate * the medium cases. If the general loop unrolling factor were 328*7c478bd9Sstevel@tonic-gate * to be increases, this number would also need to be adjusted. 329*7c478bd9Sstevel@tonic-gate * 330*7c478bd9Sstevel@tonic-gate * For all cases in the non-FPBLK code where it is known that at 331*7c478bd9Sstevel@tonic-gate * least 4 chunks of data are available for movement, the 332*7c478bd9Sstevel@tonic-gate * loop is unrolled by four. 
This 4-way loop runs in 8 clocks 333*7c478bd9Sstevel@tonic-gate * or 2 clocks per data element. Due to limitations of the 334*7c478bd9Sstevel@tonic-gate * branch instruction on Cheetah, Jaguar, and Panther, the 335*7c478bd9Sstevel@tonic-gate * minimum time for a small, tight loop is 3 clocks. So 336*7c478bd9Sstevel@tonic-gate * the 4-way loop runs 50% faster than the fastest non-unrolled 337*7c478bd9Sstevel@tonic-gate * loop. 338*7c478bd9Sstevel@tonic-gate * 339*7c478bd9Sstevel@tonic-gate * Instruction alignment is forced by used of .align 16 directives 340*7c478bd9Sstevel@tonic-gate * and nops which are not executed in the code. This 341*7c478bd9Sstevel@tonic-gate * combination of operations shifts the alignment of following 342*7c478bd9Sstevel@tonic-gate * loops to insure that loops are aligned so that their instructions 343*7c478bd9Sstevel@tonic-gate * fall within the minimum number of 4 instruction fetch groups. 344*7c478bd9Sstevel@tonic-gate * If instructions are inserted or removed between the .align 345*7c478bd9Sstevel@tonic-gate * instruction and the unrolled loops, then the alignment needs 346*7c478bd9Sstevel@tonic-gate * to be readjusted. Misaligned loops can add a clock per loop 347*7c478bd9Sstevel@tonic-gate * iteration to the loop timing. 348*7c478bd9Sstevel@tonic-gate * 349*7c478bd9Sstevel@tonic-gate * In a few cases, code is duplicated to avoid a branch. Since 350*7c478bd9Sstevel@tonic-gate * a non-predicted tst and branch takes 10 clocks, this savings 351*7c478bd9Sstevel@tonic-gate * is judged an appropriate time-space tradeoff. 352*7c478bd9Sstevel@tonic-gate * 353*7c478bd9Sstevel@tonic-gate * Within the FPBLK-code, the prefetch method in the inner 354*7c478bd9Sstevel@tonic-gate * loop needs to be explained as it is not standard. Two 355*7c478bd9Sstevel@tonic-gate * prefetches are issued for each cache line instead of one. 356*7c478bd9Sstevel@tonic-gate * The primary one is at the maximum reach of 8 cache lines. 
357*7c478bd9Sstevel@tonic-gate * Most of the time, that maximum prefetch reach gives the 358*7c478bd9Sstevel@tonic-gate * cache line more time to reach the processor for systems with 359*7c478bd9Sstevel@tonic-gate * higher processor clocks. But, sometimes memory interference 360*7c478bd9Sstevel@tonic-gate * can cause that prefetch to be dropped. Putting a second 361*7c478bd9Sstevel@tonic-gate * prefetch at a reach of 5 cache lines catches the drops 362*7c478bd9Sstevel@tonic-gate * three iterations later and shows a measured improvement 363*7c478bd9Sstevel@tonic-gate * in performance over any similar loop with a single prefetch. 364*7c478bd9Sstevel@tonic-gate * The prefetches are placed in the loop so they overlap with 365*7c478bd9Sstevel@tonic-gate * non-memory instructions, so that there is no extra cost 366*7c478bd9Sstevel@tonic-gate * when the data is already in-cache. 367*7c478bd9Sstevel@tonic-gate * 368*7c478bd9Sstevel@tonic-gate */ 369*7c478bd9Sstevel@tonic-gate 370*7c478bd9Sstevel@tonic-gate/* 371*7c478bd9Sstevel@tonic-gate * Notes on preserving existing fp state and on membars. 372*7c478bd9Sstevel@tonic-gate * 373*7c478bd9Sstevel@tonic-gate * When a copyOP decides to use fp we may have to preserve existing 374*7c478bd9Sstevel@tonic-gate * floating point state. It is not the caller's state that we need to 375*7c478bd9Sstevel@tonic-gate * preserve - the rest of the kernel does not use fp and, anyway, fp 376*7c478bd9Sstevel@tonic-gate * registers are volatile across a call. 
Some examples: 377*7c478bd9Sstevel@tonic-gate * 378*7c478bd9Sstevel@tonic-gate * - userland has fp state and is interrupted (device interrupt 379*7c478bd9Sstevel@tonic-gate * or trap) and within the interrupt/trap handling we use 380*7c478bd9Sstevel@tonic-gate * bcopy() 381*7c478bd9Sstevel@tonic-gate * - another (higher level) interrupt or trap handler uses bcopy 382*7c478bd9Sstevel@tonic-gate * while a bcopy from an earlier interrupt is still active 383*7c478bd9Sstevel@tonic-gate * - an asynchronous error trap occurs while fp state exists (in 384*7c478bd9Sstevel@tonic-gate * userland or in kernel copy) and the tl0 component of the handling 385*7c478bd9Sstevel@tonic-gate * uses bcopy 386*7c478bd9Sstevel@tonic-gate * - a user process with fp state incurs a copy-on-write fault and 387*7c478bd9Sstevel@tonic-gate * hwblkpagecopy always uses fp 388*7c478bd9Sstevel@tonic-gate * 389*7c478bd9Sstevel@tonic-gate * We therefore need a per-call place in which to preserve fp state - 390*7c478bd9Sstevel@tonic-gate * using our stack is ideal (and since fp copy cannot be leaf optimized 391*7c478bd9Sstevel@tonic-gate * because of calls it makes, this is no hardship). 392*7c478bd9Sstevel@tonic-gate * 393*7c478bd9Sstevel@tonic-gate * The following membar BLD/BST discussion is Cheetah pipeline specific. 394*7c478bd9Sstevel@tonic-gate * In Cheetah BLD is blocking, #LoadLoad/#LoadStore/#StoreStore are 395*7c478bd9Sstevel@tonic-gate * nops (those semantics always apply) and #StoreLoad is implemented 396*7c478bd9Sstevel@tonic-gate * as a membar #Sync. 397*7c478bd9Sstevel@tonic-gate * 398*7c478bd9Sstevel@tonic-gate * It is possible that the owner of the fp state has a block load or 399*7c478bd9Sstevel@tonic-gate * block store still "in flight" at the time we come to preserve that 400*7c478bd9Sstevel@tonic-gate * state. Block loads are blocking in Cheetah pipelines so we do not 401*7c478bd9Sstevel@tonic-gate * need to sync with them. 
In preserving fp regs we will use block stores 402*7c478bd9Sstevel@tonic-gate * (which are not blocking in Cheetah pipelines) so we require a membar #Sync 403*7c478bd9Sstevel@tonic-gate * after storing state (so that our subsequent use of those registers 404*7c478bd9Sstevel@tonic-gate * does not modify them before the block stores complete); this membar 405*7c478bd9Sstevel@tonic-gate * also serves to sync with block stores the owner of the fp state has 406*7c478bd9Sstevel@tonic-gate * initiated. 407*7c478bd9Sstevel@tonic-gate * 408*7c478bd9Sstevel@tonic-gate * When we have finished fp copy (with it's repeated block stores) 409*7c478bd9Sstevel@tonic-gate * we must membar #Sync so that our block stores may complete before 410*7c478bd9Sstevel@tonic-gate * we either restore the original fp state into the fp registers or 411*7c478bd9Sstevel@tonic-gate * return to a caller which may initiate other fp operations that could 412*7c478bd9Sstevel@tonic-gate * modify the fp regs we used before the block stores complete. 413*7c478bd9Sstevel@tonic-gate * 414*7c478bd9Sstevel@tonic-gate * Synchronous faults (eg, unresolvable DMMU miss) that occur while 415*7c478bd9Sstevel@tonic-gate * t_lofault is not NULL will not panic but will instead trampoline 416*7c478bd9Sstevel@tonic-gate * to the registered lofault handler. There is no need for any 417*7c478bd9Sstevel@tonic-gate * membars for these - eg, our store to t_lofault will always be visible to 418*7c478bd9Sstevel@tonic-gate * ourselves and it is our cpu which will take any trap. 419*7c478bd9Sstevel@tonic-gate * 420*7c478bd9Sstevel@tonic-gate * Asynchronous faults (eg, uncorrectable ECC error from memory) that occur 421*7c478bd9Sstevel@tonic-gate * while t_lofault is not NULL will also not panic. Since we're copying 422*7c478bd9Sstevel@tonic-gate * to or from userland the extent of the damage is known - the destination 423*7c478bd9Sstevel@tonic-gate * buffer is incomplete. 
So trap handlers will trampoline to the lofault 424*7c478bd9Sstevel@tonic-gate * handler in this case which should take some form of error action to 425*7c478bd9Sstevel@tonic-gate * avoid using the incomplete buffer. The trap handler also flags the 426*7c478bd9Sstevel@tonic-gate * fault so that later return-from-trap handling (for the trap that brought 427*7c478bd9Sstevel@tonic-gate * this thread into the kernel in the first place) can notify the process 428*7c478bd9Sstevel@tonic-gate * and reboot the system (or restart the service with Greenline/Contracts). 429*7c478bd9Sstevel@tonic-gate * 430*7c478bd9Sstevel@tonic-gate * Asynchronous faults (eg, uncorrectable ECC error from memory) can 431*7c478bd9Sstevel@tonic-gate * result in deferred error traps - the trap is taken sometime after 432*7c478bd9Sstevel@tonic-gate * the event and the trap PC may not be the PC of the faulting access. 433*7c478bd9Sstevel@tonic-gate * Delivery of such pending traps can be forced by a membar #Sync, acting 434*7c478bd9Sstevel@tonic-gate * as an "error barrier" in this role. To accurately apply the user/kernel 435*7c478bd9Sstevel@tonic-gate * separation described in the preceding paragraph we must force delivery 436*7c478bd9Sstevel@tonic-gate * of deferred traps affecting kernel state before we install a lofault 437*7c478bd9Sstevel@tonic-gate * handler (if we interpose a new lofault handler on an existing one there 438*7c478bd9Sstevel@tonic-gate * is no need to repeat this), and we must force delivery of deferred 439*7c478bd9Sstevel@tonic-gate * errors affecting the lofault-protected region before we clear t_lofault. 440*7c478bd9Sstevel@tonic-gate * Failure to do so results in lost kernel state being interpreted as 441*7c478bd9Sstevel@tonic-gate * affecting a copyin/copyout only, or of an error that really only 442*7c478bd9Sstevel@tonic-gate * affects copy data being interpreted as losing kernel state. 
443*7c478bd9Sstevel@tonic-gate * 444*7c478bd9Sstevel@tonic-gate * Since the copy operations may preserve and later restore floating 445*7c478bd9Sstevel@tonic-gate * point state that does not belong to the caller (see examples above), 446*7c478bd9Sstevel@tonic-gate * we must be careful in how we do this in order to prevent corruption 447*7c478bd9Sstevel@tonic-gate * of another program. 448*7c478bd9Sstevel@tonic-gate * 449*7c478bd9Sstevel@tonic-gate * To make sure that floating point state is always saved and restored 450*7c478bd9Sstevel@tonic-gate * correctly, the following "big rules" must be followed when the floating 451*7c478bd9Sstevel@tonic-gate * point registers will be used: 452*7c478bd9Sstevel@tonic-gate * 453*7c478bd9Sstevel@tonic-gate * 1. %l6 always holds the caller's lofault handler. Also in this register, 454*7c478bd9Sstevel@tonic-gate * Bit 1 (FPUSED_FLAG) indicates that the floating point registers are in 455*7c478bd9Sstevel@tonic-gate * use. Bit 2 (TRAMP_FLAG) indicates that the call was to bcopy, and a 456*7c478bd9Sstevel@tonic-gate * lofault handler was set coming in. 457*7c478bd9Sstevel@tonic-gate * 458*7c478bd9Sstevel@tonic-gate * 2. The FPUSED flag indicates that all FP state has been successfully stored 459*7c478bd9Sstevel@tonic-gate * on the stack. It should not be set until this save has been completed. 460*7c478bd9Sstevel@tonic-gate * 461*7c478bd9Sstevel@tonic-gate * 3. The FPUSED flag should not be cleared on exit until all FP state has 462*7c478bd9Sstevel@tonic-gate * been restored from the stack. If an error occurs while restoring 463*7c478bd9Sstevel@tonic-gate * data from the stack, the error handler can check this flag to see if 464*7c478bd9Sstevel@tonic-gate * a restore is necessary. 465*7c478bd9Sstevel@tonic-gate * 466*7c478bd9Sstevel@tonic-gate * 4. Code run under the new lofault handler must be kept to a minimum. 
 * particular, any calls to FP_ALLOWMIGRATE, which could result in a call
 * to kpreempt(), should not be made until after the lofault handler has
 * been restored.
 */

/*
 * VIS_COPY_THRESHOLD indicates the minimum number of bytes needed
 * to "break even" using FP/VIS-accelerated memory operations.
 * The FPBLK code assumes a minimum number of bytes are available
 * to be moved on entry.  Check that code carefully before
 * reducing VIS_COPY_THRESHOLD below 256.
 */
/*
 * This shadows sys/machsystm.h which can't be included due to the lack of
 * _ASM guards in include files it references. Change it here, change it there.
 */
#define	VIS_COPY_THRESHOLD	256

/*
 * TEST for very short copies
 * Be aware that the maximum unroll for the short unaligned case
 * is SHORTCOPY+1
 */
#define	SHORTCOPY	3
#define	CHKSIZE		39

/*
 * Flag bits OR'd into the saved t_lofault value held in %l6 (FP copies)
 * or %o4 (small copies) -- see the register usage rules at the top of
 * this file.
 *
 * TRAMP_FLAG indicates that we're to trampoline to the error handler.
 * Entry points bcopy, copyin_noerr, and copyout_noerr use this flag.
 * kcopy, copyout, xcopyout, copyin, and xcopyin do not set this flag.
 */
#define	FPUSED_FLAG	1
#define	TRAMP_FLAG	2
#define	MASK_FLAGS	3	/* FPUSED_FLAG | TRAMP_FLAG */

/*
 * Number of outstanding prefetches.
 * Testing with 1200 MHz Cheetah+ and Jaguar gives best results with
 * two prefetches, one with a reach of 8*BLOCK_SIZE+8 and one with a
 * reach of 5*BLOCK_SIZE. The double prefetch gives a typical improvement
 * of 5% for large copies as compared to a single prefetch. The reason
 * for the improvement is that with Cheetah and Jaguar, some prefetches
 * are dropped due to the prefetch queue being full. The second prefetch
 * reduces the number of cache lines that are dropped.
 * Do not remove the double prefetch or change either CHEETAH_PREFETCH
 * or CHEETAH_2ND_PREFETCH without extensive performance tests to prove
 * there is no loss of performance.
 */
#define	CHEETAH_PREFETCH	8
#define	CHEETAH_2ND_PREFETCH	5

#define	VIS_BLOCKSIZE		64

/*
 * Size of stack frame in order to accommodate a 64-byte aligned
 * floating-point register save area and 2 64-bit temp locations.
 * All copy functions use two quadrants of fp registers; to assure a
 * block-aligned two block buffer in which to save we must reserve
 * three blocks on stack.  Not all functions preserve %fprs on stack
 * or need to preserve %gsr but we use HWCOPYFRAMESIZE for all.
 *
 *    _______________________________________ <-- %fp + STACK_BIAS
 *    | We may need to preserve 2 quadrants |
 *    | of fp regs, but since we do so with |
 *    | BST/BLD we need room in which to    |
 *    | align to VIS_BLOCKSIZE bytes.  So   |
 *    | this area is 3 * VIS_BLOCKSIZE.     | <--  - SAVED_FPREGS_OFFSET
 *    |-------------------------------------|
 *    | 8 bytes to save %fprs               | <--  - SAVED_FPRS_OFFSET
 *    |-------------------------------------|
 *    | 8 bytes to save %gsr                | <--  - SAVED_GSR_OFFSET
 *    ---------------------------------------
 */
#define	HWCOPYFRAMESIZE		((VIS_BLOCKSIZE * (2 + 1)) + (2 * 8))
#define	SAVED_FPREGS_OFFSET	(VIS_BLOCKSIZE * 3)
#define	SAVED_FPREGS_ADJUST	((VIS_BLOCKSIZE * 2) - 1)
#define	SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 8)
#define	SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 8)

/*
 * Common macros used by the various versions of the block copy
 * routines in this file.
 */

/*
 * In FP copies if we do not have preserved data to restore over
 * the fp regs we used then we must zero those regs to avoid
 * exposing portions of the data to later threads (data security).
 *
 * Copy functions use either quadrants 1 and 3 or 2 and 4.
 *
 * FZEROQ1Q3: Zero quadrants 1 and 3, ie %f0 - %f15 and %f32 - %f47
 * FZEROQ2Q4: Zero quadrants 2 and 4, ie %f16 - %f31 and %f48 - %f63
 *
 * The instructions below are quicker than repeated fzero instructions
 * since they can dispatch down two fp pipelines.
 * (0 + 0 and 0 * 0 both produce 0, so the faddd/fmuld results simply
 * propagate the two zeroed source registers into the remaining regs.)
 */
#define	FZEROQ1Q3			\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f32		;\
	fmuld	%f0, %f2, %f34		;\
	faddd	%f0, %f2, %f36		;\
	fmuld	%f0, %f2, %f38		;\
	faddd	%f0, %f2, %f40		;\
	fmuld	%f0, %f2, %f42		;\
	faddd	%f0, %f2, %f44		;\
	fmuld	%f0, %f2, %f46

#define	FZEROQ2Q4			\
	fzero	%f16			;\
	fzero	%f18			;\
	faddd	%f16, %f18, %f20	;\
	fmuld	%f16, %f18, %f22	;\
	faddd	%f16, %f18, %f24	;\
	fmuld	%f16, %f18, %f26	;\
	faddd	%f16, %f18, %f28	;\
	fmuld	%f16, %f18, %f30	;\
	faddd	%f16, %f18, %f48	;\
	fmuld	%f16, %f18, %f50	;\
	faddd	%f16, %f18, %f52	;\
	fmuld	%f16, %f18, %f54	;\
	faddd	%f16, %f18, %f56	;\
	fmuld	%f16, %f18, %f58	;\
	faddd	%f16, %f18, %f60	;\
	fmuld	%f16, %f18, %f62

/*
 * Macros to save and restore quadrants 1 and 3 or 2 and 4 to/from the stack.
 * Used to save and restore in-use fp registers when we want to use FP
 * and find fp already in use and copy size still large enough to justify
 * the additional overhead of this save and restore.
 *
 * A membar #Sync is needed before save to sync fp ops initiated before
 * the call to the copy function (by whoever has fp in use); for example
 * an earlier block load to the quadrant we are about to save may still be
 * "in flight".  A membar #Sync is required at the end of the save to
 * sync our block store (the copy code is about to begin ldd's to the
 * first quadrant).  Note, however, that since Cheetah pipeline block load
 * is blocking we can omit the initial membar before saving fp state (they're
 * commented below in case of future porting to a chip that does not block
 * on block load).
 *
 * Similarly: a membar #Sync before restore allows the block stores of
 * the copy operation to complete before we fill the quadrants with their
 * original data, and a membar #Sync after restore lets the block loads
 * of the restore complete before we return to whoever has the fp regs
 * in use.  To avoid repeated membar #Sync we make it the responsibility
 * of the copy code to membar #Sync immediately after copy is complete
 * and before using the BLD_*_FROMSTACK macro.
 */
#if !defined(lint)
/*
 * Each macro takes a scratch register (tmp1), which it clobbers.  The
 * save area address is computed from %fp and then rounded down to a
 * VIS_BLOCKSIZE boundary, as required by the block load/store ASIs;
 * SAVED_FPREGS_ADJUST guarantees the aligned pointer still lies within
 * the 3-block reservation (see the stack frame layout comment above).
 */
#define	BST_FPQ1Q3_TOSTACK(tmp1)				\
	/* membar #Sync	*/					;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	stda	%f0, [tmp1]ASI_BLK_P				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	stda	%f32, [tmp1]ASI_BLK_P				;\
	membar	#Sync

#define	BLD_FPQ1Q3_FROMSTACK(tmp1)				\
	/* membar #Sync - provided at copy completion */	;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	ldda	[tmp1]ASI_BLK_P, %f0				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	ldda	[tmp1]ASI_BLK_P, %f32				;\
	membar	#Sync

#define	BST_FPQ2Q4_TOSTACK(tmp1)				\
	/* membar #Sync */					;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	stda	%f16, [tmp1]ASI_BLK_P				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	stda	%f48, [tmp1]ASI_BLK_P				;\
	membar	#Sync

#define	BLD_FPQ2Q4_FROMSTACK(tmp1)				\
	/* membar #Sync - provided at copy completion */	;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	ldda	[tmp1]ASI_BLK_P, %f16				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	ldda	[tmp1]ASI_BLK_P, %f48				;\
	membar	#Sync
#endif

/*
 * FP_NOMIGRATE and FP_ALLOWMIGRATE.  Prevent migration (or, stronger,
 * prevent preemption if there is no t_lwp to save FP state to on context
 * switch) before commencing a FP copy, and reallow it on completion or
 * in error trampoline paths when we were using FP copy.
 *
 * Both macros may call other functions, so be aware that all outputs are
 * forfeit after using these macros.  For this reason we do not pass registers
 * to use - we just use any outputs we want.
 *
 * For fpRAS we need to perform the fpRAS mechanism test on the same
 * CPU as we use for the copy operation, both so that we validate the
 * CPU we perform the copy on and so that we know which CPU failed
 * if a failure is detected.  Hence we need to be bound to "our" CPU.
 * This could be achieved through disabling preemption (and we do it that
 * way for threads with no t_lwp) but for larger copies this may hold
 * higher priority threads off of cpu for too long (eg, realtime).
 * So we
 * make use of the lightweight t_nomigrate mechanism where we can (ie, when
 * we have a t_lwp).
 *
 * Pseudo code:
 *
 * FP_NOMIGRATE:
 *
 * if (curthread->t_lwp) {
 *	thread_nomigrate();
 * } else {
 *	kpreempt_disable();
 * }
 *
 * FP_ALLOWMIGRATE:
 *
 * if (curthread->t_lwp) {
 *	thread_allowmigrate();
 * } else {
 *	kpreempt_enable();
 * }
 *
 * Note the annulled branch (brz,a): the delay-slot load of T_PREEMPT only
 * executes on the no-lwp path that actually needs it.  label1/label2 are
 * caller-supplied local label numbers so each expansion is unique.
 */

#define	FP_NOMIGRATE(label1, label2)				\
	ldn	[THREAD_REG + T_LWP], %o0			;\
	brz,a,pn %o0, label1/**/f				;\
	  ldsb	[THREAD_REG + T_PREEMPT], %o1			;\
	call	thread_nomigrate				;\
	  nop							;\
	ba	label2/**/f					;\
	  nop							;\
label1:								;\
	inc	%o1						;\
	stb	%o1, [THREAD_REG + T_PREEMPT]			;\
label2:

#define	FP_ALLOWMIGRATE(label1, label2)				\
	ldn	[THREAD_REG + T_LWP], %o0			;\
	brz,a,pn %o0, label1/**/f				;\
	  ldsb	[THREAD_REG + T_PREEMPT], %o1			;\
	call	thread_allowmigrate				;\
	  nop							;\
	ba	label2/**/f					;\
	  nop							;\
label1:								;\
	dec	%o1						;\
	brnz,pn	%o1, label2/**/f				;\
	  stb	%o1, [THREAD_REG + T_PREEMPT]			;\
	ldn	[THREAD_REG + T_CPU], %o0			;\
	ldub	[%o0 + CPU_KPRUNRUN], %o0			;\
	brz,pt	%o0, label2/**/f				;\
	  nop							;\
	call	kpreempt					;\
	  rdpr	%pil, %o0					;\
label2:

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 *
 * Register usage on entry (see the lint prototype below):
 *	%o0 = from (source), %o1 = to (destination), %o2 = count.
 * The entry sequence selects between the small (leaf) copy and the
 * hardware/FP block copy based on count, the mutual alignment of
 * src/dst (xor of the addresses), and the per-alignment tunables
 * hw_copy_limit_{1,2,4,8} (a zero limit disables HW copy entirely).
 */

#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return(0); }

#else	/* lint */

	.seg	".text"
	.align	4

	ENTRY(kcopy)

	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
	bleu,pt	%ncc, .kcopy_small		! go to larger cases
	  xor	%o0, %o1, %o3			! are src, dst alignable?
	btst	7, %o3				!
	bz,pt	%ncc, .kcopy_8			! check for longword alignment
	  nop
	btst	1, %o3				!
	bz,pt	%ncc, .kcopy_2			! check for half-word
	  nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	  nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	  nop
.kcopy_2:
	btst	3, %o3				!
	bz,pt	%ncc, .kcopy_4			! check for word alignment
	  nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	  nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	  nop
.kcopy_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	  nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	  nop
.kcopy_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	  nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	  nop

.kcopy_small:
	! Leaf-routine path: install .sm_copyerr as t_lofault and join the
	! common small-copy code shared with bcopy (no register window).
	sethi	%hi(.sm_copyerr), %o5		! sm_copyerr is lofault value
	or	%o5, %lo(.sm_copyerr), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .sm_do_copy		! common code
	  stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault

.kcopy_more:
	! Large-copy path: get a register window plus the FP save area,
	! install .copyerr, and join the common FP block-copy code.
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
	or	%l7, %lo(.copyerr), %l7
	ldn	[THREAD_REG + T_LOFAULT], %l6	! save existing handler
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .do_copy			! common code
	  stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault


/*
 * We got here because of a fault during bcopy_more, called from kcopy or bcopy.
 * Errno value is in %g1.  bcopy_more uses fp quadrants 1 and 3.
 */
.copyerr:
	! While we restore fp state, guard against a second fault with
	! .copyerr2 (which just panics - see below).
	set	.copyerr2, %l0
	membar	#Sync				! sync error barrier
	stn	%l0, [THREAD_REG + T_LOFAULT]	! set t_lofault
	btst	FPUSED_FLAG, %l6
	bz	%ncc, 1f
	  and	%l6, TRAMP_FLAG, %l0		! copy trampoline flag to %l0

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	  nop

	! FP was live on entry: restore the caller's quadrants 1 and 3.
	BLD_FPQ1Q3_FROMSTACK(%o2)

	ba,pt	%ncc, 1f
	  wr	%o3, 0, %fprs		! restore fprs

4:
	! FP was not live on entry: zero the quadrants we dirtied so no
	! copy data leaks to other threads (data security), then restore fprs.
	FZEROQ1Q3
	wr	%o3, 0, %fprs		! restore fprs

	!
	! Need to cater for the different expectations of kcopy
	! and bcopy. kcopy will *always* set a t_lofault handler
	! If it fires, we're expected to just return the error code
	! and *not* to invoke any existing error handler. As far as
	! bcopy is concerned, we only set t_lofault if there was an
	! existing lofault handler. In that case we're expected to
	! invoke the previously existing handler after resetting the
	! t_lofault value.
	!
1:
	andn	%l6, MASK_FLAGS, %l6		! turn trampoline flag off
	membar	#Sync				! sync error barrier
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)

	btst	TRAMP_FLAG, %l0
	bnz,pn	%ncc, 3f
	  nop
	ret					! kcopy case: return errno (%g1)
	  restore	%g1, 0, %o0

3:
	!
	! We're here via bcopy. There *must* have been an error handler
	! in place otherwise we would have died a nasty death already.
	!
	jmp	%l6				! goto real handler
	  restore	%g0, 0, %o0		! dispose of copy window

/*
 * We got here because of a fault in .copyerr.  We can't safely restore fp
 * state, so we panic.
 */
fp_panic_msg:
	.asciz	"Unable to restore fp state after copy operation"

	.align	4
.copyerr2:
	set	fp_panic_msg, %o0
	call	panic
	  nop

/*
 * We got here because of a fault during a small kcopy or bcopy.
 * No floating point registers are used by the small copies.
 * Errno value is in %g1.  %o4 holds the saved t_lofault (possibly
 * with TRAMP_FLAG OR'd in by the bcopy entry path).
 */
.sm_copyerr:
1:
	btst	TRAMP_FLAG, %o4
	membar	#Sync
	andn	%o4, TRAMP_FLAG, %o4
	bnz,pn	%ncc, 3f
	  stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl					! kcopy case: return errno (%g1)
	  mov	%g1, %o0
3:
	jmp	%o4				! goto real handler
	  mov	%g0, %o0			!

	SET_SIZE(kcopy)
#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * Registers: l6 - saved t_lofault
 * (for short copies, o4 - saved t_lofault)
 *
 * Copy a page of memory.
 * Assumes double word alignment and a count >= 256.
926*7c478bd9Sstevel@tonic-gate */ 927*7c478bd9Sstevel@tonic-gate#if defined(lint) 928*7c478bd9Sstevel@tonic-gate 929*7c478bd9Sstevel@tonic-gate/* ARGSUSED */ 930*7c478bd9Sstevel@tonic-gatevoid 931*7c478bd9Sstevel@tonic-gatebcopy(const void *from, void *to, size_t count) 932*7c478bd9Sstevel@tonic-gate{} 933*7c478bd9Sstevel@tonic-gate 934*7c478bd9Sstevel@tonic-gate#else /* lint */ 935*7c478bd9Sstevel@tonic-gate 936*7c478bd9Sstevel@tonic-gate ENTRY(bcopy) 937*7c478bd9Sstevel@tonic-gate 938*7c478bd9Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 939*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to larger cases 940*7c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 941*7c478bd9Sstevel@tonic-gate btst 7, %o3 ! 942*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bcopy_8 ! check for longword alignment 943*7c478bd9Sstevel@tonic-gate nop 944*7c478bd9Sstevel@tonic-gate btst 1, %o3 ! 945*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bcopy_2 ! check for half-word 946*7c478bd9Sstevel@tonic-gate nop 947*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 948*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 949*7c478bd9Sstevel@tonic-gate tst %o3 950*7c478bd9Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 951*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 952*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 953*7c478bd9Sstevel@tonic-gate nop 954*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 955*7c478bd9Sstevel@tonic-gate nop 956*7c478bd9Sstevel@tonic-gate.bcopy_2: 957*7c478bd9Sstevel@tonic-gate btst 3, %o3 ! 958*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bcopy_4 ! check for word alignment 959*7c478bd9Sstevel@tonic-gate nop 960*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! 
Check copy limit 961*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 962*7c478bd9Sstevel@tonic-gate tst %o3 963*7c478bd9Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 964*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 965*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 966*7c478bd9Sstevel@tonic-gate nop 967*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 968*7c478bd9Sstevel@tonic-gate nop 969*7c478bd9Sstevel@tonic-gate.bcopy_4: 970*7c478bd9Sstevel@tonic-gate ! already checked longword, must be word aligned 971*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 972*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 973*7c478bd9Sstevel@tonic-gate tst %o3 974*7c478bd9Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 975*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 976*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 977*7c478bd9Sstevel@tonic-gate nop 978*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 979*7c478bd9Sstevel@tonic-gate nop 980*7c478bd9Sstevel@tonic-gate.bcopy_8: 981*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 982*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 983*7c478bd9Sstevel@tonic-gate tst %o3 984*7c478bd9Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 985*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 986*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 987*7c478bd9Sstevel@tonic-gate nop 988*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 989*7c478bd9Sstevel@tonic-gate nop 990*7c478bd9Sstevel@tonic-gate 991*7c478bd9Sstevel@tonic-gate .align 16 992*7c478bd9Sstevel@tonic-gate.bcopy_small: 993*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! 
save t_lofault 994*7c478bd9Sstevel@tonic-gate tst %o4 995*7c478bd9Sstevel@tonic-gate bz,pt %icc, .sm_do_copy 996*7c478bd9Sstevel@tonic-gate nop 997*7c478bd9Sstevel@tonic-gate sethi %hi(.sm_copyerr), %o5 998*7c478bd9Sstevel@tonic-gate or %o5, %lo(.sm_copyerr), %o5 999*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 1000*7c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! install new vector 1001*7c478bd9Sstevel@tonic-gate or %o4, TRAMP_FLAG, %o4 ! error should trampoline 1002*7c478bd9Sstevel@tonic-gate.sm_do_copy: 1003*7c478bd9Sstevel@tonic-gate cmp %o2, SHORTCOPY ! check for really short case 1004*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bc_sm_left ! 1005*7c478bd9Sstevel@tonic-gate cmp %o2, CHKSIZE ! check for medium length cases 1006*7c478bd9Sstevel@tonic-gate bgu,pn %ncc, .bc_med ! 1007*7c478bd9Sstevel@tonic-gate or %o0, %o1, %o3 ! prepare alignment check 1008*7c478bd9Sstevel@tonic-gate andcc %o3, 0x3, %g0 ! test for alignment 1009*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_word ! branch to word aligned case 1010*7c478bd9Sstevel@tonic-gate.bc_sm_movebytes: 1011*7c478bd9Sstevel@tonic-gate sub %o2, 3, %o2 ! adjust count to allow cc zero test 1012*7c478bd9Sstevel@tonic-gate.bc_sm_notalign4: 1013*7c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! read byte 1014*7c478bd9Sstevel@tonic-gate stb %o3, [%o1] ! write byte 1015*7c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 1016*7c478bd9Sstevel@tonic-gate ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes 1017*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 1018*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 1] 1019*7c478bd9Sstevel@tonic-gate ldub [%o0 - 2], %o3 1020*7c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 1021*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 - 2] 1022*7c478bd9Sstevel@tonic-gate ldub [%o0 - 1], %o3 1023*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_notalign4 ! 
loop til 3 or fewer bytes remain 1024*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 - 1] 1025*7c478bd9Sstevel@tonic-gate add %o2, 3, %o2 ! restore count 1026*7c478bd9Sstevel@tonic-gate.bc_sm_left: 1027*7c478bd9Sstevel@tonic-gate tst %o2 1028*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit ! check for zero length 1029*7c478bd9Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 1030*7c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! move one byte 1031*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1032*7c478bd9Sstevel@tonic-gate stb %o3, [%o1] 1033*7c478bd9Sstevel@tonic-gate ldub [%o0 + 1], %o3 ! move another byte 1034*7c478bd9Sstevel@tonic-gate deccc %o2 ! check for more 1035*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1036*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 1] 1037*7c478bd9Sstevel@tonic-gate ldub [%o0 + 2], %o3 ! move final byte 1038*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 2] 1039*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 1040*7c478bd9Sstevel@tonic-gate andn %o4, TRAMP_FLAG, %o4 1041*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1042*7c478bd9Sstevel@tonic-gate retl 1043*7c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 1044*7c478bd9Sstevel@tonic-gate .align 16 1045*7c478bd9Sstevel@tonic-gate nop ! instruction alignment 1046*7c478bd9Sstevel@tonic-gate ! see discussion at start of file 1047*7c478bd9Sstevel@tonic-gate.bc_sm_words: 1048*7c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! read word 1049*7c478bd9Sstevel@tonic-gate.bc_sm_wordx: 1050*7c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! update count 1051*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 1052*7c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! update SRC 1053*7c478bd9Sstevel@tonic-gate lduw [%o0 - 4], %o3 ! read word 1054*7c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! update DST 1055*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_words ! loop til done 1056*7c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 4] ! 
write word 1057*7c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore count 1058*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1059*7c478bd9Sstevel@tonic-gate deccc %o2 1060*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_byte 1061*7c478bd9Sstevel@tonic-gate.bc_sm_half: 1062*7c478bd9Sstevel@tonic-gate subcc %o2, 2, %o2 ! reduce count by 2 1063*7c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 1064*7c478bd9Sstevel@tonic-gate lduh [%o0 - 2], %o3 ! read half word 1065*7c478bd9Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 1066*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_half ! loop til done 1067*7c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 2] ! write half word 1068*7c478bd9Sstevel@tonic-gate addcc %o2, 1, %o2 ! restore count 1069*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1070*7c478bd9Sstevel@tonic-gate nop 1071*7c478bd9Sstevel@tonic-gate.bc_sm_byte: 1072*7c478bd9Sstevel@tonic-gate ldub [%o0], %o3 1073*7c478bd9Sstevel@tonic-gate stb %o3, [%o1] 1074*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 1075*7c478bd9Sstevel@tonic-gate andn %o4, TRAMP_FLAG, %o4 1076*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1077*7c478bd9Sstevel@tonic-gate retl 1078*7c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 1079*7c478bd9Sstevel@tonic-gate 1080*7c478bd9Sstevel@tonic-gate.bc_sm_word: 1081*7c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! update count 1082*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_wordx 1083*7c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! read word 1084*7c478bd9Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore count 1085*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1086*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 1087*7c478bd9Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 1088*7c478bd9Sstevel@tonic-gate ldub [%o0 + 4], %o3 ! load one byte 1089*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1090*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 4] ! 
store one byte 1091*7c478bd9Sstevel@tonic-gate ldub [%o0 + 5], %o3 ! load second byte 1092*7c478bd9Sstevel@tonic-gate deccc %o2 1093*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1094*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 5] ! store second byte 1095*7c478bd9Sstevel@tonic-gate ldub [%o0 + 6], %o3 ! load third byte 1096*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 6] ! store third byte 1097*7c478bd9Sstevel@tonic-gate.bc_sm_exit: 1098*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 1099*7c478bd9Sstevel@tonic-gate andn %o4, TRAMP_FLAG, %o4 1100*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1101*7c478bd9Sstevel@tonic-gate retl 1102*7c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 1103*7c478bd9Sstevel@tonic-gate 1104*7c478bd9Sstevel@tonic-gate .align 16 1105*7c478bd9Sstevel@tonic-gate.bc_med: 1106*7c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! setup alignment check 1107*7c478bd9Sstevel@tonic-gate btst 1, %o3 1108*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_sm_movebytes ! unaligned 1109*7c478bd9Sstevel@tonic-gate nop 1110*7c478bd9Sstevel@tonic-gate btst 3, %o3 1111*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_med_half ! halfword aligned 1112*7c478bd9Sstevel@tonic-gate nop 1113*7c478bd9Sstevel@tonic-gate btst 7, %o3 1114*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_med_word ! word aligned 1115*7c478bd9Sstevel@tonic-gate nop 1116*7c478bd9Sstevel@tonic-gate.bc_med_long: 1117*7c478bd9Sstevel@tonic-gate btst 3, %o0 ! check for 1118*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_med_long1 ! word alignment 1119*7c478bd9Sstevel@tonic-gate nop 1120*7c478bd9Sstevel@tonic-gate.bc_med_long0: 1121*7c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 1122*7c478bd9Sstevel@tonic-gate inc %o0 1123*7c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! 
store byte 1124*7c478bd9Sstevel@tonic-gate inc %o1 1125*7c478bd9Sstevel@tonic-gate btst 3, %o0 1126*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_med_long0 1127*7c478bd9Sstevel@tonic-gate dec %o2 1128*7c478bd9Sstevel@tonic-gate.bc_med_long1: ! word aligned 1129*7c478bd9Sstevel@tonic-gate btst 7, %o0 ! check for long word 1130*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_med_long2 1131*7c478bd9Sstevel@tonic-gate nop 1132*7c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! load word 1133*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 1134*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! store word 1135*7c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 1136*7c478bd9Sstevel@tonic-gate sub %o2, 4, %o2 ! reduce count by 4 1137*7c478bd9Sstevel@tonic-gate! 1138*7c478bd9Sstevel@tonic-gate! Now long word aligned and have at least 32 bytes to move 1139*7c478bd9Sstevel@tonic-gate! 1140*7c478bd9Sstevel@tonic-gate.bc_med_long2: 1141*7c478bd9Sstevel@tonic-gate sub %o2, 31, %o2 ! adjust count to allow cc zero test 1142*7c478bd9Sstevel@tonic-gate.bc_med_lmove: 1143*7c478bd9Sstevel@tonic-gate ldx [%o0], %o3 ! read long word 1144*7c478bd9Sstevel@tonic-gate stx %o3, [%o1] ! write long word 1145*7c478bd9Sstevel@tonic-gate subcc %o2, 32, %o2 ! reduce count by 32 1146*7c478bd9Sstevel@tonic-gate ldx [%o0 + 8], %o3 ! repeat for a total for 4 long words 1147*7c478bd9Sstevel@tonic-gate add %o0, 32, %o0 ! advance SRC by 32 1148*7c478bd9Sstevel@tonic-gate stx %o3, [%o1 + 8] 1149*7c478bd9Sstevel@tonic-gate ldx [%o0 - 16], %o3 1150*7c478bd9Sstevel@tonic-gate add %o1, 32, %o1 ! advance DST by 32 1151*7c478bd9Sstevel@tonic-gate stx %o3, [%o1 - 16] 1152*7c478bd9Sstevel@tonic-gate ldx [%o0 - 8], %o3 1153*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_lmove ! loop til 31 or fewer bytes left 1154*7c478bd9Sstevel@tonic-gate stx %o3, [%o1 - 8] 1155*7c478bd9Sstevel@tonic-gate addcc %o2, 24, %o2 ! restore count to long word offset 1156*7c478bd9Sstevel@tonic-gate ble,pt %ncc, .bc_med_lextra ! 
check for more long words to move 1157*7c478bd9Sstevel@tonic-gate nop 1158*7c478bd9Sstevel@tonic-gate.bc_med_lword: 1159*7c478bd9Sstevel@tonic-gate ldx [%o0], %o3 ! read long word 1160*7c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 1161*7c478bd9Sstevel@tonic-gate stx %o3, [%o1] ! write long word 1162*7c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 1163*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_lword ! loop til 7 or fewer bytes left 1164*7c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 1165*7c478bd9Sstevel@tonic-gate.bc_med_lextra: 1166*7c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore rest of count 1167*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit ! if zero, then done 1168*7c478bd9Sstevel@tonic-gate deccc %o2 1169*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_byte 1170*7c478bd9Sstevel@tonic-gate nop 1171*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .bc_sm_half 1172*7c478bd9Sstevel@tonic-gate nop 1173*7c478bd9Sstevel@tonic-gate 1174*7c478bd9Sstevel@tonic-gate .align 16 1175*7c478bd9Sstevel@tonic-gate.bc_med_word: 1176*7c478bd9Sstevel@tonic-gate btst 3, %o0 ! check for 1177*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_med_word1 ! word alignment 1178*7c478bd9Sstevel@tonic-gate nop 1179*7c478bd9Sstevel@tonic-gate.bc_med_word0: 1180*7c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 1181*7c478bd9Sstevel@tonic-gate inc %o0 1182*7c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 1183*7c478bd9Sstevel@tonic-gate inc %o1 1184*7c478bd9Sstevel@tonic-gate btst 3, %o0 1185*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_med_word0 1186*7c478bd9Sstevel@tonic-gate dec %o2 1187*7c478bd9Sstevel@tonic-gate! 1188*7c478bd9Sstevel@tonic-gate! Now word aligned and have at least 36 bytes to move 1189*7c478bd9Sstevel@tonic-gate! 1190*7c478bd9Sstevel@tonic-gate.bc_med_word1: 1191*7c478bd9Sstevel@tonic-gate sub %o2, 15, %o2 ! 
adjust count to allow cc zero test 1192*7c478bd9Sstevel@tonic-gate.bc_med_wmove: 1193*7c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! read word 1194*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 1195*7c478bd9Sstevel@tonic-gate subcc %o2, 16, %o2 ! reduce count by 16 1196*7c478bd9Sstevel@tonic-gate lduw [%o0 + 4], %o3 ! repeat for a total for 4 words 1197*7c478bd9Sstevel@tonic-gate add %o0, 16, %o0 ! advance SRC by 16 1198*7c478bd9Sstevel@tonic-gate stw %o3, [%o1 + 4] 1199*7c478bd9Sstevel@tonic-gate lduw [%o0 - 8], %o3 1200*7c478bd9Sstevel@tonic-gate add %o1, 16, %o1 ! advance DST by 16 1201*7c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 8] 1202*7c478bd9Sstevel@tonic-gate lduw [%o0 - 4], %o3 1203*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_wmove ! loop til 15 or fewer bytes left 1204*7c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 4] 1205*7c478bd9Sstevel@tonic-gate addcc %o2, 12, %o2 ! restore count to word offset 1206*7c478bd9Sstevel@tonic-gate ble,pt %ncc, .bc_med_wextra ! check for more words to move 1207*7c478bd9Sstevel@tonic-gate nop 1208*7c478bd9Sstevel@tonic-gate.bc_med_word2: 1209*7c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! read word 1210*7c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 1211*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 1212*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 1213*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_word2 ! loop til 3 or fewer bytes left 1214*7c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 1215*7c478bd9Sstevel@tonic-gate.bc_med_wextra: 1216*7c478bd9Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore rest of count 1217*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit ! 
if zero, then done 1218*7c478bd9Sstevel@tonic-gate deccc %o2 1219*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_byte 1220*7c478bd9Sstevel@tonic-gate nop 1221*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .bc_sm_half 1222*7c478bd9Sstevel@tonic-gate nop 1223*7c478bd9Sstevel@tonic-gate 1224*7c478bd9Sstevel@tonic-gate .align 16 1225*7c478bd9Sstevel@tonic-gate.bc_med_half: 1226*7c478bd9Sstevel@tonic-gate btst 1, %o0 ! check for 1227*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_med_half1 ! half word alignment 1228*7c478bd9Sstevel@tonic-gate nop 1229*7c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 1230*7c478bd9Sstevel@tonic-gate inc %o0 1231*7c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 1232*7c478bd9Sstevel@tonic-gate inc %o1 1233*7c478bd9Sstevel@tonic-gate dec %o2 1234*7c478bd9Sstevel@tonic-gate! 1235*7c478bd9Sstevel@tonic-gate! Now half word aligned and have at least 38 bytes to move 1236*7c478bd9Sstevel@tonic-gate! 1237*7c478bd9Sstevel@tonic-gate.bc_med_half1: 1238*7c478bd9Sstevel@tonic-gate sub %o2, 7, %o2 ! adjust count to allow cc zero test 1239*7c478bd9Sstevel@tonic-gate.bc_med_hmove: 1240*7c478bd9Sstevel@tonic-gate lduh [%o0], %o3 ! read half word 1241*7c478bd9Sstevel@tonic-gate sth %o3, [%o1] ! write half word 1242*7c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 1243*7c478bd9Sstevel@tonic-gate lduh [%o0 + 2], %o3 ! repeat for a total for 4 halfwords 1244*7c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 1245*7c478bd9Sstevel@tonic-gate sth %o3, [%o1 + 2] 1246*7c478bd9Sstevel@tonic-gate lduh [%o0 - 4], %o3 1247*7c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 1248*7c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 4] 1249*7c478bd9Sstevel@tonic-gate lduh [%o0 - 2], %o3 1250*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_hmove ! loop til 7 or fewer bytes left 1251*7c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 2] 1252*7c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! 
restore count 1253*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1254*7c478bd9Sstevel@tonic-gate deccc %o2 1255*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_byte 1256*7c478bd9Sstevel@tonic-gate nop 1257*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .bc_sm_half 1258*7c478bd9Sstevel@tonic-gate nop 1259*7c478bd9Sstevel@tonic-gate 1260*7c478bd9Sstevel@tonic-gate SET_SIZE(bcopy) 1261*7c478bd9Sstevel@tonic-gate 1262*7c478bd9Sstevel@tonic-gate/* 1263*7c478bd9Sstevel@tonic-gate * The _more entry points are not intended to be used directly by 1264*7c478bd9Sstevel@tonic-gate * any caller from outside this file. They are provided to allow 1265*7c478bd9Sstevel@tonic-gate * profiling and dtrace of the portions of the copy code that uses 1266*7c478bd9Sstevel@tonic-gate * the floating point registers. 1267*7c478bd9Sstevel@tonic-gate * This entry is particularly important as DTRACE (at least as of 1268*7c478bd9Sstevel@tonic-gate * 4/2004) does not support leaf functions. 1269*7c478bd9Sstevel@tonic-gate */ 1270*7c478bd9Sstevel@tonic-gate 1271*7c478bd9Sstevel@tonic-gate ENTRY(bcopy_more) 1272*7c478bd9Sstevel@tonic-gate.bcopy_more: 1273*7c478bd9Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 1274*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %l6 ! save t_lofault 1275*7c478bd9Sstevel@tonic-gate tst %l6 1276*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .do_copy 1277*7c478bd9Sstevel@tonic-gate nop 1278*7c478bd9Sstevel@tonic-gate sethi %hi(.copyerr), %o2 1279*7c478bd9Sstevel@tonic-gate or %o2, %lo(.copyerr), %o2 1280*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 1281*7c478bd9Sstevel@tonic-gate stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector 1282*7c478bd9Sstevel@tonic-gate ! 1283*7c478bd9Sstevel@tonic-gate ! We've already captured whether t_lofault was zero on entry. 1284*7c478bd9Sstevel@tonic-gate ! We need to mark ourselves as being from bcopy since both 1285*7c478bd9Sstevel@tonic-gate ! kcopy and bcopy use the same code path. 
If TRAMP_FLAG is set
	! and the saved lofault was zero, we won't reset lofault on
	! returning.
	!
	or	%l6, TRAMP_FLAG, %l6

/*
 * Copies that reach here are larger than VIS_COPY_THRESHOLD bytes
 * Also, use of FP registers has been tested to be enabled
 */
.do_copy:
	FP_NOMIGRATE(6, 7)		! bind to cpu; FP state is cpu-local

	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]	! save orig %fprs
	btst	FPRS_FEF, %o2
	bz,a,pt	%icc, .do_blockcopy	! fp unused: just enable it, no save
	wr	%g0, FPRS_FEF, %fprs

	BST_FPQ1Q3_TOSTACK(%o2)		! fp in use: save quads 1 and 3

.do_blockcopy:
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	or	%l6, FPUSED_FLAG, %l6	! note fp state must be restored

	! Register naming for the VIS copy loop; REALSRC tracks the true
	! (possibly unaligned) source, SRC the 8-byte-aligned shadow.
#define	REALSRC	%i0
#define	DST	%i1
#define	CNT	%i2
#define	SRC	%i3
#define	TMP	%i5

	!
	! Align DST to a VIS_BLOCKSIZE (64-byte) boundary, copying the
	! leading bytes 4-at-a-time, then singly.
	!
	andcc	DST, VIS_BLOCKSIZE - 1, TMP
	bz,pt	%ncc, 2f
	neg	TMP
	add	TMP, VIS_BLOCKSIZE, TMP

	! TMP = bytes required to align DST on FP_BLOCK boundary
	! Using SRC as a tmp here
	cmp	TMP, 3
	bleu,pt	%ncc, 1f
	sub	CNT,TMP,CNT		! adjust main count
	sub	TMP, 3, TMP		! adjust for end of loop test
.bc_blkalign:
	ldub	[REALSRC], SRC		! move 4 bytes per loop iteration
	stb	SRC, [DST]
	subcc	TMP, 4, TMP
	ldub	[REALSRC + 1], SRC
	add	REALSRC, 4, REALSRC
	stb	SRC, [DST + 1]
	ldub	[REALSRC - 2], SRC
	add	DST, 4, DST
	stb	SRC, [DST - 2]
	ldub	[REALSRC - 1], SRC
	bgu,pt	%ncc, .bc_blkalign
	stb	SRC, [DST - 1]

	addcc	TMP, 3, TMP		! restore count adjustment
	bz,pt	%ncc, 2f		! no bytes left?
	nop
1:	ldub	[REALSRC], SRC		! copy the remaining 1-3 bytes
	inc	REALSRC
	inc	DST
	deccc	TMP
	bgu	%ncc, 1b
	stb	SRC, [DST - 1]

2:
	andn	REALSRC, 0x7, SRC	! SRC = REALSRC rounded down to 8 bytes
	alignaddr REALSRC, %g0, %g0	! set GSR.align for faligndata below

	! SRC - 8-byte aligned
	! DST - 64-byte aligned
	!
	! Prime the software pipeline: load the first block into %f0-%f14,
	! shifting into %f32-%f46 via faligndata, with read-ahead prefetches.
	prefetch [SRC], #one_read
	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
	ldd	[SRC], %f0
#if CHEETAH_PREFETCH > 4
	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x08], %f2
#if CHEETAH_PREFETCH > 5
	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x10], %f4
#if CHEETAH_PREFETCH > 6
	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
#endif
	faligndata %f0, %f2, %f32
	ldd	[SRC + 0x18], %f6
#if CHEETAH_PREFETCH > 7
	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
#endif
	faligndata %f2, %f4, %f34
	ldd	[SRC + 0x20], %f8
	faligndata %f4, %f6, %f36
	ldd	[SRC + 0x28], %f10
	faligndata %f6, %f8, %f38
	ldd	[SRC + 0x30], %f12
	faligndata %f8, %f10, %f40
	ldd	[SRC + 0x38], %f14
	faligndata %f10, %f12, %f42
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	ba,a,pt	%ncc, 1f
	nop
	.align	16
1:
	! Main pipelined loop: align and store the previous block while
	! loading the next one; one 64-byte block store per iteration.
	ldd	[SRC + 0x08], %f2
	faligndata %f12, %f14, %f44
	ldd	[SRC + 0x10], %f4
	faligndata %f14, %f0, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	faligndata %f0, %f2, %f32
	ldd	[SRC + 0x20], %f8
	faligndata %f2, %f4, %f34
	ldd	[SRC + 0x28], %f10
	faligndata %f4, %f6, %f36
	ldd	[SRC + 0x30], %f12
	faligndata %f6, %f8, %f38
	ldd	[SRC + 0x38], %f14
	faligndata %f8, %f10, %f40
	sub	CNT, VIS_BLOCKSIZE, CNT
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	faligndata %f10, %f12, %f42
	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
	add	DST, VIS_BLOCKSIZE, DST
	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	cmp	CNT, VIS_BLOCKSIZE + 8
	bgu,pt	%ncc, 1b
	add	SRC, VIS_BLOCKSIZE, SRC

	! Drain: take the fsrc1 path (2f) only for an exactly-one-block
	! remainder with 8-byte-aligned REALSRC; otherwise flush via
	! faligndata (3f) and finish any tail bytes singly.
	! only if REALSRC & 0x7 is 0
	cmp	CNT, VIS_BLOCKSIZE
	bne	%ncc, 3f
	andcc	REALSRC, 0x7, %g0
	bz,pt	%ncc, 2f
	nop
3:
	faligndata %f12, %f14, %f44
	faligndata %f14, %f0, %f46
	stda	%f32, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,pt	%ncc, 3f
	nop
2:
	! Aligned remainder: move the final block with straight fsrc1 copies.
	ldd	[SRC + 0x08], %f2
	fsrc1	%f12, %f44
	ldd	[SRC + 0x10], %f4
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	fsrc1	%f0, %f32
	ldd	[SRC + 0x20], %f8
	fsrc1	%f2, %f34
	ldd	[SRC + 0x28], %f10
	fsrc1	%f4, %f36
	ldd	[SRC + 0x30], %f12
	fsrc1	%f6, %f38
	ldd	[SRC + 0x38], %f14
	fsrc1	%f8, %f40
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,a,pt	%ncc, .bcb_exit
	nop

3:	tst	CNT			! any tail bytes left?
	bz,a,pt	%ncc, .bcb_exit
	nop

5:	ldub	[REALSRC], TMP		! byte loop for the final 1-7 bytes
	inc	REALSRC
	inc	DST
	deccc	CNT
	bgu	%ncc, 5b
	stb	TMP, [DST - 1]
.bcb_exit:
	membar	#Sync

	! FP-RAS: periodically re-verify the fp-register copy path.
	FPRAS_INTERVAL(FPRAS_BCOPY, 0, %l5, %o2, %o3, %o4, %o5, 8)
	FPRAS_REWRITE_TYPE2Q1(0, %l5, %o2, %o3, 8, 9)
	FPRAS_CHECK(FPRAS_BCOPY, %l5, 9)	! outputs lost

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	BLD_FPQ1Q3_FROMSTACK(%o2)	! caller had live fp state: restore it

	ba,pt	%ncc, 2f
	wr	%o3, 0, %fprs		! restore fprs
4:
	FZEROQ1Q3			! fp was unused: scrub, don't leak data
	wr	%o3, 0, %fprs		! restore fprs
2:
	membar	#Sync			! sync error barrier
	andn	%l6, MASK_FLAGS, %l6
	stn	%l6, [THREAD_REG + T_LOFAULT]	!
restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)
	ret
	restore	%g0, 0, %o0		! return 0

	SET_SIZE(bcopy_more)

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	/*
	 * ovbcopy(from, to, count)
	 *
	 * Overlap-safe byte copy.  If the regions cannot overlap
	 * (count <= |from - to|) this tail-calls the faster bcopy;
	 * otherwise it copies one byte at a time, forwards when the
	 * destination precedes the source and backwards when it
	 * follows it, so already-copied bytes are never overwritten.
	 */
	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop

	SET_SIZE(ovbcopy)

#endif	/* lint */


/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.  Preventing preemption also prevents cpu migration.
1572*7c478bd9Sstevel@tonic-gate */
1573*7c478bd9Sstevel@tonic-gate#ifdef lint
1574*7c478bd9Sstevel@tonic-gate/*ARGSUSED*/
1575*7c478bd9Sstevel@tonic-gatevoid
1576*7c478bd9Sstevel@tonic-gatehwblkpagecopy(const void *src, void *dst)
1577*7c478bd9Sstevel@tonic-gate{ }
1578*7c478bd9Sstevel@tonic-gate#else	/* lint */
1579*7c478bd9Sstevel@tonic-gate	ENTRY(hwblkpagecopy)
1580*7c478bd9Sstevel@tonic-gate	! get another window w/space for three aligned blocks of saved fpregs
1581*7c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
1582*7c478bd9Sstevel@tonic-gate
1583*7c478bd9Sstevel@tonic-gate	! %i0 - source address (arg)
1584*7c478bd9Sstevel@tonic-gate	! %i1 - destination address (arg)
1585*7c478bd9Sstevel@tonic-gate	! %i2 - length of region (not arg)
1586*7c478bd9Sstevel@tonic-gate	! %l0 - saved fprs
1587*7c478bd9Sstevel@tonic-gate	! %l1 - pointer to saved fpregs
1588*7c478bd9Sstevel@tonic-gate
	!
	! If the FPU is already in use (FPRS_FEF set in the caller's %fprs)
	! save the Q1/Q3 FP registers to the stack so they can be restored
	! on exit; otherwise the annulled delay slot simply enables the FPU
	! (the "bz,a" executes the wr only when the branch is taken).
	! NOTE(review): SRC/DST/CNT/REALSRC below are register aliases
	! #define'd earlier in this file — presumably mapping onto
	! %i0/%i1/%i2; confirm against the definitions near the top.
	!
1589*7c478bd9Sstevel@tonic-gate	rd	%fprs, %l0		! check for unused fp
1590*7c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %l0
1591*7c478bd9Sstevel@tonic-gate	bz,a,pt	%icc, 1f
1592*7c478bd9Sstevel@tonic-gate	wr	%g0, FPRS_FEF, %fprs
1593*7c478bd9Sstevel@tonic-gate
1594*7c478bd9Sstevel@tonic-gate	BST_FPQ1Q3_TOSTACK(%l1)
1595*7c478bd9Sstevel@tonic-gate
1596*7c478bd9Sstevel@tonic-gate1:	set	PAGESIZE, CNT
1597*7c478bd9Sstevel@tonic-gate	mov	REALSRC, SRC
1598*7c478bd9Sstevel@tonic-gate
	!
	! Prologue of a software-pipelined loop: prefetch ahead, load the
	! first 64-byte block into %f0-%f14 and start copying it into
	! %f32-%f46 (the block-store staging registers) while the loads
	! for the next block are issued.
	!
1599*7c478bd9Sstevel@tonic-gate	prefetch [SRC], #one_read
1600*7c478bd9Sstevel@tonic-gate	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
1601*7c478bd9Sstevel@tonic-gate	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
1602*7c478bd9Sstevel@tonic-gate	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
1603*7c478bd9Sstevel@tonic-gate	ldd	[SRC], %f0
1604*7c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4
1605*7c478bd9Sstevel@tonic-gate	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
1606*7c478bd9Sstevel@tonic-gate#endif
1607*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
1608*7c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5
1609*7c478bd9Sstevel@tonic-gate	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
1610*7c478bd9Sstevel@tonic-gate#endif
1611*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
1612*7c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6
1613*7c478bd9Sstevel@tonic-gate	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
1614*7c478bd9Sstevel@tonic-gate#endif
1615*7c478bd9Sstevel@tonic-gate	fsrc1	%f0, %f32
1616*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
1617*7c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7
1618*7c478bd9Sstevel@tonic-gate	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
1619*7c478bd9Sstevel@tonic-gate#endif
1620*7c478bd9Sstevel@tonic-gate	fsrc1	%f2, %f34
1621*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
1622*7c478bd9Sstevel@tonic-gate	fsrc1	%f4, %f36
1623*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
1624*7c478bd9Sstevel@tonic-gate	fsrc1	%f6, %f38
1625*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
1626*7c478bd9Sstevel@tonic-gate	fsrc1	%f8, %f40
1627*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14
1628*7c478bd9Sstevel@tonic-gate	fsrc1	%f10, %f42
1629*7c478bd9Sstevel@tonic-gate	ldd	[SRC + VIS_BLOCKSIZE], %f0
1630*7c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
1631*7c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
1632*7c478bd9Sstevel@tonic-gate	ba,a,pt	%ncc, 2f
1633*7c478bd9Sstevel@tonic-gate	nop
1634*7c478bd9Sstevel@tonic-gate	.align	16
	!
	! Steady-state loop: block-store the staged copy of block N while
	! loading block N+1 and staging it with fsrc1 moves.  One 64-byte
	! block is retired per iteration.
	!
1635*7c478bd9Sstevel@tonic-gate2:
1636*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
1637*7c478bd9Sstevel@tonic-gate	fsrc1	%f12, %f44
1638*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
1639*7c478bd9Sstevel@tonic-gate	fsrc1	%f14, %f46
1640*7c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P
1641*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
1642*7c478bd9Sstevel@tonic-gate	fsrc1	%f0, %f32
1643*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
1644*7c478bd9Sstevel@tonic-gate	fsrc1	%f2, %f34
1645*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
1646*7c478bd9Sstevel@tonic-gate	fsrc1	%f4, %f36
1647*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
1648*7c478bd9Sstevel@tonic-gate	fsrc1	%f6, %f38
1649*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14
1650*7c478bd9Sstevel@tonic-gate	fsrc1	%f8, %f40
1651*7c478bd9Sstevel@tonic-gate	ldd	[SRC + VIS_BLOCKSIZE], %f0
1652*7c478bd9Sstevel@tonic-gate	fsrc1	%f10, %f42
1653*7c478bd9Sstevel@tonic-gate	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
1654*7c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
1655*7c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
1656*7c478bd9Sstevel@tonic-gate	cmp	CNT, VIS_BLOCKSIZE + 8
1657*7c478bd9Sstevel@tonic-gate	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
1658*7c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 2b
1659*7c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
1660*7c478bd9Sstevel@tonic-gate
1661*7c478bd9Sstevel@tonic-gate	! trailing block
1662*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
1663*7c478bd9Sstevel@tonic-gate	fsrc1	%f12, %f44
1664*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
1665*7c478bd9Sstevel@tonic-gate	fsrc1	%f14, %f46
1666*7c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P
1667*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
1668*7c478bd9Sstevel@tonic-gate	fsrc1	%f0, %f32
1669*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
1670*7c478bd9Sstevel@tonic-gate	fsrc1	%f2, %f34
1671*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
1672*7c478bd9Sstevel@tonic-gate	fsrc1	%f4, %f36
1673*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
1674*7c478bd9Sstevel@tonic-gate	fsrc1	%f6, %f38
1675*7c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14
1676*7c478bd9Sstevel@tonic-gate	fsrc1	%f8, %f40
1677*7c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
1678*7c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
1679*7c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
1680*7c478bd9Sstevel@tonic-gate	fsrc1	%f10, %f42
1681*7c478bd9Sstevel@tonic-gate	fsrc1	%f12, %f44
1682*7c478bd9Sstevel@tonic-gate	fsrc1	%f14, %f46
1683*7c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P
1684*7c478bd9Sstevel@tonic-gate
1685*7c478bd9Sstevel@tonic-gate	membar	#Sync
1686*7c478bd9Sstevel@tonic-gate
	!
	! fpras consistency-check macros (see sys/fpras_impl.h, included
	! above): periodically re-verify the FP-register copy path and
	! rewrite/check if a discrepancy is found.
	!
1687*7c478bd9Sstevel@tonic-gate	FPRAS_INTERVAL(FPRAS_PGCOPY, 1, %l5, %o2, %o3, %o4, %o5, 8)
1688*7c478bd9Sstevel@tonic-gate	FPRAS_REWRITE_TYPE1(1, %l5, %f32, %o2, 9)
1689*7c478bd9Sstevel@tonic-gate	FPRAS_CHECK(FPRAS_PGCOPY, %l5, 9)	! lose outputs
1690*7c478bd9Sstevel@tonic-gate
	!
	! Epilogue: if the caller had live FP state, restore the saved
	! Q1/Q3 registers from the stack; otherwise zero them so no stale
	! data is left behind.  %l0 holds the caller's original %fprs.
	!
1691*7c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %l0
1692*7c478bd9Sstevel@tonic-gate	bz,pt	%icc, 2f
1693*7c478bd9Sstevel@tonic-gate	nop
1694*7c478bd9Sstevel@tonic-gate
1695*7c478bd9Sstevel@tonic-gate	BLD_FPQ1Q3_FROMSTACK(%l3)
1696*7c478bd9Sstevel@tonic-gate	ba	3f
1697*7c478bd9Sstevel@tonic-gate	nop
1698*7c478bd9Sstevel@tonic-gate
1699*7c478bd9Sstevel@tonic-gate2:	FZEROQ1Q3
1700*7c478bd9Sstevel@tonic-gate
1701*7c478bd9Sstevel@tonic-gate3:	wr	%l0, 0, %fprs		! restore fprs
1702*7c478bd9Sstevel@tonic-gate	ret
1703*7c478bd9Sstevel@tonic-gate	restore	%g0, 0, %o0		! clear %o0 in the delay slot
1704*7c478bd9Sstevel@tonic-gate
1705*7c478bd9Sstevel@tonic-gate	SET_SIZE(hwblkpagecopy)
1706*7c478bd9Sstevel@tonic-gate#endif	/* lint */
1707*7c478bd9Sstevel@tonic-gate
1708*7c478bd9Sstevel@tonic-gate
1709*7c478bd9Sstevel@tonic-gate/*
1710*7c478bd9Sstevel@tonic-gate * Transfer data to and from user space -
1711*7c478bd9Sstevel@tonic-gate * Note that these routines can cause faults
1712*7c478bd9Sstevel@tonic-gate * It is assumed that the kernel has nothing at
1713*7c478bd9Sstevel@tonic-gate * less than KERNELBASE in the virtual address space.
1714*7c478bd9Sstevel@tonic-gate *
1715*7c478bd9Sstevel@tonic-gate * Note that copyin(9F) and copyout(9F) are part of the
1716*7c478bd9Sstevel@tonic-gate * DDI/DKI which specifies that they return '-1' on "errors."
1717*7c478bd9Sstevel@tonic-gate *
1718*7c478bd9Sstevel@tonic-gate * Sigh.
1719*7c478bd9Sstevel@tonic-gate * 1720*7c478bd9Sstevel@tonic-gate * So there's two extremely similar routines - xcopyin() and xcopyout() 1721*7c478bd9Sstevel@tonic-gate * which return the errno that we've faithfully computed. This 1722*7c478bd9Sstevel@tonic-gate * allows other callers (e.g. uiomove(9F)) to work correctly. 1723*7c478bd9Sstevel@tonic-gate * Given that these are used pretty heavily, we expand the calling 1724*7c478bd9Sstevel@tonic-gate * sequences inline for all flavours (rather than making wrappers). 1725*7c478bd9Sstevel@tonic-gate * 1726*7c478bd9Sstevel@tonic-gate * There are also stub routines for xcopyout_little and xcopyin_little, 1727*7c478bd9Sstevel@tonic-gate * which currently are intended to handle requests of <= 16 bytes from 1728*7c478bd9Sstevel@tonic-gate * do_unaligned. Future enhancement to make them handle 8k pages efficiently 1729*7c478bd9Sstevel@tonic-gate * is left as an exercise... 1730*7c478bd9Sstevel@tonic-gate */ 1731*7c478bd9Sstevel@tonic-gate 1732*7c478bd9Sstevel@tonic-gate/* 1733*7c478bd9Sstevel@tonic-gate * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr) 1734*7c478bd9Sstevel@tonic-gate * 1735*7c478bd9Sstevel@tonic-gate * General theory of operation: 1736*7c478bd9Sstevel@tonic-gate * 1737*7c478bd9Sstevel@tonic-gate * The only difference between copy{in,out} and 1738*7c478bd9Sstevel@tonic-gate * xcopy{in,out} is in the error handling routine they invoke 1739*7c478bd9Sstevel@tonic-gate * when a memory access error occurs. xcopyOP returns the errno 1740*7c478bd9Sstevel@tonic-gate * while copyOP returns -1 (see above). copy{in,out}_noerr set 1741*7c478bd9Sstevel@tonic-gate * a special flag (by oring the TRAMP_FLAG into the fault handler address) 1742*7c478bd9Sstevel@tonic-gate * if they are called with a fault handler already in place. That flag 1743*7c478bd9Sstevel@tonic-gate * causes the default handlers to trampoline to the previous handler 1744*7c478bd9Sstevel@tonic-gate * upon an error. 
1745*7c478bd9Sstevel@tonic-gate * 1746*7c478bd9Sstevel@tonic-gate * None of the copyops routines grab a window until it's decided that 1747*7c478bd9Sstevel@tonic-gate * we need to do a HW block copy operation. This saves a window 1748*7c478bd9Sstevel@tonic-gate * spill/fill when we're called during socket ops. The typical IO 1749*7c478bd9Sstevel@tonic-gate * path won't cause spill/fill traps. 1750*7c478bd9Sstevel@tonic-gate * 1751*7c478bd9Sstevel@tonic-gate * This code uses a set of 4 limits for the maximum size that will 1752*7c478bd9Sstevel@tonic-gate * be copied given a particular input/output address alignment. 1753*7c478bd9Sstevel@tonic-gate * If the value for a particular limit is zero, the copy will be performed 1754*7c478bd9Sstevel@tonic-gate * by the plain copy loops rather than FPBLK. 1755*7c478bd9Sstevel@tonic-gate * 1756*7c478bd9Sstevel@tonic-gate * See the description of bcopy above for more details of the 1757*7c478bd9Sstevel@tonic-gate * data copying algorithm and the default limits. 1758*7c478bd9Sstevel@tonic-gate * 1759*7c478bd9Sstevel@tonic-gate */ 1760*7c478bd9Sstevel@tonic-gate 1761*7c478bd9Sstevel@tonic-gate/* 1762*7c478bd9Sstevel@tonic-gate * Copy kernel data to user space (copyout/xcopyout/xcopyout_little). 
1763*7c478bd9Sstevel@tonic-gate */
1764*7c478bd9Sstevel@tonic-gate
1765*7c478bd9Sstevel@tonic-gate#if defined(lint)
1766*7c478bd9Sstevel@tonic-gate
1767*7c478bd9Sstevel@tonic-gate
1768*7c478bd9Sstevel@tonic-gate#else	/* lint */
1769*7c478bd9Sstevel@tonic-gate/*
1770*7c478bd9Sstevel@tonic-gate * We save the arguments in the following registers in case of a fault:
1771*7c478bd9Sstevel@tonic-gate *	kaddr - %l1
1772*7c478bd9Sstevel@tonic-gate *	uaddr - %l2
1773*7c478bd9Sstevel@tonic-gate *	count - %l3
1774*7c478bd9Sstevel@tonic-gate */
/*
 * Argument-save aliases for the FP (windowed) copy path: local registers
 * are only available after a `save', which the FP path performs.
 */
#define SAVE_SRC	%l1
#define SAVE_DST	%l2
#define SAVE_COUNT	%l3
1778*7c478bd9Sstevel@tonic-gate
/*
 * Argument-save aliases for the small/leaf copy path, which never grabs
 * a register window (see the "no window until needed" note above), so it
 * must stash the original arguments in globals/outs instead of locals.
 */
#define SM_SAVE_SRC	%g4
#define SM_SAVE_DST	%g5
#define SM_SAVE_COUNT	%o5
#define ERRNO		%l5
1783*7c478bd9Sstevel@tonic-gate
1784*7c478bd9Sstevel@tonic-gate
/* Register holding the 'real' lofault handler for copyio_fault to invoke. */
#define REAL_LOFAULT	%l4
1786*7c478bd9Sstevel@tonic-gate/*
1787*7c478bd9Sstevel@tonic-gate * Generic copyio fault handler.  This is the first line of defense when a
1788*7c478bd9Sstevel@tonic-gate * fault occurs in (x)copyin/(x)copyout.  In order for this to function
1789*7c478bd9Sstevel@tonic-gate * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
1790*7c478bd9Sstevel@tonic-gate * This allows us to share common code for all the flavors of the copy
1791*7c478bd9Sstevel@tonic-gate * operations, including the _noerr versions.
1792*7c478bd9Sstevel@tonic-gate *
1793*7c478bd9Sstevel@tonic-gate * Note that this function will restore the original input parameters before
1794*7c478bd9Sstevel@tonic-gate * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
1795*7c478bd9Sstevel@tonic-gate * member of the t_copyop structure, if needed.
1796*7c478bd9Sstevel@tonic-gate */
	!
	! copyio_fault: invoked via t_lofault when a copy faults.
	! On entry: %g1 = errno from the trap code; %l6 = previous t_lofault
	! value, with FPUSED_FLAG or'ed in if the FP block-copy path was
	! active (in which case %gsr/%fprs/FP quads were saved on the stack
	! and must be restored here before handing off to REAL_LOFAULT).
	!
1797*7c478bd9Sstevel@tonic-gate	ENTRY(copyio_fault)
1798*7c478bd9Sstevel@tonic-gate	membar	#Sync
1799*7c478bd9Sstevel@tonic-gate	mov	%g1,ERRNO		! save errno in ERRNO
1800*7c478bd9Sstevel@tonic-gate	btst	FPUSED_FLAG, %l6
1801*7c478bd9Sstevel@tonic-gate	bz	%ncc, 1f
1802*7c478bd9Sstevel@tonic-gate	nop
1803*7c478bd9Sstevel@tonic-gate
1804*7c478bd9Sstevel@tonic-gate	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
1805*7c478bd9Sstevel@tonic-gate	wr	%o2, 0, %gsr    	! restore gsr
1806*7c478bd9Sstevel@tonic-gate
1807*7c478bd9Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
1808*7c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o3
1809*7c478bd9Sstevel@tonic-gate	bz,pt	%icc, 4f
1810*7c478bd9Sstevel@tonic-gate	nop
1811*7c478bd9Sstevel@tonic-gate
	! caller had live FP state: reload the saved Q2/Q4 quads
1812*7c478bd9Sstevel@tonic-gate	BLD_FPQ2Q4_FROMSTACK(%o2)
1813*7c478bd9Sstevel@tonic-gate
1814*7c478bd9Sstevel@tonic-gate	ba,pt	%ncc, 1f
1815*7c478bd9Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
1816*7c478bd9Sstevel@tonic-gate
	! caller had no FP state: zero the quads so nothing leaks
1817*7c478bd9Sstevel@tonic-gate4:
1818*7c478bd9Sstevel@tonic-gate	FZEROQ2Q4
1819*7c478bd9Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
1820*7c478bd9Sstevel@tonic-gate
1821*7c478bd9Sstevel@tonic-gate1:
1822*7c478bd9Sstevel@tonic-gate	andn	%l6, FPUSED_FLAG, %l6
1823*7c478bd9Sstevel@tonic-gate	membar	#Sync
1824*7c478bd9Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1825*7c478bd9Sstevel@tonic-gate	FP_ALLOWMIGRATE(5, 6)
1826*7c478bd9Sstevel@tonic-gate
	! restore the original arguments, then tail-jump to the real handler
1827*7c478bd9Sstevel@tonic-gate	mov	SAVE_SRC, %i0
1828*7c478bd9Sstevel@tonic-gate	mov	SAVE_DST, %i1
1829*7c478bd9Sstevel@tonic-gate	jmp	REAL_LOFAULT
1830*7c478bd9Sstevel@tonic-gate	mov	SAVE_COUNT, %i2
1831*7c478bd9Sstevel@tonic-gate
1832*7c478bd9Sstevel@tonic-gate	SET_SIZE(copyio_fault)
1833*7c478bd9Sstevel@tonic-gate
1834*7c478bd9Sstevel@tonic-gate
1835*7c478bd9Sstevel@tonic-gate#endif
1836*7c478bd9Sstevel@tonic-gate
1837*7c478bd9Sstevel@tonic-gate#if defined(lint)
1838*7c478bd9Sstevel@tonic-gate
1839*7c478bd9Sstevel@tonic-gate/*ARGSUSED*/
1840*7c478bd9Sstevel@tonic-gateint
1841*7c478bd9Sstevel@tonic-gatecopyout(const void *kaddr, void *uaddr, size_t count)
1842*7c478bd9Sstevel@tonic-gate{ return (0); }
1843*7c478bd9Sstevel@tonic-gate
1844*7c478bd9Sstevel@tonic-gate#else	/* lint */
1845*7c478bd9Sstevel@tonic-gate
1846*7c478bd9Sstevel@tonic-gate	ENTRY(copyout)
1847*7c478bd9Sstevel@tonic-gate
	!
	! Dispatch: copies at or below VIS_COPY_THRESHOLD always take the
	! leaf (no-window) path.  Larger copies consult the tunable
	! hw_copy_limit_{1,2,4,8} chosen by the best mutual alignment of
	! src and dst (byte/half/word/longword); a zero limit disables the
	! FP block-copy path entirely.
	!
1848*7c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
1849*7c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to larger cases
1850*7c478bd9Sstevel@tonic-gate	xor	%o0, %o1, %o3			! are src, dst alignable?
1851*7c478bd9Sstevel@tonic-gate	btst	7, %o3				!
1852*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_8		! check for longword alignment
1853*7c478bd9Sstevel@tonic-gate	nop
1854*7c478bd9Sstevel@tonic-gate	btst	1, %o3				!
1855*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_2		! check for half-word
1856*7c478bd9Sstevel@tonic-gate	nop
1857*7c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
1858*7c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
1859*7c478bd9Sstevel@tonic-gate	tst	%o3
1860*7c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
1861*7c478bd9Sstevel@tonic-gate	cmp	%o2, %o3			! if length <= limit
1862*7c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to small copy
1863*7c478bd9Sstevel@tonic-gate	nop
1864*7c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
1865*7c478bd9Sstevel@tonic-gate	nop
1866*7c478bd9Sstevel@tonic-gate.copyout_2:
1867*7c478bd9Sstevel@tonic-gate	btst	3, %o3				!
1868*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_4		! check for word alignment
1869*7c478bd9Sstevel@tonic-gate	nop
1870*7c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
1871*7c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
1872*7c478bd9Sstevel@tonic-gate	tst	%o3
1873*7c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
1874*7c478bd9Sstevel@tonic-gate	cmp	%o2, %o3			! if length <= limit
1875*7c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to small copy
1876*7c478bd9Sstevel@tonic-gate	nop
1877*7c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
1878*7c478bd9Sstevel@tonic-gate	nop
1879*7c478bd9Sstevel@tonic-gate.copyout_4:
1880*7c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
1881*7c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
1882*7c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
1883*7c478bd9Sstevel@tonic-gate	tst	%o3
1884*7c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
1885*7c478bd9Sstevel@tonic-gate	cmp	%o2, %o3			! if length <= limit
1886*7c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to small copy
1887*7c478bd9Sstevel@tonic-gate	nop
1888*7c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
1889*7c478bd9Sstevel@tonic-gate	nop
1890*7c478bd9Sstevel@tonic-gate.copyout_8:
1891*7c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
1892*7c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
1893*7c478bd9Sstevel@tonic-gate	tst	%o3
1894*7c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
1895*7c478bd9Sstevel@tonic-gate	cmp	%o2, %o3			! if length <= limit
1896*7c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to small copy
1897*7c478bd9Sstevel@tonic-gate	nop
1898*7c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
1899*7c478bd9Sstevel@tonic-gate	nop
1900*7c478bd9Sstevel@tonic-gate
1901*7c478bd9Sstevel@tonic-gate	.align	16
1902*7c478bd9Sstevel@tonic-gate	nop				! instruction alignment
1903*7c478bd9Sstevel@tonic-gate					! see discussion at start of file
	!
	! Leaf small-copy path.  Installs .sm_copyout_err as t_lofault and
	! keeps the original args in SM_SAVE_* (no register window here).
	! User-side stores go through ASI_USER (stba/stha/stwa/stxa).
	!
1904*7c478bd9Sstevel@tonic-gate.copyout_small:
1905*7c478bd9Sstevel@tonic-gate	sethi	%hi(.sm_copyout_err), %o5	! .sm_copyout_err is lofault
1906*7c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_copyout_err), %o5
1907*7c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
1908*7c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
1909*7c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault
1910*7c478bd9Sstevel@tonic-gate.sm_do_copyout:
1911*7c478bd9Sstevel@tonic-gate	mov	%o0, SM_SAVE_SRC
1912*7c478bd9Sstevel@tonic-gate	mov	%o1, SM_SAVE_DST
1913*7c478bd9Sstevel@tonic-gate	cmp	%o2, SHORTCOPY		! check for really short case
1914*7c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .co_sm_left	!
1915*7c478bd9Sstevel@tonic-gate	mov	%o2, SM_SAVE_COUNT
1916*7c478bd9Sstevel@tonic-gate	cmp	%o2, CHKSIZE		! check for medium length cases
1917*7c478bd9Sstevel@tonic-gate	bgu,pn	%ncc, .co_med		!
1918*7c478bd9Sstevel@tonic-gate	or	%o0, %o1, %o3		! prepare alignment check
1919*7c478bd9Sstevel@tonic-gate	andcc	%o3, 0x3, %g0		! test for alignment
1920*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_word	! branch to word aligned case
	! byte loop, unrolled 4x; SRC advances by 4 mid-iteration so the
	! later loads use negative offsets
1921*7c478bd9Sstevel@tonic-gate.co_sm_movebytes:
1922*7c478bd9Sstevel@tonic-gate	sub	%o2, 3, %o2		! adjust count to allow cc zero test
1923*7c478bd9Sstevel@tonic-gate.co_sm_notalign4:
1924*7c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! read byte
1925*7c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! reduce count by 4
1926*7c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER	! write byte
1927*7c478bd9Sstevel@tonic-gate	inc	%o1			! advance DST by 1
1928*7c478bd9Sstevel@tonic-gate	ldub	[%o0 + 1], %o3		! repeat for a total of 4 bytes
1929*7c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
1930*7c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER
1931*7c478bd9Sstevel@tonic-gate	inc	%o1			! advance DST by 1
1932*7c478bd9Sstevel@tonic-gate	ldub	[%o0 - 2], %o3
1933*7c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER
1934*7c478bd9Sstevel@tonic-gate	inc	%o1			! advance DST by 1
1935*7c478bd9Sstevel@tonic-gate	ldub	[%o0 - 1], %o3
1936*7c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER
1937*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_sm_notalign4	! loop til 3 or fewer bytes remain
1938*7c478bd9Sstevel@tonic-gate	inc	%o1			! advance DST by 1
1939*7c478bd9Sstevel@tonic-gate	add	%o2, 3, %o2		! restore count
	! 0-3 remaining bytes
1940*7c478bd9Sstevel@tonic-gate.co_sm_left:
1941*7c478bd9Sstevel@tonic-gate	tst	%o2
1942*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit	! check for zero length
1943*7c478bd9Sstevel@tonic-gate	nop
1944*7c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
1945*7c478bd9Sstevel@tonic-gate	deccc	%o2			! reduce count for cc test
1946*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
1947*7c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store one byte
1948*7c478bd9Sstevel@tonic-gate	ldub	[%o0 + 1], %o3		! load second byte
1949*7c478bd9Sstevel@tonic-gate	deccc	%o2
1950*7c478bd9Sstevel@tonic-gate	inc	%o1
1951*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
1952*7c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store second byte
1953*7c478bd9Sstevel@tonic-gate	ldub	[%o0 + 2], %o3		! load third byte
1954*7c478bd9Sstevel@tonic-gate	inc	%o1
1955*7c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store third byte
1956*7c478bd9Sstevel@tonic-gate	membar	#Sync			! sync error barrier
1957*7c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1958*7c478bd9Sstevel@tonic-gate	retl
1959*7c478bd9Sstevel@tonic-gate	mov	%g0, %o0		! return 0
1960*7c478bd9Sstevel@tonic-gate	.align	16
	! word loop, unrolled 2x (8 bytes per iteration)
1961*7c478bd9Sstevel@tonic-gate.co_sm_words:
1962*7c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
1963*7c478bd9Sstevel@tonic-gate.co_sm_wordx:
1964*7c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! update count
1965*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
1966*7c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! update SRC
1967*7c478bd9Sstevel@tonic-gate	lduw	[%o0 - 4], %o3		! read word
1968*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! update DST
1969*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
1970*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_sm_words	! loop til done
1971*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! update DST
1972*7c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore count
1973*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
1974*7c478bd9Sstevel@tonic-gate	nop
1975*7c478bd9Sstevel@tonic-gate	deccc	%o2
1976*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_byte
1977*7c478bd9Sstevel@tonic-gate.co_sm_half:
1978*7c478bd9Sstevel@tonic-gate	subcc	%o2, 2, %o2		! reduce count by 2
1979*7c478bd9Sstevel@tonic-gate	lduh	[%o0], %o3		! read half word
1980*7c478bd9Sstevel@tonic-gate	add	%o0, 2, %o0		! advance SRC by 2
1981*7c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER	! write half word
1982*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_sm_half	! loop til done
1983*7c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
1984*7c478bd9Sstevel@tonic-gate	addcc	%o2, 1, %o2		! restore count
1985*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
1986*7c478bd9Sstevel@tonic-gate	nop
1987*7c478bd9Sstevel@tonic-gate.co_sm_byte:
1988*7c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3
1989*7c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER
1990*7c478bd9Sstevel@tonic-gate	membar	#Sync			! sync error barrier
1991*7c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1992*7c478bd9Sstevel@tonic-gate	retl
1993*7c478bd9Sstevel@tonic-gate	mov	%g0, %o0		! return 0
1994*7c478bd9Sstevel@tonic-gate	.align	16
1995*7c478bd9Sstevel@tonic-gate.co_sm_word:
1996*7c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! update count
1997*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_sm_wordx
1998*7c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
1999*7c478bd9Sstevel@tonic-gate	addcc	%o2, 3, %o2		! restore count
2000*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
2001*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
2002*7c478bd9Sstevel@tonic-gate	deccc	%o2			! reduce count for cc test
2003*7c478bd9Sstevel@tonic-gate	ldub	[%o0 + 4], %o3		! load one byte
2004*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1
2005*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
2006*7c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER	! store one byte
2007*7c478bd9Sstevel@tonic-gate	ldub	[%o0 + 5], %o3		! load second byte
2008*7c478bd9Sstevel@tonic-gate	deccc	%o2
2009*7c478bd9Sstevel@tonic-gate	inc	%o1
2010*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
2011*7c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER	! store second byte
2012*7c478bd9Sstevel@tonic-gate	ldub	[%o0 + 6], %o3		! load third byte
2013*7c478bd9Sstevel@tonic-gate	inc	%o1
2014*7c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER	! store third byte
	! common exit: drop our lofault handler and return success
2015*7c478bd9Sstevel@tonic-gate.co_sm_exit:
2016*7c478bd9Sstevel@tonic-gate	membar	#Sync			! sync error barrier
2017*7c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
2018*7c478bd9Sstevel@tonic-gate	retl
2019*7c478bd9Sstevel@tonic-gate	mov	%g0, %o0		! return 0
2020*7c478bd9Sstevel@tonic-gate
2021*7c478bd9Sstevel@tonic-gate	.align 16
	! medium-length copies: pick the widest move the mutual alignment
	! of src/dst permits, after byte-aligning the source first
2022*7c478bd9Sstevel@tonic-gate.co_med:
2023*7c478bd9Sstevel@tonic-gate	xor	%o0, %o1, %o3		! setup alignment check
2024*7c478bd9Sstevel@tonic-gate	btst	1, %o3
2025*7c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_sm_movebytes	! unaligned
2026*7c478bd9Sstevel@tonic-gate	nop
2027*7c478bd9Sstevel@tonic-gate	btst	3, %o3
2028*7c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_med_half	! halfword aligned
2029*7c478bd9Sstevel@tonic-gate	nop
2030*7c478bd9Sstevel@tonic-gate	btst	7, %o3
2031*7c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_med_word	! word aligned
2032*7c478bd9Sstevel@tonic-gate	nop
2033*7c478bd9Sstevel@tonic-gate.co_med_long:
2034*7c478bd9Sstevel@tonic-gate	btst	3, %o0			! check for
2035*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_med_long1	! word alignment
2036*7c478bd9Sstevel@tonic-gate	nop
2037*7c478bd9Sstevel@tonic-gate.co_med_long0:
2038*7c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
2039*7c478bd9Sstevel@tonic-gate	inc	%o0
2040*7c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store byte
2041*7c478bd9Sstevel@tonic-gate	inc	%o1
2042*7c478bd9Sstevel@tonic-gate	btst	3, %o0
2043*7c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_med_long0
2044*7c478bd9Sstevel@tonic-gate	dec	%o2
2045*7c478bd9Sstevel@tonic-gate.co_med_long1:			! word aligned
2046*7c478bd9Sstevel@tonic-gate	btst	7, %o0			! check for long word
2047*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_med_long2
2048*7c478bd9Sstevel@tonic-gate	nop
2049*7c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! load word
2050*7c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
2051*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! store word
2052*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
2053*7c478bd9Sstevel@tonic-gate	sub	%o2, 4, %o2		! reduce count by 4
2054*7c478bd9Sstevel@tonic-gate!
2055*7c478bd9Sstevel@tonic-gate!  Now long word aligned and have at least 32 bytes to move
2056*7c478bd9Sstevel@tonic-gate!
2057*7c478bd9Sstevel@tonic-gate.co_med_long2:
2058*7c478bd9Sstevel@tonic-gate	sub	%o2, 31, %o2		! adjust count to allow cc zero test
2059*7c478bd9Sstevel@tonic-gate	sub	%o1, 8, %o1		! adjust pointer to allow store in
2060*7c478bd9Sstevel@tonic-gate					! branch delay slot instead of add
2061*7c478bd9Sstevel@tonic-gate.co_med_lmove:
2062*7c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
2063*7c478bd9Sstevel@tonic-gate	ldx	[%o0], %o3		! read long word
2064*7c478bd9Sstevel@tonic-gate	subcc	%o2, 32, %o2		! reduce count by 32
2065*7c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER	! write long word
2066*7c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
2067*7c478bd9Sstevel@tonic-gate	ldx	[%o0 + 8], %o3		! repeat for a total for 4 long words
2068*7c478bd9Sstevel@tonic-gate	add	%o0, 32, %o0		! advance SRC by 32
2069*7c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER
2070*7c478bd9Sstevel@tonic-gate	ldx	[%o0 - 16], %o3
2071*7c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
2072*7c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER
2073*7c478bd9Sstevel@tonic-gate	ldx	[%o0 - 8], %o3
2074*7c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
2075*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_lmove	! loop til 31 or fewer bytes left
2076*7c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER
2077*7c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
2078*7c478bd9Sstevel@tonic-gate	addcc	%o2, 24, %o2		! restore count to long word offset
2079*7c478bd9Sstevel@tonic-gate	ble,pt	%ncc, .co_med_lextra	! check for more long words to move
2080*7c478bd9Sstevel@tonic-gate	nop
2081*7c478bd9Sstevel@tonic-gate.co_med_lword:
2082*7c478bd9Sstevel@tonic-gate	ldx	[%o0], %o3		! read long word
2083*7c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! reduce count by 8
2084*7c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER	! write long word
2085*7c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
2086*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_lword	! loop til 7 or fewer bytes left
2087*7c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
2088*7c478bd9Sstevel@tonic-gate.co_med_lextra:
2089*7c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore rest of count
2090*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit	! if zero, then done
2091*7c478bd9Sstevel@tonic-gate	deccc	%o2
2092*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_byte
2093*7c478bd9Sstevel@tonic-gate	nop
2094*7c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .co_sm_half
2095*7c478bd9Sstevel@tonic-gate	nop
2096*7c478bd9Sstevel@tonic-gate
2097*7c478bd9Sstevel@tonic-gate	.align 16
2098*7c478bd9Sstevel@tonic-gate	nop				! instruction alignment
2099*7c478bd9Sstevel@tonic-gate					! see discussion at start of file
2100*7c478bd9Sstevel@tonic-gate.co_med_word:
2101*7c478bd9Sstevel@tonic-gate	btst	3, %o0			! check for
2102*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_med_word1	! word alignment
2103*7c478bd9Sstevel@tonic-gate	nop
2104*7c478bd9Sstevel@tonic-gate.co_med_word0:
2105*7c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
2106*7c478bd9Sstevel@tonic-gate	inc	%o0
2107*7c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store byte
2108*7c478bd9Sstevel@tonic-gate	inc	%o1
2109*7c478bd9Sstevel@tonic-gate	btst	3, %o0
2110*7c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_med_word0
2111*7c478bd9Sstevel@tonic-gate	dec	%o2
2112*7c478bd9Sstevel@tonic-gate!
2113*7c478bd9Sstevel@tonic-gate!  Now word aligned and have at least 36 bytes to move
2114*7c478bd9Sstevel@tonic-gate!
2115*7c478bd9Sstevel@tonic-gate.co_med_word1:
2116*7c478bd9Sstevel@tonic-gate	sub	%o2, 15, %o2		! adjust count to allow cc zero test
2117*7c478bd9Sstevel@tonic-gate.co_med_wmove:
2118*7c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
2119*7c478bd9Sstevel@tonic-gate	subcc	%o2, 16, %o2		! reduce count by 16
2120*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
2121*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
2122*7c478bd9Sstevel@tonic-gate	lduw	[%o0 + 4], %o3		! repeat for a total for 4 words
2123*7c478bd9Sstevel@tonic-gate	add	%o0, 16, %o0		! advance SRC by 16
2124*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER
2125*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
2126*7c478bd9Sstevel@tonic-gate	lduw	[%o0 - 8], %o3
2127*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER
2128*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
2129*7c478bd9Sstevel@tonic-gate	lduw	[%o0 - 4], %o3
2130*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER
2131*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_wmove	! loop til 15 or fewer bytes left
2132*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
2133*7c478bd9Sstevel@tonic-gate	addcc	%o2, 12, %o2		! restore count to word offset
2134*7c478bd9Sstevel@tonic-gate	ble,pt	%ncc, .co_med_wextra	! check for more words to move
2135*7c478bd9Sstevel@tonic-gate	nop
2136*7c478bd9Sstevel@tonic-gate.co_med_word2:
2137*7c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
2138*7c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! reduce count by 4
2139*7c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
2140*7c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
2141*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_word2	! loop til 3 or fewer bytes left
2142*7c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
2143*7c478bd9Sstevel@tonic-gate.co_med_wextra:
2144*7c478bd9Sstevel@tonic-gate	addcc	%o2, 3, %o2		! restore rest of count
2145*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit	! if zero, then done
2146*7c478bd9Sstevel@tonic-gate	deccc	%o2
2147*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_byte
2148*7c478bd9Sstevel@tonic-gate	nop
2149*7c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .co_sm_half
2150*7c478bd9Sstevel@tonic-gate	nop
2151*7c478bd9Sstevel@tonic-gate
2152*7c478bd9Sstevel@tonic-gate	.align 16
2153*7c478bd9Sstevel@tonic-gate	nop				! instruction alignment
2154*7c478bd9Sstevel@tonic-gate	nop				! see discussion at start of file
2155*7c478bd9Sstevel@tonic-gate	nop
2156*7c478bd9Sstevel@tonic-gate.co_med_half:
2157*7c478bd9Sstevel@tonic-gate	btst	1, %o0			! check for
2158*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_med_half1	! half word alignment
2159*7c478bd9Sstevel@tonic-gate	nop
2160*7c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
2161*7c478bd9Sstevel@tonic-gate	inc	%o0
2162*7c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store byte
2163*7c478bd9Sstevel@tonic-gate	inc	%o1
2164*7c478bd9Sstevel@tonic-gate	dec	%o2
2165*7c478bd9Sstevel@tonic-gate!
2166*7c478bd9Sstevel@tonic-gate!  Now half word aligned and have at least 38 bytes to move
2167*7c478bd9Sstevel@tonic-gate!
2168*7c478bd9Sstevel@tonic-gate.co_med_half1:
2169*7c478bd9Sstevel@tonic-gate	sub	%o2, 7, %o2		! adjust count to allow cc zero test
2170*7c478bd9Sstevel@tonic-gate.co_med_hmove:
2171*7c478bd9Sstevel@tonic-gate	lduh	[%o0], %o3		! read half word
2172*7c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! reduce count by 8
2173*7c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER	! write half word
2174*7c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
2175*7c478bd9Sstevel@tonic-gate	lduh	[%o0 + 2], %o3		! repeat for a total for 4 halfwords
2176*7c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
2177*7c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER
2178*7c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
2179*7c478bd9Sstevel@tonic-gate	lduh	[%o0 - 4], %o3
2180*7c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER
2181*7c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
2182*7c478bd9Sstevel@tonic-gate	lduh	[%o0 - 2], %o3
2183*7c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER
2184*7c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_hmove	! loop til 7 or fewer bytes left
2185*7c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
2186*7c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore count
2187*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
2188*7c478bd9Sstevel@tonic-gate	deccc	%o2
2189*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_byte
2190*7c478bd9Sstevel@tonic-gate	nop
2191*7c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .co_sm_half
2192*7c478bd9Sstevel@tonic-gate	nop
2193*7c478bd9Sstevel@tonic-gate
2194*7c478bd9Sstevel@tonic-gate/*
2195*7c478bd9Sstevel@tonic-gate * We got here because of a fault during short copyout.
2196*7c478bd9Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
2197*7c478bd9Sstevel@tonic-gate */
2198*7c478bd9Sstevel@tonic-gate.sm_copyout_err:
2199*7c478bd9Sstevel@tonic-gate	membar	#Sync
2200*7c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
2201*7c478bd9Sstevel@tonic-gate	mov	SM_SAVE_SRC, %o0
2202*7c478bd9Sstevel@tonic-gate	mov	SM_SAVE_DST, %o1
2203*7c478bd9Sstevel@tonic-gate	mov	SM_SAVE_COUNT, %o2
2204*7c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o3	! check for copyop handler
2205*7c478bd9Sstevel@tonic-gate	tst	%o3
2206*7c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 3f			! if not, return error
2207*7c478bd9Sstevel@tonic-gate	nop
2208*7c478bd9Sstevel@tonic-gate	ldn	[%o3 + CP_COPYOUT], %o5	! if handler, invoke it with
2209*7c478bd9Sstevel@tonic-gate	jmp	%o5				! original arguments
2210*7c478bd9Sstevel@tonic-gate	nop
2211*7c478bd9Sstevel@tonic-gate3:
2212*7c478bd9Sstevel@tonic-gate	retl
2213*7c478bd9Sstevel@tonic-gate	or	%g0, -1, %o0		! return error value
2214*7c478bd9Sstevel@tonic-gate
2215*7c478bd9Sstevel@tonic-gate	SET_SIZE(copyout)
2216*7c478bd9Sstevel@tonic-gate
2217*7c478bd9Sstevel@tonic-gate/*
2218*7c478bd9Sstevel@tonic-gate * The _more entry points are not intended to be used directly by
2219*7c478bd9Sstevel@tonic-gate * any caller from outside this file.  They are provided to allow
2220*7c478bd9Sstevel@tonic-gate * profiling and dtrace of the portions of the copy code that uses
2221*7c478bd9Sstevel@tonic-gate * the floating point registers.
2222*7c478bd9Sstevel@tonic-gate * This entry is particularly important as DTRACE (at least as of 2223*7c478bd9Sstevel@tonic-gate * 4/2004) does not support leaf functions. 2224*7c478bd9Sstevel@tonic-gate */ 2225*7c478bd9Sstevel@tonic-gate 2226*7c478bd9Sstevel@tonic-gate ENTRY(copyout_more) 2227*7c478bd9Sstevel@tonic-gate.copyout_more: 2228*7c478bd9Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 2229*7c478bd9Sstevel@tonic-gate set .copyout_err, REAL_LOFAULT 2230*7c478bd9Sstevel@tonic-gate 2231*7c478bd9Sstevel@tonic-gate/* 2232*7c478bd9Sstevel@tonic-gate * Copy outs that reach here are larger than VIS_COPY_THRESHOLD bytes 2233*7c478bd9Sstevel@tonic-gate */ 2234*7c478bd9Sstevel@tonic-gate.do_copyout: 2235*7c478bd9Sstevel@tonic-gate set copyio_fault, %l7 ! .copyio_fault is lofault val 2236*7c478bd9Sstevel@tonic-gate 2237*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler 2238*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 2239*7c478bd9Sstevel@tonic-gate stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault 2240*7c478bd9Sstevel@tonic-gate 2241*7c478bd9Sstevel@tonic-gate mov %i0, SAVE_SRC 2242*7c478bd9Sstevel@tonic-gate mov %i1, SAVE_DST 2243*7c478bd9Sstevel@tonic-gate mov %i2, SAVE_COUNT 2244*7c478bd9Sstevel@tonic-gate 2245*7c478bd9Sstevel@tonic-gate FP_NOMIGRATE(6, 7) 2246*7c478bd9Sstevel@tonic-gate 2247*7c478bd9Sstevel@tonic-gate rd %fprs, %o2 ! check for unused fp 2248*7c478bd9Sstevel@tonic-gate st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! 
save orig %fprs 2249*7c478bd9Sstevel@tonic-gate btst FPRS_FEF, %o2 2250*7c478bd9Sstevel@tonic-gate bz,a,pt %icc, .do_blockcopyout 2251*7c478bd9Sstevel@tonic-gate wr %g0, FPRS_FEF, %fprs 2252*7c478bd9Sstevel@tonic-gate 2253*7c478bd9Sstevel@tonic-gate BST_FPQ2Q4_TOSTACK(%o2) 2254*7c478bd9Sstevel@tonic-gate 2255*7c478bd9Sstevel@tonic-gate.do_blockcopyout: 2256*7c478bd9Sstevel@tonic-gate rd %gsr, %o2 2257*7c478bd9Sstevel@tonic-gate stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 2258*7c478bd9Sstevel@tonic-gate or %l6, FPUSED_FLAG, %l6 2259*7c478bd9Sstevel@tonic-gate 2260*7c478bd9Sstevel@tonic-gate andcc DST, VIS_BLOCKSIZE - 1, TMP 2261*7c478bd9Sstevel@tonic-gate mov ASI_USER, %asi 2262*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f 2263*7c478bd9Sstevel@tonic-gate neg TMP 2264*7c478bd9Sstevel@tonic-gate add TMP, VIS_BLOCKSIZE, TMP 2265*7c478bd9Sstevel@tonic-gate 2266*7c478bd9Sstevel@tonic-gate ! TMP = bytes required to align DST on FP_BLOCK boundary 2267*7c478bd9Sstevel@tonic-gate ! Using SRC as a tmp here 2268*7c478bd9Sstevel@tonic-gate cmp TMP, 3 2269*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, 1f 2270*7c478bd9Sstevel@tonic-gate sub CNT,TMP,CNT ! adjust main count 2271*7c478bd9Sstevel@tonic-gate sub TMP, 3, TMP ! adjust for end of loop test 2272*7c478bd9Sstevel@tonic-gate.co_blkalign: 2273*7c478bd9Sstevel@tonic-gate ldub [REALSRC], SRC ! 
move 4 bytes per loop iteration 2274*7c478bd9Sstevel@tonic-gate stba SRC, [DST]%asi 2275*7c478bd9Sstevel@tonic-gate subcc TMP, 4, TMP 2276*7c478bd9Sstevel@tonic-gate ldub [REALSRC + 1], SRC 2277*7c478bd9Sstevel@tonic-gate add REALSRC, 4, REALSRC 2278*7c478bd9Sstevel@tonic-gate stba SRC, [DST + 1]%asi 2279*7c478bd9Sstevel@tonic-gate ldub [REALSRC - 2], SRC 2280*7c478bd9Sstevel@tonic-gate add DST, 4, DST 2281*7c478bd9Sstevel@tonic-gate stba SRC, [DST - 2]%asi 2282*7c478bd9Sstevel@tonic-gate ldub [REALSRC - 1], SRC 2283*7c478bd9Sstevel@tonic-gate bgu,pt %ncc, .co_blkalign 2284*7c478bd9Sstevel@tonic-gate stba SRC, [DST - 1]%asi 2285*7c478bd9Sstevel@tonic-gate 2286*7c478bd9Sstevel@tonic-gate addcc TMP, 3, TMP ! restore count adjustment 2287*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! no bytes left? 2288*7c478bd9Sstevel@tonic-gate nop 2289*7c478bd9Sstevel@tonic-gate1: ldub [REALSRC], SRC 2290*7c478bd9Sstevel@tonic-gate inc REALSRC 2291*7c478bd9Sstevel@tonic-gate inc DST 2292*7c478bd9Sstevel@tonic-gate deccc TMP 2293*7c478bd9Sstevel@tonic-gate bgu %ncc, 1b 2294*7c478bd9Sstevel@tonic-gate stba SRC, [DST - 1]%asi 2295*7c478bd9Sstevel@tonic-gate 2296*7c478bd9Sstevel@tonic-gate2: 2297*7c478bd9Sstevel@tonic-gate andn REALSRC, 0x7, SRC 2298*7c478bd9Sstevel@tonic-gate alignaddr REALSRC, %g0, %g0 2299*7c478bd9Sstevel@tonic-gate 2300*7c478bd9Sstevel@tonic-gate ! SRC - 8-byte aligned 2301*7c478bd9Sstevel@tonic-gate ! 
DST - 64-byte aligned 2302*7c478bd9Sstevel@tonic-gate prefetch [SRC], #one_read 2303*7c478bd9Sstevel@tonic-gate prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read 2304*7c478bd9Sstevel@tonic-gate prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read 2305*7c478bd9Sstevel@tonic-gate prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read 2306*7c478bd9Sstevel@tonic-gate ldd [SRC], %f16 2307*7c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4 2308*7c478bd9Sstevel@tonic-gate prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read 2309*7c478bd9Sstevel@tonic-gate#endif 2310*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x08], %f18 2311*7c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5 2312*7c478bd9Sstevel@tonic-gate prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read 2313*7c478bd9Sstevel@tonic-gate#endif 2314*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x10], %f20 2315*7c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6 2316*7c478bd9Sstevel@tonic-gate prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read 2317*7c478bd9Sstevel@tonic-gate#endif 2318*7c478bd9Sstevel@tonic-gate faligndata %f16, %f18, %f48 2319*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x18], %f22 2320*7c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7 2321*7c478bd9Sstevel@tonic-gate prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read 2322*7c478bd9Sstevel@tonic-gate#endif 2323*7c478bd9Sstevel@tonic-gate faligndata %f18, %f20, %f50 2324*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x20], %f24 2325*7c478bd9Sstevel@tonic-gate faligndata %f20, %f22, %f52 2326*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x28], %f26 2327*7c478bd9Sstevel@tonic-gate faligndata %f22, %f24, %f54 2328*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x30], %f28 2329*7c478bd9Sstevel@tonic-gate faligndata %f24, %f26, %f56 2330*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x38], %f30 2331*7c478bd9Sstevel@tonic-gate faligndata %f26, %f28, %f58 2332*7c478bd9Sstevel@tonic-gate ldd [SRC + VIS_BLOCKSIZE], %f16 2333*7c478bd9Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 2334*7c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 
2335*7c478bd9Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 2336*7c478bd9Sstevel@tonic-gate ba,a,pt %ncc, 1f 2337*7c478bd9Sstevel@tonic-gate nop 2338*7c478bd9Sstevel@tonic-gate .align 16 2339*7c478bd9Sstevel@tonic-gate1: 2340*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x08], %f18 2341*7c478bd9Sstevel@tonic-gate faligndata %f28, %f30, %f60 2342*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x10], %f20 2343*7c478bd9Sstevel@tonic-gate faligndata %f30, %f16, %f62 2344*7c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 2345*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x18], %f22 2346*7c478bd9Sstevel@tonic-gate faligndata %f16, %f18, %f48 2347*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x20], %f24 2348*7c478bd9Sstevel@tonic-gate faligndata %f18, %f20, %f50 2349*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x28], %f26 2350*7c478bd9Sstevel@tonic-gate faligndata %f20, %f22, %f52 2351*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x30], %f28 2352*7c478bd9Sstevel@tonic-gate faligndata %f22, %f24, %f54 2353*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x38], %f30 2354*7c478bd9Sstevel@tonic-gate faligndata %f24, %f26, %f56 2355*7c478bd9Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 2356*7c478bd9Sstevel@tonic-gate ldd [SRC + VIS_BLOCKSIZE], %f16 2357*7c478bd9Sstevel@tonic-gate faligndata %f26, %f28, %f58 2358*7c478bd9Sstevel@tonic-gate prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read 2359*7c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 2360*7c478bd9Sstevel@tonic-gate prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read 2361*7c478bd9Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 2362*7c478bd9Sstevel@tonic-gate cmp CNT, VIS_BLOCKSIZE + 8 2363*7c478bd9Sstevel@tonic-gate bgu,pt %ncc, 1b 2364*7c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 2365*7c478bd9Sstevel@tonic-gate 2366*7c478bd9Sstevel@tonic-gate ! 
only if REALSRC & 0x7 is 0 2367*7c478bd9Sstevel@tonic-gate cmp CNT, VIS_BLOCKSIZE 2368*7c478bd9Sstevel@tonic-gate bne %ncc, 3f 2369*7c478bd9Sstevel@tonic-gate andcc REALSRC, 0x7, %g0 2370*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f 2371*7c478bd9Sstevel@tonic-gate nop 2372*7c478bd9Sstevel@tonic-gate3: 2373*7c478bd9Sstevel@tonic-gate faligndata %f28, %f30, %f60 2374*7c478bd9Sstevel@tonic-gate faligndata %f30, %f16, %f62 2375*7c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 2376*7c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 2377*7c478bd9Sstevel@tonic-gate ba,pt %ncc, 3f 2378*7c478bd9Sstevel@tonic-gate nop 2379*7c478bd9Sstevel@tonic-gate2: 2380*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x08], %f18 2381*7c478bd9Sstevel@tonic-gate fsrc1 %f28, %f60 2382*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x10], %f20 2383*7c478bd9Sstevel@tonic-gate fsrc1 %f30, %f62 2384*7c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 2385*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x18], %f22 2386*7c478bd9Sstevel@tonic-gate fsrc1 %f16, %f48 2387*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x20], %f24 2388*7c478bd9Sstevel@tonic-gate fsrc1 %f18, %f50 2389*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x28], %f26 2390*7c478bd9Sstevel@tonic-gate fsrc1 %f20, %f52 2391*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x30], %f28 2392*7c478bd9Sstevel@tonic-gate fsrc1 %f22, %f54 2393*7c478bd9Sstevel@tonic-gate ldd [SRC + 0x38], %f30 2394*7c478bd9Sstevel@tonic-gate fsrc1 %f24, %f56 2395*7c478bd9Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 2396*7c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 2397*7c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 2398*7c478bd9Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 2399*7c478bd9Sstevel@tonic-gate fsrc1 %f26, %f58 2400*7c478bd9Sstevel@tonic-gate fsrc1 %f28, %f60 2401*7c478bd9Sstevel@tonic-gate fsrc1 %f30, %f62 2402*7c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 2403*7c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 2404*7c478bd9Sstevel@tonic-gate 
ba,a,pt %ncc, 4f 2405*7c478bd9Sstevel@tonic-gate nop 2406*7c478bd9Sstevel@tonic-gate 2407*7c478bd9Sstevel@tonic-gate3: tst CNT 2408*7c478bd9Sstevel@tonic-gate bz,a %ncc, 4f 2409*7c478bd9Sstevel@tonic-gate nop 2410*7c478bd9Sstevel@tonic-gate 2411*7c478bd9Sstevel@tonic-gate5: ldub [REALSRC], TMP 2412*7c478bd9Sstevel@tonic-gate inc REALSRC 2413*7c478bd9Sstevel@tonic-gate inc DST 2414*7c478bd9Sstevel@tonic-gate deccc CNT 2415*7c478bd9Sstevel@tonic-gate bgu %ncc, 5b 2416*7c478bd9Sstevel@tonic-gate stba TMP, [DST - 1]%asi 2417*7c478bd9Sstevel@tonic-gate4: 2418*7c478bd9Sstevel@tonic-gate 2419*7c478bd9Sstevel@tonic-gate.copyout_exit: 2420*7c478bd9Sstevel@tonic-gate membar #Sync 2421*7c478bd9Sstevel@tonic-gate 2422*7c478bd9Sstevel@tonic-gate FPRAS_INTERVAL(FPRAS_COPYOUT, 0, %l5, %o2, %o3, %o4, %o5, 8) 2423*7c478bd9Sstevel@tonic-gate FPRAS_REWRITE_TYPE2Q2(0, %l5, %o2, %o3, 8, 9) 2424*7c478bd9Sstevel@tonic-gate FPRAS_CHECK(FPRAS_COPYOUT, %l5, 9) ! lose outputs 2425*7c478bd9Sstevel@tonic-gate 2426*7c478bd9Sstevel@tonic-gate ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 2427*7c478bd9Sstevel@tonic-gate wr %o2, 0, %gsr ! restore gsr 2428*7c478bd9Sstevel@tonic-gate 2429*7c478bd9Sstevel@tonic-gate ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 2430*7c478bd9Sstevel@tonic-gate btst FPRS_FEF, %o3 2431*7c478bd9Sstevel@tonic-gate bz,pt %icc, 4f 2432*7c478bd9Sstevel@tonic-gate nop 2433*7c478bd9Sstevel@tonic-gate 2434*7c478bd9Sstevel@tonic-gate BLD_FPQ2Q4_FROMSTACK(%o2) 2435*7c478bd9Sstevel@tonic-gate 2436*7c478bd9Sstevel@tonic-gate ba,pt %ncc, 1f 2437*7c478bd9Sstevel@tonic-gate wr %o3, 0, %fprs ! restore fprs 2438*7c478bd9Sstevel@tonic-gate 2439*7c478bd9Sstevel@tonic-gate4: 2440*7c478bd9Sstevel@tonic-gate FZEROQ2Q4 2441*7c478bd9Sstevel@tonic-gate wr %o3, 0, %fprs ! 
restore fprs 2442*7c478bd9Sstevel@tonic-gate 2443*7c478bd9Sstevel@tonic-gate1: 2444*7c478bd9Sstevel@tonic-gate membar #Sync 2445*7c478bd9Sstevel@tonic-gate andn %l6, FPUSED_FLAG, %l6 2446*7c478bd9Sstevel@tonic-gate stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2447*7c478bd9Sstevel@tonic-gate FP_ALLOWMIGRATE(5, 6) 2448*7c478bd9Sstevel@tonic-gate ret 2449*7c478bd9Sstevel@tonic-gate restore %g0, 0, %o0 2450*7c478bd9Sstevel@tonic-gate 2451*7c478bd9Sstevel@tonic-gate/* 2452*7c478bd9Sstevel@tonic-gate * We got here because of a fault during copyout. 2453*7c478bd9Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh). 2454*7c478bd9Sstevel@tonic-gate */ 2455*7c478bd9Sstevel@tonic-gate.copyout_err: 2456*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler 2457*7c478bd9Sstevel@tonic-gate tst %o4 2458*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 2459*7c478bd9Sstevel@tonic-gate nop 2460*7c478bd9Sstevel@tonic-gate ldn [%o4 + CP_COPYOUT], %g2 ! if handler, invoke it with 2461*7c478bd9Sstevel@tonic-gate jmp %g2 ! original arguments 2462*7c478bd9Sstevel@tonic-gate restore %g0, 0, %g0 ! dispose of copy window 2463*7c478bd9Sstevel@tonic-gate2: 2464*7c478bd9Sstevel@tonic-gate ret 2465*7c478bd9Sstevel@tonic-gate restore %g0, -1, %o0 ! 
! NOTE(review): fused SCCS blame prefixes / broken line breaks preserved
! byte-identical below; only this comment header is added.
! xcopyout(kaddr, uaddr, count): like copyout but on fault it returns the
! errno value (via .xcopyout_err / ERRNO) instead of -1. The dispatch
! visible here mirrors copyout: counts <= VIS_COPY_THRESHOLD, or any case
! where the relevant hw_copy_limit_{1,2,4,8} global is zero or >= count,
! go to .xcopyout_small (leaf path, .sm_xcopyout_err as t_lofault);
! otherwise .xcopyout_more saves a window, points REAL_LOFAULT at
! .xcopyout_err, and falls into the shared .do_copyout FP path.
! Alignment probe: xor of src/dst selects 8-, 4-, 2-, or 1-byte test
! (btst 7 / btst 1 / btst 3 on %o3).
return error value 2466*7c478bd9Sstevel@tonic-gate 2467*7c478bd9Sstevel@tonic-gate 2468*7c478bd9Sstevel@tonic-gate SET_SIZE(copyout_more) 2469*7c478bd9Sstevel@tonic-gate 2470*7c478bd9Sstevel@tonic-gate#endif /* lint */ 2471*7c478bd9Sstevel@tonic-gate 2472*7c478bd9Sstevel@tonic-gate 2473*7c478bd9Sstevel@tonic-gate#ifdef lint 2474*7c478bd9Sstevel@tonic-gate 2475*7c478bd9Sstevel@tonic-gate/*ARGSUSED*/ 2476*7c478bd9Sstevel@tonic-gateint 2477*7c478bd9Sstevel@tonic-gatexcopyout(const void *kaddr, void *uaddr, size_t count) 2478*7c478bd9Sstevel@tonic-gate{ return (0); } 2479*7c478bd9Sstevel@tonic-gate 2480*7c478bd9Sstevel@tonic-gate#else /* lint */ 2481*7c478bd9Sstevel@tonic-gate 2482*7c478bd9Sstevel@tonic-gate ENTRY(xcopyout) 2483*7c478bd9Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 2484*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to larger cases 2485*7c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 2486*7c478bd9Sstevel@tonic-gate btst 7, %o3 ! 2487*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyout_8 ! 2488*7c478bd9Sstevel@tonic-gate nop 2489*7c478bd9Sstevel@tonic-gate btst 1, %o3 ! 2490*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyout_2 ! check for half-word 2491*7c478bd9Sstevel@tonic-gate nop 2492*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 2493*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 2494*7c478bd9Sstevel@tonic-gate tst %o3 2495*7c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyout_small ! if zero, disable HW copy 2496*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 2497*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to small copy 2498*7c478bd9Sstevel@tonic-gate nop 2499*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyout_more ! otherwise go to large copy 2500*7c478bd9Sstevel@tonic-gate nop 2501*7c478bd9Sstevel@tonic-gate.xcopyout_2: 2502*7c478bd9Sstevel@tonic-gate btst 3, %o3 !
2503*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyout_4 ! check for word alignment 2504*7c478bd9Sstevel@tonic-gate nop 2505*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 2506*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 2507*7c478bd9Sstevel@tonic-gate tst %o3 2508*7c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyout_small ! if zero, disable HW copy 2509*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 2510*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to small copy 2511*7c478bd9Sstevel@tonic-gate nop 2512*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyout_more ! otherwise go to large copy 2513*7c478bd9Sstevel@tonic-gate nop 2514*7c478bd9Sstevel@tonic-gate.xcopyout_4: 2515*7c478bd9Sstevel@tonic-gate ! already checked longword, must be word aligned 2516*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 2517*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 2518*7c478bd9Sstevel@tonic-gate tst %o3 2519*7c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyout_small ! if zero, disable HW copy 2520*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 2521*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to small copy 2522*7c478bd9Sstevel@tonic-gate nop 2523*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyout_more ! otherwise go to large copy 2524*7c478bd9Sstevel@tonic-gate nop 2525*7c478bd9Sstevel@tonic-gate.xcopyout_8: 2526*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 2527*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 2528*7c478bd9Sstevel@tonic-gate tst %o3 2529*7c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyout_small ! if zero, disable HW copy 2530*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 2531*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to small copy 2532*7c478bd9Sstevel@tonic-gate nop 2533*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyout_more !
otherwise go to large copy 2534*7c478bd9Sstevel@tonic-gate nop 2535*7c478bd9Sstevel@tonic-gate 2536*7c478bd9Sstevel@tonic-gate.xcopyout_small: 2537*7c478bd9Sstevel@tonic-gate sethi %hi(.sm_xcopyout_err), %o5 ! .sm_xcopyout_err is lofault 2538*7c478bd9Sstevel@tonic-gate or %o5, %lo(.sm_xcopyout_err), %o5 2539*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler 2540*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 2541*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .sm_do_copyout ! common code 2542*7c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault 2543*7c478bd9Sstevel@tonic-gate 2544*7c478bd9Sstevel@tonic-gate.xcopyout_more: 2545*7c478bd9Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 2546*7c478bd9Sstevel@tonic-gate sethi %hi(.xcopyout_err), REAL_LOFAULT 2547*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .do_copyout ! common code 2548*7c478bd9Sstevel@tonic-gate or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT 2549*7c478bd9Sstevel@tonic-gate 2550*7c478bd9Sstevel@tonic-gate/* 2551*7c478bd9Sstevel@tonic-gate * We got here because of fault during xcopyout 2552*7c478bd9Sstevel@tonic-gate * Errno value is in ERRNO 2553*7c478bd9Sstevel@tonic-gate */ 2554*7c478bd9Sstevel@tonic-gate.xcopyout_err: 2555*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler 2556*7c478bd9Sstevel@tonic-gate tst %o4 2557*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 2558*7c478bd9Sstevel@tonic-gate nop 2559*7c478bd9Sstevel@tonic-gate ldn [%o4 + CP_XCOPYOUT], %g2 ! if handler, invoke it with 2560*7c478bd9Sstevel@tonic-gate jmp %g2 ! original arguments 2561*7c478bd9Sstevel@tonic-gate restore %g0, 0, %g0 ! dispose of copy window 2562*7c478bd9Sstevel@tonic-gate2: 2563*7c478bd9Sstevel@tonic-gate ret 2564*7c478bd9Sstevel@tonic-gate restore ERRNO, 0, %o0 !
! NOTE(review): fused SCCS blame prefixes / broken line breaks preserved
! byte-identical below; only this comment header is added.
! First part: .sm_xcopyout_err — fault handler for the small xcopyout
! path. Restores the saved t_lofault, reloads the original src/dst/count
! from SM_SAVE_*, and tail-calls the thread's T_COPYOPS CP_XCOPYOUT
! handler if installed; otherwise returns the errno value left in %g1.
! Then SET_SIZE(xcopyout) and the start of xcopyout_little:
! copies kernel bytes to user space byte-reversed (last source byte
! first) using stba ...ASI_AIUSL (user little-endian ASI), with
! .xcopyio_err installed as t_lofault for the duration and the old
! handler kept in %o5. Index %o3 starts at -count and counts up; the
! source pointer walks backward two per iteration (one inc via the
! positive %o3 offset net effect). Returns 0 on success.
return errno value 2565*7c478bd9Sstevel@tonic-gate 2566*7c478bd9Sstevel@tonic-gate.sm_xcopyout_err: 2567*7c478bd9Sstevel@tonic-gate 2568*7c478bd9Sstevel@tonic-gate membar #Sync 2569*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2570*7c478bd9Sstevel@tonic-gate mov SM_SAVE_SRC, %o0 2571*7c478bd9Sstevel@tonic-gate mov SM_SAVE_DST, %o1 2572*7c478bd9Sstevel@tonic-gate mov SM_SAVE_COUNT, %o2 2573*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler 2574*7c478bd9Sstevel@tonic-gate tst %o3 2575*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 3f ! if not, return error 2576*7c478bd9Sstevel@tonic-gate nop 2577*7c478bd9Sstevel@tonic-gate ldn [%o3 + CP_XCOPYOUT], %o5 ! if handler, invoke it with 2578*7c478bd9Sstevel@tonic-gate jmp %o5 ! original arguments 2579*7c478bd9Sstevel@tonic-gate nop 2580*7c478bd9Sstevel@tonic-gate3: 2581*7c478bd9Sstevel@tonic-gate retl 2582*7c478bd9Sstevel@tonic-gate or %g1, 0, %o0 ! return errno value 2583*7c478bd9Sstevel@tonic-gate 2584*7c478bd9Sstevel@tonic-gate SET_SIZE(xcopyout) 2585*7c478bd9Sstevel@tonic-gate 2586*7c478bd9Sstevel@tonic-gate#endif /* lint */ 2587*7c478bd9Sstevel@tonic-gate 2588*7c478bd9Sstevel@tonic-gate#ifdef lint 2589*7c478bd9Sstevel@tonic-gate 2590*7c478bd9Sstevel@tonic-gate/*ARGSUSED*/ 2591*7c478bd9Sstevel@tonic-gateint 2592*7c478bd9Sstevel@tonic-gatexcopyout_little(const void *kaddr, void *uaddr, size_t count) 2593*7c478bd9Sstevel@tonic-gate{ return (0); } 2594*7c478bd9Sstevel@tonic-gate 2595*7c478bd9Sstevel@tonic-gate#else /* lint */ 2596*7c478bd9Sstevel@tonic-gate 2597*7c478bd9Sstevel@tonic-gate ENTRY(xcopyout_little) 2598*7c478bd9Sstevel@tonic-gate sethi %hi(.xcopyio_err), %o5 2599*7c478bd9Sstevel@tonic-gate or %o5, %lo(.xcopyio_err), %o5 2600*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 2601*7c478bd9Sstevel@tonic-gate membar #Sync !
sync error barrier 2602*7c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] 2603*7c478bd9Sstevel@tonic-gate mov %o4, %o5 2604*7c478bd9Sstevel@tonic-gate 2605*7c478bd9Sstevel@tonic-gate subcc %g0, %o2, %o3 2606*7c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 2607*7c478bd9Sstevel@tonic-gate bz,pn %ncc, 2f ! check for zero bytes 2608*7c478bd9Sstevel@tonic-gate sub %o2, 1, %o4 2609*7c478bd9Sstevel@tonic-gate add %o0, %o4, %o0 ! start w/last byte 2610*7c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 2611*7c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 2612*7c478bd9Sstevel@tonic-gate 2613*7c478bd9Sstevel@tonic-gate1: stba %o4, [%o1 + %o3]ASI_AIUSL 2614*7c478bd9Sstevel@tonic-gate inccc %o3 2615*7c478bd9Sstevel@tonic-gate sub %o0, 2, %o0 ! get next byte 2616*7c478bd9Sstevel@tonic-gate bcc,a,pt %ncc, 1b 2617*7c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 2618*7c478bd9Sstevel@tonic-gate 2619*7c478bd9Sstevel@tonic-gate2: 2620*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 2621*7c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2622*7c478bd9Sstevel@tonic-gate retl 2623*7c478bd9Sstevel@tonic-gate mov %g0, %o0 !
return (0) 2624*7c478bd9Sstevel@tonic-gate 2625*7c478bd9Sstevel@tonic-gate SET_SIZE(xcopyout_little) 2626*7c478bd9Sstevel@tonic-gate 2627*7c478bd9Sstevel@tonic-gate#endif /* lint */ 2628*7c478bd9Sstevel@tonic-gate 2629*7c478bd9Sstevel@tonic-gate/* 2630*7c478bd9Sstevel@tonic-gate * Copy user data to kernel space (copyin/xcopyin/xcopyin_little) 2631*7c478bd9Sstevel@tonic-gate */ 2632*7c478bd9Sstevel@tonic-gate 2633*7c478bd9Sstevel@tonic-gate#if defined(lint) 2634*7c478bd9Sstevel@tonic-gate 2635*7c478bd9Sstevel@tonic-gate/*ARGSUSED*/ 2636*7c478bd9Sstevel@tonic-gateint 2637*7c478bd9Sstevel@tonic-gatecopyin(const void *uaddr, void *kaddr, size_t count) 2638*7c478bd9Sstevel@tonic-gate{ return (0); } 2639*7c478bd9Sstevel@tonic-gate 2640*7c478bd9Sstevel@tonic-gate#else /* lint */ 2641*7c478bd9Sstevel@tonic-gate 2642*7c478bd9Sstevel@tonic-gate ENTRY(copyin) 2643*7c478bd9Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 2644*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! go to larger cases 2645*7c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 2646*7c478bd9Sstevel@tonic-gate btst 7, %o3 ! 2647*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .copyin_8 ! check for longword alignment 2648*7c478bd9Sstevel@tonic-gate nop 2649*7c478bd9Sstevel@tonic-gate btst 1, %o3 ! 2650*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .copyin_2 ! check for half-word 2651*7c478bd9Sstevel@tonic-gate nop 2652*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 2653*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 2654*7c478bd9Sstevel@tonic-gate tst %o3 2655*7c478bd9Sstevel@tonic-gate bz,pn %icc, .copyin_small ! if zero, disable HW copy 2656*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 2657*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! go to small copy 2658*7c478bd9Sstevel@tonic-gate nop 2659*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .copyin_more ! 
otherwise go to large copy 2660*7c478bd9Sstevel@tonic-gate nop 2661*7c478bd9Sstevel@tonic-gate.copyin_2: 2662*7c478bd9Sstevel@tonic-gate btst 3, %o3 ! 2663*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .copyin_4 ! check for word alignment 2664*7c478bd9Sstevel@tonic-gate nop 2665*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 2666*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 2667*7c478bd9Sstevel@tonic-gate tst %o3 2668*7c478bd9Sstevel@tonic-gate bz,pn %icc, .copyin_small ! if zero, disable HW copy 2669*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 2670*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! go to small copy 2671*7c478bd9Sstevel@tonic-gate nop 2672*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .copyin_more ! otherwise go to large copy 2673*7c478bd9Sstevel@tonic-gate nop 2674*7c478bd9Sstevel@tonic-gate.copyin_4: 2675*7c478bd9Sstevel@tonic-gate ! already checked longword, must be word aligned 2676*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 2677*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 2678*7c478bd9Sstevel@tonic-gate tst %o3 2679*7c478bd9Sstevel@tonic-gate bz,pn %icc, .copyin_small ! if zero, disable HW copy 2680*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 2681*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! go to small copy 2682*7c478bd9Sstevel@tonic-gate nop 2683*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .copyin_more ! otherwise go to large copy 2684*7c478bd9Sstevel@tonic-gate nop 2685*7c478bd9Sstevel@tonic-gate.copyin_8: 2686*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 2687*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 2688*7c478bd9Sstevel@tonic-gate tst %o3 2689*7c478bd9Sstevel@tonic-gate bz,pn %icc, .copyin_small ! if zero, disable HW copy 2690*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 2691*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! 
go to small copy 2692*7c478bd9Sstevel@tonic-gate nop 2693*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .copyin_more ! otherwise go to large copy 2694*7c478bd9Sstevel@tonic-gate nop 2695*7c478bd9Sstevel@tonic-gate 2696*7c478bd9Sstevel@tonic-gate .align 16 2697*7c478bd9Sstevel@tonic-gate nop ! instruction alignment 2698*7c478bd9Sstevel@tonic-gate ! see discussion at start of file 2699*7c478bd9Sstevel@tonic-gate.copyin_small: 2700*7c478bd9Sstevel@tonic-gate sethi %hi(.sm_copyin_err), %o5 ! .sm_copyin_err is lofault 2701*7c478bd9Sstevel@tonic-gate or %o5, %lo(.sm_copyin_err), %o5 2702*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofault, no tramp 2703*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 2704*7c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] 2705*7c478bd9Sstevel@tonic-gate.sm_do_copyin: 2706*7c478bd9Sstevel@tonic-gate mov %o0, SM_SAVE_SRC 2707*7c478bd9Sstevel@tonic-gate mov %o1, SM_SAVE_DST 2708*7c478bd9Sstevel@tonic-gate cmp %o2, SHORTCOPY ! check for really short case 2709*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .ci_sm_left ! 2710*7c478bd9Sstevel@tonic-gate mov %o2, SM_SAVE_COUNT 2711*7c478bd9Sstevel@tonic-gate cmp %o2, CHKSIZE ! check for medium length cases 2712*7c478bd9Sstevel@tonic-gate bgu,pn %ncc, .ci_med ! 2713*7c478bd9Sstevel@tonic-gate or %o0, %o1, %o3 ! prepare alignment check 2714*7c478bd9Sstevel@tonic-gate andcc %o3, 0x3, %g0 ! test for alignment 2715*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_word ! branch to word aligned case 2716*7c478bd9Sstevel@tonic-gate.ci_sm_movebytes: 2717*7c478bd9Sstevel@tonic-gate sub %o2, 3, %o2 ! adjust count to allow cc zero test 2718*7c478bd9Sstevel@tonic-gate.ci_sm_notalign4: 2719*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! read byte 2720*7c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 2721*7c478bd9Sstevel@tonic-gate stb %o3, [%o1] ! write byte 2722*7c478bd9Sstevel@tonic-gate add %o0, 1, %o0 ! 
advance SRC by 1 2723*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! repeat for a total of 4 bytes 2724*7c478bd9Sstevel@tonic-gate add %o0, 1, %o0 ! advance SRC by 1 2725*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 1] 2726*7c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2727*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 2728*7c478bd9Sstevel@tonic-gate add %o0, 1, %o0 ! advance SRC by 1 2729*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 - 2] 2730*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 2731*7c478bd9Sstevel@tonic-gate add %o0, 1, %o0 ! advance SRC by 1 2732*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_sm_notalign4 ! loop til 3 or fewer bytes remain 2733*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 - 1] 2734*7c478bd9Sstevel@tonic-gate add %o2, 3, %o2 ! restore count 2735*7c478bd9Sstevel@tonic-gate.ci_sm_left: 2736*7c478bd9Sstevel@tonic-gate tst %o2 2737*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2738*7c478bd9Sstevel@tonic-gate nop 2739*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 2740*7c478bd9Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 2741*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2742*7c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store one byte 2743*7c478bd9Sstevel@tonic-gate inc %o0 2744*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load second byte 2745*7c478bd9Sstevel@tonic-gate deccc %o2 2746*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2747*7c478bd9Sstevel@tonic-gate stb %o3,[%o1 + 1] ! store second byte 2748*7c478bd9Sstevel@tonic-gate inc %o0 2749*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load third byte 2750*7c478bd9Sstevel@tonic-gate stb %o3,[%o1 + 2] ! store third byte 2751*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 2752*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2753*7c478bd9Sstevel@tonic-gate retl 2754*7c478bd9Sstevel@tonic-gate mov %g0, %o0 ! 
return 0 2755*7c478bd9Sstevel@tonic-gate .align 16 2756*7c478bd9Sstevel@tonic-gate.ci_sm_words: 2757*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 2758*7c478bd9Sstevel@tonic-gate.ci_sm_wordx: 2759*7c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! update count 2760*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 2761*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! update SRC 2762*7c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! update DST 2763*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 2764*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! update SRC 2765*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_sm_words ! loop til done 2766*7c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 4] ! write word 2767*7c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore count 2768*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2769*7c478bd9Sstevel@tonic-gate nop 2770*7c478bd9Sstevel@tonic-gate deccc %o2 2771*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_byte 2772*7c478bd9Sstevel@tonic-gate.ci_sm_half: 2773*7c478bd9Sstevel@tonic-gate subcc %o2, 2, %o2 ! reduce count by 2 2774*7c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 ! read half word 2775*7c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 2776*7c478bd9Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 2777*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_sm_half ! loop til done 2778*7c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 2] ! write half word 2779*7c478bd9Sstevel@tonic-gate addcc %o2, 1, %o2 ! restore count 2780*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2781*7c478bd9Sstevel@tonic-gate nop 2782*7c478bd9Sstevel@tonic-gate.ci_sm_byte: 2783*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 2784*7c478bd9Sstevel@tonic-gate stb %o3, [%o1] 2785*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 2786*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2787*7c478bd9Sstevel@tonic-gate retl 2788*7c478bd9Sstevel@tonic-gate mov %g0, %o0 ! 
return 0 2789*7c478bd9Sstevel@tonic-gate .align 16 2790*7c478bd9Sstevel@tonic-gate.ci_sm_word: 2791*7c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! update count 2792*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_sm_wordx 2793*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 2794*7c478bd9Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore count 2795*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2796*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 2797*7c478bd9Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 2798*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 2799*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 2800*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2801*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 4] ! store one byte 2802*7c478bd9Sstevel@tonic-gate inc %o0 2803*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load second byte 2804*7c478bd9Sstevel@tonic-gate deccc %o2 2805*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2806*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 5] ! store second byte 2807*7c478bd9Sstevel@tonic-gate inc %o0 2808*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load third byte 2809*7c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 6] ! store third byte 2810*7c478bd9Sstevel@tonic-gate.ci_sm_exit: 2811*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 2812*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2813*7c478bd9Sstevel@tonic-gate retl 2814*7c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 2815*7c478bd9Sstevel@tonic-gate 2816*7c478bd9Sstevel@tonic-gate .align 16 2817*7c478bd9Sstevel@tonic-gate.ci_med: 2818*7c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! setup alignment check 2819*7c478bd9Sstevel@tonic-gate btst 1, %o3 2820*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_sm_movebytes ! unaligned 2821*7c478bd9Sstevel@tonic-gate nop 2822*7c478bd9Sstevel@tonic-gate btst 3, %o3 2823*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_med_half ! 
halfword aligned 2824*7c478bd9Sstevel@tonic-gate nop 2825*7c478bd9Sstevel@tonic-gate btst 7, %o3 2826*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_med_word ! word aligned 2827*7c478bd9Sstevel@tonic-gate nop 2828*7c478bd9Sstevel@tonic-gate.ci_med_long: 2829*7c478bd9Sstevel@tonic-gate btst 3, %o0 ! check for 2830*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_med_long1 ! word alignment 2831*7c478bd9Sstevel@tonic-gate nop 2832*7c478bd9Sstevel@tonic-gate.ci_med_long0: 2833*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 2834*7c478bd9Sstevel@tonic-gate inc %o0 2835*7c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 2836*7c478bd9Sstevel@tonic-gate inc %o1 2837*7c478bd9Sstevel@tonic-gate btst 3, %o0 2838*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_med_long0 2839*7c478bd9Sstevel@tonic-gate dec %o2 2840*7c478bd9Sstevel@tonic-gate.ci_med_long1: ! word aligned 2841*7c478bd9Sstevel@tonic-gate btst 7, %o0 ! check for long word 2842*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_med_long2 2843*7c478bd9Sstevel@tonic-gate nop 2844*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! load word 2845*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 2846*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! store word 2847*7c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2848*7c478bd9Sstevel@tonic-gate sub %o2, 4, %o2 ! reduce count by 4 2849*7c478bd9Sstevel@tonic-gate! 2850*7c478bd9Sstevel@tonic-gate! Now long word aligned and have at least 32 bytes to move 2851*7c478bd9Sstevel@tonic-gate! 2852*7c478bd9Sstevel@tonic-gate.ci_med_long2: 2853*7c478bd9Sstevel@tonic-gate sub %o2, 31, %o2 ! adjust count to allow cc zero test 2854*7c478bd9Sstevel@tonic-gate.ci_med_lmove: 2855*7c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 ! read long word 2856*7c478bd9Sstevel@tonic-gate subcc %o2, 32, %o2 ! reduce count by 32 2857*7c478bd9Sstevel@tonic-gate stx %o3, [%o1] ! write long word 2858*7c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! 
advance SRC by 8 2859*7c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 ! repeat for a total for 4 long words 2860*7c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 2861*7c478bd9Sstevel@tonic-gate stx %o3, [%o1 + 8] 2862*7c478bd9Sstevel@tonic-gate add %o1, 32, %o1 ! advance DST by 32 2863*7c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 2864*7c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 2865*7c478bd9Sstevel@tonic-gate stx %o3, [%o1 - 16] 2866*7c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 2867*7c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 2868*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_lmove ! loop til 31 or fewer bytes left 2869*7c478bd9Sstevel@tonic-gate stx %o3, [%o1 - 8] 2870*7c478bd9Sstevel@tonic-gate addcc %o2, 24, %o2 ! restore count to long word offset 2871*7c478bd9Sstevel@tonic-gate ble,pt %ncc, .ci_med_lextra ! check for more long words to move 2872*7c478bd9Sstevel@tonic-gate nop 2873*7c478bd9Sstevel@tonic-gate.ci_med_lword: 2874*7c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 ! read long word 2875*7c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 2876*7c478bd9Sstevel@tonic-gate stx %o3, [%o1] ! write long word 2877*7c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 2878*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_lword ! loop til 7 or fewer bytes left 2879*7c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 2880*7c478bd9Sstevel@tonic-gate.ci_med_lextra: 2881*7c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore rest of count 2882*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit ! if zero, then done 2883*7c478bd9Sstevel@tonic-gate deccc %o2 2884*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_byte 2885*7c478bd9Sstevel@tonic-gate nop 2886*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .ci_sm_half 2887*7c478bd9Sstevel@tonic-gate nop 2888*7c478bd9Sstevel@tonic-gate 2889*7c478bd9Sstevel@tonic-gate .align 16 2890*7c478bd9Sstevel@tonic-gate nop ! 
instruction alignment 2891*7c478bd9Sstevel@tonic-gate ! see discussion at start of file 2892*7c478bd9Sstevel@tonic-gate.ci_med_word: 2893*7c478bd9Sstevel@tonic-gate btst 3, %o0 ! check for 2894*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_med_word1 ! word alignment 2895*7c478bd9Sstevel@tonic-gate nop 2896*7c478bd9Sstevel@tonic-gate.ci_med_word0: 2897*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 2898*7c478bd9Sstevel@tonic-gate inc %o0 2899*7c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 2900*7c478bd9Sstevel@tonic-gate inc %o1 2901*7c478bd9Sstevel@tonic-gate btst 3, %o0 2902*7c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_med_word0 2903*7c478bd9Sstevel@tonic-gate dec %o2 2904*7c478bd9Sstevel@tonic-gate! 2905*7c478bd9Sstevel@tonic-gate! Now word aligned and have at least 36 bytes to move 2906*7c478bd9Sstevel@tonic-gate! 2907*7c478bd9Sstevel@tonic-gate.ci_med_word1: 2908*7c478bd9Sstevel@tonic-gate sub %o2, 15, %o2 ! adjust count to allow cc zero test 2909*7c478bd9Sstevel@tonic-gate.ci_med_wmove: 2910*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 2911*7c478bd9Sstevel@tonic-gate subcc %o2, 16, %o2 ! reduce count by 16 2912*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 2913*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 2914*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! repeat for a total for 4 words 2915*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 2916*7c478bd9Sstevel@tonic-gate stw %o3, [%o1 + 4] 2917*7c478bd9Sstevel@tonic-gate add %o1, 16, %o1 ! advance DST by 16 2918*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 2919*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 2920*7c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 8] 2921*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 2922*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 2923*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_wmove ! 
loop til 15 or fewer bytes left 2924*7c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 4] 2925*7c478bd9Sstevel@tonic-gate addcc %o2, 12, %o2 ! restore count to word offset 2926*7c478bd9Sstevel@tonic-gate ble,pt %ncc, .ci_med_wextra ! check for more words to move 2927*7c478bd9Sstevel@tonic-gate nop 2928*7c478bd9Sstevel@tonic-gate.ci_med_word2: 2929*7c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 2930*7c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 2931*7c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 2932*7c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 2933*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_word2 ! loop til 3 or fewer bytes left 2934*7c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2935*7c478bd9Sstevel@tonic-gate.ci_med_wextra: 2936*7c478bd9Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore rest of count 2937*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit ! if zero, then done 2938*7c478bd9Sstevel@tonic-gate deccc %o2 2939*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_byte 2940*7c478bd9Sstevel@tonic-gate nop 2941*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .ci_sm_half 2942*7c478bd9Sstevel@tonic-gate nop 2943*7c478bd9Sstevel@tonic-gate 2944*7c478bd9Sstevel@tonic-gate .align 16 2945*7c478bd9Sstevel@tonic-gate nop ! instruction alignment 2946*7c478bd9Sstevel@tonic-gate ! see discussion at start of file 2947*7c478bd9Sstevel@tonic-gate.ci_med_half: 2948*7c478bd9Sstevel@tonic-gate btst 1, %o0 ! check for 2949*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_med_half1 ! half word alignment 2950*7c478bd9Sstevel@tonic-gate nop 2951*7c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 2952*7c478bd9Sstevel@tonic-gate inc %o0 2953*7c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 2954*7c478bd9Sstevel@tonic-gate inc %o1 2955*7c478bd9Sstevel@tonic-gate dec %o2 2956*7c478bd9Sstevel@tonic-gate! 2957*7c478bd9Sstevel@tonic-gate! 
Now half word aligned and have at least 38 bytes to move 2958*7c478bd9Sstevel@tonic-gate! 2959*7c478bd9Sstevel@tonic-gate.ci_med_half1: 2960*7c478bd9Sstevel@tonic-gate sub %o2, 7, %o2 ! adjust count to allow cc zero test 2961*7c478bd9Sstevel@tonic-gate.ci_med_hmove: 2962*7c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 ! read half word 2963*7c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 2964*7c478bd9Sstevel@tonic-gate sth %o3, [%o1] ! write half word 2965*7c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 2966*7c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 ! repeat for a total for 4 halfwords 2967*7c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 2968*7c478bd9Sstevel@tonic-gate sth %o3, [%o1 + 2] 2969*7c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 2970*7c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 2971*7c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 2972*7c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 4] 2973*7c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 2974*7c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 2975*7c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_hmove ! loop til 7 or fewer bytes left 2976*7c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 2] 2977*7c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore count 2978*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 2979*7c478bd9Sstevel@tonic-gate deccc %o2 2980*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_byte 2981*7c478bd9Sstevel@tonic-gate nop 2982*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .ci_sm_half 2983*7c478bd9Sstevel@tonic-gate nop 2984*7c478bd9Sstevel@tonic-gate 2985*7c478bd9Sstevel@tonic-gate.sm_copyin_err: 2986*7c478bd9Sstevel@tonic-gate membar #Sync 2987*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! 
restore old t_lofault 2988*7c478bd9Sstevel@tonic-gate mov SM_SAVE_SRC, %o0 2989*7c478bd9Sstevel@tonic-gate mov SM_SAVE_DST, %o1 2990*7c478bd9Sstevel@tonic-gate mov SM_SAVE_COUNT, %o2 2991*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler 2992*7c478bd9Sstevel@tonic-gate tst %o3 2993*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 3f ! if not, return error 2994*7c478bd9Sstevel@tonic-gate nop 2995*7c478bd9Sstevel@tonic-gate ldn [%o3 + CP_COPYIN], %o5 ! if handler, invoke it with 2996*7c478bd9Sstevel@tonic-gate jmp %o5 ! original arguments 2997*7c478bd9Sstevel@tonic-gate nop 2998*7c478bd9Sstevel@tonic-gate3: 2999*7c478bd9Sstevel@tonic-gate retl 3000*7c478bd9Sstevel@tonic-gate or %g0, -1, %o0 ! return errno value 3001*7c478bd9Sstevel@tonic-gate 3002*7c478bd9Sstevel@tonic-gate SET_SIZE(copyin) 3003*7c478bd9Sstevel@tonic-gate 3004*7c478bd9Sstevel@tonic-gate 3005*7c478bd9Sstevel@tonic-gate/* 3006*7c478bd9Sstevel@tonic-gate * The _more entry points are not intended to be used directly by 3007*7c478bd9Sstevel@tonic-gate * any caller from outside this file. They are provided to allow 3008*7c478bd9Sstevel@tonic-gate * profiling and dtrace of the portions of the copy code that uses 3009*7c478bd9Sstevel@tonic-gate * the floating point registers. 3010*7c478bd9Sstevel@tonic-gate * This entry is particularly important as DTRACE (at least as of 3011*7c478bd9Sstevel@tonic-gate * 4/2004) does not support leaf functions. 
 */

/*
 * copyin_more: non-leaf continuation of copyin() for copies larger than
 * VIS_COPY_THRESHOLD (and over the hw_copy_limit_* thresholds).  A real
 * register window is allocated here (see the comment above: DTrace cannot
 * instrument leaf routines), the per-thread fault handler (t_lofault) is
 * pointed at copyio_fault, and the copy proper is done with the VIS
 * floating-point block-load/store pipeline.
 *
 * Register-usage notes (symbolic names SAVE_SRC/SAVE_DST/SAVE_COUNT,
 * SRC/DST/CNT/TMP/REALSRC, REAL_LOFAULT come from macros defined earlier
 * in this file / assym.h — presumably mapping onto locals/ins of this
 * window; confirm against the definitions at the top of the file).
 * %l6 carries the saved previous t_lofault, with FPUSED_FLAG or'ed in
 * once the FP state has been committed, per the convention documented
 * at the head of the file.
 */
	ENTRY(copyin_more)
.copyin_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	set	.copyin_err, REAL_LOFAULT

/*
 * Copy ins that reach here are larger than VIS_COPY_THRESHOLD bytes
 */
.do_copyin:
	set	copyio_fault, %l7		! .copyio_fault is lofault val

	ldn	[THREAD_REG + T_LOFAULT], %l6	! save existing handler
	membar	#Sync				! sync error barrier
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

	! Preserve the original arguments so a fault handler can hand them
	! to an installed copyops vector unchanged.
	mov	%i0, SAVE_SRC
	mov	%i1, SAVE_DST
	mov	%i2, SAVE_COUNT

	FP_NOMIGRATE(6, 7)

	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
	btst	FPRS_FEF, %o2
	bz,a,pt	%icc, .do_blockcopyin	! FP unused: just enable it (annulled
	wr	%g0, FPRS_FEF, %fprs	! delay slot) and skip the save

	BST_FPQ2Q4_TOSTACK(%o2)		! FP live: spill %f16-%f62 to stack

.do_blockcopyin:
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	or	%l6, FPUSED_FLAG, %l6	! mark FP state as needing restore

	andcc	DST, VIS_BLOCKSIZE - 1, TMP
	mov	ASI_USER, %asi		! all source loads are user-space
	bz,pt	%ncc, 2f		! DST already block aligned
	neg	TMP
	add	TMP, VIS_BLOCKSIZE, TMP

	! TMP = bytes required to align DST on FP_BLOCK boundary
	! Using SRC as a tmp here
	cmp	TMP, 3
	bleu,pt	%ncc, 1f
	sub	CNT,TMP,CNT		! adjust main count
	sub	TMP, 3, TMP		! adjust for end of loop test
.ci_blkalign:
	lduba	[REALSRC]%asi, SRC	! move 4 bytes per loop iteration
	stb	SRC, [DST]
	subcc	TMP, 4, TMP
	lduba	[REALSRC + 1]%asi, SRC
	add	REALSRC, 4, REALSRC
	stb	SRC, [DST + 1]
	lduba	[REALSRC - 2]%asi, SRC
	add	DST, 4, DST
	stb	SRC, [DST - 2]
	lduba	[REALSRC - 1]%asi, SRC
	bgu,pt	%ncc, .ci_blkalign
	stb	SRC, [DST - 1]

	addcc	TMP, 3, TMP		! restore count adjustment
	bz,pt	%ncc, 2f		! no bytes left?
	nop
1:	lduba	[REALSRC]%asi, SRC	! trailing 1-3 alignment bytes
	inc	REALSRC
	inc	DST
	deccc	TMP
	bgu	%ncc, 1b
	stb	SRC, [DST - 1]

2:
	andn	REALSRC, 0x7, SRC	! SRC = REALSRC rounded down to 8
	alignaddr REALSRC, %g0, %g0	! set GSR.align for faligndata below

	! SRC - 8-byte aligned
	! DST - 64-byte aligned
	! Prime the software pipeline: prefetch ahead, load the first block
	! into %f16-%f30 and start realigning into %f48-%f62.
	prefetcha [SRC]%asi, #one_read
	prefetcha [SRC + (1 * VIS_BLOCKSIZE)]%asi, #one_read
	prefetcha [SRC + (2 * VIS_BLOCKSIZE)]%asi, #one_read
	prefetcha [SRC + (3 * VIS_BLOCKSIZE)]%asi, #one_read
	ldda	[SRC]%asi, %f16
#if CHEETAH_PREFETCH > 4
	prefetcha [SRC + (4 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
	ldda	[SRC + 0x08]%asi, %f18
#if CHEETAH_PREFETCH > 5
	prefetcha [SRC + (5 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
	ldda	[SRC + 0x10]%asi, %f20
#if CHEETAH_PREFETCH > 6
	prefetcha [SRC + (6 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
	faligndata %f16, %f18, %f48
	ldda	[SRC + 0x18]%asi, %f22
#if CHEETAH_PREFETCH > 7
	prefetcha [SRC + (7 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
	faligndata %f18, %f20, %f50
	ldda	[SRC + 0x20]%asi, %f24
	faligndata %f20, %f22, %f52
	ldda	[SRC + 0x28]%asi, %f26
	faligndata %f22, %f24, %f54
	ldda	[SRC + 0x30]%asi, %f28
	faligndata %f24, %f26, %f56
	ldda	[SRC + 0x38]%asi, %f30
	faligndata %f26, %f28, %f58
	ldda	[SRC + VIS_BLOCKSIZE]%asi, %f16
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	ba,a,pt	%ncc, 1f
	nop
	.align	16
	! Steady-state loop: while realigned block N sits in %f48-%f62 and
	! is block-stored, block N+1 is loaded and realigned behind it.
1:
	ldda	[SRC + 0x08]%asi, %f18
	faligndata %f28, %f30, %f60
	ldda	[SRC + 0x10]%asi, %f20
	faligndata %f30, %f16, %f62
	stda	%f48, [DST]ASI_BLK_P
	ldda	[SRC + 0x18]%asi, %f22
	faligndata %f16, %f18, %f48
	ldda	[SRC + 0x20]%asi, %f24
	faligndata %f18, %f20, %f50
	ldda	[SRC + 0x28]%asi, %f26
	faligndata %f20, %f22, %f52
	ldda	[SRC + 0x30]%asi, %f28
	faligndata %f22, %f24, %f54
	ldda	[SRC + 0x38]%asi, %f30
	faligndata %f24, %f26, %f56
	sub	CNT, VIS_BLOCKSIZE, CNT
	ldda	[SRC + VIS_BLOCKSIZE]%asi, %f16
	faligndata %f26, %f28, %f58
	prefetcha [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8]%asi, #one_read
	add	DST, VIS_BLOCKSIZE, DST
	prefetcha [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)]%asi, #one_read
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	cmp	CNT, VIS_BLOCKSIZE + 8
	bgu,pt	%ncc, 1b
	add	SRC, VIS_BLOCKSIZE, SRC

	! Pipeline drain.  The fsrc1 path (2:) is taken only when exactly one
	! block remains AND REALSRC is 8-byte aligned, so no realignment is
	! needed for the final block.
	! only if REALSRC & 0x7 is 0
	cmp	CNT, VIS_BLOCKSIZE
	bne	%ncc, 3f
	andcc	REALSRC, 0x7, %g0
	bz,pt	%ncc, 2f
	nop
3:
	faligndata %f28, %f30, %f60	! flush last realigned block
	faligndata %f30, %f16, %f62
	stda	%f48, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,pt	%ncc, 3f		! fall into byte-tail copy
	nop
2:
	ldda	[SRC + 0x08]%asi, %f18
	fsrc1	%f28, %f60
	ldda	[SRC + 0x10]%asi, %f20
	fsrc1	%f30, %f62
	stda	%f48, [DST]ASI_BLK_P
	ldda	[SRC + 0x18]%asi, %f22
	fsrc1	%f16, %f48
	ldda	[SRC + 0x20]%asi, %f24
	fsrc1	%f18, %f50
	ldda	[SRC + 0x28]%asi, %f26
	fsrc1	%f20, %f52
	ldda	[SRC + 0x30]%asi, %f28
	fsrc1	%f22, %f54
	ldda	[SRC + 0x38]%asi, %f30
	fsrc1	%f24, %f56
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	fsrc1	%f26, %f58
	fsrc1	%f28, %f60
	fsrc1	%f30, %f62
	stda	%f48, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,a,pt	%ncc, 4f
	nop

3:	tst	CNT			! any sub-block residue left?
	bz,a	%ncc, 4f
	nop

5:	lduba	[REALSRC]ASI_USER, TMP	! byte-at-a-time tail
	inc	REALSRC
	inc	DST
	deccc	CNT
	bgu	%ncc, 5b
	stb	TMP, [DST - 1]
4:

.copyin_exit:
	membar	#Sync

	! fpRAS: periodically verify the FP pipeline actually produced the
	! data it was asked to move (see fpras_impl.h).
	FPRAS_INTERVAL(FPRAS_COPYIN, 1, %l5, %o2, %o3, %o4, %o5, 8)
	FPRAS_REWRITE_TYPE1(1, %l5, %f48, %o2, 9)
	FPRAS_CHECK(FPRAS_COPYIN, %l5, 9)	! lose outputs

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f		! FP was unused on entry
	nop

	BLD_FPQ2Q4_FROMSTACK(%o2)	! reload caller's FP regs from stack

	ba,pt	%ncc, 1f
	wr	%o3, 0, %fprs		! restore fprs

4:
	FZEROQ2Q4			! scrub the FP regs we dirtied
	wr	%o3, 0, %fprs		! restore fprs

1:
	membar	#Sync				! sync error barrier
	andn	%l6, FPUSED_FLAG, %l6
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)
	ret
	restore	%g0, 0, %o0		! return 0 (success)
/*
 * We got here because of a fault during copyin
 * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
 */
.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4	! check for copyop handler
	tst	%o4
	bz,pt	%ncc, 2f			! if not, return error
	nop
	ldn	[%o4 + CP_COPYIN], %g2		! if handler, invoke it with
	jmp	%g2				! original arguments
	restore	%g0, 0, %g0		! dispose of copy window
2:
	ret
	restore	%g0, -1, %o0		! return error value

	SET_SIZE(copyin_more)

#endif /* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else /* lint */

	ENTRY(xcopyin)

	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
	bleu,pt	%ncc, .xcopyin_small		! go to small copy
	xor	%o0, %o1, %o3			! are src, dst alignable?
	btst	7, %o3				!
	bz,pt	%ncc, .xcopyin_8		! check for longword alignment
	nop
	btst	1, %o3				!
	bz,pt	%ncc, .xcopyin_2		! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .xcopyin_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .xcopyin_small		! go to small copy
	nop
	ba,pt	%ncc, .xcopyin_more		!
otherwise go to large copy 3287*7c478bd9Sstevel@tonic-gate nop 3288*7c478bd9Sstevel@tonic-gate.xcopyin_2: 3289*7c478bd9Sstevel@tonic-gate btst 3, %o3 ! 3290*7c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyin_4 ! check for word alignment 3291*7c478bd9Sstevel@tonic-gate nop 3292*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 3293*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 3294*7c478bd9Sstevel@tonic-gate tst %o3 3295*7c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 3296*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3297*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 3298*7c478bd9Sstevel@tonic-gate nop 3299*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 3300*7c478bd9Sstevel@tonic-gate nop 3301*7c478bd9Sstevel@tonic-gate.xcopyin_4: 3302*7c478bd9Sstevel@tonic-gate ! already checked longword, must be word aligned 3303*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 3304*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 3305*7c478bd9Sstevel@tonic-gate tst %o3 3306*7c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 3307*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3308*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 3309*7c478bd9Sstevel@tonic-gate nop 3310*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 3311*7c478bd9Sstevel@tonic-gate nop 3312*7c478bd9Sstevel@tonic-gate.xcopyin_8: 3313*7c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 3314*7c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 3315*7c478bd9Sstevel@tonic-gate tst %o3 3316*7c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 3317*7c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! 
if length <= limit 3318*7c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 3319*7c478bd9Sstevel@tonic-gate nop 3320*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 3321*7c478bd9Sstevel@tonic-gate nop 3322*7c478bd9Sstevel@tonic-gate 3323*7c478bd9Sstevel@tonic-gate.xcopyin_small: 3324*7c478bd9Sstevel@tonic-gate sethi %hi(.sm_xcopyin_err), %o5 ! .sm_xcopyin_err is lofault value 3325*7c478bd9Sstevel@tonic-gate or %o5, %lo(.sm_xcopyin_err), %o5 3326*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofaul 3327*7c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 3328*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .sm_do_copyin ! common code 3329*7c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] 3330*7c478bd9Sstevel@tonic-gate 3331*7c478bd9Sstevel@tonic-gate.xcopyin_more: 3332*7c478bd9Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3333*7c478bd9Sstevel@tonic-gate sethi %hi(.xcopyin_err), REAL_LOFAULT ! .xcopyin_err is lofault value 3334*7c478bd9Sstevel@tonic-gate ba,pt %ncc, .do_copyin 3335*7c478bd9Sstevel@tonic-gate or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT 3336*7c478bd9Sstevel@tonic-gate 3337*7c478bd9Sstevel@tonic-gate/* 3338*7c478bd9Sstevel@tonic-gate * We got here because of fault during xcopyin 3339*7c478bd9Sstevel@tonic-gate * Errno value is in ERRNO 3340*7c478bd9Sstevel@tonic-gate */ 3341*7c478bd9Sstevel@tonic-gate.xcopyin_err: 3342*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler 3343*7c478bd9Sstevel@tonic-gate tst %o4 3344*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 3345*7c478bd9Sstevel@tonic-gate nop 3346*7c478bd9Sstevel@tonic-gate ldn [%o4 + CP_XCOPYIN], %g2 ! if handler, invoke it with 3347*7c478bd9Sstevel@tonic-gate jmp %g2 ! original arguments 3348*7c478bd9Sstevel@tonic-gate restore %g0, 0, %g0 ! 
dispose of copy window 3349*7c478bd9Sstevel@tonic-gate2: 3350*7c478bd9Sstevel@tonic-gate ret 3351*7c478bd9Sstevel@tonic-gate restore ERRNO, 0, %o0 ! return errno value 3352*7c478bd9Sstevel@tonic-gate 3353*7c478bd9Sstevel@tonic-gate.sm_xcopyin_err: 3354*7c478bd9Sstevel@tonic-gate 3355*7c478bd9Sstevel@tonic-gate membar #Sync 3356*7c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3357*7c478bd9Sstevel@tonic-gate mov SM_SAVE_SRC, %o0 3358*7c478bd9Sstevel@tonic-gate mov SM_SAVE_DST, %o1 3359*7c478bd9Sstevel@tonic-gate mov SM_SAVE_COUNT, %o2 3360*7c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler 3361*7c478bd9Sstevel@tonic-gate tst %o3 3362*7c478bd9Sstevel@tonic-gate bz,pt %ncc, 3f ! if not, return error 3363*7c478bd9Sstevel@tonic-gate nop 3364*7c478bd9Sstevel@tonic-gate ldn [%o3 + CP_XCOPYIN], %o5 ! if handler, invoke it with 3365*7c478bd9Sstevel@tonic-gate jmp %o5 ! original arguments 3366*7c478bd9Sstevel@tonic-gate nop 3367*7c478bd9Sstevel@tonic-gate3: 3368*7c478bd9Sstevel@tonic-gate retl 3369*7c478bd9Sstevel@tonic-gate or %g1, 0, %o0 ! 
! return errno value

	SET_SIZE(xcopyin)

#endif	/* lint */

#ifdef	lint

/*ARGSUSED*/
int
xcopyin_little(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	/*
	 * xcopyin_little - copy count (%o2) bytes from user space
	 * (%o0) to kernel space (%o1), reading the user bytes through
	 * the little-endian secondary-user ASI (ASI_AIUSL), walking
	 * from the last byte toward the first.  Returns 0 on success;
	 * a fault routes to .xcopyio_err, which returns the errno
	 * value in %g1.
	 */
	ENTRY(xcopyin_little)
	sethi	%hi(.xcopyio_err), %o5
	or	%o5, %lo(.xcopyio_err), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]
	mov	%o4, %o5			! %o5 = saved t_lofault

	subcc	%g0, %o2, %o3			! %o3 = -count (loop index)
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f			! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0			! start w/last byte
	add	%o1, %o2, %o1
	lduba	[%o0 + %o3]ASI_AIUSL, %o4

1:	stb	%o4, [%o1 + %o3]
	inccc	%o3
	sub	%o0, 2, %o0			! get next byte
	bcc,a,pt %ncc, 1b
	lduba	[%o0 + %o3]ASI_AIUSL, %o4

2:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0			! return (0)

.xcopyio_err:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0			! return errno value in %g1

	SET_SIZE(xcopyin_little)

#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */
#if defined(lint)

/* ARGSUSED */
void
copyin_noerr(const void *ufrom, void *kto, size_t count)
{}

#else	/* lint */
	ENTRY(copyin_noerr)

	cmp	%o2, VIS_COPY_THRESHOLD	! check for leaf rtn case
	bleu,pt	%ncc, .copyin_ne_small	! go to larger cases
	xor	%o0, %o1, %o3		! are src, dst alignable?
	btst	7, %o3			!
	bz,pt	%ncc, .copyin_ne_8	! check for longword alignment
	nop
	btst	1, %o3			!
	bz,pt	%ncc, .copyin_ne_2	! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyin_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
	nop
.copyin_ne_2:
	btst	3, %o3			!
	bz,pt	%ncc, .copyin_ne_4	! check for word alignment
	nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyin_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
	nop
.copyin_ne_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyin_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
	nop
.copyin_ne_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyin_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
	nop

.copyin_ne_small:
	! if no lofault handler is currently installed, run without
	! one; otherwise route faults to .sm_copyio_noerr
	ldn	[THREAD_REG + T_LOFAULT], %o4
	tst	%o4
	bz,pn	%ncc, .sm_do_copyin
	nop
	sethi	%hi(.sm_copyio_noerr), %o5
	or	%o5, %lo(.sm_copyio_noerr), %o5
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .sm_do_copyin
	stn	%o5, [THREAD_REG + T_LOFAULT]	! set/save t_lofault
! set/save t_lofault

.copyin_noerr_more:
	! large-copy path: share .do_copyin with .copyio_noerr as the
	! fault handler
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	ba,pt	%ncc, .do_copyin
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT

.copyio_noerr:
	! fault in the large-copy path: %l6 holds the previously
	! installed error handler (see register usage convention at
	! the top of this file)
	jmp	%l6
	restore	%g0, 0, %g0			! dispose of copy window

.sm_copyio_noerr:
	! fault in the small-copy path: put back the caller's
	! t_lofault (%o4) and jump to it
	membar	#Sync
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore t_lofault
	jmp	%o4
	nop

	SET_SIZE(copyin_noerr)
#endif	/* lint */

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

#if defined(lint)

/* ARGSUSED */
void
copyout_noerr(const void *kfrom, void *uto, size_t count)
{}

#else	/* lint */
	ENTRY(copyout_noerr)

	cmp	%o2, VIS_COPY_THRESHOLD	! check for leaf rtn case
	bleu,pt	%ncc, .copyout_ne_small	! go to larger cases
	xor	%o0, %o1, %o3		! are src, dst alignable?
	btst	7, %o3			!
	bz,pt	%ncc, .copyout_ne_8	! check for longword alignment
	nop
	btst	1, %o3			!
	bz,pt	%ncc, .copyout_ne_2	! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_2:
	btst	3, %o3			!
	bz,pt	%ncc, .copyout_ne_4	! check for word alignment
	nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop

.copyout_ne_small:
	! if no lofault handler is currently installed, run without
	! one; otherwise route faults to .sm_copyio_noerr
	ldn	[THREAD_REG + T_LOFAULT], %o4
	tst	%o4
	bz,pn	%ncc, .sm_do_copyout
	nop
	sethi	%hi(.sm_copyio_noerr), %o5
	or	%o5, %lo(.sm_copyio_noerr), %o5
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .sm_do_copyout
	stn	%o5, [THREAD_REG + T_LOFAULT]	! set/save t_lofault
! set/save t_lofault

.copyout_noerr_more:
	! large-copy path: share .do_copyout with .copyio_noerr as the
	! fault handler
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	ba,pt	%ncc, .do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT

	SET_SIZE(copyout_noerr)
#endif	/* lint */


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * longer than 256 bytes in length using spitfire's block stores.  If
 * the criteria for using this routine are not met then it calls bzero
 * and returns 1.  Otherwise 0 is returned indicating success.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
#ifdef	lint
/*ARGSUSED*/
int
hwblkclr(void *addr, size_t len)
{
	return(0);
}
#else	/* lint */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)
	! %l0 - saved fprs
	! %l1 - pointer to saved %d0 block
	! %l2 - saved curthread->t_lwp

	ENTRY(hwblkclr)
	! get another window w/space for one aligned block of saved fpregs
	save	%sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp

	! Must be block-aligned
	andcc	%i0, (VIS_BLOCKSIZE-1), %g0
	bnz,pn	%ncc, 1f
	nop

	! ... and must be 256 bytes or more
	cmp	%i1, 256
	blu,pn	%ncc, 1f
	nop

	! ... and length must be a multiple of VIS_BLOCKSIZE
	andcc	%i1, (VIS_BLOCKSIZE-1), %g0
	bz,pn	%ncc, 2f
	nop

1:	! punt, call bzero but notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1
	ret
	restore	%g0, 1, %o0	! return (1) - did not use block operations

2:	rd	%fprs, %l0	! check for unused fp
	btst	FPRS_FEF, %l0
	bz,pt	%icc, 1f
	nop

	! save in-use fpregs on stack, in a block-aligned slot
	membar	#Sync
	add	%fp, STACK_BIAS - 65, %l1
	and	%l1, -VIS_BLOCKSIZE, %l1
	stda	%d0, [%l1]ASI_BLK_P

1:	membar	#StoreStore|#StoreLoad|#LoadStore
	wr	%g0, FPRS_FEF, %fprs
	wr	%g0, ASI_BLK_P, %asi

	! Clear block
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fzero	%d8
	fzero	%d10
	fzero	%d12
	fzero	%d14

	mov	256, %i3
	ba,pt	%ncc, .pz_doblock
	nop

.pz_blkstart:
      ! stda	%d0, [%i0 + 192]%asi	! in dly slot of branch that got us here
	stda	%d0, [%i0 + 128]%asi
	stda	%d0, [%i0 + 64]%asi
	stda	%d0, [%i0]%asi
.pz_zinst:
	add	%i0, %i3, %i0
	sub	%i1, %i3, %i1
.pz_doblock:
	! main loop: clear 256 bytes per iteration while at least 256
	! bytes remain (the first of the four block stores sits in the
	! annulled delay slot of this branch)
	cmp	%i1, 256
	bgeu,a	%ncc, .pz_blkstart
	stda	%d0, [%i0 + 192]%asi

	cmp	%i1, 64
	blu	%ncc, .pz_finish

	! 64..255-byte tail: compute a jump target inside the store
	! sequence above so that exactly the needed number of block
	! stores execute before falling through to .pz_zinst
	andn	%i1, (64-1), %i3
	srl	%i3, 4, %i2		! using blocks, 1 instr / 16 words
	set	.pz_zinst, %i4
	sub	%i4, %i2, %i4
	jmp	%i4
	nop

.pz_finish:
	membar	#Sync
	btst	FPRS_FEF, %l0
	bz,a	.pz_finished
	wr	%l0, 0, %fprs		! restore fprs

	! restore fpregs from stack
	ldda	[%l1]ASI_BLK_P, %d0
	membar	#Sync
	wr	%l0, 0, %fprs		! restore fprs

.pz_finished:
	ret
	restore	%g0, 0, %o0		! return (bzero or not)
! return (bzero or not)

	SET_SIZE(hwblkclr)
#endif	/* lint */

#ifdef	lint
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else	/*!lint */
	/*
	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
	 * using physical addresses.
	 * Interrupts are disabled (PSTATE_IE cleared) for the
	 * duration and re-enabled on return; the destination is
	 * invalidated via ASI_DC_INVAL before the stores.
	 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1		! save current pstate in %g1
	andn	%g1, PSTATE_IE, %g2	! ... and disable interrupts
	wrpr	%g0, %g2, %pstate

	rdpr	%pstate, %g0		! NOTE(review): dummy read, presumably
					! to serialize the wrpr - confirm

	! load the 32 source bytes by physical address
	ldxa	[%o0]ASI_MEM, %o2
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5

	stxa	%g0, [%o1]ASI_DC_INVAL	! invalidate dst cache line
	membar	#Sync

	! store the 32 bytes by physical address
	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	retl
	wrpr	%g0, %g1, %pstate	! restore saved pstate

	SET_SIZE(hw_pa_bcopy32)

#endif	/* lint */

#if defined(lint)

/*
 * Tunables consulted by the copy routines above: use_hw_bcopy /
 * use_hw_bzero enable the hardware copy/zero paths, and the
 * hw_copy_limit_* values are the per-alignment length limits at or
 * below which the small copy is used (zero disables the HW copy for
 * that alignment).
 */
int use_hw_bcopy = 1;
int use_hw_bzero = 1;
uint_t hw_copy_limit_1 = 0;
uint_t hw_copy_limit_2 = 0;
uint_t hw_copy_limit_4 = 0;
uint_t hw_copy_limit_8 = 0;

#else	/* !lint */

	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1
	DGDEF(hw_copy_limit_1)
	.word	0
	DGDEF(hw_copy_limit_2)
	.word	0
	DGDEF(hw_copy_limit_4)
	.word	0
	DGDEF(hw_copy_limit_8)
	.word	0

	.align	64
	.section ".text"
#endif	/* !lint */