17c478bd9Sstevel@tonic-gate/* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5340af271Swh94709 * Common Development and Distribution License (the "License"). 6340af271Swh94709 * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate/* 22*280575beSPatrick McGehearty * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
237c478bd9Sstevel@tonic-gate */ 247c478bd9Sstevel@tonic-gate 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate#include <sys/param.h> 277c478bd9Sstevel@tonic-gate#include <sys/errno.h> 287c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h> 297c478bd9Sstevel@tonic-gate#include <sys/vtrace.h> 307c478bd9Sstevel@tonic-gate#include <sys/machthread.h> 317c478bd9Sstevel@tonic-gate#include <sys/clock.h> 327c478bd9Sstevel@tonic-gate#include <sys/asi.h> 337c478bd9Sstevel@tonic-gate#include <sys/fsr.h> 347c478bd9Sstevel@tonic-gate#include <sys/privregs.h> 357c478bd9Sstevel@tonic-gate#include <sys/machasi.h> 367c478bd9Sstevel@tonic-gate#include <sys/niagaraasi.h> 377c478bd9Sstevel@tonic-gate 387c478bd9Sstevel@tonic-gate#if !defined(lint) 397c478bd9Sstevel@tonic-gate#include "assym.h" 407c478bd9Sstevel@tonic-gate#endif /* lint */ 417c478bd9Sstevel@tonic-gate 427c478bd9Sstevel@tonic-gate 437c478bd9Sstevel@tonic-gate/* 447c478bd9Sstevel@tonic-gate * Pseudo-code to aid in understanding the control flow of the 457c478bd9Sstevel@tonic-gate * bcopy/kcopy routine. 467c478bd9Sstevel@tonic-gate * 47473b13d4Sae112802 * ! WARNING : <Register usage convention> 48473b13d4Sae112802 * ! In kcopy(), %o5 holds the previous error handler and a flag 49473b13d4Sae112802 * ! LOFAULT_SET (low bits). The %o5 is null in bcopy(). 50473b13d4Sae112802 * ! The %o5 is not available for any other use. * ! NOTE: the !NIAGARA_IMPL code uses the flag the other way round: * ! kcopy() leaves LOFAULT_SET clear in %o5 and bcopy() sets it when it * ! replaces an existing t_lofault handler. 51473b13d4Sae112802 * 52*280575beSPatrick McGehearty * On entry: 53*280575beSPatrick McGehearty * ! Determine whether to use the FP register version or the 54*280575beSPatrick McGehearty * ! leaf routine version depending on the size of the copy. 55*280575beSPatrick McGehearty * ! Set up error handling accordingly. 56*280575beSPatrick McGehearty * ! The transition point depends on FP_COPY 57*280575beSPatrick McGehearty * ! 
For both versions %o5 is reserved 58*280575beSPatrick McGehearty * 59473b13d4Sae112802 * kcopy(): 60*280575beSPatrick McGehearty * if(length > FP_COPY) 61*280575beSPatrick McGehearty * go to regular_kcopy 62*280575beSPatrick McGehearty * 63*280575beSPatrick McGehearty * ! Setup_leaf_rtn_error_handler 64*280575beSPatrick McGehearty * %o5 = curthread->t_lofault; ! save existing handler in %o5 65*280575beSPatrick McGehearty * %o5 |= LOFAULT_SET; ! ORed with LOFAULT_SET flag 66*280575beSPatrick McGehearty * curthread->t_lofault = .sm_copyerr; 67*280575beSPatrick McGehearty * goto small_bcopy(); 68*280575beSPatrick McGehearty * 69*280575beSPatrick McGehearty * regular_kcopy: 70*280575beSPatrick McGehearty * save_registers() 71473b13d4Sae112802 * %o5 = curthread->t_lofault; ! save existing handler in %o5 72473b13d4Sae112802 * %o5 |= LOFAULT_SET; ! ORed with LOFAULT_SET flag 737c478bd9Sstevel@tonic-gate * curthread->t_lofault = .copyerr; 74*280575beSPatrick McGehearty * goto do_copy(); 757c478bd9Sstevel@tonic-gate * 76473b13d4Sae112802 * bcopy(): 77*280575beSPatrick McGehearty * if(length > FP_COPY) 78*280575beSPatrick McGehearty * go to regular_bcopy 797c478bd9Sstevel@tonic-gate * 80*280575beSPatrick McGehearty * ! Setup_leaf_rtn_error_handler 81*280575beSPatrick McGehearty * %o5 = curthread->t_lofault; ! save existing handler in %o5 82*280575beSPatrick McGehearty * curthread->t_lofault = .sm_copyerr; 83*280575beSPatrick McGehearty * goto small_bcopy(); 847c478bd9Sstevel@tonic-gate * 85*280575beSPatrick McGehearty * regular_bcopy: 86*280575beSPatrick McGehearty * %o5 = curthread->t_lofault; ! save existing handler in %o5 87*280575beSPatrick McGehearty * curthread->t_lofault = .copyerr; 88*280575beSPatrick McGehearty * goto do_copy(); 89*280575beSPatrick McGehearty * 90*280575beSPatrick McGehearty * small_bcopy: 91*280575beSPatrick McGehearty * ! 
handle copies smaller than FP_COPY 92*280575beSPatrick McGehearty * restore t_lofault handler 93*280575beSPatrick McGehearty * exit 94*280575beSPatrick McGehearty * 95*280575beSPatrick McGehearty * do_copy: 96*280575beSPatrick McGehearty * ! handle copies larger than FP_COPY 97*280575beSPatrick McGehearty * save fp_regs 98473b13d4Sae112802 * blockcopy; 99*280575beSPatrick McGehearty * restore fp_regs 100473b13d4Sae112802 * restore t_lofault handler if came from kcopy(); 101473b13d4Sae112802 * 1027c478bd9Sstevel@tonic-gate * 103*280575beSPatrick McGehearty * In leaf lofault handler: 104473b13d4Sae112802 * curthread->t_lofault = (%o5 & ~LOFAULT_SET); ! restore old t_lofault 1057c478bd9Sstevel@tonic-gate * return (errno) 1067c478bd9Sstevel@tonic-gate * 107*280575beSPatrick McGehearty * In lofault handler: 108*280575beSPatrick McGehearty * curthread->t_lofault = (%o5 & ~LOFAULT_SET); ! restore old t_lofault 109*280575beSPatrick McGehearty * restore fp_regs 110*280575beSPatrick McGehearty * return (errno) 111*280575beSPatrick McGehearty * 112*280575beSPatrick McGehearty * 113*280575beSPatrick McGehearty * 114*280575beSPatrick McGehearty * For all of bcopy/copyin/copyout the copy logic is specialized according 115*280575beSPatrick McGehearty * to how the src and dst is aligned and how much data needs to be moved. 
116*280575beSPatrick McGehearty * The following comments apply to the N2/RF code (#if !defined(NIAGARA_IMPL)) 117*280575beSPatrick McGehearty * 118*280575beSPatrick McGehearty * N2/RF Flow : 119*280575beSPatrick McGehearty * 120*280575beSPatrick McGehearty * if (count < FP_COPY) { (584 bytes) 121*280575beSPatrick McGehearty * set small fault handler (no register window save/restore) 122*280575beSPatrick McGehearty * if count < SHORTCOPY (7 bytes) 123*280575beSPatrick McGehearty * copy bytes; go to short_exit 124*280575beSPatrick McGehearty * else 125*280575beSPatrick McGehearty * determine dst alignment, move minimum bytes/halfwords to 126*280575beSPatrick McGehearty * get dst aligned on long word boundary 127*280575beSPatrick McGehearty * if( src is on long word boundary ) { 128*280575beSPatrick McGehearty * medlong: src/dst aligned on 8 bytes 129*280575beSPatrick McGehearty * copy with ldx/stx in 4-way unrolled loop; 130*280575beSPatrick McGehearty * copy final 0-31 bytes; go to short_exit 131*280575beSPatrick McGehearty * } else { src/dst not aligned on 8 bytes 132*280575beSPatrick McGehearty * if src is word aligned, ld/st words in 32-byte chunks 133*280575beSPatrick McGehearty * if src is half word aligned, ld half, ld word, ld half; pack 134*280575beSPatrick McGehearty * into long word, store long words in 32-byte chunks 135*280575beSPatrick McGehearty * if src is byte aligned, ld byte,half,word parts; pack into long 136*280575beSPatrick McGehearty * word, store long words in 32-byte chunks 137*280575beSPatrick McGehearty * move final 0-31 bytes according to src alignment; go to short_exit 138*280575beSPatrick McGehearty * short_exit: 139*280575beSPatrick McGehearty * restore trap handler if needed, retl 140*280575beSPatrick McGehearty * else { More than FP_COPY bytes 141*280575beSPatrick McGehearty * set fault handler 142*280575beSPatrick McGehearty * disable kernel preemption 143*280575beSPatrick McGehearty * save registers, save FP registers if in use 
144*280575beSPatrick McGehearty * move bytes to align destination register on long word boundary 145*280575beSPatrick McGehearty * if(src is on long word boundary) { src/dst aligned on 8 bytes 146*280575beSPatrick McGehearty * align dst on 64 byte boundary; use 8-way test for each of 8 possible 147*280575beSPatrick McGehearty * src alignments relative to a 64 byte boundary to select the 148*280575beSPatrick McGehearty * 16-way unrolled loop (128 bytes) to use for 149*280575beSPatrick McGehearty * block load, fmovd, block-init-store, block-store, fmovd operations 150*280575beSPatrick McGehearty * then go to remain_stuff. 151*280575beSPatrick McGehearty * remain_stuff: move remaining bytes. go to long_exit 152*280575beSPatrick McGehearty * } else { 153*280575beSPatrick McGehearty * setup alignaddr for faligndata instructions 154*280575beSPatrick McGehearty * align dst on 64 byte boundary; use 8-way test for each of 8 possible 155*280575beSPatrick McGehearty * src alignments to nearest long word relative to 64 byte boundary to 156*280575beSPatrick McGehearty * select the 8-way unrolled loop (64 bytes) to use for 157*280575beSPatrick McGehearty * block load, falign, fmovd, block-store loop 158*280575beSPatrick McGehearty * (only use block-init-store when src/dst on 8 byte boundaries.) 159*280575beSPatrick McGehearty * goto unalign_done. 160*280575beSPatrick McGehearty * unalign_done: 161*280575beSPatrick McGehearty * move remaining bytes for unaligned cases. go to long_exit 162*280575beSPatrick McGehearty * long_exit: 163*280575beSPatrick McGehearty * restore %gsr, FP regs (either from stack or set to zero), 164*280575beSPatrick McGehearty * restore trap handler, check for kernel preemption request, 165*280575beSPatrick McGehearty * handle if needed, ret. 
166*280575beSPatrick McGehearty * } 167*280575beSPatrick McGehearty * 168*280575beSPatrick McGehearty * Other platforms include hw_bcopy_limit_[1248] to control the exact 169*280575beSPatrick McGehearty * point where the FP register code is used. On those platforms, the 170*280575beSPatrick McGehearty * FP register code did not leave data in L2 cache, potentially affecting 171*280575beSPatrick McGehearty * performance more than the gain/loss from the algorithm difference. 172*280575beSPatrick McGehearty * For N2/RF, block store places data in the L2 cache, so use or non-use 173*280575beSPatrick McGehearty * of the FP registers has no effect on L2 cache behavior. 174*280575beSPatrick McGehearty * The cost for testing hw_bcopy_limit_* according to different 175*280575beSPatrick McGehearty * alignments exceeds 50 cycles for all cases, even when hw_bcopy_limits 176*280575beSPatrick McGehearty * were not used. That cost was judged too high relative to the benefits, 177*280575beSPatrick McGehearty * so the hw_bcopy_limit option is omitted from this code. 1787c478bd9Sstevel@tonic-gate */ 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate/* 1817c478bd9Sstevel@tonic-gate * Copies of this many bytes or fewer are always done byte-for-byte. 1827c478bd9Sstevel@tonic-gate */ 1837c478bd9Sstevel@tonic-gate#define SMALL_LIMIT 7 1847c478bd9Sstevel@tonic-gate 1857c478bd9Sstevel@tonic-gate/* 186473b13d4Sae112802 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault 187473b13d4Sae112802 * handler was set. The flag is ORed into the low bits of the saved handler * address kept in %o5, so it has to be masked off again before that address * is written back to t_lofault. The !NIAGARA_IMPL section further down * repeats this definition with the same value and documents its own * bcopy/kcopy usage of the flag. 1887c478bd9Sstevel@tonic-gate */ 1897c478bd9Sstevel@tonic-gate#define LOFAULT_SET 2 1907c478bd9Sstevel@tonic-gate 1917c478bd9Sstevel@tonic-gate/* 1927c478bd9Sstevel@tonic-gate * This define is to align data for the unaligned source cases. 1937c478bd9Sstevel@tonic-gate * The data1, data2 and data3 is merged into data1 and data2. 1947c478bd9Sstevel@tonic-gate * The data3 is preserved for next merge. 
1957c478bd9Sstevel@tonic-gate */ 1967c478bd9Sstevel@tonic-gate#define ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp) \ 1977c478bd9Sstevel@tonic-gate sllx data1, lshift, data1 ;\ 1987c478bd9Sstevel@tonic-gate srlx data2, rshift, tmp ;\ 1997c478bd9Sstevel@tonic-gate or data1, tmp, data1 ;\ 2007c478bd9Sstevel@tonic-gate sllx data2, lshift, data2 ;\ 2017c478bd9Sstevel@tonic-gate srlx data3, rshift, tmp ;\ 2027c478bd9Sstevel@tonic-gate or data2, tmp, data2 2037c478bd9Sstevel@tonic-gate/* 2047c478bd9Sstevel@tonic-gate * ALIGN_DATA_EW merges data1 and data2 into one double word: 2057c478bd9Sstevel@tonic-gate * data1 = (data1 << lshift) | (data2 >> rshift). * data2 is left unchanged and tmp is used as scratch. 2067c478bd9Sstevel@tonic-gate */ 2077c478bd9Sstevel@tonic-gate#define ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp) \ 2087c478bd9Sstevel@tonic-gate sllx data1, lshift, data1 ;\ 2097c478bd9Sstevel@tonic-gate srlx data2, rshift, tmp ;\ 2107c478bd9Sstevel@tonic-gate or data1, tmp, data1 2117c478bd9Sstevel@tonic-gate 212340af271Swh94709#if !defined(NIAGARA_IMPL) 213340af271Swh94709/* 214340af271Swh94709 * Flags set in the lower bits of the t_lofault address: 215340af271Swh94709 * FPUSED_FLAG: The FP registers were in use and must be restored * (tested by .copyerr to decide whether FP state needs * to be reloaded or zeroed) 216*280575beSPatrick McGehearty * LOFAULT_SET: Set for bcopy calls, cleared for kcopy calls * (same value as the LOFAULT_SET definition earlier in * this file) 217340af271Swh94709 * COPY_FLAGS: Both of the above 218340af271Swh94709 * 219340af271Swh94709 * Other flags: 220340af271Swh94709 * KPREEMPT_FLAG: kpreempt needs to be called (.copyerr keeps this one * in %l1, not in the t_lofault address) 221340af271Swh94709 */ 222340af271Swh94709#define FPUSED_FLAG 1 223*280575beSPatrick McGehearty#define LOFAULT_SET 2 224*280575beSPatrick McGehearty#define COPY_FLAGS (FPUSED_FLAG | LOFAULT_SET) 225340af271Swh94709#define KPREEMPT_FLAG 4 226340af271Swh94709 /* * ALIGN_OFF_<lo>_<hi>: each macro issues eight faligndata operations that * fill %d48-%d62 from nine consecutive even-numbered double registers. * ALIGN_OFF_1_7 reads %d0-%d16; every following variant starts one double * register (8 bytes) later, up to ALIGN_OFF_56_63 which reads %d14-%d30. * The numbers in the names presumably give the source byte offset range * within a 64-byte block -- confirm against the callers. */ 227340af271Swh94709#define ALIGN_OFF_1_7 \ 228340af271Swh94709 faligndata %d0, %d2, %d48 ;\ 229340af271Swh94709 faligndata %d2, %d4, %d50 ;\ 230340af271Swh94709 faligndata %d4, %d6, %d52 ;\ 231340af271Swh94709 faligndata %d6, %d8, %d54 ;\ 232340af271Swh94709 faligndata %d8, %d10, 
%d56 ;\ 233340af271Swh94709 faligndata %d10, %d12, %d58 ;\ 234340af271Swh94709 faligndata %d12, %d14, %d60 ;\ 235340af271Swh94709 faligndata %d14, %d16, %d62 236340af271Swh94709 237340af271Swh94709#define ALIGN_OFF_8_15 \ 238340af271Swh94709 faligndata %d2, %d4, %d48 ;\ 239340af271Swh94709 faligndata %d4, %d6, %d50 ;\ 240340af271Swh94709 faligndata %d6, %d8, %d52 ;\ 241340af271Swh94709 faligndata %d8, %d10, %d54 ;\ 242340af271Swh94709 faligndata %d10, %d12, %d56 ;\ 243340af271Swh94709 faligndata %d12, %d14, %d58 ;\ 244340af271Swh94709 faligndata %d14, %d16, %d60 ;\ 245340af271Swh94709 faligndata %d16, %d18, %d62 246340af271Swh94709 247340af271Swh94709#define ALIGN_OFF_16_23 \ 248340af271Swh94709 faligndata %d4, %d6, %d48 ;\ 249340af271Swh94709 faligndata %d6, %d8, %d50 ;\ 250340af271Swh94709 faligndata %d8, %d10, %d52 ;\ 251340af271Swh94709 faligndata %d10, %d12, %d54 ;\ 252340af271Swh94709 faligndata %d12, %d14, %d56 ;\ 253340af271Swh94709 faligndata %d14, %d16, %d58 ;\ 254340af271Swh94709 faligndata %d16, %d18, %d60 ;\ 255340af271Swh94709 faligndata %d18, %d20, %d62 256340af271Swh94709 257340af271Swh94709#define ALIGN_OFF_24_31 \ 258340af271Swh94709 faligndata %d6, %d8, %d48 ;\ 259340af271Swh94709 faligndata %d8, %d10, %d50 ;\ 260340af271Swh94709 faligndata %d10, %d12, %d52 ;\ 261340af271Swh94709 faligndata %d12, %d14, %d54 ;\ 262340af271Swh94709 faligndata %d14, %d16, %d56 ;\ 263340af271Swh94709 faligndata %d16, %d18, %d58 ;\ 264340af271Swh94709 faligndata %d18, %d20, %d60 ;\ 265340af271Swh94709 faligndata %d20, %d22, %d62 266340af271Swh94709 267340af271Swh94709#define ALIGN_OFF_32_39 \ 268340af271Swh94709 faligndata %d8, %d10, %d48 ;\ 269340af271Swh94709 faligndata %d10, %d12, %d50 ;\ 270340af271Swh94709 faligndata %d12, %d14, %d52 ;\ 271340af271Swh94709 faligndata %d14, %d16, %d54 ;\ 272340af271Swh94709 faligndata %d16, %d18, %d56 ;\ 273340af271Swh94709 faligndata %d18, %d20, %d58 ;\ 274340af271Swh94709 faligndata %d20, %d22, %d60 ;\ 275340af271Swh94709 
faligndata %d22, %d24, %d62 276340af271Swh94709 277340af271Swh94709#define ALIGN_OFF_40_47 \ 278340af271Swh94709 faligndata %d10, %d12, %d48 ;\ 279340af271Swh94709 faligndata %d12, %d14, %d50 ;\ 280340af271Swh94709 faligndata %d14, %d16, %d52 ;\ 281340af271Swh94709 faligndata %d16, %d18, %d54 ;\ 282340af271Swh94709 faligndata %d18, %d20, %d56 ;\ 283340af271Swh94709 faligndata %d20, %d22, %d58 ;\ 284340af271Swh94709 faligndata %d22, %d24, %d60 ;\ 285340af271Swh94709 faligndata %d24, %d26, %d62 286340af271Swh94709 287340af271Swh94709#define ALIGN_OFF_48_55 \ 288340af271Swh94709 faligndata %d12, %d14, %d48 ;\ 289340af271Swh94709 faligndata %d14, %d16, %d50 ;\ 290340af271Swh94709 faligndata %d16, %d18, %d52 ;\ 291340af271Swh94709 faligndata %d18, %d20, %d54 ;\ 292340af271Swh94709 faligndata %d20, %d22, %d56 ;\ 293340af271Swh94709 faligndata %d22, %d24, %d58 ;\ 294340af271Swh94709 faligndata %d24, %d26, %d60 ;\ 295340af271Swh94709 faligndata %d26, %d28, %d62 296340af271Swh94709 297340af271Swh94709#define ALIGN_OFF_56_63 \ 298340af271Swh94709 faligndata %d14, %d16, %d48 ;\ 299340af271Swh94709 faligndata %d16, %d18, %d50 ;\ 300340af271Swh94709 faligndata %d18, %d20, %d52 ;\ 301340af271Swh94709 faligndata %d20, %d22, %d54 ;\ 302340af271Swh94709 faligndata %d22, %d24, %d56 ;\ 303340af271Swh94709 faligndata %d24, %d26, %d58 ;\ 304340af271Swh94709 faligndata %d26, %d28, %d60 ;\ 305340af271Swh94709 faligndata %d28, %d30, %d62 306340af271Swh94709 307*280575beSPatrick McGehearty/* 308*280575beSPatrick McGehearty * FP_COPY indicates the minimum number of bytes needed 309*280575beSPatrick McGehearty * to justify using FP/VIS-accelerated memory operations. 310*280575beSPatrick McGehearty * The FPBLK code assumes a minimum number of bytes are available 311*280575beSPatrick McGehearty * to be moved on entry. Check that code carefully before 312*280575beSPatrick McGehearty * reducing FP_COPY below 256. 
313*280575beSPatrick McGehearty */ 314*280575beSPatrick McGehearty#define FP_COPY 584 315*280575beSPatrick McGehearty#define SHORTCOPY 7 316*280575beSPatrick McGehearty#define ASI_STBI_P ASI_BLK_INIT_ST_QUAD_LDD_P 317*280575beSPatrick McGehearty#define ASI_STBI_AIUS ASI_BLK_INIT_QUAD_LDD_AIUS 318*280575beSPatrick McGehearty#define CACHE_LINE 64 319340af271Swh94709#define VIS_BLOCKSIZE 64 320340af271Swh94709 /* * SHORTCOPY: in the small-copy path, counts of SHORTCOPY bytes or fewer * branch straight to .bc_smallest without aligning the destination. * ASI_STBI_P and ASI_STBI_AIUS are short local names for ASI constants * that are not defined here (presumably they come from the included * sys/niagaraasi.h -- confirm). */ 321340af271Swh94709/* 322340af271Swh94709 * Size of stack frame in order to accommodate a 64-byte aligned 323340af271Swh94709 * floating-point register save area and 2 64-bit temp locations. 324340af271Swh94709 * All copy functions use three quadrants of fp registers; to assure a 325340af271Swh94709 * block-aligned three block buffer in which to save we must reserve 326340af271Swh94709 * four blocks on stack. 327340af271Swh94709 * 328340af271Swh94709 * _______________________________________ <-- %fp + STACK_BIAS 329340af271Swh94709 * | We may need to preserve 3 quadrants | 330340af271Swh94709 * | of fp regs, but since we do so with | 331340af271Swh94709 * | BST/BLD we need room in which to | 332340af271Swh94709 * | align to VIS_BLOCKSIZE bytes. So | 333340af271Swh94709 * | this area is 4 * VIS_BLOCKSIZE. 
| <-- - SAVED_FPREGS_OFFSET 334340af271Swh94709 * |-------------------------------------| 335340af271Swh94709 * | 8 bytes to save %fprs | <-- - SAVED_FPRS_OFFSET 336340af271Swh94709 * |-------------------------------------| 337340af271Swh94709 * | 8 bytes to save %gsr | <-- - SAVED_GSR_OFFSET 338340af271Swh94709 * --------------------------------------- 339340af271Swh94709 */ /* * With VIS_BLOCKSIZE at 64 these evaluate to: HWCOPYFRAMESIZE 272, * SAVED_FPREGS_OFFSET 256, SAVED_FPREGS_ADJUST 193, SAVED_FPRS_OFFSET 264 * and SAVED_GSR_OFFSET 272. * SAVED_FPREGS_ADJUST is the amount BST_FP_TOSTACK and BLD_FP_FROMSTACK * subtract from %fp + STACK_BIAS before rounding down to a VIS_BLOCKSIZE * boundary; three blocks plus one byte guarantees that the three saved * blocks start no lower than SAVED_FPREGS_OFFSET and end below * %fp + STACK_BIAS whatever the alignment of the frame. */ 340340af271Swh94709#define HWCOPYFRAMESIZE ((VIS_BLOCKSIZE * (3 + 1)) + (2 * 8)) 341340af271Swh94709#define SAVED_FPREGS_OFFSET (VIS_BLOCKSIZE * 4) 342340af271Swh94709#define SAVED_FPREGS_ADJUST ((VIS_BLOCKSIZE * 3) + 1) 343340af271Swh94709#define SAVED_FPRS_OFFSET (SAVED_FPREGS_OFFSET + 8) 344340af271Swh94709#define SAVED_GSR_OFFSET (SAVED_FPRS_OFFSET + 8) 345340af271Swh94709 346340af271Swh94709/* 347340af271Swh94709 * In FP copies if we do not have preserved data to restore over 348340af271Swh94709 * the fp regs we used then we must zero those regs to avoid 349340af271Swh94709 * exposing portions of the data to later threads (data security). 
350340af271Swh94709 */ /* * FZERO clears the double registers %f0-%f30 and %f48-%f62: %f0 and %f2 * are zeroed with fzero, and every other register is written with either * %f0 + %f2 or %f0 * %f2, both of which are zero. %f32-%f46 are not * touched. This is the same set of registers that BST_FP_TOSTACK saves * (blocks starting at %f0, %f16 and %f48). */ 351340af271Swh94709#define FZERO \ 352340af271Swh94709 fzero %f0 ;\ 353340af271Swh94709 fzero %f2 ;\ 354340af271Swh94709 faddd %f0, %f2, %f4 ;\ 355340af271Swh94709 fmuld %f0, %f2, %f6 ;\ 356340af271Swh94709 faddd %f0, %f2, %f8 ;\ 357340af271Swh94709 fmuld %f0, %f2, %f10 ;\ 358340af271Swh94709 faddd %f0, %f2, %f12 ;\ 359340af271Swh94709 fmuld %f0, %f2, %f14 ;\ 360340af271Swh94709 faddd %f0, %f2, %f16 ;\ 361340af271Swh94709 fmuld %f0, %f2, %f18 ;\ 362340af271Swh94709 faddd %f0, %f2, %f20 ;\ 363340af271Swh94709 fmuld %f0, %f2, %f22 ;\ 364340af271Swh94709 faddd %f0, %f2, %f24 ;\ 365340af271Swh94709 fmuld %f0, %f2, %f26 ;\ 366340af271Swh94709 faddd %f0, %f2, %f28 ;\ 367340af271Swh94709 fmuld %f0, %f2, %f30 ;\ 368340af271Swh94709 faddd %f0, %f2, %f48 ;\ 369340af271Swh94709 fmuld %f0, %f2, %f50 ;\ 370340af271Swh94709 faddd %f0, %f2, %f52 ;\ 371340af271Swh94709 fmuld %f0, %f2, %f54 ;\ 372340af271Swh94709 faddd %f0, %f2, %f56 ;\ 373340af271Swh94709 fmuld %f0, %f2, %f58 ;\ 374340af271Swh94709 faddd %f0, %f2, %f60 ;\ 375340af271Swh94709 fmuld %f0, %f2, %f62 376340af271Swh94709 37759ac0c16Sdavemq#if !defined(lint) 37859ac0c16Sdavemq 379340af271Swh94709/* 380340af271Swh94709 * Macros to save and restore fp registers to/from the stack. 381340af271Swh94709 * Used to save and restore in-use fp registers when we want to use FP. 
382340af271Swh94709 */ /* * BST_FP_TOSTACK(tmp1): computes %fp + STACK_BIAS - SAVED_FPREGS_ADJUST, * rounds it down to a VIS_BLOCKSIZE boundary, and block-stores (ASI_BLK_P) * the register blocks starting at %f0, %f16 and %f48 to three consecutive * VIS_BLOCKSIZE-sized slots, followed by membar #Sync. * BLD_FP_FROMSTACK(tmp1): recomputes the same address and block-loads the * three blocks back into %f0, %f16 and %f48, followed by membar #Sync. * Both macros overwrite tmp1 and rely on the current frame having been * created with room for HWCOPYFRAMESIZE. */ 383340af271Swh94709#define BST_FP_TOSTACK(tmp1) \ 384340af271Swh94709 /* membar #Sync */ ;\ 385340af271Swh94709 add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\ 386340af271Swh94709 and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\ 387340af271Swh94709 stda %f0, [tmp1]ASI_BLK_P ;\ 388340af271Swh94709 add tmp1, VIS_BLOCKSIZE, tmp1 ;\ 389340af271Swh94709 stda %f16, [tmp1]ASI_BLK_P ;\ 390340af271Swh94709 add tmp1, VIS_BLOCKSIZE, tmp1 ;\ 391340af271Swh94709 stda %f48, [tmp1]ASI_BLK_P ;\ 392340af271Swh94709 membar #Sync 393340af271Swh94709 394340af271Swh94709#define BLD_FP_FROMSTACK(tmp1) \ 395340af271Swh94709 /* membar #Sync - provided at copy completion */ ;\ 396340af271Swh94709 add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\ 397340af271Swh94709 and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\ 398340af271Swh94709 ldda [tmp1]ASI_BLK_P, %f0 ;\ 399340af271Swh94709 add tmp1, VIS_BLOCKSIZE, tmp1 ;\ 400340af271Swh94709 ldda [tmp1]ASI_BLK_P, %f16 ;\ 401340af271Swh94709 add tmp1, VIS_BLOCKSIZE, tmp1 ;\ 402340af271Swh94709 ldda [tmp1]ASI_BLK_P, %f48 ;\ 403340af271Swh94709 membar #Sync 404340af271Swh94709#endif /* !lint (closes the guard around the BST/BLD macros) */ 405340af271Swh94709 40659ac0c16Sdavemq#endif /* !NIAGARA_IMPL (closes the guard opened before the flag defines) */ 4077c478bd9Sstevel@tonic-gate/* 4087c478bd9Sstevel@tonic-gate * Copy a block of storage, returning an error code if `from' or 4097c478bd9Sstevel@tonic-gate * `to' takes a kernel pagefault which cannot be resolved. 
4107c478bd9Sstevel@tonic-gate * Returns errno value on pagefault error, 0 if all ok 4117c478bd9Sstevel@tonic-gate */ 4127c478bd9Sstevel@tonic-gate 4137c478bd9Sstevel@tonic-gate#if defined(lint) 4147c478bd9Sstevel@tonic-gate 4157c478bd9Sstevel@tonic-gate/* ARGSUSED */ 4167c478bd9Sstevel@tonic-gateint 4177c478bd9Sstevel@tonic-gatekcopy(const void *from, void *to, size_t count) 4187c478bd9Sstevel@tonic-gate{ return(0); } 4197c478bd9Sstevel@tonic-gate 4207c478bd9Sstevel@tonic-gate#else /* lint */ 4217c478bd9Sstevel@tonic-gate 4227c478bd9Sstevel@tonic-gate .seg ".text" 4237c478bd9Sstevel@tonic-gate .align 4 4247c478bd9Sstevel@tonic-gate 4257c478bd9Sstevel@tonic-gate ENTRY(kcopy) 426340af271Swh94709#if !defined(NIAGARA_IMPL) 427*280575beSPatrick McGehearty cmp %o2, FP_COPY ! check for small copy/leaf case 428*280575beSPatrick McGehearty bgt,pt %ncc, .kcopy_more ! 429*280575beSPatrick McGehearty nop 430*280575beSPatrick McGehearty.kcopy_small: ! setup error handler 431*280575beSPatrick McGehearty sethi %hi(.sm_copyerr), %o4 432*280575beSPatrick McGehearty or %o4, %lo(.sm_copyerr), %o4 ! .sm_copyerr is lofault value 433*280575beSPatrick McGehearty ldn [THREAD_REG + T_LOFAULT], %o5 ! save existing handler 434*280575beSPatrick McGehearty ! Note that we carefully do *not* flag the setting of 435*280575beSPatrick McGehearty ! t_lofault. 436*280575beSPatrick McGehearty membar #Sync ! sync error barrier 437*280575beSPatrick McGehearty b .sm_do_copy ! common code 438*280575beSPatrick McGehearty stn %o4, [THREAD_REG + T_LOFAULT] ! set t_lofault 439*280575beSPatrick McGehearty 440*280575beSPatrick McGehearty 441*280575beSPatrick McGehearty.kcopy_more: 442340af271Swh94709 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 443340af271Swh94709 sethi %hi(.copyerr), %l7 ! copyerr is lofault value 444340af271Swh94709 or %l7, %lo(.copyerr), %l7 445340af271Swh94709 ldn [THREAD_REG + T_LOFAULT], %o5 ! save existing handler 446340af271Swh94709 ! 
Note that we carefully do *not* flag the setting of 447340af271Swh94709 ! t_lofault. 448340af271Swh94709 membar #Sync ! sync error barrier 449340af271Swh94709 b .do_copy ! common code 450340af271Swh94709 stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault 451340af271Swh94709 452340af271Swh94709/* 453*280575beSPatrick McGehearty * We got here because of a fault during a small kcopy or bcopy. 454*280575beSPatrick McGehearty * if a fault handler existed when bcopy was called. 455*280575beSPatrick McGehearty * No floating point registers are used by the small copies. 456*280575beSPatrick McGehearty * Small copies are from a leaf routine 457*280575beSPatrick McGehearty * Errno value is in %g1. 458*280575beSPatrick McGehearty */ 459*280575beSPatrick McGehearty.sm_copyerr: 460*280575beSPatrick McGehearty ! The kcopy will always set a t_lofault handler. If it fires, 461*280575beSPatrick McGehearty ! we're expected to just return the error code and not to 462*280575beSPatrick McGehearty ! invoke any existing error handler. As far as bcopy is concerned, 463*280575beSPatrick McGehearty ! we only set t_lofault if there was an existing lofault handler. 464*280575beSPatrick McGehearty ! In that case we're expected to invoke the previously existing 465*280575beSPatrick McGehearty ! handler after resetting the t_lofault value. 466*280575beSPatrick McGehearty btst LOFAULT_SET, %o5 467*280575beSPatrick McGehearty membar #Sync ! sync error barrier 468*280575beSPatrick McGehearty andn %o5, LOFAULT_SET, %o5 ! clear fault flag 469*280575beSPatrick McGehearty bnz,pn %ncc, 3f 470*280575beSPatrick McGehearty stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 471*280575beSPatrick McGehearty retl 472*280575beSPatrick McGehearty mov %g1, %o0 473*280575beSPatrick McGehearty3: 474*280575beSPatrick McGehearty ! We're here via bcopy. There must have been an error handler 475*280575beSPatrick McGehearty ! in place otherwise we would have died a nasty death already. 
476*280575beSPatrick McGehearty jmp %o5 ! goto real handler 477*280575beSPatrick McGehearty mov %g0, %o0 478*280575beSPatrick McGehearty/* 479*280575beSPatrick McGehearty * end of .sm_copyerr 480*280575beSPatrick McGehearty */ 481*280575beSPatrick McGehearty 482*280575beSPatrick McGehearty/* 483340af271Swh94709 * We got here because of a fault during kcopy or bcopy if a fault 484340af271Swh94709 * handler existed when bcopy was called. 485*280575beSPatrick McGehearty * stack and fp registers need to be restored 486340af271Swh94709 * Errno value is in %g1. 487340af271Swh94709 */ 488340af271Swh94709.copyerr: 489340af271Swh94709 sethi %hi(.copyerr2), %l1 490340af271Swh94709 or %l1, %lo(.copyerr2), %l1 491340af271Swh94709 membar #Sync ! sync error barrier 492340af271Swh94709 stn %l1, [THREAD_REG + T_LOFAULT] ! set t_lofault 493340af271Swh94709 btst FPUSED_FLAG, %o5 494340af271Swh94709 bz,pt %xcc, 1f 495*280575beSPatrick McGehearty and %o5, LOFAULT_SET, %l1 ! copy flag to %l1 496340af271Swh94709 497340af271Swh94709 membar #Sync ! sync error barrier 498*280575beSPatrick McGehearty wr %l5, 0, %gsr 499*280575beSPatrick McGehearty btst FPRS_FEF, %g5 500340af271Swh94709 bz,pt %icc, 4f 501340af271Swh94709 nop 502340af271Swh94709 ! restore fpregs from stack 503340af271Swh94709 BLD_FP_FROMSTACK(%o2) 504340af271Swh94709 ba,pt %ncc, 2f 505*280575beSPatrick McGehearty wr %g5, 0, %fprs ! restore fprs 506340af271Swh947094: 507340af271Swh94709 FZERO 508*280575beSPatrick McGehearty wr %g5, 0, %fprs ! restore fprs 509340af271Swh947092: 510340af271Swh94709 ldn [THREAD_REG + T_LWP], %o2 511340af271Swh94709 brnz,pt %o2, 1f 512340af271Swh94709 nop 513340af271Swh94709 514340af271Swh94709 ldsb [THREAD_REG + T_PREEMPT], %l0 515340af271Swh94709 deccc %l0 516340af271Swh94709 bnz,pn %ncc, 1f 517340af271Swh94709 stb %l0, [THREAD_REG + T_PREEMPT] 518340af271Swh94709 519340af271Swh94709 ! 
Check for a kernel preemption request 520340af271Swh94709 ldn [THREAD_REG + T_CPU], %l0 521340af271Swh94709 ldub [%l0 + CPU_KPRUNRUN], %l0 522340af271Swh94709 brnz,a,pt %l0, 1f ! Need to call kpreempt? 523340af271Swh94709 or %l1, KPREEMPT_FLAG, %l1 ! If so, set the flag 524340af271Swh94709 525340af271Swh94709 ! The kcopy will always set a t_lofault handler. If it fires, 526340af271Swh94709 ! we're expected to just return the error code and not to 527340af271Swh94709 ! invoke any existing error handler. As far as bcopy is concerned, 528340af271Swh94709 ! we only set t_lofault if there was an existing lofault handler. 529340af271Swh94709 ! In that case we're expected to invoke the previously existing 530*280575beSPatrick McGehearty ! handler after resetting the t_lofault value. 531340af271Swh947091: 532340af271Swh94709 andn %o5, COPY_FLAGS, %o5 ! remove flags from lofault address 533340af271Swh94709 membar #Sync ! sync error barrier 534340af271Swh94709 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 535340af271Swh94709 536340af271Swh94709 ! call kpreempt if necessary 537340af271Swh94709 btst KPREEMPT_FLAG, %l1 538340af271Swh94709 bz,pt %icc, 2f 539340af271Swh94709 nop 540340af271Swh94709 call kpreempt 541340af271Swh94709 rdpr %pil, %o0 ! pass %pil 542340af271Swh947092: 543*280575beSPatrick McGehearty btst LOFAULT_SET, %l1 544340af271Swh94709 bnz,pn %ncc, 3f 545340af271Swh94709 nop 546340af271Swh94709 ret 547340af271Swh94709 restore %g1, 0, %o0 548340af271Swh947093: 549340af271Swh94709 ! We're here via bcopy. There must have been an error handler 550340af271Swh94709 ! in place otherwise we would have died a nasty death already. 551340af271Swh94709 jmp %o5 ! goto real handler 552340af271Swh94709 restore %g0, 0, %o0 ! dispose of copy window 553340af271Swh94709 554340af271Swh94709/* 555340af271Swh94709 * We got here because of a fault in .copyerr. We can't safely restore fp 556340af271Swh94709 * state, so we panic. 
557340af271Swh94709 */ 558340af271Swh94709fp_panic_msg: 559340af271Swh94709 .asciz "Unable to restore fp state after copy operation" 560340af271Swh94709 561340af271Swh94709 .align 4 562340af271Swh94709.copyerr2: 563340af271Swh94709 set fp_panic_msg, %o0 564340af271Swh94709 call panic 565340af271Swh94709 nop 566*280575beSPatrick McGehearty/* 567*280575beSPatrick McGehearty * end of .copyerr 568*280575beSPatrick McGehearty */ 569*280575beSPatrick McGehearty 570340af271Swh94709#else /* NIAGARA_IMPL */ 571473b13d4Sae112802 save %sp, -SA(MINFRAME), %sp 572473b13d4Sae112802 set .copyerr, %l7 ! copyerr is lofault value 573473b13d4Sae112802 ldn [THREAD_REG + T_LOFAULT], %o5 ! save existing handler 574473b13d4Sae112802 or %o5, LOFAULT_SET, %o5 575473b13d4Sae112802 membar #Sync ! sync error barrier 5767c478bd9Sstevel@tonic-gate b .do_copy ! common code 577473b13d4Sae112802 stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault 5787c478bd9Sstevel@tonic-gate 5797c478bd9Sstevel@tonic-gate/* 5807c478bd9Sstevel@tonic-gate * We got here because of a fault during kcopy. 5817c478bd9Sstevel@tonic-gate * Errno value is in %g1. 5827c478bd9Sstevel@tonic-gate */ 5837c478bd9Sstevel@tonic-gate.copyerr: 584473b13d4Sae112802 ! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET 585473b13d4Sae112802 ! into %o5 to indicate it has set t_lofault handler. Need to clear 586473b13d4Sae112802 ! LOFAULT_SET flag before restoring the error handler. 587473b13d4Sae112802 andn %o5, LOFAULT_SET, %o5 5887c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 5897c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! 
restore old t_lofault 5907c478bd9Sstevel@tonic-gate ret 5917c478bd9Sstevel@tonic-gate restore %g1, 0, %o0 592340af271Swh94709#endif /* NIAGARA_IMPL */ 5937c478bd9Sstevel@tonic-gate 5947c478bd9Sstevel@tonic-gate SET_SIZE(kcopy) 5957c478bd9Sstevel@tonic-gate#endif /* lint */ 5967c478bd9Sstevel@tonic-gate 5977c478bd9Sstevel@tonic-gate 5987c478bd9Sstevel@tonic-gate/* 5997c478bd9Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to). 6007c478bd9Sstevel@tonic-gate */ 6017c478bd9Sstevel@tonic-gate#if defined(lint) 6027c478bd9Sstevel@tonic-gate 6037c478bd9Sstevel@tonic-gate/* ARGSUSED */ 6047c478bd9Sstevel@tonic-gatevoid 6057c478bd9Sstevel@tonic-gatebcopy(const void *from, void *to, size_t count) 6067c478bd9Sstevel@tonic-gate{} 6077c478bd9Sstevel@tonic-gate 6087c478bd9Sstevel@tonic-gate#else /* lint */ 6097c478bd9Sstevel@tonic-gate 6107c478bd9Sstevel@tonic-gate ENTRY(bcopy) 611340af271Swh94709#if !defined(NIAGARA_IMPL) 612*280575beSPatrick McGehearty cmp %o2, FP_COPY ! check for small copy/leaf case 613*280575beSPatrick McGehearty bgt,pt %ncc, .bcopy_more ! count > FP_COPY: use the FP register version 614*280575beSPatrick McGehearty nop 615*280575beSPatrick McGehearty.bcopy_small: ! setup error handler 616*280575beSPatrick McGehearty ldn [THREAD_REG + T_LOFAULT], %o5 ! save existing handler 617*280575beSPatrick McGehearty tst %o5 ! was a lofault handler already installed? 618*280575beSPatrick McGehearty bz,pt %icc, .sm_do_copy ! none: copy without touching t_lofault. NOTE(review): %icc only reflects the low 32 bits of %o5, while .bc_smallx tests the same register with %ncc - confirm this is intended 619*280575beSPatrick McGehearty sethi %hi(.sm_copyerr), %o4 ! branch delay slot (not annulled), executed on both paths 620*280575beSPatrick McGehearty or %o4, %lo(.sm_copyerr), %o4 ! .sm_copyerr is lofault value 621*280575beSPatrick McGehearty membar #Sync ! sync error barrier 622*280575beSPatrick McGehearty stn %o4, [THREAD_REG + T_LOFAULT] ! set t_lofault 623*280575beSPatrick McGehearty or %o5, LOFAULT_SET, %o5 ! Error should trampoline 624*280575beSPatrick McGehearty.sm_do_copy: 625*280575beSPatrick McGehearty mov %o0, %g1 ! save %o0 626*280575beSPatrick McGehearty cmp %o2, SHORTCOPY ! 
make sure there is enough to align 627*280575beSPatrick McGehearty ble,pt %ncc, .bc_smallest 628*280575beSPatrick McGehearty andcc %o1, 0x7, %o3 ! is dest long aligned 629*280575beSPatrick McGehearty bnz,pn %ncc, .bc_align 630*280575beSPatrick McGehearty andcc %o1, 1, %o3 ! is dest byte aligned 631*280575beSPatrick McGehearty 632*280575beSPatrick McGehearty! Destination is long word aligned 633*280575beSPatrick McGehearty.bc_al_src: 634*280575beSPatrick McGehearty andcc %o0, 7, %o3 635*280575beSPatrick McGehearty brnz,pt %o3, .bc_src_dst_unal8 636*280575beSPatrick McGehearty nop 637*280575beSPatrick McGehearty/* 638*280575beSPatrick McGehearty * Special case for handling when src and dest are both long word aligned 639*280575beSPatrick McGehearty * and total data to move is less than FP_COPY bytes 640*280575beSPatrick McGehearty * Also handles finish up for large block moves, so may be less than 32 bytes 641*280575beSPatrick McGehearty */ 642*280575beSPatrick McGehearty.bc_medlong: 643*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 644*280575beSPatrick McGehearty ble,pt %ncc, .bc_medl31 645*280575beSPatrick McGehearty nop 646*280575beSPatrick McGehearty.bc_medl32: 647*280575beSPatrick McGehearty ldx [%o0], %o4 ! move 32 bytes 648*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count by 32 649*280575beSPatrick McGehearty stx %o4, [%o1] 650*280575beSPatrick McGehearty ldx [%o0+8], %o4 651*280575beSPatrick McGehearty stx %o4, [%o1+8] 652*280575beSPatrick McGehearty ldx [%o0+16], %o4 653*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 654*280575beSPatrick McGehearty stx %o4, [%o1+16] 655*280575beSPatrick McGehearty ldx [%o0-8], %o4 656*280575beSPatrick McGehearty add %o1, 32, %o1 ! increase dst ptr by 32 657*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medl32 ! 
repeat if at least 32 bytes left 658*280575beSPatrick McGehearty stx %o4, [%o1-8] 659*280575beSPatrick McGehearty.bc_medl31: 660*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 661*280575beSPatrick McGehearty ble,pt %ncc, .bc_medl7 ! skip if 7 or fewer bytes left 662*280575beSPatrick McGehearty nop 663*280575beSPatrick McGehearty.bc_medl8: 664*280575beSPatrick McGehearty ldx [%o0], %o4 ! move 8 bytes 665*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 666*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrease count by 8 667*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 668*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medl8 669*280575beSPatrick McGehearty stx %o4, [%o1-8] 670*280575beSPatrick McGehearty.bc_medl7: 671*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 672*280575beSPatrick McGehearty bnz,pt %ncc, .bc_small4 ! do final bytes if not finished 673*280575beSPatrick McGehearty 674*280575beSPatrick McGehearty.bc_smallx: ! finish up and exit 675*280575beSPatrick McGehearty tst %o5 676*280575beSPatrick McGehearty bz,pt %ncc, .bc_sm_done 677*280575beSPatrick McGehearty andn %o5, COPY_FLAGS, %o5 ! remove flags from lofault address 678*280575beSPatrick McGehearty membar #Sync ! sync error barrier 679*280575beSPatrick McGehearty stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 680*280575beSPatrick McGehearty.bc_sm_done: 681*280575beSPatrick McGehearty retl 682*280575beSPatrick McGehearty mov %g0, %o0 683*280575beSPatrick McGehearty 684*280575beSPatrick McGehearty.bc_small4: 685*280575beSPatrick McGehearty cmp %o2, 4 686*280575beSPatrick McGehearty blt,pt %ncc, .bc_small3x ! skip if less than 4 bytes left 687*280575beSPatrick McGehearty nop ! 688*280575beSPatrick McGehearty ld [%o0], %o4 ! move 4 bytes 689*280575beSPatrick McGehearty add %o0, 4, %o0 ! increase src ptr by 4 690*280575beSPatrick McGehearty add %o1, 4, %o1 ! 
increase dst ptr by 4 691*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! decrease count by 4 692*280575beSPatrick McGehearty bz,pt %ncc, .bc_smallx 693*280575beSPatrick McGehearty stw %o4, [%o1-4] 694*280575beSPatrick McGehearty 695*280575beSPatrick McGehearty.bc_small3x: ! Exactly 1, 2, or 3 bytes remain 696*280575beSPatrick McGehearty subcc %o2, 1, %o2 ! reduce count for cc test 697*280575beSPatrick McGehearty ldub [%o0], %o4 ! load one byte 698*280575beSPatrick McGehearty bz,pt %ncc, .bc_smallx 699*280575beSPatrick McGehearty stb %o4, [%o1] ! store one byte 700*280575beSPatrick McGehearty ldub [%o0+1], %o4 ! load second byte 701*280575beSPatrick McGehearty subcc %o2, 1, %o2 702*280575beSPatrick McGehearty bz,pt %ncc, .bc_smallx 703*280575beSPatrick McGehearty stb %o4, [%o1+1] ! store second byte 704*280575beSPatrick McGehearty ldub [%o0+2], %o4 ! load third byte 705*280575beSPatrick McGehearty ba .bc_smallx 706*280575beSPatrick McGehearty stb %o4, [%o1+2] ! store third byte 707*280575beSPatrick McGehearty 708*280575beSPatrick McGehearty.bc_smallest: ! 7 or fewer bytes remain 709*280575beSPatrick McGehearty tst %o2 710*280575beSPatrick McGehearty bz,pt %ncc, .bc_smallx 711*280575beSPatrick McGehearty cmp %o2, 4 712*280575beSPatrick McGehearty blt,pt %ncc, .bc_small3x 713*280575beSPatrick McGehearty nop 714*280575beSPatrick McGehearty ldub [%o0], %o4 ! read byte 715*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! reduce count by 4 716*280575beSPatrick McGehearty stb %o4, [%o1] ! write byte 717*280575beSPatrick McGehearty ldub [%o0+1], %o4 ! repeat for total of 4 bytes 718*280575beSPatrick McGehearty add %o0, 4, %o0 ! advance src by 4 719*280575beSPatrick McGehearty stb %o4, [%o1+1] 720*280575beSPatrick McGehearty ldub [%o0-2], %o4 721*280575beSPatrick McGehearty add %o1, 4, %o1 ! 
advance dst by 4 722*280575beSPatrick McGehearty stb %o4, [%o1-2] 723*280575beSPatrick McGehearty ldub [%o0-1], %o4 724*280575beSPatrick McGehearty bnz,pt %ncc, .bc_small3x 725*280575beSPatrick McGehearty stb %o4, [%o1-1] 726*280575beSPatrick McGehearty ba .bc_smallx 727*280575beSPatrick McGehearty nop 728*280575beSPatrick McGehearty 729*280575beSPatrick McGehearty/* 730*280575beSPatrick McGehearty * Align destination to long word boundary 731*280575beSPatrick McGehearty */ 732*280575beSPatrick McGehearty.bc_align: ! byte align test in prior branch delay 733*280575beSPatrick McGehearty bnz,pt %ncc, .bc_al_d1 734*280575beSPatrick McGehearty.bc_al_d1f: ! dest is now half word aligned 735*280575beSPatrick McGehearty andcc %o1, 2, %o3 736*280575beSPatrick McGehearty bnz,pt %ncc, .bc_al_d2 737*280575beSPatrick McGehearty.bc_al_d2f: ! dest is now word aligned 738*280575beSPatrick McGehearty andcc %o1, 4, %o3 ! is dest longword aligned? 739*280575beSPatrick McGehearty bz,pt %ncc, .bc_al_src 740*280575beSPatrick McGehearty nop 741*280575beSPatrick McGehearty.bc_al_d4: ! dest is word aligned; src is unknown 742*280575beSPatrick McGehearty ldub [%o0], %o4 ! move a word (src align unknown) 743*280575beSPatrick McGehearty ldub [%o0+1], %o3 744*280575beSPatrick McGehearty sll %o4, 24, %o4 ! position 745*280575beSPatrick McGehearty sll %o3, 16, %o3 ! position 746*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 747*280575beSPatrick McGehearty ldub [%o0+2], %o4 748*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 749*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 750*280575beSPatrick McGehearty ldub [%o0+3], %o4 751*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 752*280575beSPatrick McGehearty stw %o4,[%o1] ! store four bytes 753*280575beSPatrick McGehearty add %o0, 4, %o0 ! adjust src by 4 754*280575beSPatrick McGehearty add %o1, 4, %o1 ! adjust dest by 4 755*280575beSPatrick McGehearty sub %o2, 4, %o2 ! 
adjust count by 4 756*280575beSPatrick McGehearty andcc %o0, 7, %o3 ! check for src long word alignment 757*280575beSPatrick McGehearty brz,pt %o3, .bc_medlong 758*280575beSPatrick McGehearty.bc_src_dst_unal8: 759*280575beSPatrick McGehearty ! dst is 8-byte aligned, src is not 760*280575beSPatrick McGehearty ! Size is less than FP_COPY 761*280575beSPatrick McGehearty ! Following code is to select for alignment 762*280575beSPatrick McGehearty andcc %o0, 0x3, %o3 ! test word alignment 763*280575beSPatrick McGehearty bz,pt %ncc, .bc_medword 764*280575beSPatrick McGehearty nop 765*280575beSPatrick McGehearty andcc %o0, 0x1, %o3 ! test halfword alignment 766*280575beSPatrick McGehearty bnz,pt %ncc, .bc_med_byte ! go to byte move if not halfword 767*280575beSPatrick McGehearty andcc %o0, 0x2, %o3 ! test which byte alignment 768*280575beSPatrick McGehearty ba .bc_medhalf 769*280575beSPatrick McGehearty nop 770*280575beSPatrick McGehearty.bc_al_d1: ! align dest to half word 771*280575beSPatrick McGehearty ldub [%o0], %o4 ! move a byte 772*280575beSPatrick McGehearty add %o0, 1, %o0 773*280575beSPatrick McGehearty stb %o4, [%o1] 774*280575beSPatrick McGehearty add %o1, 1, %o1 775*280575beSPatrick McGehearty andcc %o1, 2, %o3 776*280575beSPatrick McGehearty bz,pt %ncc, .bc_al_d2f 777*280575beSPatrick McGehearty sub %o2, 1, %o2 778*280575beSPatrick McGehearty.bc_al_d2: ! align dest to word 779*280575beSPatrick McGehearty ldub [%o0], %o4 ! move a half-word (src align unknown) 780*280575beSPatrick McGehearty ldub [%o0+1], %o3 781*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 782*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 783*280575beSPatrick McGehearty sth %o4, [%o1] 784*280575beSPatrick McGehearty add %o0, 2, %o0 785*280575beSPatrick McGehearty add %o1, 2, %o1 786*280575beSPatrick McGehearty andcc %o1, 4, %o3 ! is dest longword aligned? 
787*280575beSPatrick McGehearty bz,pt %ncc, .bc_al_src 788*280575beSPatrick McGehearty sub %o2, 2, %o2 789*280575beSPatrick McGehearty ba .bc_al_d4 790*280575beSPatrick McGehearty nop 791*280575beSPatrick McGehearty/* 792*280575beSPatrick McGehearty * Handle all cases where src and dest are aligned on word 793*280575beSPatrick McGehearty * boundaries. Use unrolled loops for better performance. 794*280575beSPatrick McGehearty * This option wins over standard large data move when 795*280575beSPatrick McGehearty * source and destination is in cache for medium 796*280575beSPatrick McGehearty * to short data moves. 797*280575beSPatrick McGehearty */ 798*280575beSPatrick McGehearty.bc_medword: 799*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 800*280575beSPatrick McGehearty ble,pt %ncc, .bc_medw31 801*280575beSPatrick McGehearty nop 802*280575beSPatrick McGehearty.bc_medw32: 803*280575beSPatrick McGehearty ld [%o0], %o4 ! move a block of 32 bytes 804*280575beSPatrick McGehearty stw %o4, [%o1] 805*280575beSPatrick McGehearty ld [%o0+4], %o4 806*280575beSPatrick McGehearty stw %o4, [%o1+4] 807*280575beSPatrick McGehearty ld [%o0+8], %o4 808*280575beSPatrick McGehearty stw %o4, [%o1+8] 809*280575beSPatrick McGehearty ld [%o0+12], %o4 810*280575beSPatrick McGehearty stw %o4, [%o1+12] 811*280575beSPatrick McGehearty ld [%o0+16], %o4 812*280575beSPatrick McGehearty stw %o4, [%o1+16] 813*280575beSPatrick McGehearty ld [%o0+20], %o4 814*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 815*280575beSPatrick McGehearty stw %o4, [%o1+20] 816*280575beSPatrick McGehearty ld [%o0+24], %o4 817*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 818*280575beSPatrick McGehearty stw %o4, [%o1+24] 819*280575beSPatrick McGehearty ld [%o0-4], %o4 820*280575beSPatrick McGehearty add %o1, 32, %o1 ! increase dst ptr by 32 821*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medw32 ! 
repeat if at least 32 bytes left 822*280575beSPatrick McGehearty stw %o4, [%o1-4] 823*280575beSPatrick McGehearty.bc_medw31: 824*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 825*280575beSPatrick McGehearty ble,pt %ncc, .bc_medw7 ! skip if 7 or fewer bytes left 826*280575beSPatrick McGehearty nop ! 827*280575beSPatrick McGehearty.bc_medw15: 828*280575beSPatrick McGehearty ld [%o0], %o4 ! move a block of 8 bytes 829*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 830*280575beSPatrick McGehearty stw %o4, [%o1] 831*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 832*280575beSPatrick McGehearty ld [%o0-4], %o4 833*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 834*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medw15 835*280575beSPatrick McGehearty stw %o4, [%o1-4] 836*280575beSPatrick McGehearty.bc_medw7: 837*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 838*280575beSPatrick McGehearty bz,pt %ncc, .bc_smallx ! exit if finished 839*280575beSPatrick McGehearty cmp %o2, 4 840*280575beSPatrick McGehearty blt,pt %ncc, .bc_small3x ! skip if less than 4 bytes left 841*280575beSPatrick McGehearty nop ! 842*280575beSPatrick McGehearty ld [%o0], %o4 ! move 4 bytes 843*280575beSPatrick McGehearty add %o0, 4, %o0 ! increase src ptr by 4 844*280575beSPatrick McGehearty add %o1, 4, %o1 ! increase dst ptr by 4 845*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! decrease count by 4 846*280575beSPatrick McGehearty bnz .bc_small3x 847*280575beSPatrick McGehearty stw %o4, [%o1-4] 848*280575beSPatrick McGehearty ba .bc_smallx 849*280575beSPatrick McGehearty nop 850*280575beSPatrick McGehearty 851*280575beSPatrick McGehearty.bc_medhalf: 852*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 853*280575beSPatrick McGehearty ble,pt %ncc, .bc_medh31 854*280575beSPatrick McGehearty nop 855*280575beSPatrick McGehearty.bc_medh32: ! 
load and store block of 32 bytes 856*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 857*280575beSPatrick McGehearty 858*280575beSPatrick McGehearty lduh [%o0], %o4 ! move 32 bytes 859*280575beSPatrick McGehearty lduw [%o0+2], %o3 860*280575beSPatrick McGehearty sllx %o4, 48, %o4 861*280575beSPatrick McGehearty sllx %o3, 16, %o3 862*280575beSPatrick McGehearty or %o4, %o3, %o3 863*280575beSPatrick McGehearty lduh [%o0+6], %o4 864*280575beSPatrick McGehearty or %o4, %o3, %o4 865*280575beSPatrick McGehearty stx %o4, [%o1] 866*280575beSPatrick McGehearty 867*280575beSPatrick McGehearty lduh [%o0+8], %o4 868*280575beSPatrick McGehearty lduw [%o0+10], %o3 869*280575beSPatrick McGehearty sllx %o4, 48, %o4 870*280575beSPatrick McGehearty sllx %o3, 16, %o3 871*280575beSPatrick McGehearty or %o4, %o3, %o3 872*280575beSPatrick McGehearty lduh [%o0+14], %o4 873*280575beSPatrick McGehearty or %o4, %o3, %o4 874*280575beSPatrick McGehearty stx %o4, [%o1+8] 875*280575beSPatrick McGehearty 876*280575beSPatrick McGehearty lduh [%o0+16], %o4 877*280575beSPatrick McGehearty lduw [%o0+18], %o3 878*280575beSPatrick McGehearty sllx %o4, 48, %o4 879*280575beSPatrick McGehearty sllx %o3, 16, %o3 880*280575beSPatrick McGehearty or %o4, %o3, %o3 881*280575beSPatrick McGehearty lduh [%o0+22], %o4 882*280575beSPatrick McGehearty or %o4, %o3, %o4 883*280575beSPatrick McGehearty stx %o4, [%o1+16] 884*280575beSPatrick McGehearty 885*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 886*280575beSPatrick McGehearty add %o1, 32, %o1 ! 
increase dst ptr by 32 887*280575beSPatrick McGehearty 888*280575beSPatrick McGehearty lduh [%o0-8], %o4 889*280575beSPatrick McGehearty lduw [%o0-6], %o3 890*280575beSPatrick McGehearty sllx %o4, 48, %o4 891*280575beSPatrick McGehearty sllx %o3, 16, %o3 892*280575beSPatrick McGehearty or %o4, %o3, %o3 893*280575beSPatrick McGehearty lduh [%o0-2], %o4 894*280575beSPatrick McGehearty or %o3, %o4, %o4 895*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medh32 ! repeat if at least 32 bytes left 896*280575beSPatrick McGehearty stx %o4, [%o1-8] 897*280575beSPatrick McGehearty 898*280575beSPatrick McGehearty.bc_medh31: 899*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 900*280575beSPatrick McGehearty ble,pt %ncc, .bc_medh7 ! skip if 7 or fewer bytes left 901*280575beSPatrick McGehearty nop ! 902*280575beSPatrick McGehearty.bc_medh15: 903*280575beSPatrick McGehearty lduh [%o0], %o4 ! move 8 bytes per iteration (halfword + word + halfword) 904*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 905*280575beSPatrick McGehearty lduw [%o0+2], %o3 906*280575beSPatrick McGehearty sllx %o4, 48, %o4 907*280575beSPatrick McGehearty sllx %o3, 16, %o3 908*280575beSPatrick McGehearty or %o4, %o3, %o3 909*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 910*280575beSPatrick McGehearty lduh [%o0+6], %o4 911*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 912*280575beSPatrick McGehearty or %o4, %o3, %o4 913*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medh15 914*280575beSPatrick McGehearty stx %o4, [%o1-8] 915*280575beSPatrick McGehearty.bc_medh7: 916*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 917*280575beSPatrick McGehearty bz,pt %ncc, .bc_smallx ! exit if finished 918*280575beSPatrick McGehearty cmp %o2, 4 919*280575beSPatrick McGehearty blt,pt %ncc, .bc_small3x ! skip if less than 4 bytes left 920*280575beSPatrick McGehearty nop ! 
921*280575beSPatrick McGehearty lduh [%o0], %o4 922*280575beSPatrick McGehearty sll %o4, 16, %o4 923*280575beSPatrick McGehearty lduh [%o0+2], %o3 924*280575beSPatrick McGehearty or %o3, %o4, %o4 925*280575beSPatrick McGehearty subcc %o2, 4, %o2 926*280575beSPatrick McGehearty add %o0, 4, %o0 927*280575beSPatrick McGehearty add %o1, 4, %o1 928*280575beSPatrick McGehearty bnz .bc_small3x 929*280575beSPatrick McGehearty stw %o4, [%o1-4] 930*280575beSPatrick McGehearty ba .bc_smallx 931*280575beSPatrick McGehearty nop 932*280575beSPatrick McGehearty 933*280575beSPatrick McGehearty .align 16 934*280575beSPatrick McGehearty.bc_med_byte: 935*280575beSPatrick McGehearty bnz,pt %ncc, .bc_medbh32a ! go to correct byte move 936*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 937*280575beSPatrick McGehearty ble,pt %ncc, .bc_medb31 938*280575beSPatrick McGehearty nop 939*280575beSPatrick McGehearty.bc_medb32: ! Alignment 1 or 5 940*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 941*280575beSPatrick McGehearty 942*280575beSPatrick McGehearty ldub [%o0], %o4 ! 
load and store a block of 32 bytes 943*280575beSPatrick McGehearty sllx %o4, 56, %o3 944*280575beSPatrick McGehearty lduh [%o0+1], %o4 945*280575beSPatrick McGehearty sllx %o4, 40, %o4 946*280575beSPatrick McGehearty or %o4, %o3, %o3 947*280575beSPatrick McGehearty lduw [%o0+3], %o4 948*280575beSPatrick McGehearty sllx %o4, 8, %o4 949*280575beSPatrick McGehearty or %o4, %o3, %o3 950*280575beSPatrick McGehearty ldub [%o0+7], %o4 951*280575beSPatrick McGehearty or %o4, %o3, %o4 952*280575beSPatrick McGehearty stx %o4, [%o1] 953*280575beSPatrick McGehearty 954*280575beSPatrick McGehearty ldub [%o0+8], %o4 955*280575beSPatrick McGehearty sllx %o4, 56, %o3 956*280575beSPatrick McGehearty lduh [%o0+9], %o4 957*280575beSPatrick McGehearty sllx %o4, 40, %o4 958*280575beSPatrick McGehearty or %o4, %o3, %o3 959*280575beSPatrick McGehearty lduw [%o0+11], %o4 960*280575beSPatrick McGehearty sllx %o4, 8, %o4 961*280575beSPatrick McGehearty or %o4, %o3, %o3 962*280575beSPatrick McGehearty ldub [%o0+15], %o4 963*280575beSPatrick McGehearty or %o4, %o3, %o4 964*280575beSPatrick McGehearty stx %o4, [%o1+8] 965*280575beSPatrick McGehearty 966*280575beSPatrick McGehearty ldub [%o0+16], %o4 967*280575beSPatrick McGehearty sllx %o4, 56, %o3 968*280575beSPatrick McGehearty lduh [%o0+17], %o4 969*280575beSPatrick McGehearty sllx %o4, 40, %o4 970*280575beSPatrick McGehearty or %o4, %o3, %o3 971*280575beSPatrick McGehearty lduw [%o0+19], %o4 972*280575beSPatrick McGehearty sllx %o4, 8, %o4 973*280575beSPatrick McGehearty or %o4, %o3, %o3 974*280575beSPatrick McGehearty ldub [%o0+23], %o4 975*280575beSPatrick McGehearty or %o4, %o3, %o4 976*280575beSPatrick McGehearty stx %o4, [%o1+16] 977*280575beSPatrick McGehearty 978*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 979*280575beSPatrick McGehearty add %o1, 32, %o1 ! 
increase dst ptr by 32 980*280575beSPatrick McGehearty 981*280575beSPatrick McGehearty ldub [%o0-8], %o4 982*280575beSPatrick McGehearty sllx %o4, 56, %o3 983*280575beSPatrick McGehearty lduh [%o0-7], %o4 984*280575beSPatrick McGehearty sllx %o4, 40, %o4 985*280575beSPatrick McGehearty or %o4, %o3, %o3 986*280575beSPatrick McGehearty lduw [%o0-5], %o4 987*280575beSPatrick McGehearty sllx %o4, 8, %o4 988*280575beSPatrick McGehearty or %o4, %o3, %o3 989*280575beSPatrick McGehearty ldub [%o0-1], %o4 990*280575beSPatrick McGehearty or %o4, %o3, %o4 991*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medb32 ! repeat if at least 32 bytes left 992*280575beSPatrick McGehearty stx %o4, [%o1-8] 993*280575beSPatrick McGehearty 994*280575beSPatrick McGehearty.bc_medb31: ! 31 or fewer bytes remaining 995*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 996*280575beSPatrick McGehearty ble,pt %ncc, .bc_medb7 ! skip if 7 or fewer bytes left 997*280575beSPatrick McGehearty nop ! 998*280575beSPatrick McGehearty.bc_medb15: 999*280575beSPatrick McGehearty 1000*280575beSPatrick McGehearty ldub [%o0], %o4 ! load and store a block of 8 bytes 1001*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 1002*280575beSPatrick McGehearty sllx %o4, 56, %o3 1003*280575beSPatrick McGehearty lduh [%o0+1], %o4 1004*280575beSPatrick McGehearty sllx %o4, 40, %o4 1005*280575beSPatrick McGehearty or %o4, %o3, %o3 1006*280575beSPatrick McGehearty lduw [%o0+3], %o4 1007*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 1008*280575beSPatrick McGehearty sllx %o4, 8, %o4 1009*280575beSPatrick McGehearty or %o4, %o3, %o3 1010*280575beSPatrick McGehearty ldub [%o0+7], %o4 1011*280575beSPatrick McGehearty add %o0, 8, %o0 ! 
increase src ptr by 8 1012*280575beSPatrick McGehearty or %o4, %o3, %o4 1013*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medb15 1014*280575beSPatrick McGehearty stx %o4, [%o1-8] 1015*280575beSPatrick McGehearty.bc_medb7: 1016*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 1017*280575beSPatrick McGehearty bz,pt %ncc, .bc_smallx ! exit if finished 1018*280575beSPatrick McGehearty cmp %o2, 4 1019*280575beSPatrick McGehearty blt,pt %ncc, .bc_small3x ! skip if less than 4 bytes left 1020*280575beSPatrick McGehearty nop ! 1021*280575beSPatrick McGehearty ldub [%o0], %o4 ! move 4 bytes 1022*280575beSPatrick McGehearty sll %o4, 24, %o3 1023*280575beSPatrick McGehearty lduh [%o0+1], %o4 1024*280575beSPatrick McGehearty sll %o4, 8, %o4 1025*280575beSPatrick McGehearty or %o4, %o3, %o3 1026*280575beSPatrick McGehearty ldub [%o0+3], %o4 1027*280575beSPatrick McGehearty or %o4, %o3, %o4 1028*280575beSPatrick McGehearty subcc %o2, 4, %o2 1029*280575beSPatrick McGehearty add %o0, 4, %o0 1030*280575beSPatrick McGehearty add %o1, 4, %o1 1031*280575beSPatrick McGehearty bnz .bc_small3x 1032*280575beSPatrick McGehearty stw %o4, [%o1-4] 1033*280575beSPatrick McGehearty ba .bc_smallx 1034*280575beSPatrick McGehearty nop 1035*280575beSPatrick McGehearty 1036*280575beSPatrick McGehearty .align 16 1037*280575beSPatrick McGehearty.bc_medbh32a: ! Alignment 3 or 7 1038*280575beSPatrick McGehearty ble,pt %ncc, .bc_medbh31 1039*280575beSPatrick McGehearty nop 1040*280575beSPatrick McGehearty.bc_medbh32: ! Alignment 3 or 7 1041*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 1042*280575beSPatrick McGehearty 1043*280575beSPatrick McGehearty ldub [%o0], %o4 ! 
load and store a block of 32 bytes 1044*280575beSPatrick McGehearty sllx %o4, 56, %o3 1045*280575beSPatrick McGehearty lduw [%o0+1], %o4 1046*280575beSPatrick McGehearty sllx %o4, 24, %o4 1047*280575beSPatrick McGehearty or %o4, %o3, %o3 1048*280575beSPatrick McGehearty lduh [%o0+5], %o4 1049*280575beSPatrick McGehearty sllx %o4, 8, %o4 1050*280575beSPatrick McGehearty or %o4, %o3, %o3 1051*280575beSPatrick McGehearty ldub [%o0+7], %o4 1052*280575beSPatrick McGehearty or %o4, %o3, %o4 1053*280575beSPatrick McGehearty stx %o4, [%o1] 1054*280575beSPatrick McGehearty 1055*280575beSPatrick McGehearty ldub [%o0+8], %o4 1056*280575beSPatrick McGehearty sllx %o4, 56, %o3 1057*280575beSPatrick McGehearty lduw [%o0+9], %o4 1058*280575beSPatrick McGehearty sllx %o4, 24, %o4 1059*280575beSPatrick McGehearty or %o4, %o3, %o3 1060*280575beSPatrick McGehearty lduh [%o0+13], %o4 1061*280575beSPatrick McGehearty sllx %o4, 8, %o4 1062*280575beSPatrick McGehearty or %o4, %o3, %o3 1063*280575beSPatrick McGehearty ldub [%o0+15], %o4 1064*280575beSPatrick McGehearty or %o4, %o3, %o4 1065*280575beSPatrick McGehearty stx %o4, [%o1+8] 1066*280575beSPatrick McGehearty 1067*280575beSPatrick McGehearty ldub [%o0+16], %o4 1068*280575beSPatrick McGehearty sllx %o4, 56, %o3 1069*280575beSPatrick McGehearty lduw [%o0+17], %o4 1070*280575beSPatrick McGehearty sllx %o4, 24, %o4 1071*280575beSPatrick McGehearty or %o4, %o3, %o3 1072*280575beSPatrick McGehearty lduh [%o0+21], %o4 1073*280575beSPatrick McGehearty sllx %o4, 8, %o4 1074*280575beSPatrick McGehearty or %o4, %o3, %o3 1075*280575beSPatrick McGehearty ldub [%o0+23], %o4 1076*280575beSPatrick McGehearty or %o4, %o3, %o4 1077*280575beSPatrick McGehearty stx %o4, [%o1+16] 1078*280575beSPatrick McGehearty 1079*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 1080*280575beSPatrick McGehearty add %o1, 32, %o1 ! 
increase dst ptr by 32 1081*280575beSPatrick McGehearty 1082*280575beSPatrick McGehearty ldub [%o0-8], %o4 1083*280575beSPatrick McGehearty sllx %o4, 56, %o3 1084*280575beSPatrick McGehearty lduw [%o0-7], %o4 1085*280575beSPatrick McGehearty sllx %o4, 24, %o4 1086*280575beSPatrick McGehearty or %o4, %o3, %o3 1087*280575beSPatrick McGehearty lduh [%o0-3], %o4 1088*280575beSPatrick McGehearty sllx %o4, 8, %o4 1089*280575beSPatrick McGehearty or %o4, %o3, %o3 1090*280575beSPatrick McGehearty ldub [%o0-1], %o4 1091*280575beSPatrick McGehearty or %o4, %o3, %o4 1092*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medbh32 ! repeat if at least 32 bytes left 1093*280575beSPatrick McGehearty stx %o4, [%o1-8] 1094*280575beSPatrick McGehearty 1095*280575beSPatrick McGehearty.bc_medbh31: 1096*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 1097*280575beSPatrick McGehearty ble,pt %ncc, .bc_medb7 ! skip if 7 or fewer bytes left 1098*280575beSPatrick McGehearty nop ! 1099*280575beSPatrick McGehearty.bc_medbh15: 1100*280575beSPatrick McGehearty ldub [%o0], %o4 ! load and store a block of 8 bytes 1101*280575beSPatrick McGehearty sllx %o4, 56, %o3 1102*280575beSPatrick McGehearty lduw [%o0+1], %o4 1103*280575beSPatrick McGehearty sllx %o4, 24, %o4 1104*280575beSPatrick McGehearty or %o4, %o3, %o3 1105*280575beSPatrick McGehearty lduh [%o0+5], %o4 1106*280575beSPatrick McGehearty sllx %o4, 8, %o4 1107*280575beSPatrick McGehearty or %o4, %o3, %o3 1108*280575beSPatrick McGehearty ldub [%o0+7], %o4 1109*280575beSPatrick McGehearty or %o4, %o3, %o4 1110*280575beSPatrick McGehearty stx %o4, [%o1] ! NOTE(review): the branch delay slot below stores %o4 to [%o1-8] after %o1 is advanced by 8, i.e. the same doubleword again - one of the two stores looks redundant; confirm 1111*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 1112*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 1113*280575beSPatrick McGehearty add %o0, 8, %o0 ! 
increase src ptr by 8 1114*280575beSPatrick McGehearty bgu,pt %ncc, .bc_medbh15 1115*280575beSPatrick McGehearty stx %o4, [%o1-8] 1116*280575beSPatrick McGehearty ba .bc_medb7 1117*280575beSPatrick McGehearty nop 1118*280575beSPatrick McGehearty 1119*280575beSPatrick McGehearty SET_SIZE(bcopy) 1120*280575beSPatrick McGehearty/* 1121*280575beSPatrick McGehearty * The _more entry points are not intended to be used directly by 1122*280575beSPatrick McGehearty * any caller from outside this file. They are provided to allow 1123*280575beSPatrick McGehearty * profiling and dtrace of the portions of the copy code that uses 1124*280575beSPatrick McGehearty * the floating point registers. 1125*280575beSPatrick McGehearty*/ 1126*280575beSPatrick McGehearty ENTRY(bcopy_more) 1127*280575beSPatrick McGehearty.bcopy_more: 1128340af271Swh94709 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 1129340af271Swh94709 ldn [THREAD_REG + T_LOFAULT], %o5 ! save existing handler 1130340af271Swh94709 brz,pt %o5, .do_copy 1131340af271Swh94709 nop 1132340af271Swh94709 sethi %hi(.copyerr), %l7 ! copyerr is lofault value 1133340af271Swh94709 or %l7, %lo(.copyerr), %l7 1134340af271Swh94709 membar #Sync ! sync error barrier 1135340af271Swh94709 stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault 1136340af271Swh94709 ! We've already captured whether t_lofault was zero on entry. 1137340af271Swh94709 ! We need to mark ourselves as being from bcopy since both 1138*280575beSPatrick McGehearty ! kcopy and bcopy use the same code path. If LOFAULT_SET is 1139340af271Swh94709 ! set and the saved lofault was zero, we won't reset lofault on 1140340af271Swh94709 ! returning. 
1141*280575beSPatrick McGehearty or %o5, LOFAULT_SET, %o5 1142*280575beSPatrick McGehearty.do_copy: 1143*280575beSPatrick McGehearty ldn [THREAD_REG + T_LWP], %o3 1144*280575beSPatrick McGehearty brnz,pt %o3, 1f 1145*280575beSPatrick McGehearty nop 1146*280575beSPatrick McGehearty/* 1147*280575beSPatrick McGehearty * kpreempt_disable(); 1148*280575beSPatrick McGehearty */ 1149*280575beSPatrick McGehearty ldsb [THREAD_REG +T_PREEMPT], %o3 1150*280575beSPatrick McGehearty inc %o3 1151*280575beSPatrick McGehearty stb %o3, [THREAD_REG + T_PREEMPT] 1152*280575beSPatrick McGehearty1: 1153*280575beSPatrick McGehearty/* 1154*280575beSPatrick McGehearty * Following code is for large copies. We know there is at 1155*280575beSPatrick McGehearty * least FP_COPY bytes available. FP regs are used, so 1156*280575beSPatrick McGehearty * we save registers and fp regs before starting 1157*280575beSPatrick McGehearty */ 1158*280575beSPatrick McGehearty rd %fprs, %g5 ! check for unused fp 1159*280575beSPatrick McGehearty or %o5,FPUSED_FLAG,%o5 1160*280575beSPatrick McGehearty ! if fprs.fef == 0, set it. 1161*280575beSPatrick McGehearty ! Setting it when already set costs more than checking 1162*280575beSPatrick McGehearty andcc %g5, FPRS_FEF, %g5 ! test FEF, fprs.du = fprs.dl = 0 1163*280575beSPatrick McGehearty bz,pt %ncc, .bc_fp_unused 1164*280575beSPatrick McGehearty prefetch [%i0 + (1 * CACHE_LINE)], #one_read 1165*280575beSPatrick McGehearty BST_FP_TOSTACK(%o3) 1166*280575beSPatrick McGehearty ba .bc_fp_ready 1167*280575beSPatrick McGehearty.bc_fp_unused: 1168*280575beSPatrick McGehearty andcc %i1, 1, %o3 ! is dest byte aligned 1169*280575beSPatrick McGehearty wr %g0, FPRS_FEF, %fprs ! fprs.fef = 1 1170*280575beSPatrick McGehearty.bc_fp_ready: 1171*280575beSPatrick McGehearty rd %gsr, %l5 ! save %gsr value 1172*280575beSPatrick McGehearty bnz,pt %ncc, .bc_big_d1 1173*280575beSPatrick McGehearty.bc_big_d1f: ! 
dest is now half word aligned 1174*280575beSPatrick McGehearty andcc %i1, 2, %o3 1175*280575beSPatrick McGehearty bnz,pt %ncc, .bc_big_d2 1176*280575beSPatrick McGehearty.bc_big_d2f: ! dest is now word aligned 1177*280575beSPatrick McGehearty andcc %i1, 4, %o3 1178*280575beSPatrick McGehearty bnz,pt %ncc, .bc_big_d4 1179*280575beSPatrick McGehearty.bc_big_d4f: ! dest is now long word aligned 1180*280575beSPatrick McGehearty andcc %i0, 7, %o3 ! is src long word aligned 1181*280575beSPatrick McGehearty brnz,pt %o3, .bc_big_unal8 1182*280575beSPatrick McGehearty prefetch [%i0 + (2 * CACHE_LINE)], #one_read 1183*280575beSPatrick McGehearty 1184*280575beSPatrick McGehearty ! Src and dst are long word aligned 1185*280575beSPatrick McGehearty ! align dst to 64 byte boundary 1186*280575beSPatrick McGehearty andcc %i1, 0x3f, %o3 ! %o3 == 0 means dst is 64 byte aligned 1187*280575beSPatrick McGehearty brz,pn %o3, .bc_al_to_64 1188*280575beSPatrick McGehearty nop 1189*280575beSPatrick McGehearty sub %o3, 64, %o3 ! %o3 has negative bytes to move 1190*280575beSPatrick McGehearty add %i2, %o3, %i2 ! adjust remaining count 1191*280575beSPatrick McGehearty andcc %o3, 8, %o4 ! odd long words to move? 1192*280575beSPatrick McGehearty brz,pt %o4, .bc_al_to_16 1193*280575beSPatrick McGehearty nop 1194*280575beSPatrick McGehearty add %o3, 8, %o3 1195*280575beSPatrick McGehearty ldx [%i0], %o4 1196*280575beSPatrick McGehearty add %i0, 8, %i0 ! increment src ptr 1197*280575beSPatrick McGehearty add %i1, 8, %i1 ! increment dst ptr 1198*280575beSPatrick McGehearty stx %o4, [%i1-8] 1199*280575beSPatrick McGehearty! Dest is aligned on 16 bytes, src 8 byte aligned 1200*280575beSPatrick McGehearty.bc_al_to_16: 1201*280575beSPatrick McGehearty andcc %o3, 0x30, %o4 ! pair of long words to move? 
1202*280575beSPatrick McGehearty brz,pt %o4, .bc_al_to_64 1203*280575beSPatrick McGehearty nop 1204*280575beSPatrick McGehearty.bc_al_mv_16: 1205*280575beSPatrick McGehearty add %o3, 16, %o3 1206*280575beSPatrick McGehearty ldx [%i0], %o4 1207*280575beSPatrick McGehearty stx %o4, [%i1] 1208*280575beSPatrick McGehearty ldx [%i0+8], %o4 1209*280575beSPatrick McGehearty add %i0, 16, %i0 ! increment src ptr 1210*280575beSPatrick McGehearty stx %o4, [%i1+8] 1211*280575beSPatrick McGehearty andcc %o3, 48, %o4 1212*280575beSPatrick McGehearty brnz,pt %o4, .bc_al_mv_16 1213*280575beSPatrick McGehearty add %i1, 16, %i1 ! increment dst ptr 1214*280575beSPatrick McGehearty! Dest is aligned on 64 bytes, src 8 byte aligned 1215*280575beSPatrick McGehearty.bc_al_to_64: 1216*280575beSPatrick McGehearty ! Determine source alignment 1217*280575beSPatrick McGehearty ! to correct 8 byte offset 1218*280575beSPatrick McGehearty andcc %i0, 32, %o3 1219*280575beSPatrick McGehearty brnz,pn %o3, .bc_aln_1 1220*280575beSPatrick McGehearty andcc %i0, 16, %o3 1221*280575beSPatrick McGehearty brnz,pn %o3, .bc_aln_01 1222*280575beSPatrick McGehearty andcc %i0, 8, %o3 1223*280575beSPatrick McGehearty brz,pn %o3, .bc_aln_000 1224*280575beSPatrick McGehearty prefetch [%i0 + (3 * CACHE_LINE)], #one_read 1225*280575beSPatrick McGehearty ba .bc_aln_001 1226*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1227*280575beSPatrick McGehearty 1228*280575beSPatrick McGehearty.bc_aln_01: 1229*280575beSPatrick McGehearty brnz,pn %o3, .bc_aln_011 1230*280575beSPatrick McGehearty prefetch [%i0 + (3 * CACHE_LINE)], #one_read 1231*280575beSPatrick McGehearty ba .bc_aln_010 1232*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1233*280575beSPatrick McGehearty.bc_aln_1: 1234*280575beSPatrick McGehearty andcc %i0, 16, %o3 1235*280575beSPatrick McGehearty brnz,pn %o3, .bc_aln_11 1236*280575beSPatrick McGehearty andcc %i0, 8, %o3 1237*280575beSPatrick McGehearty brnz,pn 
%o3, .bc_aln_101 1238*280575beSPatrick McGehearty prefetch [%i0 + (3 * CACHE_LINE)], #one_read 1239*280575beSPatrick McGehearty ba .bc_aln_100 1240*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1241*280575beSPatrick McGehearty.bc_aln_11: 1242*280575beSPatrick McGehearty brz,pn %o3, .bc_aln_110 1243*280575beSPatrick McGehearty prefetch [%i0 + (3 * CACHE_LINE)], #one_read 1244*280575beSPatrick McGehearty 1245*280575beSPatrick McGehearty.bc_aln_111: 1246*280575beSPatrick McGehearty! Alignment off by 8 bytes 1247*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1248*280575beSPatrick McGehearty ldd [%i0], %d0 1249*280575beSPatrick McGehearty add %i0, 8, %i0 1250*280575beSPatrick McGehearty sub %i2, 8, %i2 1251*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 1252*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 1253*280575beSPatrick McGehearty sub %i1, %i0, %i1 1254*280575beSPatrick McGehearty.bc_aln_111_loop: 1255*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 1256*280575beSPatrick McGehearty subcc %o3, 64, %o3 1257*280575beSPatrick McGehearty fmovd %d16, %d2 1258*280575beSPatrick McGehearty fmovd %d18, %d4 1259*280575beSPatrick McGehearty fmovd %d20, %d6 1260*280575beSPatrick McGehearty fmovd %d22, %d8 1261*280575beSPatrick McGehearty fmovd %d24, %d10 1262*280575beSPatrick McGehearty fmovd %d26, %d12 1263*280575beSPatrick McGehearty fmovd %d28, %d14 1264*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! 
block initializing store 1265*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 1266*280575beSPatrick McGehearty add %i0, 64, %i0 1267*280575beSPatrick McGehearty fmovd %d30, %d0 1268*280575beSPatrick McGehearty bgt,pt %ncc, .bc_aln_111_loop 1269*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1270*280575beSPatrick McGehearty add %i1, %i0, %i1 1271*280575beSPatrick McGehearty 1272*280575beSPatrick McGehearty std %d0, [%i1] 1273*280575beSPatrick McGehearty ba .bc_remain_stuff 1274*280575beSPatrick McGehearty add %i1, 8, %i1 1275*280575beSPatrick McGehearty ! END OF aln_111 1276*280575beSPatrick McGehearty 1277*280575beSPatrick McGehearty.bc_aln_110: 1278*280575beSPatrick McGehearty! Alignment off by 16 bytes 1279*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1280*280575beSPatrick McGehearty ldd [%i0], %d0 1281*280575beSPatrick McGehearty ldd [%i0+8], %d2 1282*280575beSPatrick McGehearty add %i0, 16, %i0 1283*280575beSPatrick McGehearty sub %i2, 16, %i2 1284*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 1285*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 1286*280575beSPatrick McGehearty sub %i1, %i0, %i1 1287*280575beSPatrick McGehearty.bc_aln_110_loop: 1288*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 1289*280575beSPatrick McGehearty subcc %o3, 64, %o3 1290*280575beSPatrick McGehearty fmovd %d16, %d4 1291*280575beSPatrick McGehearty fmovd %d18, %d6 1292*280575beSPatrick McGehearty fmovd %d20, %d8 1293*280575beSPatrick McGehearty fmovd %d22, %d10 1294*280575beSPatrick McGehearty fmovd %d24, %d12 1295*280575beSPatrick McGehearty fmovd %d26, %d14 1296*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! 
block initializing store 1297*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 1298*280575beSPatrick McGehearty add %i0, 64, %i0 1299*280575beSPatrick McGehearty fmovd %d28, %d0 1300*280575beSPatrick McGehearty fmovd %d30, %d2 1301*280575beSPatrick McGehearty bgt,pt %ncc, .bc_aln_110_loop 1302*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1303*280575beSPatrick McGehearty add %i1, %i0, %i1 1304*280575beSPatrick McGehearty 1305*280575beSPatrick McGehearty std %d0, [%i1] 1306*280575beSPatrick McGehearty std %d2, [%i1+8] 1307*280575beSPatrick McGehearty ba .bc_remain_stuff 1308*280575beSPatrick McGehearty add %i1, 16, %i1 1309*280575beSPatrick McGehearty ! END OF aln_110 1310*280575beSPatrick McGehearty 1311*280575beSPatrick McGehearty.bc_aln_101: 1312*280575beSPatrick McGehearty! Alignment off by 24 bytes 1313*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1314*280575beSPatrick McGehearty ldd [%i0], %d0 1315*280575beSPatrick McGehearty ldd [%i0+8], %d2 1316*280575beSPatrick McGehearty ldd [%i0+16], %d4 1317*280575beSPatrick McGehearty add %i0, 24, %i0 1318*280575beSPatrick McGehearty sub %i2, 24, %i2 1319*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 1320*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 1321*280575beSPatrick McGehearty sub %i1, %i0, %i1 1322*280575beSPatrick McGehearty.bc_aln_101_loop: 1323*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 1324*280575beSPatrick McGehearty subcc %o3, 64, %o3 1325*280575beSPatrick McGehearty fmovd %d16, %d6 1326*280575beSPatrick McGehearty fmovd %d18, %d8 1327*280575beSPatrick McGehearty fmovd %d20, %d10 1328*280575beSPatrick McGehearty fmovd %d22, %d12 1329*280575beSPatrick McGehearty fmovd %d24, %d14 1330*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! 
block initializing store 1331*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 1332*280575beSPatrick McGehearty add %i0, 64, %i0 1333*280575beSPatrick McGehearty fmovd %d26, %d0 1334*280575beSPatrick McGehearty fmovd %d28, %d2 1335*280575beSPatrick McGehearty fmovd %d30, %d4 1336*280575beSPatrick McGehearty bgt,pt %ncc, .bc_aln_101_loop 1337*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1338*280575beSPatrick McGehearty add %i1, %i0, %i1 1339*280575beSPatrick McGehearty 1340*280575beSPatrick McGehearty std %d0, [%i1] 1341*280575beSPatrick McGehearty std %d2, [%i1+8] 1342*280575beSPatrick McGehearty std %d4, [%i1+16] 1343*280575beSPatrick McGehearty ba .bc_remain_stuff 1344*280575beSPatrick McGehearty add %i1, 24, %i1 1345*280575beSPatrick McGehearty ! END OF aln_101 1346*280575beSPatrick McGehearty 1347*280575beSPatrick McGehearty.bc_aln_100: 1348*280575beSPatrick McGehearty! Alignment off by 32 bytes 1349*280575beSPatrick McGehearty ldd [%i0], %d0 1350*280575beSPatrick McGehearty ldd [%i0+8], %d2 1351*280575beSPatrick McGehearty ldd [%i0+16],%d4 1352*280575beSPatrick McGehearty ldd [%i0+24],%d6 1353*280575beSPatrick McGehearty add %i0, 32, %i0 1354*280575beSPatrick McGehearty sub %i2, 32, %i2 1355*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 1356*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 1357*280575beSPatrick McGehearty sub %i1, %i0, %i1 1358*280575beSPatrick McGehearty.bc_aln_100_loop: 1359*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 1360*280575beSPatrick McGehearty subcc %o3, 64, %o3 1361*280575beSPatrick McGehearty fmovd %d16, %d8 1362*280575beSPatrick McGehearty fmovd %d18, %d10 1363*280575beSPatrick McGehearty fmovd %d20, %d12 1364*280575beSPatrick McGehearty fmovd %d22, %d14 1365*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! 
block initializing store 1366*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 1367*280575beSPatrick McGehearty add %i0, 64, %i0 1368*280575beSPatrick McGehearty fmovd %d24, %d0 1369*280575beSPatrick McGehearty fmovd %d26, %d2 1370*280575beSPatrick McGehearty fmovd %d28, %d4 1371*280575beSPatrick McGehearty fmovd %d30, %d6 1372*280575beSPatrick McGehearty bgt,pt %ncc, .bc_aln_100_loop 1373*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1374*280575beSPatrick McGehearty add %i1, %i0, %i1 1375*280575beSPatrick McGehearty 1376*280575beSPatrick McGehearty std %d0, [%i1] 1377*280575beSPatrick McGehearty std %d2, [%i1+8] 1378*280575beSPatrick McGehearty std %d4, [%i1+16] 1379*280575beSPatrick McGehearty std %d6, [%i1+24] 1380*280575beSPatrick McGehearty ba .bc_remain_stuff 1381*280575beSPatrick McGehearty add %i1, 32, %i1 1382*280575beSPatrick McGehearty ! END OF aln_100 1383*280575beSPatrick McGehearty 1384*280575beSPatrick McGehearty.bc_aln_011: 1385*280575beSPatrick McGehearty! Alignment off by 40 bytes 1386*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1387*280575beSPatrick McGehearty ldd [%i0], %d0 1388*280575beSPatrick McGehearty ldd [%i0+8], %d2 1389*280575beSPatrick McGehearty ldd [%i0+16], %d4 1390*280575beSPatrick McGehearty ldd [%i0+24], %d6 1391*280575beSPatrick McGehearty ldd [%i0+32], %d8 1392*280575beSPatrick McGehearty add %i0, 40, %i0 1393*280575beSPatrick McGehearty sub %i2, 40, %i2 1394*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 1395*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 1396*280575beSPatrick McGehearty sub %i1, %i0, %i1 1397*280575beSPatrick McGehearty.bc_aln_011_loop: 1398*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! 
block load 1399*280575beSPatrick McGehearty subcc %o3, 64, %o3 1400*280575beSPatrick McGehearty fmovd %d16, %d10 1401*280575beSPatrick McGehearty fmovd %d18, %d12 1402*280575beSPatrick McGehearty fmovd %d20, %d14 1403*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 1404*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 1405*280575beSPatrick McGehearty add %i0, 64, %i0 1406*280575beSPatrick McGehearty fmovd %d22, %d0 1407*280575beSPatrick McGehearty fmovd %d24, %d2 1408*280575beSPatrick McGehearty fmovd %d26, %d4 1409*280575beSPatrick McGehearty fmovd %d28, %d6 1410*280575beSPatrick McGehearty fmovd %d30, %d8 1411*280575beSPatrick McGehearty bgt,pt %ncc, .bc_aln_011_loop 1412*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1413*280575beSPatrick McGehearty add %i1, %i0, %i1 1414*280575beSPatrick McGehearty 1415*280575beSPatrick McGehearty std %d0, [%i1] 1416*280575beSPatrick McGehearty std %d2, [%i1+8] 1417*280575beSPatrick McGehearty std %d4, [%i1+16] 1418*280575beSPatrick McGehearty std %d6, [%i1+24] 1419*280575beSPatrick McGehearty std %d8, [%i1+32] 1420*280575beSPatrick McGehearty ba .bc_remain_stuff 1421*280575beSPatrick McGehearty add %i1, 40, %i1 1422*280575beSPatrick McGehearty ! END OF aln_011 1423*280575beSPatrick McGehearty 1424*280575beSPatrick McGehearty.bc_aln_010: 1425*280575beSPatrick McGehearty! Alignment off by 48 bytes 1426*280575beSPatrick McGehearty ldd [%i0], %d0 1427*280575beSPatrick McGehearty ldd [%i0+8], %d2 1428*280575beSPatrick McGehearty ldd [%i0+16], %d4 1429*280575beSPatrick McGehearty ldd [%i0+24], %d6 1430*280575beSPatrick McGehearty ldd [%i0+32], %d8 1431*280575beSPatrick McGehearty ldd [%i0+40], %d10 1432*280575beSPatrick McGehearty add %i0, 48, %i0 1433*280575beSPatrick McGehearty sub %i2, 48, %i2 1434*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 1435*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! 
residue bytes in %i2 1436*280575beSPatrick McGehearty sub %i1, %i0, %i1 1437*280575beSPatrick McGehearty.bc_aln_010_loop: 1438*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 1439*280575beSPatrick McGehearty subcc %o3, 64, %o3 1440*280575beSPatrick McGehearty fmovd %d16, %d12 1441*280575beSPatrick McGehearty fmovd %d18, %d14 1442*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 1443*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 1444*280575beSPatrick McGehearty add %i0, 64, %i0 1445*280575beSPatrick McGehearty fmovd %d20, %d0 1446*280575beSPatrick McGehearty fmovd %d22, %d2 1447*280575beSPatrick McGehearty fmovd %d24, %d4 1448*280575beSPatrick McGehearty fmovd %d26, %d6 1449*280575beSPatrick McGehearty fmovd %d28, %d8 1450*280575beSPatrick McGehearty fmovd %d30, %d10 1451*280575beSPatrick McGehearty bgt,pt %ncc, .bc_aln_010_loop 1452*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1453*280575beSPatrick McGehearty add %i1, %i0, %i1 1454*280575beSPatrick McGehearty 1455*280575beSPatrick McGehearty std %d0, [%i1] 1456*280575beSPatrick McGehearty std %d2, [%i1+8] 1457*280575beSPatrick McGehearty std %d4, [%i1+16] 1458*280575beSPatrick McGehearty std %d6, [%i1+24] 1459*280575beSPatrick McGehearty std %d8, [%i1+32] 1460*280575beSPatrick McGehearty std %d10, [%i1+40] 1461*280575beSPatrick McGehearty ba .bc_remain_stuff 1462*280575beSPatrick McGehearty add %i1, 48, %i1 1463*280575beSPatrick McGehearty ! END OF aln_010 1464*280575beSPatrick McGehearty 1465*280575beSPatrick McGehearty.bc_aln_001: 1466*280575beSPatrick McGehearty! 
Alignment off by 56 bytes 1467*280575beSPatrick McGehearty ldd [%i0], %d0 1468*280575beSPatrick McGehearty ldd [%i0+8], %d2 1469*280575beSPatrick McGehearty ldd [%i0+16], %d4 1470*280575beSPatrick McGehearty ldd [%i0+24], %d6 1471*280575beSPatrick McGehearty ldd [%i0+32], %d8 1472*280575beSPatrick McGehearty ldd [%i0+40], %d10 1473*280575beSPatrick McGehearty ldd [%i0+48], %d12 1474*280575beSPatrick McGehearty add %i0, 56, %i0 1475*280575beSPatrick McGehearty sub %i2, 56, %i2 1476*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 1477*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 1478*280575beSPatrick McGehearty sub %i1, %i0, %i1 1479*280575beSPatrick McGehearty.bc_aln_001_loop: 1480*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 1481*280575beSPatrick McGehearty subcc %o3, 64, %o3 1482*280575beSPatrick McGehearty fmovd %d16, %d14 1483*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 1484*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 1485*280575beSPatrick McGehearty add %i0, 64, %i0 1486*280575beSPatrick McGehearty fmovd %d18, %d0 1487*280575beSPatrick McGehearty fmovd %d20, %d2 1488*280575beSPatrick McGehearty fmovd %d22, %d4 1489*280575beSPatrick McGehearty fmovd %d24, %d6 1490*280575beSPatrick McGehearty fmovd %d26, %d8 1491*280575beSPatrick McGehearty fmovd %d28, %d10 1492*280575beSPatrick McGehearty fmovd %d30, %d12 1493*280575beSPatrick McGehearty bgt,pt %ncc, .bc_aln_001_loop 1494*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1495*280575beSPatrick McGehearty add %i1, %i0, %i1 1496*280575beSPatrick McGehearty 1497*280575beSPatrick McGehearty std %d0, [%i1] 1498*280575beSPatrick McGehearty std %d2, [%i1+8] 1499*280575beSPatrick McGehearty std %d4, [%i1+16] 1500*280575beSPatrick McGehearty std %d6, [%i1+24] 1501*280575beSPatrick McGehearty std %d8, [%i1+32] 1502*280575beSPatrick McGehearty std %d10, [%i1+40] 
1503*280575beSPatrick McGehearty std %d12, [%i1+48] 1504*280575beSPatrick McGehearty ba .bc_remain_stuff 1505*280575beSPatrick McGehearty add %i1, 56, %i1 1506*280575beSPatrick McGehearty ! END OF aln_001 1507*280575beSPatrick McGehearty 1508*280575beSPatrick McGehearty.bc_aln_000: 1509*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1510*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 1511*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 1512*280575beSPatrick McGehearty sub %i1, %i0, %i1 1513*280575beSPatrick McGehearty.bc_aln_000_loop: 1514*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d0 1515*280575beSPatrick McGehearty subcc %o3, 64, %o3 1516*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 1517*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 1518*280575beSPatrick McGehearty add %i0, 64, %i0 1519*280575beSPatrick McGehearty bgt,pt %ncc, .bc_aln_000_loop 1520*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1521*280575beSPatrick McGehearty add %i1, %i0, %i1 1522*280575beSPatrick McGehearty 1523*280575beSPatrick McGehearty ! END OF aln_000 1524*280575beSPatrick McGehearty 1525*280575beSPatrick McGehearty.bc_remain_stuff: 1526*280575beSPatrick McGehearty subcc %i2, 31, %i2 ! adjust length to allow cc test 1527*280575beSPatrick McGehearty ble,pt %ncc, .bc_aln_31 1528*280575beSPatrick McGehearty nop 1529*280575beSPatrick McGehearty.bc_aln_32: 1530*280575beSPatrick McGehearty ldx [%i0], %o4 ! move 32 bytes 1531*280575beSPatrick McGehearty subcc %i2, 32, %i2 ! decrement length count by 32 1532*280575beSPatrick McGehearty stx %o4, [%i1] 1533*280575beSPatrick McGehearty ldx [%i0+8], %o4 1534*280575beSPatrick McGehearty stx %o4, [%i1+8] 1535*280575beSPatrick McGehearty ldx [%i0+16], %o4 1536*280575beSPatrick McGehearty add %i0, 32, %i0 ! 
increase src ptr by 32 1537*280575beSPatrick McGehearty stx %o4, [%i1+16] 1538*280575beSPatrick McGehearty ldx [%i0-8], %o4 1539*280575beSPatrick McGehearty add %i1, 32, %i1 ! increase dst ptr by 32 1540*280575beSPatrick McGehearty bgu,pt %ncc, .bc_aln_32 ! repeat if at least 32 bytes left 1541*280575beSPatrick McGehearty stx %o4, [%i1-8] 1542*280575beSPatrick McGehearty.bc_aln_31: 1543*280575beSPatrick McGehearty addcc %i2, 24, %i2 ! adjust count to be off by 7 1544*280575beSPatrick McGehearty ble,pt %ncc, .bc_aln_7 ! skip if 7 or fewer bytes left 1545*280575beSPatrick McGehearty nop ! 1546*280575beSPatrick McGehearty.bc_aln_15: 1547*280575beSPatrick McGehearty ldx [%i0], %o4 ! move 8 bytes 1548*280575beSPatrick McGehearty add %i0, 8, %i0 ! increase src ptr by 8 1549*280575beSPatrick McGehearty subcc %i2, 8, %i2 ! decrease count by 8 1550*280575beSPatrick McGehearty add %i1, 8, %i1 ! increase dst ptr by 8 1551*280575beSPatrick McGehearty bgu,pt %ncc, .bc_aln_15 1552*280575beSPatrick McGehearty stx %o4, [%i1-8] ! 1553*280575beSPatrick McGehearty.bc_aln_7: 1554*280575beSPatrick McGehearty addcc %i2, 7, %i2 ! finish adjustment of remaining count 1555*280575beSPatrick McGehearty bz,pt %ncc, .bc_exit ! exit if finished 1556*280575beSPatrick McGehearty cmp %i2, 4 1557*280575beSPatrick McGehearty blt,pt %ncc, .bc_unaln3x ! skip if less than 4 bytes left 1558*280575beSPatrick McGehearty nop ! 1559*280575beSPatrick McGehearty ld [%i0], %o4 ! move 4 bytes 1560*280575beSPatrick McGehearty add %i0, 4, %i0 ! increase src ptr by 4 1561*280575beSPatrick McGehearty add %i1, 4, %i1 ! increase dst ptr by 4 1562*280575beSPatrick McGehearty subcc %i2, 4, %i2 ! decrease count by 4 1563*280575beSPatrick McGehearty bnz .bc_unaln3x 1564*280575beSPatrick McGehearty stw %o4, [%i1-4] 1565*280575beSPatrick McGehearty ba .bc_exit 1566*280575beSPatrick McGehearty nop 1567*280575beSPatrick McGehearty 1568*280575beSPatrick McGehearty ! 
destination alignment code 1569*280575beSPatrick McGehearty.bc_big_d1: 1570*280575beSPatrick McGehearty ldub [%i0], %o4 ! move a byte 1571*280575beSPatrick McGehearty add %i0, 1, %i0 1572*280575beSPatrick McGehearty stb %o4, [%i1] 1573*280575beSPatrick McGehearty add %i1, 1, %i1 1574*280575beSPatrick McGehearty andcc %i1, 2, %o3 1575*280575beSPatrick McGehearty bz,pt %ncc, .bc_big_d2f 1576*280575beSPatrick McGehearty sub %i2, 1, %i2 1577*280575beSPatrick McGehearty.bc_big_d2: 1578*280575beSPatrick McGehearty ldub [%i0], %o4 ! move a half-word (src align unknown) 1579*280575beSPatrick McGehearty ldub [%i0+1], %o3 1580*280575beSPatrick McGehearty add %i0, 2, %i0 1581*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 1582*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 1583*280575beSPatrick McGehearty sth %o4, [%i1] 1584*280575beSPatrick McGehearty add %i1, 2, %i1 1585*280575beSPatrick McGehearty andcc %i1, 4, %o3 1586*280575beSPatrick McGehearty bz,pt %ncc, .bc_big_d4f 1587*280575beSPatrick McGehearty sub %i2, 2, %i2 1588*280575beSPatrick McGehearty.bc_big_d4: 1589*280575beSPatrick McGehearty ldub [%i0], %o4 ! move a word (src align unknown) 1590*280575beSPatrick McGehearty ldub [%i0+1], %o3 1591*280575beSPatrick McGehearty sll %o4, 24, %o4 ! position 1592*280575beSPatrick McGehearty sll %o3, 16, %o3 ! position 1593*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 1594*280575beSPatrick McGehearty ldub [%i0+2], %o4 1595*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 1596*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 1597*280575beSPatrick McGehearty ldub [%i0+3], %o4 1598*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 1599*280575beSPatrick McGehearty stw %o4,[%i1] ! store four bytes 1600*280575beSPatrick McGehearty add %i0, 4, %i0 ! adjust src by 4 1601*280575beSPatrick McGehearty add %i1, 4, %i1 ! adjust dest by 4 1602*280575beSPatrick McGehearty ba .bc_big_d4f 1603*280575beSPatrick McGehearty sub %i2, 4, %i2 ! 
adjust count by 4 1604*280575beSPatrick McGehearty 1605*280575beSPatrick McGehearty 1606*280575beSPatrick McGehearty ! Dst is on 8 byte boundary; src is not; 1607*280575beSPatrick McGehearty.bc_big_unal8: 1608*280575beSPatrick McGehearty andcc %i1, 0x3f, %o3 ! is dst 64-byte block aligned? 1609*280575beSPatrick McGehearty bz %ncc, .bc_unalnsrc 1610*280575beSPatrick McGehearty sub %o3, 64, %o3 ! %o3 will be multiple of 8 1611*280575beSPatrick McGehearty neg %o3 ! bytes until dest is 64 byte aligned 1612*280575beSPatrick McGehearty sub %i2, %o3, %i2 ! update cnt with bytes to be moved 1613*280575beSPatrick McGehearty ! Move bytes according to source alignment 1614*280575beSPatrick McGehearty andcc %i0, 0x1, %o4 1615*280575beSPatrick McGehearty bnz %ncc, .bc_unalnbyte ! check for byte alignment 1616*280575beSPatrick McGehearty nop 1617*280575beSPatrick McGehearty andcc %i0, 2, %o4 ! check for half word alignment 1618*280575beSPatrick McGehearty bnz %ncc, .bc_unalnhalf 1619*280575beSPatrick McGehearty nop 1620*280575beSPatrick McGehearty ! Src is word aligned, move bytes until dest 64 byte aligned 1621*280575beSPatrick McGehearty.bc_unalnword: 1622*280575beSPatrick McGehearty ld [%i0], %o4 ! load 4 bytes 1623*280575beSPatrick McGehearty stw %o4, [%i1] ! and store 4 bytes 1624*280575beSPatrick McGehearty ld [%i0+4], %o4 ! load 4 bytes 1625*280575beSPatrick McGehearty add %i0, 8, %i0 ! increase src ptr by 8 1626*280575beSPatrick McGehearty stw %o4, [%i1+4] ! and store 4 bytes 1627*280575beSPatrick McGehearty subcc %o3, 8, %o3 ! decrease count by 8 1628*280575beSPatrick McGehearty bnz %ncc, .bc_unalnword 1629*280575beSPatrick McGehearty add %i1, 8, %i1 ! increase dst ptr by 8 1630*280575beSPatrick McGehearty ba .bc_unalnsrc 1631*280575beSPatrick McGehearty nop 1632*280575beSPatrick McGehearty 1633*280575beSPatrick McGehearty ! 
Src is half-word aligned, move bytes until dest 64 byte aligned 1634*280575beSPatrick McGehearty.bc_unalnhalf: 1635*280575beSPatrick McGehearty lduh [%i0], %o4 ! load 2 bytes 1636*280575beSPatrick McGehearty sllx %o4, 32, %i3 ! shift left 1637*280575beSPatrick McGehearty lduw [%i0+2], %o4 1638*280575beSPatrick McGehearty or %o4, %i3, %i3 1639*280575beSPatrick McGehearty sllx %i3, 16, %i3 1640*280575beSPatrick McGehearty lduh [%i0+6], %o4 1641*280575beSPatrick McGehearty or %o4, %i3, %i3 1642*280575beSPatrick McGehearty stx %i3, [%i1] 1643*280575beSPatrick McGehearty add %i0, 8, %i0 1644*280575beSPatrick McGehearty subcc %o3, 8, %o3 1645*280575beSPatrick McGehearty bnz %ncc, .bc_unalnhalf 1646*280575beSPatrick McGehearty add %i1, 8, %i1 1647*280575beSPatrick McGehearty ba .bc_unalnsrc 1648*280575beSPatrick McGehearty nop 1649*280575beSPatrick McGehearty 1650*280575beSPatrick McGehearty ! Src is Byte aligned, move bytes until dest 64 byte aligned 1651*280575beSPatrick McGehearty.bc_unalnbyte: 1652*280575beSPatrick McGehearty sub %i1, %i0, %i1 ! 
share pointer advance 1653*280575beSPatrick McGehearty.bc_unalnbyte_loop: 1654*280575beSPatrick McGehearty ldub [%i0], %o4 1655*280575beSPatrick McGehearty sllx %o4, 56, %i3 1656*280575beSPatrick McGehearty lduh [%i0+1], %o4 1657*280575beSPatrick McGehearty sllx %o4, 40, %o4 1658*280575beSPatrick McGehearty or %o4, %i3, %i3 1659*280575beSPatrick McGehearty lduh [%i0+3], %o4 1660*280575beSPatrick McGehearty sllx %o4, 24, %o4 1661*280575beSPatrick McGehearty or %o4, %i3, %i3 1662*280575beSPatrick McGehearty lduh [%i0+5], %o4 1663*280575beSPatrick McGehearty sllx %o4, 8, %o4 1664*280575beSPatrick McGehearty or %o4, %i3, %i3 1665*280575beSPatrick McGehearty ldub [%i0+7], %o4 1666*280575beSPatrick McGehearty or %o4, %i3, %i3 1667*280575beSPatrick McGehearty stx %i3, [%i1+%i0] 1668*280575beSPatrick McGehearty subcc %o3, 8, %o3 1669*280575beSPatrick McGehearty bnz %ncc, .bc_unalnbyte_loop 1670*280575beSPatrick McGehearty add %i0, 8, %i0 1671*280575beSPatrick McGehearty add %i1,%i0, %i1 ! restore pointer 1672*280575beSPatrick McGehearty 1673*280575beSPatrick McGehearty ! Destination is now block (64 byte) aligned, src is not 8 byte aligned 1674*280575beSPatrick McGehearty.bc_unalnsrc: 1675*280575beSPatrick McGehearty andn %i2, 0x3f, %i3 ! %i3 is multiple of block size 1676*280575beSPatrick McGehearty and %i2, 0x3f, %i2 ! residue bytes in %i2 1677*280575beSPatrick McGehearty add %i2, 64, %i2 ! Ensure we don't load beyond 1678*280575beSPatrick McGehearty sub %i3, 64, %i3 ! end of source buffer 1679*280575beSPatrick McGehearty 1680*280575beSPatrick McGehearty andn %i0, 0x3f, %o4 ! %o4 has block aligned src address 1681*280575beSPatrick McGehearty prefetch [%o4 + (3 * CACHE_LINE)], #one_read 1682*280575beSPatrick McGehearty alignaddr %i0, %g0, %g0 ! generate %gsr 1683*280575beSPatrick McGehearty add %i0, %i3, %i0 ! advance %i0 to after blocks 1684*280575beSPatrick McGehearty ! 1685*280575beSPatrick McGehearty ! 
Determine source alignment to correct 8 byte offset 1686*280575beSPatrick McGehearty andcc %i0, 0x20, %o3 1687*280575beSPatrick McGehearty brnz,pn %o3, .bc_unaln_1 1688*280575beSPatrick McGehearty andcc %i0, 0x10, %o3 1689*280575beSPatrick McGehearty brnz,pn %o3, .bc_unaln_01 1690*280575beSPatrick McGehearty andcc %i0, 0x08, %o3 1691*280575beSPatrick McGehearty brz,a %o3, .bc_unaln_000 1692*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1693*280575beSPatrick McGehearty ba .bc_unaln_001 1694*280575beSPatrick McGehearty nop 1695*280575beSPatrick McGehearty.bc_unaln_01: 1696*280575beSPatrick McGehearty brnz,a %o3, .bc_unaln_011 1697*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1698*280575beSPatrick McGehearty ba .bc_unaln_010 1699*280575beSPatrick McGehearty nop 1700*280575beSPatrick McGehearty.bc_unaln_1: 1701*280575beSPatrick McGehearty brnz,pn %o3, .bc_unaln_11 1702*280575beSPatrick McGehearty andcc %i0, 0x08, %o3 1703*280575beSPatrick McGehearty brnz,a %o3, .bc_unaln_101 1704*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1705*280575beSPatrick McGehearty ba .bc_unaln_100 1706*280575beSPatrick McGehearty nop 1707*280575beSPatrick McGehearty.bc_unaln_11: 1708*280575beSPatrick McGehearty brz,pn %o3, .bc_unaln_110 1709*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 1710*280575beSPatrick McGehearty 1711*280575beSPatrick McGehearty.bc_unaln_111: 1712*280575beSPatrick McGehearty ldd [%o4+56], %d14 1713*280575beSPatrick McGehearty.bc_unaln_111_loop: 1714*280575beSPatrick McGehearty add %o4, 64, %o4 1715*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 1716*280575beSPatrick McGehearty faligndata %d14, %d16, %d48 1717*280575beSPatrick McGehearty faligndata %d16, %d18, %d50 1718*280575beSPatrick McGehearty faligndata %d18, %d20, %d52 1719*280575beSPatrick McGehearty faligndata %d20, %d22, %d54 1720*280575beSPatrick McGehearty faligndata %d22, %d24, %d56 
1721*280575beSPatrick McGehearty faligndata %d24, %d26, %d58 1722*280575beSPatrick McGehearty faligndata %d26, %d28, %d60 1723*280575beSPatrick McGehearty faligndata %d28, %d30, %d62 1724*280575beSPatrick McGehearty fmovd %d30, %d14 1725*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 1726*280575beSPatrick McGehearty subcc %i3, 64, %i3 1727*280575beSPatrick McGehearty add %i1, 64, %i1 1728*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_111_loop 1729*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1730*280575beSPatrick McGehearty ba .bc_unaln_done 1731*280575beSPatrick McGehearty nop 1732*280575beSPatrick McGehearty 1733*280575beSPatrick McGehearty.bc_unaln_110: 1734*280575beSPatrick McGehearty ldd [%o4+48], %d12 1735*280575beSPatrick McGehearty ldd [%o4+56], %d14 1736*280575beSPatrick McGehearty.bc_unaln_110_loop: 1737*280575beSPatrick McGehearty add %o4, 64, %o4 1738*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 1739*280575beSPatrick McGehearty faligndata %d12, %d14, %d48 1740*280575beSPatrick McGehearty faligndata %d14, %d16, %d50 1741*280575beSPatrick McGehearty faligndata %d16, %d18, %d52 1742*280575beSPatrick McGehearty faligndata %d18, %d20, %d54 1743*280575beSPatrick McGehearty faligndata %d20, %d22, %d56 1744*280575beSPatrick McGehearty faligndata %d22, %d24, %d58 1745*280575beSPatrick McGehearty faligndata %d24, %d26, %d60 1746*280575beSPatrick McGehearty faligndata %d26, %d28, %d62 1747*280575beSPatrick McGehearty fmovd %d28, %d12 1748*280575beSPatrick McGehearty fmovd %d30, %d14 1749*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 1750*280575beSPatrick McGehearty subcc %i3, 64, %i3 1751*280575beSPatrick McGehearty add %i1, 64, %i1 1752*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_110_loop 1753*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1754*280575beSPatrick McGehearty ba .bc_unaln_done 1755*280575beSPatrick McGehearty nop 1756*280575beSPatrick McGehearty 
1757*280575beSPatrick McGehearty.bc_unaln_101: 1758*280575beSPatrick McGehearty ldd [%o4+40], %d10 1759*280575beSPatrick McGehearty ldd [%o4+48], %d12 1760*280575beSPatrick McGehearty ldd [%o4+56], %d14 1761*280575beSPatrick McGehearty.bc_unaln_101_loop: 1762*280575beSPatrick McGehearty add %o4, 64, %o4 1763*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 1764*280575beSPatrick McGehearty faligndata %d10, %d12, %d48 1765*280575beSPatrick McGehearty faligndata %d12, %d14, %d50 1766*280575beSPatrick McGehearty faligndata %d14, %d16, %d52 1767*280575beSPatrick McGehearty faligndata %d16, %d18, %d54 1768*280575beSPatrick McGehearty faligndata %d18, %d20, %d56 1769*280575beSPatrick McGehearty faligndata %d20, %d22, %d58 1770*280575beSPatrick McGehearty faligndata %d22, %d24, %d60 1771*280575beSPatrick McGehearty faligndata %d24, %d26, %d62 1772*280575beSPatrick McGehearty fmovd %d26, %d10 1773*280575beSPatrick McGehearty fmovd %d28, %d12 1774*280575beSPatrick McGehearty fmovd %d30, %d14 1775*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 1776*280575beSPatrick McGehearty subcc %i3, 64, %i3 1777*280575beSPatrick McGehearty add %i1, 64, %i1 1778*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_101_loop 1779*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1780*280575beSPatrick McGehearty ba .bc_unaln_done 1781*280575beSPatrick McGehearty nop 1782*280575beSPatrick McGehearty 1783*280575beSPatrick McGehearty.bc_unaln_100: 1784*280575beSPatrick McGehearty ldd [%o4+32], %d8 1785*280575beSPatrick McGehearty ldd [%o4+40], %d10 1786*280575beSPatrick McGehearty ldd [%o4+48], %d12 1787*280575beSPatrick McGehearty ldd [%o4+56], %d14 1788*280575beSPatrick McGehearty.bc_unaln_100_loop: 1789*280575beSPatrick McGehearty add %o4, 64, %o4 1790*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 1791*280575beSPatrick McGehearty faligndata %d8, %d10, %d48 1792*280575beSPatrick McGehearty faligndata %d10, %d12, %d50 1793*280575beSPatrick McGehearty 
faligndata %d12, %d14, %d52 1794*280575beSPatrick McGehearty faligndata %d14, %d16, %d54 1795*280575beSPatrick McGehearty faligndata %d16, %d18, %d56 1796*280575beSPatrick McGehearty faligndata %d18, %d20, %d58 1797*280575beSPatrick McGehearty faligndata %d20, %d22, %d60 1798*280575beSPatrick McGehearty faligndata %d22, %d24, %d62 1799*280575beSPatrick McGehearty fmovd %d24, %d8 1800*280575beSPatrick McGehearty fmovd %d26, %d10 1801*280575beSPatrick McGehearty fmovd %d28, %d12 1802*280575beSPatrick McGehearty fmovd %d30, %d14 1803*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 1804*280575beSPatrick McGehearty subcc %i3, 64, %i3 1805*280575beSPatrick McGehearty add %i1, 64, %i1 1806*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_100_loop 1807*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1808*280575beSPatrick McGehearty ba .bc_unaln_done 1809*280575beSPatrick McGehearty nop 1810*280575beSPatrick McGehearty 1811*280575beSPatrick McGehearty.bc_unaln_011: 1812*280575beSPatrick McGehearty ldd [%o4+24], %d6 1813*280575beSPatrick McGehearty ldd [%o4+32], %d8 1814*280575beSPatrick McGehearty ldd [%o4+40], %d10 1815*280575beSPatrick McGehearty ldd [%o4+48], %d12 1816*280575beSPatrick McGehearty ldd [%o4+56], %d14 1817*280575beSPatrick McGehearty.bc_unaln_011_loop: 1818*280575beSPatrick McGehearty add %o4, 64, %o4 1819*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 1820*280575beSPatrick McGehearty faligndata %d6, %d8, %d48 1821*280575beSPatrick McGehearty faligndata %d8, %d10, %d50 1822*280575beSPatrick McGehearty faligndata %d10, %d12, %d52 1823*280575beSPatrick McGehearty faligndata %d12, %d14, %d54 1824*280575beSPatrick McGehearty faligndata %d14, %d16, %d56 1825*280575beSPatrick McGehearty faligndata %d16, %d18, %d58 1826*280575beSPatrick McGehearty faligndata %d18, %d20, %d60 1827*280575beSPatrick McGehearty faligndata %d20, %d22, %d62 1828*280575beSPatrick McGehearty fmovd %d22, %d6 1829*280575beSPatrick McGehearty fmovd 
%d24, %d8 1830*280575beSPatrick McGehearty fmovd %d26, %d10 1831*280575beSPatrick McGehearty fmovd %d28, %d12 1832*280575beSPatrick McGehearty fmovd %d30, %d14 1833*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 1834*280575beSPatrick McGehearty subcc %i3, 64, %i3 1835*280575beSPatrick McGehearty add %i1, 64, %i1 1836*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_011_loop 1837*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1838*280575beSPatrick McGehearty ba .bc_unaln_done 1839*280575beSPatrick McGehearty nop 1840*280575beSPatrick McGehearty 1841*280575beSPatrick McGehearty.bc_unaln_010: 1842*280575beSPatrick McGehearty ldd [%o4+16], %d4 1843*280575beSPatrick McGehearty ldd [%o4+24], %d6 1844*280575beSPatrick McGehearty ldd [%o4+32], %d8 1845*280575beSPatrick McGehearty ldd [%o4+40], %d10 1846*280575beSPatrick McGehearty ldd [%o4+48], %d12 1847*280575beSPatrick McGehearty ldd [%o4+56], %d14 1848*280575beSPatrick McGehearty.bc_unaln_010_loop: 1849*280575beSPatrick McGehearty add %o4, 64, %o4 1850*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 1851*280575beSPatrick McGehearty faligndata %d4, %d6, %d48 1852*280575beSPatrick McGehearty faligndata %d6, %d8, %d50 1853*280575beSPatrick McGehearty faligndata %d8, %d10, %d52 1854*280575beSPatrick McGehearty faligndata %d10, %d12, %d54 1855*280575beSPatrick McGehearty faligndata %d12, %d14, %d56 1856*280575beSPatrick McGehearty faligndata %d14, %d16, %d58 1857*280575beSPatrick McGehearty faligndata %d16, %d18, %d60 1858*280575beSPatrick McGehearty faligndata %d18, %d20, %d62 1859*280575beSPatrick McGehearty fmovd %d20, %d4 1860*280575beSPatrick McGehearty fmovd %d22, %d6 1861*280575beSPatrick McGehearty fmovd %d24, %d8 1862*280575beSPatrick McGehearty fmovd %d26, %d10 1863*280575beSPatrick McGehearty fmovd %d28, %d12 1864*280575beSPatrick McGehearty fmovd %d30, %d14 1865*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 1866*280575beSPatrick McGehearty subcc %i3, 64, %i3 
1867*280575beSPatrick McGehearty add %i1, 64, %i1 1868*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_010_loop 1869*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1870*280575beSPatrick McGehearty ba .bc_unaln_done 1871*280575beSPatrick McGehearty nop 1872*280575beSPatrick McGehearty 1873*280575beSPatrick McGehearty.bc_unaln_001: 1874*280575beSPatrick McGehearty ldd [%o4+8], %d2 1875*280575beSPatrick McGehearty ldd [%o4+16], %d4 1876*280575beSPatrick McGehearty ldd [%o4+24], %d6 1877*280575beSPatrick McGehearty ldd [%o4+32], %d8 1878*280575beSPatrick McGehearty ldd [%o4+40], %d10 1879*280575beSPatrick McGehearty ldd [%o4+48], %d12 1880*280575beSPatrick McGehearty ldd [%o4+56], %d14 1881*280575beSPatrick McGehearty.bc_unaln_001_loop: 1882*280575beSPatrick McGehearty add %o4, 64, %o4 1883*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 1884*280575beSPatrick McGehearty faligndata %d2, %d4, %d48 1885*280575beSPatrick McGehearty faligndata %d4, %d6, %d50 1886*280575beSPatrick McGehearty faligndata %d6, %d8, %d52 1887*280575beSPatrick McGehearty faligndata %d8, %d10, %d54 1888*280575beSPatrick McGehearty faligndata %d10, %d12, %d56 1889*280575beSPatrick McGehearty faligndata %d12, %d14, %d58 1890*280575beSPatrick McGehearty faligndata %d14, %d16, %d60 1891*280575beSPatrick McGehearty faligndata %d16, %d18, %d62 1892*280575beSPatrick McGehearty fmovd %d18, %d2 1893*280575beSPatrick McGehearty fmovd %d20, %d4 1894*280575beSPatrick McGehearty fmovd %d22, %d6 1895*280575beSPatrick McGehearty fmovd %d24, %d8 1896*280575beSPatrick McGehearty fmovd %d26, %d10 1897*280575beSPatrick McGehearty fmovd %d28, %d12 1898*280575beSPatrick McGehearty fmovd %d30, %d14 1899*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 1900*280575beSPatrick McGehearty subcc %i3, 64, %i3 1901*280575beSPatrick McGehearty add %i1, 64, %i1 1902*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_001_loop 1903*280575beSPatrick McGehearty prefetch [%o4 + (4 * 
CACHE_LINE)], #one_read 1904*280575beSPatrick McGehearty ba .bc_unaln_done 1905*280575beSPatrick McGehearty nop 1906*280575beSPatrick McGehearty 1907*280575beSPatrick McGehearty.bc_unaln_000: 1908*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d0 1909*280575beSPatrick McGehearty.bc_unaln_000_loop: 1910*280575beSPatrick McGehearty add %o4, 64, %o4 1911*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 1912*280575beSPatrick McGehearty faligndata %d0, %d2, %d48 1913*280575beSPatrick McGehearty faligndata %d2, %d4, %d50 1914*280575beSPatrick McGehearty faligndata %d4, %d6, %d52 1915*280575beSPatrick McGehearty faligndata %d6, %d8, %d54 1916*280575beSPatrick McGehearty faligndata %d8, %d10, %d56 1917*280575beSPatrick McGehearty faligndata %d10, %d12, %d58 1918*280575beSPatrick McGehearty faligndata %d12, %d14, %d60 1919*280575beSPatrick McGehearty faligndata %d14, %d16, %d62 1920*280575beSPatrick McGehearty fmovd %d16, %d0 1921*280575beSPatrick McGehearty fmovd %d18, %d2 1922*280575beSPatrick McGehearty fmovd %d20, %d4 1923*280575beSPatrick McGehearty fmovd %d22, %d6 1924*280575beSPatrick McGehearty fmovd %d24, %d8 1925*280575beSPatrick McGehearty fmovd %d26, %d10 1926*280575beSPatrick McGehearty fmovd %d28, %d12 1927*280575beSPatrick McGehearty fmovd %d30, %d14 1928*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 1929*280575beSPatrick McGehearty subcc %i3, 64, %i3 1930*280575beSPatrick McGehearty add %i1, 64, %i1 1931*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_000_loop 1932*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 1933*280575beSPatrick McGehearty 1934*280575beSPatrick McGehearty.bc_unaln_done: 1935*280575beSPatrick McGehearty ! Handle trailing bytes, 64 to 127 1936*280575beSPatrick McGehearty ! 
Dest long word aligned, Src not long word aligned 1937*280575beSPatrick McGehearty cmp %i2, 15 1938*280575beSPatrick McGehearty bleu %ncc, .bc_unaln_short 1939*280575beSPatrick McGehearty 1940*280575beSPatrick McGehearty andn %i2, 0x7, %i3 ! %i3 is multiple of 8 1941*280575beSPatrick McGehearty and %i2, 0x7, %i2 ! residue bytes in %i2 1942*280575beSPatrick McGehearty add %i2, 8, %i2 1943*280575beSPatrick McGehearty sub %i3, 8, %i3 ! ensure we don't load past end of src 1944*280575beSPatrick McGehearty andn %i0, 0x7, %o4 ! %o4 has long word aligned src address 1945*280575beSPatrick McGehearty add %i0, %i3, %i0 ! advance %i0 to after multiple of 8 1946*280575beSPatrick McGehearty ldd [%o4], %d0 ! fetch partial word 1947*280575beSPatrick McGehearty.bc_unaln_by8: 1948*280575beSPatrick McGehearty ldd [%o4+8], %d2 1949*280575beSPatrick McGehearty add %o4, 8, %o4 1950*280575beSPatrick McGehearty faligndata %d0, %d2, %d16 1951*280575beSPatrick McGehearty subcc %i3, 8, %i3 1952*280575beSPatrick McGehearty std %d16, [%i1] 1953*280575beSPatrick McGehearty fmovd %d2, %d0 1954*280575beSPatrick McGehearty bgu,pt %ncc, .bc_unaln_by8 1955*280575beSPatrick McGehearty add %i1, 8, %i1 1956*280575beSPatrick McGehearty 1957*280575beSPatrick McGehearty.bc_unaln_short: 1958*280575beSPatrick McGehearty cmp %i2, 8 1959*280575beSPatrick McGehearty blt,pt %ncc, .bc_unalnfin 1960*280575beSPatrick McGehearty nop 1961*280575beSPatrick McGehearty ldub [%i0], %o4 1962*280575beSPatrick McGehearty sll %o4, 24, %o3 1963*280575beSPatrick McGehearty ldub [%i0+1], %o4 1964*280575beSPatrick McGehearty sll %o4, 16, %o4 1965*280575beSPatrick McGehearty or %o4, %o3, %o3 1966*280575beSPatrick McGehearty ldub [%i0+2], %o4 1967*280575beSPatrick McGehearty sll %o4, 8, %o4 1968*280575beSPatrick McGehearty or %o4, %o3, %o3 1969*280575beSPatrick McGehearty ldub [%i0+3], %o4 1970*280575beSPatrick McGehearty or %o4, %o3, %o3 1971*280575beSPatrick McGehearty stw %o3, [%i1] 1972*280575beSPatrick McGehearty ldub
[%i0+4], %o4 1973*280575beSPatrick McGehearty sll %o4, 24, %o3 1974*280575beSPatrick McGehearty ldub [%i0+5], %o4 1975*280575beSPatrick McGehearty sll %o4, 16, %o4 1976*280575beSPatrick McGehearty or %o4, %o3, %o3 1977*280575beSPatrick McGehearty ldub [%i0+6], %o4 1978*280575beSPatrick McGehearty sll %o4, 8, %o4 1979*280575beSPatrick McGehearty or %o4, %o3, %o3 1980*280575beSPatrick McGehearty ldub [%i0+7], %o4 1981*280575beSPatrick McGehearty or %o4, %o3, %o3 1982*280575beSPatrick McGehearty stw %o3, [%i1+4] 1983*280575beSPatrick McGehearty add %i0, 8, %i0 1984*280575beSPatrick McGehearty add %i1, 8, %i1 1985*280575beSPatrick McGehearty sub %i2, 8, %i2 1986*280575beSPatrick McGehearty.bc_unalnfin: 1987*280575beSPatrick McGehearty cmp %i2, 4 1988*280575beSPatrick McGehearty blt,pt %ncc, .bc_unalnz 1989*280575beSPatrick McGehearty tst %i2 1990*280575beSPatrick McGehearty ldub [%i0], %o3 ! read byte 1991*280575beSPatrick McGehearty subcc %i2, 4, %i2 ! reduce count by 4 1992*280575beSPatrick McGehearty sll %o3, 24, %o3 ! position 1993*280575beSPatrick McGehearty ldub [%i0+1], %o4 1994*280575beSPatrick McGehearty sll %o4, 16, %o4 ! position 1995*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 1996*280575beSPatrick McGehearty ldub [%i0+2], %o4 1997*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 1998*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 1999*280575beSPatrick McGehearty add %i1, 4, %i1 ! advance dst by 4 2000*280575beSPatrick McGehearty ldub [%i0+3], %o4 2001*280575beSPatrick McGehearty add %i0, 4, %i0 ! advance src by 4 2002*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 2003*280575beSPatrick McGehearty bnz,pt %ncc, .bc_unaln3x 2004*280575beSPatrick McGehearty stw %o4, [%i1-4] 2005*280575beSPatrick McGehearty ba .bc_exit 2006*280575beSPatrick McGehearty nop 2007*280575beSPatrick McGehearty.bc_unalnz: 2008*280575beSPatrick McGehearty bz,pt %ncc, .bc_exit 2009*280575beSPatrick McGehearty.bc_unaln3x: ! 
Exactly 1, 2, or 3 bytes remain 2010*280575beSPatrick McGehearty subcc %i2, 1, %i2 ! reduce count for cc test 2011*280575beSPatrick McGehearty ldub [%i0], %o4 ! load one byte 2012*280575beSPatrick McGehearty bz,pt %ncc, .bc_exit 2013*280575beSPatrick McGehearty stb %o4, [%i1] ! store one byte 2014*280575beSPatrick McGehearty ldub [%i0+1], %o4 ! load second byte 2015*280575beSPatrick McGehearty subcc %i2, 1, %i2 2016*280575beSPatrick McGehearty bz,pt %ncc, .bc_exit 2017*280575beSPatrick McGehearty stb %o4, [%i1+1] ! store second byte 2018*280575beSPatrick McGehearty ldub [%i0+2], %o4 ! load third byte 2019*280575beSPatrick McGehearty stb %o4, [%i1+2] ! store third byte 2020*280575beSPatrick McGehearty.bc_exit: 2021*280575beSPatrick McGehearty wr %l5, %g0, %gsr ! restore %gsr 2022*280575beSPatrick McGehearty brnz %g5, .bc_fp_restore 2023*280575beSPatrick McGehearty and %o5, COPY_FLAGS, %l1 ! save flags in %l1 2024*280575beSPatrick McGehearty FZERO 2025*280575beSPatrick McGehearty wr %g5, %g0, %fprs 2026*280575beSPatrick McGehearty ba,pt %ncc, .bc_ex2 2027*280575beSPatrick McGehearty nop 2028*280575beSPatrick McGehearty.bc_fp_restore: 2029*280575beSPatrick McGehearty BLD_FP_FROMSTACK(%o4) 2030*280575beSPatrick McGehearty.bc_ex2: 2031*280575beSPatrick McGehearty ldn [THREAD_REG + T_LWP], %o2 2032*280575beSPatrick McGehearty brnz,pt %o2, 1f 2033*280575beSPatrick McGehearty nop 2034*280575beSPatrick McGehearty 2035*280575beSPatrick McGehearty ldsb [THREAD_REG + T_PREEMPT], %l0 2036*280575beSPatrick McGehearty deccc %l0 2037*280575beSPatrick McGehearty bnz,pn %ncc, 1f 2038*280575beSPatrick McGehearty stb %l0, [THREAD_REG + T_PREEMPT] 2039*280575beSPatrick McGehearty 2040*280575beSPatrick McGehearty ! Check for a kernel preemption request 2041*280575beSPatrick McGehearty ldn [THREAD_REG + T_CPU], %l0 2042*280575beSPatrick McGehearty ldub [%l0 + CPU_KPRUNRUN], %l0 2043*280575beSPatrick McGehearty brnz,a,pt %l0, 1f ! Need to call kpreempt? 
2044*280575beSPatrick McGehearty or %l1, KPREEMPT_FLAG, %l1 ! If so, set the flag 2045*280575beSPatrick McGehearty1: 2046*280575beSPatrick McGehearty btst LOFAULT_SET, %l1 2047*280575beSPatrick McGehearty bz,pn %icc, 3f 2048*280575beSPatrick McGehearty andncc %o5, COPY_FLAGS, %o5 2049*280575beSPatrick McGehearty ! Here via bcopy. Check to see if the handler was NULL. 2050*280575beSPatrick McGehearty ! If so, just return quietly. Otherwise, reset the 2051*280575beSPatrick McGehearty ! handler and return. 2052*280575beSPatrick McGehearty bz,pn %ncc, 2f 2053*280575beSPatrick McGehearty nop 2054*280575beSPatrick McGehearty membar #Sync 2055*280575beSPatrick McGehearty stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2056*280575beSPatrick McGehearty2: 2057*280575beSPatrick McGehearty btst KPREEMPT_FLAG, %l1 2058*280575beSPatrick McGehearty bz,pt %icc, 3f 2059*280575beSPatrick McGehearty nop 2060*280575beSPatrick McGehearty call kpreempt 2061*280575beSPatrick McGehearty rdpr %pil, %o0 ! pass %pil 2062*280575beSPatrick McGehearty3: 2063*280575beSPatrick McGehearty ret 2064*280575beSPatrick McGehearty restore %g0, 0, %o0 2065*280575beSPatrick McGehearty 2066*280575beSPatrick McGehearty SET_SIZE(bcopy_more) 2067*280575beSPatrick McGehearty 2068*280575beSPatrick McGehearty 2069340af271Swh94709#else /* NIAGARA_IMPL */ 2070473b13d4Sae112802 save %sp, -SA(MINFRAME), %sp 2071473b13d4Sae112802 clr %o5 ! flag LOFAULT_SET is not set for bcopy 20727c478bd9Sstevel@tonic-gate.do_copy: 20737c478bd9Sstevel@tonic-gate cmp %i2, 12 ! for small counts 20747c478bd9Sstevel@tonic-gate blu %ncc, .bytecp ! just copy bytes 20757c478bd9Sstevel@tonic-gate .empty 20767c478bd9Sstevel@tonic-gate 20777c478bd9Sstevel@tonic-gate cmp %i2, 128 ! for less than 128 bytes 20787c478bd9Sstevel@tonic-gate blu,pn %ncc, .bcb_punt ! 
no block st/quad ld 20797c478bd9Sstevel@tonic-gate nop 20807c478bd9Sstevel@tonic-gate 20817c478bd9Sstevel@tonic-gate set use_hw_bcopy, %o2 20827c478bd9Sstevel@tonic-gate ld [%o2], %o2 2083340af271Swh94709 brz,pn %o2, .bcb_punt 20847c478bd9Sstevel@tonic-gate nop 20857c478bd9Sstevel@tonic-gate 20867c478bd9Sstevel@tonic-gate subcc %i1, %i0, %i3 20877c478bd9Sstevel@tonic-gate bneg,a,pn %ncc, 1f 20887c478bd9Sstevel@tonic-gate neg %i3 20897c478bd9Sstevel@tonic-gate1: 20907c478bd9Sstevel@tonic-gate /* 20917c478bd9Sstevel@tonic-gate * Compare against 256 since we should be checking block addresses 20927c478bd9Sstevel@tonic-gate * and (dest & ~63) - (src & ~63) can be 3 blocks even if 20937c478bd9Sstevel@tonic-gate * src = dest + (64 * 3) + 63. 20947c478bd9Sstevel@tonic-gate */ 20957c478bd9Sstevel@tonic-gate cmp %i3, 256 20967c478bd9Sstevel@tonic-gate blu,pn %ncc, .bcb_punt 20977c478bd9Sstevel@tonic-gate nop 20987c478bd9Sstevel@tonic-gate 20997c478bd9Sstevel@tonic-gate /* 21007c478bd9Sstevel@tonic-gate * Copies that reach here have at least 2 blocks of data to copy. 21017c478bd9Sstevel@tonic-gate */ 21027c478bd9Sstevel@tonic-gate.do_blockcopy: 21037c478bd9Sstevel@tonic-gate ! Swap src/dst since the code below is memcpy code 21047c478bd9Sstevel@tonic-gate ! and memcpy/bcopy have different calling sequences 21057c478bd9Sstevel@tonic-gate mov %i1, %i5 21067c478bd9Sstevel@tonic-gate mov %i0, %i1 21077c478bd9Sstevel@tonic-gate mov %i5, %i0 21087c478bd9Sstevel@tonic-gate 2109340af271Swh94709 ! Block (64 bytes) align the destination. 21107c478bd9Sstevel@tonic-gate andcc %i0, 0x3f, %i3 ! is dst aligned on 64 bytes 21117c478bd9Sstevel@tonic-gate bz %xcc, .chksrc ! dst is already block aligned 21127c478bd9Sstevel@tonic-gate sub %i3, 0x40, %i3 21137c478bd9Sstevel@tonic-gate neg %i3 ! bytes till dst 64 bytes aligned 21147c478bd9Sstevel@tonic-gate sub %i2, %i3, %i2 ! update i2 with new count 21157c478bd9Sstevel@tonic-gate 2116340af271Swh94709 !
Based on source and destination alignment do 2117340af271Swh94709 ! either 8 bytes, 4 bytes, 2 bytes or byte copy. 2118340af271Swh94709 2119340af271Swh94709 ! Is dst & src 8B aligned 2120340af271Swh94709 or %i0, %i1, %o2 2121340af271Swh94709 andcc %o2, 0x7, %g0 2122340af271Swh94709 bz %ncc, .alewdcp 2123340af271Swh94709 nop 2124340af271Swh94709 2125340af271Swh94709 ! Is dst & src 4B aligned 2126340af271Swh94709 andcc %o2, 0x3, %g0 2127340af271Swh94709 bz %ncc, .alwdcp 2128340af271Swh94709 nop 2129340af271Swh94709 2130340af271Swh94709 ! Is dst & src 2B aligned 2131340af271Swh94709 andcc %o2, 0x1, %g0 2132340af271Swh94709 bz %ncc, .alhlfwdcp 2133340af271Swh94709 nop 2134340af271Swh94709 2135340af271Swh94709 ! 1B aligned 2136340af271Swh947091: ldub [%i1], %o2 2137340af271Swh94709 stb %o2, [%i0] 21387c478bd9Sstevel@tonic-gate inc %i1 21397c478bd9Sstevel@tonic-gate deccc %i3 2140340af271Swh94709 bgu,pt %ncc, 1b 21417c478bd9Sstevel@tonic-gate inc %i0 21427c478bd9Sstevel@tonic-gate 2143340af271Swh94709 ba .chksrc 2144340af271Swh94709 nop 2145340af271Swh94709 2146340af271Swh94709 ! dst & src 4B aligned 2147340af271Swh94709.alwdcp: 2148340af271Swh94709 ld [%i1], %o2 2149340af271Swh94709 st %o2, [%i0] 2150340af271Swh94709 add %i1, 0x4, %i1 2151340af271Swh94709 subcc %i3, 0x4, %i3 2152340af271Swh94709 bgu,pt %ncc, .alwdcp 2153340af271Swh94709 add %i0, 0x4, %i0 2154340af271Swh94709 2155340af271Swh94709 ba .chksrc 2156340af271Swh94709 nop 2157340af271Swh94709 2158340af271Swh94709 ! dst & src 2B aligned 2159340af271Swh94709.alhlfwdcp: 2160340af271Swh94709 lduh [%i1], %o2 2161340af271Swh94709 stuh %o2, [%i0] 2162340af271Swh94709 add %i1, 0x2, %i1 2163340af271Swh94709 subcc %i3, 0x2, %i3 2164340af271Swh94709 bgu,pt %ncc, .alhlfwdcp 2165340af271Swh94709 add %i0, 0x2, %i0 2166340af271Swh94709 2167340af271Swh94709 ba .chksrc 2168340af271Swh94709 nop 2169340af271Swh94709 2170340af271Swh94709 ! 
dst & src 8B aligned 2171340af271Swh94709.alewdcp: 2172340af271Swh94709 ldx [%i1], %o2 2173340af271Swh94709 stx %o2, [%i0] 2174340af271Swh94709 add %i1, 0x8, %i1 2175340af271Swh94709 subcc %i3, 0x8, %i3 2176340af271Swh94709 bgu,pt %ncc, .alewdcp 2177340af271Swh94709 add %i0, 0x8, %i0 2178340af271Swh94709 21797c478bd9Sstevel@tonic-gate ! Now Destination is block (64 bytes) aligned 21807c478bd9Sstevel@tonic-gate.chksrc: 21817c478bd9Sstevel@tonic-gate andn %i2, 0x3f, %i3 ! %i3 count is multiple of block size 21827c478bd9Sstevel@tonic-gate sub %i2, %i3, %i2 ! Residue bytes in %i2 21837c478bd9Sstevel@tonic-gate 21847c478bd9Sstevel@tonic-gate mov ASI_BLK_INIT_ST_QUAD_LDD_P, %asi 21857c478bd9Sstevel@tonic-gate 21867c478bd9Sstevel@tonic-gate andcc %i1, 0xf, %o2 ! is src quadword aligned 21877c478bd9Sstevel@tonic-gate bz,pn %xcc, .blkcpy ! src offset in %o2 21887c478bd9Sstevel@tonic-gate nop 21897c478bd9Sstevel@tonic-gate cmp %o2, 0x8 21907c478bd9Sstevel@tonic-gate bg .cpy_upper_double 21917c478bd9Sstevel@tonic-gate nop 21927c478bd9Sstevel@tonic-gate bl .cpy_lower_double 21937c478bd9Sstevel@tonic-gate nop 21947c478bd9Sstevel@tonic-gate 21957c478bd9Sstevel@tonic-gate ! Falls through when source offset is equal to 8 i.e. 21967c478bd9Sstevel@tonic-gate ! source is double word aligned. 21977c478bd9Sstevel@tonic-gate ! In this case no shift/merge of data is required 21987c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! align the src at 16 bytes. 21997c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! 
%l0 has block aligned source 22007c478bd9Sstevel@tonic-gate prefetch [%l0+0x0], #one_read 22017c478bd9Sstevel@tonic-gate ldda [%i1+0x0]%asi, %l2 22027c478bd9Sstevel@tonic-gateloop0: 22037c478bd9Sstevel@tonic-gate ldda [%i1+0x10]%asi, %l4 22047c478bd9Sstevel@tonic-gate prefetch [%l0+0x40], #one_read 22057c478bd9Sstevel@tonic-gate 22067c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x0]%asi 22077c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x8]%asi 22087c478bd9Sstevel@tonic-gate 22097c478bd9Sstevel@tonic-gate ldda [%i1+0x20]%asi, %l2 22107c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x10]%asi 22117c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x18]%asi 22127c478bd9Sstevel@tonic-gate 22137c478bd9Sstevel@tonic-gate ldda [%i1+0x30]%asi, %l4 22147c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x20]%asi 22157c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x28]%asi 22167c478bd9Sstevel@tonic-gate 22177c478bd9Sstevel@tonic-gate ldda [%i1+0x40]%asi, %l2 22187c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x30]%asi 22197c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x38]%asi 22207c478bd9Sstevel@tonic-gate 22217c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 22227c478bd9Sstevel@tonic-gate add %i1, 0x40, %i1 22237c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 22247c478bd9Sstevel@tonic-gate bgu,pt %xcc, loop0 22257c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 22267c478bd9Sstevel@tonic-gate ba .blkdone 22277c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 22287c478bd9Sstevel@tonic-gate ! the src offset was stored in %o2 22297c478bd9Sstevel@tonic-gate 22307c478bd9Sstevel@tonic-gate.cpy_lower_double: 22317c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! align the src at 16 bytes. 22327c478bd9Sstevel@tonic-gate sll %o2, 3, %o0 ! %o0 left shift 22337c478bd9Sstevel@tonic-gate mov 0x40, %o1 22347c478bd9Sstevel@tonic-gate sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 22357c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! 
%l0 has block aligned source 22367c478bd9Sstevel@tonic-gate prefetch [%l0+0x0], #one_read 22377c478bd9Sstevel@tonic-gate ldda [%i1+0x0]%asi, %l2 ! partial data in %l2 and %l3 has 22387c478bd9Sstevel@tonic-gate ! complete data 22397c478bd9Sstevel@tonic-gateloop1: 22407c478bd9Sstevel@tonic-gate ldda [%i1+0x10]%asi, %l4 ! %l4 has partial data for this read. 22417c478bd9Sstevel@tonic-gate ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4 22427c478bd9Sstevel@tonic-gate ! into %l2 and %l3 22437c478bd9Sstevel@tonic-gate prefetch [%l0+0x40], #one_read 22447c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x0]%asi 22457c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x8]%asi 22467c478bd9Sstevel@tonic-gate 22477c478bd9Sstevel@tonic-gate ldda [%i1+0x20]%asi, %l2 22487c478bd9Sstevel@tonic-gate ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and 22497c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x10]%asi ! %l4 from previous read 22507c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x18]%asi ! into %l4 and %l5 22517c478bd9Sstevel@tonic-gate 22527c478bd9Sstevel@tonic-gate ! Repeat the same for next 32 bytes. 
22537c478bd9Sstevel@tonic-gate 22547c478bd9Sstevel@tonic-gate ldda [%i1+0x30]%asi, %l4 22557c478bd9Sstevel@tonic-gate ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) 22567c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x20]%asi 22577c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x28]%asi 22587c478bd9Sstevel@tonic-gate 22597c478bd9Sstevel@tonic-gate ldda [%i1+0x40]%asi, %l2 22607c478bd9Sstevel@tonic-gate ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) 22617c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x30]%asi 22627c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x38]%asi 22637c478bd9Sstevel@tonic-gate 22647c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 22657c478bd9Sstevel@tonic-gate add %i1, 0x40, %i1 22667c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 22677c478bd9Sstevel@tonic-gate bgu,pt %xcc, loop1 22687c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 22697c478bd9Sstevel@tonic-gate ba .blkdone 22707c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 22717c478bd9Sstevel@tonic-gate ! the src offset was stored in %o2 22727c478bd9Sstevel@tonic-gate 22737c478bd9Sstevel@tonic-gate.cpy_upper_double: 22747c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! align the src at 16 bytes. 22757c478bd9Sstevel@tonic-gate mov 0x8, %o0 22767c478bd9Sstevel@tonic-gate sub %o2, %o0, %o0 22777c478bd9Sstevel@tonic-gate sll %o0, 3, %o0 ! %o0 left shift 22787c478bd9Sstevel@tonic-gate mov 0x40, %o1 22797c478bd9Sstevel@tonic-gate sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 22807c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! %l0 has block aligned source 22817c478bd9Sstevel@tonic-gate prefetch [%l0+0x0], #one_read 22827c478bd9Sstevel@tonic-gate ldda [%i1+0x0]%asi, %l2 ! partial data in %l3 for this read and 22837c478bd9Sstevel@tonic-gate ! no data in %l2 22847c478bd9Sstevel@tonic-gateloop2: 22857c478bd9Sstevel@tonic-gate ldda [%i1+0x10]%asi, %l4 ! %l4 has complete data and %l5 has 22867c478bd9Sstevel@tonic-gate ! partial 22877c478bd9Sstevel@tonic-gate ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! 
merge %l3, %l4 and %l5 22887c478bd9Sstevel@tonic-gate ! into %l3 and %l4 22897c478bd9Sstevel@tonic-gate prefetch [%l0+0x40], #one_read 22907c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x0]%asi 22917c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x8]%asi 22927c478bd9Sstevel@tonic-gate 22937c478bd9Sstevel@tonic-gate ldda [%i1+0x20]%asi, %l2 22947c478bd9Sstevel@tonic-gate ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with 22957c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x10]%asi ! %l5 from previous read 22967c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x18]%asi ! into %l5 and %l2 22977c478bd9Sstevel@tonic-gate 22987c478bd9Sstevel@tonic-gate ! Repeat the same for next 32 bytes. 22997c478bd9Sstevel@tonic-gate 23007c478bd9Sstevel@tonic-gate ldda [%i1+0x30]%asi, %l4 23017c478bd9Sstevel@tonic-gate ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) 23027c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x20]%asi 23037c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x28]%asi 23047c478bd9Sstevel@tonic-gate 23057c478bd9Sstevel@tonic-gate ldda [%i1+0x40]%asi, %l2 23067c478bd9Sstevel@tonic-gate ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) 23077c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x30]%asi 23087c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x38]%asi 23097c478bd9Sstevel@tonic-gate 23107c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 23117c478bd9Sstevel@tonic-gate add %i1, 0x40, %i1 23127c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 23137c478bd9Sstevel@tonic-gate bgu,pt %xcc, loop2 23147c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 23157c478bd9Sstevel@tonic-gate ba .blkdone 23167c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 23177c478bd9Sstevel@tonic-gate ! the src offset was stored in %o2 23187c478bd9Sstevel@tonic-gate 23197c478bd9Sstevel@tonic-gate 23207c478bd9Sstevel@tonic-gate ! Both Source and Destination are block aligned. 23217c478bd9Sstevel@tonic-gate ! 
Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P 23227c478bd9Sstevel@tonic-gate.blkcpy: 23237c478bd9Sstevel@tonic-gate prefetch [%i1+0x0], #one_read 23247c478bd9Sstevel@tonic-gate1: 23257c478bd9Sstevel@tonic-gate ldda [%i1+0x0]%asi, %l0 23267c478bd9Sstevel@tonic-gate ldda [%i1+0x10]%asi, %l2 23277c478bd9Sstevel@tonic-gate prefetch [%i1+0x40], #one_read 23287c478bd9Sstevel@tonic-gate 23297c478bd9Sstevel@tonic-gate stxa %l0, [%i0+0x0]%asi 23307c478bd9Sstevel@tonic-gate ldda [%i1+0x20]%asi, %l4 23317c478bd9Sstevel@tonic-gate ldda [%i1+0x30]%asi, %l6 23327c478bd9Sstevel@tonic-gate 23337c478bd9Sstevel@tonic-gate stxa %l1, [%i0+0x8]%asi 23347c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x10]%asi 23357c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x18]%asi 23367c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x20]%asi 23377c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x28]%asi 23387c478bd9Sstevel@tonic-gate stxa %l6, [%i0+0x30]%asi 23397c478bd9Sstevel@tonic-gate stxa %l7, [%i0+0x38]%asi 23407c478bd9Sstevel@tonic-gate 23417c478bd9Sstevel@tonic-gate add %i1, 0x40, %i1 23427c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 23437c478bd9Sstevel@tonic-gate bgu,pt %xcc, 1b 23447c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 23457c478bd9Sstevel@tonic-gate 23467c478bd9Sstevel@tonic-gate.blkdone: 2347340af271Swh94709 membar #Sync 2348340af271Swh94709 2349340af271Swh94709 brz,pt %i2, .blkexit 2350340af271Swh94709 nop 2351340af271Swh94709 2352340af271Swh94709 ! Handle trailing bytes 2353340af271Swh94709 cmp %i2, 0x8 2354340af271Swh94709 blu,pt %ncc, .residue 2355340af271Swh94709 nop 2356340af271Swh94709 2357340af271Swh94709 ! Can we do some 8B ops 2358340af271Swh94709 or %i1, %i0, %o2 2359340af271Swh94709 andcc %o2, 0x7, %g0 2360340af271Swh94709 bnz %ncc, .last4 2361340af271Swh94709 nop 2362340af271Swh94709 2363340af271Swh94709 ! 
Do 8byte ops as long as possible 2364340af271Swh94709.last8: 2365340af271Swh94709 ldx [%i1], %o2 2366340af271Swh94709 stx %o2, [%i0] 2367340af271Swh94709 add %i1, 0x8, %i1 2368340af271Swh94709 sub %i2, 0x8, %i2 2369340af271Swh94709 cmp %i2, 0x8 2370340af271Swh94709 bgu,pt %ncc, .last8 2371340af271Swh94709 add %i0, 0x8, %i0 2372340af271Swh94709 2373340af271Swh94709 brz,pt %i2, .blkexit 2374340af271Swh94709 nop 2375340af271Swh94709 2376340af271Swh94709 ba .residue 2377340af271Swh94709 nop 2378340af271Swh94709 2379340af271Swh94709.last4: 2380340af271Swh94709 ! Can we do 4B ops 2381340af271Swh94709 andcc %o2, 0x3, %g0 2382340af271Swh94709 bnz %ncc, .last2 2383340af271Swh94709 nop 2384340af271Swh947091: 2385340af271Swh94709 ld [%i1], %o2 2386340af271Swh94709 st %o2, [%i0] 2387340af271Swh94709 add %i1, 0x4, %i1 2388340af271Swh94709 sub %i2, 0x4, %i2 2389340af271Swh94709 cmp %i2, 0x4 2390340af271Swh94709 bgu,pt %ncc, 1b 2391340af271Swh94709 add %i0, 0x4, %i0 2392340af271Swh94709 2393340af271Swh94709 brz,pt %i2, .blkexit 2394340af271Swh94709 nop 2395340af271Swh94709 2396340af271Swh94709 ba .residue 2397340af271Swh94709 nop 2398340af271Swh94709 2399340af271Swh94709.last2: 2400340af271Swh94709 ! 
Can we do 2B ops 2401340af271Swh94709 andcc %o2, 0x1, %g0 2402340af271Swh94709 bnz %ncc, .residue 2403340af271Swh94709 nop 2404340af271Swh94709 2405340af271Swh947091: 2406340af271Swh94709 lduh [%i1], %o2 2407340af271Swh94709 stuh %o2, [%i0] 2408340af271Swh94709 add %i1, 0x2, %i1 2409340af271Swh94709 sub %i2, 0x2, %i2 2410340af271Swh94709 cmp %i2, 0x2 2411340af271Swh94709 bgu,pt %ncc, 1b 2412340af271Swh94709 add %i0, 0x2, %i0 2413340af271Swh94709 2414340af271Swh94709 brz,pt %i2, .blkexit 24157c478bd9Sstevel@tonic-gate nop 24167c478bd9Sstevel@tonic-gate 24177c478bd9Sstevel@tonic-gate.residue: 2418340af271Swh94709 ldub [%i1], %o2 2419340af271Swh94709 stb %o2, [%i0] 24207c478bd9Sstevel@tonic-gate inc %i1 24217c478bd9Sstevel@tonic-gate deccc %i2 2422340af271Swh94709 bgu,pt %ncc, .residue 24237c478bd9Sstevel@tonic-gate inc %i0 24247c478bd9Sstevel@tonic-gate 24257c478bd9Sstevel@tonic-gate.blkexit: 2426340af271Swh94709 2427473b13d4Sae112802 membar #Sync ! sync error barrier 2428473b13d4Sae112802 ! Restore t_lofault handler, if came here from kcopy(). 2429473b13d4Sae112802 tst %o5 2430473b13d4Sae112802 bz %ncc, 1f 2431473b13d4Sae112802 andn %o5, LOFAULT_SET, %o5 2432473b13d4Sae112802 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2433473b13d4Sae1128021: 24347c478bd9Sstevel@tonic-gate ret 24357c478bd9Sstevel@tonic-gate restore %g0, 0, %o0 24367c478bd9Sstevel@tonic-gate 2437*280575beSPatrick McGehearty 24387c478bd9Sstevel@tonic-gate.bcb_punt: 24397c478bd9Sstevel@tonic-gate ! 24407c478bd9Sstevel@tonic-gate ! use aligned transfers where possible 24417c478bd9Sstevel@tonic-gate ! 24427c478bd9Sstevel@tonic-gate xor %i0, %i1, %o4 ! xor from and to address 24437c478bd9Sstevel@tonic-gate btst 7, %o4 ! if lower three bits zero 24447c478bd9Sstevel@tonic-gate bz .aldoubcp ! can align on double boundary 24457c478bd9Sstevel@tonic-gate .empty ! assembler complaints about label 24467c478bd9Sstevel@tonic-gate 24477c478bd9Sstevel@tonic-gate xor %i0, %i1, %o4 ! 
xor from and to address 24487c478bd9Sstevel@tonic-gate btst 3, %o4 ! if lower two bits zero 24497c478bd9Sstevel@tonic-gate bz .alwordcp ! can align on word boundary 24507c478bd9Sstevel@tonic-gate btst 3, %i0 ! delay slot, from address unaligned? 24517c478bd9Sstevel@tonic-gate ! 24527c478bd9Sstevel@tonic-gate ! use aligned reads and writes where possible 24537c478bd9Sstevel@tonic-gate ! this differs from wordcp in that it copes 24547c478bd9Sstevel@tonic-gate ! with odd alignment between source and destnation 24557c478bd9Sstevel@tonic-gate ! using word reads and writes with the proper shifts 24567c478bd9Sstevel@tonic-gate ! in between to align transfers to and from memory 24577c478bd9Sstevel@tonic-gate ! i0 - src address, i1 - dest address, i2 - count 24587c478bd9Sstevel@tonic-gate ! i3, i4 - tmps for used generating complete word 24597c478bd9Sstevel@tonic-gate ! i5 (word to write) 24607c478bd9Sstevel@tonic-gate ! l0 size in bits of upper part of source word (US) 24617c478bd9Sstevel@tonic-gate ! l1 size in bits of lower part of source word (LS = 32 - US) 24627c478bd9Sstevel@tonic-gate ! l2 size in bits of upper part of destination word (UD) 24637c478bd9Sstevel@tonic-gate ! l3 size in bits of lower part of destination word (LD = 32 - UD) 24647c478bd9Sstevel@tonic-gate ! l4 number of bytes leftover after aligned transfers complete 24657c478bd9Sstevel@tonic-gate ! l5 the number 32 24667c478bd9Sstevel@tonic-gate ! 24677c478bd9Sstevel@tonic-gate mov 32, %l5 ! load an oft-needed constant 24687c478bd9Sstevel@tonic-gate bz .align_dst_only 24697c478bd9Sstevel@tonic-gate btst 3, %i1 ! is destnation address aligned? 24707c478bd9Sstevel@tonic-gate clr %i4 ! clear registers used in either case 24717c478bd9Sstevel@tonic-gate bz .align_src_only 24727c478bd9Sstevel@tonic-gate clr %l0 24737c478bd9Sstevel@tonic-gate ! 24747c478bd9Sstevel@tonic-gate ! both source and destination addresses are unaligned 24757c478bd9Sstevel@tonic-gate ! 24767c478bd9Sstevel@tonic-gate1: ! 
align source 24777c478bd9Sstevel@tonic-gate ldub [%i0], %i3 ! read a byte from source address 24787c478bd9Sstevel@tonic-gate add %i0, 1, %i0 ! increment source address 24797c478bd9Sstevel@tonic-gate or %i4, %i3, %i4 ! or in with previous bytes (if any) 24807c478bd9Sstevel@tonic-gate btst 3, %i0 ! is source aligned? 24817c478bd9Sstevel@tonic-gate add %l0, 8, %l0 ! increment size of upper source (US) 24827c478bd9Sstevel@tonic-gate bnz,a 1b 24837c478bd9Sstevel@tonic-gate sll %i4, 8, %i4 ! make room for next byte 24847c478bd9Sstevel@tonic-gate 24857c478bd9Sstevel@tonic-gate sub %l5, %l0, %l1 ! generate shift left count (LS) 24867c478bd9Sstevel@tonic-gate sll %i4, %l1, %i4 ! prepare to get rest 24877c478bd9Sstevel@tonic-gate ld [%i0], %i3 ! read a word 24887c478bd9Sstevel@tonic-gate add %i0, 4, %i0 ! increment source address 24897c478bd9Sstevel@tonic-gate srl %i3, %l0, %i5 ! upper src bits into lower dst bits 24907c478bd9Sstevel@tonic-gate or %i4, %i5, %i5 ! merge 24917c478bd9Sstevel@tonic-gate mov 24, %l3 ! align destination 24927c478bd9Sstevel@tonic-gate1: 24937c478bd9Sstevel@tonic-gate srl %i5, %l3, %i4 ! prepare to write a single byte 24947c478bd9Sstevel@tonic-gate stb %i4, [%i1] ! write a byte 24957c478bd9Sstevel@tonic-gate add %i1, 1, %i1 ! increment destination address 24967c478bd9Sstevel@tonic-gate sub %i2, 1, %i2 ! decrement count 24977c478bd9Sstevel@tonic-gate btst 3, %i1 ! is destination aligned? 24987c478bd9Sstevel@tonic-gate bnz,a 1b 24997c478bd9Sstevel@tonic-gate sub %l3, 8, %l3 ! delay slot, decrement shift count (LD) 25007c478bd9Sstevel@tonic-gate sub %l5, %l3, %l2 ! generate shift left count (UD) 25017c478bd9Sstevel@tonic-gate sll %i5, %l2, %i5 ! move leftover into upper bytes 25027c478bd9Sstevel@tonic-gate cmp %l2, %l0 ! cmp # reqd to fill dst w old src left 25037c478bd9Sstevel@tonic-gate bgu %ncc, .more_needed ! 
need more to fill than we have 25047c478bd9Sstevel@tonic-gate nop 25057c478bd9Sstevel@tonic-gate 25067c478bd9Sstevel@tonic-gate sll %i3, %l1, %i3 ! clear upper used byte(s) 25077c478bd9Sstevel@tonic-gate srl %i3, %l1, %i3 25087c478bd9Sstevel@tonic-gate ! get the odd bytes between alignments 25097c478bd9Sstevel@tonic-gate sub %l0, %l2, %l0 ! regenerate shift count 25107c478bd9Sstevel@tonic-gate sub %l5, %l0, %l1 ! generate new shift left count (LS) 25117c478bd9Sstevel@tonic-gate and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0 25127c478bd9Sstevel@tonic-gate andn %i2, 3, %i2 ! # of aligned bytes that can be moved 25137c478bd9Sstevel@tonic-gate srl %i3, %l0, %i4 25147c478bd9Sstevel@tonic-gate or %i5, %i4, %i5 25157c478bd9Sstevel@tonic-gate st %i5, [%i1] ! write a word 25167c478bd9Sstevel@tonic-gate subcc %i2, 4, %i2 ! decrement count 25177c478bd9Sstevel@tonic-gate bz %ncc, .unalign_out 25187c478bd9Sstevel@tonic-gate add %i1, 4, %i1 ! increment destination address 25197c478bd9Sstevel@tonic-gate 25207c478bd9Sstevel@tonic-gate b 2f 25217c478bd9Sstevel@tonic-gate sll %i3, %l1, %i5 ! get leftover into upper bits 25227c478bd9Sstevel@tonic-gate.more_needed: 25237c478bd9Sstevel@tonic-gate sll %i3, %l0, %i3 ! save remaining byte(s) 25247c478bd9Sstevel@tonic-gate srl %i3, %l0, %i3 25257c478bd9Sstevel@tonic-gate sub %l2, %l0, %l1 ! regenerate shift count 25267c478bd9Sstevel@tonic-gate sub %l5, %l1, %l0 ! generate new shift left count 25277c478bd9Sstevel@tonic-gate sll %i3, %l1, %i4 ! move to fill empty space 25287c478bd9Sstevel@tonic-gate b 3f 25297c478bd9Sstevel@tonic-gate or %i5, %i4, %i5 ! merge to complete word 25307c478bd9Sstevel@tonic-gate ! 25317c478bd9Sstevel@tonic-gate ! the source address is aligned and destination is not 25327c478bd9Sstevel@tonic-gate ! 25337c478bd9Sstevel@tonic-gate.align_dst_only: 25347c478bd9Sstevel@tonic-gate ld [%i0], %i4 ! read a word 25357c478bd9Sstevel@tonic-gate add %i0, 4, %i0 ! 
increment source address 25367c478bd9Sstevel@tonic-gate mov 24, %l0 ! initial shift alignment count 25377c478bd9Sstevel@tonic-gate1: 25387c478bd9Sstevel@tonic-gate srl %i4, %l0, %i3 ! prepare to write a single byte 25397c478bd9Sstevel@tonic-gate stb %i3, [%i1] ! write a byte 25407c478bd9Sstevel@tonic-gate add %i1, 1, %i1 ! increment destination address 25417c478bd9Sstevel@tonic-gate sub %i2, 1, %i2 ! decrement count 25427c478bd9Sstevel@tonic-gate btst 3, %i1 ! is destination aligned? 25437c478bd9Sstevel@tonic-gate bnz,a 1b 25447c478bd9Sstevel@tonic-gate sub %l0, 8, %l0 ! delay slot, decrement shift count 25457c478bd9Sstevel@tonic-gate.xfer: 25467c478bd9Sstevel@tonic-gate sub %l5, %l0, %l1 ! generate shift left count 25477c478bd9Sstevel@tonic-gate sll %i4, %l1, %i5 ! get leftover 25487c478bd9Sstevel@tonic-gate3: 25497c478bd9Sstevel@tonic-gate and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0 25507c478bd9Sstevel@tonic-gate andn %i2, 3, %i2 ! # of aligned bytes that can be moved 25517c478bd9Sstevel@tonic-gate2: 25527c478bd9Sstevel@tonic-gate ld [%i0], %i3 ! read a source word 25537c478bd9Sstevel@tonic-gate add %i0, 4, %i0 ! increment source address 25547c478bd9Sstevel@tonic-gate srl %i3, %l0, %i4 ! upper src bits into lower dst bits 25557c478bd9Sstevel@tonic-gate or %i5, %i4, %i5 ! merge with upper dest bits (leftover) 25567c478bd9Sstevel@tonic-gate st %i5, [%i1] ! write a destination word 25577c478bd9Sstevel@tonic-gate subcc %i2, 4, %i2 ! decrement count 25587c478bd9Sstevel@tonic-gate bz %ncc, .unalign_out ! check if done 25597c478bd9Sstevel@tonic-gate add %i1, 4, %i1 ! increment destination address 25607c478bd9Sstevel@tonic-gate b 2b ! loop 25617c478bd9Sstevel@tonic-gate sll %i3, %l1, %i5 ! get leftover 25627c478bd9Sstevel@tonic-gate.unalign_out: 25637c478bd9Sstevel@tonic-gate tst %l4 ! any bytes leftover? 25647c478bd9Sstevel@tonic-gate bz %ncc, .cpdone 25657c478bd9Sstevel@tonic-gate .empty ! 
allow next instruction in delay slot 25667c478bd9Sstevel@tonic-gate1: 25677c478bd9Sstevel@tonic-gate sub %l0, 8, %l0 ! decrement shift 25687c478bd9Sstevel@tonic-gate srl %i3, %l0, %i4 ! upper src byte into lower dst byte 25697c478bd9Sstevel@tonic-gate stb %i4, [%i1] ! write a byte 25707c478bd9Sstevel@tonic-gate subcc %l4, 1, %l4 ! decrement count 25717c478bd9Sstevel@tonic-gate bz %ncc, .cpdone ! done? 25727c478bd9Sstevel@tonic-gate add %i1, 1, %i1 ! increment destination 25737c478bd9Sstevel@tonic-gate tst %l0 ! any more previously read bytes 25747c478bd9Sstevel@tonic-gate bnz %ncc, 1b ! we have leftover bytes 25757c478bd9Sstevel@tonic-gate mov %l4, %i2 ! delay slot, mv cnt where dbytecp wants 25767c478bd9Sstevel@tonic-gate b .dbytecp ! let dbytecp do the rest 25777c478bd9Sstevel@tonic-gate sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 25787c478bd9Sstevel@tonic-gate ! 25797c478bd9Sstevel@tonic-gate ! the destination address is aligned and the source is not 25807c478bd9Sstevel@tonic-gate ! 25817c478bd9Sstevel@tonic-gate.align_src_only: 25827c478bd9Sstevel@tonic-gate ldub [%i0], %i3 ! read a byte from source address 25837c478bd9Sstevel@tonic-gate add %i0, 1, %i0 ! increment source address 25847c478bd9Sstevel@tonic-gate or %i4, %i3, %i4 ! or in with previous bytes (if any) 25857c478bd9Sstevel@tonic-gate btst 3, %i0 ! is source aligned? 25867c478bd9Sstevel@tonic-gate add %l0, 8, %l0 ! increment shift count (US) 25877c478bd9Sstevel@tonic-gate bnz,a .align_src_only 25887c478bd9Sstevel@tonic-gate sll %i4, 8, %i4 ! make room for next byte 25897c478bd9Sstevel@tonic-gate b,a .xfer 25907c478bd9Sstevel@tonic-gate ! 25917c478bd9Sstevel@tonic-gate ! if from address unaligned for double-word moves, 25927c478bd9Sstevel@tonic-gate ! move bytes till it is, if count is < 56 it could take 25937c478bd9Sstevel@tonic-gate ! longer to align the thing than to do the transfer 25947c478bd9Sstevel@tonic-gate ! in word size chunks right away 25957c478bd9Sstevel@tonic-gate ! 
25967c478bd9Sstevel@tonic-gate.aldoubcp: 25977c478bd9Sstevel@tonic-gate cmp %i2, 56 ! if count < 56, use wordcp, it takes 25987c478bd9Sstevel@tonic-gate blu,a %ncc, .alwordcp ! longer to align doubles than words 25997c478bd9Sstevel@tonic-gate mov 3, %o0 ! mask for word alignment 26007c478bd9Sstevel@tonic-gate call .alignit ! copy bytes until aligned 26017c478bd9Sstevel@tonic-gate mov 7, %o0 ! mask for double alignment 26027c478bd9Sstevel@tonic-gate ! 26037c478bd9Sstevel@tonic-gate ! source and destination are now double-word aligned 26047c478bd9Sstevel@tonic-gate ! i3 has aligned count returned by alignit 26057c478bd9Sstevel@tonic-gate ! 26067c478bd9Sstevel@tonic-gate and %i2, 7, %i2 ! unaligned leftover count 26077c478bd9Sstevel@tonic-gate sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 26087c478bd9Sstevel@tonic-gate5: 26097c478bd9Sstevel@tonic-gate ldx [%i0+%i1], %o4 ! read from address 26107c478bd9Sstevel@tonic-gate stx %o4, [%i1] ! write at destination address 26117c478bd9Sstevel@tonic-gate subcc %i3, 8, %i3 ! dec count 26127c478bd9Sstevel@tonic-gate bgu %ncc, 5b 26137c478bd9Sstevel@tonic-gate add %i1, 8, %i1 ! delay slot, inc to address 26147c478bd9Sstevel@tonic-gate cmp %i2, 4 ! see if we can copy a word 26157c478bd9Sstevel@tonic-gate blu %ncc, .dbytecp ! if 3 or less bytes use bytecp 26167c478bd9Sstevel@tonic-gate .empty 26177c478bd9Sstevel@tonic-gate ! 26187c478bd9Sstevel@tonic-gate ! for leftover bytes we fall into wordcp, if needed 26197c478bd9Sstevel@tonic-gate ! 26207c478bd9Sstevel@tonic-gate.wordcp: 26217c478bd9Sstevel@tonic-gate and %i2, 3, %i2 ! unaligned leftover count 26227c478bd9Sstevel@tonic-gate5: 26237c478bd9Sstevel@tonic-gate ld [%i0+%i1], %o4 ! read from address 26247c478bd9Sstevel@tonic-gate st %o4, [%i1] ! write at destination address 26257c478bd9Sstevel@tonic-gate subcc %i3, 4, %i3 ! dec count 26267c478bd9Sstevel@tonic-gate bgu %ncc, 5b 26277c478bd9Sstevel@tonic-gate add %i1, 4, %i1 ! 
delay slot, inc to address 26287c478bd9Sstevel@tonic-gate b,a .dbytecp 26297c478bd9Sstevel@tonic-gate 26307c478bd9Sstevel@tonic-gate ! we come here to align copies on word boundaries 26317c478bd9Sstevel@tonic-gate.alwordcp: 26327c478bd9Sstevel@tonic-gate call .alignit ! go word-align it 26337c478bd9Sstevel@tonic-gate mov 3, %o0 ! bits that must be zero to be aligned 26347c478bd9Sstevel@tonic-gate b .wordcp 26357c478bd9Sstevel@tonic-gate sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 26367c478bd9Sstevel@tonic-gate 26377c478bd9Sstevel@tonic-gate ! 26387c478bd9Sstevel@tonic-gate ! byte copy, works with any alignment 26397c478bd9Sstevel@tonic-gate ! 26407c478bd9Sstevel@tonic-gate.bytecp: 26417c478bd9Sstevel@tonic-gate b .dbytecp 26427c478bd9Sstevel@tonic-gate sub %i0, %i1, %i0 ! i0 gets difference of src and dst 26437c478bd9Sstevel@tonic-gate 26447c478bd9Sstevel@tonic-gate ! 26457c478bd9Sstevel@tonic-gate ! differenced byte copy, works with any alignment 26467c478bd9Sstevel@tonic-gate ! assumes dest in %i1 and (source - dest) in %i0 26477c478bd9Sstevel@tonic-gate ! 26487c478bd9Sstevel@tonic-gate1: 26497c478bd9Sstevel@tonic-gate stb %o4, [%i1] ! write to address 26507c478bd9Sstevel@tonic-gate inc %i1 ! inc to address 26517c478bd9Sstevel@tonic-gate.dbytecp: 26527c478bd9Sstevel@tonic-gate deccc %i2 ! dec count 26537c478bd9Sstevel@tonic-gate bgeu,a %ncc, 1b ! loop till done 26547c478bd9Sstevel@tonic-gate ldub [%i0+%i1], %o4 ! read from address 26557c478bd9Sstevel@tonic-gate.cpdone: 2656*280575beSPatrick McGehearty 26577c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 2658473b13d4Sae112802 ! Restore t_lofault handler, if came here from kcopy(). 2659473b13d4Sae112802 tst %o5 2660473b13d4Sae112802 bz %ncc, 1f 2661473b13d4Sae112802 andn %o5, LOFAULT_SET, %o5 2662473b13d4Sae112802 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2663473b13d4Sae1128021: 26647c478bd9Sstevel@tonic-gate ret 26657c478bd9Sstevel@tonic-gate restore %g0, 0, %o0 ! 
! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries. Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 *
 * Entered at .alignit (via call) with:
 *	%o0 - alignment mask (3 or 7 at the call sites in bcopy)
 *	%i0 - source address
 *	%i1 - destination address
 *	%i2 - byte count
 *
 * While (%i0 & %o0) != 0 one byte is copied from [%i0] to [%i1],
 * both pointers are advanced and %i2 is decremented. Only the source
 * address is tested; the destination ends up aligned as well only if
 * the caller has already established that both addresses share the
 * same low bits. On return %i3 = %i2 & ~%o0. %o4 is clobbered.
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	  ldub	[%i0], %o4		! read next byte
					! (annulled unless the branch is taken)

	retl
	  andn	%i2, %o0, %i3		! return size of aligned bytes

	SET_SIZE(bcopy)

#endif	/* NIAGARA_IMPL */

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 *
 * void ovbcopy(const void *from, void *to, size_t count)
 *	%o0 - from
 *	%o1 - to
 *	%o2 - count
 *
 * Leaf routine (no register window is taken). Behaviour:
 *	count == 0		return immediately
 *	count <= |from - to|	the regions cannot overlap; branch to bcopy
 *	from < to		copy one byte at a time, last byte first
 *	otherwise		copy one byte at a time, first byte first
 * %o3 is used as scratch.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	  subcc	%o0, %o1, %o3		! difference of from and to address
					! (annulled when count is zero)

	retl				! return
	  nop
1:
	bneg,a	%ncc, 2f
	  neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	! The cmp below sits in the delay slot of the bleu above, so it also
	! executes when control transfers to bcopy.
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	  nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	  inc	%o1			! inc to address

	retl				! return
	  nop
	!
	! Copy backwards.
	!
	! %o2 doubles as the byte index: it is decremented first, so the
	! load/store pair addresses from + (count - 1) down to from + 0.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	  stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	  nop
	SET_SIZE(ovbcopy)

#endif	/* lint */

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page. This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 *
 * void hwblkpagecopy(const void *src, void *dst)
 *
 * Takes a register window. PAGESIZE bytes are moved 0x80 bytes per
 * loop iteration through %asi = ASI_BLK_INIT_ST_QUAD_LDD_P: each ldda
 * fills an even/odd local register pair (16 bytes) and each stxa
 * stores one 8-byte register. A membar #Sync follows the loop, and the
 * final restore writes 0 into the caller's %o0.
 */
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else	/* lint */
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)

	set	PAGESIZE, %i2

	/*
	 * Copying exactly one page and PAGESIZE is a multiple of 0x80.
	 */
	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
	prefetch [%i0+0x0], #one_read
	prefetch [%i0+0x40], #one_read
1:
	prefetch [%i0+0x80], #one_read
	prefetch [%i0+0xc0], #one_read
	ldda	[%i0+0x0]%asi, %l0
	ldda	[%i0+0x10]%asi, %l2
	ldda	[%i0+0x20]%asi, %l4
	ldda	[%i0+0x30]%asi, %l6
	stxa	%l0, [%i1+0x0]%asi
	stxa	%l1, [%i1+0x8]%asi
	stxa	%l2, [%i1+0x10]%asi
	stxa	%l3, [%i1+0x18]%asi
	stxa	%l4, [%i1+0x20]%asi
	stxa	%l5, [%i1+0x28]%asi
	stxa	%l6, [%i1+0x30]%asi
	stxa	%l7, [%i1+0x38]%asi
	ldda	[%i0+0x40]%asi, %l0
	ldda	[%i0+0x50]%asi, %l2
	ldda	[%i0+0x60]%asi, %l4
	ldda	[%i0+0x70]%asi, %l6
	stxa	%l0, [%i1+0x40]%asi
	stxa	%l1, [%i1+0x48]%asi
	stxa	%l2, [%i1+0x50]%asi
	stxa	%l3, [%i1+0x58]%asi
	stxa	%l4, [%i1+0x60]%asi
	stxa	%l5, [%i1+0x68]%asi
	stxa	%l6, [%i1+0x70]%asi
	stxa	%l7, [%i1+0x78]%asi

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	  add	%i1, 0x80, %i1

	membar	#Sync
	ret
	  restore %g0, 0, %o0
	SET_SIZE(hwblkpagecopy)
#endif	/* lint */


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there's two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed. This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */

/*
 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * None of the copyops routines grab a window until it's decided that
 * we need to do a HW block copy operation. This saves a window
 * spill/fill when we're called during socket ops. The typical IO
 * path won't cause spill/fill traps.
 *
 * This code uses a set of 4 limits for the maximum size that will
 * be copied given a particular input/output address alignment.
 * The default limits are:
 *
 * single byte aligned - 256 (hw_copy_limit_1)
 * two byte aligned - 512 (hw_copy_limit_2)
 * four byte aligned - 1024 (hw_copy_limit_4)
 * eight byte aligned - 1024 (hw_copy_limit_8)
 *
 * If the value for a particular limit is zero, the copy will be done
 * via the copy loops rather than block store/quad load instructions.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous lo_fault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our nowindow fault handler into %o3.
 * Place the address of the windowed fault handler into %o4.
 * --> We'll use this handler if we end up grabbing a window
 * --> before we use block initializing store and quad load ASIs
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers. Based on the alignment we check count
 * against a limit based on detected alignment. If we exceed the
 * alignment value we copy via block initializing store and quad
 * load instructions.
 *
 * If we don't exceed one of the limits, we store -count in %o3,
 * we store the number of chunks (8, 4, 2 or 1 byte) operated
 * on in our basic copy loop in %o2. Following this we branch
 * to the appropriate copy loop and copy that many chunks.
 * Since we've been adding the chunk size to %o3 each time through
 * as well as decrementing %o2, we can tell if any data is
 * left to be copied by examining %o3. If that is zero, we're
 * done and can go home. If not, we figure out what the largest
 * chunk size left to be copied is and branch to that copy loop
 * unless there's only one byte left. We load that as we're
 * branching to code that stores it just before we return.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping. All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation. The handlers
 * for copyOP and xcopyOP are found at the end of each individual function.
 * The handlers for xcopyOP_little are found at the end of xcopyin_little.
 * The handlers for copyOP_noerr are found at the end of copyin_noerr.
 */

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
*/

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

/*
 * REAL_LOFAULT holds the address of the operation-specific fault handler
 * that copyio_fault/copyio_fault_nowindow jump to; SAVED_LOFAULT holds
 * the t_lofault value that was current when the copy routine was entered.
 */
#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler. This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout. In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT. So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 *
 * copyio_fault is the variant used once a register window has been
 * taken: it executes a restore before reloading %o0-%o2 from
 * SAVE_SRC/SAVE_DST/SAVE_COUNT and jumping to REAL_LOFAULT.
 *
 * !NIAGARA_IMPL build: if FPUSED_FLAG is set in SAVED_LOFAULT, %gsr is
 * rewritten from %l5 and the FP registers are either reloaded from the
 * stack (FPRS_FEF set in %g1) or zeroed. The flag is cleared from
 * SAVED_LOFAULT in either case (delay slot of the first branch).
 * Presumably %l5 and %g1 hold the %gsr and %fprs values saved by the
 * FP copy path -- that code is outside this section; confirm there.
 *
 * NOTE(review): in the !NIAGARA_IMPL path SAVED_LOFAULT is not stored
 * back to t_lofault, whereas the NIAGARA_IMPL path and
 * copyio_fault_nowindow both do so. Confirm that the REAL_LOFAULT
 * handlers (or their callers) restore t_lofault for this build.
 *
 * NOTE(review): %fprs is rewritten from %g1 only on the FZERO path
 * (label 4), not after BLD_FP_FROMSTACK. Confirm this is intended.
 */
	ENTRY(copyio_fault)
#if !defined(NIAGARA_IMPL)
	btst	FPUSED_FLAG, SAVED_LOFAULT
	bz	1f
	  andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT

	wr	%l5, 0, %gsr		! restore gsr

	btst	FPRS_FEF, %g1
	bz	%icc, 4f
	  nop

	! restore fpregs from stack
	BLD_FP_FROMSTACK(%o2)

	ba,pt	%ncc, 1f
	  nop
4:
	FZERO				! zero all of the fpregs
	wr	%g1, %g0, %fprs		! restore fprs
1:
	restore
	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	  mov	SAVE_COUNT, %o2

#else	/* NIAGARA_IMPL */
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	restore
	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	  mov	SAVE_COUNT, %o2

#endif	/* NIAGARA_IMPL */

	SET_SIZE(copyio_fault)

/*
 * Fault handler for the copy paths that have not taken a register
 * window: restores t_lofault from SAVED_LOFAULT, reloads %o0-%o2 from
 * SAVE_SRC/SAVE_DST/SAVE_COUNT and jumps to REAL_LOFAULT. No restore
 * is executed.
 */
	ENTRY(copyio_fault_nowindow)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	  mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault_nowindow)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

#if !defined(NIAGARA_IMPL)
.do_copyout:
	tst	%o2			! check for zero count; quick exit
	bz,pt	%ncc, .co_smallqx
	  mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT
	cmp	%o2, FP_COPY		! check for small copy/leaf case
	bgt,pt	%ncc, .co_copy_more
	  ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
/*
 * Small copy out code
 *
 */
	sethi	%hi(copyio_fault_nowindow), %o3
	or	%o3, %lo(copyio_fault_nowindow), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	ASI_USER, %asi
	cmp	%o2, SHORTCOPY		! make sure there is enough to align
	ble,pt	%ncc, .co_smallest
	  andcc	%o1, 0x7, %o3		!
is dest long word aligned 3017*280575beSPatrick McGehearty bnz,pn %ncc, .co_align 3018*280575beSPatrick McGehearty andcc %o1, 1, %o3 ! is dest byte aligned 3019*280575beSPatrick McGehearty 3020*280575beSPatrick McGehearty! Destination is long word aligned 3021*280575beSPatrick McGehearty! 8 cases for src alignment; load parts, store long words 3022*280575beSPatrick McGehearty.co_al_src: 3023*280575beSPatrick McGehearty andcc %o0, 7, %o3 3024*280575beSPatrick McGehearty brnz,pt %o3, .co_src_dst_unal8 3025*280575beSPatrick McGehearty nop 3026*280575beSPatrick McGehearty/* 3027*280575beSPatrick McGehearty * Special case for handling when src and dest are both long word aligned 3028*280575beSPatrick McGehearty * and total data to move is less than FP_COPY bytes 3029*280575beSPatrick McGehearty * Also handles finish up for large block moves, so may be less than 32 bytes 3030*280575beSPatrick McGehearty */ 3031*280575beSPatrick McGehearty.co_medlong: 3032*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 3033*280575beSPatrick McGehearty ble,pt %ncc, .co_medl31 3034*280575beSPatrick McGehearty nop 3035*280575beSPatrick McGehearty.co_medl32: 3036*280575beSPatrick McGehearty ldx [%o0], %o4 ! move 32 bytes 3037*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count by 32 3038*280575beSPatrick McGehearty stxa %o4, [%o1]%asi 3039*280575beSPatrick McGehearty ldx [%o0+8], %o4 3040*280575beSPatrick McGehearty stxa %o4, [%o1+8]%asi 3041*280575beSPatrick McGehearty ldx [%o0+16], %o4 3042*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 3043*280575beSPatrick McGehearty stxa %o4, [%o1+16]%asi 3044*280575beSPatrick McGehearty ldx [%o0-8], %o4 3045*280575beSPatrick McGehearty add %o1, 32, %o1 ! increase dst ptr by 32 3046*280575beSPatrick McGehearty bgu,pt %ncc, .co_medl32 ! 
repeat if at least 32 bytes left 3047*280575beSPatrick McGehearty stxa %o4, [%o1-8]%asi 3048*280575beSPatrick McGehearty.co_medl31: 3049*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 3050*280575beSPatrick McGehearty ble,pt %ncc, .co_medl7 ! skip if 7 or fewer bytes left 3051*280575beSPatrick McGehearty nop 3052*280575beSPatrick McGehearty.co_medl8: 3053*280575beSPatrick McGehearty ldx [%o0], %o4 ! move 8 bytes 3054*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 3055*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrease count by 8 3056*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 3057*280575beSPatrick McGehearty bgu,pt %ncc, .co_medl8 3058*280575beSPatrick McGehearty stxa %o4, [%o1-8]%asi 3059*280575beSPatrick McGehearty.co_medl7: 3060*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 3061*280575beSPatrick McGehearty bnz,pt %ncc, .co_small4 ! do final bytes if not finished 3062*280575beSPatrick McGehearty 3063*280575beSPatrick McGehearty.co_smallx: ! finish up and exit 3064*280575beSPatrick McGehearty membar #Sync 3065*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 3066*280575beSPatrick McGehearty.co_smallqx: 3067*280575beSPatrick McGehearty retl 3068*280575beSPatrick McGehearty mov %g0, %o0 3069*280575beSPatrick McGehearty 3070*280575beSPatrick McGehearty.co_small4: 3071*280575beSPatrick McGehearty cmp %o2, 4 3072*280575beSPatrick McGehearty blt,pt %ncc, .co_small3x ! skip if less than 4 bytes left 3073*280575beSPatrick McGehearty nop ! 3074*280575beSPatrick McGehearty ld [%o0], %o4 ! move 4 bytes 3075*280575beSPatrick McGehearty add %o0, 4, %o0 ! increase src ptr by 4 3076*280575beSPatrick McGehearty add %o1, 4, %o1 ! increase dst ptr by 4 3077*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! 
decrease count by 4 3078*280575beSPatrick McGehearty bz,pt %ncc, .co_smallx 3079*280575beSPatrick McGehearty stwa %o4, [%o1-4]%asi 3080*280575beSPatrick McGehearty 3081*280575beSPatrick McGehearty.co_small3x: ! Exactly 1, 2, or 3 bytes remain 3082*280575beSPatrick McGehearty subcc %o2, 1, %o2 ! reduce count for cc test 3083*280575beSPatrick McGehearty ldub [%o0], %o4 ! load one byte 3084*280575beSPatrick McGehearty bz,pt %ncc, .co_smallx 3085*280575beSPatrick McGehearty stba %o4, [%o1]%asi ! store one byte 3086*280575beSPatrick McGehearty ldub [%o0+1], %o4 ! load second byte 3087*280575beSPatrick McGehearty subcc %o2, 1, %o2 3088*280575beSPatrick McGehearty bz,pt %ncc, .co_smallx 3089*280575beSPatrick McGehearty stba %o4, [%o1+1]%asi ! store second byte 3090*280575beSPatrick McGehearty ldub [%o0+2], %o4 ! load third byte 3091*280575beSPatrick McGehearty ba .co_smallx 3092*280575beSPatrick McGehearty stba %o4, [%o1+2]%asi ! store third byte 3093*280575beSPatrick McGehearty 3094*280575beSPatrick McGehearty.co_smallest: ! 7 or fewer bytes remain 3095*280575beSPatrick McGehearty cmp %o2, 4 3096*280575beSPatrick McGehearty blt,pt %ncc, .co_small3x 3097*280575beSPatrick McGehearty nop 3098*280575beSPatrick McGehearty ldub [%o0], %o4 ! read byte 3099*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! reduce count by 4 3100*280575beSPatrick McGehearty stba %o4, [%o1]%asi ! write byte 3101*280575beSPatrick McGehearty ldub [%o0+1], %o4 ! repeat for total of 4 bytes 3102*280575beSPatrick McGehearty add %o0, 4, %o0 ! advance src by 4 3103*280575beSPatrick McGehearty stba %o4, [%o1+1]%asi 3104*280575beSPatrick McGehearty ldub [%o0-2], %o4 3105*280575beSPatrick McGehearty add %o1, 4, %o1 ! 
advance dst by 4 3106*280575beSPatrick McGehearty stba %o4, [%o1-2]%asi 3107*280575beSPatrick McGehearty ldub [%o0-1], %o4 3108*280575beSPatrick McGehearty bnz,pt %ncc, .co_small3x 3109*280575beSPatrick McGehearty stba %o4, [%o1-1]%asi 3110*280575beSPatrick McGehearty membar #Sync 3111*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 3112*280575beSPatrick McGehearty retl 3113*280575beSPatrick McGehearty mov %g0, %o0 3114*280575beSPatrick McGehearty 3115*280575beSPatrick McGehearty.co_align: ! byte align test in prior branch delay 3116*280575beSPatrick McGehearty bnz,pt %ncc, .co_al_d1 3117*280575beSPatrick McGehearty.co_al_d1f: ! dest is now half word aligned 3118*280575beSPatrick McGehearty andcc %o1, 2, %o3 3119*280575beSPatrick McGehearty bnz,pt %ncc, .co_al_d2 3120*280575beSPatrick McGehearty.co_al_d2f: ! dest is now word aligned 3121*280575beSPatrick McGehearty andcc %o1, 4, %o3 ! is dest longword aligned? 3122*280575beSPatrick McGehearty bz,pt %ncc, .co_al_src 3123*280575beSPatrick McGehearty nop 3124*280575beSPatrick McGehearty.co_al_d4: ! dest is word aligned; src is unknown 3125*280575beSPatrick McGehearty ldub [%o0], %o4 ! move a word (src align unknown) 3126*280575beSPatrick McGehearty ldub [%o0+1], %o3 3127*280575beSPatrick McGehearty sll %o4, 24, %o4 ! position 3128*280575beSPatrick McGehearty sll %o3, 16, %o3 ! position 3129*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 3130*280575beSPatrick McGehearty ldub [%o0+2], %o4 3131*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 3132*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 3133*280575beSPatrick McGehearty ldub [%o0+3], %o4 3134*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 3135*280575beSPatrick McGehearty stwa %o4,[%o1]%asi ! store four bytes 3136*280575beSPatrick McGehearty add %o0, 4, %o0 ! adjust src by 4 3137*280575beSPatrick McGehearty add %o1, 4, %o1 ! adjust dest by 4 3138*280575beSPatrick McGehearty sub %o2, 4, %o2 ! 
adjust count by 4 3139*280575beSPatrick McGehearty andcc %o0, 7, %o3 ! check for src long word alignment 3140*280575beSPatrick McGehearty brz,pt %o3, .co_medlong 3141*280575beSPatrick McGehearty.co_src_dst_unal8: 3142*280575beSPatrick McGehearty ! dst is 8-byte aligned, src is not 3143*280575beSPatrick McGehearty ! Size is less than FP_COPY 3144*280575beSPatrick McGehearty ! Following code is to select for alignment 3145*280575beSPatrick McGehearty andcc %o0, 0x3, %o3 ! test word alignment 3146*280575beSPatrick McGehearty bz,pt %ncc, .co_medword 3147*280575beSPatrick McGehearty nop 3148*280575beSPatrick McGehearty andcc %o0, 0x1, %o3 ! test halfword alignment 3149*280575beSPatrick McGehearty bnz,pt %ncc, .co_med_byte ! go to byte move if not halfword 3150*280575beSPatrick McGehearty andcc %o0, 0x2, %o3 ! test which byte alignment 3151*280575beSPatrick McGehearty ba .co_medhalf 3152*280575beSPatrick McGehearty nop 3153*280575beSPatrick McGehearty.co_al_d1: ! align dest to half word 3154*280575beSPatrick McGehearty ldub [%o0], %o4 ! move a byte 3155*280575beSPatrick McGehearty add %o0, 1, %o0 3156*280575beSPatrick McGehearty stba %o4, [%o1]%asi 3157*280575beSPatrick McGehearty add %o1, 1, %o1 3158*280575beSPatrick McGehearty andcc %o1, 2, %o3 3159*280575beSPatrick McGehearty bz,pt %ncc, .co_al_d2f 3160*280575beSPatrick McGehearty sub %o2, 1, %o2 3161*280575beSPatrick McGehearty.co_al_d2: ! align dest to word 3162*280575beSPatrick McGehearty ldub [%o0], %o4 ! move a half-word (src align unknown) 3163*280575beSPatrick McGehearty ldub [%o0+1], %o3 3164*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 3165*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 3166*280575beSPatrick McGehearty stha %o4, [%o1]%asi 3167*280575beSPatrick McGehearty add %o0, 2, %o0 3168*280575beSPatrick McGehearty add %o1, 2, %o1 3169*280575beSPatrick McGehearty andcc %o1, 4, %o3 ! is dest longword aligned? 
3170*280575beSPatrick McGehearty bz,pt %ncc, .co_al_src 3171*280575beSPatrick McGehearty sub %o2, 2, %o2 3172*280575beSPatrick McGehearty ba .co_al_d4 3173*280575beSPatrick McGehearty nop 3174*280575beSPatrick McGehearty/* 3175*280575beSPatrick McGehearty * Handle all cases where src and dest are aligned on word 3176*280575beSPatrick McGehearty * boundaries. Use unrolled loops for better performance. 3177*280575beSPatrick McGehearty * This option wins over standard large data move when 3178*280575beSPatrick McGehearty * source and destination is in cache for medium 3179*280575beSPatrick McGehearty * to short data moves. 3180*280575beSPatrick McGehearty */ 3181*280575beSPatrick McGehearty.co_medword: 3182*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 3183*280575beSPatrick McGehearty ble,pt %ncc, .co_medw31 3184*280575beSPatrick McGehearty nop 3185*280575beSPatrick McGehearty.co_medw32: 3186*280575beSPatrick McGehearty ld [%o0], %o4 ! move a block of 32 bytes 3187*280575beSPatrick McGehearty stwa %o4, [%o1]%asi 3188*280575beSPatrick McGehearty ld [%o0+4], %o4 3189*280575beSPatrick McGehearty stwa %o4, [%o1+4]%asi 3190*280575beSPatrick McGehearty ld [%o0+8], %o4 3191*280575beSPatrick McGehearty stwa %o4, [%o1+8]%asi 3192*280575beSPatrick McGehearty ld [%o0+12], %o4 3193*280575beSPatrick McGehearty stwa %o4, [%o1+12]%asi 3194*280575beSPatrick McGehearty ld [%o0+16], %o4 3195*280575beSPatrick McGehearty stwa %o4, [%o1+16]%asi 3196*280575beSPatrick McGehearty ld [%o0+20], %o4 3197*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 3198*280575beSPatrick McGehearty stwa %o4, [%o1+20]%asi 3199*280575beSPatrick McGehearty ld [%o0+24], %o4 3200*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 3201*280575beSPatrick McGehearty stwa %o4, [%o1+24]%asi 3202*280575beSPatrick McGehearty ld [%o0-4], %o4 3203*280575beSPatrick McGehearty add %o1, 32, %o1 ! 
increase dst ptr by 32 3204*280575beSPatrick McGehearty bgu,pt %ncc, .co_medw32 ! repeat if at least 32 bytes left 3205*280575beSPatrick McGehearty stwa %o4, [%o1-4]%asi 3206*280575beSPatrick McGehearty.co_medw31: 3207*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 3208*280575beSPatrick McGehearty ble,pt %ncc, .co_medw7 ! skip if 7 or fewer bytes left 3209*280575beSPatrick McGehearty nop ! 3210*280575beSPatrick McGehearty.co_medw15: 3211*280575beSPatrick McGehearty ld [%o0], %o4 ! move a block of 8 bytes 3212*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 3213*280575beSPatrick McGehearty stwa %o4, [%o1]%asi 3214*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 3215*280575beSPatrick McGehearty ld [%o0-4], %o4 3216*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 3217*280575beSPatrick McGehearty bgu,pt %ncc, .co_medw15 3218*280575beSPatrick McGehearty stwa %o4, [%o1-4]%asi 3219*280575beSPatrick McGehearty.co_medw7: 3220*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 3221*280575beSPatrick McGehearty bz,pt %ncc, .co_smallx ! exit if finished 3222*280575beSPatrick McGehearty cmp %o2, 4 3223*280575beSPatrick McGehearty blt,pt %ncc, .co_small3x ! skip if less than 4 bytes left 3224*280575beSPatrick McGehearty nop ! 3225*280575beSPatrick McGehearty ld [%o0], %o4 ! move 4 bytes 3226*280575beSPatrick McGehearty add %o0, 4, %o0 ! increase src ptr by 4 3227*280575beSPatrick McGehearty add %o1, 4, %o1 ! increase dst ptr by 4 3228*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! 
decrease count by 4 3229*280575beSPatrick McGehearty bnz .co_small3x 3230*280575beSPatrick McGehearty stwa %o4, [%o1-4]%asi 3231*280575beSPatrick McGehearty membar #Sync 3232*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 3233*280575beSPatrick McGehearty retl 3234*280575beSPatrick McGehearty mov %g0, %o0 3235*280575beSPatrick McGehearty 3236*280575beSPatrick McGehearty.co_medhalf: 3237*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 3238*280575beSPatrick McGehearty ble,pt %ncc, .co_medh31 3239*280575beSPatrick McGehearty nop 3240*280575beSPatrick McGehearty.co_medh32: ! load and store block of 32 bytes 3241*280575beSPatrick McGehearty 3242*280575beSPatrick McGehearty lduh [%o0], %o4 ! move 32 bytes 3243*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 3244*280575beSPatrick McGehearty lduw [%o0+2], %o3 3245*280575beSPatrick McGehearty sllx %o4, 48, %o4 3246*280575beSPatrick McGehearty sllx %o3, 16, %o3 3247*280575beSPatrick McGehearty or %o4, %o3, %o3 3248*280575beSPatrick McGehearty lduh [%o0+6], %o4 3249*280575beSPatrick McGehearty or %o4, %o3, %o4 3250*280575beSPatrick McGehearty stxa %o4, [%o1]%asi 3251*280575beSPatrick McGehearty 3252*280575beSPatrick McGehearty lduh [%o0+8], %o4 3253*280575beSPatrick McGehearty lduw [%o0+10], %o3 3254*280575beSPatrick McGehearty sllx %o4, 48, %o4 3255*280575beSPatrick McGehearty sllx %o3, 16, %o3 3256*280575beSPatrick McGehearty or %o4, %o3, %o3 3257*280575beSPatrick McGehearty lduh [%o0+14], %o4 3258*280575beSPatrick McGehearty or %o4, %o3, %o4 3259*280575beSPatrick McGehearty stxa %o4, [%o1+8]%asi 3260*280575beSPatrick McGehearty 3261*280575beSPatrick McGehearty lduh [%o0+16], %o4 3262*280575beSPatrick McGehearty lduw [%o0+18], %o3 3263*280575beSPatrick McGehearty sllx %o4, 48, %o4 3264*280575beSPatrick McGehearty sllx %o3, 16, %o3 3265*280575beSPatrick McGehearty or %o4, %o3, %o3 3266*280575beSPatrick McGehearty lduh [%o0+22], %o4 
3267*280575beSPatrick McGehearty or %o4, %o3, %o4 3268*280575beSPatrick McGehearty stxa %o4, [%o1+16]%asi 3269*280575beSPatrick McGehearty 3270*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 3271*280575beSPatrick McGehearty add %o1, 32, %o1 ! increase dst ptr by 32 3272*280575beSPatrick McGehearty 3273*280575beSPatrick McGehearty lduh [%o0-8], %o4 3274*280575beSPatrick McGehearty lduw [%o0-6], %o3 3275*280575beSPatrick McGehearty sllx %o4, 48, %o4 3276*280575beSPatrick McGehearty sllx %o3, 16, %o3 3277*280575beSPatrick McGehearty or %o4, %o3, %o3 3278*280575beSPatrick McGehearty lduh [%o0-2], %o4 3279*280575beSPatrick McGehearty or %o3, %o4, %o4 3280*280575beSPatrick McGehearty bgu,pt %ncc, .co_medh32 ! repeat if at least 32 bytes left 3281*280575beSPatrick McGehearty stxa %o4, [%o1-8]%asi 3282*280575beSPatrick McGehearty 3283*280575beSPatrick McGehearty.co_medh31: 3284*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 3285*280575beSPatrick McGehearty ble,pt %ncc, .co_medh7 ! skip if 7 or fewer bytes left 3286*280575beSPatrick McGehearty nop ! 3287*280575beSPatrick McGehearty.co_medh15: 3288*280575beSPatrick McGehearty lduh [%o0], %o4 ! move 8 bytes 3289*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 3290*280575beSPatrick McGehearty lduw [%o0+2], %o3 3291*280575beSPatrick McGehearty sllx %o4, 48, %o4 3292*280575beSPatrick McGehearty sllx %o3, 16, %o3 3293*280575beSPatrick McGehearty or %o4, %o3, %o3 3294*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 3295*280575beSPatrick McGehearty lduh [%o0+6], %o4 3296*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 3297*280575beSPatrick McGehearty or %o4, %o3, %o4 3298*280575beSPatrick McGehearty bgu,pt %ncc, .co_medh15 3299*280575beSPatrick McGehearty stxa %o4, [%o1-8]%asi 3300*280575beSPatrick McGehearty.co_medh7: 3301*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! 
finish adjustment of remaining count 3302*280575beSPatrick McGehearty bz,pt %ncc, .co_smallx ! exit if finished 3303*280575beSPatrick McGehearty cmp %o2, 4 3304*280575beSPatrick McGehearty blt,pt %ncc, .co_small3x ! skip if less than 4 bytes left 3305*280575beSPatrick McGehearty nop ! 3306*280575beSPatrick McGehearty lduh [%o0], %o4 3307*280575beSPatrick McGehearty sll %o4, 16, %o4 3308*280575beSPatrick McGehearty lduh [%o0+2], %o3 3309*280575beSPatrick McGehearty or %o3, %o4, %o4 3310*280575beSPatrick McGehearty subcc %o2, 4, %o2 3311*280575beSPatrick McGehearty add %o0, 4, %o0 3312*280575beSPatrick McGehearty add %o1, 4, %o1 3313*280575beSPatrick McGehearty bnz .co_small3x 3314*280575beSPatrick McGehearty stwa %o4, [%o1-4]%asi 3315*280575beSPatrick McGehearty membar #Sync 3316*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 3317*280575beSPatrick McGehearty retl 3318*280575beSPatrick McGehearty mov %g0, %o0 3319*280575beSPatrick McGehearty 3320*280575beSPatrick McGehearty .align 16 3321*280575beSPatrick McGehearty.co_med_byte: 3322*280575beSPatrick McGehearty bnz,pt %ncc, .co_medbh32a ! go to correct byte move 3323*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 3324*280575beSPatrick McGehearty ble,pt %ncc, .co_medb31 3325*280575beSPatrick McGehearty nop 3326*280575beSPatrick McGehearty.co_medb32: ! Alignment 1 or 5 3327*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 3328*280575beSPatrick McGehearty 3329*280575beSPatrick McGehearty ldub [%o0], %o4 ! 
load and store a block of 32 bytes 3330*280575beSPatrick McGehearty sllx %o4, 56, %o3 3331*280575beSPatrick McGehearty lduh [%o0+1], %o4 3332*280575beSPatrick McGehearty sllx %o4, 40, %o4 3333*280575beSPatrick McGehearty or %o4, %o3, %o3 3334*280575beSPatrick McGehearty lduw [%o0+3], %o4 3335*280575beSPatrick McGehearty sllx %o4, 8, %o4 3336*280575beSPatrick McGehearty or %o4, %o3, %o3 3337*280575beSPatrick McGehearty ldub [%o0+7], %o4 3338*280575beSPatrick McGehearty or %o4, %o3, %o4 3339*280575beSPatrick McGehearty stxa %o4, [%o1]%asi 3340*280575beSPatrick McGehearty 3341*280575beSPatrick McGehearty ldub [%o0+8], %o4 3342*280575beSPatrick McGehearty sllx %o4, 56, %o3 3343*280575beSPatrick McGehearty lduh [%o0+9], %o4 3344*280575beSPatrick McGehearty sllx %o4, 40, %o4 3345*280575beSPatrick McGehearty or %o4, %o3, %o3 3346*280575beSPatrick McGehearty lduw [%o0+11], %o4 3347*280575beSPatrick McGehearty sllx %o4, 8, %o4 3348*280575beSPatrick McGehearty or %o4, %o3, %o3 3349*280575beSPatrick McGehearty ldub [%o0+15], %o4 3350*280575beSPatrick McGehearty or %o4, %o3, %o4 3351*280575beSPatrick McGehearty stxa %o4, [%o1+8]%asi 3352*280575beSPatrick McGehearty 3353*280575beSPatrick McGehearty ldub [%o0+16], %o4 3354*280575beSPatrick McGehearty sllx %o4, 56, %o3 3355*280575beSPatrick McGehearty lduh [%o0+17], %o4 3356*280575beSPatrick McGehearty sllx %o4, 40, %o4 3357*280575beSPatrick McGehearty or %o4, %o3, %o3 3358*280575beSPatrick McGehearty lduw [%o0+19], %o4 3359*280575beSPatrick McGehearty sllx %o4, 8, %o4 3360*280575beSPatrick McGehearty or %o4, %o3, %o3 3361*280575beSPatrick McGehearty ldub [%o0+23], %o4 3362*280575beSPatrick McGehearty or %o4, %o3, %o4 3363*280575beSPatrick McGehearty stxa %o4, [%o1+16]%asi 3364*280575beSPatrick McGehearty 3365*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 3366*280575beSPatrick McGehearty add %o1, 32, %o1 ! 
increase dst ptr by 32 3367*280575beSPatrick McGehearty 3368*280575beSPatrick McGehearty ldub [%o0-8], %o4 3369*280575beSPatrick McGehearty sllx %o4, 56, %o3 3370*280575beSPatrick McGehearty lduh [%o0-7], %o4 3371*280575beSPatrick McGehearty sllx %o4, 40, %o4 3372*280575beSPatrick McGehearty or %o4, %o3, %o3 3373*280575beSPatrick McGehearty lduw [%o0-5], %o4 3374*280575beSPatrick McGehearty sllx %o4, 8, %o4 3375*280575beSPatrick McGehearty or %o4, %o3, %o3 3376*280575beSPatrick McGehearty ldub [%o0-1], %o4 3377*280575beSPatrick McGehearty or %o4, %o3, %o4 3378*280575beSPatrick McGehearty bgu,pt %ncc, .co_medb32 ! repeat if at least 32 bytes left 3379*280575beSPatrick McGehearty stxa %o4, [%o1-8]%asi 3380*280575beSPatrick McGehearty 3381*280575beSPatrick McGehearty.co_medb31: ! 31 or fewer bytes remaining 3382*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 3383*280575beSPatrick McGehearty ble,pt %ncc, .co_medb7 ! skip if 7 or fewer bytes left 3384*280575beSPatrick McGehearty nop ! 3385*280575beSPatrick McGehearty.co_medb15: 3386*280575beSPatrick McGehearty 3387*280575beSPatrick McGehearty ldub [%o0], %o4 ! load and store a block of 8 bytes 3388*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 3389*280575beSPatrick McGehearty sllx %o4, 56, %o3 3390*280575beSPatrick McGehearty lduh [%o0+1], %o4 3391*280575beSPatrick McGehearty sllx %o4, 40, %o4 3392*280575beSPatrick McGehearty or %o4, %o3, %o3 3393*280575beSPatrick McGehearty lduw [%o0+3], %o4 3394*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 3395*280575beSPatrick McGehearty sllx %o4, 8, %o4 3396*280575beSPatrick McGehearty or %o4, %o3, %o3 3397*280575beSPatrick McGehearty ldub [%o0+7], %o4 3398*280575beSPatrick McGehearty add %o0, 8, %o0 ! 
increase src ptr by 8 3399*280575beSPatrick McGehearty or %o4, %o3, %o4 3400*280575beSPatrick McGehearty bgu,pt %ncc, .co_medb15 3401*280575beSPatrick McGehearty stxa %o4, [%o1-8]%asi 3402*280575beSPatrick McGehearty.co_medb7: 3403*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 3404*280575beSPatrick McGehearty bz,pt %ncc, .co_smallx ! exit if finished 3405*280575beSPatrick McGehearty cmp %o2, 4 3406*280575beSPatrick McGehearty blt,pt %ncc, .co_small3x ! skip if less than 4 bytes left 3407*280575beSPatrick McGehearty nop ! 3408*280575beSPatrick McGehearty ldub [%o0], %o4 ! move 4 bytes 3409*280575beSPatrick McGehearty sll %o4, 24, %o3 3410*280575beSPatrick McGehearty lduh [%o0+1], %o4 3411*280575beSPatrick McGehearty sll %o4, 8, %o4 3412*280575beSPatrick McGehearty or %o4, %o3, %o3 3413*280575beSPatrick McGehearty ldub [%o0+3], %o4 3414*280575beSPatrick McGehearty or %o4, %o3, %o4 3415*280575beSPatrick McGehearty subcc %o2, 4, %o2 3416*280575beSPatrick McGehearty add %o0, 4, %o0 3417*280575beSPatrick McGehearty add %o1, 4, %o1 3418*280575beSPatrick McGehearty bnz .co_small3x 3419*280575beSPatrick McGehearty stwa %o4, [%o1-4]%asi 3420*280575beSPatrick McGehearty membar #Sync 3421*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 3422*280575beSPatrick McGehearty retl 3423*280575beSPatrick McGehearty mov %g0, %o0 3424*280575beSPatrick McGehearty 3425*280575beSPatrick McGehearty .align 16 3426*280575beSPatrick McGehearty.co_medbh32a: 3427*280575beSPatrick McGehearty ble,pt %ncc, .co_medbh31 3428*280575beSPatrick McGehearty nop 3429*280575beSPatrick McGehearty.co_medbh32: ! Alignment 3 or 7 3430*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 3431*280575beSPatrick McGehearty 3432*280575beSPatrick McGehearty ldub [%o0], %o4 ! 
load and store a block of 32 bytes 3433*280575beSPatrick McGehearty sllx %o4, 56, %o3 3434*280575beSPatrick McGehearty lduw [%o0+1], %o4 3435*280575beSPatrick McGehearty sllx %o4, 24, %o4 3436*280575beSPatrick McGehearty or %o4, %o3, %o3 3437*280575beSPatrick McGehearty lduh [%o0+5], %o4 3438*280575beSPatrick McGehearty sllx %o4, 8, %o4 3439*280575beSPatrick McGehearty or %o4, %o3, %o3 3440*280575beSPatrick McGehearty ldub [%o0+7], %o4 3441*280575beSPatrick McGehearty or %o4, %o3, %o4 3442*280575beSPatrick McGehearty stxa %o4, [%o1]%asi 3443*280575beSPatrick McGehearty 3444*280575beSPatrick McGehearty ldub [%o0+8], %o4 3445*280575beSPatrick McGehearty sllx %o4, 56, %o3 3446*280575beSPatrick McGehearty lduw [%o0+9], %o4 3447*280575beSPatrick McGehearty sllx %o4, 24, %o4 3448*280575beSPatrick McGehearty or %o4, %o3, %o3 3449*280575beSPatrick McGehearty lduh [%o0+13], %o4 3450*280575beSPatrick McGehearty sllx %o4, 8, %o4 3451*280575beSPatrick McGehearty or %o4, %o3, %o3 3452*280575beSPatrick McGehearty ldub [%o0+15], %o4 3453*280575beSPatrick McGehearty or %o4, %o3, %o4 3454*280575beSPatrick McGehearty stxa %o4, [%o1+8]%asi 3455*280575beSPatrick McGehearty 3456*280575beSPatrick McGehearty ldub [%o0+16], %o4 3457*280575beSPatrick McGehearty sllx %o4, 56, %o3 3458*280575beSPatrick McGehearty lduw [%o0+17], %o4 3459*280575beSPatrick McGehearty sllx %o4, 24, %o4 3460*280575beSPatrick McGehearty or %o4, %o3, %o3 3461*280575beSPatrick McGehearty lduh [%o0+21], %o4 3462*280575beSPatrick McGehearty sllx %o4, 8, %o4 3463*280575beSPatrick McGehearty or %o4, %o3, %o3 3464*280575beSPatrick McGehearty ldub [%o0+23], %o4 3465*280575beSPatrick McGehearty or %o4, %o3, %o4 3466*280575beSPatrick McGehearty stxa %o4, [%o1+16]%asi 3467*280575beSPatrick McGehearty 3468*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 3469*280575beSPatrick McGehearty add %o1, 32, %o1 ! 
increase dst ptr by 32 3470*280575beSPatrick McGehearty 3471*280575beSPatrick McGehearty ldub [%o0-8], %o4 3472*280575beSPatrick McGehearty sllx %o4, 56, %o3 3473*280575beSPatrick McGehearty lduw [%o0-7], %o4 3474*280575beSPatrick McGehearty sllx %o4, 24, %o4 3475*280575beSPatrick McGehearty or %o4, %o3, %o3 3476*280575beSPatrick McGehearty lduh [%o0-3], %o4 3477*280575beSPatrick McGehearty sllx %o4, 8, %o4 3478*280575beSPatrick McGehearty or %o4, %o3, %o3 3479*280575beSPatrick McGehearty ldub [%o0-1], %o4 3480*280575beSPatrick McGehearty or %o4, %o3, %o4 3481*280575beSPatrick McGehearty bgu,pt %ncc, .co_medbh32 ! repeat if at least 32 bytes left 3482*280575beSPatrick McGehearty stxa %o4, [%o1-8]%asi 3483*280575beSPatrick McGehearty 3484*280575beSPatrick McGehearty.co_medbh31: 3485*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 3486*280575beSPatrick McGehearty ble,pt %ncc, .co_medb7 ! skip if 7 or fewer bytes left 3487*280575beSPatrick McGehearty nop ! 3488*280575beSPatrick McGehearty.co_medbh15: 3489*280575beSPatrick McGehearty ldub [%o0], %o4 ! load and store a block of 8 bytes 3490*280575beSPatrick McGehearty sllx %o4, 56, %o3 3491*280575beSPatrick McGehearty lduw [%o0+1], %o4 3492*280575beSPatrick McGehearty sllx %o4, 24, %o4 3493*280575beSPatrick McGehearty or %o4, %o3, %o3 3494*280575beSPatrick McGehearty lduh [%o0+5], %o4 3495*280575beSPatrick McGehearty sllx %o4, 8, %o4 3496*280575beSPatrick McGehearty or %o4, %o3, %o3 3497*280575beSPatrick McGehearty ldub [%o0+7], %o4 3498*280575beSPatrick McGehearty or %o4, %o3, %o4 3499*280575beSPatrick McGehearty stxa %o4, [%o1]%asi 3500*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 3501*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 3502*280575beSPatrick McGehearty add %o0, 8, %o0 ! 
increase src ptr by 8 3503*280575beSPatrick McGehearty bgu,pt %ncc, .co_medbh15 3504*280575beSPatrick McGehearty stxa %o4, [%o1-8]%asi 3505*280575beSPatrick McGehearty ba .co_medb7 3506*280575beSPatrick McGehearty nop 3507*280575beSPatrick McGehearty/* 3508*280575beSPatrick McGehearty * End of small copy (no window) code 3509*280575beSPatrick McGehearty */ 3510*280575beSPatrick McGehearty 3511*280575beSPatrick McGehearty/* 3512*280575beSPatrick McGehearty * Long copy code 3513*280575beSPatrick McGehearty */ 3514*280575beSPatrick McGehearty.co_copy_more: 3515*280575beSPatrick McGehearty sethi %hi(copyio_fault), %o3 3516*280575beSPatrick McGehearty or %o3, %lo(copyio_fault), %o3 3517*280575beSPatrick McGehearty membar #Sync 3518*280575beSPatrick McGehearty stn %o3, [THREAD_REG + T_LOFAULT] 3519*280575beSPatrick McGehearty 3520*280575beSPatrick McGehearty/* 3521*280575beSPatrick McGehearty * Following code is for large copies. We know there is at 3522*280575beSPatrick McGehearty * least FP_COPY bytes available. FP regs are used, so 3523*280575beSPatrick McGehearty * we save registers and fp regs before starting 3524*280575beSPatrick McGehearty */ 3525*280575beSPatrick McGehearty save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3526*280575beSPatrick McGehearty or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 3527*280575beSPatrick McGehearty rd %fprs, %g1 ! check for unused fp 3528*280575beSPatrick McGehearty ! if fprs.fef == 0, set it. 3529*280575beSPatrick McGehearty ! Setting it when already set costs more than checking 3530*280575beSPatrick McGehearty andcc %g1, FPRS_FEF, %g1 ! 
test FEF, fprs.du = fprs.dl = 0 3531*280575beSPatrick McGehearty bz,pt %ncc, .co_fp_unused 3532*280575beSPatrick McGehearty mov ASI_USER, %asi 3533*280575beSPatrick McGehearty BST_FP_TOSTACK(%o3) 3534*280575beSPatrick McGehearty ba .co_fp_ready 3535*280575beSPatrick McGehearty.co_fp_unused: 3536*280575beSPatrick McGehearty prefetch [%i0 + (1 * CACHE_LINE)], #one_read 3537*280575beSPatrick McGehearty wr %g0, FPRS_FEF, %fprs ! fprs.fef = 1 3538*280575beSPatrick McGehearty.co_fp_ready: 3539*280575beSPatrick McGehearty rd %gsr, %l5 ! save %gsr value 3540*280575beSPatrick McGehearty andcc %i1, 1, %o3 ! is dest byte aligned 3541*280575beSPatrick McGehearty bnz,pt %ncc, .co_big_d1 3542*280575beSPatrick McGehearty.co_big_d1f: ! dest is now half word aligned 3543*280575beSPatrick McGehearty andcc %i1, 2, %o3 3544*280575beSPatrick McGehearty bnz,pt %ncc, .co_big_d2 3545*280575beSPatrick McGehearty.co_big_d2f: ! dest is now word aligned 3546*280575beSPatrick McGehearty andcc %i1, 4, %o3 ! is dest longword aligned 3547*280575beSPatrick McGehearty bnz,pt %ncc, .co_big_d4 3548*280575beSPatrick McGehearty.co_big_d4f: ! dest is now long word aligned 3549*280575beSPatrick McGehearty andcc %i0, 7, %o3 ! is src long word aligned 3550*280575beSPatrick McGehearty brnz,pt %o3, .co_big_unal8 3551*280575beSPatrick McGehearty prefetch [%i0 + (2 * CACHE_LINE)], #one_read 3552*280575beSPatrick McGehearty ! Src and dst are long word aligned 3553*280575beSPatrick McGehearty ! align dst to 64 byte boundary 3554*280575beSPatrick McGehearty andcc %i1, 0x3f, %o3 ! %o3 == 0 means dst is 64 byte aligned 3555*280575beSPatrick McGehearty brz,pn %o3, .co_al_to_64 3556*280575beSPatrick McGehearty nop 3557*280575beSPatrick McGehearty sub %o3, 64, %o3 ! %o3 has negative bytes to move 3558*280575beSPatrick McGehearty add %i2, %o3, %i2 ! adjust remaining count 3559*280575beSPatrick McGehearty andcc %o3, 8, %o4 ! odd long words to move? 
3560*280575beSPatrick McGehearty brz,pt %o4, .co_al_to_16 3561*280575beSPatrick McGehearty nop 3562*280575beSPatrick McGehearty add %o3, 8, %o3 3563*280575beSPatrick McGehearty ldx [%i0], %o4 3564*280575beSPatrick McGehearty add %i0, 8, %i0 ! increment src ptr 3565*280575beSPatrick McGehearty stxa %o4, [%i1]ASI_USER 3566*280575beSPatrick McGehearty add %i1, 8, %i1 ! increment dst ptr 3567*280575beSPatrick McGehearty! Dest is aligned on 16 bytes, src 8 byte aligned 3568*280575beSPatrick McGehearty.co_al_to_16: 3569*280575beSPatrick McGehearty andcc %o3, 0x30, %o4 ! more to move? 3570*280575beSPatrick McGehearty brz,pt %o4, .co_al_to_64 3571*280575beSPatrick McGehearty nop 3572*280575beSPatrick McGehearty.co_al_mv_16: 3573*280575beSPatrick McGehearty add %o3, 16, %o3 3574*280575beSPatrick McGehearty ldx [%i0], %o4 3575*280575beSPatrick McGehearty stxa %o4, [%i1]ASI_USER 3576*280575beSPatrick McGehearty add %i0, 16, %i0 ! increment src ptr 3577*280575beSPatrick McGehearty ldx [%i0-8], %o4 3578*280575beSPatrick McGehearty add %i1, 8, %i1 ! increment dst ptr 3579*280575beSPatrick McGehearty stxa %o4, [%i1]ASI_USER 3580*280575beSPatrick McGehearty andcc %o3, 0x30, %o4 3581*280575beSPatrick McGehearty brnz,pt %o4, .co_al_mv_16 3582*280575beSPatrick McGehearty add %i1, 8, %i1 ! increment dst ptr 3583*280575beSPatrick McGehearty! Dest is aligned on 64 bytes, src 8 byte aligned 3584*280575beSPatrick McGehearty.co_al_to_64: 3585*280575beSPatrick McGehearty ! Determine source alignment 3586*280575beSPatrick McGehearty ! 
to correct 8 byte offset 3587*280575beSPatrick McGehearty andcc %i0, 32, %o3 3588*280575beSPatrick McGehearty brnz,pn %o3, .co_aln_1 3589*280575beSPatrick McGehearty andcc %i0, 16, %o3 3590*280575beSPatrick McGehearty brnz,pn %o3, .co_aln_01 3591*280575beSPatrick McGehearty andcc %i0, 8, %o3 3592*280575beSPatrick McGehearty brz,pn %o3, .co_aln_000 3593*280575beSPatrick McGehearty prefetch [%i0 + (3 * CACHE_LINE)], #one_read 3594*280575beSPatrick McGehearty ba .co_aln_001 3595*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3596*280575beSPatrick McGehearty.co_aln_01: 3597*280575beSPatrick McGehearty brnz,pn %o3, .co_aln_011 3598*280575beSPatrick McGehearty prefetch [%i0 + (3 * CACHE_LINE)], #one_read 3599*280575beSPatrick McGehearty ba .co_aln_010 3600*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3601*280575beSPatrick McGehearty.co_aln_1: 3602*280575beSPatrick McGehearty andcc %i0, 16, %o3 3603*280575beSPatrick McGehearty brnz,pn %o3, .co_aln_11 3604*280575beSPatrick McGehearty andcc %i0, 8, %o3 3605*280575beSPatrick McGehearty brnz,pn %o3, .co_aln_101 3606*280575beSPatrick McGehearty prefetch [%i0 + (3 * CACHE_LINE)], #one_read 3607*280575beSPatrick McGehearty ba .co_aln_100 3608*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3609*280575beSPatrick McGehearty.co_aln_11: 3610*280575beSPatrick McGehearty brz,pn %o3, .co_aln_110 3611*280575beSPatrick McGehearty prefetch [%i0 + (3 * CACHE_LINE)], #one_read 3612*280575beSPatrick McGehearty 3613*280575beSPatrick McGehearty.co_aln_111: 3614*280575beSPatrick McGehearty! Alignment off by 8 bytes 3615*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3616*280575beSPatrick McGehearty ldd [%i0], %d0 3617*280575beSPatrick McGehearty add %i0, 8, %i0 3618*280575beSPatrick McGehearty sub %i2, 8, %i2 3619*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! 
%o3 is multiple of 2*block size 3620*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 3621*280575beSPatrick McGehearty sub %i1, %i0, %i1 3622*280575beSPatrick McGehearty.co_aln_111_loop: 3623*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 3624*280575beSPatrick McGehearty subcc %o3, 64, %o3 3625*280575beSPatrick McGehearty fmovd %d16, %d2 3626*280575beSPatrick McGehearty fmovd %d18, %d4 3627*280575beSPatrick McGehearty fmovd %d20, %d6 3628*280575beSPatrick McGehearty fmovd %d22, %d8 3629*280575beSPatrick McGehearty fmovd %d24, %d10 3630*280575beSPatrick McGehearty fmovd %d26, %d12 3631*280575beSPatrick McGehearty fmovd %d28, %d14 3632*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_AIUS ! block initializing store 3633*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_AIUS 3634*280575beSPatrick McGehearty add %i0, 64, %i0 3635*280575beSPatrick McGehearty fmovd %d30, %d0 3636*280575beSPatrick McGehearty bgt,pt %ncc, .co_aln_111_loop 3637*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3638*280575beSPatrick McGehearty add %i1, %i0, %i1 3639*280575beSPatrick McGehearty 3640*280575beSPatrick McGehearty stda %d0, [%i1]ASI_USER 3641*280575beSPatrick McGehearty ba .co_remain_stuff 3642*280575beSPatrick McGehearty add %i1, 8, %i1 3643*280575beSPatrick McGehearty ! END OF aln_111 3644*280575beSPatrick McGehearty 3645*280575beSPatrick McGehearty.co_aln_110: 3646*280575beSPatrick McGehearty! Alignment off by 16 bytes 3647*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3648*280575beSPatrick McGehearty ldd [%i0], %d0 3649*280575beSPatrick McGehearty ldd [%i0+8], %d2 3650*280575beSPatrick McGehearty add %i0, 16, %i0 3651*280575beSPatrick McGehearty sub %i2, 16, %i2 3652*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 3653*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! 
residue bytes in %i2 3654*280575beSPatrick McGehearty sub %i1, %i0, %i1 3655*280575beSPatrick McGehearty.co_aln_110_loop: 3656*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 3657*280575beSPatrick McGehearty subcc %o3, 64, %o3 3658*280575beSPatrick McGehearty fmovd %d16, %d4 3659*280575beSPatrick McGehearty fmovd %d18, %d6 3660*280575beSPatrick McGehearty fmovd %d20, %d8 3661*280575beSPatrick McGehearty fmovd %d22, %d10 3662*280575beSPatrick McGehearty fmovd %d24, %d12 3663*280575beSPatrick McGehearty fmovd %d26, %d14 3664*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_AIUS ! block initializing store 3665*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_AIUS 3666*280575beSPatrick McGehearty add %i0, 64, %i0 3667*280575beSPatrick McGehearty fmovd %d28, %d0 3668*280575beSPatrick McGehearty fmovd %d30, %d2 3669*280575beSPatrick McGehearty bgt,pt %ncc, .co_aln_110_loop 3670*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3671*280575beSPatrick McGehearty add %i1, %i0, %i1 3672*280575beSPatrick McGehearty 3673*280575beSPatrick McGehearty stda %d0, [%i1]%asi 3674*280575beSPatrick McGehearty stda %d2, [%i1+8]%asi 3675*280575beSPatrick McGehearty ba .co_remain_stuff 3676*280575beSPatrick McGehearty add %i1, 16, %i1 3677*280575beSPatrick McGehearty ! END OF aln_110 3678*280575beSPatrick McGehearty 3679*280575beSPatrick McGehearty.co_aln_101: 3680*280575beSPatrick McGehearty! Alignment off by 24 bytes 3681*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3682*280575beSPatrick McGehearty ldd [%i0], %d0 3683*280575beSPatrick McGehearty ldd [%i0+8], %d2 3684*280575beSPatrick McGehearty ldd [%i0+16], %d4 3685*280575beSPatrick McGehearty add %i0, 24, %i0 3686*280575beSPatrick McGehearty sub %i2, 24, %i2 3687*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 3688*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! 
residue bytes in %i2 3689*280575beSPatrick McGehearty sub %i1, %i0, %i1 3690*280575beSPatrick McGehearty.co_aln_101_loop: 3691*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 3692*280575beSPatrick McGehearty subcc %o3, 64, %o3 3693*280575beSPatrick McGehearty fmovd %d16, %d6 3694*280575beSPatrick McGehearty fmovd %d18, %d8 3695*280575beSPatrick McGehearty fmovd %d20, %d10 3696*280575beSPatrick McGehearty fmovd %d22, %d12 3697*280575beSPatrick McGehearty fmovd %d24, %d14 3698*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_AIUS ! block initializing store 3699*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_AIUS 3700*280575beSPatrick McGehearty add %i0, 64, %i0 3701*280575beSPatrick McGehearty fmovd %d26, %d0 3702*280575beSPatrick McGehearty fmovd %d28, %d2 3703*280575beSPatrick McGehearty fmovd %d30, %d4 3704*280575beSPatrick McGehearty bgt,pt %ncc, .co_aln_101_loop 3705*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3706*280575beSPatrick McGehearty add %i1, %i0, %i1 3707*280575beSPatrick McGehearty 3708*280575beSPatrick McGehearty stda %d0, [%i1]%asi 3709*280575beSPatrick McGehearty stda %d2, [%i1+8]%asi 3710*280575beSPatrick McGehearty stda %d4, [%i1+16]%asi 3711*280575beSPatrick McGehearty ba .co_remain_stuff 3712*280575beSPatrick McGehearty add %i1, 24, %i1 3713*280575beSPatrick McGehearty ! END OF aln_101 3714*280575beSPatrick McGehearty 3715*280575beSPatrick McGehearty.co_aln_100: 3716*280575beSPatrick McGehearty! Alignment off by 32 bytes 3717*280575beSPatrick McGehearty ldd [%i0], %d0 3718*280575beSPatrick McGehearty ldd [%i0+8], %d2 3719*280575beSPatrick McGehearty ldd [%i0+16],%d4 3720*280575beSPatrick McGehearty ldd [%i0+24],%d6 3721*280575beSPatrick McGehearty add %i0, 32, %i0 3722*280575beSPatrick McGehearty sub %i2, 32, %i2 3723*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 3724*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! 
residue bytes in %i2 3725*280575beSPatrick McGehearty sub %i1, %i0, %i1 3726*280575beSPatrick McGehearty.co_aln_100_loop: 3727*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 3728*280575beSPatrick McGehearty subcc %o3, 64, %o3 3729*280575beSPatrick McGehearty fmovd %d16, %d8 3730*280575beSPatrick McGehearty fmovd %d18, %d10 3731*280575beSPatrick McGehearty fmovd %d20, %d12 3732*280575beSPatrick McGehearty fmovd %d22, %d14 3733*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_AIUS ! block initializing store 3734*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_AIUS 3735*280575beSPatrick McGehearty add %i0, 64, %i0 3736*280575beSPatrick McGehearty fmovd %d24, %d0 3737*280575beSPatrick McGehearty fmovd %d26, %d2 3738*280575beSPatrick McGehearty fmovd %d28, %d4 3739*280575beSPatrick McGehearty fmovd %d30, %d6 3740*280575beSPatrick McGehearty bgt,pt %ncc, .co_aln_100_loop 3741*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3742*280575beSPatrick McGehearty add %i1, %i0, %i1 3743*280575beSPatrick McGehearty 3744*280575beSPatrick McGehearty stda %d0, [%i1]%asi 3745*280575beSPatrick McGehearty stda %d2, [%i1+8]%asi 3746*280575beSPatrick McGehearty stda %d4, [%i1+16]%asi 3747*280575beSPatrick McGehearty stda %d6, [%i1+24]%asi 3748*280575beSPatrick McGehearty ba .co_remain_stuff 3749*280575beSPatrick McGehearty add %i1, 32, %i1 3750*280575beSPatrick McGehearty ! END OF aln_100 3751*280575beSPatrick McGehearty 3752*280575beSPatrick McGehearty.co_aln_011: 3753*280575beSPatrick McGehearty! 
Alignment off by 40 bytes 3754*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3755*280575beSPatrick McGehearty ldd [%i0], %d0 3756*280575beSPatrick McGehearty ldd [%i0+8], %d2 3757*280575beSPatrick McGehearty ldd [%i0+16], %d4 3758*280575beSPatrick McGehearty ldd [%i0+24], %d6 3759*280575beSPatrick McGehearty ldd [%i0+32], %d8 3760*280575beSPatrick McGehearty add %i0, 40, %i0 3761*280575beSPatrick McGehearty sub %i2, 40, %i2 3762*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 3763*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 3764*280575beSPatrick McGehearty sub %i1, %i0, %i1 3765*280575beSPatrick McGehearty.co_aln_011_loop: 3766*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 3767*280575beSPatrick McGehearty subcc %o3, 64, %o3 3768*280575beSPatrick McGehearty fmovd %d16, %d10 3769*280575beSPatrick McGehearty fmovd %d18, %d12 3770*280575beSPatrick McGehearty fmovd %d20, %d14 3771*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_AIUS ! 
block initializing store 3772*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_AIUS 3773*280575beSPatrick McGehearty add %i0, 64, %i0 3774*280575beSPatrick McGehearty fmovd %d22, %d0 3775*280575beSPatrick McGehearty fmovd %d24, %d2 3776*280575beSPatrick McGehearty fmovd %d26, %d4 3777*280575beSPatrick McGehearty fmovd %d28, %d6 3778*280575beSPatrick McGehearty fmovd %d30, %d8 3779*280575beSPatrick McGehearty bgt,pt %ncc, .co_aln_011_loop 3780*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3781*280575beSPatrick McGehearty add %i1, %i0, %i1 3782*280575beSPatrick McGehearty 3783*280575beSPatrick McGehearty stda %d0, [%i1]%asi 3784*280575beSPatrick McGehearty stda %d2, [%i1+8]%asi 3785*280575beSPatrick McGehearty stda %d4, [%i1+16]%asi 3786*280575beSPatrick McGehearty stda %d6, [%i1+24]%asi 3787*280575beSPatrick McGehearty stda %d8, [%i1+32]%asi 3788*280575beSPatrick McGehearty ba .co_remain_stuff 3789*280575beSPatrick McGehearty add %i1, 40, %i1 3790*280575beSPatrick McGehearty ! END OF aln_011 3791*280575beSPatrick McGehearty 3792*280575beSPatrick McGehearty.co_aln_010: 3793*280575beSPatrick McGehearty! Alignment off by 48 bytes 3794*280575beSPatrick McGehearty ldd [%i0], %d0 3795*280575beSPatrick McGehearty ldd [%i0+8], %d2 3796*280575beSPatrick McGehearty ldd [%i0+16], %d4 3797*280575beSPatrick McGehearty ldd [%i0+24], %d6 3798*280575beSPatrick McGehearty ldd [%i0+32], %d8 3799*280575beSPatrick McGehearty ldd [%i0+40], %d10 3800*280575beSPatrick McGehearty add %i0, 48, %i0 3801*280575beSPatrick McGehearty sub %i2, 48, %i2 3802*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 3803*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 3804*280575beSPatrick McGehearty sub %i1, %i0, %i1 3805*280575beSPatrick McGehearty.co_aln_010_loop: 3806*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! 
block load 3807*280575beSPatrick McGehearty subcc %o3, 64, %o3 3808*280575beSPatrick McGehearty fmovd %d16, %d12 3809*280575beSPatrick McGehearty fmovd %d18, %d14 3810*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_AIUS ! block initializing store 3811*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_AIUS 3812*280575beSPatrick McGehearty add %i0, 64, %i0 3813*280575beSPatrick McGehearty fmovd %d20, %d0 3814*280575beSPatrick McGehearty fmovd %d22, %d2 3815*280575beSPatrick McGehearty fmovd %d24, %d4 3816*280575beSPatrick McGehearty fmovd %d26, %d6 3817*280575beSPatrick McGehearty fmovd %d28, %d8 3818*280575beSPatrick McGehearty fmovd %d30, %d10 3819*280575beSPatrick McGehearty bgt,pt %ncc, .co_aln_010_loop 3820*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3821*280575beSPatrick McGehearty add %i1, %i0, %i1 3822*280575beSPatrick McGehearty 3823*280575beSPatrick McGehearty stda %d0, [%i1]%asi 3824*280575beSPatrick McGehearty stda %d2, [%i1+8]%asi 3825*280575beSPatrick McGehearty stda %d4, [%i1+16]%asi 3826*280575beSPatrick McGehearty stda %d6, [%i1+24]%asi 3827*280575beSPatrick McGehearty stda %d8, [%i1+32]%asi 3828*280575beSPatrick McGehearty stda %d10, [%i1+40]%asi 3829*280575beSPatrick McGehearty ba .co_remain_stuff 3830*280575beSPatrick McGehearty add %i1, 48, %i1 3831*280575beSPatrick McGehearty ! END OF aln_010 3832*280575beSPatrick McGehearty 3833*280575beSPatrick McGehearty.co_aln_001: 3834*280575beSPatrick McGehearty! 
Alignment off by 56 bytes 3835*280575beSPatrick McGehearty ldd [%i0], %d0 3836*280575beSPatrick McGehearty ldd [%i0+8], %d2 3837*280575beSPatrick McGehearty ldd [%i0+16], %d4 3838*280575beSPatrick McGehearty ldd [%i0+24], %d6 3839*280575beSPatrick McGehearty ldd [%i0+32], %d8 3840*280575beSPatrick McGehearty ldd [%i0+40], %d10 3841*280575beSPatrick McGehearty ldd [%i0+48], %d12 3842*280575beSPatrick McGehearty add %i0, 56, %i0 3843*280575beSPatrick McGehearty sub %i2, 56, %i2 3844*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 3845*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 3846*280575beSPatrick McGehearty sub %i1, %i0, %i1 3847*280575beSPatrick McGehearty.co_aln_001_loop: 3848*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d16 ! block load 3849*280575beSPatrick McGehearty subcc %o3, 64, %o3 3850*280575beSPatrick McGehearty fmovd %d16, %d14 3851*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_AIUS ! block initializing store 3852*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_AIUS 3853*280575beSPatrick McGehearty add %i0, 64, %i0 3854*280575beSPatrick McGehearty fmovd %d18, %d0 3855*280575beSPatrick McGehearty fmovd %d20, %d2 3856*280575beSPatrick McGehearty fmovd %d22, %d4 3857*280575beSPatrick McGehearty fmovd %d24, %d6 3858*280575beSPatrick McGehearty fmovd %d26, %d8 3859*280575beSPatrick McGehearty fmovd %d28, %d10 3860*280575beSPatrick McGehearty fmovd %d30, %d12 3861*280575beSPatrick McGehearty bgt,pt %ncc, .co_aln_001_loop 3862*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3863*280575beSPatrick McGehearty add %i1, %i0, %i1 3864*280575beSPatrick McGehearty 3865*280575beSPatrick McGehearty stda %d0, [%i1]%asi 3866*280575beSPatrick McGehearty stda %d2, [%i1+8]%asi 3867*280575beSPatrick McGehearty stda %d4, [%i1+16]%asi 3868*280575beSPatrick McGehearty stda %d6, [%i1+24]%asi 3869*280575beSPatrick McGehearty stda %d8, [%i1+32]%asi 3870*280575beSPatrick 
McGehearty stda %d10, [%i1+40]%asi 3871*280575beSPatrick McGehearty stda %d12, [%i1+48]%asi 3872*280575beSPatrick McGehearty ba .co_remain_stuff 3873*280575beSPatrick McGehearty add %i1, 56, %i1 3874*280575beSPatrick McGehearty ! END OF aln_001 3875*280575beSPatrick McGehearty 3876*280575beSPatrick McGehearty.co_aln_000: 3877*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3878*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 3879*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 3880*280575beSPatrick McGehearty sub %i1, %i0, %i1 3881*280575beSPatrick McGehearty.co_aln_000_loop: 3882*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_P,%d0 3883*280575beSPatrick McGehearty subcc %o3, 64, %o3 3884*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_AIUS ! block initializing store 3885*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_AIUS 3886*280575beSPatrick McGehearty add %i0, 64, %i0 3887*280575beSPatrick McGehearty bgt,pt %ncc, .co_aln_000_loop 3888*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 3889*280575beSPatrick McGehearty add %i1, %i0, %i1 3890*280575beSPatrick McGehearty 3891*280575beSPatrick McGehearty ! END OF aln_000 3892*280575beSPatrick McGehearty 3893*280575beSPatrick McGehearty.co_remain_stuff: 3894*280575beSPatrick McGehearty subcc %i2, 31, %i2 ! adjust length to allow cc test 3895*280575beSPatrick McGehearty ble,pt %ncc, .co_aln_31 3896*280575beSPatrick McGehearty nop 3897*280575beSPatrick McGehearty.co_aln_32: 3898*280575beSPatrick McGehearty ldx [%i0], %o4 ! move 32 bytes 3899*280575beSPatrick McGehearty subcc %i2, 32, %i2 ! decrement length count by 32 3900*280575beSPatrick McGehearty stxa %o4, [%i1]%asi 3901*280575beSPatrick McGehearty ldx [%i0+8], %o4 3902*280575beSPatrick McGehearty stxa %o4, [%i1+8]%asi 3903*280575beSPatrick McGehearty ldx [%i0+16], %o4 3904*280575beSPatrick McGehearty add %i0, 32, %i0 ! 
increase src ptr by 32 3905*280575beSPatrick McGehearty stxa %o4, [%i1+16]%asi 3906*280575beSPatrick McGehearty ldx [%i0-8], %o4 3907*280575beSPatrick McGehearty add %i1, 32, %i1 ! increase dst ptr by 32 3908*280575beSPatrick McGehearty bgu,pt %ncc, .co_aln_32 ! repeat if at least 32 bytes left 3909*280575beSPatrick McGehearty stxa %o4, [%i1-8]%asi 3910*280575beSPatrick McGehearty.co_aln_31: 3911*280575beSPatrick McGehearty addcc %i2, 24, %i2 ! adjust count to be off by 7 3912*280575beSPatrick McGehearty ble,pt %ncc, .co_aln_7 ! skip if 7 or fewer bytes left 3913*280575beSPatrick McGehearty nop ! 3914*280575beSPatrick McGehearty.co_aln_15: 3915*280575beSPatrick McGehearty ldx [%i0], %o4 ! move 8 bytes 3916*280575beSPatrick McGehearty add %i0, 8, %i0 ! increase src ptr by 8 3917*280575beSPatrick McGehearty subcc %i2, 8, %i2 ! decrease count by 8 3918*280575beSPatrick McGehearty add %i1, 8, %i1 ! increase dst ptr by 8 3919*280575beSPatrick McGehearty bgu,pt %ncc, .co_aln_15 3920*280575beSPatrick McGehearty stxa %o4, [%i1-8]%asi 3921*280575beSPatrick McGehearty.co_aln_7: 3922*280575beSPatrick McGehearty addcc %i2, 7, %i2 ! finish adjustment of remaining count 3923*280575beSPatrick McGehearty bz,pt %ncc, .co_exit ! exit if finished 3924*280575beSPatrick McGehearty cmp %i2, 4 3925*280575beSPatrick McGehearty blt,pt %ncc, .co_unaln3x ! skip if less than 4 bytes left 3926*280575beSPatrick McGehearty nop ! 3927*280575beSPatrick McGehearty ld [%i0], %o4 ! move 4 bytes 3928*280575beSPatrick McGehearty add %i0, 4, %i0 ! increase src ptr by 4 3929*280575beSPatrick McGehearty add %i1, 4, %i1 ! increase dst ptr by 4 3930*280575beSPatrick McGehearty subcc %i2, 4, %i2 ! decrease count by 4 3931*280575beSPatrick McGehearty bnz .co_unaln3x 3932*280575beSPatrick McGehearty stwa %o4, [%i1-4]%asi 3933*280575beSPatrick McGehearty ba .co_exit 3934*280575beSPatrick McGehearty nop 3935*280575beSPatrick McGehearty 3936*280575beSPatrick McGehearty ! 
destination alignment code 3937*280575beSPatrick McGehearty.co_big_d1: 3938*280575beSPatrick McGehearty ldub [%i0], %o4 ! move a byte 3939*280575beSPatrick McGehearty add %i0, 1, %i0 3940*280575beSPatrick McGehearty stba %o4, [%i1]ASI_USER 3941*280575beSPatrick McGehearty add %i1, 1, %i1 3942*280575beSPatrick McGehearty andcc %i1, 2, %o3 3943*280575beSPatrick McGehearty bz,pt %ncc, .co_big_d2f 3944*280575beSPatrick McGehearty sub %i2, 1, %i2 3945*280575beSPatrick McGehearty.co_big_d2: 3946*280575beSPatrick McGehearty ldub [%i0], %o4 ! move a half-word (src align unknown) 3947*280575beSPatrick McGehearty ldub [%i0+1], %o3 3948*280575beSPatrick McGehearty add %i0, 2, %i0 3949*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 3950*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 3951*280575beSPatrick McGehearty stha %o4, [%i1]ASI_USER 3952*280575beSPatrick McGehearty add %i1, 2, %i1 3953*280575beSPatrick McGehearty andcc %i1, 4, %o3 ! is dest longword aligned 3954*280575beSPatrick McGehearty bz,pt %ncc, .co_big_d4f 3955*280575beSPatrick McGehearty sub %i2, 2, %i2 3956*280575beSPatrick McGehearty.co_big_d4: ! dest is at least word aligned 3957*280575beSPatrick McGehearty nop 3958*280575beSPatrick McGehearty ldub [%i0], %o4 ! move a word (src align unknown) 3959*280575beSPatrick McGehearty ldub [%i0+1], %o3 3960*280575beSPatrick McGehearty sll %o4, 24, %o4 ! position 3961*280575beSPatrick McGehearty sll %o3, 16, %o3 ! position 3962*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 3963*280575beSPatrick McGehearty ldub [%i0+2], %o4 3964*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 3965*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 3966*280575beSPatrick McGehearty ldub [%i0+3], %o4 3967*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 3968*280575beSPatrick McGehearty stwa %o4,[%i1]ASI_USER ! store four bytes 3969*280575beSPatrick McGehearty add %i0, 4, %i0 ! adjust src by 4 3970*280575beSPatrick McGehearty add %i1, 4, %i1 ! 
adjust dest by 4 3971*280575beSPatrick McGehearty ba .co_big_d4f 3972*280575beSPatrick McGehearty sub %i2, 4, %i2 ! adjust count by 4 3973*280575beSPatrick McGehearty 3974*280575beSPatrick McGehearty 3975*280575beSPatrick McGehearty ! Dst is on 8 byte boundary; src is not; 3976*280575beSPatrick McGehearty.co_big_unal8: 3977*280575beSPatrick McGehearty andcc %i1, 0x3f, %o3 ! is dst 64-byte block aligned? 3978*280575beSPatrick McGehearty bz %ncc, .co_unalnsrc 3979*280575beSPatrick McGehearty sub %o3, 64, %o3 ! %o3 will be multiple of 8 3980*280575beSPatrick McGehearty neg %o3 ! bytes until dest is 64 byte aligned 3981*280575beSPatrick McGehearty sub %i2, %o3, %i2 ! update cnt with bytes to be moved 3982*280575beSPatrick McGehearty ! Move bytes according to source alignment 3983*280575beSPatrick McGehearty andcc %i0, 0x1, %o4 3984*280575beSPatrick McGehearty bnz %ncc, .co_unalnbyte ! check for byte alignment 3985*280575beSPatrick McGehearty nop 3986*280575beSPatrick McGehearty andcc %i0, 2, %o4 ! check for half word alignment 3987*280575beSPatrick McGehearty bnz %ncc, .co_unalnhalf 3988*280575beSPatrick McGehearty nop 3989*280575beSPatrick McGehearty ! Src is word aligned, move bytes until dest 64 byte aligned 3990*280575beSPatrick McGehearty.co_unalnword: 3991*280575beSPatrick McGehearty ld [%i0], %o4 ! load 4 bytes 3992*280575beSPatrick McGehearty stwa %o4, [%i1]%asi ! and store 4 bytes 3993*280575beSPatrick McGehearty ld [%i0+4], %o4 ! load 4 bytes 3994*280575beSPatrick McGehearty add %i0, 8, %i0 ! increase src ptr by 8 3995*280575beSPatrick McGehearty stwa %o4, [%i1+4]%asi ! and store 4 bytes 3996*280575beSPatrick McGehearty subcc %o3, 8, %o3 ! decrease count by 8 3997*280575beSPatrick McGehearty bnz %ncc, .co_unalnword 3998*280575beSPatrick McGehearty add %i1, 8, %i1 ! increase dst ptr by 8 3999*280575beSPatrick McGehearty ba .co_unalnsrc 4000*280575beSPatrick McGehearty nop 4001*280575beSPatrick McGehearty 4002*280575beSPatrick McGehearty ! 
Src is half-word aligned, move bytes until dest 64 byte aligned 4003*280575beSPatrick McGehearty.co_unalnhalf: 4004*280575beSPatrick McGehearty lduh [%i0], %o4 ! load 2 bytes 4005*280575beSPatrick McGehearty sllx %o4, 32, %i3 ! shift left 4006*280575beSPatrick McGehearty lduw [%i0+2], %o4 4007*280575beSPatrick McGehearty or %o4, %i3, %i3 4008*280575beSPatrick McGehearty sllx %i3, 16, %i3 4009*280575beSPatrick McGehearty lduh [%i0+6], %o4 4010*280575beSPatrick McGehearty or %o4, %i3, %i3 4011*280575beSPatrick McGehearty stxa %i3, [%i1]ASI_USER 4012*280575beSPatrick McGehearty add %i0, 8, %i0 4013*280575beSPatrick McGehearty subcc %o3, 8, %o3 4014*280575beSPatrick McGehearty bnz %ncc, .co_unalnhalf 4015*280575beSPatrick McGehearty add %i1, 8, %i1 4016*280575beSPatrick McGehearty ba .co_unalnsrc 4017*280575beSPatrick McGehearty nop 4018*280575beSPatrick McGehearty 4019*280575beSPatrick McGehearty ! Src is Byte aligned, move bytes until dest 64 byte aligned 4020*280575beSPatrick McGehearty.co_unalnbyte: 4021*280575beSPatrick McGehearty sub %i1, %i0, %i1 ! 
share pointer advance 4022*280575beSPatrick McGehearty.co_unalnbyte_loop: 4023*280575beSPatrick McGehearty ldub [%i0], %o4 4024*280575beSPatrick McGehearty sllx %o4, 56, %i3 4025*280575beSPatrick McGehearty lduh [%i0+1], %o4 4026*280575beSPatrick McGehearty sllx %o4, 40, %o4 4027*280575beSPatrick McGehearty or %o4, %i3, %i3 4028*280575beSPatrick McGehearty lduh [%i0+3], %o4 4029*280575beSPatrick McGehearty sllx %o4, 24, %o4 4030*280575beSPatrick McGehearty or %o4, %i3, %i3 4031*280575beSPatrick McGehearty lduh [%i0+5], %o4 4032*280575beSPatrick McGehearty sllx %o4, 8, %o4 4033*280575beSPatrick McGehearty or %o4, %i3, %i3 4034*280575beSPatrick McGehearty ldub [%i0+7], %o4 4035*280575beSPatrick McGehearty or %o4, %i3, %i3 4036*280575beSPatrick McGehearty stxa %i3, [%i1+%i0]ASI_USER 4037*280575beSPatrick McGehearty subcc %o3, 8, %o3 4038*280575beSPatrick McGehearty bnz %ncc, .co_unalnbyte_loop 4039*280575beSPatrick McGehearty add %i0, 8, %i0 4040*280575beSPatrick McGehearty add %i1,%i0, %i1 ! restore pointer 4041*280575beSPatrick McGehearty 4042*280575beSPatrick McGehearty ! Destination is now block (64 byte aligned), src is not 8 byte aligned 4043*280575beSPatrick McGehearty.co_unalnsrc: 4044*280575beSPatrick McGehearty andn %i2, 0x3f, %i3 ! %i3 is multiple of block size 4045*280575beSPatrick McGehearty and %i2, 0x3f, %i2 ! residue bytes in %i2 4046*280575beSPatrick McGehearty add %i2, 64, %i2 ! Insure we don't load beyond 4047*280575beSPatrick McGehearty sub %i3, 64, %i3 ! end of source buffer 4048*280575beSPatrick McGehearty 4049*280575beSPatrick McGehearty andn %i0, 0x3f, %o4 ! %o4 has block aligned src address 4050*280575beSPatrick McGehearty prefetch [%o4 + (3 * CACHE_LINE)], #one_read 4051*280575beSPatrick McGehearty alignaddr %i0, %g0, %g0 ! generate %gsr 4052*280575beSPatrick McGehearty add %i0, %i3, %i0 ! advance %i0 to after blocks 4053*280575beSPatrick McGehearty ! 4054*280575beSPatrick McGehearty ! 
Determine source alignment to correct 8 byte offset 4055*280575beSPatrick McGehearty andcc %i0, 0x20, %o3 4056*280575beSPatrick McGehearty brnz,pn %o3, .co_unaln_1 4057*280575beSPatrick McGehearty andcc %i0, 0x10, %o3 4058*280575beSPatrick McGehearty brnz,pn %o3, .co_unaln_01 4059*280575beSPatrick McGehearty andcc %i0, 0x08, %o3 4060*280575beSPatrick McGehearty brz,a %o3, .co_unaln_000 4061*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4062*280575beSPatrick McGehearty ba .co_unaln_001 4063*280575beSPatrick McGehearty nop 4064*280575beSPatrick McGehearty.co_unaln_01: 4065*280575beSPatrick McGehearty brnz,a %o3, .co_unaln_011 4066*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4067*280575beSPatrick McGehearty ba .co_unaln_010 4068*280575beSPatrick McGehearty nop 4069*280575beSPatrick McGehearty.co_unaln_1: 4070*280575beSPatrick McGehearty brnz,pn %o3, .co_unaln_11 4071*280575beSPatrick McGehearty andcc %i0, 0x08, %o3 4072*280575beSPatrick McGehearty brnz,a %o3, .co_unaln_101 4073*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4074*280575beSPatrick McGehearty ba .co_unaln_100 4075*280575beSPatrick McGehearty nop 4076*280575beSPatrick McGehearty.co_unaln_11: 4077*280575beSPatrick McGehearty brz,pn %o3, .co_unaln_110 4078*280575beSPatrick McGehearty prefetch [%i0 + (4 * CACHE_LINE)], #one_read 4079*280575beSPatrick McGehearty 4080*280575beSPatrick McGehearty.co_unaln_111: 4081*280575beSPatrick McGehearty ldd [%o4+56], %d14 4082*280575beSPatrick McGehearty.co_unaln_111_loop: 4083*280575beSPatrick McGehearty add %o4, 64, %o4 4084*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 4085*280575beSPatrick McGehearty faligndata %d14, %d16, %d48 4086*280575beSPatrick McGehearty faligndata %d16, %d18, %d50 4087*280575beSPatrick McGehearty faligndata %d18, %d20, %d52 4088*280575beSPatrick McGehearty faligndata %d20, %d22, %d54 4089*280575beSPatrick McGehearty faligndata %d22, %d24, %d56 
4090*280575beSPatrick McGehearty faligndata %d24, %d26, %d58 4091*280575beSPatrick McGehearty faligndata %d26, %d28, %d60 4092*280575beSPatrick McGehearty faligndata %d28, %d30, %d62 4093*280575beSPatrick McGehearty fmovd %d30, %d14 4094*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_AIUS 4095*280575beSPatrick McGehearty subcc %i3, 64, %i3 4096*280575beSPatrick McGehearty add %i1, 64, %i1 4097*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_111_loop 4098*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4099*280575beSPatrick McGehearty ba .co_unaln_done 4100*280575beSPatrick McGehearty nop 4101*280575beSPatrick McGehearty 4102*280575beSPatrick McGehearty.co_unaln_110: 4103*280575beSPatrick McGehearty ldd [%o4+48], %d12 4104*280575beSPatrick McGehearty ldd [%o4+56], %d14 4105*280575beSPatrick McGehearty.co_unaln_110_loop: 4106*280575beSPatrick McGehearty add %o4, 64, %o4 4107*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 4108*280575beSPatrick McGehearty faligndata %d12, %d14, %d48 4109*280575beSPatrick McGehearty faligndata %d14, %d16, %d50 4110*280575beSPatrick McGehearty faligndata %d16, %d18, %d52 4111*280575beSPatrick McGehearty faligndata %d18, %d20, %d54 4112*280575beSPatrick McGehearty faligndata %d20, %d22, %d56 4113*280575beSPatrick McGehearty faligndata %d22, %d24, %d58 4114*280575beSPatrick McGehearty faligndata %d24, %d26, %d60 4115*280575beSPatrick McGehearty faligndata %d26, %d28, %d62 4116*280575beSPatrick McGehearty fmovd %d28, %d12 4117*280575beSPatrick McGehearty fmovd %d30, %d14 4118*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_AIUS 4119*280575beSPatrick McGehearty subcc %i3, 64, %i3 4120*280575beSPatrick McGehearty add %i1, 64, %i1 4121*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_110_loop 4122*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4123*280575beSPatrick McGehearty ba .co_unaln_done 4124*280575beSPatrick McGehearty nop 4125*280575beSPatrick McGehearty 
4126*280575beSPatrick McGehearty.co_unaln_101: 4127*280575beSPatrick McGehearty ldd [%o4+40], %d10 4128*280575beSPatrick McGehearty ldd [%o4+48], %d12 4129*280575beSPatrick McGehearty ldd [%o4+56], %d14 4130*280575beSPatrick McGehearty.co_unaln_101_loop: 4131*280575beSPatrick McGehearty add %o4, 64, %o4 4132*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 4133*280575beSPatrick McGehearty faligndata %d10, %d12, %d48 4134*280575beSPatrick McGehearty faligndata %d12, %d14, %d50 4135*280575beSPatrick McGehearty faligndata %d14, %d16, %d52 4136*280575beSPatrick McGehearty faligndata %d16, %d18, %d54 4137*280575beSPatrick McGehearty faligndata %d18, %d20, %d56 4138*280575beSPatrick McGehearty faligndata %d20, %d22, %d58 4139*280575beSPatrick McGehearty faligndata %d22, %d24, %d60 4140*280575beSPatrick McGehearty faligndata %d24, %d26, %d62 4141*280575beSPatrick McGehearty fmovd %d26, %d10 4142*280575beSPatrick McGehearty fmovd %d28, %d12 4143*280575beSPatrick McGehearty fmovd %d30, %d14 4144*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_AIUS 4145*280575beSPatrick McGehearty subcc %i3, 64, %i3 4146*280575beSPatrick McGehearty add %i1, 64, %i1 4147*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_101_loop 4148*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4149*280575beSPatrick McGehearty ba .co_unaln_done 4150*280575beSPatrick McGehearty nop 4151*280575beSPatrick McGehearty 4152*280575beSPatrick McGehearty.co_unaln_100: 4153*280575beSPatrick McGehearty ldd [%o4+32], %d8 4154*280575beSPatrick McGehearty ldd [%o4+40], %d10 4155*280575beSPatrick McGehearty ldd [%o4+48], %d12 4156*280575beSPatrick McGehearty ldd [%o4+56], %d14 4157*280575beSPatrick McGehearty.co_unaln_100_loop: 4158*280575beSPatrick McGehearty add %o4, 64, %o4 4159*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 4160*280575beSPatrick McGehearty faligndata %d8, %d10, %d48 4161*280575beSPatrick McGehearty faligndata %d10, %d12, %d50 4162*280575beSPatrick McGehearty 
faligndata %d12, %d14, %d52 4163*280575beSPatrick McGehearty faligndata %d14, %d16, %d54 4164*280575beSPatrick McGehearty faligndata %d16, %d18, %d56 4165*280575beSPatrick McGehearty faligndata %d18, %d20, %d58 4166*280575beSPatrick McGehearty faligndata %d20, %d22, %d60 4167*280575beSPatrick McGehearty faligndata %d22, %d24, %d62 4168*280575beSPatrick McGehearty fmovd %d24, %d8 4169*280575beSPatrick McGehearty fmovd %d26, %d10 4170*280575beSPatrick McGehearty fmovd %d28, %d12 4171*280575beSPatrick McGehearty fmovd %d30, %d14 4172*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_AIUS 4173*280575beSPatrick McGehearty subcc %i3, 64, %i3 4174*280575beSPatrick McGehearty add %i1, 64, %i1 4175*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_100_loop 4176*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4177*280575beSPatrick McGehearty ba .co_unaln_done 4178*280575beSPatrick McGehearty nop 4179*280575beSPatrick McGehearty 4180*280575beSPatrick McGehearty.co_unaln_011: 4181*280575beSPatrick McGehearty ldd [%o4+24], %d6 4182*280575beSPatrick McGehearty ldd [%o4+32], %d8 4183*280575beSPatrick McGehearty ldd [%o4+40], %d10 4184*280575beSPatrick McGehearty ldd [%o4+48], %d12 4185*280575beSPatrick McGehearty ldd [%o4+56], %d14 4186*280575beSPatrick McGehearty.co_unaln_011_loop: 4187*280575beSPatrick McGehearty add %o4, 64, %o4 4188*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 4189*280575beSPatrick McGehearty faligndata %d6, %d8, %d48 4190*280575beSPatrick McGehearty faligndata %d8, %d10, %d50 4191*280575beSPatrick McGehearty faligndata %d10, %d12, %d52 4192*280575beSPatrick McGehearty faligndata %d12, %d14, %d54 4193*280575beSPatrick McGehearty faligndata %d14, %d16, %d56 4194*280575beSPatrick McGehearty faligndata %d16, %d18, %d58 4195*280575beSPatrick McGehearty faligndata %d18, %d20, %d60 4196*280575beSPatrick McGehearty faligndata %d20, %d22, %d62 4197*280575beSPatrick McGehearty fmovd %d22, %d6 4198*280575beSPatrick McGehearty fmovd 
%d24, %d8 4199*280575beSPatrick McGehearty fmovd %d26, %d10 4200*280575beSPatrick McGehearty fmovd %d28, %d12 4201*280575beSPatrick McGehearty fmovd %d30, %d14 4202*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_AIUS 4203*280575beSPatrick McGehearty subcc %i3, 64, %i3 4204*280575beSPatrick McGehearty add %i1, 64, %i1 4205*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_011_loop 4206*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4207*280575beSPatrick McGehearty ba .co_unaln_done 4208*280575beSPatrick McGehearty nop 4209*280575beSPatrick McGehearty 4210*280575beSPatrick McGehearty.co_unaln_010: 4211*280575beSPatrick McGehearty ldd [%o4+16], %d4 4212*280575beSPatrick McGehearty ldd [%o4+24], %d6 4213*280575beSPatrick McGehearty ldd [%o4+32], %d8 4214*280575beSPatrick McGehearty ldd [%o4+40], %d10 4215*280575beSPatrick McGehearty ldd [%o4+48], %d12 4216*280575beSPatrick McGehearty ldd [%o4+56], %d14 4217*280575beSPatrick McGehearty.co_unaln_010_loop: 4218*280575beSPatrick McGehearty add %o4, 64, %o4 4219*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 4220*280575beSPatrick McGehearty faligndata %d4, %d6, %d48 4221*280575beSPatrick McGehearty faligndata %d6, %d8, %d50 4222*280575beSPatrick McGehearty faligndata %d8, %d10, %d52 4223*280575beSPatrick McGehearty faligndata %d10, %d12, %d54 4224*280575beSPatrick McGehearty faligndata %d12, %d14, %d56 4225*280575beSPatrick McGehearty faligndata %d14, %d16, %d58 4226*280575beSPatrick McGehearty faligndata %d16, %d18, %d60 4227*280575beSPatrick McGehearty faligndata %d18, %d20, %d62 4228*280575beSPatrick McGehearty fmovd %d20, %d4 4229*280575beSPatrick McGehearty fmovd %d22, %d6 4230*280575beSPatrick McGehearty fmovd %d24, %d8 4231*280575beSPatrick McGehearty fmovd %d26, %d10 4232*280575beSPatrick McGehearty fmovd %d28, %d12 4233*280575beSPatrick McGehearty fmovd %d30, %d14 4234*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_AIUS 4235*280575beSPatrick McGehearty subcc %i3, 64, %i3 
4236*280575beSPatrick McGehearty add %i1, 64, %i1 4237*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_010_loop 4238*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4239*280575beSPatrick McGehearty ba .co_unaln_done 4240*280575beSPatrick McGehearty nop 4241*280575beSPatrick McGehearty 4242*280575beSPatrick McGehearty.co_unaln_001: 4243*280575beSPatrick McGehearty ldd [%o4+8], %d2 4244*280575beSPatrick McGehearty ldd [%o4+16], %d4 4245*280575beSPatrick McGehearty ldd [%o4+24], %d6 4246*280575beSPatrick McGehearty ldd [%o4+32], %d8 4247*280575beSPatrick McGehearty ldd [%o4+40], %d10 4248*280575beSPatrick McGehearty ldd [%o4+48], %d12 4249*280575beSPatrick McGehearty ldd [%o4+56], %d14 4250*280575beSPatrick McGehearty.co_unaln_001_loop: 4251*280575beSPatrick McGehearty add %o4, 64, %o4 4252*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 4253*280575beSPatrick McGehearty faligndata %d2, %d4, %d48 4254*280575beSPatrick McGehearty faligndata %d4, %d6, %d50 4255*280575beSPatrick McGehearty faligndata %d6, %d8, %d52 4256*280575beSPatrick McGehearty faligndata %d8, %d10, %d54 4257*280575beSPatrick McGehearty faligndata %d10, %d12, %d56 4258*280575beSPatrick McGehearty faligndata %d12, %d14, %d58 4259*280575beSPatrick McGehearty faligndata %d14, %d16, %d60 4260*280575beSPatrick McGehearty faligndata %d16, %d18, %d62 4261*280575beSPatrick McGehearty fmovd %d18, %d2 4262*280575beSPatrick McGehearty fmovd %d20, %d4 4263*280575beSPatrick McGehearty fmovd %d22, %d6 4264*280575beSPatrick McGehearty fmovd %d24, %d8 4265*280575beSPatrick McGehearty fmovd %d26, %d10 4266*280575beSPatrick McGehearty fmovd %d28, %d12 4267*280575beSPatrick McGehearty fmovd %d30, %d14 4268*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_AIUS 4269*280575beSPatrick McGehearty subcc %i3, 64, %i3 4270*280575beSPatrick McGehearty add %i1, 64, %i1 4271*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_001_loop 4272*280575beSPatrick McGehearty prefetch [%o4 + (4 * 
CACHE_LINE)], #one_read 4273*280575beSPatrick McGehearty ba .co_unaln_done 4274*280575beSPatrick McGehearty nop 4275*280575beSPatrick McGehearty 4276*280575beSPatrick McGehearty.co_unaln_000: 4277*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d0 4278*280575beSPatrick McGehearty.co_unaln_000_loop: 4279*280575beSPatrick McGehearty add %o4, 64, %o4 4280*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_P, %d16 4281*280575beSPatrick McGehearty faligndata %d0, %d2, %d48 4282*280575beSPatrick McGehearty faligndata %d2, %d4, %d50 4283*280575beSPatrick McGehearty faligndata %d4, %d6, %d52 4284*280575beSPatrick McGehearty faligndata %d6, %d8, %d54 4285*280575beSPatrick McGehearty faligndata %d8, %d10, %d56 4286*280575beSPatrick McGehearty faligndata %d10, %d12, %d58 4287*280575beSPatrick McGehearty faligndata %d12, %d14, %d60 4288*280575beSPatrick McGehearty faligndata %d14, %d16, %d62 4289*280575beSPatrick McGehearty fmovd %d16, %d0 4290*280575beSPatrick McGehearty fmovd %d18, %d2 4291*280575beSPatrick McGehearty fmovd %d20, %d4 4292*280575beSPatrick McGehearty fmovd %d22, %d6 4293*280575beSPatrick McGehearty fmovd %d24, %d8 4294*280575beSPatrick McGehearty fmovd %d26, %d10 4295*280575beSPatrick McGehearty fmovd %d28, %d12 4296*280575beSPatrick McGehearty fmovd %d30, %d14 4297*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_AIUS 4298*280575beSPatrick McGehearty subcc %i3, 64, %i3 4299*280575beSPatrick McGehearty add %i1, 64, %i1 4300*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_000_loop 4301*280575beSPatrick McGehearty prefetch [%o4 + (4 * CACHE_LINE)], #one_read 4302*280575beSPatrick McGehearty 4303*280575beSPatrick McGehearty.co_unaln_done: 4304*280575beSPatrick McGehearty ! Handle trailing bytes, 64 to 127 4305*280575beSPatrick McGehearty ! 
Dest long word aligned, Src not long word aligned 4306*280575beSPatrick McGehearty cmp %i2, 15 4307*280575beSPatrick McGehearty bleu %ncc, .co_unaln_short 4308*280575beSPatrick McGehearty 4309*280575beSPatrick McGehearty andn %i2, 0x7, %i3 ! %i3 is multiple of 8 4310*280575beSPatrick McGehearty and %i2, 0x7, %i2 ! residue bytes in %i2 4311*280575beSPatrick McGehearty add %i2, 8, %i2 4312*280575beSPatrick McGehearty sub %i3, 8, %i3 ! insure we don't load past end of src 4313*280575beSPatrick McGehearty andn %i0, 0x7, %o4 ! %o4 has long word aligned src address 4314*280575beSPatrick McGehearty add %i0, %i3, %i0 ! advance %i0 to after multiple of 8 4315*280575beSPatrick McGehearty ldd [%o4], %d0 ! fetch partial word 4316*280575beSPatrick McGehearty.co_unaln_by8: 4317*280575beSPatrick McGehearty ldd [%o4+8], %d2 4318*280575beSPatrick McGehearty add %o4, 8, %o4 4319*280575beSPatrick McGehearty faligndata %d0, %d2, %d16 4320*280575beSPatrick McGehearty subcc %i3, 8, %i3 4321*280575beSPatrick McGehearty stda %d16, [%i1]%asi 4322*280575beSPatrick McGehearty fmovd %d2, %d0 4323*280575beSPatrick McGehearty bgu,pt %ncc, .co_unaln_by8 4324*280575beSPatrick McGehearty add %i1, 8, %i1 4325*280575beSPatrick McGehearty 4326*280575beSPatrick McGehearty.co_unaln_short: 4327*280575beSPatrick McGehearty cmp %i2, 8 4328*280575beSPatrick McGehearty blt,pt %ncc, .co_unalnfin 4329*280575beSPatrick McGehearty nop 4330*280575beSPatrick McGehearty ldub [%i0], %o4 4331*280575beSPatrick McGehearty sll %o4, 24, %o3 4332*280575beSPatrick McGehearty ldub [%i0+1], %o4 4333*280575beSPatrick McGehearty sll %o4, 16, %o4 4334*280575beSPatrick McGehearty or %o4, %o3, %o3 4335*280575beSPatrick McGehearty ldub [%i0+2], %o4 4336*280575beSPatrick McGehearty sll %o4, 8, %o4 4337*280575beSPatrick McGehearty or %o4, %o3, %o3 4338*280575beSPatrick McGehearty ldub [%i0+3], %o4 4339*280575beSPatrick McGehearty or %o4, %o3, %o3 4340*280575beSPatrick McGehearty stwa %o3, [%i1]%asi 4341*280575beSPatrick McGehearty 
ldub [%i0+4], %o4 4342*280575beSPatrick McGehearty sll %o4, 24, %o3 4343*280575beSPatrick McGehearty ldub [%i0+5], %o4 4344*280575beSPatrick McGehearty sll %o4, 16, %o4 4345*280575beSPatrick McGehearty or %o4, %o3, %o3 4346*280575beSPatrick McGehearty ldub [%i0+6], %o4 4347*280575beSPatrick McGehearty sll %o4, 8, %o4 4348*280575beSPatrick McGehearty or %o4, %o3, %o3 4349*280575beSPatrick McGehearty ldub [%i0+7], %o4 4350*280575beSPatrick McGehearty or %o4, %o3, %o3 4351*280575beSPatrick McGehearty stwa %o3, [%i1+4]%asi 4352*280575beSPatrick McGehearty add %i0, 8, %i0 4353*280575beSPatrick McGehearty add %i1, 8, %i1 4354*280575beSPatrick McGehearty sub %i2, 8, %i2 4355*280575beSPatrick McGehearty.co_unalnfin: 4356*280575beSPatrick McGehearty cmp %i2, 4 4357*280575beSPatrick McGehearty blt,pt %ncc, .co_unalnz 4358*280575beSPatrick McGehearty tst %i2 4359*280575beSPatrick McGehearty ldub [%i0], %o3 ! read byte 4360*280575beSPatrick McGehearty subcc %i2, 4, %i2 ! reduce count by 4 4361*280575beSPatrick McGehearty sll %o3, 24, %o3 ! position 4362*280575beSPatrick McGehearty ldub [%i0+1], %o4 4363*280575beSPatrick McGehearty sll %o4, 16, %o4 ! position 4364*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 4365*280575beSPatrick McGehearty ldub [%i0+2], %o4 4366*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 4367*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 4368*280575beSPatrick McGehearty add %i1, 4, %i1 ! advance dst by 4 4369*280575beSPatrick McGehearty ldub [%i0+3], %o4 4370*280575beSPatrick McGehearty add %i0, 4, %i0 ! advance src by 4 4371*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 4372*280575beSPatrick McGehearty bnz,pt %ncc, .co_unaln3x 4373*280575beSPatrick McGehearty stwa %o4, [%i1-4]%asi 4374*280575beSPatrick McGehearty ba .co_exit 4375*280575beSPatrick McGehearty nop 4376*280575beSPatrick McGehearty.co_unalnz: 4377*280575beSPatrick McGehearty bz,pt %ncc, .co_exit 4378*280575beSPatrick McGehearty wr %l5, %g0, %gsr ! 
restore %gsr 4379*280575beSPatrick McGehearty.co_unaln3x: ! Exactly 1, 2, or 3 bytes remain 4380*280575beSPatrick McGehearty subcc %i2, 1, %i2 ! reduce count for cc test 4381*280575beSPatrick McGehearty ldub [%i0], %o4 ! load one byte 4382*280575beSPatrick McGehearty bz,pt %ncc, .co_exit 4383*280575beSPatrick McGehearty stba %o4, [%i1]%asi ! store one byte 4384*280575beSPatrick McGehearty ldub [%i0+1], %o4 ! load second byte 4385*280575beSPatrick McGehearty subcc %i2, 1, %i2 4386*280575beSPatrick McGehearty bz,pt %ncc, .co_exit 4387*280575beSPatrick McGehearty stba %o4, [%i1+1]%asi ! store second byte 4388*280575beSPatrick McGehearty ldub [%i0+2], %o4 ! load third byte 4389*280575beSPatrick McGehearty stba %o4, [%i1+2]%asi ! store third byte 4390*280575beSPatrick McGehearty.co_exit: 4391*280575beSPatrick McGehearty brnz %g1, .co_fp_restore 4392*280575beSPatrick McGehearty nop 4393*280575beSPatrick McGehearty FZERO 4394*280575beSPatrick McGehearty wr %g1, %g0, %fprs 4395*280575beSPatrick McGehearty ba,pt %ncc, .co_ex2 4396*280575beSPatrick McGehearty membar #Sync 4397*280575beSPatrick McGehearty.co_fp_restore: 4398*280575beSPatrick McGehearty BLD_FP_FROMSTACK(%o4) 4399*280575beSPatrick McGehearty.co_ex2: 4400*280575beSPatrick McGehearty andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 4401*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! 
restore old t_lofault 4402*280575beSPatrick McGehearty ret 4403*280575beSPatrick McGehearty restore %g0, 0, %o0 4404*280575beSPatrick McGehearty 4405*280575beSPatrick McGehearty.copyout_err: 4406*280575beSPatrick McGehearty ldn [THREAD_REG + T_COPYOPS], %o4 4407*280575beSPatrick McGehearty brz %o4, 2f 4408*280575beSPatrick McGehearty nop 4409*280575beSPatrick McGehearty ldn [%o4 + CP_COPYOUT], %g2 4410*280575beSPatrick McGehearty jmp %g2 4411*280575beSPatrick McGehearty nop 4412*280575beSPatrick McGehearty2: 4413*280575beSPatrick McGehearty retl 4414*280575beSPatrick McGehearty mov -1, %o0 4415*280575beSPatrick McGehearty 4416*280575beSPatrick McGehearty#else /* NIAGARA_IMPL */ 44177c478bd9Sstevel@tonic-gate.do_copyout: 44187c478bd9Sstevel@tonic-gate ! 44197c478bd9Sstevel@tonic-gate ! Check the length and bail if zero. 44207c478bd9Sstevel@tonic-gate ! 44217c478bd9Sstevel@tonic-gate tst %o2 44227c478bd9Sstevel@tonic-gate bnz,pt %ncc, 1f 44237c478bd9Sstevel@tonic-gate nop 44247c478bd9Sstevel@tonic-gate retl 44257c478bd9Sstevel@tonic-gate clr %o0 44267c478bd9Sstevel@tonic-gate1: 44277c478bd9Sstevel@tonic-gate sethi %hi(copyio_fault), %o4 44287c478bd9Sstevel@tonic-gate or %o4, %lo(copyio_fault), %o4 44297c478bd9Sstevel@tonic-gate sethi %hi(copyio_fault_nowindow), %o3 44307c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT 44317c478bd9Sstevel@tonic-gate or %o3, %lo(copyio_fault_nowindow), %o3 44327c478bd9Sstevel@tonic-gate membar #Sync 44337c478bd9Sstevel@tonic-gate stn %o3, [THREAD_REG + T_LOFAULT] 44347c478bd9Sstevel@tonic-gate 44357c478bd9Sstevel@tonic-gate mov %o0, SAVE_SRC 44367c478bd9Sstevel@tonic-gate mov %o1, SAVE_DST 44377c478bd9Sstevel@tonic-gate mov %o2, SAVE_COUNT 44387c478bd9Sstevel@tonic-gate 44397c478bd9Sstevel@tonic-gate ! 44407c478bd9Sstevel@tonic-gate ! Check to see if we're more than SMALL_LIMIT (7 bytes). 44417c478bd9Sstevel@tonic-gate ! Run in leaf mode, using the %o regs as our input regs. 44427c478bd9Sstevel@tonic-gate ! 
44437c478bd9Sstevel@tonic-gate subcc %o2, SMALL_LIMIT, %o3 44447c478bd9Sstevel@tonic-gate bgu,a,pt %ncc, .dco_ns 44457c478bd9Sstevel@tonic-gate or %o0, %o1, %o3 44467c478bd9Sstevel@tonic-gate ! 44477c478bd9Sstevel@tonic-gate ! What was previously ".small_copyout" 44487c478bd9Sstevel@tonic-gate ! Do full differenced copy. 44497c478bd9Sstevel@tonic-gate ! 44507c478bd9Sstevel@tonic-gate.dcobcp: 44517c478bd9Sstevel@tonic-gate sub %g0, %o2, %o3 ! negate count 44527c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 ! make %o0 point at the end 44537c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 ! make %o1 point at the end 44547c478bd9Sstevel@tonic-gate ba,pt %ncc, .dcocl 44557c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 ! load first byte 44567c478bd9Sstevel@tonic-gate ! 44577c478bd9Sstevel@tonic-gate ! %o0 and %o1 point at the end and remain pointing at the end 44587c478bd9Sstevel@tonic-gate ! of their buffers. We pull things out by adding %o3 (which is 44597c478bd9Sstevel@tonic-gate ! the negation of the length) to the buffer end which gives us 44607c478bd9Sstevel@tonic-gate ! the current location in the buffers. By incrementing %o3 we walk 44617c478bd9Sstevel@tonic-gate ! through both buffers without having to bump each buffer's 44627c478bd9Sstevel@tonic-gate ! pointer. A very fast 4 instruction loop. 44637c478bd9Sstevel@tonic-gate ! 44647c478bd9Sstevel@tonic-gate .align 16 44657c478bd9Sstevel@tonic-gate.dcocl: 44667c478bd9Sstevel@tonic-gate stba %o4, [%o1 + %o3]ASI_USER 44677c478bd9Sstevel@tonic-gate inccc %o3 44687c478bd9Sstevel@tonic-gate bl,a,pt %ncc, .dcocl 44697c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 44707c478bd9Sstevel@tonic-gate ! 44717c478bd9Sstevel@tonic-gate ! We're done. Go home. 44727c478bd9Sstevel@tonic-gate ! 44737c478bd9Sstevel@tonic-gate membar #Sync 44747c478bd9Sstevel@tonic-gate stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 44757c478bd9Sstevel@tonic-gate retl 44767c478bd9Sstevel@tonic-gate clr %o0 44777c478bd9Sstevel@tonic-gate ! 
44787c478bd9Sstevel@tonic-gate ! Try aligned copies from here. 44797c478bd9Sstevel@tonic-gate ! 44807c478bd9Sstevel@tonic-gate.dco_ns: 44817c478bd9Sstevel@tonic-gate ! %o0 = kernel addr (to be copied from) 44827c478bd9Sstevel@tonic-gate ! %o1 = user addr (to be copied to) 44837c478bd9Sstevel@tonic-gate ! %o2 = length 44847c478bd9Sstevel@tonic-gate ! %o3 = %o0 | %o1 (used for alignment checking) 44857c478bd9Sstevel@tonic-gate ! %o4 is alternate lo_fault 44867c478bd9Sstevel@tonic-gate ! %o5 is original lo_fault 44877c478bd9Sstevel@tonic-gate ! 44887c478bd9Sstevel@tonic-gate ! See if we're single byte aligned. If we are, check the 44897c478bd9Sstevel@tonic-gate ! limit for single byte copies. If we're smaller or equal, 44907c478bd9Sstevel@tonic-gate ! bounce to the byte for byte copy loop. Otherwise do it in 44917c478bd9Sstevel@tonic-gate ! HW (if enabled). 44927c478bd9Sstevel@tonic-gate ! 44937c478bd9Sstevel@tonic-gate btst 1, %o3 44947c478bd9Sstevel@tonic-gate bz,pt %icc, .dcoh8 44957c478bd9Sstevel@tonic-gate btst 7, %o3 44967c478bd9Sstevel@tonic-gate ! 44977c478bd9Sstevel@tonic-gate ! Single byte aligned. Do we do it via HW or via 44987c478bd9Sstevel@tonic-gate ! byte for byte? Do a quick no memory reference 44997c478bd9Sstevel@tonic-gate ! check to pick up small copies. 45007c478bd9Sstevel@tonic-gate ! 45017c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 45027c478bd9Sstevel@tonic-gate ! 45037c478bd9Sstevel@tonic-gate ! Big enough that we need to check the HW limit for 45047c478bd9Sstevel@tonic-gate ! this size copy. 45057c478bd9Sstevel@tonic-gate ! 45067c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 45077c478bd9Sstevel@tonic-gate ! 45087c478bd9Sstevel@tonic-gate ! Is HW copy on? If not, do everything byte for byte. 45097c478bd9Sstevel@tonic-gate ! 45107c478bd9Sstevel@tonic-gate tst %o3 45117c478bd9Sstevel@tonic-gate bz,pn %icc, .dcobcp 45127c478bd9Sstevel@tonic-gate subcc %o3, %o2, %o3 45137c478bd9Sstevel@tonic-gate ! 
45147c478bd9Sstevel@tonic-gate ! If we're less than or equal to the single byte copy limit, 45157c478bd9Sstevel@tonic-gate ! bop to the copy loop. 45167c478bd9Sstevel@tonic-gate ! 45177c478bd9Sstevel@tonic-gate bge,pt %ncc, .dcobcp 45187c478bd9Sstevel@tonic-gate nop 45197c478bd9Sstevel@tonic-gate ! 45207c478bd9Sstevel@tonic-gate ! We're big enough and copy is on. Do it with HW. 45217c478bd9Sstevel@tonic-gate ! 45227c478bd9Sstevel@tonic-gate ba,pt %ncc, .big_copyout 45237c478bd9Sstevel@tonic-gate nop 45247c478bd9Sstevel@tonic-gate.dcoh8: 45257c478bd9Sstevel@tonic-gate ! 45267c478bd9Sstevel@tonic-gate ! 8 byte aligned? 45277c478bd9Sstevel@tonic-gate ! 45287c478bd9Sstevel@tonic-gate bnz,a %ncc, .dcoh4 45297c478bd9Sstevel@tonic-gate btst 3, %o3 45307c478bd9Sstevel@tonic-gate ! 45317c478bd9Sstevel@tonic-gate ! See if we're in the "small range". 45327c478bd9Sstevel@tonic-gate ! If so, go off and do the copy. 45337c478bd9Sstevel@tonic-gate ! If not, load the hard limit. %o3 is 45347c478bd9Sstevel@tonic-gate ! available for reuse. 45357c478bd9Sstevel@tonic-gate ! 45367c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 45377c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 45387c478bd9Sstevel@tonic-gate ! 45397c478bd9Sstevel@tonic-gate ! If it's zero, there's no HW bcopy. 45407c478bd9Sstevel@tonic-gate ! Bop off to the aligned copy. 45417c478bd9Sstevel@tonic-gate ! 45427c478bd9Sstevel@tonic-gate tst %o3 45437c478bd9Sstevel@tonic-gate bz,pn %icc, .dcos8 45447c478bd9Sstevel@tonic-gate subcc %o3, %o2, %o3 45457c478bd9Sstevel@tonic-gate ! 45467c478bd9Sstevel@tonic-gate ! We're negative if our size is larger than hw_copy_limit_8. 45477c478bd9Sstevel@tonic-gate ! 45487c478bd9Sstevel@tonic-gate bge,pt %ncc, .dcos8 45497c478bd9Sstevel@tonic-gate nop 45507c478bd9Sstevel@tonic-gate ! 45517c478bd9Sstevel@tonic-gate ! HW assist is on and we're large enough. Do it. 45527c478bd9Sstevel@tonic-gate ! 
45537c478bd9Sstevel@tonic-gate ba,pt %ncc, .big_copyout 45547c478bd9Sstevel@tonic-gate nop 45557c478bd9Sstevel@tonic-gate.dcos8: 45567c478bd9Sstevel@tonic-gate ! 45577c478bd9Sstevel@tonic-gate ! Housekeeping for copy loops. Uses same idea as in the byte for 45587c478bd9Sstevel@tonic-gate ! byte copy loop above. 45597c478bd9Sstevel@tonic-gate ! 45607c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 45617c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 45627c478bd9Sstevel@tonic-gate sub %g0, %o2, %o3 45637c478bd9Sstevel@tonic-gate ba,pt %ncc, .dodebc 45647c478bd9Sstevel@tonic-gate srl %o2, 3, %o2 ! Number of 8 byte chunks to copy 45657c478bd9Sstevel@tonic-gate ! 45667c478bd9Sstevel@tonic-gate ! 4 byte aligned? 45677c478bd9Sstevel@tonic-gate ! 45687c478bd9Sstevel@tonic-gate.dcoh4: 45697c478bd9Sstevel@tonic-gate bnz,pn %ncc, .dcoh2 45707c478bd9Sstevel@tonic-gate ! 45717c478bd9Sstevel@tonic-gate ! See if we're in the "small range". 45727c478bd9Sstevel@tonic-gate ! If so, go off an do the copy. 45737c478bd9Sstevel@tonic-gate ! If not, load the hard limit. %o3 is 45747c478bd9Sstevel@tonic-gate ! available for reuse. 45757c478bd9Sstevel@tonic-gate ! 45767c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 45777c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 45787c478bd9Sstevel@tonic-gate ! 45797c478bd9Sstevel@tonic-gate ! If it's zero, there's no HW bcopy. 45807c478bd9Sstevel@tonic-gate ! Bop off to the aligned copy. 45817c478bd9Sstevel@tonic-gate ! 45827c478bd9Sstevel@tonic-gate tst %o3 45837c478bd9Sstevel@tonic-gate bz,pn %icc, .dcos4 45847c478bd9Sstevel@tonic-gate subcc %o3, %o2, %o3 45857c478bd9Sstevel@tonic-gate ! 45867c478bd9Sstevel@tonic-gate ! We're negative if our size is larger than hw_copy_limit_4. 45877c478bd9Sstevel@tonic-gate ! 45887c478bd9Sstevel@tonic-gate bge,pt %ncc, .dcos4 45897c478bd9Sstevel@tonic-gate nop 45907c478bd9Sstevel@tonic-gate ! 45917c478bd9Sstevel@tonic-gate ! HW assist is on and we're large enough. Do it. 
45927c478bd9Sstevel@tonic-gate ! 45937c478bd9Sstevel@tonic-gate ba,pt %ncc, .big_copyout 45947c478bd9Sstevel@tonic-gate nop 45957c478bd9Sstevel@tonic-gate.dcos4: 45967c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 45977c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 45987c478bd9Sstevel@tonic-gate sub %g0, %o2, %o3 45997c478bd9Sstevel@tonic-gate ba,pt %ncc, .dodfbc 46007c478bd9Sstevel@tonic-gate srl %o2, 2, %o2 ! Number of 4 byte chunks to copy 46017c478bd9Sstevel@tonic-gate ! 46027c478bd9Sstevel@tonic-gate ! We must be 2 byte aligned. Off we go. 46037c478bd9Sstevel@tonic-gate ! The check for small copies was done in the 46047c478bd9Sstevel@tonic-gate ! delay at .dcoh4 46057c478bd9Sstevel@tonic-gate ! 46067c478bd9Sstevel@tonic-gate.dcoh2: 46077c478bd9Sstevel@tonic-gate ble %ncc, .dcos2 46087c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 46097c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 46107c478bd9Sstevel@tonic-gate tst %o3 46117c478bd9Sstevel@tonic-gate bz,pn %icc, .dcos2 46127c478bd9Sstevel@tonic-gate subcc %o3, %o2, %o3 46137c478bd9Sstevel@tonic-gate bge,pt %ncc, .dcos2 46147c478bd9Sstevel@tonic-gate nop 46157c478bd9Sstevel@tonic-gate ! 46167c478bd9Sstevel@tonic-gate ! HW is on and we're big enough. Do it. 46177c478bd9Sstevel@tonic-gate ! 46187c478bd9Sstevel@tonic-gate ba,pt %ncc, .big_copyout 46197c478bd9Sstevel@tonic-gate nop 46207c478bd9Sstevel@tonic-gate.dcos2: 46217c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 46227c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 46237c478bd9Sstevel@tonic-gate sub %g0, %o2, %o3 46247c478bd9Sstevel@tonic-gate ba,pt %ncc, .dodtbc 46257c478bd9Sstevel@tonic-gate srl %o2, 1, %o2 ! Number of 2 byte chunks to copy 46267c478bd9Sstevel@tonic-gate.small_copyout: 46277c478bd9Sstevel@tonic-gate ! 46287c478bd9Sstevel@tonic-gate ! Why are we doing this AGAIN? There are certain conditions in 46297c478bd9Sstevel@tonic-gate ! big_copyout that will cause us to forego the HW assisted copies 46307c478bd9Sstevel@tonic-gate ! 
and bounce back to a non-HW assisted copy. This dispatches those 46317c478bd9Sstevel@tonic-gate ! copies. Note that we branch around this in the main line code. 46327c478bd9Sstevel@tonic-gate ! 46337c478bd9Sstevel@tonic-gate ! We make no check for limits or HW enablement here. We've 46347c478bd9Sstevel@tonic-gate ! already been told that we're a poster child so just go off 46357c478bd9Sstevel@tonic-gate ! and do it. 46367c478bd9Sstevel@tonic-gate ! 46377c478bd9Sstevel@tonic-gate or %o0, %o1, %o3 46387c478bd9Sstevel@tonic-gate btst 1, %o3 46397c478bd9Sstevel@tonic-gate bnz %icc, .dcobcp ! Most likely 46407c478bd9Sstevel@tonic-gate btst 7, %o3 46417c478bd9Sstevel@tonic-gate bz %icc, .dcos8 46427c478bd9Sstevel@tonic-gate btst 3, %o3 46437c478bd9Sstevel@tonic-gate bz %icc, .dcos4 46447c478bd9Sstevel@tonic-gate nop 46457c478bd9Sstevel@tonic-gate ba,pt %ncc, .dcos2 46467c478bd9Sstevel@tonic-gate nop 46477c478bd9Sstevel@tonic-gate .align 32 46487c478bd9Sstevel@tonic-gate.dodebc: 46497c478bd9Sstevel@tonic-gate ldx [%o0 + %o3], %o4 46507c478bd9Sstevel@tonic-gate deccc %o2 46517c478bd9Sstevel@tonic-gate stxa %o4, [%o1 + %o3]ASI_USER 46527c478bd9Sstevel@tonic-gate bg,pt %ncc, .dodebc 46537c478bd9Sstevel@tonic-gate addcc %o3, 8, %o3 46547c478bd9Sstevel@tonic-gate ! 46557c478bd9Sstevel@tonic-gate ! End of copy loop. Check to see if we're done. Most 46567c478bd9Sstevel@tonic-gate ! eight byte aligned copies end here. 46577c478bd9Sstevel@tonic-gate ! 46587c478bd9Sstevel@tonic-gate bz,pt %ncc, .dcofh 46597c478bd9Sstevel@tonic-gate nop 46607c478bd9Sstevel@tonic-gate ! 46617c478bd9Sstevel@tonic-gate ! Something is left - do it byte for byte. 46627c478bd9Sstevel@tonic-gate ! 46637c478bd9Sstevel@tonic-gate ba,pt %ncc, .dcocl 46647c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 ! load next byte 46657c478bd9Sstevel@tonic-gate ! 46667c478bd9Sstevel@tonic-gate ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy. 46677c478bd9Sstevel@tonic-gate ! 
46687c478bd9Sstevel@tonic-gate .align 32 46697c478bd9Sstevel@tonic-gate.dodfbc: 46707c478bd9Sstevel@tonic-gate lduw [%o0 + %o3], %o4 46717c478bd9Sstevel@tonic-gate deccc %o2 46727c478bd9Sstevel@tonic-gate sta %o4, [%o1 + %o3]ASI_USER 46737c478bd9Sstevel@tonic-gate bg,pt %ncc, .dodfbc 46747c478bd9Sstevel@tonic-gate addcc %o3, 4, %o3 46757c478bd9Sstevel@tonic-gate ! 46767c478bd9Sstevel@tonic-gate ! End of copy loop. Check to see if we're done. Most 46777c478bd9Sstevel@tonic-gate ! four byte aligned copies end here. 46787c478bd9Sstevel@tonic-gate ! 46797c478bd9Sstevel@tonic-gate bz,pt %ncc, .dcofh 46807c478bd9Sstevel@tonic-gate nop 46817c478bd9Sstevel@tonic-gate ! 46827c478bd9Sstevel@tonic-gate ! Something is left. Do it byte for byte. 46837c478bd9Sstevel@tonic-gate ! 46847c478bd9Sstevel@tonic-gate ba,pt %ncc, .dcocl 46857c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 ! load next byte 46867c478bd9Sstevel@tonic-gate ! 46877c478bd9Sstevel@tonic-gate ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to 46887c478bd9Sstevel@tonic-gate ! copy. 46897c478bd9Sstevel@tonic-gate ! 46907c478bd9Sstevel@tonic-gate .align 32 46917c478bd9Sstevel@tonic-gate.dodtbc: 46927c478bd9Sstevel@tonic-gate lduh [%o0 + %o3], %o4 46937c478bd9Sstevel@tonic-gate deccc %o2 46947c478bd9Sstevel@tonic-gate stha %o4, [%o1 + %o3]ASI_USER 46957c478bd9Sstevel@tonic-gate bg,pt %ncc, .dodtbc 46967c478bd9Sstevel@tonic-gate addcc %o3, 2, %o3 46977c478bd9Sstevel@tonic-gate ! 46987c478bd9Sstevel@tonic-gate ! End of copy loop. Anything left? 46997c478bd9Sstevel@tonic-gate ! 47007c478bd9Sstevel@tonic-gate bz,pt %ncc, .dcofh 47017c478bd9Sstevel@tonic-gate nop 47027c478bd9Sstevel@tonic-gate ! 47037c478bd9Sstevel@tonic-gate ! Deal with the last byte 47047c478bd9Sstevel@tonic-gate ! 
47057c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 47067c478bd9Sstevel@tonic-gate stba %o4, [%o1 + %o3]ASI_USER 47077c478bd9Sstevel@tonic-gate.dcofh: 47087c478bd9Sstevel@tonic-gate membar #Sync 47097c478bd9Sstevel@tonic-gate stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 47107c478bd9Sstevel@tonic-gate retl 47117c478bd9Sstevel@tonic-gate clr %o0 47127c478bd9Sstevel@tonic-gate 47137c478bd9Sstevel@tonic-gate.big_copyout: 47147c478bd9Sstevel@tonic-gate ! We're going to go off and do a block copy. 47157c478bd9Sstevel@tonic-gate ! Switch fault handlers and grab a window. We 47167c478bd9Sstevel@tonic-gate ! don't do a membar #Sync since we've done only 47177c478bd9Sstevel@tonic-gate ! kernel data to this point. 47187c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] 47197c478bd9Sstevel@tonic-gate 47207c478bd9Sstevel@tonic-gate ! Copyouts that reach here are larger than 256 bytes. The 47217c478bd9Sstevel@tonic-gate ! hw_copy_limit_1 is set to 256. Never set this limit to less than 47227c478bd9Sstevel@tonic-gate ! 128 bytes. 4723340af271Swh94709 save %sp, -SA(MINFRAME), %sp 47247c478bd9Sstevel@tonic-gate.do_block_copyout: 47257c478bd9Sstevel@tonic-gate 47267c478bd9Sstevel@tonic-gate ! Swap src/dst since the code below is memcpy code 47277c478bd9Sstevel@tonic-gate ! and memcpy/bcopy have different calling sequences 47287c478bd9Sstevel@tonic-gate mov %i1, %i5 47297c478bd9Sstevel@tonic-gate mov %i0, %i1 47307c478bd9Sstevel@tonic-gate mov %i5, %i0 47317c478bd9Sstevel@tonic-gate 4732340af271Swh94709 ! Block (64 bytes) align the destination. 4733340af271Swh94709 andcc %i0, 0x3f, %i3 ! is dst block aligned 4734340af271Swh94709 bz %ncc, copyout_blalign ! dst already block aligned 4735340af271Swh94709 sub %i3, 0x40, %i3 4736340af271Swh94709 neg %i3 ! bytes till dst 64 bytes aligned 4737340af271Swh94709 sub %i2, %i3, %i2 ! update i2 with new count 47387c478bd9Sstevel@tonic-gate 4739340af271Swh94709 ! 
Based on source and destination alignment do 4740340af271Swh94709 ! either 8 bytes, 4 bytes, 2 bytes or byte copy. 47417c478bd9Sstevel@tonic-gate 4742340af271Swh94709 ! Is dst & src 8B aligned 4743340af271Swh94709 or %i0, %i1, %o2 4744340af271Swh94709 andcc %o2, 0x7, %g0 4745340af271Swh94709 bz %ncc, .co_alewdcp 4746340af271Swh94709 nop 4747340af271Swh94709 4748340af271Swh94709 ! Is dst & src 4B aligned 4749340af271Swh94709 andcc %o2, 0x3, %g0 4750340af271Swh94709 bz %ncc, .co_alwdcp 4751340af271Swh94709 nop 4752340af271Swh94709 4753340af271Swh94709 ! Is dst & src 2B aligned 4754340af271Swh94709 andcc %o2, 0x1, %g0 4755340af271Swh94709 bz %ncc, .co_alhlfwdcp 4756340af271Swh94709 nop 4757340af271Swh94709 4758340af271Swh94709 ! 1B aligned 4759340af271Swh947091: ldub [%i1], %o2 4760340af271Swh94709 stba %o2, [%i0]ASI_USER 47617c478bd9Sstevel@tonic-gate inc %i1 47627c478bd9Sstevel@tonic-gate deccc %i3 4763340af271Swh94709 bgu,pt %ncc, 1b 47647c478bd9Sstevel@tonic-gate inc %i0 47657c478bd9Sstevel@tonic-gate 47667c478bd9Sstevel@tonic-gate ba copyout_blalign 4767340af271Swh94709 nop 47687c478bd9Sstevel@tonic-gate 4769340af271Swh94709 ! dst & src 4B aligned 4770340af271Swh94709.co_alwdcp: 4771340af271Swh94709 ld [%i1], %o2 4772340af271Swh94709 sta %o2, [%i0]ASI_USER 4773340af271Swh94709 add %i1, 0x4, %i1 4774340af271Swh94709 subcc %i3, 0x4, %i3 4775340af271Swh94709 bgu,pt %ncc, .co_alwdcp 4776340af271Swh94709 add %i0, 0x4, %i0 4777340af271Swh94709 4778340af271Swh94709 ba copyout_blalign 4779340af271Swh94709 nop 4780340af271Swh94709 4781340af271Swh94709 ! 
dst & src 2B aligned 4782340af271Swh94709.co_alhlfwdcp: 4783340af271Swh94709 lduh [%i1], %o2 4784340af271Swh94709 stuha %o2, [%i0]ASI_USER 4785340af271Swh94709 add %i1, 0x2, %i1 4786340af271Swh94709 subcc %i3, 0x2, %i3 4787340af271Swh94709 bgu,pt %ncc, .co_alhlfwdcp 4788340af271Swh94709 add %i0, 0x2, %i0 4789340af271Swh94709 4790340af271Swh94709 ba copyout_blalign 4791340af271Swh94709 nop 4792340af271Swh94709 4793340af271Swh94709 ! dst & src 8B aligned 4794340af271Swh94709.co_alewdcp: 47957c478bd9Sstevel@tonic-gate ldx [%i1], %o2 47967c478bd9Sstevel@tonic-gate stxa %o2, [%i0]ASI_USER 47977c478bd9Sstevel@tonic-gate add %i1, 0x8, %i1 47987c478bd9Sstevel@tonic-gate subcc %i3, 0x8, %i3 4799340af271Swh94709 bgu,pt %ncc, .co_alewdcp 48007c478bd9Sstevel@tonic-gate add %i0, 0x8, %i0 48017c478bd9Sstevel@tonic-gate 4802340af271Swh94709 ! Now Destination is block (64 bytes) aligned 48037c478bd9Sstevel@tonic-gatecopyout_blalign: 48047c478bd9Sstevel@tonic-gate andn %i2, 0x3f, %i3 ! %i3 count is multiple of block size 48057c478bd9Sstevel@tonic-gate sub %i2, %i3, %i2 ! Residue bytes in %i2 48067c478bd9Sstevel@tonic-gate 48077c478bd9Sstevel@tonic-gate mov ASI_BLK_INIT_QUAD_LDD_AIUS, %asi 48087c478bd9Sstevel@tonic-gate 48097c478bd9Sstevel@tonic-gate andcc %i1, 0xf, %o2 ! is src quadword aligned 48107c478bd9Sstevel@tonic-gate bz,pn %xcc, .co_blkcpy ! src offset in %o2 (last 4-bits) 48117c478bd9Sstevel@tonic-gate nop 48127c478bd9Sstevel@tonic-gate cmp %o2, 0x8 48137c478bd9Sstevel@tonic-gate bg .co_upper_double 48147c478bd9Sstevel@tonic-gate nop 48157c478bd9Sstevel@tonic-gate bl .co_lower_double 48167c478bd9Sstevel@tonic-gate nop 48177c478bd9Sstevel@tonic-gate 48187c478bd9Sstevel@tonic-gate ! Falls through when source offset is equal to 8 i.e. 48197c478bd9Sstevel@tonic-gate ! source is double word aligned. 48207c478bd9Sstevel@tonic-gate ! In this case no shift/merge of data is required 48217c478bd9Sstevel@tonic-gate 48227c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! 
align the src at 16 bytes. 48237c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! %l0 has block aligned source 48247c478bd9Sstevel@tonic-gate prefetch [%l0+0x0], #one_read 48257c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 48267c478bd9Sstevel@tonic-gate.co_loop0: 48277c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 48287c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 48297c478bd9Sstevel@tonic-gate prefetch [%l0+0x40], #one_read 48307c478bd9Sstevel@tonic-gate 48317c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x0]%asi 48327c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x8]%asi 48337c478bd9Sstevel@tonic-gate 48347c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 48357c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 48367c478bd9Sstevel@tonic-gate 48377c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x10]%asi 48387c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x18]%asi 48397c478bd9Sstevel@tonic-gate 48407c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 48417c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 48427c478bd9Sstevel@tonic-gate 48437c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x20]%asi 48447c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x28]%asi 48457c478bd9Sstevel@tonic-gate 48467c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 48477c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 48487c478bd9Sstevel@tonic-gate 48497c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x30]%asi 48507c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x38]%asi 48517c478bd9Sstevel@tonic-gate 48527c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 48537c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 48547c478bd9Sstevel@tonic-gate bgu,pt %xcc, .co_loop0 48557c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 48567c478bd9Sstevel@tonic-gate ba .co_blkdone 48577c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 48587c478bd9Sstevel@tonic-gate ! 
the src offset was stored in %o2 48597c478bd9Sstevel@tonic-gate 48607c478bd9Sstevel@tonic-gate.co_lower_double: 48617c478bd9Sstevel@tonic-gate 48627c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! align the src at 16 bytes. 48637c478bd9Sstevel@tonic-gate sll %o2, 3, %o0 ! %o0 left shift 48647c478bd9Sstevel@tonic-gate mov 0x40, %o1 48657c478bd9Sstevel@tonic-gate sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 48667c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! %l0 has block aligned source 48677c478bd9Sstevel@tonic-gate prefetch [%l0+0x0], #one_read 48687c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 ! partial data in %l2 and %l3 has 48697c478bd9Sstevel@tonic-gate ! complete data 48707c478bd9Sstevel@tonic-gate.co_loop1: 48717c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 48727c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 ! %l4 has partial data 48737c478bd9Sstevel@tonic-gate ! for this read. 48747c478bd9Sstevel@tonic-gate ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4 48757c478bd9Sstevel@tonic-gate ! into %l2 and %l3 48767c478bd9Sstevel@tonic-gate prefetch [%l0+0x40], #one_read 48777c478bd9Sstevel@tonic-gate 48787c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x0]%asi 48797c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x8]%asi 48807c478bd9Sstevel@tonic-gate 48817c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 48827c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 48837c478bd9Sstevel@tonic-gate ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and 48847c478bd9Sstevel@tonic-gate ! %l4 from previous read 48857c478bd9Sstevel@tonic-gate ! into %l4 and %l5 48867c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x10]%asi 48877c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x18]%asi 48887c478bd9Sstevel@tonic-gate 48897c478bd9Sstevel@tonic-gate ! Repeat the same for next 32 bytes. 
48907c478bd9Sstevel@tonic-gate 48917c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 48927c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 48937c478bd9Sstevel@tonic-gate ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) 48947c478bd9Sstevel@tonic-gate 48957c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x20]%asi 48967c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x28]%asi 48977c478bd9Sstevel@tonic-gate 48987c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 48997c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 49007c478bd9Sstevel@tonic-gate ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) 49017c478bd9Sstevel@tonic-gate 49027c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x30]%asi 49037c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x38]%asi 49047c478bd9Sstevel@tonic-gate 49057c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 49067c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 49077c478bd9Sstevel@tonic-gate bgu,pt %xcc, .co_loop1 49087c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 49097c478bd9Sstevel@tonic-gate ba .co_blkdone 49107c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 49117c478bd9Sstevel@tonic-gate ! the src offset was stored in %o2 49127c478bd9Sstevel@tonic-gate 49137c478bd9Sstevel@tonic-gate.co_upper_double: 49147c478bd9Sstevel@tonic-gate 49157c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! align the src at 16 bytes. 49167c478bd9Sstevel@tonic-gate sub %o2, 0x8, %o0 49177c478bd9Sstevel@tonic-gate sll %o0, 3, %o0 ! %o0 left shift 49187c478bd9Sstevel@tonic-gate mov 0x40, %o1 49197c478bd9Sstevel@tonic-gate sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 49207c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! %l0 has block aligned source 49217c478bd9Sstevel@tonic-gate prefetch [%l0+0x0], #one_read 49227c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 ! partial data in %l3 49237c478bd9Sstevel@tonic-gate ! for this read and 49247c478bd9Sstevel@tonic-gate ! 
no data in %l2 49257c478bd9Sstevel@tonic-gate.co_loop2: 49267c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 49277c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 ! %l4 has complete data 49287c478bd9Sstevel@tonic-gate ! and %l5 has partial 49297c478bd9Sstevel@tonic-gate ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! merge %l3, %l4 and %l5 49307c478bd9Sstevel@tonic-gate ! into %l3 and %l4 49317c478bd9Sstevel@tonic-gate prefetch [%l0+0x40], #one_read 49327c478bd9Sstevel@tonic-gate 49337c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x0]%asi 49347c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x8]%asi 49357c478bd9Sstevel@tonic-gate 49367c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 49377c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 49387c478bd9Sstevel@tonic-gate ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with 49397c478bd9Sstevel@tonic-gate ! %l5 from previous read 49407c478bd9Sstevel@tonic-gate ! into %l5 and %l2 49417c478bd9Sstevel@tonic-gate 49427c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x10]%asi 49437c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x18]%asi 49447c478bd9Sstevel@tonic-gate 49457c478bd9Sstevel@tonic-gate ! Repeat the same for next 32 bytes. 
49467c478bd9Sstevel@tonic-gate 49477c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 49487c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 49497c478bd9Sstevel@tonic-gate ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) 49507c478bd9Sstevel@tonic-gate 49517c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x20]%asi 49527c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x28]%asi 49537c478bd9Sstevel@tonic-gate 49547c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 49557c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 49567c478bd9Sstevel@tonic-gate ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) 49577c478bd9Sstevel@tonic-gate 49587c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x30]%asi 49597c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x38]%asi 49607c478bd9Sstevel@tonic-gate 49617c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 49627c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 49637c478bd9Sstevel@tonic-gate bgu,pt %xcc, .co_loop2 49647c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 49657c478bd9Sstevel@tonic-gate ba .co_blkdone 49667c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 49677c478bd9Sstevel@tonic-gate ! the src offset was stored in %o2 49687c478bd9Sstevel@tonic-gate 49697c478bd9Sstevel@tonic-gate 49707c478bd9Sstevel@tonic-gate ! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P 49717c478bd9Sstevel@tonic-gate.co_blkcpy: 49727c478bd9Sstevel@tonic-gate 49737c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %o0 ! 
%o0 has block aligned source 49747c478bd9Sstevel@tonic-gate prefetch [%o0+0x0], #one_read 49757c478bd9Sstevel@tonic-gate1: 49767c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l0 49777c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 49787c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 49797c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 49807c478bd9Sstevel@tonic-gate 49817c478bd9Sstevel@tonic-gate prefetch [%o0+0x40], #one_read 49827c478bd9Sstevel@tonic-gate 49837c478bd9Sstevel@tonic-gate stxa %l0, [%i0+0x0]%asi 49847c478bd9Sstevel@tonic-gate 49857c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 49867c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 49877c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l6 49887c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 49897c478bd9Sstevel@tonic-gate 49907c478bd9Sstevel@tonic-gate stxa %l1, [%i0+0x8]%asi 49917c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x10]%asi 49927c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x18]%asi 49937c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x20]%asi 49947c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x28]%asi 49957c478bd9Sstevel@tonic-gate stxa %l6, [%i0+0x30]%asi 49967c478bd9Sstevel@tonic-gate stxa %l7, [%i0+0x38]%asi 49977c478bd9Sstevel@tonic-gate 49987c478bd9Sstevel@tonic-gate add %o0, 0x40, %o0 49997c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 50007c478bd9Sstevel@tonic-gate bgu,pt %xcc, 1b 50017c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 50027c478bd9Sstevel@tonic-gate 50037c478bd9Sstevel@tonic-gate.co_blkdone: 50047c478bd9Sstevel@tonic-gate membar #Sync 50057c478bd9Sstevel@tonic-gate 5006340af271Swh94709 brz,pt %i2, .copyout_exit 50077c478bd9Sstevel@tonic-gate nop 50087c478bd9Sstevel@tonic-gate 5009340af271Swh94709 ! Handle trailing bytes 5010340af271Swh94709 cmp %i2, 0x8 5011340af271Swh94709 blu,pt %ncc, .co_residue 50127c478bd9Sstevel@tonic-gate nop 50137c478bd9Sstevel@tonic-gate 5014340af271Swh94709 ! 
Can we do some 8B ops 5015340af271Swh94709 or %i1, %i0, %o2 5016340af271Swh94709 andcc %o2, 0x7, %g0 5017340af271Swh94709 bnz %ncc, .co_last4 5018340af271Swh94709 nop 50197c478bd9Sstevel@tonic-gate 5020340af271Swh94709 ! Do 8byte ops as long as possible 5021340af271Swh94709.co_last8: 50227c478bd9Sstevel@tonic-gate ldx [%i1], %o2 50237c478bd9Sstevel@tonic-gate stxa %o2, [%i0]ASI_USER 50247c478bd9Sstevel@tonic-gate add %i1, 0x8, %i1 5025340af271Swh94709 sub %i2, 0x8, %i2 5026340af271Swh94709 cmp %i2, 0x8 5027340af271Swh94709 bgu,pt %ncc, .co_last8 50287c478bd9Sstevel@tonic-gate add %i0, 0x8, %i0 50297c478bd9Sstevel@tonic-gate 5030340af271Swh94709 brz,pt %i2, .copyout_exit 5031340af271Swh94709 nop 5032340af271Swh94709 5033340af271Swh94709 ba .co_residue 5034340af271Swh94709 nop 5035340af271Swh94709 5036340af271Swh94709.co_last4: 5037340af271Swh94709 ! Can we do 4B ops 5038340af271Swh94709 andcc %o2, 0x3, %g0 5039340af271Swh94709 bnz %ncc, .co_last2 5040340af271Swh94709 nop 5041340af271Swh947091: 5042340af271Swh94709 ld [%i1], %o2 5043340af271Swh94709 sta %o2, [%i0]ASI_USER 5044340af271Swh94709 add %i1, 0x4, %i1 5045340af271Swh94709 sub %i2, 0x4, %i2 5046340af271Swh94709 cmp %i2, 0x4 5047340af271Swh94709 bgu,pt %ncc, 1b 5048340af271Swh94709 add %i0, 0x4, %i0 5049340af271Swh94709 5050340af271Swh94709 brz,pt %i2, .copyout_exit 5051340af271Swh94709 nop 5052340af271Swh94709 5053340af271Swh94709 ba .co_residue 5054340af271Swh94709 nop 5055340af271Swh94709 5056340af271Swh94709.co_last2: 5057340af271Swh94709 ! 
Can we do 2B ops 5058340af271Swh94709 andcc %o2, 0x1, %g0 5059340af271Swh94709 bnz %ncc, .co_residue 5060340af271Swh94709 nop 5061340af271Swh94709 5062340af271Swh947091: 5063340af271Swh94709 lduh [%i1], %o2 5064340af271Swh94709 stuha %o2, [%i0]ASI_USER 5065340af271Swh94709 add %i1, 0x2, %i1 5066340af271Swh94709 sub %i2, 0x2, %i2 5067340af271Swh94709 cmp %i2, 0x2 5068340af271Swh94709 bgu,pt %ncc, 1b 5069340af271Swh94709 add %i0, 0x2, %i0 5070340af271Swh94709 5071340af271Swh94709 brz,pt %i2, .copyout_exit 50727c478bd9Sstevel@tonic-gate nop 50737c478bd9Sstevel@tonic-gate 50747c478bd9Sstevel@tonic-gate ! Copy the residue as byte copy 50757c478bd9Sstevel@tonic-gate.co_residue: 50767c478bd9Sstevel@tonic-gate ldub [%i1], %i4 50777c478bd9Sstevel@tonic-gate stba %i4, [%i0]ASI_USER 50787c478bd9Sstevel@tonic-gate inc %i1 50797c478bd9Sstevel@tonic-gate deccc %i2 5080340af271Swh94709 bgu,pt %xcc, .co_residue 50817c478bd9Sstevel@tonic-gate inc %i0 50827c478bd9Sstevel@tonic-gate 50837c478bd9Sstevel@tonic-gate.copyout_exit: 50847c478bd9Sstevel@tonic-gate membar #Sync 50857c478bd9Sstevel@tonic-gate stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! 
restore old t_lofault
50867c478bd9Sstevel@tonic-gate ret
50877c478bd9Sstevel@tonic-gate restore %g0, 0, %o0
50887c478bd9Sstevel@tonic-gate
50897c478bd9Sstevel@tonic-gate.copyout_err:
50907c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4
50917c478bd9Sstevel@tonic-gate brz %o4, 2f
50927c478bd9Sstevel@tonic-gate nop
50937c478bd9Sstevel@tonic-gate ldn [%o4 + CP_COPYOUT], %g2
50947c478bd9Sstevel@tonic-gate jmp %g2
50957c478bd9Sstevel@tonic-gate nop
50967c478bd9Sstevel@tonic-gate2:
50977c478bd9Sstevel@tonic-gate retl
50987c478bd9Sstevel@tonic-gate mov -1, %o0
5099*280575beSPatrick McGehearty#endif /* NIAGARA_IMPL */
51007c478bd9Sstevel@tonic-gate SET_SIZE(copyout)
51017c478bd9Sstevel@tonic-gate
51027c478bd9Sstevel@tonic-gate#endif /* lint */
51037c478bd9Sstevel@tonic-gate
51047c478bd9Sstevel@tonic-gate
/*
 * xcopyout(kaddr, uaddr, count)
 *
 * Entry point that reuses the copyout body.  It only builds the address
 * of its own error exit, .xcopyout_err, in REAL_LOFAULT (a register alias
 * defined outside this routine) and branches to .do_copyout, which is
 * also defined outside this routine.  The arguments are left untouched in
 * %o0-%o2 for .do_copyout.
 *
 * .xcopyout_err is the error exit:
 *   - if the thread's T_COPYOPS pointer is non-NULL, jump to the routine
 *     stored at offset CP_XCOPYOUT in that vector and let it finish the
 *     request (no return address is set up here, so it returns to our
 *     caller);
 *   - otherwise return, as a leaf routine (retl), whatever is in %g1.
 *
 * NOTE(review): %g1 is presumably the error code left behind by the
 * lofault/trap path before it vectors here -- that code is not visible
 * in this part of the file; confirm before relying on it.
 */
51057c478bd9Sstevel@tonic-gate#ifdef lint
51067c478bd9Sstevel@tonic-gate
/* Prototype-only stub so lint can type-check callers. */
51077c478bd9Sstevel@tonic-gate/*ARGSUSED*/
51087c478bd9Sstevel@tonic-gateint
51097c478bd9Sstevel@tonic-gatexcopyout(const void *kaddr, void *uaddr, size_t count)
51107c478bd9Sstevel@tonic-gate{ return (0); }
51117c478bd9Sstevel@tonic-gate
51127c478bd9Sstevel@tonic-gate#else /* lint */
51137c478bd9Sstevel@tonic-gate
51147c478bd9Sstevel@tonic-gate ENTRY(xcopyout)
51157c478bd9Sstevel@tonic-gate sethi %hi(.xcopyout_err), REAL_LOFAULT /* upper bits of handler address */
51167c478bd9Sstevel@tonic-gate b .do_copyout
51177c478bd9Sstevel@tonic-gate or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT /* delay slot: runs before .do_copyout, completes the address */
51187c478bd9Sstevel@tonic-gate.xcopyout_err:
51197c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 /* per-thread copyops vector, may be NULL */
51207c478bd9Sstevel@tonic-gate brz %o4, 2f /* no vector: return the error ourselves */
51217c478bd9Sstevel@tonic-gate nop
51227c478bd9Sstevel@tonic-gate ldn [%o4 + CP_XCOPYOUT], %g2 /* vector's xcopyout replacement */
51237c478bd9Sstevel@tonic-gate jmp %g2 /* tail-jump; %o7 is not modified here */
51247c478bd9Sstevel@tonic-gate nop
51257c478bd9Sstevel@tonic-gate2:
51267c478bd9Sstevel@tonic-gate retl
51277c478bd9Sstevel@tonic-gate mov %g1, %o0 /* delay slot: return value = %g1 */
51287c478bd9Sstevel@tonic-gate SET_SIZE(xcopyout)
51297c478bd9Sstevel@tonic-gate
51307c478bd9Sstevel@tonic-gate#endif /* lint */
51317c478bd9Sstevel@tonic-gate
/*
 * xcopyout_little(kaddr, uaddr, count)
 *
 * Byte-at-a-time copy of count bytes from kaddr to uaddr that reverses
 * the byte order: the source is read from its last byte backwards while
 * the destination is written from its first byte forwards, so
 * uaddr[i] receives kaddr[count - 1 - i].  Stores are issued through
 * ASI_AIUSL; loads are ordinary loads.
 *
 * t_lofault is pointed at .little_err (defined outside this routine) for
 * the duration of the copy and restored on the normal exit.  The normal
 * exit always returns 0, including for count == 0.
 *
 * Register use inside the routine:
 *   %o3  negative index, runs from -count up to 0
 *   %o0  kaddr + 2*count - 1 at loop entry, then -2 per byte, so that
 *        %o0 + %o3 steps back one byte per iteration
 *   %o1  uaddr + count, so that %o1 + %o3 steps forward one byte per
 *        iteration
 *   %o4  handler address, then count - 1, then the byte in flight
 *   %o5  caller's t_lofault value
 */
51327c478bd9Sstevel@tonic-gate#ifdef lint
51337c478bd9Sstevel@tonic-gate
/* Prototype-only stub so lint can type-check callers. */
51347c478bd9Sstevel@tonic-gate/*ARGSUSED*/
51357c478bd9Sstevel@tonic-gateint
51367c478bd9Sstevel@tonic-gatexcopyout_little(const void *kaddr, void *uaddr, size_t count)
51377c478bd9Sstevel@tonic-gate{ return (0); }
51387c478bd9Sstevel@tonic-gate
51397c478bd9Sstevel@tonic-gate#else /* lint */
51407c478bd9Sstevel@tonic-gate
51417c478bd9Sstevel@tonic-gate ENTRY(xcopyout_little)
51427c478bd9Sstevel@tonic-gate sethi %hi(.little_err), %o4
51437c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o5 /* remember caller's handler */
51447c478bd9Sstevel@tonic-gate or %o4, %lo(.little_err), %o4
51457c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier
51467c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT]
51477c478bd9Sstevel@tonic-gate
51487c478bd9Sstevel@tonic-gate subcc %g0, %o2, %o3 /* %o3 = -count; Z set when count == 0 */
51497c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 /* %o0 = kaddr + count */
51507c478bd9Sstevel@tonic-gate bz,pn %ncc, 2f ! check for zero bytes
51517c478bd9Sstevel@tonic-gate sub %o2, 1, %o4 /* delay slot (runs on both paths): %o4 = count - 1 */
51527c478bd9Sstevel@tonic-gate add %o0, %o4, %o0 ! start w/last byte
51537c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 /* %o1 = uaddr + count */
51547c478bd9Sstevel@tonic-gate ldub [%o0+%o3], %o4 /* first load: kaddr[count - 1] */
51557c478bd9Sstevel@tonic-gate
51567c478bd9Sstevel@tonic-gate1: stba %o4, [%o1+%o3]ASI_AIUSL
51577c478bd9Sstevel@tonic-gate inccc %o3 /* carry is set only on the step from -1 to 0 */
51587c478bd9Sstevel@tonic-gate sub %o0, 2, %o0 ! get next byte
51597c478bd9Sstevel@tonic-gate bcc,a,pt %ncc, 1b /* loop while the index has not reached 0 */
51607c478bd9Sstevel@tonic-gate ldub [%o0+%o3], %o4 /* annulled delay slot: load happens only when looping */
51617c478bd9Sstevel@tonic-gate
51627c478bd9Sstevel@tonic-gate2: membar #Sync ! sync error barrier
51637c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
51647c478bd9Sstevel@tonic-gate retl
51657c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return (0)
51667c478bd9Sstevel@tonic-gate SET_SIZE(xcopyout_little)
51677c478bd9Sstevel@tonic-gate
51687c478bd9Sstevel@tonic-gate#endif /* lint */
51697c478bd9Sstevel@tonic-gate
51707c478bd9Sstevel@tonic-gate/*
51717c478bd9Sstevel@tonic-gate * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
51727c478bd9Sstevel@tonic-gate */
51737c478bd9Sstevel@tonic-gate
51747c478bd9Sstevel@tonic-gate#if defined(lint)
51757c478bd9Sstevel@tonic-gate
51767c478bd9Sstevel@tonic-gate/*ARGSUSED*/
51777c478bd9Sstevel@tonic-gateint
51787c478bd9Sstevel@tonic-gatecopyin(const void *uaddr, void *kaddr, size_t count)
51797c478bd9Sstevel@tonic-gate{ return (0); }
51807c478bd9Sstevel@tonic-gate
51817c478bd9Sstevel@tonic-gate#else /* lint */
51827c478bd9Sstevel@tonic-gate
51837c478bd9Sstevel@tonic-gate ENTRY(copyin)
51847c478bd9Sstevel@tonic-gate sethi %hi(.copyin_err), REAL_LOFAULT
51857c478bd9Sstevel@tonic-gate or REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
51867c478bd9Sstevel@tonic-gate
5187*280575beSPatrick McGehearty#if !defined(NIAGARA_IMPL)
5188*280575beSPatrick McGehearty.do_copyin:
5189*280575beSPatrick McGehearty tst %o2 ! check for zero count; quick exit
5190*280575beSPatrick McGehearty bz,pt %ncc, .ci_smallqx
5191*280575beSPatrick McGehearty mov %o0, SAVE_SRC
5192*280575beSPatrick McGehearty mov %o1, SAVE_DST
5193*280575beSPatrick McGehearty mov %o2, SAVE_COUNT
5194*280575beSPatrick McGehearty cmp %o2, FP_COPY !
check for small copy/leaf case 5195*280575beSPatrick McGehearty bgt,pt %ncc, .ci_copy_more 5196*280575beSPatrick McGehearty ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT 5197*280575beSPatrick McGehearty/* 5198*280575beSPatrick McGehearty * Small copy in code 5199*280575beSPatrick McGehearty * 5200*280575beSPatrick McGehearty */ 5201*280575beSPatrick McGehearty sethi %hi(copyio_fault_nowindow), %o3 5202*280575beSPatrick McGehearty or %o3, %lo(copyio_fault_nowindow), %o3 5203*280575beSPatrick McGehearty membar #Sync 5204*280575beSPatrick McGehearty stn %o3, [THREAD_REG + T_LOFAULT] 5205*280575beSPatrick McGehearty 5206*280575beSPatrick McGehearty mov ASI_USER, %asi 5207*280575beSPatrick McGehearty cmp %o2, SHORTCOPY ! make sure there is enough to align 5208*280575beSPatrick McGehearty ble,pt %ncc, .ci_smallest 5209*280575beSPatrick McGehearty andcc %o1, 0x7, %o3 ! is dest long word aligned 5210*280575beSPatrick McGehearty bnz,pn %ncc, .ci_align 5211*280575beSPatrick McGehearty andcc %o1, 1, %o3 ! is dest byte aligned 5212*280575beSPatrick McGehearty 5213*280575beSPatrick McGehearty! Destination is long word aligned 5214*280575beSPatrick McGehearty.ci_al_src: 5215*280575beSPatrick McGehearty andcc %o0, 7, %o3 5216*280575beSPatrick McGehearty brnz,pt %o3, .ci_src_dst_unal8 5217*280575beSPatrick McGehearty nop 5218*280575beSPatrick McGehearty/* 5219*280575beSPatrick McGehearty * Special case for handling when src and dest are both long word aligned 5220*280575beSPatrick McGehearty * and total data to move is less than FP_COPY bytes 5221*280575beSPatrick McGehearty * Also handles finish up for large block moves, so may be less than 32 bytes 5222*280575beSPatrick McGehearty */ 5223*280575beSPatrick McGehearty.ci_medlong: 5224*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! 
adjust length to allow cc test 5225*280575beSPatrick McGehearty ble,pt %ncc, .ci_medl31 5226*280575beSPatrick McGehearty nop 5227*280575beSPatrick McGehearty.ci_medl32: 5228*280575beSPatrick McGehearty ldxa [%o0]%asi, %o4 ! move 32 bytes 5229*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count by 32 5230*280575beSPatrick McGehearty stx %o4, [%o1] 5231*280575beSPatrick McGehearty ldxa [%o0+8]%asi, %o4 5232*280575beSPatrick McGehearty stx %o4, [%o1+8] 5233*280575beSPatrick McGehearty ldxa [%o0+16]%asi, %o4 5234*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 5235*280575beSPatrick McGehearty stx %o4, [%o1+16] 5236*280575beSPatrick McGehearty ldxa [%o0-8]%asi, %o4 5237*280575beSPatrick McGehearty add %o1, 32, %o1 ! increase dst ptr by 32 5238*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medl32 ! repeat if at least 32 bytes left 5239*280575beSPatrick McGehearty stx %o4, [%o1-8] 5240*280575beSPatrick McGehearty.ci_medl31: 5241*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 5242*280575beSPatrick McGehearty ble,pt %ncc, .ci_medl7 ! skip if 7 or fewer bytes left 5243*280575beSPatrick McGehearty nop 5244*280575beSPatrick McGehearty.ci_medl8: 5245*280575beSPatrick McGehearty ldxa [%o0]%asi, %o4 ! move 8 bytes 5246*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 5247*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrease count by 8 5248*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 5249*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medl8 5250*280575beSPatrick McGehearty stx %o4, [%o1-8] 5251*280575beSPatrick McGehearty.ci_medl7: 5252*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 5253*280575beSPatrick McGehearty bnz,pt %ncc, .ci_small4 ! do final bytes if not finished 5254*280575beSPatrick McGehearty nop 5255*280575beSPatrick McGehearty.ci_smallx: ! 
finish up and exit 5256*280575beSPatrick McGehearty membar #Sync 5257*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 5258*280575beSPatrick McGehearty.ci_smallqx: 5259*280575beSPatrick McGehearty retl 5260*280575beSPatrick McGehearty mov %g0, %o0 5261*280575beSPatrick McGehearty 5262*280575beSPatrick McGehearty.ci_small4: 5263*280575beSPatrick McGehearty cmp %o2, 4 5264*280575beSPatrick McGehearty blt,pt %ncc, .ci_small3x ! skip if less than 4 bytes left 5265*280575beSPatrick McGehearty nop ! 5266*280575beSPatrick McGehearty lda [%o0]%asi, %o4 ! move 4 bytes 5267*280575beSPatrick McGehearty add %o0, 4, %o0 ! increase src ptr by 4 5268*280575beSPatrick McGehearty add %o1, 4, %o1 ! increase dst ptr by 4 5269*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! decrease count by 4 5270*280575beSPatrick McGehearty bz %ncc, .ci_smallx 5271*280575beSPatrick McGehearty stw %o4, [%o1-4] 5272*280575beSPatrick McGehearty 5273*280575beSPatrick McGehearty.ci_small3x: ! Exactly 1, 2, or 3 bytes remain 5274*280575beSPatrick McGehearty subcc %o2, 1, %o2 ! reduce count for cc test 5275*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! load one byte 5276*280575beSPatrick McGehearty bz,pt %ncc, .ci_smallx 5277*280575beSPatrick McGehearty stb %o4, [%o1] ! store one byte 5278*280575beSPatrick McGehearty lduba [%o0+1]%asi, %o4 ! load second byte 5279*280575beSPatrick McGehearty subcc %o2, 1, %o2 5280*280575beSPatrick McGehearty bz,pt %ncc, .ci_smallx 5281*280575beSPatrick McGehearty stb %o4, [%o1+1] ! store second byte 5282*280575beSPatrick McGehearty lduba [%o0+2]%asi, %o4 ! load third byte 5283*280575beSPatrick McGehearty ba .ci_smallx 5284*280575beSPatrick McGehearty stb %o4, [%o1+2] ! store third byte 5285*280575beSPatrick McGehearty 5286*280575beSPatrick McGehearty.ci_smallest: ! 
7 or fewer bytes remain 5287*280575beSPatrick McGehearty cmp %o2, 4 5288*280575beSPatrick McGehearty blt,pt %ncc, .ci_small3x 5289*280575beSPatrick McGehearty nop 5290*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! read byte 5291*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! reduce count by 4 5292*280575beSPatrick McGehearty stb %o4, [%o1] ! write byte 5293*280575beSPatrick McGehearty lduba [%o0+1]%asi, %o4 ! repeat for total of 4 bytes 5294*280575beSPatrick McGehearty add %o0, 4, %o0 ! advance src by 4 5295*280575beSPatrick McGehearty stb %o4, [%o1+1] 5296*280575beSPatrick McGehearty lduba [%o0-2]%asi, %o4 5297*280575beSPatrick McGehearty add %o1, 4, %o1 ! advance dst by 4 5298*280575beSPatrick McGehearty stb %o4, [%o1-2] 5299*280575beSPatrick McGehearty lduba [%o0-1]%asi, %o4 5300*280575beSPatrick McGehearty bnz,pt %ncc, .ci_small3x 5301*280575beSPatrick McGehearty stb %o4, [%o1-1] 5302*280575beSPatrick McGehearty membar #Sync 5303*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 5304*280575beSPatrick McGehearty retl 5305*280575beSPatrick McGehearty mov %g0, %o0 5306*280575beSPatrick McGehearty 5307*280575beSPatrick McGehearty.ci_align: 5308*280575beSPatrick McGehearty bnz,pt %ncc, .ci_al_d1 5309*280575beSPatrick McGehearty.ci_al_d1f: ! dest is now half word aligned 5310*280575beSPatrick McGehearty andcc %o1, 2, %o3 ! is dest word aligned 5311*280575beSPatrick McGehearty bnz,pt %ncc, .ci_al_d2 5312*280575beSPatrick McGehearty.ci_al_d2f: ! dest is now word aligned 5313*280575beSPatrick McGehearty andcc %o1, 4, %o3 ! is dest longword aligned? 5314*280575beSPatrick McGehearty bz,pt %ncc, .ci_al_src 5315*280575beSPatrick McGehearty nop 5316*280575beSPatrick McGehearty.ci_al_d4: ! dest is word aligned; src is unknown 5317*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! move a word (src align unknown) 5318*280575beSPatrick McGehearty lduba [%o0+1]%asi, %o3 5319*280575beSPatrick McGehearty sll %o4, 24, %o4 ! 
position 5320*280575beSPatrick McGehearty sll %o3, 16, %o3 ! position 5321*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 5322*280575beSPatrick McGehearty lduba [%o0+2]%asi, %o4 5323*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 5324*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 5325*280575beSPatrick McGehearty lduba [%o0+3]%asi, %o4 5326*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 5327*280575beSPatrick McGehearty stw %o4,[%o1] ! store four bytes 5328*280575beSPatrick McGehearty add %o0, 4, %o0 ! adjust src by 4 5329*280575beSPatrick McGehearty add %o1, 4, %o1 ! adjust dest by 4 5330*280575beSPatrick McGehearty sub %o2, 4, %o2 ! adjust count by 4 5331*280575beSPatrick McGehearty andcc %o0, 7, %o3 ! check for src long word alignment 5332*280575beSPatrick McGehearty brz,pt %o3, .ci_medlong 5333*280575beSPatrick McGehearty.ci_src_dst_unal8: 5334*280575beSPatrick McGehearty ! dst is 8-byte aligned, src is not 5335*280575beSPatrick McGehearty ! Size is less than FP_COPY 5336*280575beSPatrick McGehearty ! Following code is to select for alignment 5337*280575beSPatrick McGehearty andcc %o0, 0x3, %o3 ! test word alignment 5338*280575beSPatrick McGehearty bz,pt %ncc, .ci_medword 5339*280575beSPatrick McGehearty nop 5340*280575beSPatrick McGehearty andcc %o0, 0x1, %o3 ! test halfword alignment 5341*280575beSPatrick McGehearty bnz,pt %ncc, .ci_med_byte ! go to byte move if not halfword 5342*280575beSPatrick McGehearty andcc %o0, 0x2, %o3 ! test which byte alignment 5343*280575beSPatrick McGehearty ba .ci_medhalf 5344*280575beSPatrick McGehearty nop 5345*280575beSPatrick McGehearty.ci_al_d1: ! align dest to half word 5346*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! move a byte 5347*280575beSPatrick McGehearty add %o0, 1, %o0 5348*280575beSPatrick McGehearty stb %o4, [%o1] 5349*280575beSPatrick McGehearty add %o1, 1, %o1 5350*280575beSPatrick McGehearty andcc %o1, 2, %o3 ! 
is dest word aligned 5351*280575beSPatrick McGehearty bz,pt %ncc, .ci_al_d2f 5352*280575beSPatrick McGehearty sub %o2, 1, %o2 5353*280575beSPatrick McGehearty.ci_al_d2: ! align dest to word 5354*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! move a half-word (src align unknown) 5355*280575beSPatrick McGehearty lduba [%o0+1]%asi, %o3 5356*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 5357*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 5358*280575beSPatrick McGehearty sth %o4, [%o1] 5359*280575beSPatrick McGehearty add %o0, 2, %o0 5360*280575beSPatrick McGehearty add %o1, 2, %o1 5361*280575beSPatrick McGehearty andcc %o1, 4, %o3 ! is dest longword aligned? 5362*280575beSPatrick McGehearty bz,pt %ncc, .ci_al_src 5363*280575beSPatrick McGehearty sub %o2, 2, %o2 5364*280575beSPatrick McGehearty ba .ci_al_d4 5365*280575beSPatrick McGehearty nop 5366*280575beSPatrick McGehearty/* 5367*280575beSPatrick McGehearty * Handle all cases where src and dest are aligned on word 5368*280575beSPatrick McGehearty * boundaries. Use unrolled loops for better performance. 5369*280575beSPatrick McGehearty * This option wins over standard large data move when 5370*280575beSPatrick McGehearty * source and destination is in cache for medium 5371*280575beSPatrick McGehearty * to short data moves. 5372*280575beSPatrick McGehearty */ 5373*280575beSPatrick McGehearty.ci_medword: 5374*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 5375*280575beSPatrick McGehearty ble,pt %ncc, .ci_medw31 5376*280575beSPatrick McGehearty nop 5377*280575beSPatrick McGehearty.ci_medw32: 5378*280575beSPatrick McGehearty lda [%o0]%asi, %o4 ! 
move a block of 32 bytes 5379*280575beSPatrick McGehearty stw %o4, [%o1] 5380*280575beSPatrick McGehearty lda [%o0+4]%asi, %o4 5381*280575beSPatrick McGehearty stw %o4, [%o1+4] 5382*280575beSPatrick McGehearty lda [%o0+8]%asi, %o4 5383*280575beSPatrick McGehearty stw %o4, [%o1+8] 5384*280575beSPatrick McGehearty lda [%o0+12]%asi, %o4 5385*280575beSPatrick McGehearty stw %o4, [%o1+12] 5386*280575beSPatrick McGehearty lda [%o0+16]%asi, %o4 5387*280575beSPatrick McGehearty stw %o4, [%o1+16] 5388*280575beSPatrick McGehearty lda [%o0+20]%asi, %o4 5389*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 5390*280575beSPatrick McGehearty stw %o4, [%o1+20] 5391*280575beSPatrick McGehearty lda [%o0+24]%asi, %o4 5392*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 5393*280575beSPatrick McGehearty stw %o4, [%o1+24] 5394*280575beSPatrick McGehearty lda [%o0-4]%asi, %o4 5395*280575beSPatrick McGehearty add %o1, 32, %o1 ! increase dst ptr by 32 5396*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medw32 ! repeat if at least 32 bytes left 5397*280575beSPatrick McGehearty stw %o4, [%o1-4] 5398*280575beSPatrick McGehearty.ci_medw31: 5399*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 5400*280575beSPatrick McGehearty ble,pt %ncc, .ci_medw7 ! skip if 7 or fewer bytes left 5401*280575beSPatrick McGehearty nop ! 5402*280575beSPatrick McGehearty.ci_medw15: 5403*280575beSPatrick McGehearty lda [%o0]%asi, %o4 ! move a block of 8 bytes 5404*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 5405*280575beSPatrick McGehearty stw %o4, [%o1] 5406*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 5407*280575beSPatrick McGehearty lda [%o0-4]%asi, %o4 5408*280575beSPatrick McGehearty add %o1, 8, %o1 ! 
increase dst ptr by 8 5409*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medw15 5410*280575beSPatrick McGehearty stw %o4, [%o1-4] 5411*280575beSPatrick McGehearty.ci_medw7: 5412*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 5413*280575beSPatrick McGehearty bz,pt %ncc, .ci_smallx ! exit if finished 5414*280575beSPatrick McGehearty cmp %o2, 4 5415*280575beSPatrick McGehearty blt,pt %ncc, .ci_small3x ! skip if less than 4 bytes left 5416*280575beSPatrick McGehearty nop ! 5417*280575beSPatrick McGehearty lda [%o0]%asi, %o4 ! move 4 bytes 5418*280575beSPatrick McGehearty add %o0, 4, %o0 ! increase src ptr by 4 5419*280575beSPatrick McGehearty add %o1, 4, %o1 ! increase dst ptr by 4 5420*280575beSPatrick McGehearty subcc %o2, 4, %o2 ! decrease count by 4 5421*280575beSPatrick McGehearty bnz .ci_small3x 5422*280575beSPatrick McGehearty stw %o4, [%o1-4] 5423*280575beSPatrick McGehearty membar #Sync 5424*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 5425*280575beSPatrick McGehearty retl 5426*280575beSPatrick McGehearty mov %g0, %o0 5427*280575beSPatrick McGehearty 5428*280575beSPatrick McGehearty.ci_medhalf: 5429*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 5430*280575beSPatrick McGehearty ble,pt %ncc, .ci_medh31 5431*280575beSPatrick McGehearty nop 5432*280575beSPatrick McGehearty.ci_medh32: ! load and store block of 32 bytes 5433*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 5434*280575beSPatrick McGehearty 5435*280575beSPatrick McGehearty lduha [%o0]%asi, %o4 ! 
move 32 bytes 5436*280575beSPatrick McGehearty lduwa [%o0+2]%asi, %o3 5437*280575beSPatrick McGehearty sllx %o4, 48, %o4 5438*280575beSPatrick McGehearty sllx %o3, 16, %o3 5439*280575beSPatrick McGehearty or %o4, %o3, %o3 5440*280575beSPatrick McGehearty lduha [%o0+6]%asi, %o4 5441*280575beSPatrick McGehearty or %o4, %o3, %o4 5442*280575beSPatrick McGehearty stx %o4, [%o1] 5443*280575beSPatrick McGehearty 5444*280575beSPatrick McGehearty lduha [%o0+8]%asi, %o4 5445*280575beSPatrick McGehearty lduwa [%o0+10]%asi, %o3 5446*280575beSPatrick McGehearty sllx %o4, 48, %o4 5447*280575beSPatrick McGehearty sllx %o3, 16, %o3 5448*280575beSPatrick McGehearty or %o4, %o3, %o3 5449*280575beSPatrick McGehearty lduha [%o0+14]%asi, %o4 5450*280575beSPatrick McGehearty or %o4, %o3, %o4 5451*280575beSPatrick McGehearty stx %o4, [%o1+8] 5452*280575beSPatrick McGehearty 5453*280575beSPatrick McGehearty lduha [%o0+16]%asi, %o4 5454*280575beSPatrick McGehearty lduwa [%o0+18]%asi, %o3 5455*280575beSPatrick McGehearty sllx %o4, 48, %o4 5456*280575beSPatrick McGehearty sllx %o3, 16, %o3 5457*280575beSPatrick McGehearty or %o4, %o3, %o3 5458*280575beSPatrick McGehearty lduha [%o0+22]%asi, %o4 5459*280575beSPatrick McGehearty or %o4, %o3, %o4 5460*280575beSPatrick McGehearty stx %o4, [%o1+16] 5461*280575beSPatrick McGehearty 5462*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 5463*280575beSPatrick McGehearty add %o1, 32, %o1 ! increase dst ptr by 32 5464*280575beSPatrick McGehearty 5465*280575beSPatrick McGehearty lduha [%o0-8]%asi, %o4 5466*280575beSPatrick McGehearty lduwa [%o0-6]%asi, %o3 5467*280575beSPatrick McGehearty sllx %o4, 48, %o4 5468*280575beSPatrick McGehearty sllx %o3, 16, %o3 5469*280575beSPatrick McGehearty or %o4, %o3, %o3 5470*280575beSPatrick McGehearty lduha [%o0-2]%asi, %o4 5471*280575beSPatrick McGehearty or %o3, %o4, %o4 5472*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medh32 ! 
repeat if at least 32 bytes left 5473*280575beSPatrick McGehearty stx %o4, [%o1-8] 5474*280575beSPatrick McGehearty 5475*280575beSPatrick McGehearty.ci_medh31: 5476*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 5477*280575beSPatrick McGehearty ble,pt %ncc, .ci_medh7 ! skip if 7 or fewer bytes left 5478*280575beSPatrick McGehearty nop ! 5479*280575beSPatrick McGehearty.ci_medh15: 5480*280575beSPatrick McGehearty lduha [%o0]%asi, %o4 ! move a block of 8 bytes 5481*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 5482*280575beSPatrick McGehearty lduwa [%o0+2]%asi, %o3 5483*280575beSPatrick McGehearty sllx %o4, 48, %o4 5484*280575beSPatrick McGehearty sllx %o3, 16, %o3 5485*280575beSPatrick McGehearty or %o4, %o3, %o3 5486*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 5487*280575beSPatrick McGehearty lduha [%o0+6]%asi, %o4 5488*280575beSPatrick McGehearty add %o0, 8, %o0 ! increase src ptr by 8 5489*280575beSPatrick McGehearty or %o4, %o3, %o4 5490*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medh15 5491*280575beSPatrick McGehearty stx %o4, [%o1-8] 5492*280575beSPatrick McGehearty.ci_medh7: 5493*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 5494*280575beSPatrick McGehearty bz,pt %ncc, .ci_smallx ! exit if finished 5495*280575beSPatrick McGehearty cmp %o2, 4 5496*280575beSPatrick McGehearty blt,pt %ncc, .ci_small3x ! skip if less than 4 bytes left 5497*280575beSPatrick McGehearty nop ! 
5498*280575beSPatrick McGehearty lduha [%o0]%asi, %o4 5499*280575beSPatrick McGehearty sll %o4, 16, %o4 5500*280575beSPatrick McGehearty lduha [%o0+2]%asi, %o3 5501*280575beSPatrick McGehearty or %o3, %o4, %o4 5502*280575beSPatrick McGehearty subcc %o2, 4, %o2 5503*280575beSPatrick McGehearty add %o0, 4, %o0 5504*280575beSPatrick McGehearty add %o1, 4, %o1 5505*280575beSPatrick McGehearty bnz .ci_small3x 5506*280575beSPatrick McGehearty stw %o4, [%o1-4] 5507*280575beSPatrick McGehearty membar #Sync 5508*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 5509*280575beSPatrick McGehearty retl 5510*280575beSPatrick McGehearty mov %g0, %o0 5511*280575beSPatrick McGehearty 5512*280575beSPatrick McGehearty .align 16 5513*280575beSPatrick McGehearty.ci_med_byte: 5514*280575beSPatrick McGehearty bnz,pt %ncc, .ci_medbh32a ! go to correct byte move 5515*280575beSPatrick McGehearty subcc %o2, 31, %o2 ! adjust length to allow cc test 5516*280575beSPatrick McGehearty ble,pt %ncc, .ci_medb31 5517*280575beSPatrick McGehearty nop 5518*280575beSPatrick McGehearty.ci_medb32: ! Alignment 1 or 5 5519*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 5520*280575beSPatrick McGehearty 5521*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! 
load and store a block of 32 bytes 5522*280575beSPatrick McGehearty sllx %o4, 56, %o3 5523*280575beSPatrick McGehearty lduha [%o0+1]%asi, %o4 5524*280575beSPatrick McGehearty sllx %o4, 40, %o4 5525*280575beSPatrick McGehearty or %o4, %o3, %o3 5526*280575beSPatrick McGehearty lduwa [%o0+3]%asi, %o4 5527*280575beSPatrick McGehearty sllx %o4, 8, %o4 5528*280575beSPatrick McGehearty or %o4, %o3, %o3 5529*280575beSPatrick McGehearty lduba [%o0+7]%asi, %o4 5530*280575beSPatrick McGehearty or %o4, %o3, %o4 5531*280575beSPatrick McGehearty stx %o4, [%o1] 5532*280575beSPatrick McGehearty 5533*280575beSPatrick McGehearty lduba [%o0+8]%asi, %o4 5534*280575beSPatrick McGehearty sllx %o4, 56, %o3 5535*280575beSPatrick McGehearty lduha [%o0+9]%asi, %o4 5536*280575beSPatrick McGehearty sllx %o4, 40, %o4 5537*280575beSPatrick McGehearty or %o4, %o3, %o3 5538*280575beSPatrick McGehearty lduwa [%o0+11]%asi, %o4 5539*280575beSPatrick McGehearty sllx %o4, 8, %o4 5540*280575beSPatrick McGehearty or %o4, %o3, %o3 5541*280575beSPatrick McGehearty lduba [%o0+15]%asi, %o4 5542*280575beSPatrick McGehearty or %o4, %o3, %o4 5543*280575beSPatrick McGehearty stx %o4, [%o1+8] 5544*280575beSPatrick McGehearty 5545*280575beSPatrick McGehearty lduba [%o0+16]%asi, %o4 5546*280575beSPatrick McGehearty sllx %o4, 56, %o3 5547*280575beSPatrick McGehearty lduha [%o0+17]%asi, %o4 5548*280575beSPatrick McGehearty sllx %o4, 40, %o4 5549*280575beSPatrick McGehearty or %o4, %o3, %o3 5550*280575beSPatrick McGehearty lduwa [%o0+19]%asi, %o4 5551*280575beSPatrick McGehearty sllx %o4, 8, %o4 5552*280575beSPatrick McGehearty or %o4, %o3, %o3 5553*280575beSPatrick McGehearty lduba [%o0+23]%asi, %o4 5554*280575beSPatrick McGehearty or %o4, %o3, %o4 5555*280575beSPatrick McGehearty stx %o4, [%o1+16] 5556*280575beSPatrick McGehearty 5557*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 5558*280575beSPatrick McGehearty add %o1, 32, %o1 ! 
increase dst ptr by 32 5559*280575beSPatrick McGehearty 5560*280575beSPatrick McGehearty lduba [%o0-8]%asi, %o4 5561*280575beSPatrick McGehearty sllx %o4, 56, %o3 5562*280575beSPatrick McGehearty lduha [%o0-7]%asi, %o4 5563*280575beSPatrick McGehearty sllx %o4, 40, %o4 5564*280575beSPatrick McGehearty or %o4, %o3, %o3 5565*280575beSPatrick McGehearty lduwa [%o0-5]%asi, %o4 5566*280575beSPatrick McGehearty sllx %o4, 8, %o4 5567*280575beSPatrick McGehearty or %o4, %o3, %o3 5568*280575beSPatrick McGehearty lduba [%o0-1]%asi, %o4 5569*280575beSPatrick McGehearty or %o4, %o3, %o4 5570*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medb32 ! repeat if at least 32 bytes left 5571*280575beSPatrick McGehearty stx %o4, [%o1-8] 5572*280575beSPatrick McGehearty 5573*280575beSPatrick McGehearty.ci_medb31: ! 31 or fewer bytes remaining 5574*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 5575*280575beSPatrick McGehearty ble,pt %ncc, .ci_medb7 ! skip if 7 or fewer bytes left 5576*280575beSPatrick McGehearty nop ! 5577*280575beSPatrick McGehearty.ci_medb15: 5578*280575beSPatrick McGehearty 5579*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! load and store a block of 8 bytes 5580*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 5581*280575beSPatrick McGehearty sllx %o4, 56, %o3 5582*280575beSPatrick McGehearty lduha [%o0+1]%asi, %o4 5583*280575beSPatrick McGehearty sllx %o4, 40, %o4 5584*280575beSPatrick McGehearty or %o4, %o3, %o3 5585*280575beSPatrick McGehearty lduwa [%o0+3]%asi, %o4 5586*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 5587*280575beSPatrick McGehearty sllx %o4, 8, %o4 5588*280575beSPatrick McGehearty or %o4, %o3, %o3 5589*280575beSPatrick McGehearty lduba [%o0+7]%asi, %o4 5590*280575beSPatrick McGehearty add %o0, 8, %o0 ! 
increase src ptr by 8 5591*280575beSPatrick McGehearty or %o4, %o3, %o4 5592*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medb15 5593*280575beSPatrick McGehearty stx %o4, [%o1-8] 5594*280575beSPatrick McGehearty.ci_medb7: 5595*280575beSPatrick McGehearty addcc %o2, 7, %o2 ! finish adjustment of remaining count 5596*280575beSPatrick McGehearty bz,pt %ncc, .ci_smallx ! exit if finished 5597*280575beSPatrick McGehearty cmp %o2, 4 5598*280575beSPatrick McGehearty blt,pt %ncc, .ci_small3x ! skip if less than 4 bytes left 5599*280575beSPatrick McGehearty nop ! 5600*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! move 4 bytes 5601*280575beSPatrick McGehearty sll %o4, 24, %o3 5602*280575beSPatrick McGehearty lduha [%o0+1]%asi, %o4 5603*280575beSPatrick McGehearty sll %o4, 8, %o4 5604*280575beSPatrick McGehearty or %o4, %o3, %o3 5605*280575beSPatrick McGehearty lduba [%o0+3]%asi, %o4 5606*280575beSPatrick McGehearty or %o4, %o3, %o4 5607*280575beSPatrick McGehearty subcc %o2, 4, %o2 5608*280575beSPatrick McGehearty add %o0, 4, %o0 5609*280575beSPatrick McGehearty add %o1, 4, %o1 5610*280575beSPatrick McGehearty bnz .ci_small3x 5611*280575beSPatrick McGehearty stw %o4, [%o1-4] 5612*280575beSPatrick McGehearty membar #Sync 5613*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 5614*280575beSPatrick McGehearty retl 5615*280575beSPatrick McGehearty mov %g0, %o0 5616*280575beSPatrick McGehearty 5617*280575beSPatrick McGehearty .align 16 5618*280575beSPatrick McGehearty.ci_medbh32a: ! Alignment 3 or 7 5619*280575beSPatrick McGehearty ble,pt %ncc, .ci_medbh31 5620*280575beSPatrick McGehearty nop 5621*280575beSPatrick McGehearty.ci_medbh32: ! Alignment 3 or 7 5622*280575beSPatrick McGehearty subcc %o2, 32, %o2 ! decrement length count 5623*280575beSPatrick McGehearty 5624*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! 
load and store a block of 32 bytes 5625*280575beSPatrick McGehearty sllx %o4, 56, %o3 5626*280575beSPatrick McGehearty lduwa [%o0+1]%asi, %o4 5627*280575beSPatrick McGehearty sllx %o4, 24, %o4 5628*280575beSPatrick McGehearty or %o4, %o3, %o3 5629*280575beSPatrick McGehearty lduha [%o0+5]%asi, %o4 5630*280575beSPatrick McGehearty sllx %o4, 8, %o4 5631*280575beSPatrick McGehearty or %o4, %o3, %o3 5632*280575beSPatrick McGehearty lduba [%o0+7]%asi, %o4 5633*280575beSPatrick McGehearty or %o4, %o3, %o4 5634*280575beSPatrick McGehearty stx %o4, [%o1] 5635*280575beSPatrick McGehearty 5636*280575beSPatrick McGehearty lduba [%o0+8]%asi, %o4 5637*280575beSPatrick McGehearty sllx %o4, 56, %o3 5638*280575beSPatrick McGehearty lduwa [%o0+9]%asi, %o4 5639*280575beSPatrick McGehearty sllx %o4, 24, %o4 5640*280575beSPatrick McGehearty or %o4, %o3, %o3 5641*280575beSPatrick McGehearty lduha [%o0+13]%asi, %o4 5642*280575beSPatrick McGehearty sllx %o4, 8, %o4 5643*280575beSPatrick McGehearty or %o4, %o3, %o3 5644*280575beSPatrick McGehearty lduba [%o0+15]%asi, %o4 5645*280575beSPatrick McGehearty or %o4, %o3, %o4 5646*280575beSPatrick McGehearty stx %o4, [%o1+8] 5647*280575beSPatrick McGehearty 5648*280575beSPatrick McGehearty lduba [%o0+16]%asi, %o4 5649*280575beSPatrick McGehearty sllx %o4, 56, %o3 5650*280575beSPatrick McGehearty lduwa [%o0+17]%asi, %o4 5651*280575beSPatrick McGehearty sllx %o4, 24, %o4 5652*280575beSPatrick McGehearty or %o4, %o3, %o3 5653*280575beSPatrick McGehearty lduha [%o0+21]%asi, %o4 5654*280575beSPatrick McGehearty sllx %o4, 8, %o4 5655*280575beSPatrick McGehearty or %o4, %o3, %o3 5656*280575beSPatrick McGehearty lduba [%o0+23]%asi, %o4 5657*280575beSPatrick McGehearty or %o4, %o3, %o4 5658*280575beSPatrick McGehearty stx %o4, [%o1+16] 5659*280575beSPatrick McGehearty 5660*280575beSPatrick McGehearty add %o0, 32, %o0 ! increase src ptr by 32 5661*280575beSPatrick McGehearty add %o1, 32, %o1 ! 
increase dst ptr by 32 5662*280575beSPatrick McGehearty 5663*280575beSPatrick McGehearty lduba [%o0-8]%asi, %o4 5664*280575beSPatrick McGehearty sllx %o4, 56, %o3 5665*280575beSPatrick McGehearty lduwa [%o0-7]%asi, %o4 5666*280575beSPatrick McGehearty sllx %o4, 24, %o4 5667*280575beSPatrick McGehearty or %o4, %o3, %o3 5668*280575beSPatrick McGehearty lduha [%o0-3]%asi, %o4 5669*280575beSPatrick McGehearty sllx %o4, 8, %o4 5670*280575beSPatrick McGehearty or %o4, %o3, %o3 5671*280575beSPatrick McGehearty lduba [%o0-1]%asi, %o4 5672*280575beSPatrick McGehearty or %o4, %o3, %o4 5673*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medbh32 ! repeat if at least 32 bytes left 5674*280575beSPatrick McGehearty stx %o4, [%o1-8] 5675*280575beSPatrick McGehearty 5676*280575beSPatrick McGehearty.ci_medbh31: 5677*280575beSPatrick McGehearty addcc %o2, 24, %o2 ! adjust count to be off by 7 5678*280575beSPatrick McGehearty ble,pt %ncc, .ci_medb7 ! skip if 7 or fewer bytes left 5679*280575beSPatrick McGehearty nop ! 5680*280575beSPatrick McGehearty.ci_medbh15: 5681*280575beSPatrick McGehearty lduba [%o0]%asi, %o4 ! load and store a block of 8 bytes 5682*280575beSPatrick McGehearty sllx %o4, 56, %o3 5683*280575beSPatrick McGehearty lduwa [%o0+1]%asi, %o4 5684*280575beSPatrick McGehearty sllx %o4, 24, %o4 5685*280575beSPatrick McGehearty or %o4, %o3, %o3 5686*280575beSPatrick McGehearty lduha [%o0+5]%asi, %o4 5687*280575beSPatrick McGehearty sllx %o4, 8, %o4 5688*280575beSPatrick McGehearty or %o4, %o3, %o3 5689*280575beSPatrick McGehearty lduba [%o0+7]%asi, %o4 5690*280575beSPatrick McGehearty or %o4, %o3, %o4 5691*280575beSPatrick McGehearty stx %o4, [%o1] 5692*280575beSPatrick McGehearty subcc %o2, 8, %o2 ! decrement length count 5693*280575beSPatrick McGehearty add %o1, 8, %o1 ! increase dst ptr by 8 5694*280575beSPatrick McGehearty add %o0, 8, %o0 ! 
increase src ptr by 8 5695*280575beSPatrick McGehearty bgu,pt %ncc, .ci_medbh15 5696*280575beSPatrick McGehearty stx %o4, [%o1-8] ! delay slot: repeats the store already done at [%o1] before dst was advanced 5697*280575beSPatrick McGehearty ba .ci_medb7 5698*280575beSPatrick McGehearty nop 5699*280575beSPatrick McGehearty 5700*280575beSPatrick McGehearty/* 5701*280575beSPatrick McGehearty * End of small copy in code (no window) 5702*280575beSPatrick McGehearty * 5703*280575beSPatrick McGehearty */ 5704*280575beSPatrick McGehearty 5705*280575beSPatrick McGehearty/* 5706*280575beSPatrick McGehearty * Long copy in code (using register window and fp regs) 5707*280575beSPatrick McGehearty * 5708*280575beSPatrick McGehearty */ 5709*280575beSPatrick McGehearty 5710*280575beSPatrick McGehearty.ci_copy_more: 5711*280575beSPatrick McGehearty sethi %hi(copyio_fault), %o3 5712*280575beSPatrick McGehearty or %o3, %lo(copyio_fault), %o3 5713*280575beSPatrick McGehearty membar #Sync 5714*280575beSPatrick McGehearty stn %o3, [THREAD_REG + T_LOFAULT] 5715*280575beSPatrick McGehearty/* 5716*280575beSPatrick McGehearty * Following code is for large copies. We know there are at 5717*280575beSPatrick McGehearty * least FP_COPY bytes available. FP regs are used, so 5718*280575beSPatrick McGehearty * we save registers and fp regs before starting 5719*280575beSPatrick McGehearty */ 5720*280575beSPatrick McGehearty save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 5721*280575beSPatrick McGehearty or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 5722*280575beSPatrick McGehearty rd %fprs, %g1 ! check for unused fp 5723*280575beSPatrick McGehearty ! if fprs.fef == 0, set it. 5724*280575beSPatrick McGehearty ! Setting it when already set costs more than checking 5725*280575beSPatrick McGehearty andcc %g1, FPRS_FEF, %g1 ! 
test FEF, fprs.du = fprs.dl = 0 5726*280575beSPatrick McGehearty bz,pt %ncc, .ci_fp_unused 5727*280575beSPatrick McGehearty mov ASI_USER, %asi 5728*280575beSPatrick McGehearty BST_FP_TOSTACK(%o3) 5729*280575beSPatrick McGehearty ba .ci_fp_ready 5730*280575beSPatrick McGehearty.ci_fp_unused: 5731*280575beSPatrick McGehearty prefetcha [%i0 + (1 * CACHE_LINE)]%asi, #one_read 5732*280575beSPatrick McGehearty wr %g0, FPRS_FEF, %fprs ! fprs.fef = 1 5733*280575beSPatrick McGehearty.ci_fp_ready: 5734*280575beSPatrick McGehearty rd %gsr, %l5 ! save %gsr value 5735*280575beSPatrick McGehearty andcc %i1, 1, %o3 ! is dest byte aligned 5736*280575beSPatrick McGehearty bnz,pt %ncc, .ci_big_d1 5737*280575beSPatrick McGehearty.ci_big_d1f: ! dest is now half word aligned 5738*280575beSPatrick McGehearty andcc %i1, 2, %o3 5739*280575beSPatrick McGehearty bnz,pt %ncc, .ci_big_d2 5740*280575beSPatrick McGehearty.ci_big_d2f: ! dest is now word aligned 5741*280575beSPatrick McGehearty andcc %i1, 4, %o3 5742*280575beSPatrick McGehearty bnz,pt %ncc, .ci_big_d4 5743*280575beSPatrick McGehearty.ci_big_d4f: ! dest is long word aligned 5744*280575beSPatrick McGehearty andcc %i0, 7, %o3 ! is src long word aligned 5745*280575beSPatrick McGehearty brnz,pt %o3, .ci_big_unal8 5746*280575beSPatrick McGehearty prefetcha [%i0 + (2 * CACHE_LINE)]%asi, #one_read 5747*280575beSPatrick McGehearty ! Src and dst are long word aligned 5748*280575beSPatrick McGehearty ! align dst to 64 byte boundary 5749*280575beSPatrick McGehearty andcc %i1, 0x3f, %o3 ! %o3 == 0 means dst is 64 byte aligned 5750*280575beSPatrick McGehearty brz,pn %o3, .ci_al_to_64 5751*280575beSPatrick McGehearty nop 5752*280575beSPatrick McGehearty sub %o3, 64, %o3 ! %o3 has negative bytes to move 5753*280575beSPatrick McGehearty add %i2, %o3, %i2 ! adjust remaining count 5754*280575beSPatrick McGehearty andcc %o3, 8, %o4 ! odd long words to move? 
5755*280575beSPatrick McGehearty brz,pt %o4, .ci_al_to_16 5756*280575beSPatrick McGehearty nop 5757*280575beSPatrick McGehearty add %o3, 8, %o3 5758*280575beSPatrick McGehearty ldxa [%i0]%asi, %o4 5759*280575beSPatrick McGehearty add %i0, 8, %i0 ! increment src ptr 5760*280575beSPatrick McGehearty add %i1, 8, %i1 ! increment dst ptr 5761*280575beSPatrick McGehearty stx %o4, [%i1-8] 5762*280575beSPatrick McGehearty! Dest is aligned on 16 bytes, src 8 byte aligned 5763*280575beSPatrick McGehearty.ci_al_to_16: 5764*280575beSPatrick McGehearty andcc %o3, 0x30, %o4 ! pair of long words to move? 5765*280575beSPatrick McGehearty brz,pt %o4, .ci_al_to_64 5766*280575beSPatrick McGehearty nop 5767*280575beSPatrick McGehearty.ci_al_mv_16: 5768*280575beSPatrick McGehearty add %o3, 16, %o3 5769*280575beSPatrick McGehearty ldxa [%i0]%asi, %o4 5770*280575beSPatrick McGehearty stx %o4, [%i1] 5771*280575beSPatrick McGehearty add %i0, 16, %i0 ! increment src ptr 5772*280575beSPatrick McGehearty ldxa [%i0-8]%asi, %o4 5773*280575beSPatrick McGehearty stx %o4, [%i1+8] 5774*280575beSPatrick McGehearty andcc %o3, 0x30, %o4 5775*280575beSPatrick McGehearty brnz,pt %o4, .ci_al_mv_16 5776*280575beSPatrick McGehearty add %i1, 16, %i1 ! increment dst ptr 5777*280575beSPatrick McGehearty! Dest is aligned on 64 bytes, src 8 byte aligned 5778*280575beSPatrick McGehearty.ci_al_to_64: 5779*280575beSPatrick McGehearty ! Determine source alignment 5780*280575beSPatrick McGehearty ! 
to correct 8 byte offset 5781*280575beSPatrick McGehearty andcc %i0, 32, %o3 5782*280575beSPatrick McGehearty brnz,pn %o3, .ci_aln_1 5783*280575beSPatrick McGehearty andcc %i0, 16, %o3 5784*280575beSPatrick McGehearty brnz,pn %o3, .ci_aln_01 5785*280575beSPatrick McGehearty andcc %i0, 8, %o3 5786*280575beSPatrick McGehearty brz,pn %o3, .ci_aln_000 5787*280575beSPatrick McGehearty prefetcha [%i0 + (3 * CACHE_LINE)]%asi, #one_read 5788*280575beSPatrick McGehearty ba .ci_aln_001 5789*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5790*280575beSPatrick McGehearty.ci_aln_01: 5791*280575beSPatrick McGehearty brnz,pn %o3, .ci_aln_011 5792*280575beSPatrick McGehearty prefetcha [%i0 + (3 * CACHE_LINE)]%asi, #one_read 5793*280575beSPatrick McGehearty ba .ci_aln_010 5794*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5795*280575beSPatrick McGehearty.ci_aln_1: 5796*280575beSPatrick McGehearty andcc %i0, 16, %o3 5797*280575beSPatrick McGehearty brnz,pn %o3, .ci_aln_11 5798*280575beSPatrick McGehearty andcc %i0, 8, %o3 5799*280575beSPatrick McGehearty brnz,pn %o3, .ci_aln_101 5800*280575beSPatrick McGehearty prefetcha [%i0 + (3 * CACHE_LINE)]%asi, #one_read 5801*280575beSPatrick McGehearty ba .ci_aln_100 5802*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5803*280575beSPatrick McGehearty.ci_aln_11: 5804*280575beSPatrick McGehearty brz,pn %o3, .ci_aln_110 5805*280575beSPatrick McGehearty prefetcha [%i0 + (3 * CACHE_LINE)]%asi, #one_read 5806*280575beSPatrick McGehearty 5807*280575beSPatrick McGehearty.ci_aln_111: 5808*280575beSPatrick McGehearty! Alignment off by 8 bytes 5809*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5810*280575beSPatrick McGehearty ldda [%i0]%asi, %d0 5811*280575beSPatrick McGehearty add %i0, 8, %i0 5812*280575beSPatrick McGehearty sub %i2, 8, %i2 5813*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! 
%o3 is multiple of 2*block size 5814*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 5815*280575beSPatrick McGehearty sub %i1, %i0, %i1 5816*280575beSPatrick McGehearty.ci_aln_111_loop: 5817*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_AIUS,%d16 ! block load 5818*280575beSPatrick McGehearty subcc %o3, 64, %o3 5819*280575beSPatrick McGehearty fmovd %d16, %d2 5820*280575beSPatrick McGehearty fmovd %d18, %d4 5821*280575beSPatrick McGehearty fmovd %d20, %d6 5822*280575beSPatrick McGehearty fmovd %d22, %d8 5823*280575beSPatrick McGehearty fmovd %d24, %d10 5824*280575beSPatrick McGehearty fmovd %d26, %d12 5825*280575beSPatrick McGehearty fmovd %d28, %d14 5826*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 5827*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 5828*280575beSPatrick McGehearty add %i0, 64, %i0 5829*280575beSPatrick McGehearty fmovd %d30, %d0 5830*280575beSPatrick McGehearty bgt,pt %ncc, .ci_aln_111_loop 5831*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5832*280575beSPatrick McGehearty add %i1, %i0, %i1 5833*280575beSPatrick McGehearty 5834*280575beSPatrick McGehearty std %d0, [%i1] 5835*280575beSPatrick McGehearty ba .ci_remain_stuff 5836*280575beSPatrick McGehearty add %i1, 8, %i1 5837*280575beSPatrick McGehearty ! END OF aln_111 5838*280575beSPatrick McGehearty 5839*280575beSPatrick McGehearty.ci_aln_110: 5840*280575beSPatrick McGehearty! Alignment off by 16 bytes 5841*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5842*280575beSPatrick McGehearty ldda [%i0]%asi, %d0 5843*280575beSPatrick McGehearty ldda [%i0+8]%asi, %d2 5844*280575beSPatrick McGehearty add %i0, 16, %i0 5845*280575beSPatrick McGehearty sub %i2, 16, %i2 5846*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 5847*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! 
residue bytes in %i2 5848*280575beSPatrick McGehearty sub %i1, %i0, %i1 5849*280575beSPatrick McGehearty.ci_aln_110_loop: 5850*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_AIUS,%d16 ! block load 5851*280575beSPatrick McGehearty subcc %o3, 64, %o3 5852*280575beSPatrick McGehearty fmovd %d16, %d4 5853*280575beSPatrick McGehearty fmovd %d18, %d6 5854*280575beSPatrick McGehearty fmovd %d20, %d8 5855*280575beSPatrick McGehearty fmovd %d22, %d10 5856*280575beSPatrick McGehearty fmovd %d24, %d12 5857*280575beSPatrick McGehearty fmovd %d26, %d14 5858*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 5859*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 5860*280575beSPatrick McGehearty add %i0, 64, %i0 5861*280575beSPatrick McGehearty fmovd %d28, %d0 5862*280575beSPatrick McGehearty fmovd %d30, %d2 5863*280575beSPatrick McGehearty bgt,pt %ncc, .ci_aln_110_loop 5864*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5865*280575beSPatrick McGehearty add %i1, %i0, %i1 5866*280575beSPatrick McGehearty 5867*280575beSPatrick McGehearty std %d0, [%i1] 5868*280575beSPatrick McGehearty std %d2, [%i1+8] 5869*280575beSPatrick McGehearty ba .ci_remain_stuff 5870*280575beSPatrick McGehearty add %i1, 16, %i1 5871*280575beSPatrick McGehearty ! END OF aln_110 5872*280575beSPatrick McGehearty 5873*280575beSPatrick McGehearty.ci_aln_101: 5874*280575beSPatrick McGehearty! Alignment off by 24 bytes 5875*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5876*280575beSPatrick McGehearty ldda [%i0]%asi, %d0 5877*280575beSPatrick McGehearty ldda [%i0+8]%asi, %d2 5878*280575beSPatrick McGehearty ldda [%i0+16]%asi, %d4 5879*280575beSPatrick McGehearty add %i0, 24, %i0 5880*280575beSPatrick McGehearty sub %i2, 24, %i2 5881*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 5882*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! 
residue bytes in %i2 5883*280575beSPatrick McGehearty sub %i1, %i0, %i1 5884*280575beSPatrick McGehearty.ci_aln_101_loop: 5885*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_AIUS,%d16 ! block load 5886*280575beSPatrick McGehearty subcc %o3, 64, %o3 5887*280575beSPatrick McGehearty fmovd %d16, %d6 5888*280575beSPatrick McGehearty fmovd %d18, %d8 5889*280575beSPatrick McGehearty fmovd %d20, %d10 5890*280575beSPatrick McGehearty fmovd %d22, %d12 5891*280575beSPatrick McGehearty fmovd %d24, %d14 5892*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 5893*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 5894*280575beSPatrick McGehearty add %i0, 64, %i0 5895*280575beSPatrick McGehearty fmovd %d26, %d0 5896*280575beSPatrick McGehearty fmovd %d28, %d2 5897*280575beSPatrick McGehearty fmovd %d30, %d4 5898*280575beSPatrick McGehearty bgt,pt %ncc, .ci_aln_101_loop 5899*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5900*280575beSPatrick McGehearty add %i1, %i0, %i1 5901*280575beSPatrick McGehearty 5902*280575beSPatrick McGehearty std %d0, [%i1] 5903*280575beSPatrick McGehearty std %d2, [%i1+8] 5904*280575beSPatrick McGehearty std %d4, [%i1+16] 5905*280575beSPatrick McGehearty ba .ci_remain_stuff 5906*280575beSPatrick McGehearty add %i1, 24, %i1 5907*280575beSPatrick McGehearty ! END OF aln_101 5908*280575beSPatrick McGehearty 5909*280575beSPatrick McGehearty.ci_aln_100: 5910*280575beSPatrick McGehearty! Alignment off by 32 bytes 5911*280575beSPatrick McGehearty ldda [%i0]%asi, %d0 5912*280575beSPatrick McGehearty ldda [%i0+8]%asi, %d2 5913*280575beSPatrick McGehearty ldda [%i0+16]%asi,%d4 5914*280575beSPatrick McGehearty ldda [%i0+24]%asi,%d6 5915*280575beSPatrick McGehearty add %i0, 32, %i0 5916*280575beSPatrick McGehearty sub %i2, 32, %i2 5917*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 5918*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! 
residue bytes in %i2 5919*280575beSPatrick McGehearty sub %i1, %i0, %i1 5920*280575beSPatrick McGehearty.ci_aln_100_loop: 5921*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_AIUS,%d16 ! block load 5922*280575beSPatrick McGehearty subcc %o3, 64, %o3 5923*280575beSPatrick McGehearty fmovd %d16, %d8 5924*280575beSPatrick McGehearty fmovd %d18, %d10 5925*280575beSPatrick McGehearty fmovd %d20, %d12 5926*280575beSPatrick McGehearty fmovd %d22, %d14 5927*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 5928*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 5929*280575beSPatrick McGehearty add %i0, 64, %i0 5930*280575beSPatrick McGehearty fmovd %d24, %d0 5931*280575beSPatrick McGehearty fmovd %d26, %d2 5932*280575beSPatrick McGehearty fmovd %d28, %d4 5933*280575beSPatrick McGehearty fmovd %d30, %d6 5934*280575beSPatrick McGehearty bgt,pt %ncc, .ci_aln_100_loop 5935*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5936*280575beSPatrick McGehearty add %i1, %i0, %i1 5937*280575beSPatrick McGehearty 5938*280575beSPatrick McGehearty std %d0, [%i1] 5939*280575beSPatrick McGehearty std %d2, [%i1+8] 5940*280575beSPatrick McGehearty std %d4, [%i1+16] 5941*280575beSPatrick McGehearty std %d6, [%i1+24] 5942*280575beSPatrick McGehearty ba .ci_remain_stuff 5943*280575beSPatrick McGehearty add %i1, 32, %i1 5944*280575beSPatrick McGehearty ! END OF aln_100 5945*280575beSPatrick McGehearty 5946*280575beSPatrick McGehearty.ci_aln_011: 5947*280575beSPatrick McGehearty! 
Alignment off by 40 bytes 5948*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5949*280575beSPatrick McGehearty ldda [%i0]%asi, %d0 5950*280575beSPatrick McGehearty ldda [%i0+8]%asi, %d2 5951*280575beSPatrick McGehearty ldda [%i0+16]%asi, %d4 5952*280575beSPatrick McGehearty ldda [%i0+24]%asi, %d6 5953*280575beSPatrick McGehearty ldda [%i0+32]%asi, %d8 5954*280575beSPatrick McGehearty add %i0, 40, %i0 5955*280575beSPatrick McGehearty sub %i2, 40, %i2 5956*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 5957*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 5958*280575beSPatrick McGehearty sub %i1, %i0, %i1 5959*280575beSPatrick McGehearty.ci_aln_011_loop: 5960*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_AIUS,%d16 ! block load 5961*280575beSPatrick McGehearty subcc %o3, 64, %o3 5962*280575beSPatrick McGehearty fmovd %d16, %d10 5963*280575beSPatrick McGehearty fmovd %d18, %d12 5964*280575beSPatrick McGehearty fmovd %d20, %d14 5965*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! 
block initializing store 5966*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 5967*280575beSPatrick McGehearty add %i0, 64, %i0 5968*280575beSPatrick McGehearty fmovd %d22, %d0 5969*280575beSPatrick McGehearty fmovd %d24, %d2 5970*280575beSPatrick McGehearty fmovd %d26, %d4 5971*280575beSPatrick McGehearty fmovd %d28, %d6 5972*280575beSPatrick McGehearty fmovd %d30, %d8 5973*280575beSPatrick McGehearty bgt,pt %ncc, .ci_aln_011_loop 5974*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 5975*280575beSPatrick McGehearty add %i1, %i0, %i1 5976*280575beSPatrick McGehearty 5977*280575beSPatrick McGehearty std %d0, [%i1] 5978*280575beSPatrick McGehearty std %d2, [%i1+8] 5979*280575beSPatrick McGehearty std %d4, [%i1+16] 5980*280575beSPatrick McGehearty std %d6, [%i1+24] 5981*280575beSPatrick McGehearty std %d8, [%i1+32] 5982*280575beSPatrick McGehearty ba .ci_remain_stuff 5983*280575beSPatrick McGehearty add %i1, 40, %i1 5984*280575beSPatrick McGehearty ! END OF aln_011 5985*280575beSPatrick McGehearty 5986*280575beSPatrick McGehearty.ci_aln_010: 5987*280575beSPatrick McGehearty! Alignment off by 48 bytes 5988*280575beSPatrick McGehearty ldda [%i0]%asi, %d0 5989*280575beSPatrick McGehearty ldda [%i0+8]%asi, %d2 5990*280575beSPatrick McGehearty ldda [%i0+16]%asi, %d4 5991*280575beSPatrick McGehearty ldda [%i0+24]%asi, %d6 5992*280575beSPatrick McGehearty ldda [%i0+32]%asi, %d8 5993*280575beSPatrick McGehearty ldda [%i0+40]%asi, %d10 5994*280575beSPatrick McGehearty add %i0, 48, %i0 5995*280575beSPatrick McGehearty sub %i2, 48, %i2 5996*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 5997*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 5998*280575beSPatrick McGehearty sub %i1, %i0, %i1 5999*280575beSPatrick McGehearty.ci_aln_010_loop: 6000*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_AIUS,%d16 ! 
block load 6001*280575beSPatrick McGehearty subcc %o3, 64, %o3 6002*280575beSPatrick McGehearty fmovd %d16, %d12 6003*280575beSPatrick McGehearty fmovd %d18, %d14 6004*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! block initializing store 6005*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 6006*280575beSPatrick McGehearty add %i0, 64, %i0 6007*280575beSPatrick McGehearty fmovd %d20, %d0 6008*280575beSPatrick McGehearty fmovd %d22, %d2 6009*280575beSPatrick McGehearty fmovd %d24, %d4 6010*280575beSPatrick McGehearty fmovd %d26, %d6 6011*280575beSPatrick McGehearty fmovd %d28, %d8 6012*280575beSPatrick McGehearty fmovd %d30, %d10 6013*280575beSPatrick McGehearty bgt,pt %ncc, .ci_aln_010_loop 6014*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 6015*280575beSPatrick McGehearty add %i1, %i0, %i1 6016*280575beSPatrick McGehearty 6017*280575beSPatrick McGehearty std %d0, [%i1] 6018*280575beSPatrick McGehearty std %d2, [%i1+8] 6019*280575beSPatrick McGehearty std %d4, [%i1+16] 6020*280575beSPatrick McGehearty std %d6, [%i1+24] 6021*280575beSPatrick McGehearty std %d8, [%i1+32] 6022*280575beSPatrick McGehearty std %d10, [%i1+40] 6023*280575beSPatrick McGehearty ba .ci_remain_stuff 6024*280575beSPatrick McGehearty add %i1, 48, %i1 6025*280575beSPatrick McGehearty ! END OF aln_010 6026*280575beSPatrick McGehearty 6027*280575beSPatrick McGehearty.ci_aln_001: 6028*280575beSPatrick McGehearty! 
Alignment off by 56 bytes 6029*280575beSPatrick McGehearty ldda [%i0]%asi, %d0 6030*280575beSPatrick McGehearty ldda [%i0+8]%asi, %d2 6031*280575beSPatrick McGehearty ldda [%i0+16]%asi, %d4 6032*280575beSPatrick McGehearty ldda [%i0+24]%asi, %d6 6033*280575beSPatrick McGehearty ldda [%i0+32]%asi, %d8 6034*280575beSPatrick McGehearty ldda [%i0+40]%asi, %d10 6035*280575beSPatrick McGehearty ldda [%i0+48]%asi, %d12 6036*280575beSPatrick McGehearty add %i0, 56, %i0 6037*280575beSPatrick McGehearty sub %i2, 56, %i2 6038*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 6039*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 6040*280575beSPatrick McGehearty sub %i1, %i0, %i1 6041*280575beSPatrick McGehearty.ci_aln_001_loop: 6042*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_AIUS,%d16 ! block load 6043*280575beSPatrick McGehearty subcc %o3, 64, %o3 6044*280575beSPatrick McGehearty fmovd %d16, %d14 6045*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! 
block initializing store 6046*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 6047*280575beSPatrick McGehearty add %i0, 64, %i0 6048*280575beSPatrick McGehearty fmovd %d18, %d0 6049*280575beSPatrick McGehearty fmovd %d20, %d2 6050*280575beSPatrick McGehearty fmovd %d22, %d4 6051*280575beSPatrick McGehearty fmovd %d24, %d6 6052*280575beSPatrick McGehearty fmovd %d26, %d8 6053*280575beSPatrick McGehearty fmovd %d28, %d10 6054*280575beSPatrick McGehearty fmovd %d30, %d12 6055*280575beSPatrick McGehearty bgt,pt %ncc, .ci_aln_001_loop 6056*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 6057*280575beSPatrick McGehearty add %i1, %i0, %i1 6058*280575beSPatrick McGehearty 6059*280575beSPatrick McGehearty std %d0, [%i1] 6060*280575beSPatrick McGehearty std %d2, [%i1+8] 6061*280575beSPatrick McGehearty std %d4, [%i1+16] 6062*280575beSPatrick McGehearty std %d6, [%i1+24] 6063*280575beSPatrick McGehearty std %d8, [%i1+32] 6064*280575beSPatrick McGehearty std %d10, [%i1+40] 6065*280575beSPatrick McGehearty std %d12, [%i1+48] 6066*280575beSPatrick McGehearty ba .ci_remain_stuff 6067*280575beSPatrick McGehearty add %i1, 56, %i1 6068*280575beSPatrick McGehearty ! END OF aln_001 6069*280575beSPatrick McGehearty 6070*280575beSPatrick McGehearty.ci_aln_000: 6071*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 6072*280575beSPatrick McGehearty andn %i2, 0x7f, %o3 ! %o3 is multiple of 2*block size 6073*280575beSPatrick McGehearty and %i2, 0x7f, %i2 ! residue bytes in %i2 6074*280575beSPatrick McGehearty sub %i1, %i0, %i1 6075*280575beSPatrick McGehearty.ci_aln_000_loop: 6076*280575beSPatrick McGehearty ldda [%i0]ASI_BLK_AIUS,%d0 6077*280575beSPatrick McGehearty subcc %o3, 64, %o3 6078*280575beSPatrick McGehearty stxa %g0,[%i0+%i1]ASI_STBI_P ! 
block initializing store 6079*280575beSPatrick McGehearty stda %d0,[%i0+%i1]ASI_BLK_P 6080*280575beSPatrick McGehearty add %i0, 64, %i0 6081*280575beSPatrick McGehearty bgt,pt %ncc, .ci_aln_000_loop 6082*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 6083*280575beSPatrick McGehearty add %i1, %i0, %i1 6084*280575beSPatrick McGehearty 6085*280575beSPatrick McGehearty ! END OF aln_000 6086*280575beSPatrick McGehearty 6087*280575beSPatrick McGehearty.ci_remain_stuff: 6088*280575beSPatrick McGehearty subcc %i2, 31, %i2 ! adjust length to allow cc test 6089*280575beSPatrick McGehearty ble,pt %ncc, .ci_aln_31 6090*280575beSPatrick McGehearty nop 6091*280575beSPatrick McGehearty.ci_aln_32: 6092*280575beSPatrick McGehearty ldxa [%i0]%asi, %o4 ! move 32 bytes 6093*280575beSPatrick McGehearty subcc %i2, 32, %i2 ! decrement length count by 32 6094*280575beSPatrick McGehearty stx %o4, [%i1] 6095*280575beSPatrick McGehearty ldxa [%i0+8]%asi, %o4 6096*280575beSPatrick McGehearty stx %o4, [%i1+8] 6097*280575beSPatrick McGehearty ldxa [%i0+16]%asi, %o4 6098*280575beSPatrick McGehearty add %i0, 32, %i0 ! increase src ptr by 32 6099*280575beSPatrick McGehearty stx %o4, [%i1+16] 6100*280575beSPatrick McGehearty ldxa [%i0-8]%asi, %o4 6101*280575beSPatrick McGehearty add %i1, 32, %i1 ! increase dst ptr by 32 6102*280575beSPatrick McGehearty bgu,pt %ncc, .ci_aln_32 ! repeat if at least 32 bytes left 6103*280575beSPatrick McGehearty stx %o4, [%i1-8] 6104*280575beSPatrick McGehearty.ci_aln_31: 6105*280575beSPatrick McGehearty addcc %i2, 24, %i2 ! adjust count to be off by 7 6106*280575beSPatrick McGehearty ble,pt %ncc, .ci_aln_7 ! skip if 7 or fewer bytes left 6107*280575beSPatrick McGehearty nop ! 6108*280575beSPatrick McGehearty.ci_aln_15: 6109*280575beSPatrick McGehearty ldxa [%i0]%asi, %o4 ! move 8 bytes 6110*280575beSPatrick McGehearty add %i0, 8, %i0 ! increase src ptr by 8 6111*280575beSPatrick McGehearty subcc %i2, 8, %i2 ! 
decrease count by 8 6112*280575beSPatrick McGehearty add %i1, 8, %i1 ! increase dst ptr by 8 6113*280575beSPatrick McGehearty bgu,pt %ncc, .ci_aln_15 6114*280575beSPatrick McGehearty stx %o4, [%i1-8] ! 6115*280575beSPatrick McGehearty.ci_aln_7: 6116*280575beSPatrick McGehearty addcc %i2, 7, %i2 ! finish adjustment of remaining count 6117*280575beSPatrick McGehearty bz,pt %ncc, .ci_exit ! exit if finished 6118*280575beSPatrick McGehearty cmp %i2, 4 6119*280575beSPatrick McGehearty blt,pt %ncc, .ci_unaln3x ! skip if less than 4 bytes left 6120*280575beSPatrick McGehearty nop ! 6121*280575beSPatrick McGehearty lda [%i0]%asi, %o4 ! move 4 bytes 6122*280575beSPatrick McGehearty add %i0, 4, %i0 ! increase src ptr by 4 6123*280575beSPatrick McGehearty add %i1, 4, %i1 ! increase dst ptr by 4 6124*280575beSPatrick McGehearty subcc %i2, 4, %i2 ! decrease count by 4 6125*280575beSPatrick McGehearty bnz .ci_unaln3x 6126*280575beSPatrick McGehearty stw %o4, [%i1-4] 6127*280575beSPatrick McGehearty ba .ci_exit 6128*280575beSPatrick McGehearty nop 6129*280575beSPatrick McGehearty 6130*280575beSPatrick McGehearty ! destination alignment code 6131*280575beSPatrick McGehearty.ci_big_d1: 6132*280575beSPatrick McGehearty lduba [%i0]%asi, %o4 ! move a byte 6133*280575beSPatrick McGehearty add %i0, 1, %i0 6134*280575beSPatrick McGehearty stb %o4, [%i1] 6135*280575beSPatrick McGehearty add %i1, 1, %i1 6136*280575beSPatrick McGehearty andcc %i1, 2, %o3 6137*280575beSPatrick McGehearty bz,pt %ncc, .ci_big_d2f 6138*280575beSPatrick McGehearty sub %i2, 1, %i2 6139*280575beSPatrick McGehearty.ci_big_d2: ! dest is now at least half word aligned 6140*280575beSPatrick McGehearty lduba [%i0]%asi, %o4 ! move a half-word (src align unknown) 6141*280575beSPatrick McGehearty lduba [%i0+1]%asi, %o3 6142*280575beSPatrick McGehearty add %i0, 2, %i0 6143*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 6144*280575beSPatrick McGehearty or %o4, %o3, %o4 ! 
merge 6145*280575beSPatrick McGehearty sth %o4, [%i1] 6146*280575beSPatrick McGehearty add %i1, 2, %i1 6147*280575beSPatrick McGehearty andcc %i1, 4, %o3 6148*280575beSPatrick McGehearty bz,pt %ncc, .ci_big_d4f 6149*280575beSPatrick McGehearty sub %i2, 2, %i2 6150*280575beSPatrick McGehearty.ci_big_d4: ! dest is at least word aligned 6151*280575beSPatrick McGehearty nop 6152*280575beSPatrick McGehearty lduba [%i0]%asi, %o4 ! move a word (src align unknown) 6153*280575beSPatrick McGehearty lduba [%i0+1]%asi, %o3 6154*280575beSPatrick McGehearty sll %o4, 24, %o4 ! position 6155*280575beSPatrick McGehearty sll %o3, 16, %o3 ! position 6156*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 6157*280575beSPatrick McGehearty lduba [%i0+2]%asi, %o4 6158*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 6159*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 6160*280575beSPatrick McGehearty lduba [%i0+3]%asi, %o4 6161*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 6162*280575beSPatrick McGehearty stw %o4,[%i1] ! store four bytes 6163*280575beSPatrick McGehearty add %i0, 4, %i0 ! adjust src by 4 6164*280575beSPatrick McGehearty add %i1, 4, %i1 ! adjust dest by 4 6165*280575beSPatrick McGehearty ba .ci_big_d4f 6166*280575beSPatrick McGehearty sub %i2, 4, %i2 ! adjust count by 4 6167*280575beSPatrick McGehearty 6168*280575beSPatrick McGehearty 6169*280575beSPatrick McGehearty ! Dst is on 8 byte boundary; src is not; 6170*280575beSPatrick McGehearty.ci_big_unal8: 6171*280575beSPatrick McGehearty andcc %i1, 0x3f, %o3 ! is dst 64-byte block aligned? 6172*280575beSPatrick McGehearty bz %ncc, .ci_unalnsrc 6173*280575beSPatrick McGehearty sub %o3, 64, %o3 ! %o3 will be multiple of 8 6174*280575beSPatrick McGehearty neg %o3 ! bytes until dest is 64 byte aligned 6175*280575beSPatrick McGehearty sub %i2, %o3, %i2 ! update cnt with bytes to be moved 6176*280575beSPatrick McGehearty ! 
Move bytes according to source alignment 6177*280575beSPatrick McGehearty andcc %i0, 0x1, %o4 6178*280575beSPatrick McGehearty bnz %ncc, .ci_unalnbyte ! check for byte alignment 6179*280575beSPatrick McGehearty nop 6180*280575beSPatrick McGehearty andcc %i0, 2, %o4 ! check for half word alignment 6181*280575beSPatrick McGehearty bnz %ncc, .ci_unalnhalf 6182*280575beSPatrick McGehearty nop 6183*280575beSPatrick McGehearty ! Src is word aligned, move bytes until dest 64 byte aligned 6184*280575beSPatrick McGehearty.ci_unalnword: 6185*280575beSPatrick McGehearty lda [%i0]%asi, %o4 ! load 4 bytes 6186*280575beSPatrick McGehearty stw %o4, [%i1] ! and store 4 bytes 6187*280575beSPatrick McGehearty lda [%i0+4]%asi, %o4 ! load 4 bytes 6188*280575beSPatrick McGehearty add %i0, 8, %i0 ! increase src ptr by 8 6189*280575beSPatrick McGehearty stw %o4, [%i1+4] ! and store 4 bytes 6190*280575beSPatrick McGehearty subcc %o3, 8, %o3 ! decrease count by 8 6191*280575beSPatrick McGehearty bnz %ncc, .ci_unalnword 6192*280575beSPatrick McGehearty add %i1, 8, %i1 ! increase dst ptr by 8 6193*280575beSPatrick McGehearty ba .ci_unalnsrc 6194*280575beSPatrick McGehearty nop 6195*280575beSPatrick McGehearty 6196*280575beSPatrick McGehearty ! Src is half-word aligned, move bytes until dest 64 byte aligned 6197*280575beSPatrick McGehearty.ci_unalnhalf: 6198*280575beSPatrick McGehearty lduha [%i0]%asi, %o4 ! load 2 bytes 6199*280575beSPatrick McGehearty sllx %o4, 32, %i3 ! 
shift left 6200*280575beSPatrick McGehearty lduwa [%i0+2]%asi, %o4 6201*280575beSPatrick McGehearty or %o4, %i3, %i3 6202*280575beSPatrick McGehearty sllx %i3, 16, %i3 6203*280575beSPatrick McGehearty lduha [%i0+6]%asi, %o4 6204*280575beSPatrick McGehearty or %o4, %i3, %i3 6205*280575beSPatrick McGehearty stx %i3, [%i1] 6206*280575beSPatrick McGehearty add %i0, 8, %i0 6207*280575beSPatrick McGehearty subcc %o3, 8, %o3 6208*280575beSPatrick McGehearty bnz %ncc, .ci_unalnhalf 6209*280575beSPatrick McGehearty add %i1, 8, %i1 6210*280575beSPatrick McGehearty ba .ci_unalnsrc 6211*280575beSPatrick McGehearty nop 6212*280575beSPatrick McGehearty 6213*280575beSPatrick McGehearty ! Src is Byte aligned, move bytes until dest 64 byte aligned 6214*280575beSPatrick McGehearty.ci_unalnbyte: 6215*280575beSPatrick McGehearty sub %i1, %i0, %i1 ! share pointer advance 6216*280575beSPatrick McGehearty.ci_unalnbyte_loop: 6217*280575beSPatrick McGehearty lduba [%i0]%asi, %o4 6218*280575beSPatrick McGehearty sllx %o4, 56, %i3 6219*280575beSPatrick McGehearty lduha [%i0+1]%asi, %o4 6220*280575beSPatrick McGehearty sllx %o4, 40, %o4 6221*280575beSPatrick McGehearty or %o4, %i3, %i3 6222*280575beSPatrick McGehearty lduha [%i0+3]%asi, %o4 6223*280575beSPatrick McGehearty sllx %o4, 24, %o4 6224*280575beSPatrick McGehearty or %o4, %i3, %i3 6225*280575beSPatrick McGehearty lduha [%i0+5]%asi, %o4 6226*280575beSPatrick McGehearty sllx %o4, 8, %o4 6227*280575beSPatrick McGehearty or %o4, %i3, %i3 6228*280575beSPatrick McGehearty lduba [%i0+7]%asi, %o4 6229*280575beSPatrick McGehearty or %o4, %i3, %i3 6230*280575beSPatrick McGehearty stx %i3, [%i1+%i0] 6231*280575beSPatrick McGehearty subcc %o3, 8, %o3 6232*280575beSPatrick McGehearty bnz %ncc, .ci_unalnbyte_loop 6233*280575beSPatrick McGehearty add %i0, 8, %i0 6234*280575beSPatrick McGehearty add %i1,%i0, %i1 ! restore pointer 6235*280575beSPatrick McGehearty 6236*280575beSPatrick McGehearty ! 
Destination is now block (64 byte aligned), src is not 8 byte aligned 6237*280575beSPatrick McGehearty.ci_unalnsrc: 6238*280575beSPatrick McGehearty andn %i2, 0x3f, %i3 ! %i3 is multiple of block size 6239*280575beSPatrick McGehearty and %i2, 0x3f, %i2 ! residue bytes in %i2 6240*280575beSPatrick McGehearty add %i2, 64, %i2 ! Insure we don't load beyond 6241*280575beSPatrick McGehearty sub %i3, 64, %i3 ! end of source buffer 6242*280575beSPatrick McGehearty 6243*280575beSPatrick McGehearty andn %i0, 0x3f, %o4 ! %o4 has block aligned src address 6244*280575beSPatrick McGehearty prefetcha [%o4 + (3 * CACHE_LINE)]%asi, #one_read 6245*280575beSPatrick McGehearty alignaddr %i0, %g0, %g0 ! generate %gsr 6246*280575beSPatrick McGehearty add %i0, %i3, %i0 ! advance %i0 to after blocks 6247*280575beSPatrick McGehearty ! 6248*280575beSPatrick McGehearty ! Determine source alignment to correct 8 byte offset 6249*280575beSPatrick McGehearty andcc %i0, 0x20, %o3 6250*280575beSPatrick McGehearty brnz,pn %o3, .ci_unaln_1 6251*280575beSPatrick McGehearty andcc %i0, 0x10, %o3 6252*280575beSPatrick McGehearty brnz,pn %o3, .ci_unaln_01 6253*280575beSPatrick McGehearty andcc %i0, 0x08, %o3 6254*280575beSPatrick McGehearty brz,a %o3, .ci_unaln_000 6255*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6256*280575beSPatrick McGehearty ba .ci_unaln_001 6257*280575beSPatrick McGehearty nop 6258*280575beSPatrick McGehearty.ci_unaln_01: 6259*280575beSPatrick McGehearty brnz,a %o3, .ci_unaln_011 6260*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6261*280575beSPatrick McGehearty ba .ci_unaln_010 6262*280575beSPatrick McGehearty nop 6263*280575beSPatrick McGehearty.ci_unaln_1: 6264*280575beSPatrick McGehearty brnz,pn %o3, .ci_unaln_11 6265*280575beSPatrick McGehearty andcc %i0, 0x08, %o3 6266*280575beSPatrick McGehearty brnz,a %o3, .ci_unaln_101 6267*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 
6268*280575beSPatrick McGehearty ba .ci_unaln_100 6269*280575beSPatrick McGehearty nop 6270*280575beSPatrick McGehearty.ci_unaln_11: 6271*280575beSPatrick McGehearty brz,pn %o3, .ci_unaln_110 6272*280575beSPatrick McGehearty prefetcha [%i0 + (4 * CACHE_LINE)]%asi, #one_read 6273*280575beSPatrick McGehearty 6274*280575beSPatrick McGehearty.ci_unaln_111: 6275*280575beSPatrick McGehearty ldda [%o4+56]%asi, %d14 6276*280575beSPatrick McGehearty.ci_unaln_111_loop: 6277*280575beSPatrick McGehearty add %o4, 64, %o4 6278*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d16 6279*280575beSPatrick McGehearty faligndata %d14, %d16, %d48 6280*280575beSPatrick McGehearty faligndata %d16, %d18, %d50 6281*280575beSPatrick McGehearty faligndata %d18, %d20, %d52 6282*280575beSPatrick McGehearty faligndata %d20, %d22, %d54 6283*280575beSPatrick McGehearty faligndata %d22, %d24, %d56 6284*280575beSPatrick McGehearty faligndata %d24, %d26, %d58 6285*280575beSPatrick McGehearty faligndata %d26, %d28, %d60 6286*280575beSPatrick McGehearty faligndata %d28, %d30, %d62 6287*280575beSPatrick McGehearty fmovd %d30, %d14 6288*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 6289*280575beSPatrick McGehearty subcc %i3, 64, %i3 6290*280575beSPatrick McGehearty add %i1, 64, %i1 6291*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_111_loop 6292*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6293*280575beSPatrick McGehearty ba .ci_unaln_done 6294*280575beSPatrick McGehearty nop 6295*280575beSPatrick McGehearty 6296*280575beSPatrick McGehearty.ci_unaln_110: 6297*280575beSPatrick McGehearty ldda [%o4+48]%asi, %d12 6298*280575beSPatrick McGehearty ldda [%o4+56]%asi, %d14 6299*280575beSPatrick McGehearty.ci_unaln_110_loop: 6300*280575beSPatrick McGehearty add %o4, 64, %o4 6301*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d16 6302*280575beSPatrick McGehearty faligndata %d12, %d14, %d48 6303*280575beSPatrick McGehearty faligndata %d14, %d16, %d50 
6304*280575beSPatrick McGehearty faligndata %d16, %d18, %d52 6305*280575beSPatrick McGehearty faligndata %d18, %d20, %d54 6306*280575beSPatrick McGehearty faligndata %d20, %d22, %d56 6307*280575beSPatrick McGehearty faligndata %d22, %d24, %d58 6308*280575beSPatrick McGehearty faligndata %d24, %d26, %d60 6309*280575beSPatrick McGehearty faligndata %d26, %d28, %d62 6310*280575beSPatrick McGehearty fmovd %d28, %d12 6311*280575beSPatrick McGehearty fmovd %d30, %d14 6312*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 6313*280575beSPatrick McGehearty subcc %i3, 64, %i3 6314*280575beSPatrick McGehearty add %i1, 64, %i1 6315*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_110_loop 6316*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6317*280575beSPatrick McGehearty ba .ci_unaln_done 6318*280575beSPatrick McGehearty nop 6319*280575beSPatrick McGehearty 6320*280575beSPatrick McGehearty.ci_unaln_101: 6321*280575beSPatrick McGehearty ldda [%o4+40]%asi, %d10 6322*280575beSPatrick McGehearty ldda [%o4+48]%asi, %d12 6323*280575beSPatrick McGehearty ldda [%o4+56]%asi, %d14 6324*280575beSPatrick McGehearty.ci_unaln_101_loop: 6325*280575beSPatrick McGehearty add %o4, 64, %o4 6326*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d16 6327*280575beSPatrick McGehearty faligndata %d10, %d12, %d48 6328*280575beSPatrick McGehearty faligndata %d12, %d14, %d50 6329*280575beSPatrick McGehearty faligndata %d14, %d16, %d52 6330*280575beSPatrick McGehearty faligndata %d16, %d18, %d54 6331*280575beSPatrick McGehearty faligndata %d18, %d20, %d56 6332*280575beSPatrick McGehearty faligndata %d20, %d22, %d58 6333*280575beSPatrick McGehearty faligndata %d22, %d24, %d60 6334*280575beSPatrick McGehearty faligndata %d24, %d26, %d62 6335*280575beSPatrick McGehearty fmovd %d26, %d10 6336*280575beSPatrick McGehearty fmovd %d28, %d12 6337*280575beSPatrick McGehearty fmovd %d30, %d14 6338*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 6339*280575beSPatrick 
McGehearty subcc %i3, 64, %i3 6340*280575beSPatrick McGehearty add %i1, 64, %i1 6341*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_101_loop 6342*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6343*280575beSPatrick McGehearty ba .ci_unaln_done 6344*280575beSPatrick McGehearty nop 6345*280575beSPatrick McGehearty 6346*280575beSPatrick McGehearty.ci_unaln_100: 6347*280575beSPatrick McGehearty ldda [%o4+32]%asi, %d8 6348*280575beSPatrick McGehearty ldda [%o4+40]%asi, %d10 6349*280575beSPatrick McGehearty ldda [%o4+48]%asi, %d12 6350*280575beSPatrick McGehearty ldda [%o4+56]%asi, %d14 6351*280575beSPatrick McGehearty.ci_unaln_100_loop: 6352*280575beSPatrick McGehearty add %o4, 64, %o4 6353*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d16 6354*280575beSPatrick McGehearty faligndata %d8, %d10, %d48 6355*280575beSPatrick McGehearty faligndata %d10, %d12, %d50 6356*280575beSPatrick McGehearty faligndata %d12, %d14, %d52 6357*280575beSPatrick McGehearty faligndata %d14, %d16, %d54 6358*280575beSPatrick McGehearty faligndata %d16, %d18, %d56 6359*280575beSPatrick McGehearty faligndata %d18, %d20, %d58 6360*280575beSPatrick McGehearty faligndata %d20, %d22, %d60 6361*280575beSPatrick McGehearty faligndata %d22, %d24, %d62 6362*280575beSPatrick McGehearty fmovd %d24, %d8 6363*280575beSPatrick McGehearty fmovd %d26, %d10 6364*280575beSPatrick McGehearty fmovd %d28, %d12 6365*280575beSPatrick McGehearty fmovd %d30, %d14 6366*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 6367*280575beSPatrick McGehearty subcc %i3, 64, %i3 6368*280575beSPatrick McGehearty add %i1, 64, %i1 6369*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_100_loop 6370*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6371*280575beSPatrick McGehearty ba .ci_unaln_done 6372*280575beSPatrick McGehearty nop 6373*280575beSPatrick McGehearty 6374*280575beSPatrick McGehearty.ci_unaln_011: 6375*280575beSPatrick McGehearty ldda 
[%o4+24]%asi, %d6 6376*280575beSPatrick McGehearty ldda [%o4+32]%asi, %d8 6377*280575beSPatrick McGehearty ldda [%o4+40]%asi, %d10 6378*280575beSPatrick McGehearty ldda [%o4+48]%asi, %d12 6379*280575beSPatrick McGehearty ldda [%o4+56]%asi, %d14 6380*280575beSPatrick McGehearty.ci_unaln_011_loop: 6381*280575beSPatrick McGehearty add %o4, 64, %o4 6382*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d16 6383*280575beSPatrick McGehearty faligndata %d6, %d8, %d48 6384*280575beSPatrick McGehearty faligndata %d8, %d10, %d50 6385*280575beSPatrick McGehearty faligndata %d10, %d12, %d52 6386*280575beSPatrick McGehearty faligndata %d12, %d14, %d54 6387*280575beSPatrick McGehearty faligndata %d14, %d16, %d56 6388*280575beSPatrick McGehearty faligndata %d16, %d18, %d58 6389*280575beSPatrick McGehearty faligndata %d18, %d20, %d60 6390*280575beSPatrick McGehearty faligndata %d20, %d22, %d62 6391*280575beSPatrick McGehearty fmovd %d22, %d6 6392*280575beSPatrick McGehearty fmovd %d24, %d8 6393*280575beSPatrick McGehearty fmovd %d26, %d10 6394*280575beSPatrick McGehearty fmovd %d28, %d12 6395*280575beSPatrick McGehearty fmovd %d30, %d14 6396*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 6397*280575beSPatrick McGehearty subcc %i3, 64, %i3 6398*280575beSPatrick McGehearty add %i1, 64, %i1 6399*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_011_loop 6400*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6401*280575beSPatrick McGehearty ba .ci_unaln_done 6402*280575beSPatrick McGehearty nop 6403*280575beSPatrick McGehearty 6404*280575beSPatrick McGehearty.ci_unaln_010: 6405*280575beSPatrick McGehearty ldda [%o4+16]%asi, %d4 6406*280575beSPatrick McGehearty ldda [%o4+24]%asi, %d6 6407*280575beSPatrick McGehearty ldda [%o4+32]%asi, %d8 6408*280575beSPatrick McGehearty ldda [%o4+40]%asi, %d10 6409*280575beSPatrick McGehearty ldda [%o4+48]%asi, %d12 6410*280575beSPatrick McGehearty ldda [%o4+56]%asi, %d14 6411*280575beSPatrick 
McGehearty.ci_unaln_010_loop: 6412*280575beSPatrick McGehearty add %o4, 64, %o4 6413*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d16 6414*280575beSPatrick McGehearty faligndata %d4, %d6, %d48 6415*280575beSPatrick McGehearty faligndata %d6, %d8, %d50 6416*280575beSPatrick McGehearty faligndata %d8, %d10, %d52 6417*280575beSPatrick McGehearty faligndata %d10, %d12, %d54 6418*280575beSPatrick McGehearty faligndata %d12, %d14, %d56 6419*280575beSPatrick McGehearty faligndata %d14, %d16, %d58 6420*280575beSPatrick McGehearty faligndata %d16, %d18, %d60 6421*280575beSPatrick McGehearty faligndata %d18, %d20, %d62 6422*280575beSPatrick McGehearty fmovd %d20, %d4 6423*280575beSPatrick McGehearty fmovd %d22, %d6 6424*280575beSPatrick McGehearty fmovd %d24, %d8 6425*280575beSPatrick McGehearty fmovd %d26, %d10 6426*280575beSPatrick McGehearty fmovd %d28, %d12 6427*280575beSPatrick McGehearty fmovd %d30, %d14 6428*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 6429*280575beSPatrick McGehearty subcc %i3, 64, %i3 6430*280575beSPatrick McGehearty add %i1, 64, %i1 6431*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_010_loop 6432*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6433*280575beSPatrick McGehearty ba .ci_unaln_done 6434*280575beSPatrick McGehearty nop 6435*280575beSPatrick McGehearty 6436*280575beSPatrick McGehearty.ci_unaln_001: 6437*280575beSPatrick McGehearty ldda [%o4+8]%asi, %d2 6438*280575beSPatrick McGehearty ldda [%o4+16]%asi, %d4 6439*280575beSPatrick McGehearty ldda [%o4+24]%asi, %d6 6440*280575beSPatrick McGehearty ldda [%o4+32]%asi, %d8 6441*280575beSPatrick McGehearty ldda [%o4+40]%asi, %d10 6442*280575beSPatrick McGehearty ldda [%o4+48]%asi, %d12 6443*280575beSPatrick McGehearty ldda [%o4+56]%asi, %d14 6444*280575beSPatrick McGehearty.ci_unaln_001_loop: 6445*280575beSPatrick McGehearty add %o4, 64, %o4 6446*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d16 6447*280575beSPatrick McGehearty 
faligndata %d2, %d4, %d48 6448*280575beSPatrick McGehearty faligndata %d4, %d6, %d50 6449*280575beSPatrick McGehearty faligndata %d6, %d8, %d52 6450*280575beSPatrick McGehearty faligndata %d8, %d10, %d54 6451*280575beSPatrick McGehearty faligndata %d10, %d12, %d56 6452*280575beSPatrick McGehearty faligndata %d12, %d14, %d58 6453*280575beSPatrick McGehearty faligndata %d14, %d16, %d60 6454*280575beSPatrick McGehearty faligndata %d16, %d18, %d62 6455*280575beSPatrick McGehearty fmovd %d18, %d2 6456*280575beSPatrick McGehearty fmovd %d20, %d4 6457*280575beSPatrick McGehearty fmovd %d22, %d6 6458*280575beSPatrick McGehearty fmovd %d24, %d8 6459*280575beSPatrick McGehearty fmovd %d26, %d10 6460*280575beSPatrick McGehearty fmovd %d28, %d12 6461*280575beSPatrick McGehearty fmovd %d30, %d14 6462*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 6463*280575beSPatrick McGehearty subcc %i3, 64, %i3 6464*280575beSPatrick McGehearty add %i1, 64, %i1 6465*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_001_loop 6466*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6467*280575beSPatrick McGehearty ba .ci_unaln_done 6468*280575beSPatrick McGehearty nop 6469*280575beSPatrick McGehearty 6470*280575beSPatrick McGehearty.ci_unaln_000: 6471*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d0 6472*280575beSPatrick McGehearty.ci_unaln_000_loop: 6473*280575beSPatrick McGehearty add %o4, 64, %o4 6474*280575beSPatrick McGehearty ldda [%o4]ASI_BLK_AIUS, %d16 6475*280575beSPatrick McGehearty faligndata %d0, %d2, %d48 6476*280575beSPatrick McGehearty faligndata %d2, %d4, %d50 6477*280575beSPatrick McGehearty faligndata %d4, %d6, %d52 6478*280575beSPatrick McGehearty faligndata %d6, %d8, %d54 6479*280575beSPatrick McGehearty faligndata %d8, %d10, %d56 6480*280575beSPatrick McGehearty faligndata %d10, %d12, %d58 6481*280575beSPatrick McGehearty faligndata %d12, %d14, %d60 6482*280575beSPatrick McGehearty faligndata %d14, %d16, %d62 6483*280575beSPatrick 
McGehearty fmovd %d16, %d0 6484*280575beSPatrick McGehearty fmovd %d18, %d2 6485*280575beSPatrick McGehearty fmovd %d20, %d4 6486*280575beSPatrick McGehearty fmovd %d22, %d6 6487*280575beSPatrick McGehearty fmovd %d24, %d8 6488*280575beSPatrick McGehearty fmovd %d26, %d10 6489*280575beSPatrick McGehearty fmovd %d28, %d12 6490*280575beSPatrick McGehearty fmovd %d30, %d14 6491*280575beSPatrick McGehearty stda %d48, [%i1]ASI_BLK_P 6492*280575beSPatrick McGehearty subcc %i3, 64, %i3 6493*280575beSPatrick McGehearty add %i1, 64, %i1 6494*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_000_loop 6495*280575beSPatrick McGehearty prefetcha [%o4 + (4 * CACHE_LINE)]%asi, #one_read 6496*280575beSPatrick McGehearty 6497*280575beSPatrick McGehearty.ci_unaln_done: 6498*280575beSPatrick McGehearty ! Handle trailing bytes, 64 to 127 6499*280575beSPatrick McGehearty ! Dest long word aligned, Src not long word aligned 6500*280575beSPatrick McGehearty cmp %i2, 15 6501*280575beSPatrick McGehearty bleu %ncc, .ci_unaln_short 6502*280575beSPatrick McGehearty 6503*280575beSPatrick McGehearty andn %i2, 0x7, %i3 ! %i3 is multiple of 8 6504*280575beSPatrick McGehearty and %i2, 0x7, %i2 ! residue bytes in %i2 6505*280575beSPatrick McGehearty add %i2, 8, %i2 6506*280575beSPatrick McGehearty sub %i3, 8, %i3 ! insure we don't load past end of src 6507*280575beSPatrick McGehearty andn %i0, 0x7, %o4 ! %o4 has long word aligned src address 6508*280575beSPatrick McGehearty add %i0, %i3, %i0 ! advance %i0 to after multiple of 8 6509*280575beSPatrick McGehearty ldda [%o4]%asi, %d0 ! 
fetch partial word 6510*280575beSPatrick McGehearty.ci_unaln_by8: 6511*280575beSPatrick McGehearty ldda [%o4+8]%asi, %d2 6512*280575beSPatrick McGehearty add %o4, 8, %o4 6513*280575beSPatrick McGehearty faligndata %d0, %d2, %d16 6514*280575beSPatrick McGehearty subcc %i3, 8, %i3 6515*280575beSPatrick McGehearty std %d16, [%i1] 6516*280575beSPatrick McGehearty fmovd %d2, %d0 6517*280575beSPatrick McGehearty bgu,pt %ncc, .ci_unaln_by8 6518*280575beSPatrick McGehearty add %i1, 8, %i1 6519*280575beSPatrick McGehearty 6520*280575beSPatrick McGehearty.ci_unaln_short: 6521*280575beSPatrick McGehearty cmp %i2, 8 6522*280575beSPatrick McGehearty blt,pt %ncc, .ci_unalnfin 6523*280575beSPatrick McGehearty nop 6524*280575beSPatrick McGehearty lduba [%i0]%asi, %o4 6525*280575beSPatrick McGehearty sll %o4, 24, %o3 6526*280575beSPatrick McGehearty lduba [%i0+1]%asi, %o4 6527*280575beSPatrick McGehearty sll %o4, 16, %o4 6528*280575beSPatrick McGehearty or %o4, %o3, %o3 6529*280575beSPatrick McGehearty lduba [%i0+2]%asi, %o4 6530*280575beSPatrick McGehearty sll %o4, 8, %o4 6531*280575beSPatrick McGehearty or %o4, %o3, %o3 6532*280575beSPatrick McGehearty lduba [%i0+3]%asi, %o4 6533*280575beSPatrick McGehearty or %o4, %o3, %o3 6534*280575beSPatrick McGehearty stw %o3, [%i1] 6535*280575beSPatrick McGehearty lduba [%i0+4]%asi, %o4 6536*280575beSPatrick McGehearty sll %o4, 24, %o3 6537*280575beSPatrick McGehearty lduba [%i0+5]%asi, %o4 6538*280575beSPatrick McGehearty sll %o4, 16, %o4 6539*280575beSPatrick McGehearty or %o4, %o3, %o3 6540*280575beSPatrick McGehearty lduba [%i0+6]%asi, %o4 6541*280575beSPatrick McGehearty sll %o4, 8, %o4 6542*280575beSPatrick McGehearty or %o4, %o3, %o3 6543*280575beSPatrick McGehearty lduba [%i0+7]%asi, %o4 6544*280575beSPatrick McGehearty or %o4, %o3, %o3 6545*280575beSPatrick McGehearty stw %o3, [%i1+4] 6546*280575beSPatrick McGehearty add %i0, 8, %i0 6547*280575beSPatrick McGehearty add %i1, 8, %i1 6548*280575beSPatrick McGehearty sub %i2, 8, %i2 
6549*280575beSPatrick McGehearty.ci_unalnfin: 6550*280575beSPatrick McGehearty cmp %i2, 4 6551*280575beSPatrick McGehearty blt,pt %ncc, .ci_unalnz 6552*280575beSPatrick McGehearty tst %i2 6553*280575beSPatrick McGehearty lduba [%i0]%asi, %o3 ! read byte 6554*280575beSPatrick McGehearty subcc %i2, 4, %i2 ! reduce count by 4 6555*280575beSPatrick McGehearty sll %o3, 24, %o3 ! position 6556*280575beSPatrick McGehearty lduba [%i0+1]%asi, %o4 6557*280575beSPatrick McGehearty sll %o4, 16, %o4 ! position 6558*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 6559*280575beSPatrick McGehearty lduba [%i0+2]%asi, %o4 6560*280575beSPatrick McGehearty sll %o4, 8, %o4 ! position 6561*280575beSPatrick McGehearty or %o4, %o3, %o3 ! merge 6562*280575beSPatrick McGehearty add %i1, 4, %i1 ! advance dst by 4 6563*280575beSPatrick McGehearty lduba [%i0+3]%asi, %o4 6564*280575beSPatrick McGehearty add %i0, 4, %i0 ! advance src by 4 6565*280575beSPatrick McGehearty or %o4, %o3, %o4 ! merge 6566*280575beSPatrick McGehearty bnz,pt %ncc, .ci_unaln3x 6567*280575beSPatrick McGehearty stw %o4, [%i1-4] 6568*280575beSPatrick McGehearty ba .ci_exit 6569*280575beSPatrick McGehearty nop 6570*280575beSPatrick McGehearty.ci_unalnz: 6571*280575beSPatrick McGehearty bz,pt %ncc, .ci_exit 6572*280575beSPatrick McGehearty wr %l5, %g0, %gsr ! restore %gsr 6573*280575beSPatrick McGehearty.ci_unaln3x: ! Exactly 1, 2, or 3 bytes remain 6574*280575beSPatrick McGehearty subcc %i2, 1, %i2 ! reduce count for cc test 6575*280575beSPatrick McGehearty lduba [%i0]%asi, %o4 ! load one byte 6576*280575beSPatrick McGehearty bz,pt %ncc, .ci_exit 6577*280575beSPatrick McGehearty stb %o4, [%i1] ! store one byte 6578*280575beSPatrick McGehearty lduba [%i0+1]%asi, %o4 ! load second byte 6579*280575beSPatrick McGehearty subcc %i2, 1, %i2 6580*280575beSPatrick McGehearty bz,pt %ncc, .ci_exit 6581*280575beSPatrick McGehearty stb %o4, [%i1+1] ! store second byte 6582*280575beSPatrick McGehearty lduba [%i0+2]%asi, %o4 ! 
load third byte 6583*280575beSPatrick McGehearty stb %o4, [%i1+2] ! store third byte 6584*280575beSPatrick McGehearty.ci_exit: 6585*280575beSPatrick McGehearty brnz %g1, .ci_fp_restore 6586*280575beSPatrick McGehearty nop 6587*280575beSPatrick McGehearty FZERO 6588*280575beSPatrick McGehearty wr %g1, %g0, %fprs 6589*280575beSPatrick McGehearty ba,pt %ncc, .ci_ex2 6590*280575beSPatrick McGehearty membar #Sync 6591*280575beSPatrick McGehearty.ci_fp_restore: 6592*280575beSPatrick McGehearty BLD_FP_FROMSTACK(%o4) 6593*280575beSPatrick McGehearty.ci_ex2: 6594*280575beSPatrick McGehearty andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 6595*280575beSPatrick McGehearty stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 6596*280575beSPatrick McGehearty ret 6597*280575beSPatrick McGehearty restore %g0, 0, %o0 6598*280575beSPatrick McGehearty 6599*280575beSPatrick McGehearty.copyin_err: 6600*280575beSPatrick McGehearty ldn [THREAD_REG + T_COPYOPS], %o4 6601*280575beSPatrick McGehearty brz %o4, 2f 6602*280575beSPatrick McGehearty nop 6603*280575beSPatrick McGehearty ldn [%o4 + CP_COPYIN], %g2 6604*280575beSPatrick McGehearty jmp %g2 6605*280575beSPatrick McGehearty nop 6606*280575beSPatrick McGehearty2: 6607*280575beSPatrick McGehearty retl 6608*280575beSPatrick McGehearty mov -1, %o0 6609*280575beSPatrick McGehearty 6610*280575beSPatrick McGehearty#else /* NIAGARA_IMPL */ 66117c478bd9Sstevel@tonic-gate.do_copyin: 66127c478bd9Sstevel@tonic-gate ! 66137c478bd9Sstevel@tonic-gate ! Check the length and bail if zero. 66147c478bd9Sstevel@tonic-gate ! 
66157c478bd9Sstevel@tonic-gate tst %o2 66167c478bd9Sstevel@tonic-gate bnz,pt %ncc, 1f 66177c478bd9Sstevel@tonic-gate nop 66187c478bd9Sstevel@tonic-gate retl 66197c478bd9Sstevel@tonic-gate clr %o0 66207c478bd9Sstevel@tonic-gate1: 66217c478bd9Sstevel@tonic-gate sethi %hi(copyio_fault), %o4 66227c478bd9Sstevel@tonic-gate or %o4, %lo(copyio_fault), %o4 66237c478bd9Sstevel@tonic-gate sethi %hi(copyio_fault_nowindow), %o3 66247c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT 66257c478bd9Sstevel@tonic-gate or %o3, %lo(copyio_fault_nowindow), %o3 66267c478bd9Sstevel@tonic-gate membar #Sync 66277c478bd9Sstevel@tonic-gate stn %o3, [THREAD_REG + T_LOFAULT] 66287c478bd9Sstevel@tonic-gate 66297c478bd9Sstevel@tonic-gate mov %o0, SAVE_SRC 66307c478bd9Sstevel@tonic-gate mov %o1, SAVE_DST 66317c478bd9Sstevel@tonic-gate mov %o2, SAVE_COUNT 66327c478bd9Sstevel@tonic-gate 66337c478bd9Sstevel@tonic-gate ! 66347c478bd9Sstevel@tonic-gate ! Check to see if we're more than SMALL_LIMIT. 66357c478bd9Sstevel@tonic-gate ! 66367c478bd9Sstevel@tonic-gate subcc %o2, SMALL_LIMIT, %o3 66377c478bd9Sstevel@tonic-gate bgu,a,pt %ncc, .dci_ns 66387c478bd9Sstevel@tonic-gate or %o0, %o1, %o3 66397c478bd9Sstevel@tonic-gate ! 66407c478bd9Sstevel@tonic-gate ! What was previously ".small_copyin" 66417c478bd9Sstevel@tonic-gate ! 66427c478bd9Sstevel@tonic-gate.dcibcp: 66437c478bd9Sstevel@tonic-gate sub %g0, %o2, %o3 ! setup for copy loop 66447c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 66457c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 66467c478bd9Sstevel@tonic-gate ba,pt %ncc, .dcicl 66477c478bd9Sstevel@tonic-gate lduba [%o0 + %o3]ASI_USER, %o4 66487c478bd9Sstevel@tonic-gate ! 66497c478bd9Sstevel@tonic-gate ! %o0 and %o1 point at the end and remain pointing at the end 66507c478bd9Sstevel@tonic-gate ! of their buffers. We pull things out by adding %o3 (which is 66517c478bd9Sstevel@tonic-gate ! the negation of the length) to the buffer end which gives us 66527c478bd9Sstevel@tonic-gate ! 
the current location in the buffers. By incrementing %o3 we walk 66537c478bd9Sstevel@tonic-gate ! through both buffers without having to bump each buffer's 66547c478bd9Sstevel@tonic-gate ! pointer. A very fast 4 instruction loop. 66557c478bd9Sstevel@tonic-gate ! 66567c478bd9Sstevel@tonic-gate .align 16 66577c478bd9Sstevel@tonic-gate.dcicl: 66587c478bd9Sstevel@tonic-gate stb %o4, [%o1 + %o3] 66597c478bd9Sstevel@tonic-gate inccc %o3 66607c478bd9Sstevel@tonic-gate bl,a,pt %ncc, .dcicl 66617c478bd9Sstevel@tonic-gate lduba [%o0 + %o3]ASI_USER, %o4 66627c478bd9Sstevel@tonic-gate ! 66637c478bd9Sstevel@tonic-gate ! We're done. Go home. 66647c478bd9Sstevel@tonic-gate ! 66657c478bd9Sstevel@tonic-gate membar #Sync 66667c478bd9Sstevel@tonic-gate stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 66677c478bd9Sstevel@tonic-gate retl 66687c478bd9Sstevel@tonic-gate clr %o0 66697c478bd9Sstevel@tonic-gate ! 66707c478bd9Sstevel@tonic-gate ! Try aligned copies from here. 66717c478bd9Sstevel@tonic-gate ! 66727c478bd9Sstevel@tonic-gate.dci_ns: 66737c478bd9Sstevel@tonic-gate ! 66747c478bd9Sstevel@tonic-gate ! See if we're single byte aligned. If we are, check the 66757c478bd9Sstevel@tonic-gate ! limit for single byte copies. If we're smaller, or equal, 66767c478bd9Sstevel@tonic-gate ! bounce to the byte for byte copy loop. Otherwise do it in 66777c478bd9Sstevel@tonic-gate ! HW (if enabled). 66787c478bd9Sstevel@tonic-gate ! 66797c478bd9Sstevel@tonic-gate btst 1, %o3 66807c478bd9Sstevel@tonic-gate bz,a,pt %icc, .dcih8 66817c478bd9Sstevel@tonic-gate btst 7, %o3 66827c478bd9Sstevel@tonic-gate ! 66837c478bd9Sstevel@tonic-gate ! We're single byte aligned. 66847c478bd9Sstevel@tonic-gate ! 66857c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 66867c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 66877c478bd9Sstevel@tonic-gate ! 66887c478bd9Sstevel@tonic-gate ! Is HW copy on? If not do everything byte for byte. 66897c478bd9Sstevel@tonic-gate ! 
66907c478bd9Sstevel@tonic-gate tst %o3 66917c478bd9Sstevel@tonic-gate bz,pn %icc, .dcibcp 66927c478bd9Sstevel@tonic-gate subcc %o3, %o2, %o3 66937c478bd9Sstevel@tonic-gate ! 66947c478bd9Sstevel@tonic-gate ! Are we bigger than the HW limit? If not 66957c478bd9Sstevel@tonic-gate ! go to byte for byte. 66967c478bd9Sstevel@tonic-gate ! 66977c478bd9Sstevel@tonic-gate bge,pt %ncc, .dcibcp 66987c478bd9Sstevel@tonic-gate nop 66997c478bd9Sstevel@tonic-gate ! 67007c478bd9Sstevel@tonic-gate ! We're big enough and copy is on. Do it with HW. 67017c478bd9Sstevel@tonic-gate ! 67027c478bd9Sstevel@tonic-gate ba,pt %ncc, .big_copyin 67037c478bd9Sstevel@tonic-gate nop 67047c478bd9Sstevel@tonic-gate.dcih8: 67057c478bd9Sstevel@tonic-gate ! 67067c478bd9Sstevel@tonic-gate ! 8 byte aligned? 67077c478bd9Sstevel@tonic-gate ! 67087c478bd9Sstevel@tonic-gate bnz,a %ncc, .dcih4 67097c478bd9Sstevel@tonic-gate btst 3, %o3 67107c478bd9Sstevel@tonic-gate ! 67117c478bd9Sstevel@tonic-gate ! We're eight byte aligned. 67127c478bd9Sstevel@tonic-gate ! 67137c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 67147c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 67157c478bd9Sstevel@tonic-gate ! 67167c478bd9Sstevel@tonic-gate ! Is HW assist on? If not, do it with the aligned copy. 67177c478bd9Sstevel@tonic-gate ! 67187c478bd9Sstevel@tonic-gate tst %o3 67197c478bd9Sstevel@tonic-gate bz,pn %icc, .dcis8 67207c478bd9Sstevel@tonic-gate subcc %o3, %o2, %o3 67217c478bd9Sstevel@tonic-gate bge %ncc, .dcis8 67227c478bd9Sstevel@tonic-gate nop 67237c478bd9Sstevel@tonic-gate ba,pt %ncc, .big_copyin 67247c478bd9Sstevel@tonic-gate nop 67257c478bd9Sstevel@tonic-gate.dcis8: 67267c478bd9Sstevel@tonic-gate ! 67277c478bd9Sstevel@tonic-gate ! Housekeeping for copy loops. Uses same idea as in the byte for 67287c478bd9Sstevel@tonic-gate ! byte copy loop above. 67297c478bd9Sstevel@tonic-gate ! 
67307c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 67317c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 67327c478bd9Sstevel@tonic-gate sub %g0, %o2, %o3 67337c478bd9Sstevel@tonic-gate ba,pt %ncc, .didebc 67347c478bd9Sstevel@tonic-gate srl %o2, 3, %o2 ! Number of 8 byte chunks to copy 67357c478bd9Sstevel@tonic-gate ! 67367c478bd9Sstevel@tonic-gate ! 4 byte aligned? 67377c478bd9Sstevel@tonic-gate ! 67387c478bd9Sstevel@tonic-gate.dcih4: 67397c478bd9Sstevel@tonic-gate bnz %ncc, .dcih2 67407c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 67417c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 67427c478bd9Sstevel@tonic-gate ! 67437c478bd9Sstevel@tonic-gate ! Is HW assist on? If not, do it with the aligned copy. 67447c478bd9Sstevel@tonic-gate ! 67457c478bd9Sstevel@tonic-gate tst %o3 67467c478bd9Sstevel@tonic-gate bz,pn %icc, .dcis4 67477c478bd9Sstevel@tonic-gate subcc %o3, %o2, %o3 67487c478bd9Sstevel@tonic-gate ! 67497c478bd9Sstevel@tonic-gate ! We're negative if our size is larger than hw_copy_limit_4. 67507c478bd9Sstevel@tonic-gate ! 67517c478bd9Sstevel@tonic-gate bge %ncc, .dcis4 67527c478bd9Sstevel@tonic-gate nop 67537c478bd9Sstevel@tonic-gate ba,pt %ncc, .big_copyin 67547c478bd9Sstevel@tonic-gate nop 67557c478bd9Sstevel@tonic-gate.dcis4: 67567c478bd9Sstevel@tonic-gate ! 67577c478bd9Sstevel@tonic-gate ! Housekeeping for copy loops. Uses same idea as in the byte 67587c478bd9Sstevel@tonic-gate ! for byte copy loop above. 67597c478bd9Sstevel@tonic-gate ! 67607c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 67617c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 67627c478bd9Sstevel@tonic-gate sub %g0, %o2, %o3 67637c478bd9Sstevel@tonic-gate ba,pt %ncc, .didfbc 67647c478bd9Sstevel@tonic-gate srl %o2, 2, %o2 ! Number of 4 byte chunks to copy 67657c478bd9Sstevel@tonic-gate.dcih2: 67667c478bd9Sstevel@tonic-gate ! 67677c478bd9Sstevel@tonic-gate ! We're two byte aligned. Check for "smallness" 67687c478bd9Sstevel@tonic-gate ! 
done in delay at .dcih4 67697c478bd9Sstevel@tonic-gate ! 67707c478bd9Sstevel@tonic-gate bleu,pt %ncc, .dcis2 67717c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 67727c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 67737c478bd9Sstevel@tonic-gate ! 67747c478bd9Sstevel@tonic-gate ! Is HW assist on? If not, do it with the aligned copy. 67757c478bd9Sstevel@tonic-gate ! 67767c478bd9Sstevel@tonic-gate tst %o3 67777c478bd9Sstevel@tonic-gate bz,pn %icc, .dcis2 67787c478bd9Sstevel@tonic-gate subcc %o3, %o2, %o3 67797c478bd9Sstevel@tonic-gate ! 67807c478bd9Sstevel@tonic-gate ! Are we larger than the HW limit? 67817c478bd9Sstevel@tonic-gate ! 67827c478bd9Sstevel@tonic-gate bge %ncc, .dcis2 67837c478bd9Sstevel@tonic-gate nop 67847c478bd9Sstevel@tonic-gate ! 67857c478bd9Sstevel@tonic-gate ! HW assist is on and we're large enough to use it. 67867c478bd9Sstevel@tonic-gate ! 67877c478bd9Sstevel@tonic-gate ba,pt %ncc, .big_copyin 67887c478bd9Sstevel@tonic-gate nop 67897c478bd9Sstevel@tonic-gate ! 67907c478bd9Sstevel@tonic-gate ! Housekeeping for copy loops. Uses same idea as in the byte 67917c478bd9Sstevel@tonic-gate ! for byte copy loop above. 67927c478bd9Sstevel@tonic-gate ! 67937c478bd9Sstevel@tonic-gate.dcis2: 67947c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 67957c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 67967c478bd9Sstevel@tonic-gate sub %g0, %o2, %o3 67977c478bd9Sstevel@tonic-gate ba,pt %ncc, .didtbc 67987c478bd9Sstevel@tonic-gate srl %o2, 1, %o2 ! Number of 2 byte chunks to copy 67997c478bd9Sstevel@tonic-gate ! 68007c478bd9Sstevel@tonic-gate.small_copyin: 68017c478bd9Sstevel@tonic-gate ! 68027c478bd9Sstevel@tonic-gate ! Why are we doing this AGAIN? There are certain conditions in 68037c478bd9Sstevel@tonic-gate ! big copyin that will cause us to forgo the HW assisted copies 68047c478bd9Sstevel@tonic-gate ! and bounce back to a non-hw assisted copy. This dispatches 68057c478bd9Sstevel@tonic-gate ! those copies. 
Note that we branch around this in the main line 68067c478bd9Sstevel@tonic-gate ! code. 68077c478bd9Sstevel@tonic-gate ! 68087c478bd9Sstevel@tonic-gate ! We make no check for limits or HW enablement here. We've 68097c478bd9Sstevel@tonic-gate ! already been told that we're a poster child so just go off 68107c478bd9Sstevel@tonic-gate ! and do it. 68117c478bd9Sstevel@tonic-gate ! 68127c478bd9Sstevel@tonic-gate or %o0, %o1, %o3 68137c478bd9Sstevel@tonic-gate btst 1, %o3 68147c478bd9Sstevel@tonic-gate bnz %icc, .dcibcp ! Most likely 68157c478bd9Sstevel@tonic-gate btst 7, %o3 68167c478bd9Sstevel@tonic-gate bz %icc, .dcis8 68177c478bd9Sstevel@tonic-gate btst 3, %o3 68187c478bd9Sstevel@tonic-gate bz %icc, .dcis4 68197c478bd9Sstevel@tonic-gate nop 68207c478bd9Sstevel@tonic-gate ba,pt %ncc, .dcis2 68217c478bd9Sstevel@tonic-gate nop 68227c478bd9Sstevel@tonic-gate ! 68237c478bd9Sstevel@tonic-gate ! Eight byte aligned copies. A steal from the original .small_copyin 68247c478bd9Sstevel@tonic-gate ! with modifications. %o2 is number of 8 byte chunks to copy. When 68257c478bd9Sstevel@tonic-gate ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more 68267c478bd9Sstevel@tonic-gate ! to copy. 68277c478bd9Sstevel@tonic-gate ! 68287c478bd9Sstevel@tonic-gate .align 32 68297c478bd9Sstevel@tonic-gate.didebc: 68307c478bd9Sstevel@tonic-gate ldxa [%o0 + %o3]ASI_USER, %o4 68317c478bd9Sstevel@tonic-gate deccc %o2 68327c478bd9Sstevel@tonic-gate stx %o4, [%o1 + %o3] 68337c478bd9Sstevel@tonic-gate bg,pt %ncc, .didebc 68347c478bd9Sstevel@tonic-gate addcc %o3, 8, %o3 68357c478bd9Sstevel@tonic-gate ! 68367c478bd9Sstevel@tonic-gate ! End of copy loop. Most 8 byte aligned copies end here. 68377c478bd9Sstevel@tonic-gate ! 68387c478bd9Sstevel@tonic-gate bz,pt %ncc, .dcifh 68397c478bd9Sstevel@tonic-gate nop 68407c478bd9Sstevel@tonic-gate ! 68417c478bd9Sstevel@tonic-gate ! Something is left. Do it byte for byte. 68427c478bd9Sstevel@tonic-gate ! 
68437c478bd9Sstevel@tonic-gate ba,pt %ncc, .dcicl 68447c478bd9Sstevel@tonic-gate lduba [%o0 + %o3]ASI_USER, %o4 68457c478bd9Sstevel@tonic-gate ! 68467c478bd9Sstevel@tonic-gate ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy. 68477c478bd9Sstevel@tonic-gate ! 68487c478bd9Sstevel@tonic-gate .align 32 68497c478bd9Sstevel@tonic-gate.didfbc: 68507c478bd9Sstevel@tonic-gate lduwa [%o0 + %o3]ASI_USER, %o4 68517c478bd9Sstevel@tonic-gate deccc %o2 68527c478bd9Sstevel@tonic-gate st %o4, [%o1 + %o3] 68537c478bd9Sstevel@tonic-gate bg,pt %ncc, .didfbc 68547c478bd9Sstevel@tonic-gate addcc %o3, 4, %o3 68557c478bd9Sstevel@tonic-gate ! 68567c478bd9Sstevel@tonic-gate ! End of copy loop. Most 4 byte aligned copies end here. 68577c478bd9Sstevel@tonic-gate ! 68587c478bd9Sstevel@tonic-gate bz,pt %ncc, .dcifh 68597c478bd9Sstevel@tonic-gate nop 68607c478bd9Sstevel@tonic-gate ! 68617c478bd9Sstevel@tonic-gate ! Something is left. Do it byte for byte. 68627c478bd9Sstevel@tonic-gate ! 68637c478bd9Sstevel@tonic-gate ba,pt %ncc, .dcicl 68647c478bd9Sstevel@tonic-gate lduba [%o0 + %o3]ASI_USER, %o4 68657c478bd9Sstevel@tonic-gate ! 68667c478bd9Sstevel@tonic-gate ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to 68677c478bd9Sstevel@tonic-gate ! copy. 68687c478bd9Sstevel@tonic-gate ! 68697c478bd9Sstevel@tonic-gate .align 32 68707c478bd9Sstevel@tonic-gate.didtbc: 68717c478bd9Sstevel@tonic-gate lduha [%o0 + %o3]ASI_USER, %o4 68727c478bd9Sstevel@tonic-gate deccc %o2 68737c478bd9Sstevel@tonic-gate sth %o4, [%o1 + %o3] 68747c478bd9Sstevel@tonic-gate bg,pt %ncc, .didtbc 68757c478bd9Sstevel@tonic-gate addcc %o3, 2, %o3 68767c478bd9Sstevel@tonic-gate ! 68777c478bd9Sstevel@tonic-gate ! End of copy loop. Most 2 byte aligned copies end here. 68787c478bd9Sstevel@tonic-gate ! 68797c478bd9Sstevel@tonic-gate bz,pt %ncc, .dcifh 68807c478bd9Sstevel@tonic-gate nop 68817c478bd9Sstevel@tonic-gate ! 68827c478bd9Sstevel@tonic-gate ! Deal with the last byte 68837c478bd9Sstevel@tonic-gate ! 
68847c478bd9Sstevel@tonic-gate lduba [%o0 + %o3]ASI_USER, %o4 68857c478bd9Sstevel@tonic-gate stb %o4, [%o1 + %o3] 68867c478bd9Sstevel@tonic-gate.dcifh: 68877c478bd9Sstevel@tonic-gate membar #Sync 68887c478bd9Sstevel@tonic-gate stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 68897c478bd9Sstevel@tonic-gate retl 68907c478bd9Sstevel@tonic-gate clr %o0 68917c478bd9Sstevel@tonic-gate 68927c478bd9Sstevel@tonic-gate.big_copyin: 68937c478bd9Sstevel@tonic-gate ! We're going off to do a block copy. 68947c478bd9Sstevel@tonic-gate ! Switch fault handlers and grab a window. We 68957c478bd9Sstevel@tonic-gate ! don't do a membar #Sync since we've done only 68967c478bd9Sstevel@tonic-gate ! kernel data to this point. 68977c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] 68987c478bd9Sstevel@tonic-gate 68997c478bd9Sstevel@tonic-gate ! Copyins that reach here are larger than 256 bytes. The 69007c478bd9Sstevel@tonic-gate ! hw_copy_limit_1 is set to 256. Never set this limit less than 69017c478bd9Sstevel@tonic-gate ! 128 bytes. 6902340af271Swh94709 save %sp, -SA(MINFRAME), %sp 69037c478bd9Sstevel@tonic-gate.do_blockcopyin: 69047c478bd9Sstevel@tonic-gate 69057c478bd9Sstevel@tonic-gate ! Swap src/dst since the code below is memcpy code 69067c478bd9Sstevel@tonic-gate ! and memcpy/bcopy have different calling sequences 69077c478bd9Sstevel@tonic-gate mov %i1, %i5 69087c478bd9Sstevel@tonic-gate mov %i0, %i1 69097c478bd9Sstevel@tonic-gate mov %i5, %i0 69107c478bd9Sstevel@tonic-gate 6911340af271Swh94709 ! Block (64 bytes) align the destination. 6912340af271Swh94709 andcc %i0, 0x3f, %i3 ! is dst block aligned 6913340af271Swh94709 bz %ncc, copyin_blalign ! dst already block aligned 6914340af271Swh94709 sub %i3, 0x40, %i3 6915340af271Swh94709 neg %i3 ! bytes till dst 64 bytes aligned 6916340af271Swh94709 sub %i2, %i3, %i2 ! update i2 with new count 69177c478bd9Sstevel@tonic-gate 6918340af271Swh94709 ! Based on source and destination alignment do 6919340af271Swh94709 ! 
either 8 bytes, 4 bytes, 2 bytes or byte copy. 69207c478bd9Sstevel@tonic-gate 6921340af271Swh94709 ! Is dst & src 8B aligned 6922340af271Swh94709 or %i0, %i1, %o2 6923340af271Swh94709 andcc %o2, 0x7, %g0 6924340af271Swh94709 bz %ncc, .ci_alewdcp 6925340af271Swh94709 nop 6926340af271Swh94709 6927340af271Swh94709 ! Is dst & src 4B aligned 6928340af271Swh94709 andcc %o2, 0x3, %g0 6929340af271Swh94709 bz %ncc, .ci_alwdcp 6930340af271Swh94709 nop 6931340af271Swh94709 6932340af271Swh94709 ! Is dst & src 2B aligned 6933340af271Swh94709 andcc %o2, 0x1, %g0 6934340af271Swh94709 bz %ncc, .ci_alhlfwdcp 6935340af271Swh94709 nop 6936340af271Swh94709 6937340af271Swh94709 ! 1B aligned 6938340af271Swh947091: lduba [%i1]ASI_USER, %o2 6939340af271Swh94709 stb %o2, [%i0] 69407c478bd9Sstevel@tonic-gate inc %i1 69417c478bd9Sstevel@tonic-gate deccc %i3 6942340af271Swh94709 bgu,pt %ncc, 1b 69437c478bd9Sstevel@tonic-gate inc %i0 69447c478bd9Sstevel@tonic-gate 69457c478bd9Sstevel@tonic-gate ba copyin_blalign 6946340af271Swh94709 nop 69477c478bd9Sstevel@tonic-gate 6948340af271Swh94709 ! dst & src 4B aligned 6949340af271Swh94709.ci_alwdcp: 6950340af271Swh94709 lda [%i1]ASI_USER, %o2 6951340af271Swh94709 st %o2, [%i0] 6952340af271Swh94709 add %i1, 0x4, %i1 6953340af271Swh94709 subcc %i3, 0x4, %i3 6954340af271Swh94709 bgu,pt %ncc, .ci_alwdcp 6955340af271Swh94709 add %i0, 0x4, %i0 6956340af271Swh94709 6957340af271Swh94709 ba copyin_blalign 6958340af271Swh94709 nop 6959340af271Swh94709 6960340af271Swh94709 ! dst & src 2B aligned 6961340af271Swh94709.ci_alhlfwdcp: 6962340af271Swh94709 lduha [%i1]ASI_USER, %o2 6963340af271Swh94709 stuh %o2, [%i0] 6964340af271Swh94709 add %i1, 0x2, %i1 6965340af271Swh94709 subcc %i3, 0x2, %i3 6966340af271Swh94709 bgu,pt %ncc, .ci_alhlfwdcp 6967340af271Swh94709 add %i0, 0x2, %i0 6968340af271Swh94709 6969340af271Swh94709 ba copyin_blalign 6970340af271Swh94709 nop 6971340af271Swh94709 6972340af271Swh94709 ! 
dst & src 8B aligned 6973340af271Swh94709.ci_alewdcp: 69747c478bd9Sstevel@tonic-gate ldxa [%i1]ASI_USER, %o2 69757c478bd9Sstevel@tonic-gate stx %o2, [%i0] 69767c478bd9Sstevel@tonic-gate add %i1, 0x8, %i1 69777c478bd9Sstevel@tonic-gate subcc %i3, 0x8, %i3 6978340af271Swh94709 bgu,pt %ncc, .ci_alewdcp 69797c478bd9Sstevel@tonic-gate add %i0, 0x8, %i0 69807c478bd9Sstevel@tonic-gate 69817c478bd9Sstevel@tonic-gatecopyin_blalign: 69827c478bd9Sstevel@tonic-gate andn %i2, 0x3f, %i3 ! %i3 count is multiple of block size 69837c478bd9Sstevel@tonic-gate sub %i2, %i3, %i2 ! Residue bytes in %i2 69847c478bd9Sstevel@tonic-gate 69857c478bd9Sstevel@tonic-gate mov ASI_BLK_INIT_ST_QUAD_LDD_P, %asi 69867c478bd9Sstevel@tonic-gate 69877c478bd9Sstevel@tonic-gate andcc %i1, 0xf, %o2 ! is src quadword aligned 69887c478bd9Sstevel@tonic-gate bz,pn %xcc, .ci_blkcpy ! src offset in %o2 (last 4-bits) 69897c478bd9Sstevel@tonic-gate nop 69907c478bd9Sstevel@tonic-gate cmp %o2, 0x8 69917c478bd9Sstevel@tonic-gate bg .ci_upper_double 69927c478bd9Sstevel@tonic-gate nop 69937c478bd9Sstevel@tonic-gate bl .ci_lower_double 69947c478bd9Sstevel@tonic-gate nop 69957c478bd9Sstevel@tonic-gate 69967c478bd9Sstevel@tonic-gate ! Falls through when source offset is equal to 8 i.e. 69977c478bd9Sstevel@tonic-gate ! source is double word aligned. 69987c478bd9Sstevel@tonic-gate ! In this case no shift/merge of data is required 69997c478bd9Sstevel@tonic-gate 70007c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! align the src at 16 bytes. 70017c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! 
%l0 has block aligned source 7002d142717dSae112802 prefetcha [%l0]ASI_USER, #one_read 70037c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 7004d142717dSae112802 add %l0, 0x40, %l0 70057c478bd9Sstevel@tonic-gate.ci_loop0: 70067c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 70077c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 70087c478bd9Sstevel@tonic-gate 7009d142717dSae112802 prefetcha [%l0]ASI_USER, #one_read 70107c478bd9Sstevel@tonic-gate 70117c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x0]%asi 70127c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x8]%asi 70137c478bd9Sstevel@tonic-gate 70147c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 70157c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 70167c478bd9Sstevel@tonic-gate 70177c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x10]%asi 70187c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x18]%asi 70197c478bd9Sstevel@tonic-gate 70207c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 70217c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 70227c478bd9Sstevel@tonic-gate 70237c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x20]%asi 70247c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x28]%asi 70257c478bd9Sstevel@tonic-gate 70267c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 70277c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 70287c478bd9Sstevel@tonic-gate 70297c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x30]%asi 70307c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x38]%asi 70317c478bd9Sstevel@tonic-gate 70327c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 70337c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 70347c478bd9Sstevel@tonic-gate bgu,pt %xcc, .ci_loop0 70357c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 70367c478bd9Sstevel@tonic-gate ba .ci_blkdone 70377c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 70387c478bd9Sstevel@tonic-gate ! 
the src offset was stored in %o2 70397c478bd9Sstevel@tonic-gate 70407c478bd9Sstevel@tonic-gate.ci_lower_double: 70417c478bd9Sstevel@tonic-gate 70427c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! align the src at 16 bytes. 70437c478bd9Sstevel@tonic-gate sll %o2, 3, %o0 ! %o0 left shift 70447c478bd9Sstevel@tonic-gate mov 0x40, %o1 70457c478bd9Sstevel@tonic-gate sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 70467c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! %l0 has block aligned source 7047d142717dSae112802 prefetcha [%l0]ASI_USER, #one_read 70487c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 ! partial data in %l2 70497c478bd9Sstevel@tonic-gate ! and %l3 has complete 70507c478bd9Sstevel@tonic-gate ! data 7051d142717dSae112802 add %l0, 0x40, %l0 70527c478bd9Sstevel@tonic-gate.ci_loop1: 70537c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 70547c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 ! %l4 has partial data 70557c478bd9Sstevel@tonic-gate ! for this read. 70567c478bd9Sstevel@tonic-gate ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4 70577c478bd9Sstevel@tonic-gate ! into %l2 and %l3 70587c478bd9Sstevel@tonic-gate 7059d142717dSae112802 prefetcha [%l0]ASI_USER, #one_read 70607c478bd9Sstevel@tonic-gate 70617c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x0]%asi 70627c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x8]%asi 70637c478bd9Sstevel@tonic-gate 70647c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 70657c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 70667c478bd9Sstevel@tonic-gate ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and 70677c478bd9Sstevel@tonic-gate ! %l4 from previous read 70687c478bd9Sstevel@tonic-gate ! into %l4 and %l5 70697c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x10]%asi 70707c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x18]%asi 70717c478bd9Sstevel@tonic-gate 70727c478bd9Sstevel@tonic-gate ! Repeat the same for next 32 bytes. 
70737c478bd9Sstevel@tonic-gate 70747c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 70757c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 70767c478bd9Sstevel@tonic-gate ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) 70777c478bd9Sstevel@tonic-gate 70787c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x20]%asi 70797c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x28]%asi 70807c478bd9Sstevel@tonic-gate 70817c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 70827c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 70837c478bd9Sstevel@tonic-gate ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) 70847c478bd9Sstevel@tonic-gate 70857c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x30]%asi 70867c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x38]%asi 70877c478bd9Sstevel@tonic-gate 70887c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 70897c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 70907c478bd9Sstevel@tonic-gate bgu,pt %xcc, .ci_loop1 70917c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 70927c478bd9Sstevel@tonic-gate ba .ci_blkdone 70937c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 70947c478bd9Sstevel@tonic-gate ! the src offset was stored in %o2 70957c478bd9Sstevel@tonic-gate 70967c478bd9Sstevel@tonic-gate.ci_upper_double: 70977c478bd9Sstevel@tonic-gate 70987c478bd9Sstevel@tonic-gate sub %i1, %o2, %i1 ! align the src at 16 bytes. 70997c478bd9Sstevel@tonic-gate sub %o2, 0x8, %o0 71007c478bd9Sstevel@tonic-gate sll %o0, 3, %o0 ! %o0 left shift 71017c478bd9Sstevel@tonic-gate mov 0x40, %o1 71027c478bd9Sstevel@tonic-gate sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 71037c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %l0 ! %l0 has block aligned source 7104d142717dSae112802 prefetcha [%l0]ASI_USER, #one_read 71057c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 ! partial data in %l3 71067c478bd9Sstevel@tonic-gate ! for this read and 71077c478bd9Sstevel@tonic-gate ! 
no data in %l2 7108d142717dSae112802 add %l0, 0x40, %l0 71097c478bd9Sstevel@tonic-gate.ci_loop2: 71107c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 71117c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 ! %l4 has complete data 71127c478bd9Sstevel@tonic-gate ! and %l5 has partial 71137c478bd9Sstevel@tonic-gate ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! merge %l3, %l4 and %l5 71147c478bd9Sstevel@tonic-gate ! into %l3 and %l4 7115d142717dSae112802 prefetcha [%l0]ASI_USER, #one_read 71167c478bd9Sstevel@tonic-gate 71177c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x0]%asi 71187c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x8]%asi 71197c478bd9Sstevel@tonic-gate 71207c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 71217c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 71227c478bd9Sstevel@tonic-gate ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with 71237c478bd9Sstevel@tonic-gate ! %l5 from previous read 71247c478bd9Sstevel@tonic-gate ! into %l5 and %l2 71257c478bd9Sstevel@tonic-gate 71267c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x10]%asi 71277c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x18]%asi 71287c478bd9Sstevel@tonic-gate 71297c478bd9Sstevel@tonic-gate ! Repeat the same for next 32 bytes. 
71307c478bd9Sstevel@tonic-gate 71317c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 71327c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 71337c478bd9Sstevel@tonic-gate ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) 71347c478bd9Sstevel@tonic-gate 71357c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x20]%asi 71367c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x28]%asi 71377c478bd9Sstevel@tonic-gate 71387c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 71397c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 71407c478bd9Sstevel@tonic-gate ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) 71417c478bd9Sstevel@tonic-gate 71427c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x30]%asi 71437c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x38]%asi 71447c478bd9Sstevel@tonic-gate 71457c478bd9Sstevel@tonic-gate add %l0, 0x40, %l0 71467c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 71477c478bd9Sstevel@tonic-gate bgu,pt %xcc, .ci_loop2 71487c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 71497c478bd9Sstevel@tonic-gate ba .ci_blkdone 71507c478bd9Sstevel@tonic-gate add %i1, %o2, %i1 ! increment the source by src offset 71517c478bd9Sstevel@tonic-gate ! the src offset was stored in %o2 71527c478bd9Sstevel@tonic-gate 71537c478bd9Sstevel@tonic-gate 71547c478bd9Sstevel@tonic-gate ! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P 71557c478bd9Sstevel@tonic-gate.ci_blkcpy: 71567c478bd9Sstevel@tonic-gate 71577c478bd9Sstevel@tonic-gate andn %i1, 0x3f, %o0 ! 
%o0 has block aligned source 7158d142717dSae112802 prefetcha [%o0]ASI_USER, #one_read 7159d142717dSae112802 add %o0, 0x40, %o0 71607c478bd9Sstevel@tonic-gate1: 71617c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l0 71627c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 71637c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 71647c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 71657c478bd9Sstevel@tonic-gate 7166d142717dSae112802 prefetcha [%o0]ASI_USER, #one_read 71677c478bd9Sstevel@tonic-gate 71687c478bd9Sstevel@tonic-gate stxa %l0, [%i0+0x0]%asi 71697c478bd9Sstevel@tonic-gate 71707c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 71717c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 71727c478bd9Sstevel@tonic-gate ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l6 71737c478bd9Sstevel@tonic-gate add %i1, 0x10, %i1 71747c478bd9Sstevel@tonic-gate 71757c478bd9Sstevel@tonic-gate stxa %l1, [%i0+0x8]%asi 71767c478bd9Sstevel@tonic-gate stxa %l2, [%i0+0x10]%asi 71777c478bd9Sstevel@tonic-gate stxa %l3, [%i0+0x18]%asi 71787c478bd9Sstevel@tonic-gate stxa %l4, [%i0+0x20]%asi 71797c478bd9Sstevel@tonic-gate stxa %l5, [%i0+0x28]%asi 71807c478bd9Sstevel@tonic-gate stxa %l6, [%i0+0x30]%asi 71817c478bd9Sstevel@tonic-gate stxa %l7, [%i0+0x38]%asi 71827c478bd9Sstevel@tonic-gate 71837c478bd9Sstevel@tonic-gate add %o0, 0x40, %o0 71847c478bd9Sstevel@tonic-gate subcc %i3, 0x40, %i3 71857c478bd9Sstevel@tonic-gate bgu,pt %xcc, 1b 71867c478bd9Sstevel@tonic-gate add %i0, 0x40, %i0 71877c478bd9Sstevel@tonic-gate 71887c478bd9Sstevel@tonic-gate.ci_blkdone: 71897c478bd9Sstevel@tonic-gate membar #Sync 71907c478bd9Sstevel@tonic-gate 7191340af271Swh94709 brz,pt %i2, .copyin_exit 71927c478bd9Sstevel@tonic-gate nop 71937c478bd9Sstevel@tonic-gate 7194340af271Swh94709 ! Handle trailing bytes 7195340af271Swh94709 cmp %i2, 0x8 7196340af271Swh94709 blu,pt %ncc, .ci_residue 71977c478bd9Sstevel@tonic-gate nop 71987c478bd9Sstevel@tonic-gate 7199340af271Swh94709 ! 
Can we do some 8B ops 7200340af271Swh94709 or %i1, %i0, %o2 7201340af271Swh94709 andcc %o2, 0x7, %g0 7202340af271Swh94709 bnz %ncc, .ci_last4 7203340af271Swh94709 nop 72047c478bd9Sstevel@tonic-gate 7205340af271Swh94709 ! Do 8byte ops as long as possible 7206340af271Swh94709.ci_last8: 72077c478bd9Sstevel@tonic-gate ldxa [%i1]ASI_USER, %o2 72087c478bd9Sstevel@tonic-gate stx %o2, [%i0] 72097c478bd9Sstevel@tonic-gate add %i1, 0x8, %i1 7210340af271Swh94709 sub %i2, 0x8, %i2 7211340af271Swh94709 cmp %i2, 0x8 7212340af271Swh94709 bgu,pt %ncc, .ci_last8 72137c478bd9Sstevel@tonic-gate add %i0, 0x8, %i0 72147c478bd9Sstevel@tonic-gate 7215340af271Swh94709 brz,pt %i2, .copyin_exit 7216340af271Swh94709 nop 7217340af271Swh94709 7218340af271Swh94709 ba .ci_residue 7219340af271Swh94709 nop 7220340af271Swh94709 7221340af271Swh94709.ci_last4: 7222340af271Swh94709 ! Can we do 4B ops 7223340af271Swh94709 andcc %o2, 0x3, %g0 7224340af271Swh94709 bnz %ncc, .ci_last2 7225340af271Swh94709 nop 7226340af271Swh947091: 7227340af271Swh94709 lda [%i1]ASI_USER, %o2 7228340af271Swh94709 st %o2, [%i0] 7229340af271Swh94709 add %i1, 0x4, %i1 7230340af271Swh94709 sub %i2, 0x4, %i2 7231340af271Swh94709 cmp %i2, 0x4 7232340af271Swh94709 bgu,pt %ncc, 1b 7233340af271Swh94709 add %i0, 0x4, %i0 7234340af271Swh94709 7235340af271Swh94709 brz,pt %i2, .copyin_exit 7236340af271Swh94709 nop 7237340af271Swh94709 7238340af271Swh94709 ba .ci_residue 7239340af271Swh94709 nop 7240340af271Swh94709 7241340af271Swh94709.ci_last2: 7242340af271Swh94709 ! 
Can we do 2B ops 7243340af271Swh94709 andcc %o2, 0x1, %g0 7244340af271Swh94709 bnz %ncc, .ci_residue 7245340af271Swh94709 nop 7246340af271Swh94709 7247340af271Swh947091: 7248340af271Swh94709 lduha [%i1]ASI_USER, %o2 7249340af271Swh94709 stuh %o2, [%i0] 7250340af271Swh94709 add %i1, 0x2, %i1 7251340af271Swh94709 sub %i2, 0x2, %i2 7252340af271Swh94709 cmp %i2, 0x2 7253340af271Swh94709 bgu,pt %ncc, 1b 7254340af271Swh94709 add %i0, 0x2, %i0 7255340af271Swh94709 7256340af271Swh94709 brz,pt %i2, .copyin_exit 72577c478bd9Sstevel@tonic-gate nop 72587c478bd9Sstevel@tonic-gate 72597c478bd9Sstevel@tonic-gate ! Copy the residue as byte copy 72607c478bd9Sstevel@tonic-gate.ci_residue: 72617c478bd9Sstevel@tonic-gate lduba [%i1]ASI_USER, %i4 72627c478bd9Sstevel@tonic-gate stb %i4, [%i0] 72637c478bd9Sstevel@tonic-gate inc %i1 72647c478bd9Sstevel@tonic-gate deccc %i2 7265340af271Swh94709 bgu,pt %xcc, .ci_residue 72667c478bd9Sstevel@tonic-gate inc %i0 72677c478bd9Sstevel@tonic-gate 72687c478bd9Sstevel@tonic-gate.copyin_exit: 72697c478bd9Sstevel@tonic-gate membar #Sync 72707c478bd9Sstevel@tonic-gate stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! 
	! restore old t_lofault
	ret
	restore	%g0, 0, %o0
.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
#endif	/* NIAGARA_IMPL */
	SET_SIZE(copyin)

#endif	/* lint */

/*
 * xcopyin - variant of copyin that shares the .do_copyin body but
 * installs .xcopyin_err as the fault handler.
 *
 * On a fault, .xcopyin_err checks the thread's T_COPYOPS pointer: if it
 * is non-NULL control is transferred to the CP_XCOPYIN entry found
 * there, otherwise the value in %g1 is returned to the caller (the
 * .copyin_err handler above returns -1 at the same point).
 * NOTE(review): %g1 is presumably the errno left by the trap code, as
 * stated for .zeroerr later in this file - confirm.
 */
#ifdef	lint

/*ARGSUSED*/
int
xcopyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyin)
	! Build the address of .xcopyin_err in REAL_LOFAULT; the low bits
	! are filled in from the delay slot of the branch.
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f			! no copyops vector: return %g1
	nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2			! hand off to the copyops xcopyin
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin)

#endif	/* lint */

/*
 * xcopyin_little - byte-at-a-time copy from uaddr to kaddr that reverses
 * the byte order: destination byte i receives source byte (count - 1 - i).
 * Source bytes are loaded through ASI_AIUSL; destination bytes are written
 * with ordinary stores.
 *
 * .little_err is installed in t_lofault for the duration of the copy and
 * the previous value (kept in %o5) is restored on both the success and
 * the fault path.  Returns 0 on success, or the value in %g1 on a fault.
 *
 * Register use:
 *	%o0	source cursor (biased, see below)
 *	%o1	kaddr + count
 *	%o3	negative index, runs from -count up to 0
 *	%o4	scratch / byte being copied
 *	%o5	saved t_lofault
 */
#ifdef	lint

/*ARGSUSED*/
int
xcopyin_little(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	! %o3 = -count; the condition codes set here are the ones tested
	! by the bz below (the intervening add does not modify them).
	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	! %o0 = uaddr + 2*count - 1, so %o0 + %o3 addresses the last
	! source byte; %o1 + %o3 addresses the first destination byte.
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	lduba	[%o0+%o3]ASI_AIUSL, %o4

1:	stb	%o4, [%o1+%o3]
	inccc	%o3			! carry is set when %o3 wraps to 0
	! %o3 went up by one, so dropping %o0 by two moves the effective
	! source address (%o0 + %o3) back by one byte.
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	lduba	[%o0+%o3]ASI_AIUSL, %o4	! annulled: only runs if looping

2:	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)

.little_err:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin_little)

#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 *
 * The routine passes .copyio_noerr to the shared .do_copyin body as its
 * fault handler; .copyio_noerr does no error processing of its own and
 * simply jumps to SAVED_LOFAULT.
 * NOTE(review): SAVED_LOFAULT is presumably the t_lofault value that was
 * current when the copy started (e.g. the on_fault() handler) - confirm
 * against .do_copyin.
 */
#if defined(lint)

/* ARGSUSED */
void
copyin_noerr(const void *ufrom, void *kto, size_t count)
{}

#else	/* lint */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
.copyio_noerr:
	jmp	SAVED_LOFAULT
	nop
	SET_SIZE(copyin_noerr)

#endif /* lint */

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 *
 * copyout_noerr hands the .copyio_noerr label (defined under
 * copyin_noerr) to the shared .do_copyout body as its fault handler.
 */

#if defined(lint)

/* ARGSUSED */
void
copyout_noerr(const void *kfrom, void *uto, size_t count)
{}

#else	/* lint */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
	SET_SIZE(copyout_noerr)

#endif /* lint */

/*
 * Global tunables.  use_hw_bzero is read by the bzero/kzero/uzero body
 * (.blkalign) to decide whether the block-store path may be used; the
 * other words are not referenced in this part of the file.
 * The lint definitions and the assembler definitions below must be kept
 * in step with each other.
 */
#if defined(lint)

int use_hw_bcopy = 1;
int use_hw_bzero = 1;
uint_t hw_copy_limit_1 = 0x100;
uint_t hw_copy_limit_2 = 0x200;
uint_t hw_copy_limit_4 = 0x400;
uint_t hw_copy_limit_8 = 0x400;

#else /* !lint */

	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1
	DGDEF(hw_copy_limit_1)
	.word	0x100
	DGDEF(hw_copy_limit_2)
	.word	0x200
	DGDEF(hw_copy_limit_4)
	.word	0x400
	DGDEF(hw_copy_limit_8)
	.word	0x400

	.align	64
	.section ".text"
#endif /* !lint */

/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * at least 256 bytes in length using Niagara's block stores/quad store.
 * If the criteria for using this routine are not met then it calls bzero
 * and returns 1.  Otherwise 0 is returned indicating success.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 *
 * The three criteria checked are: addr is 64-byte aligned, len >= 256,
 * and len is a multiple of 64.
 *
 * Note that %asi is written on the way in and is not restored before
 * returning.
 */
#ifdef lint
/*ARGSUSED*/
int
hwblkclr(void *addr, size_t len)
{
	return(0);
}
#else /* lint */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)

	ENTRY(hwblkclr)
	save	%sp, -SA(MINFRAME), %sp

	! Must be block-aligned
	andcc	%i0, 0x3f, %g0
	bnz,pn	%ncc, 1f
	nop

	! ... and must be 256 bytes or more
	cmp	%i1, 0x100
	blu,pn	%ncc, 1f
	nop

	! ... and length must be a multiple of 64
	andcc	%i1, 0x3f, %g0
	bz,pn	%ncc, .pz_doblock
	! Delay slot (not annulled): %asi is set whether or not the
	! branch is taken.
	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi

1:	! punt, call bzero but notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1
	ret
	restore	%g0, 1, %o0	! return (1) - did not use block operations

	! Already verified that there are at least 256 bytes to set
	!
	! Each pass clears 256 bytes (four 64-byte blocks).  The first
	! doubleword of each of the four blocks is stored first, then the
	! remaining seven doublewords of each block.
	! NOTE(review): presumably this lets the block-init store allocate
	! all four cache lines up front - confirm against the Niagara
	! documentation for ASI_BLK_INIT_ST_QUAD_LDD_P.
.pz_doblock:
	stxa	%g0, [%i0+0x0]%asi
	stxa	%g0, [%i0+0x40]%asi
	stxa	%g0, [%i0+0x80]%asi
	stxa	%g0, [%i0+0xc0]%asi

	stxa	%g0, [%i0+0x8]%asi
	stxa	%g0, [%i0+0x10]%asi
	stxa	%g0, [%i0+0x18]%asi
	stxa	%g0, [%i0+0x20]%asi
	stxa	%g0, [%i0+0x28]%asi
	stxa	%g0, [%i0+0x30]%asi
	stxa	%g0, [%i0+0x38]%asi

	stxa	%g0, [%i0+0x48]%asi
	stxa	%g0, [%i0+0x50]%asi
	stxa	%g0, [%i0+0x58]%asi
	stxa	%g0, [%i0+0x60]%asi
	stxa	%g0, [%i0+0x68]%asi
	stxa	%g0, [%i0+0x70]%asi
	stxa	%g0, [%i0+0x78]%asi

	stxa	%g0, [%i0+0x88]%asi
	stxa	%g0, [%i0+0x90]%asi
	stxa	%g0, [%i0+0x98]%asi
	stxa	%g0, [%i0+0xa0]%asi
	stxa	%g0, [%i0+0xa8]%asi
	stxa	%g0, [%i0+0xb0]%asi
	stxa	%g0, [%i0+0xb8]%asi

	stxa	%g0, [%i0+0xc8]%asi
	stxa	%g0, [%i0+0xd0]%asi
	stxa	%g0, [%i0+0xd8]%asi
	stxa	%g0, [%i0+0xe0]%asi
	stxa	%g0, [%i0+0xe8]%asi
	stxa	%g0, [%i0+0xf0]%asi
	stxa	%g0, [%i0+0xf8]%asi

	! Loop only while MORE than 256 bytes remain; a final chunk of
	! exactly 256 bytes is left to the 64-byte loop below.
	sub	%i1, 0x100, %i1
	cmp	%i1, 0x100
	bgu,pt	%ncc, .pz_doblock
	add	%i0, 0x100, %i0

2:
	! Check if at least 64 bytes remain to set
	cmp	%i1,0x40
	blu	%ncc, .pz_finish
	nop

3:
	stxa	%g0, [%i0+0x0]%asi
	stxa	%g0, [%i0+0x8]%asi
	stxa	%g0, [%i0+0x10]%asi
	stxa	%g0, [%i0+0x18]%asi
	stxa	%g0, [%i0+0x20]%asi
	stxa	%g0, [%i0+0x28]%asi
	stxa	%g0, [%i0+0x30]%asi
	stxa	%g0, [%i0+0x38]%asi

	subcc	%i1, 0x40, %i1
	bgu,pt	%ncc, 3b
	add	%i0, 0x40, %i0

.pz_finish:
	membar	#Sync
	ret
	restore	%g0, 0, %o0		! return (0) - block operations used
	SET_SIZE(hwblkclr)
#endif	/* lint */

#ifdef lint
/* Copy 32 bytes of data from src to dst using physical addresses */
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else /*!lint */

	/*
	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
	 * using physical addresses.
	 *
	 * The PSTATE_IE bit is cleared in %pstate for the duration of
	 * the copy and the original %pstate is written back in the
	 * delay slot of the return.  All four doublewords are loaded
	 * before any of them is stored.  %o0 and %o1 are advanced as
	 * the copy proceeds; %g1, %g2 and %o2-%o5 are used as scratch.
	 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1		! %g1 = original %pstate
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g0, %g2, %pstate	! run with PSTATE_IE cleared

	ldxa	[%o0]ASI_MEM, %o2
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5
	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	membar	#Sync
	retl
	wrpr	%g0, %g1, %pstate	! delay slot: restore %pstate
	SET_SIZE(hw_pa_bcopy32)
#endif /* lint */

/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */

/*
 * Control flow of the bzero/kzero/uzero routine.
 *
 * For stores of fewer than 7 bytes, the bytes are zeroed one at a time.
 *
 * For stores of fewer than 15 bytes, align the address on a 4 byte boundary.
 * Then store as many 4-byte chunks as fit, followed by trailing bytes.
 *
 * For sizes of 15 bytes or more, align the address on 8 byte boundary.
 * if (remaining count >= 128 && use_hw_bzero) {
 *	store as many 8-bytes chunks to block align the address
 *	store using ASI_BLK_INIT_ST_QUAD_LDD_P (bzero/kzero) OR
 *	store using ASI_BLK_INIT_QUAD_LDD_AIUS (uzero)
 * }
 * Store as many 8-byte chunks, followed by trailing bytes.
 */

/*
 * uzero and kzero are thin entry points: each selects the ASI used for
 * the stores, deals with t_lofault, and then branches to .do_zero, the
 * body shared with bzero.
 *
 *	%o0	addr
 *	%o1	count
 *	%o5	t_lofault on entry; kzero also ORs in LOFAULT_SET so the
 *		exit and error paths know a handler was installed even
 *		when the old value was zero
 */
#if defined(lint)

/* ARGSUSED */
int
kzero(void *addr, size_t count)
{ return(0); }

/* ARGSUSED */
void
uzero(void *addr, size_t count)
{}

#else	/* lint */

	ENTRY(uzero)
	!
	! Set a new lo_fault handler only if we came in with one
	! already specified.
	!
	wr	%g0, ASI_USER, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2	! delay slot; harmless if taken
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]	! delay slot: install handler

	ENTRY(kzero)
	!
	! Always set a lo_fault handler
	!
	wr	%g0, ASI_P, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	sethi	%hi(.zeroerr), %o2
	or	%o5, LOFAULT_SET, %o5
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]	! delay slot: install handler

/*
 * We got here because of a fault during kzero or if
 * uzero or bzero was called with t_lofault non-zero.
 * Otherwise we've already run screaming from the room.
 * Errno value is in %g1. Note that we're here iff
 * we did set t_lofault.
 */
.zeroerr:
	!
	! Undo asi register setting. Just set it to be the
	! kernel default without checking.
	!
	wr	%g0, ASI_P, %asi

	!
	! We did set t_lofault. It may well have been zero coming in.
	!
1:
	tst	%o5
	membar #Sync
	bne,pn	%ncc, 3f
	! Delay slot: strip the flag bit.  The condition codes written
	! here (Z set when no real old handler remains) are what the
	! "be" at 3: tests.
	andncc	%o5, LOFAULT_SET, %o5
2:
	!
	! Old handler was zero. Just return the error.
	!
	retl				! return
	mov	%g1, %o0		! error code from %g1
3:
	!
	! We're here because %o5 was non-zero. It was non-zero
	! because either LOFAULT_SET was present, a previous fault
	! handler was present or both. In all cases we need to reset
	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
	! before we either simply return the error or we invoke the
	! previously specified handler.
	!
	be	%ncc, 2b
	stn	%o5, [THREAD_REG + T_LOFAULT]	! delay slot: runs either way
	jmp	%o5			! goto real handler
	nop
	SET_SIZE(kzero)
	SET_SIZE(uzero)

#endif	/* lint */

/*
 * Zero a block of storage.
77017c478bd9Sstevel@tonic-gate */ 77027c478bd9Sstevel@tonic-gate 77037c478bd9Sstevel@tonic-gate#if defined(lint) 77047c478bd9Sstevel@tonic-gate 77057c478bd9Sstevel@tonic-gate/* ARGSUSED */ 77067c478bd9Sstevel@tonic-gatevoid 77077c478bd9Sstevel@tonic-gatebzero(void *addr, size_t count) 77087c478bd9Sstevel@tonic-gate{} 77097c478bd9Sstevel@tonic-gate 77107c478bd9Sstevel@tonic-gate#else /* lint */ 77117c478bd9Sstevel@tonic-gate 77127c478bd9Sstevel@tonic-gate ENTRY(bzero) 77137c478bd9Sstevel@tonic-gate wr %g0, ASI_P, %asi 77147c478bd9Sstevel@tonic-gate 77157c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o5 ! save old vector 77167c478bd9Sstevel@tonic-gate tst %o5 77177c478bd9Sstevel@tonic-gate bz,pt %ncc, .do_zero 77187c478bd9Sstevel@tonic-gate sethi %hi(.zeroerr), %o2 77197c478bd9Sstevel@tonic-gate or %o2, %lo(.zeroerr), %o2 77207c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 77217c478bd9Sstevel@tonic-gate stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector 77227c478bd9Sstevel@tonic-gate 77237c478bd9Sstevel@tonic-gate.do_zero: 77247c478bd9Sstevel@tonic-gate cmp %o1, 7 77257c478bd9Sstevel@tonic-gate blu,pn %ncc, .byteclr 77267c478bd9Sstevel@tonic-gate nop 77277c478bd9Sstevel@tonic-gate 77287c478bd9Sstevel@tonic-gate cmp %o1, 15 77297c478bd9Sstevel@tonic-gate blu,pn %ncc, .wdalign 77307c478bd9Sstevel@tonic-gate nop 77317c478bd9Sstevel@tonic-gate 77327c478bd9Sstevel@tonic-gate andcc %o0, 7, %o3 ! is add aligned on a 8 byte bound 77337c478bd9Sstevel@tonic-gate bz,pt %ncc, .blkalign ! already double aligned 77347c478bd9Sstevel@tonic-gate sub %o3, 8, %o3 ! -(bytes till double aligned) 77357c478bd9Sstevel@tonic-gate add %o1, %o3, %o1 ! 
update o1 with new count 77367c478bd9Sstevel@tonic-gate 77377c478bd9Sstevel@tonic-gate1: 77387c478bd9Sstevel@tonic-gate stba %g0, [%o0]%asi 77397c478bd9Sstevel@tonic-gate inccc %o3 77407c478bd9Sstevel@tonic-gate bl,pt %ncc, 1b 77417c478bd9Sstevel@tonic-gate inc %o0 77427c478bd9Sstevel@tonic-gate 77437c478bd9Sstevel@tonic-gate ! Now address is double aligned 77447c478bd9Sstevel@tonic-gate.blkalign: 77457c478bd9Sstevel@tonic-gate cmp %o1, 0x80 ! check if there are 128 bytes to set 77467c478bd9Sstevel@tonic-gate blu,pn %ncc, .bzero_small 77477c478bd9Sstevel@tonic-gate mov %o1, %o3 77487c478bd9Sstevel@tonic-gate 77497c478bd9Sstevel@tonic-gate sethi %hi(use_hw_bzero), %o2 77507c478bd9Sstevel@tonic-gate ld [%o2 + %lo(use_hw_bzero)], %o2 77517c478bd9Sstevel@tonic-gate tst %o2 77527c478bd9Sstevel@tonic-gate bz %ncc, .bzero_small 77537c478bd9Sstevel@tonic-gate mov %o1, %o3 77547c478bd9Sstevel@tonic-gate 77557c478bd9Sstevel@tonic-gate rd %asi, %o3 77567c478bd9Sstevel@tonic-gate wr %g0, ASI_BLK_INIT_ST_QUAD_LDD_P, %asi 77577c478bd9Sstevel@tonic-gate cmp %o3, ASI_P 77587c478bd9Sstevel@tonic-gate bne,a %ncc, .algnblk 77597c478bd9Sstevel@tonic-gate wr %g0, ASI_BLK_INIT_QUAD_LDD_AIUS, %asi 77607c478bd9Sstevel@tonic-gate 77617c478bd9Sstevel@tonic-gate.algnblk: 77627c478bd9Sstevel@tonic-gate andcc %o0, 0x3f, %o3 ! is block aligned? 77637c478bd9Sstevel@tonic-gate bz,pt %ncc, .bzero_blk 77647c478bd9Sstevel@tonic-gate sub %o3, 0x40, %o3 ! -(bytes till block aligned) 77657c478bd9Sstevel@tonic-gate add %o1, %o3, %o1 ! o1 is the remainder 77667c478bd9Sstevel@tonic-gate 77677c478bd9Sstevel@tonic-gate ! Clear -(%o3) bytes till block aligned 77687c478bd9Sstevel@tonic-gate1: 77697c478bd9Sstevel@tonic-gate stxa %g0, [%o0]%asi 77707c478bd9Sstevel@tonic-gate addcc %o3, 8, %o3 77717c478bd9Sstevel@tonic-gate bl,pt %ncc, 1b 77727c478bd9Sstevel@tonic-gate add %o0, 8, %o0 77737c478bd9Sstevel@tonic-gate 77747c478bd9Sstevel@tonic-gate.bzero_blk: 77757c478bd9Sstevel@tonic-gate and %o1, 0x3f, %o3 ! 
calc bytes left after blk clear 77767c478bd9Sstevel@tonic-gate andn %o1, 0x3f, %o4 ! calc size of blocks in bytes 77777c478bd9Sstevel@tonic-gate 77787c478bd9Sstevel@tonic-gate cmp %o4, 0x100 ! 256 bytes or more 77797c478bd9Sstevel@tonic-gate blu,pn %ncc, 3f 77807c478bd9Sstevel@tonic-gate nop 77817c478bd9Sstevel@tonic-gate 77827c478bd9Sstevel@tonic-gate2: 77837c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x0]%asi 77847c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x40]%asi 77857c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x80]%asi 77867c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xc0]%asi 77877c478bd9Sstevel@tonic-gate 77887c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x8]%asi 77897c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x10]%asi 77907c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x18]%asi 77917c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x20]%asi 77927c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x28]%asi 77937c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x30]%asi 77947c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x38]%asi 77957c478bd9Sstevel@tonic-gate 77967c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x48]%asi 77977c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x50]%asi 77987c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x58]%asi 77997c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x60]%asi 78007c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x68]%asi 78017c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x70]%asi 78027c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x78]%asi 78037c478bd9Sstevel@tonic-gate 78047c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x88]%asi 78057c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x90]%asi 78067c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x98]%asi 78077c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xa0]%asi 78087c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xa8]%asi 78097c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xb0]%asi 78107c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xb8]%asi 78117c478bd9Sstevel@tonic-gate 78127c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xc8]%asi 78137c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xd0]%asi 
78147c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xd8]%asi 78157c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xe0]%asi 78167c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xe8]%asi 78177c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xf0]%asi 78187c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0xf8]%asi 78197c478bd9Sstevel@tonic-gate 78207c478bd9Sstevel@tonic-gate sub %o4, 0x100, %o4 78217c478bd9Sstevel@tonic-gate cmp %o4, 0x100 78227c478bd9Sstevel@tonic-gate bgu,pt %ncc, 2b 78237c478bd9Sstevel@tonic-gate add %o0, 0x100, %o0 78247c478bd9Sstevel@tonic-gate 78257c478bd9Sstevel@tonic-gate3: 78267c478bd9Sstevel@tonic-gate ! ... check if 64 bytes to set 78277c478bd9Sstevel@tonic-gate cmp %o4, 0x40 78287c478bd9Sstevel@tonic-gate blu %ncc, .bzero_blk_done 78297c478bd9Sstevel@tonic-gate nop 78307c478bd9Sstevel@tonic-gate 78317c478bd9Sstevel@tonic-gate4: 78327c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x0]%asi 78337c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x8]%asi 78347c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x10]%asi 78357c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x18]%asi 78367c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x20]%asi 78377c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x28]%asi 78387c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x30]%asi 78397c478bd9Sstevel@tonic-gate stxa %g0, [%o0+0x38]%asi 78407c478bd9Sstevel@tonic-gate 78417c478bd9Sstevel@tonic-gate subcc %o4, 0x40, %o4 78427c478bd9Sstevel@tonic-gate bgu,pt %ncc, 3b 78437c478bd9Sstevel@tonic-gate add %o0, 0x40, %o0 78447c478bd9Sstevel@tonic-gate 78457c478bd9Sstevel@tonic-gate.bzero_blk_done: 78467c478bd9Sstevel@tonic-gate membar #Sync 78477c478bd9Sstevel@tonic-gate ! 78487c478bd9Sstevel@tonic-gate ! Undo asi register setting. 78497c478bd9Sstevel@tonic-gate ! 
78507c478bd9Sstevel@tonic-gate rd %asi, %o4 78517c478bd9Sstevel@tonic-gate wr %g0, ASI_P, %asi 78527c478bd9Sstevel@tonic-gate cmp %o4, ASI_BLK_INIT_ST_QUAD_LDD_P 78537c478bd9Sstevel@tonic-gate bne,a %ncc, .bzero_small 78547c478bd9Sstevel@tonic-gate wr %g0, ASI_USER, %asi 78557c478bd9Sstevel@tonic-gate 78567c478bd9Sstevel@tonic-gate.bzero_small: 78577c478bd9Sstevel@tonic-gate ! Set the remaining doubles 78587c478bd9Sstevel@tonic-gate subcc %o3, 8, %o3 ! Can we store any doubles? 78597c478bd9Sstevel@tonic-gate blu,pn %ncc, .byteclr 78607c478bd9Sstevel@tonic-gate and %o1, 7, %o1 ! calc bytes left after doubles 78617c478bd9Sstevel@tonic-gate 78627c478bd9Sstevel@tonic-gate.dbclr: 78637c478bd9Sstevel@tonic-gate stxa %g0, [%o0]%asi ! Clear the doubles 78647c478bd9Sstevel@tonic-gate subcc %o3, 8, %o3 78657c478bd9Sstevel@tonic-gate bgeu,pt %ncc, .dbclr 78667c478bd9Sstevel@tonic-gate add %o0, 8, %o0 78677c478bd9Sstevel@tonic-gate 78687c478bd9Sstevel@tonic-gate ba .byteclr 78697c478bd9Sstevel@tonic-gate nop 78707c478bd9Sstevel@tonic-gate 78717c478bd9Sstevel@tonic-gate.wdalign: 78727c478bd9Sstevel@tonic-gate andcc %o0, 3, %o3 ! is add aligned on a word boundary 78737c478bd9Sstevel@tonic-gate bz,pn %ncc, .wdclr 78747c478bd9Sstevel@tonic-gate andn %o1, 3, %o3 ! create word sized count in %o3 78757c478bd9Sstevel@tonic-gate 78767c478bd9Sstevel@tonic-gate dec %o1 ! decrement count 78777c478bd9Sstevel@tonic-gate stba %g0, [%o0]%asi ! clear a byte 78787c478bd9Sstevel@tonic-gate ba .wdalign 78797c478bd9Sstevel@tonic-gate inc %o0 ! next byte 78807c478bd9Sstevel@tonic-gate 78817c478bd9Sstevel@tonic-gate.wdclr: 78827c478bd9Sstevel@tonic-gate sta %g0, [%o0]%asi ! 4-byte clearing loop 78837c478bd9Sstevel@tonic-gate subcc %o3, 4, %o3 78847c478bd9Sstevel@tonic-gate bnz,pt %ncc, .wdclr 78857c478bd9Sstevel@tonic-gate inc 4, %o0 78867c478bd9Sstevel@tonic-gate 78877c478bd9Sstevel@tonic-gate and %o1, 3, %o1 ! 
leftover count, if any 78887c478bd9Sstevel@tonic-gate 78897c478bd9Sstevel@tonic-gate.byteclr: 78907c478bd9Sstevel@tonic-gate ! Set the leftover bytes 78917c478bd9Sstevel@tonic-gate brz %o1, .bzero_exit 78927c478bd9Sstevel@tonic-gate nop 78937c478bd9Sstevel@tonic-gate 78947c478bd9Sstevel@tonic-gate7: 78957c478bd9Sstevel@tonic-gate deccc %o1 ! byte clearing loop 78967c478bd9Sstevel@tonic-gate stba %g0, [%o0]%asi 78977c478bd9Sstevel@tonic-gate bgu,pt %ncc, 7b 78987c478bd9Sstevel@tonic-gate inc %o0 78997c478bd9Sstevel@tonic-gate 79007c478bd9Sstevel@tonic-gate.bzero_exit: 79017c478bd9Sstevel@tonic-gate ! 79027c478bd9Sstevel@tonic-gate ! We're just concerned with whether t_lofault was set 79037c478bd9Sstevel@tonic-gate ! when we came in. We end up here from either kzero() 79047c478bd9Sstevel@tonic-gate ! or bzero(). kzero() *always* sets a lofault handler. 79057c478bd9Sstevel@tonic-gate ! It ors LOFAULT_SET into %o5 to indicate it has done 79067c478bd9Sstevel@tonic-gate ! this even if the value of %o5 is otherwise zero. 79077c478bd9Sstevel@tonic-gate ! bzero() sets a lofault handler *only* if one was 79087c478bd9Sstevel@tonic-gate ! previously set. Accordingly we need to examine 79097c478bd9Sstevel@tonic-gate ! %o5 and if it is non-zero be sure to clear LOFAULT_SET 79107c478bd9Sstevel@tonic-gate ! before resetting the error handler. 79117c478bd9Sstevel@tonic-gate ! 79127c478bd9Sstevel@tonic-gate tst %o5 79137c478bd9Sstevel@tonic-gate bz %ncc, 1f 79147c478bd9Sstevel@tonic-gate andn %o5, LOFAULT_SET, %o5 79157c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 79167c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 79177c478bd9Sstevel@tonic-gate1: 79187c478bd9Sstevel@tonic-gate retl 79197c478bd9Sstevel@tonic-gate clr %o0 ! return (0) 79207c478bd9Sstevel@tonic-gate 79217c478bd9Sstevel@tonic-gate SET_SIZE(bzero) 79227c478bd9Sstevel@tonic-gate#endif /* lint */ 7923