/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>

#include "assym.h"


/*
 * Pseudo-code to aid in understanding the control flow of the
 * bcopy routine.
 *
 * On entry to bcopy:
 *
 *	%l6 = curthread->t_lofault;
 *	used_block_copy = FALSE;			! FPUSED_FLAG (1) clear
 *	if (%l6 != NULL) {
 *		curthread->t_lofault = .copyerr;
 *		caller_error_handler = TRUE
 *	}
 *	%l6 |= 2;					! BCOPY_FLAG, always set
 *
 *	if (length < VIS_COPY_THRESHOLD)
 *		goto regular_copy;
 *
 *	if (!use_hw_bcopy)
 *		goto regular_copy;
 *
 *	if (curthread->t_lwp == NULL) {
 *		! Kernel threads do not have pcb's in which to store
 *		! the floating point state, disallow preemption during
 *		! the copy.
 *		kpreempt_disable(curthread);
 *	}
 *
 *	old_fprs = %fprs;
 *	old_gsr = %gsr;
 *	if (%fprs.fef) {
 *		! If we need to save 4 blocks of fpregs then make sure
 *		! the length is still appropriate for that extra overhead.
 *		if (length < (VIS_COPY_THRESHOLD + (64 * 4))) {
 *			if (curthread->t_lwp == NULL)
 *				kpreempt_enable(curthread);
 *			goto regular_copy;
 *		}
 *		%fprs.fef = 1;
 *		save current fpregs on stack using blockstore
 *	} else {
 *		%fprs.fef = 1;
 *	}
 *
 *	used_block_copy = 1;				! %l6 |= 1
 *	do_blockcopy_here;
 *
 * In lofault handler:
 *	curthread->t_lofault = .copyerr2;
 *	Continue on with the normal exit handler
 *
 * On exit:
 *	call_kpreempt = 0;
 *	if (used_block_copy) {				! %l6 & 1
 *		%gsr = old_gsr;
 *		if (old_fprs & FPRS_FEF)
 *			restore fpregs from stack using blockload
 *		else
 *			zero fpregs
 *		%fprs = old_fprs;
 *		if (curthread->t_lwp == NULL) {
 *			kpreempt_enable(curthread);
 *			call_kpreempt = 1;
 *		}
 *	}
 *	curthread->t_lofault = (%l6 & ~3);
 *	if (call_kpreempt)
 *		kpreempt(%pil);
 *	return (0)
 *
 * In second lofault handler (.copyerr2):
 *	We've tried to restore fp state from the stack and failed. To
 *	avoid returning with a corrupted fp state, we will panic.
 */

/*
 * Notes on preserving existing fp state:
 *
 * When a copyOP decides to use fp we may have to preserve existing
 * floating point state. It is not the caller's state that we need to
 * preserve - the rest of the kernel does not use fp and, anyway, fp
 * registers are volatile across a call. Some examples:
 *
 *	- userland has fp state and is interrupted (device interrupt
 *	  or trap) and within the interrupt/trap handling we use
 *	  bcopy()
 *	- another (higher level) interrupt or trap handler uses bcopy
 *	  while a bcopy from an earlier interrupt is still active
 *	- an asynchronous error trap occurs while fp state exists (in
 *	  userland or in kernel copy) and the tl0 component of the handling
 *	  uses bcopy
 *	- a user process with fp state incurs a copy-on-write fault and
 *	  hwblkpagecopy always uses fp
 *
 * We therefore need a per-call place in which to preserve fp state -
 * using our stack is ideal (and since fp copy cannot be leaf optimized
 * because of calls it makes, this is no hardship).
 *
 * To make sure that floating point state is always saved and restored
 * correctly, the following "big rules" must be followed when the floating
 * point registers will be used:
 *
 * 1. %l6 always holds the caller's lofault handler. Also in this register,
 *    FPUSED_FLAG (value 1, the lowest bit) indicates that the floating
 *    point registers are in use. BCOPY_FLAG (value 2, the next bit)
 *    indicates that the call was to bcopy.
 *
 * 2. The FPUSED flag indicates that all FP state has been successfully stored
 *    on the stack. It should not be set until this save has been completed.
 *
 * 3. The FPUSED flag should not be cleared on exit until all FP state has
 *    been restored from the stack. If an error occurs while restoring
 *    data from the stack, the error handler can check this flag to see if
 *    a restore is necessary.
 *
 * 4. Code run under the new lofault handler must be kept to a minimum. In
 *    particular, any calls to kpreempt() should not be made until after the
 *    lofault handler has been restored.
 */

/*
 * This shadows sys/machsystm.h which can't be included due to the lack of
 * _ASM guards in include files it references. Change it here, change it there.
*/
#define	VIS_COPY_THRESHOLD	900

/*
 * Less than or equal to this number of bytes we will always copy
 * byte-for-byte
 */
#define	SMALL_LIMIT	7

/*
 * Flags set in the lower bits of the t_lofault address:
 * FPUSED_FLAG: The FP registers were in use and must be restored
 * BCOPY_FLAG: Set for bcopy calls, cleared for kcopy calls
 * COPY_FLAGS: Both of the above
 *
 * Other flags:
 * KPREEMPT_FLAG: kpreempt needs to be called
 */
#define	FPUSED_FLAG	1
#define	BCOPY_FLAG	2
#define	COPY_FLAGS	(FPUSED_FLAG | BCOPY_FLAG)
#define	KPREEMPT_FLAG	4

/*
 * Size of stack frame in order to accommodate a 64-byte aligned
 * floating-point register save area and 2 32-bit temp locations.
 */
#define	HWCOPYFRAMESIZE	((64 * 5) + (2 * 4))

/*
 * Distances below (%fp + STACK_BIAS) of the frame's save slots.  The two
 * 32-bit temporaries hold the saved %fprs and %gsr values.
 */
#define	SAVED_FPREGS_OFFSET	(64 * 5)
#define	SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 4)
#define	SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 4)

/*
 * Common macros used by the various versions of the block copy
 * routines in this file.
*/

/*
 * FZERO leaves every double-precision fp register (%f0 - %f62) holding
 * zero: %f0 and %f2 are cleared with fzero, and each remaining
 * even-numbered register receives %f0 + %f2 or %f0 * %f2, i.e. zero.
 */
#define	FZERO				\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f16		;\
	fmuld	%f0, %f2, %f18		;\
	faddd	%f0, %f2, %f20		;\
	fmuld	%f0, %f2, %f22		;\
	faddd	%f0, %f2, %f24		;\
	fmuld	%f0, %f2, %f26		;\
	faddd	%f0, %f2, %f28		;\
	fmuld	%f0, %f2, %f30		;\
	faddd	%f0, %f2, %f32		;\
	fmuld	%f0, %f2, %f34		;\
	faddd	%f0, %f2, %f36		;\
	fmuld	%f0, %f2, %f38		;\
	faddd	%f0, %f2, %f40		;\
	fmuld	%f0, %f2, %f42		;\
	faddd	%f0, %f2, %f44		;\
	fmuld	%f0, %f2, %f46		;\
	faddd	%f0, %f2, %f48		;\
	fmuld	%f0, %f2, %f50		;\
	faddd	%f0, %f2, %f52		;\
	fmuld	%f0, %f2, %f54		;\
	faddd	%f0, %f2, %f56		;\
	fmuld	%f0, %f2, %f58		;\
	faddd	%f0, %f2, %f60		;\
	fmuld	%f0, %f2, %f62


/*
 * FALIGN_Dn: eight faligndata operations that take the nine consecutive
 * doubles starting at %dn as input and write their results to
 * %d48 - %d62.  Source register numbering wraps from %d46 back to %d0,
 * so the inputs always come from %d0 - %d46 and never overlap the
 * destination registers.
 */
#define	FALIGN_D0			\
	faligndata %d0, %d2, %d48	;\
	faligndata %d2, %d4, %d50	;\
	faligndata %d4, %d6, %d52	;\
	faligndata %d6, %d8, %d54	;\
	faligndata %d8, %d10, %d56	;\
	faligndata %d10, %d12, %d58	;\
	faligndata %d12, %d14, %d60	;\
	faligndata %d14, %d16, %d62

#define	FALIGN_D16			\
	faligndata %d16, %d18, %d48	;\
	faligndata %d18, %d20, %d50	;\
	faligndata %d20, %d22, %d52	;\
	faligndata %d22, %d24, %d54	;\
	faligndata %d24, %d26, %d56	;\
	faligndata %d26, %d28, %d58	;\
	faligndata %d28, %d30, %d60	;\
	faligndata %d30, %d32, %d62

#define	FALIGN_D32			\
	faligndata %d32, %d34, %d48	;\
	faligndata %d34, %d36, %d50	;\
	faligndata %d36, %d38, %d52	;\
	faligndata %d38, %d40, %d54	;\
	faligndata %d40, %d42, %d56	;\
	faligndata %d42, %d44, %d58	;\
	faligndata %d44, %d46, %d60	;\
	faligndata %d46, %d0, %d62

#define	FALIGN_D2			\
	faligndata %d2, %d4, %d48	;\
	faligndata %d4, %d6, %d50	;\
	faligndata %d6, %d8, %d52	;\
	faligndata %d8, %d10, %d54	;\
	faligndata %d10, %d12, %d56	;\
	faligndata %d12, %d14, %d58	;\
	faligndata %d14, %d16, %d60	;\
	faligndata %d16, %d18, %d62

#define	FALIGN_D18			\
	faligndata %d18, %d20, %d48	;\
	faligndata %d20, %d22, %d50	;\
	faligndata %d22, %d24, %d52	;\
	faligndata %d24, %d26, %d54	;\
	faligndata %d26, %d28, %d56	;\
	faligndata %d28, %d30, %d58	;\
	faligndata %d30, %d32, %d60	;\
	faligndata %d32, %d34, %d62

#define	FALIGN_D34			\
	faligndata %d34, %d36, %d48	;\
	faligndata %d36, %d38, %d50	;\
	faligndata %d38, %d40, %d52	;\
	faligndata %d40, %d42, %d54	;\
	faligndata %d42, %d44, %d56	;\
	faligndata %d44, %d46, %d58	;\
	faligndata %d46, %d0, %d60	;\
	faligndata %d0, %d2, %d62

#define	FALIGN_D4			\
	faligndata %d4, %d6, %d48	;\
	faligndata %d6, %d8, %d50	;\
	faligndata %d8, %d10, %d52	;\
	faligndata %d10, %d12, %d54	;\
	faligndata %d12, %d14, %d56	;\
	faligndata %d14, %d16, %d58	;\
	faligndata %d16, %d18, %d60	;\
	faligndata %d18, %d20, %d62

#define	FALIGN_D20			\
	faligndata %d20, %d22, %d48	;\
	faligndata %d22, %d24, %d50	;\
	faligndata %d24, %d26, %d52	;\
	faligndata %d26, %d28, %d54	;\
	faligndata %d28, %d30, %d56	;\
	faligndata %d30, %d32, %d58	;\
	faligndata %d32, %d34, %d60	;\
	faligndata %d34, %d36, %d62

#define	FALIGN_D36			\
	faligndata %d36, %d38, %d48	;\
	faligndata %d38, %d40, %d50	;\
	faligndata %d40, %d42, %d52	;\
	faligndata %d42, %d44, %d54	;\
	faligndata %d44, %d46, %d56	;\
	faligndata %d46, %d0, %d58	;\
	faligndata %d0, %d2, %d60	;\
	faligndata %d2, %d4, %d62

#define	FALIGN_D6			\
	faligndata %d6, %d8, %d48	;\
	faligndata %d8, %d10, %d50	;\
	faligndata %d10, %d12, %d52	;\
	faligndata %d12, %d14, %d54	;\
	faligndata %d14, %d16, %d56	;\
	faligndata %d16, %d18, %d58	;\
	faligndata %d18, %d20, %d60	;\
	faligndata %d20, %d22, %d62

#define	FALIGN_D22			\
	faligndata %d22, %d24, %d48	;\
	faligndata %d24, %d26, %d50	;\
	faligndata %d26, %d28, %d52	;\
	faligndata %d28, %d30, %d54	;\
	faligndata %d30, %d32, %d56	;\
	faligndata %d32, %d34, %d58	;\
	faligndata %d34, %d36, %d60	;\
	faligndata %d36, %d38, %d62

#define	FALIGN_D38			\
	faligndata %d38, %d40, %d48	;\
	faligndata %d40, %d42, %d50	;\
	faligndata %d42, %d44, %d52	;\
	faligndata %d44, %d46, %d54	;\
	faligndata %d46, %d0, %d56	;\
	faligndata %d0, %d2, %d58	;\
	faligndata %d2, %d4, %d60	;\
	faligndata %d4, %d6, %d62

#define	FALIGN_D8			\
	faligndata %d8, %d10, %d48	;\
	faligndata %d10, %d12, %d50	;\
	faligndata %d12, %d14, %d52	;\
	faligndata %d14, %d16, %d54	;\
	faligndata %d16, %d18, %d56	;\
	faligndata %d18, %d20, %d58	;\
	faligndata %d20, %d22, %d60	;\
	faligndata %d22, %d24, %d62

#define	FALIGN_D24			\
	faligndata %d24, %d26, %d48	;\
	faligndata %d26, %d28, %d50	;\
	faligndata %d28, %d30, %d52	;\
	faligndata %d30, %d32, %d54	;\
	faligndata %d32, %d34, %d56	;\
	faligndata %d34, %d36, %d58	;\
	faligndata %d36, %d38, %d60	;\
	faligndata %d38, %d40, %d62

#define	FALIGN_D40			\
	faligndata %d40, %d42, %d48	;\
	faligndata %d42, %d44, %d50	;\
	faligndata %d44, %d46, %d52	;\
	faligndata %d46, %d0, %d54	;\
	faligndata %d0, %d2, %d56	;\
	faligndata %d2, %d4, %d58	;\
	faligndata %d4, %d6, %d60	;\
	faligndata %d6, %d8, %d62

#define	FALIGN_D10			\
	faligndata %d10, %d12, %d48	;\
	faligndata %d12, %d14, %d50	;\
	faligndata %d14, %d16, %d52	;\
	faligndata %d16, %d18, %d54	;\
	faligndata %d18, %d20, %d56	;\
	faligndata %d20, %d22, %d58	;\
	faligndata %d22, %d24, %d60	;\
	faligndata %d24, %d26, %d62

#define	FALIGN_D26			\
	faligndata %d26, %d28, %d48	;\
	faligndata %d28, %d30, %d50	;\
	faligndata %d30, %d32, %d52	;\
	faligndata %d32, %d34, %d54	;\
	faligndata %d34, %d36, %d56	;\
	faligndata %d36, %d38, %d58	;\
	faligndata %d38, %d40, %d60	;\
	faligndata %d40, %d42, %d62

#define	FALIGN_D42			\
	faligndata %d42, %d44, %d48	;\
	faligndata %d44, %d46, %d50	;\
	faligndata %d46, %d0, %d52	;\
	faligndata %d0, %d2, %d54	;\
	faligndata %d2, %d4, %d56	;\
	faligndata %d4, %d6, %d58	;\
	faligndata %d6, %d8, %d60	;\
	faligndata %d8, %d10, %d62

#define	FALIGN_D12			\
	faligndata %d12, %d14, %d48	;\
	faligndata %d14, %d16, %d50	;\
	faligndata %d16, %d18, %d52	;\
	faligndata %d18, %d20, %d54	;\
	faligndata %d20, %d22, %d56	;\
	faligndata %d22, %d24, %d58	;\
	faligndata %d24, %d26, %d60	;\
	faligndata %d26, %d28, %d62

#define	FALIGN_D28			\
	faligndata %d28, %d30, %d48	;\
	faligndata %d30, %d32, %d50	;\
	faligndata %d32, %d34, %d52	;\
	faligndata %d34, %d36, %d54	;\
	faligndata %d36, %d38, %d56	;\
	faligndata %d38, %d40, %d58	;\
	faligndata %d40, %d42, %d60	;\
	faligndata %d42, %d44, %d62

#define	FALIGN_D44			\
	faligndata %d44, %d46, %d48	;\
	faligndata %d46, %d0, %d50	;\
	faligndata %d0, %d2, %d52	;\
	faligndata %d2, %d4, %d54	;\
	faligndata %d4, %d6, %d56	;\
	faligndata %d6, %d8, %d58	;\
	faligndata %d8, %d10, %d60	;\
	faligndata %d10, %d12, %d62

#define	FALIGN_D14			\
	faligndata %d14, %d16, %d48	;\
	faligndata %d16, %d18, %d50	;\
	faligndata %d18, %d20, %d52	;\
	faligndata %d20, %d22, %d54	;\
	faligndata %d22, %d24, %d56	;\
	faligndata %d24, %d26, %d58	;\
	faligndata %d26, %d28, %d60	;\
	faligndata %d28, %d30, %d62

#define	FALIGN_D30			\
	faligndata %d30, %d32, %d48	;\
	faligndata %d32, %d34, %d50	;\
	faligndata %d34, %d36, %d52	;\
	faligndata %d36, %d38, %d54	;\
	faligndata %d38, %d40, %d56	;\
	faligndata %d40, %d42, %d58	;\
	faligndata %d42, %d44, %d60	;\
	faligndata %d44, %d46, %d62

#define	FALIGN_D46			\
	faligndata %d46, %d0, %d48	;\
	faligndata %d0, %d2, %d50	;\
	faligndata %d2, %d4, %d52	;\
	faligndata %d4, %d6, %d54	;\
	faligndata %d6, %d8, %d56	;\
	faligndata %d8, %d10, %d58	;\
	faligndata %d10, %d12, %d60	;\
	faligndata %d12, %d14, %d62


/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 *
 * kcopy installs .copyerr as the thread's t_lofault handler, keeps the
 * previously installed handler in %l6 (with no flag bits added) and then
 * joins the code shared with bcopy at .do_copy.
 */



	.seg	".text"
	.align	4

	ENTRY(kcopy)

	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	set	.copyerr, %l6		! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %l7	! save existing handler
	membar	#Sync			! sync error barrier (see copy.s)
	stn	%l6, [THREAD_REG + T_LOFAULT]	! set t_lofault
	!
	! Note that we carefully do *not* flag the setting of
	! t_lofault.
	!
	ba,pt	%ncc, .do_copy		! common code
	  mov	%l7, %l6		! delay slot: %l6 = previous handler

/*
 * We got here because of a fault during kcopy or bcopy if a fault
 * handler existed when bcopy was called.
 * Errno value is in %g1.
 */
.copyerr:
	set	.copyerr2, %l1
	membar	#Sync			! sync error barrier
	stn	%l1, [THREAD_REG + T_LOFAULT]	! set t_lofault
	btst	FPUSED_FLAG, %l6
	bz	%icc, 1f		! fp was not used, nothing to restore
	  and	%l6, BCOPY_FLAG, %l1	! copy flag to %l1

	membar	#Sync

	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz	%icc, 4f		! saved %fprs had FEF clear
	  nop

	! restore fpregs from stack
	membar	#Sync
	add	%fp, STACK_BIAS - 257, %o2
	and	%o2, -64, %o2		! round down to a 64-byte boundary
	ldda	[%o2]ASI_BLK_P, %d0
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d16
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d32
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d48
	membar	#Sync

	ba,pt	%ncc, 2f
	  wr	%o3, 0, %fprs		! restore fprs

4:
	FZERO				! zero all of the fpregs
	wr	%o3, 0, %fprs		! restore fprs

2:	ldn	[THREAD_REG + T_LWP], %o2
	tst	%o2
	bnz,pt	%ncc, 1f		! thread has an lwp
	  nop

	! t_lwp is NULL: drop the preemption-disable count by one
	ldsb	[THREAD_REG + T_PREEMPT], %l0
	deccc	%l0
	bnz,pn	%ncc, 1f
	  stb	%l0, [THREAD_REG + T_PREEMPT]

	! Check for a kernel preemption request
	ldn	[THREAD_REG + T_CPU], %l0
	ldub	[%l0 + CPU_KPRUNRUN], %l0
	tst	%l0
	bnz,a,pt	%ncc, 1f	! Need to call kpreempt?
	  or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag

	!
	! Need to cater for the different expectations of kcopy
	! and bcopy. kcopy will *always* set a t_lofault handler
	! If it fires, we're expected to just return the error code
	! and *not* to invoke any existing error handler. As far as
	! bcopy is concerned, we only set t_lofault if there was an
	! existing lofault handler. In that case we're expected to
	! invoke the previously existing handler after resetting the
	! t_lofault value.
	!
1:
	andn	%l6, COPY_FLAGS, %l6	! remove flags from lofault address
	membar	#Sync			! sync error barrier
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	! call kpreempt if necessary
	btst	KPREEMPT_FLAG, %l1
	bz,pt	%icc, 2f
	  nop
	call	kpreempt
	  rdpr	%pil, %o0		! pass %pil
2:
	btst	BCOPY_FLAG, %l1
	bnz,pn	%ncc, 3f
	  nop
	ret				! kcopy: return the errno from %g1
	  restore	%g1, 0, %o0

3:
	!
	! We're here via bcopy. There *must* have been an error handler
	! in place otherwise we would have died a nasty death already.
	!
	jmp	%l6				! goto real handler
	  restore	%g0, 0, %o0		! dispose of copy window

/*
 * We got here because of a fault in .copyerr. We can't safely restore fp
 * state, so we panic.
 */
fp_panic_msg:
	.asciz	"Unable to restore fp state after copy operation"

	.align	4
.copyerr2:
	set	fp_panic_msg, %o0
	call	panic
	  nop
	SET_SIZE(kcopy)


/*
 * Copy a block of storage - must not overlap (from + len <= to).
611 * Registers: l6 - saved t_lofault 612 * 613 * Copy a page of memory. 614 * Assumes double word alignment and a count >= 256. 615 */ 616 617 ENTRY(bcopy) 618 619 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 620 ldn [THREAD_REG + T_LOFAULT], %l6 ! save t_lofault 621 tst %l6 622 ! 623 ! We've already captured whether t_lofault was zero on entry. 624 ! We need to mark ourselves as being from bcopy since both 625 ! kcopy and bcopy use the same code path. If BCOPY_FLAG is set 626 ! and the saved lofault was zero, we won't reset lofault on 627 ! returning. 628 ! 629 or %l6, BCOPY_FLAG, %l6 630 bz,pt %ncc, .do_copy 631 sethi %hi(.copyerr), %o2 632 or %o2, %lo(.copyerr), %o2 633 membar #Sync ! sync error barrier 634 stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector 635 636.do_copy: 637 cmp %i2, 12 ! for small counts 638 blu %ncc, .bytecp ! just copy bytes 639 .empty 640 641 cmp %i2, VIS_COPY_THRESHOLD ! for large counts 642 blu,pt %ncc, .bcb_punt 643 .empty 644 645 ! 646 ! Check to see if VIS acceleration is enabled 647 ! 648 sethi %hi(use_hw_bcopy), %o2 649 ld [%o2 + %lo(use_hw_bcopy)], %o2 650 tst %o2 651 bz,pn %icc, .bcb_punt 652 nop 653 654 subcc %i1, %i0, %i3 655 bneg,a,pn %ncc, 1f 656 neg %i3 6571: 658 /* 659 * Compare against 256 since we should be checking block addresses 660 * and (dest & ~63) - (src & ~63) can be 3 blocks even if 661 * src = dest + (64 * 3) + 63. 662 */ 663 cmp %i3, 256 664 blu,pn %ncc, .bcb_punt 665 nop 666 667 ldn [THREAD_REG + T_LWP], %o3 668 tst %o3 669 bnz,pt %ncc, 1f 670 nop 671 672 ! kpreempt_disable(); 673 ldsb [THREAD_REG + T_PREEMPT], %o2 674 inc %o2 675 stb %o2, [THREAD_REG + T_PREEMPT] 676 6771: 678 rd %fprs, %o2 ! check for unused fp 679 st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs 680 btst FPRS_FEF, %o2 681 bz,a %icc, .do_blockcopy 682 wr %g0, FPRS_FEF, %fprs 683 684.bcb_fpregs_inuse: 685 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger 686 bgeu %ncc, 1f ! 
if we have to save the fpregs) 687 nop 688 689 tst %o3 690 bnz,pt %ncc, .bcb_punt 691 nop 692 693 ldsb [THREAD_REG + T_PREEMPT], %l0 694 deccc %l0 695 bnz,pn %icc, .bcb_punt 696 stb %l0, [THREAD_REG + T_PREEMPT] 697 698 ! Check for a kernel preemption request 699 ldn [THREAD_REG + T_CPU], %l0 700 ldub [%l0 + CPU_KPRUNRUN], %l0 701 tst %l0 702 bz,pt %icc, .bcb_punt 703 nop 704 705 ! Attempt to preempt 706 call kpreempt 707 rdpr %pil, %o0 ! pass %pil 708 709 ba,pt %ncc, .bcb_punt 710 nop 711 7121: 713 wr %g0, FPRS_FEF, %fprs 714 715 ! save in-use fpregs on stack 716 membar #Sync 717 add %fp, STACK_BIAS - 257, %o2 718 and %o2, -64, %o2 719 stda %d0, [%o2]ASI_BLK_P 720 add %o2, 64, %o2 721 stda %d16, [%o2]ASI_BLK_P 722 add %o2, 64, %o2 723 stda %d32, [%o2]ASI_BLK_P 724 add %o2, 64, %o2 725 stda %d48, [%o2]ASI_BLK_P 726 membar #Sync 727 728.do_blockcopy: 729 membar #StoreStore|#StoreLoad|#LoadStore 730 731 rd %gsr, %o2 732 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 733 734 ! Set the lower bit in the saved t_lofault to indicate 735 ! that we need to clear the %fprs register on the way 736 ! out 737 or %l6, FPUSED_FLAG, %l6 738 739 ! Swap src/dst since the code below is memcpy code 740 ! and memcpy/bcopy have different calling sequences 741 mov %i1, %i5 742 mov %i0, %i1 743 mov %i5, %i0 744 745!!! This code is nearly identical to the version in the sun4u 746!!! libc_psr. Most bugfixes made to that file should be 747!!! merged into this routine. 748 749 andcc %i0, 7, %o3 750 bz,pt %ncc, blkcpy 751 sub %o3, 8, %o3 752 neg %o3 753 sub %i2, %o3, %i2 754 755 ! Align Destination on double-word boundary 756 7572: ldub [%i1], %o4 758 inc %i1 759 inc %i0 760 deccc %o3 761 bgu %ncc, 2b 762 stb %o4, [%i0 - 1] 763blkcpy: 764 andcc %i0, 63, %i3 765 bz,pn %ncc, blalign ! now block aligned 766 sub %i3, 64, %i3 767 neg %i3 ! bytes till block aligned 768 sub %i2, %i3, %i2 ! update %i2 with new count 769 770 ! Copy %i3 bytes till dst is block (64 byte) aligned. use 771 ! 
double word copies. 772 773 alignaddr %i1, %g0, %g1 774 ldd [%g1], %d0 775 add %g1, 8, %g1 7766: 777 ldd [%g1], %d2 778 add %g1, 8, %g1 779 subcc %i3, 8, %i3 780 faligndata %d0, %d2, %d8 781 std %d8, [%i0] 782 add %i1, 8, %i1 783 bz,pn %ncc, blalign 784 add %i0, 8, %i0 785 ldd [%g1], %d0 786 add %g1, 8, %g1 787 subcc %i3, 8, %i3 788 faligndata %d2, %d0, %d8 789 std %d8, [%i0] 790 add %i1, 8, %i1 791 bgu,pn %ncc, 6b 792 add %i0, 8, %i0 793 794blalign: 795 membar #StoreLoad 796 ! %i2 = total length 797 ! %i3 = blocks (length - 64) / 64 798 ! %i4 = doubles remaining (length - blocks) 799 sub %i2, 64, %i3 800 andn %i3, 63, %i3 801 sub %i2, %i3, %i4 802 andn %i4, 7, %i4 803 sub %i4, 16, %i4 804 sub %i2, %i4, %i2 805 sub %i2, %i3, %i2 806 807 andn %i1, 0x3f, %l7 ! blk aligned address 808 alignaddr %i1, %g0, %g0 ! gen %gsr 809 810 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5 811 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3 812 add %i1, %i4, %i1 813 add %i1, %i3, %i1 814 815 ldda [%l7]ASI_BLK_P, %d0 816 add %l7, 64, %l7 817 ldda [%l7]ASI_BLK_P, %d16 818 add %l7, 64, %l7 819 ldda [%l7]ASI_BLK_P, %d32 820 add %l7, 64, %l7 821 sub %i3, 128, %i3 822 823 ! switch statement to get us to the right 8 byte blk within a 824 ! 64 byte block 825 cmp %i5, 4 826 bgeu,a hlf 827 cmp %i5, 6 828 cmp %i5, 2 829 bgeu,a sqtr 830 nop 831 cmp %i5, 1 832 be,a seg1 833 nop 834 ba,pt %ncc, seg0 835 nop 836sqtr: 837 be,a seg2 838 nop 839 ba,pt %ncc, seg3 840 nop 841 842hlf: 843 bgeu,a fqtr 844 nop 845 cmp %i5, 5 846 be,a seg5 847 nop 848 ba,pt %ncc, seg4 849 nop 850fqtr: 851 be,a seg6 852 nop 853 ba,pt %ncc, seg7 854 nop 855 856 857seg0: 858 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 859 FALIGN_D0 860 ldda [%l7]ASI_BLK_P, %d0 861 stda %d48, [%i0]ASI_BLK_P 862 add %l7, 64, %l7 863 subcc %i3, 64, %i3 864 bz,pn %ncc, 0f 865 add %i0, 64, %i0 866 ! 
2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 867 FALIGN_D16 868 ldda [%l7]ASI_BLK_P, %d16 869 stda %d48, [%i0]ASI_BLK_P 870 add %l7, 64, %l7 871 subcc %i3, 64, %i3 872 bz,pn %ncc, 1f 873 add %i0, 64, %i0 874 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 875 FALIGN_D32 876 ldda [%l7]ASI_BLK_P, %d32 877 stda %d48, [%i0]ASI_BLK_P 878 add %l7, 64, %l7 879 subcc %i3, 64, %i3 880 bz,pn %ncc, 2f 881 add %i0, 64, %i0 882 ba,a,pt %ncc, seg0 883 8840: 885 FALIGN_D16 886 stda %d48, [%i0]ASI_BLK_P 887 add %i0, 64, %i0 888 membar #Sync 889 FALIGN_D32 890 stda %d48, [%i0]ASI_BLK_P 891 ba,pt %ncc, blkd0 892 add %i0, 64, %i0 893 8941: 895 FALIGN_D32 896 stda %d48, [%i0]ASI_BLK_P 897 add %i0, 64, %i0 898 membar #Sync 899 FALIGN_D0 900 stda %d48, [%i0]ASI_BLK_P 901 ba,pt %ncc, blkd16 902 add %i0, 64, %i0 903 9042: 905 FALIGN_D0 906 stda %d48, [%i0]ASI_BLK_P 907 add %i0, 64, %i0 908 membar #Sync 909 FALIGN_D16 910 stda %d48, [%i0]ASI_BLK_P 911 ba,pt %ncc, blkd32 912 add %i0, 64, %i0 913 914seg1: 915 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 916 FALIGN_D2 917 ldda [%l7]ASI_BLK_P, %d0 918 stda %d48, [%i0]ASI_BLK_P 919 add %l7, 64, %l7 920 subcc %i3, 64, %i3 921 bz,pn %ncc, 0f 922 add %i0, 64, %i0 923 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 924 FALIGN_D18 925 ldda [%l7]ASI_BLK_P, %d16 926 stda %d48, [%i0]ASI_BLK_P 927 add %l7, 64, %l7 928 subcc %i3, 64, %i3 929 bz,pn %ncc, 1f 930 add %i0, 64, %i0 931 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 932 FALIGN_D34 933 ldda [%l7]ASI_BLK_P, %d32 934 stda %d48, [%i0]ASI_BLK_P 935 add %l7, 64, %l7 936 subcc %i3, 64, %i3 937 bz,pn %ncc, 2f 938 add %i0, 64, %i0 939 ba,a,pt %ncc, seg1 9400: 941 FALIGN_D18 942 stda %d48, [%i0]ASI_BLK_P 943 add %i0, 64, %i0 944 membar #Sync 945 FALIGN_D34 946 stda %d48, [%i0]ASI_BLK_P 947 ba,pt %ncc, blkd2 948 add %i0, 64, %i0 949 9501: 951 FALIGN_D34 952 stda %d48, [%i0]ASI_BLK_P 953 add %i0, 64, %i0 954 membar #Sync 955 FALIGN_D2 956 stda %d48, [%i0]ASI_BLK_P 957 ba,pt %ncc, blkd18 958 add %i0, 64, %i0 959 9602: 961 FALIGN_D2 962 stda %d48, [%i0]ASI_BLK_P 963 add %i0, 64, %i0 964 membar #Sync 965 FALIGN_D18 966 stda %d48, [%i0]ASI_BLK_P 967 ba,pt %ncc, blkd34 968 add %i0, 64, %i0 969 970seg2: 971 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 972 FALIGN_D4 973 ldda [%l7]ASI_BLK_P, %d0 974 stda %d48, [%i0]ASI_BLK_P 975 add %l7, 64, %l7 976 subcc %i3, 64, %i3 977 bz,pn %ncc, 0f 978 add %i0, 64, %i0 979 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 980 FALIGN_D20 981 ldda [%l7]ASI_BLK_P, %d16 982 stda %d48, [%i0]ASI_BLK_P 983 add %l7, 64, %l7 984 subcc %i3, 64, %i3 985 bz,pn %ncc, 1f 986 add %i0, 64, %i0 987 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 988 FALIGN_D36 989 ldda [%l7]ASI_BLK_P, %d32 990 stda %d48, [%i0]ASI_BLK_P 991 add %l7, 64, %l7 992 subcc %i3, 64, %i3 993 bz,pn %ncc, 2f 994 add %i0, 64, %i0 995 ba,a,pt %ncc, seg2 996 9970: 998 FALIGN_D20 999 stda %d48, [%i0]ASI_BLK_P 1000 add %i0, 64, %i0 1001 membar #Sync 1002 FALIGN_D36 1003 stda %d48, [%i0]ASI_BLK_P 1004 ba,pt %ncc, blkd4 1005 add %i0, 64, %i0 1006 10071: 1008 FALIGN_D36 1009 stda %d48, [%i0]ASI_BLK_P 1010 add %i0, 64, %i0 1011 membar #Sync 1012 FALIGN_D4 1013 stda %d48, [%i0]ASI_BLK_P 1014 ba,pt %ncc, blkd20 1015 add %i0, 64, %i0 1016 10172: 1018 FALIGN_D4 1019 stda %d48, [%i0]ASI_BLK_P 1020 add %i0, 64, %i0 1021 membar #Sync 1022 FALIGN_D20 1023 stda %d48, [%i0]ASI_BLK_P 1024 ba,pt %ncc, blkd36 1025 add %i0, 64, %i0 1026 1027seg3: 1028 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1029 FALIGN_D6 1030 ldda [%l7]ASI_BLK_P, %d0 1031 stda %d48, [%i0]ASI_BLK_P 1032 add %l7, 64, %l7 1033 subcc %i3, 64, %i3 1034 bz,pn %ncc, 0f 1035 add %i0, 64, %i0 1036 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1037 FALIGN_D22 1038 ldda [%l7]ASI_BLK_P, %d16 1039 stda %d48, [%i0]ASI_BLK_P 1040 add %l7, 64, %l7 1041 subcc %i3, 64, %i3 1042 bz,pn %ncc, 1f 1043 add %i0, 64, %i0 1044 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1045 FALIGN_D38 1046 ldda [%l7]ASI_BLK_P, %d32 1047 stda %d48, [%i0]ASI_BLK_P 1048 add %l7, 64, %l7 1049 subcc %i3, 64, %i3 1050 bz,pn %ncc, 2f 1051 add %i0, 64, %i0 1052 ba,a,pt %ncc, seg3 1053 10540: 1055 FALIGN_D22 1056 stda %d48, [%i0]ASI_BLK_P 1057 add %i0, 64, %i0 1058 membar #Sync 1059 FALIGN_D38 1060 stda %d48, [%i0]ASI_BLK_P 1061 ba,pt %ncc, blkd6 1062 add %i0, 64, %i0 1063 10641: 1065 FALIGN_D38 1066 stda %d48, [%i0]ASI_BLK_P 1067 add %i0, 64, %i0 1068 membar #Sync 1069 FALIGN_D6 1070 stda %d48, [%i0]ASI_BLK_P 1071 ba,pt %ncc, blkd22 1072 add %i0, 64, %i0 1073 10742: 1075 FALIGN_D6 1076 stda %d48, [%i0]ASI_BLK_P 1077 add %i0, 64, %i0 1078 membar #Sync 1079 FALIGN_D22 1080 stda %d48, [%i0]ASI_BLK_P 1081 ba,pt %ncc, blkd38 1082 add %i0, 64, %i0 1083 1084seg4: 1085 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1086 FALIGN_D8 1087 ldda [%l7]ASI_BLK_P, %d0 1088 stda %d48, [%i0]ASI_BLK_P 1089 add %l7, 64, %l7 1090 subcc %i3, 64, %i3 1091 bz,pn %ncc, 0f 1092 add %i0, 64, %i0 1093 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1094 FALIGN_D24 1095 ldda [%l7]ASI_BLK_P, %d16 1096 stda %d48, [%i0]ASI_BLK_P 1097 add %l7, 64, %l7 1098 subcc %i3, 64, %i3 1099 bz,pn %ncc, 1f 1100 add %i0, 64, %i0 1101 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1102 FALIGN_D40 1103 ldda [%l7]ASI_BLK_P, %d32 1104 stda %d48, [%i0]ASI_BLK_P 1105 add %l7, 64, %l7 1106 subcc %i3, 64, %i3 1107 bz,pn %ncc, 2f 1108 add %i0, 64, %i0 1109 ba,a,pt %ncc, seg4 1110 11110: 1112 FALIGN_D24 1113 stda %d48, [%i0]ASI_BLK_P 1114 add %i0, 64, %i0 1115 membar #Sync 1116 FALIGN_D40 1117 stda %d48, [%i0]ASI_BLK_P 1118 ba,pt %ncc, blkd8 1119 add %i0, 64, %i0 1120 11211: 1122 FALIGN_D40 1123 stda %d48, [%i0]ASI_BLK_P 1124 add %i0, 64, %i0 1125 membar #Sync 1126 FALIGN_D8 1127 stda %d48, [%i0]ASI_BLK_P 1128 ba,pt %ncc, blkd24 1129 add %i0, 64, %i0 1130 11312: 1132 FALIGN_D8 1133 stda %d48, [%i0]ASI_BLK_P 1134 add %i0, 64, %i0 1135 membar #Sync 1136 FALIGN_D24 1137 stda %d48, [%i0]ASI_BLK_P 1138 ba,pt %ncc, blkd40 1139 add %i0, 64, %i0 1140 1141seg5: 1142 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1143 FALIGN_D10 1144 ldda [%l7]ASI_BLK_P, %d0 1145 stda %d48, [%i0]ASI_BLK_P 1146 add %l7, 64, %l7 1147 subcc %i3, 64, %i3 1148 bz,pn %ncc, 0f 1149 add %i0, 64, %i0 1150 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1151 FALIGN_D26 1152 ldda [%l7]ASI_BLK_P, %d16 1153 stda %d48, [%i0]ASI_BLK_P 1154 add %l7, 64, %l7 1155 subcc %i3, 64, %i3 1156 bz,pn %ncc, 1f 1157 add %i0, 64, %i0 1158 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1159 FALIGN_D42 1160 ldda [%l7]ASI_BLK_P, %d32 1161 stda %d48, [%i0]ASI_BLK_P 1162 add %l7, 64, %l7 1163 subcc %i3, 64, %i3 1164 bz,pn %ncc, 2f 1165 add %i0, 64, %i0 1166 ba,a,pt %ncc, seg5 1167 11680: 1169 FALIGN_D26 1170 stda %d48, [%i0]ASI_BLK_P 1171 add %i0, 64, %i0 1172 membar #Sync 1173 FALIGN_D42 1174 stda %d48, [%i0]ASI_BLK_P 1175 ba,pt %ncc, blkd10 1176 add %i0, 64, %i0 1177 11781: 1179 FALIGN_D42 1180 stda %d48, [%i0]ASI_BLK_P 1181 add %i0, 64, %i0 1182 membar #Sync 1183 FALIGN_D10 1184 stda %d48, [%i0]ASI_BLK_P 1185 ba,pt %ncc, blkd26 1186 add %i0, 64, %i0 1187 11882: 1189 FALIGN_D10 1190 stda %d48, [%i0]ASI_BLK_P 1191 add %i0, 64, %i0 1192 membar #Sync 1193 FALIGN_D26 1194 stda %d48, [%i0]ASI_BLK_P 1195 ba,pt %ncc, blkd42 1196 add %i0, 64, %i0 1197 1198seg6: 1199 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1200 FALIGN_D12 1201 ldda [%l7]ASI_BLK_P, %d0 1202 stda %d48, [%i0]ASI_BLK_P 1203 add %l7, 64, %l7 1204 subcc %i3, 64, %i3 1205 bz,pn %ncc, 0f 1206 add %i0, 64, %i0 1207 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1208 FALIGN_D28 1209 ldda [%l7]ASI_BLK_P, %d16 1210 stda %d48, [%i0]ASI_BLK_P 1211 add %l7, 64, %l7 1212 subcc %i3, 64, %i3 1213 bz,pn %ncc, 1f 1214 add %i0, 64, %i0 1215 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1216 FALIGN_D44 1217 ldda [%l7]ASI_BLK_P, %d32 1218 stda %d48, [%i0]ASI_BLK_P 1219 add %l7, 64, %l7 1220 subcc %i3, 64, %i3 1221 bz,pn %ncc, 2f 1222 add %i0, 64, %i0 1223 ba,a,pt %ncc, seg6 1224 12250: 1226 FALIGN_D28 1227 stda %d48, [%i0]ASI_BLK_P 1228 add %i0, 64, %i0 1229 membar #Sync 1230 FALIGN_D44 1231 stda %d48, [%i0]ASI_BLK_P 1232 ba,pt %ncc, blkd12 1233 add %i0, 64, %i0 1234 12351: 1236 FALIGN_D44 1237 stda %d48, [%i0]ASI_BLK_P 1238 add %i0, 64, %i0 1239 membar #Sync 1240 FALIGN_D12 1241 stda %d48, [%i0]ASI_BLK_P 1242 ba,pt %ncc, blkd28 1243 add %i0, 64, %i0 1244 12452: 1246 FALIGN_D12 1247 stda %d48, [%i0]ASI_BLK_P 1248 add %i0, 64, %i0 1249 membar #Sync 1250 FALIGN_D28 1251 stda %d48, [%i0]ASI_BLK_P 1252 ba,pt %ncc, blkd44 1253 add %i0, 64, %i0 1254 1255seg7: 1256 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1257 FALIGN_D14 1258 ldda [%l7]ASI_BLK_P, %d0 1259 stda %d48, [%i0]ASI_BLK_P 1260 add %l7, 64, %l7 1261 subcc %i3, 64, %i3 1262 bz,pn %ncc, 0f 1263 add %i0, 64, %i0 1264 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1265 FALIGN_D30 1266 ldda [%l7]ASI_BLK_P, %d16 1267 stda %d48, [%i0]ASI_BLK_P 1268 add %l7, 64, %l7 1269 subcc %i3, 64, %i3 1270 bz,pn %ncc, 1f 1271 add %i0, 64, %i0 1272 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1273 FALIGN_D46 1274 ldda [%l7]ASI_BLK_P, %d32 1275 stda %d48, [%i0]ASI_BLK_P 1276 add %l7, 64, %l7 1277 subcc %i3, 64, %i3 1278 bz,pn %ncc, 2f 1279 add %i0, 64, %i0 1280 ba,a,pt %ncc, seg7 1281 12820: 1283 FALIGN_D30 1284 stda %d48, [%i0]ASI_BLK_P 1285 add %i0, 64, %i0 1286 membar #Sync 1287 FALIGN_D46 1288 stda %d48, [%i0]ASI_BLK_P 1289 ba,pt %ncc, blkd14 1290 add %i0, 64, %i0 1291 12921: 1293 FALIGN_D46 1294 stda %d48, [%i0]ASI_BLK_P 1295 add %i0, 64, %i0 1296 membar #Sync 1297 FALIGN_D14 1298 stda %d48, [%i0]ASI_BLK_P 1299 ba,pt %ncc, blkd30 1300 add %i0, 64, %i0 1301 13022: 1303 FALIGN_D14 1304 stda %d48, [%i0]ASI_BLK_P 1305 add %i0, 64, %i0 1306 membar #Sync 1307 FALIGN_D30 1308 stda %d48, [%i0]ASI_BLK_P 1309 ba,pt %ncc, blkd46 1310 add %i0, 64, %i0 1311 1312 1313 ! 1314 ! dribble out the last partial block 1315 ! 1316blkd0: 1317 subcc %i4, 8, %i4 1318 blu,pn %ncc, blkdone 1319 faligndata %d0, %d2, %d48 1320 std %d48, [%i0] 1321 add %i0, 8, %i0 1322blkd2: 1323 subcc %i4, 8, %i4 1324 blu,pn %ncc, blkdone 1325 faligndata %d2, %d4, %d48 1326 std %d48, [%i0] 1327 add %i0, 8, %i0 1328blkd4: 1329 subcc %i4, 8, %i4 1330 blu,pn %ncc, blkdone 1331 faligndata %d4, %d6, %d48 1332 std %d48, [%i0] 1333 add %i0, 8, %i0 1334blkd6: 1335 subcc %i4, 8, %i4 1336 blu,pn %ncc, blkdone 1337 faligndata %d6, %d8, %d48 1338 std %d48, [%i0] 1339 add %i0, 8, %i0 1340blkd8: 1341 subcc %i4, 8, %i4 1342 blu,pn %ncc, blkdone 1343 faligndata %d8, %d10, %d48 1344 std %d48, [%i0] 1345 add %i0, 8, %i0 1346blkd10: 1347 subcc %i4, 8, %i4 1348 blu,pn %ncc, blkdone 1349 faligndata %d10, %d12, %d48 1350 std %d48, [%i0] 1351 add %i0, 8, %i0 1352blkd12: 1353 subcc %i4, 8, %i4 1354 blu,pn %ncc, blkdone 1355 faligndata %d12, %d14, %d48 1356 std %d48, [%i0] 1357 add %i0, 8, %i0 1358blkd14: 1359 subcc %i4, 8, %i4 1360 blu,pn %ncc, blkdone 1361 fsrc1 %d14, %d0 1362 ba,a,pt %ncc, blkleft 1363 1364blkd16: 1365 subcc %i4, 8, %i4 1366 blu,pn %ncc, blkdone 
1367 faligndata %d16, %d18, %d48 1368 std %d48, [%i0] 1369 add %i0, 8, %i0 1370blkd18: 1371 subcc %i4, 8, %i4 1372 blu,pn %ncc, blkdone 1373 faligndata %d18, %d20, %d48 1374 std %d48, [%i0] 1375 add %i0, 8, %i0 1376blkd20: 1377 subcc %i4, 8, %i4 1378 blu,pn %ncc, blkdone 1379 faligndata %d20, %d22, %d48 1380 std %d48, [%i0] 1381 add %i0, 8, %i0 1382blkd22: 1383 subcc %i4, 8, %i4 1384 blu,pn %ncc, blkdone 1385 faligndata %d22, %d24, %d48 1386 std %d48, [%i0] 1387 add %i0, 8, %i0 1388blkd24: 1389 subcc %i4, 8, %i4 1390 blu,pn %ncc, blkdone 1391 faligndata %d24, %d26, %d48 1392 std %d48, [%i0] 1393 add %i0, 8, %i0 1394blkd26: 1395 subcc %i4, 8, %i4 1396 blu,pn %ncc, blkdone 1397 faligndata %d26, %d28, %d48 1398 std %d48, [%i0] 1399 add %i0, 8, %i0 1400blkd28: 1401 subcc %i4, 8, %i4 1402 blu,pn %ncc, blkdone 1403 faligndata %d28, %d30, %d48 1404 std %d48, [%i0] 1405 add %i0, 8, %i0 1406blkd30: 1407 subcc %i4, 8, %i4 1408 blu,pn %ncc, blkdone 1409 fsrc1 %d30, %d0 1410 ba,a,pt %ncc, blkleft 1411blkd32: 1412 subcc %i4, 8, %i4 1413 blu,pn %ncc, blkdone 1414 faligndata %d32, %d34, %d48 1415 std %d48, [%i0] 1416 add %i0, 8, %i0 1417blkd34: 1418 subcc %i4, 8, %i4 1419 blu,pn %ncc, blkdone 1420 faligndata %d34, %d36, %d48 1421 std %d48, [%i0] 1422 add %i0, 8, %i0 1423blkd36: 1424 subcc %i4, 8, %i4 1425 blu,pn %ncc, blkdone 1426 faligndata %d36, %d38, %d48 1427 std %d48, [%i0] 1428 add %i0, 8, %i0 1429blkd38: 1430 subcc %i4, 8, %i4 1431 blu,pn %ncc, blkdone 1432 faligndata %d38, %d40, %d48 1433 std %d48, [%i0] 1434 add %i0, 8, %i0 1435blkd40: 1436 subcc %i4, 8, %i4 1437 blu,pn %ncc, blkdone 1438 faligndata %d40, %d42, %d48 1439 std %d48, [%i0] 1440 add %i0, 8, %i0 1441blkd42: 1442 subcc %i4, 8, %i4 1443 blu,pn %ncc, blkdone 1444 faligndata %d42, %d44, %d48 1445 std %d48, [%i0] 1446 add %i0, 8, %i0 1447blkd44: 1448 subcc %i4, 8, %i4 1449 blu,pn %ncc, blkdone 1450 faligndata %d44, %d46, %d48 1451 std %d48, [%i0] 1452 add %i0, 8, %i0 1453blkd46: 1454 subcc %i4, 8, %i4 1455 blu,pn 
%ncc, blkdone 1456 fsrc1 %d46, %d0 1457 1458blkleft: 14591: 1460 ldd [%l7], %d2 1461 add %l7, 8, %l7 1462 subcc %i4, 8, %i4 1463 faligndata %d0, %d2, %d8 1464 std %d8, [%i0] 1465 blu,pn %ncc, blkdone 1466 add %i0, 8, %i0 1467 ldd [%l7], %d0 1468 add %l7, 8, %l7 1469 subcc %i4, 8, %i4 1470 faligndata %d2, %d0, %d8 1471 std %d8, [%i0] 1472 bgeu,pt %ncc, 1b 1473 add %i0, 8, %i0 1474 1475blkdone: 1476 tst %i2 1477 bz,pt %ncc, .bcb_exit 1478 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0 1479 14807: ldub [%i1], %i4 1481 inc %i1 1482 inc %i0 1483 deccc %i2 1484 bgu,pt %ncc, 7b 1485 stb %i4, [%i0 - 1] 1486 1487.bcb_exit: 1488 membar #StoreLoad|#StoreStore 1489 btst FPUSED_FLAG, %l6 1490 bz %icc, 1f 1491 and %l6, COPY_FLAGS, %l1 ! Store flags in %l1 1492 ! We can't clear the flags from %l6 yet. 1493 ! If there's an error, .copyerr will 1494 ! need them 1495 1496 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr 1497 wr %o2, 0, %gsr 1498 1499 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 1500 btst FPRS_FEF, %o3 1501 bz %icc, 4f 1502 nop 1503 1504 ! restore fpregs from stack 1505 membar #Sync 1506 add %fp, STACK_BIAS - 257, %o2 1507 and %o2, -64, %o2 1508 ldda [%o2]ASI_BLK_P, %d0 1509 add %o2, 64, %o2 1510 ldda [%o2]ASI_BLK_P, %d16 1511 add %o2, 64, %o2 1512 ldda [%o2]ASI_BLK_P, %d32 1513 add %o2, 64, %o2 1514 ldda [%o2]ASI_BLK_P, %d48 1515 membar #Sync 1516 1517 ba,pt %ncc, 2f 1518 wr %o3, 0, %fprs ! restore fprs 1519 15204: 1521 FZERO ! zero all of the fpregs 1522 wr %o3, 0, %fprs ! restore fprs 1523 15242: ldn [THREAD_REG + T_LWP], %o2 1525 tst %o2 1526 bnz,pt %ncc, 1f 1527 nop 1528 1529 ldsb [THREAD_REG + T_PREEMPT], %l0 1530 deccc %l0 1531 bnz,pn %ncc, 1f 1532 stb %l0, [THREAD_REG + T_PREEMPT] 1533 1534 ! Check for a kernel preemption request 1535 ldn [THREAD_REG + T_CPU], %l0 1536 ldub [%l0 + CPU_KPRUNRUN], %l0 1537 tst %l0 1538 bnz,a,pt %ncc, 1f ! Need to call kpreempt? 1539 or %l1, KPREEMPT_FLAG, %l1 ! 
If so, set the flag 1540 15411: 1542 btst BCOPY_FLAG, %l1 1543 bz,pn %icc, 3f 1544 andncc %l6, COPY_FLAGS, %l6 1545 1546 ! 1547 ! Here via bcopy. Check to see if the handler was NULL. 1548 ! If so, just return quietly. Otherwise, reset the 1549 ! handler and go home. 1550 ! 1551 bnz,pn %ncc, 3f 1552 nop 1553 1554 ! 1555 ! Null handler. Check for kpreempt flag, call if necessary, 1556 ! then return. 1557 ! 1558 btst KPREEMPT_FLAG, %l1 1559 bz,pt %icc, 2f 1560 nop 1561 call kpreempt 1562 rdpr %pil, %o0 ! pass %pil 15632: 1564 ret 1565 restore %g0, 0, %o0 1566 1567 ! 1568 ! Here via kcopy or bcopy with a handler.Reset the 1569 ! fault handler. 1570 ! 15713: 1572 membar #Sync 1573 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1574 1575 ! call kpreempt if necessary 1576 btst KPREEMPT_FLAG, %l1 1577 bz,pt %icc, 4f 1578 nop 1579 call kpreempt 1580 rdpr %pil, %o0 15814: 1582 ret 1583 restore %g0, 0, %o0 1584 1585.bcb_punt: 1586 ! 1587 ! use aligned transfers where possible 1588 ! 1589 xor %i0, %i1, %o4 ! xor from and to address 1590 btst 7, %o4 ! if lower three bits zero 1591 bz %icc, .aldoubcp ! can align on double boundary 1592 .empty ! assembler complaints about label 1593 1594 xor %i0, %i1, %o4 ! xor from and to address 1595 btst 3, %o4 ! if lower two bits zero 1596 bz %icc, .alwordcp ! can align on word boundary 1597 btst 3, %i0 ! delay slot, from address unaligned? 1598 ! 1599 ! use aligned reads and writes where possible 1600 ! this differs from wordcp in that it copes 1601 ! with odd alignment between source and destnation 1602 ! using word reads and writes with the proper shifts 1603 ! in between to align transfers to and from memory 1604 ! i0 - src address, i1 - dest address, i2 - count 1605 ! i3, i4 - tmps for used generating complete word 1606 ! i5 (word to write) 1607 ! l0 size in bits of upper part of source word (US) 1608 ! l1 size in bits of lower part of source word (LS = 32 - US) 1609 ! 
l2 size in bits of upper part of destination word (UD) 1610 ! l3 size in bits of lower part of destination word (LD = 32 - UD) 1611 ! l4 number of bytes leftover after aligned transfers complete 1612 ! l5 the number 32 1613 ! 1614 mov 32, %l5 ! load an oft-needed constant 1615 bz .align_dst_only 1616 btst 3, %i1 ! is destnation address aligned? 1617 clr %i4 ! clear registers used in either case 1618 bz %icc, .align_src_only 1619 clr %l0 1620 ! 1621 ! both source and destination addresses are unaligned 1622 ! 16231: ! align source 1624 ldub [%i0], %i3 ! read a byte from source address 1625 add %i0, 1, %i0 ! increment source address 1626 or %i4, %i3, %i4 ! or in with previous bytes (if any) 1627 btst 3, %i0 ! is source aligned? 1628 add %l0, 8, %l0 ! increment size of upper source (US) 1629 bnz,a 1b 1630 sll %i4, 8, %i4 ! make room for next byte 1631 1632 sub %l5, %l0, %l1 ! generate shift left count (LS) 1633 sll %i4, %l1, %i4 ! prepare to get rest 1634 ld [%i0], %i3 ! read a word 1635 add %i0, 4, %i0 ! increment source address 1636 srl %i3, %l0, %i5 ! upper src bits into lower dst bits 1637 or %i4, %i5, %i5 ! merge 1638 mov 24, %l3 ! align destination 16391: 1640 srl %i5, %l3, %i4 ! prepare to write a single byte 1641 stb %i4, [%i1] ! write a byte 1642 add %i1, 1, %i1 ! increment destination address 1643 sub %i2, 1, %i2 ! decrement count 1644 btst 3, %i1 ! is destination aligned? 1645 bnz,a 1b 1646 sub %l3, 8, %l3 ! delay slot, decrement shift count (LD) 1647 sub %l5, %l3, %l2 ! generate shift left count (UD) 1648 sll %i5, %l2, %i5 ! move leftover into upper bytes 1649 cmp %l2, %l0 ! cmp # reqd to fill dst w old src left 1650 bgu %ncc, .more_needed ! need more to fill than we have 1651 nop 1652 1653 sll %i3, %l1, %i3 ! clear upper used byte(s) 1654 srl %i3, %l1, %i3 1655 ! get the odd bytes between alignments 1656 sub %l0, %l2, %l0 ! regenerate shift count 1657 sub %l5, %l0, %l1 ! generate new shift left count (LS) 1658 and %i2, 3, %l4 ! 
must do remaining bytes if count%4 > 0 1659 andn %i2, 3, %i2 ! # of aligned bytes that can be moved 1660 srl %i3, %l0, %i4 1661 or %i5, %i4, %i5 1662 st %i5, [%i1] ! write a word 1663 subcc %i2, 4, %i2 ! decrement count 1664 bz %ncc, .unalign_out 1665 add %i1, 4, %i1 ! increment destination address 1666 1667 b 2f 1668 sll %i3, %l1, %i5 ! get leftover into upper bits 1669.more_needed: 1670 sll %i3, %l0, %i3 ! save remaining byte(s) 1671 srl %i3, %l0, %i3 1672 sub %l2, %l0, %l1 ! regenerate shift count 1673 sub %l5, %l1, %l0 ! generate new shift left count 1674 sll %i3, %l1, %i4 ! move to fill empty space 1675 b 3f 1676 or %i5, %i4, %i5 ! merge to complete word 1677 ! 1678 ! the source address is aligned and destination is not 1679 ! 1680.align_dst_only: 1681 ld [%i0], %i4 ! read a word 1682 add %i0, 4, %i0 ! increment source address 1683 mov 24, %l0 ! initial shift alignment count 16841: 1685 srl %i4, %l0, %i3 ! prepare to write a single byte 1686 stb %i3, [%i1] ! write a byte 1687 add %i1, 1, %i1 ! increment destination address 1688 sub %i2, 1, %i2 ! decrement count 1689 btst 3, %i1 ! is destination aligned? 1690 bnz,a 1b 1691 sub %l0, 8, %l0 ! delay slot, decrement shift count 1692.xfer: 1693 sub %l5, %l0, %l1 ! generate shift left count 1694 sll %i4, %l1, %i5 ! get leftover 16953: 1696 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0 1697 andn %i2, 3, %i2 ! # of aligned bytes that can be moved 16982: 1699 ld [%i0], %i3 ! read a source word 1700 add %i0, 4, %i0 ! increment source address 1701 srl %i3, %l0, %i4 ! upper src bits into lower dst bits 1702 or %i5, %i4, %i5 ! merge with upper dest bits (leftover) 1703 st %i5, [%i1] ! write a destination word 1704 subcc %i2, 4, %i2 ! decrement count 1705 bz %ncc, .unalign_out ! check if done 1706 add %i1, 4, %i1 ! increment destination address 1707 b 2b ! loop 1708 sll %i3, %l1, %i5 ! get leftover 1709.unalign_out: 1710 tst %l4 ! any bytes leftover? 1711 bz %ncc, .cpdone 1712 .empty ! 
allow next instruction in delay slot 17131: 1714 sub %l0, 8, %l0 ! decrement shift 1715 srl %i3, %l0, %i4 ! upper src byte into lower dst byte 1716 stb %i4, [%i1] ! write a byte 1717 subcc %l4, 1, %l4 ! decrement count 1718 bz %ncc, .cpdone ! done? 1719 add %i1, 1, %i1 ! increment destination 1720 tst %l0 ! any more previously read bytes 1721 bnz %ncc, 1b ! we have leftover bytes 1722 mov %l4, %i2 ! delay slot, mv cnt where dbytecp wants 1723 b .dbytecp ! let dbytecp do the rest 1724 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 1725 ! 1726 ! the destination address is aligned and the source is not 1727 ! 1728.align_src_only: 1729 ldub [%i0], %i3 ! read a byte from source address 1730 add %i0, 1, %i0 ! increment source address 1731 or %i4, %i3, %i4 ! or in with previous bytes (if any) 1732 btst 3, %i0 ! is source aligned? 1733 add %l0, 8, %l0 ! increment shift count (US) 1734 bnz,a .align_src_only 1735 sll %i4, 8, %i4 ! make room for next byte 1736 b,a .xfer 1737 ! 1738 ! if from address unaligned for double-word moves, 1739 ! move bytes till it is, if count is < 56 it could take 1740 ! longer to align the thing than to do the transfer 1741 ! in word size chunks right away 1742 ! 1743.aldoubcp: 1744 cmp %i2, 56 ! if count < 56, use wordcp, it takes 1745 blu,a %ncc, .alwordcp ! longer to align doubles than words 1746 mov 3, %o0 ! mask for word alignment 1747 call .alignit ! copy bytes until aligned 1748 mov 7, %o0 ! mask for double alignment 1749 ! 1750 ! source and destination are now double-word aligned 1751 ! i3 has aligned count returned by alignit 1752 ! 1753 and %i2, 7, %i2 ! unaligned leftover count 1754 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 17555: 1756 ldx [%i0+%i1], %o4 ! read from address 1757 stx %o4, [%i1] ! write at destination address 1758 subcc %i3, 8, %i3 ! dec count 1759 bgu %ncc, 5b 1760 add %i1, 8, %i1 ! delay slot, inc to address 1761 cmp %i2, 4 ! see if we can copy a word 1762 blu %ncc, .dbytecp ! 
if 3 or less bytes use bytecp 1763 .empty 1764 ! 1765 ! for leftover bytes we fall into wordcp, if needed 1766 ! 1767.wordcp: 1768 and %i2, 3, %i2 ! unaligned leftover count 17695: 1770 ld [%i0+%i1], %o4 ! read from address 1771 st %o4, [%i1] ! write at destination address 1772 subcc %i3, 4, %i3 ! dec count 1773 bgu %ncc, 5b 1774 add %i1, 4, %i1 ! delay slot, inc to address 1775 b,a .dbytecp 1776 1777 ! we come here to align copies on word boundaries 1778.alwordcp: 1779 call .alignit ! go word-align it 1780 mov 3, %o0 ! bits that must be zero to be aligned 1781 b .wordcp 1782 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 1783 1784 ! 1785 ! byte copy, works with any alignment 1786 ! 1787.bytecp: 1788 b .dbytecp 1789 sub %i0, %i1, %i0 ! i0 gets difference of src and dst 1790 1791 ! 1792 ! differenced byte copy, works with any alignment 1793 ! assumes dest in %i1 and (source - dest) in %i0 1794 ! 17951: 1796 stb %o4, [%i1] ! write to address 1797 inc %i1 ! inc to address 1798.dbytecp: 1799 deccc %i2 ! dec count 1800 bgeu,a %ncc, 1b ! loop till done 1801 ldub [%i0+%i1], %o4 ! read from address 1802 ! 1803 ! FPUSED_FLAG will not have been set in any path leading to 1804 ! this point. No need to deal with it. 1805 ! 1806.cpdone: 1807 btst BCOPY_FLAG, %l6 1808 bz,pn %icc, 2f 1809 andncc %l6, BCOPY_FLAG, %l6 1810 ! 1811 ! Here via bcopy. Check to see if the handler was NULL. 1812 ! If so, just return quietly. Otherwise, reset the 1813 ! handler and go home. 1814 ! 1815 bnz,pn %ncc, 2f 1816 nop 1817 ! 1818 ! Null handler. 1819 ! 1820 ret 1821 restore %g0, 0, %o0 1822 ! 1823 ! Here via kcopy or bcopy with a handler.Reset the 1824 ! fault handler. 1825 ! 18262: 1827 membar #Sync 1828 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1829 ret 1830 restore %g0, 0, %o0 ! return (0) 1831 1832/* 1833 * Common code used to align transfers on word and doubleword 1834 * boudaries. 
Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 *
 * Entered at .alignit with the alignment mask in %o0 (the callers in
 * bcopy pass 3 or 7), the source in %i0, the destination in %i1 and
 * the byte count in %i2.  Only the source address is tested against
 * the mask; one byte is copied per iteration until it is aligned.
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b			! still unaligned: copy one more byte
	ldub	[%i0], %o4		! read next byte (annulled once aligned)

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

/*
 * Block copy with possibly overlapped operands.
 *
 * %o0 = from address, %o1 = to address, %o2 = byte count.
 *
 * A zero count returns immediately.  If count <= abs(from - to) the
 * two regions do not overlap and the copy is handed to bcopy.
 * Otherwise the bytes are moved one at a time: backwards (last byte
 * first) when from < to, forwards in every other case.
 */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.  %o2 doubles as the index of the byte being moved.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 *
 * %i0 = source address, %i1 = destination address.  PAGESIZE bytes
 * are moved with 64-byte block loads and stores.  If FPRS_FEF was set
 * on entry, %d0-%d46 are saved on the stack first and reloaded before
 * returning; %fprs is put back to its entry value in either case.
 * Returns 0 in %o0.
 */
	ENTRY(hwblkpagecopy)
	! get another window w/space for three aligned blocks of saved fpregs
	! (4*64 bytes are reserved so that the save area can be rounded
	! down to a 64-byte boundary)
	save	%sp, -SA(MINFRAME + 4*64), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)
	! %l0 - saved fprs
	! %l1 - pointer to saved fpregs
	! %l3 - scratch pointer used while saving/restoring fpregs

	rd	%fprs, %l0		! check for unused fp
	btst	FPRS_FEF, %l0
	bz	1f
	membar	#Sync			! delay slot, runs on both paths

	! save in-use fpregs on stack
	add	%fp, STACK_BIAS - 193, %l1
	and	%l1, -64, %l1		! round down to a block boundary
	stda	%d0, [%l1]ASI_BLK_P
	add	%l1, 64, %l3
	stda	%d16, [%l3]ASI_BLK_P
	add	%l3, 64, %l3
	stda	%d32, [%l3]ASI_BLK_P
	membar	#Sync

1:	wr	%g0, FPRS_FEF, %fprs
	ldda	[%i0]ASI_BLK_P, %d0	! prime the loop with the first block
	add	%i0, 64, %i0
	set	PAGESIZE - 64, %i2	! bytes left after the primed block

	! The loop is unrolled twice.  Each half loads the next source
	! block into one bank (%d16 or %d0) while the previously loaded
	! bank is moved to %d32-%d46 and stored to the destination.
2:	ldda	[%i0]ASI_BLK_P, %d16
	fsrc1	%d0, %d32
	fsrc1	%d2, %d34
	fsrc1	%d4, %d36
	fsrc1	%d6, %d38
	fsrc1	%d8, %d40
	fsrc1	%d10, %d42
	fsrc1	%d12, %d44
	fsrc1	%d14, %d46
	stda	%d32, [%i1]ASI_BLK_P
	add	%i0, 64, %i0
	subcc	%i2, 64, %i2
	bz,pn	%ncc, 3f		! last block is now sitting in %d16
	add	%i1, 64, %i1
	ldda	[%i0]ASI_BLK_P, %d0
	fsrc1	%d16, %d32
	fsrc1	%d18, %d34
	fsrc1	%d20, %d36
	fsrc1	%d22, %d38
	fsrc1	%d24, %d40
	fsrc1	%d26, %d42
	fsrc1	%d28, %d44
	fsrc1	%d30, %d46
	stda	%d32, [%i1]ASI_BLK_P
	add	%i0, 64, %i0
	sub	%i2, 64, %i2
	ba,pt	%ncc, 2b
	add	%i1, 64, %i1

3:	membar	#Sync
	btst	FPRS_FEF, %l0
	bz	4f
	stda	%d16, [%i1]ASI_BLK_P	! delay slot, store the final block

	! restore fpregs from stack
	membar	#Sync
	ldda	[%l1]ASI_BLK_P, %d0
	add	%l1, 64, %l3
	ldda	[%l3]ASI_BLK_P, %d16
	add	%l3, 64, %l3
	ldda	[%l3]ASI_BLK_P, %d32

4:	wr	%l0, 0, %fprs		! restore fprs
	membar	#Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */

/*
 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * The only difference between default_copy{in,out} and
 * default_xcopy{in,out} is in the error handling routine they invoke
 * when a memory access error is seen. default_xcopyOP returns the errno
 * while default_copyOP returns -1 (see above). copy{in,out}_noerr set
 * a special flag (by oring the value 2 into the fault handler address)
 * if they are called with a fault handler already in place. That flag
 * causes the default handlers to trampoline to the previous handler
 * upon an error.
 *
 * None of the copyops routines grab a window until it's decided that
 * we need to do a HW block copy operation. This saves a window
 * spill/fill when we're called during socket ops. The typical IO
 * path won't cause spill/fill traps.
 *
 * This code uses a set of 4 limits for the maximum size that will
 * be copied by the plain copy loops (rather than by VIS) given a
 * particular input/output address alignment.
 * The default limits are:
 *
 * single byte aligned - 900 (hw_copy_limit_1)
 * two byte aligned - 1800 (hw_copy_limit_2)
 * four byte aligned - 3600 (hw_copy_limit_4)
 * eight byte aligned - 7200 (hw_copy_limit_8)
 *
 * If the value for a particular limit is zero, the copy will be done
 * via the copy loops rather than VIS.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous lo_fault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our nowindow fault handler into %o3.
 * Place the address of the windowed fault handler into %o4.
 * --> We'll use this handler if we end up grabbing a window
 * --> before we use VIS instructions.
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers. Based on the alignment we check count
 * against a soft limit of VIS_COPY_THRESHOLD (900 on spitfire). If
 * we're larger than VIS_COPY_THRESHOLD, we check against a limit based
 * on detected alignment. If we exceed the alignment value we copy
 * via VIS instructions.
 *
 * If we don't exceed one of the limits, we store -count in %o3,
 * we store the number of chunks (8, 4, 2 or 1 byte) operated
 * on in our basic copy loop in %o2. Following this we branch
 * to the appropriate copy loop and copy that many chunks.
 * Since we've been adding the chunk size to %o3 each time through
 * as well as decrementing %o2, we can tell if any data is
 * left to be copied by examining %o3. If that is zero, we're
 * done and can go home. If not, we figure out what the largest
 * chunk size left to be copied is and branch to that copy loop
 * unless there's only one byte left.
We load that as we're
 * branching to code that stores it just before we return.
 *
 * There is one potential situation in which we start to do a VIS
 * copy but decide to punt and return to the copy loops. There is
 * (in the default configuration) a window of 256 bytes between
 * the single byte aligned copy limit and what VIS treats as its
 * minimum if floating point is in use in the calling app. We need
 * to be prepared to handle this. See the .small_copyOP label for
 * details.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping.  All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handlers
 * for default_copyOP and copyOP_noerr are found at the end of
 * default_copyout. The handlers for default_xcopyOP are found at the
 * end of xdefault_copyin.
 */

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

/*
 * REAL_LOFAULT holds the address of the operation-specific error
 * handler that the generic fault handlers jump to.  SAVED_LOFAULT
 * holds the t_lofault value that was in place on entry; FPUSED_FLAG
 * is or'ed into it once a block copy has started.
 */
#define	REAL_LOFAULT		%g5
#define	SAVED_LOFAULT		%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 *
 * copyio_fault is the handler used once a register window has been
 * grabbed: if FPUSED_FLAG is set in SAVED_LOFAULT it puts back %gsr,
 * the fp registers and %fprs from the stack frame, then pops the
 * window before jumping to REAL_LOFAULT.
 *
 * NOTE(review): unlike copyio_fault_nowindow, this path does not store
 * SAVED_LOFAULT back into t_lofault itself - presumably the
 * REAL_LOFAULT handler does so; confirm.
 */
	ENTRY(copyio_fault)
	btst	FPUSED_FLAG, SAVED_LOFAULT
	bz	1f
	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT	! delay slot, drop the flag

	membar	#Sync

	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
	wr	%o2, 0, %gsr		! restore gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3		! was fp in use before the copy?
	bz	4f
	nop

	! restore fpregs from stack
	membar	#Sync
	add	%fp, STACK_BIAS - 257, %o2
	and	%o2, -64, %o2		! same block-aligned area used for the save
	ldda	[%o2]ASI_BLK_P, %d0
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d16
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d32
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d48
	membar	#Sync

	ba,pt	%ncc, 1f
	wr	%o3, 0, %fprs		! restore fprs

4:
	FZERO				! zero all of the fpregs
	wr	%o3, 0, %fprs		! restore fprs

1:

	restore				! pop the window taken for the block copy

	mov	SAVE_SRC, %o0		! hand the original arguments back
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2		! delay slot
	SET_SIZE(copyio_fault)

/*
 * Fault handler used while the copy is still running in leaf mode (no
 * window grabbed).  Puts the saved t_lofault back, reloads the original
 * arguments and jumps to REAL_LOFAULT.
 */
	ENTRY(copyio_fault_nowindow)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2		! delay slot
	SET_SIZE(copyio_fault_nowindow)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	! %o4 = windowed fault handler, %o3 = nowindow fault handler.
	! The nowindow handler is installed now; the previous t_lofault
	! is kept in SAVED_LOFAULT.
	sethi	%hi(copyio_fault), %o4
	or	%o4, %lo(copyio_fault), %o4
	sethi	%hi(copyio_fault_nowindow), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault_nowindow), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	! keep the arguments where the fault handlers can find them
	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	!
! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3		! (annulled unless taken) alignment bits
	!
	! What was previously ".small_copyout"
	! Do full differenced copy.
	!
.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl		! loop while the offset is negative
	ldub	[%o0 + %o3], %o4	! (annulled on exit) load next byte
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0			! return (0)
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o0 | %o1 (used for alignment checking)
	! %o4 is alternate lo_fault (copyio_fault)
	! SAVED_LOFAULT is original lo_fault
	!
	! See if we're single byte aligned. If we are, check the
	! limit for single byte copies. If we're smaller or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3			! delay slot, condition codes for .dcoh8
	!
	! Single byte aligned.  Do we do it via HW or via
	! byte for byte?  Do a quick no memory reference
	! check to pick up small copies.
	!
2259 subcc %o2, VIS_COPY_THRESHOLD, %o3 2260 bleu,pt %ncc, .dcobcp 2261 sethi %hi(hw_copy_limit_1), %o3 2262 ! 2263 ! Big enough that we need to check the HW limit for 2264 ! this size copy. 2265 ! 2266 ld [%o3 + %lo(hw_copy_limit_1)], %o3 2267 ! 2268 ! Is HW copy on? If not, do everything byte for byte. 2269 ! 2270 tst %o3 2271 bz,pn %icc, .dcobcp 2272 subcc %o3, %o2, %o3 2273 ! 2274 ! If we're less than or equal to the single byte copy limit, 2275 ! bop to the copy loop. 2276 ! 2277 bge,pt %ncc, .dcobcp 2278 nop 2279 ! 2280 ! We're big enough and copy is on. Do it with HW. 2281 ! 2282 ba,pt %ncc, .big_copyout 2283 nop 2284.dcoh8: 2285 ! 2286 ! 8 byte aligned? 2287 ! 2288 bnz,a %ncc, .dcoh4 2289 btst 3, %o3 2290 ! 2291 ! See if we're in the "small range". 2292 ! If so, go off and do the copy. 2293 ! If not, load the hard limit. %o3 is 2294 ! available for reuse. 2295 ! 2296 subcc %o2, VIS_COPY_THRESHOLD, %o3 2297 bleu,pt %ncc, .dcos8 2298 sethi %hi(hw_copy_limit_8), %o3 2299 ld [%o3 + %lo(hw_copy_limit_8)], %o3 2300 ! 2301 ! If it's zero, there's no HW bcopy. 2302 ! Bop off to the aligned copy. 2303 ! 2304 tst %o3 2305 bz,pn %icc, .dcos8 2306 subcc %o3, %o2, %o3 2307 ! 2308 ! We're negative if our size is larger than hw_copy_limit_8. 2309 ! 2310 bge,pt %ncc, .dcos8 2311 nop 2312 ! 2313 ! HW assist is on and we're large enough. Do it. 2314 ! 2315 ba,pt %ncc, .big_copyout 2316 nop 2317.dcos8: 2318 ! 2319 ! Housekeeping for copy loops. Uses same idea as in the byte for 2320 ! byte copy loop above. 2321 ! 2322 add %o0, %o2, %o0 2323 add %o1, %o2, %o1 2324 sub %g0, %o2, %o3 2325 ba,pt %ncc, .dodebc 2326 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy 2327 ! 2328 ! 4 byte aligned? 2329 ! 2330.dcoh4: 2331 bnz,pn %ncc, .dcoh2 2332 ! 2333 ! See if we're in the "small range". 2334 ! If so, go off an do the copy. 2335 ! If not, load the hard limit. %o3 is 2336 ! available for reuse. 2337 ! 
2338 subcc %o2, VIS_COPY_THRESHOLD, %o3 2339 bleu,pt %ncc, .dcos4 2340 sethi %hi(hw_copy_limit_4), %o3 2341 ld [%o3 + %lo(hw_copy_limit_4)], %o3 2342 ! 2343 ! If it's zero, there's no HW bcopy. 2344 ! Bop off to the aligned copy. 2345 ! 2346 tst %o3 2347 bz,pn %icc, .dcos4 2348 subcc %o3, %o2, %o3 2349 ! 2350 ! We're negative if our size is larger than hw_copy_limit_4. 2351 ! 2352 bge,pt %ncc, .dcos4 2353 nop 2354 ! 2355 ! HW assist is on and we're large enough. Do it. 2356 ! 2357 ba,pt %ncc, .big_copyout 2358 nop 2359.dcos4: 2360 add %o0, %o2, %o0 2361 add %o1, %o2, %o1 2362 sub %g0, %o2, %o3 2363 ba,pt %ncc, .dodfbc 2364 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy 2365 ! 2366 ! We must be 2 byte aligned. Off we go. 2367 ! The check for small copies was done in the 2368 ! delay at .dcoh4 2369 ! 2370.dcoh2: 2371 ble %ncc, .dcos2 2372 sethi %hi(hw_copy_limit_2), %o3 2373 ld [%o3 + %lo(hw_copy_limit_2)], %o3 2374 tst %o3 2375 bz,pn %icc, .dcos2 2376 subcc %o3, %o2, %o3 2377 bge,pt %ncc, .dcos2 2378 nop 2379 ! 2380 ! HW is on and we're big enough. Do it. 2381 ! 2382 ba,pt %ncc, .big_copyout 2383 nop 2384.dcos2: 2385 add %o0, %o2, %o0 2386 add %o1, %o2, %o1 2387 sub %g0, %o2, %o3 2388 ba,pt %ncc, .dodtbc 2389 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy 2390.small_copyout: 2391 ! 2392 ! Why are we doing this AGAIN? There are certain conditions in 2393 ! big_copyout that will cause us to forego the HW assisted copies 2394 ! and bounce back to a non-HW assisted copy. This dispatches those 2395 ! copies. Note that we branch around this in the main line code. 2396 ! 2397 ! We make no check for limits or HW enablement here. We've 2398 ! already been told that we're a poster child so just go off 2399 ! and do it. 2400 ! 2401 or %o0, %o1, %o3 2402 btst 1, %o3 2403 bnz %icc, .dcobcp ! 
Most likely 2404 btst 7, %o3 2405 bz %icc, .dcos8 2406 btst 3, %o3 2407 bz %icc, .dcos4 2408 nop 2409 ba,pt %ncc, .dcos2 2410 nop 2411 .align 32 2412.dodebc: 2413 ldx [%o0 + %o3], %o4 2414 deccc %o2 2415 stxa %o4, [%o1 + %o3]ASI_USER 2416 bg,pt %ncc, .dodebc 2417 addcc %o3, 8, %o3 2418 ! 2419 ! End of copy loop. Check to see if we're done. Most 2420 ! eight byte aligned copies end here. 2421 ! 2422 bz,pt %ncc, .dcofh 2423 nop 2424 ! 2425 ! Something is left - do it byte for byte. 2426 ! 2427 ba,pt %ncc, .dcocl 2428 ldub [%o0 + %o3], %o4 ! load next byte 2429 ! 2430 ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy. 2431 ! 2432 .align 32 2433.dodfbc: 2434 lduw [%o0 + %o3], %o4 2435 deccc %o2 2436 sta %o4, [%o1 + %o3]ASI_USER 2437 bg,pt %ncc, .dodfbc 2438 addcc %o3, 4, %o3 2439 ! 2440 ! End of copy loop. Check to see if we're done. Most 2441 ! four byte aligned copies end here. 2442 ! 2443 bz,pt %ncc, .dcofh 2444 nop 2445 ! 2446 ! Something is left. Do it byte for byte. 2447 ! 2448 ba,pt %ncc, .dcocl 2449 ldub [%o0 + %o3], %o4 ! load next byte 2450 ! 2451 ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to 2452 ! copy. 2453 ! 2454 .align 32 2455.dodtbc: 2456 lduh [%o0 + %o3], %o4 2457 deccc %o2 2458 stha %o4, [%o1 + %o3]ASI_USER 2459 bg,pt %ncc, .dodtbc 2460 addcc %o3, 2, %o3 2461 ! 2462 ! End of copy loop. Anything left? 2463 ! 2464 bz,pt %ncc, .dcofh 2465 nop 2466 ! 2467 ! Deal with the last byte 2468 ! 2469 ldub [%o0 + %o3], %o4 2470 stba %o4, [%o1 + %o3]ASI_USER 2471.dcofh: 2472 membar #Sync 2473 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2474 retl 2475 clr %o0 2476 2477.big_copyout: 2478 ! 2479 ! Are we using the FP registers? 2480 ! 2481 rd %fprs, %o3 ! check for unused fp 2482 btst FPRS_FEF, %o3 2483 bnz %icc, .copyout_fpregs_inuse 2484 nop 2485 ! 2486 ! We're going to go off and do a block copy. 2487 ! Switch fault hendlers and grab a window. We 2488 ! 
don't do a membar #Sync since we've done only 2489 ! kernel data to this point. 2490 ! 2491 stn %o4, [THREAD_REG + T_LOFAULT] 2492 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 2493 ! 2494 ! %o3 is now %i3. Save original %fprs. 2495 ! 2496 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] 2497 ba,pt %ncc, .do_block_copyout ! Not in use. Go off and do it. 2498 wr %g0, FPRS_FEF, %fprs ! clear %fprs 2499 ! 2500.copyout_fpregs_inuse: 2501 ! 2502 ! We're here if the FP regs are in use. Need to see if the request 2503 ! exceeds our suddenly larger minimum. 2504 ! 2505 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger 2506 bl %ncc, .small_copyout 2507 nop 2508 ! 2509 ! We're going to go off and do a block copy. 2510 ! Change to the heavy duty fault handler and grab a window first. 2511 ! 2512 stn %o4, [THREAD_REG + T_LOFAULT] 2513 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 2514 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] 2515 ! 2516 ! save in-use fpregs on stack 2517 ! 2518 wr %g0, FPRS_FEF, %fprs 2519 membar #Sync 2520 add %fp, STACK_BIAS - 257, %o2 2521 and %o2, -64, %o2 2522 stda %d0, [%o2]ASI_BLK_P 2523 add %o2, 64, %o2 2524 stda %d16, [%o2]ASI_BLK_P 2525 add %o2, 64, %o2 2526 stda %d32, [%o2]ASI_BLK_P 2527 add %o2, 64, %o2 2528 stda %d48, [%o2]ASI_BLK_P 2529 membar #Sync 2530 2531.do_block_copyout: 2532 membar #StoreStore|#StoreLoad|#LoadStore 2533 2534 rd %gsr, %o2 2535 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 2536 2537 ! Set the lower bit in the saved t_lofault to indicate 2538 ! that we need to clear the %fprs register on the way 2539 ! out 2540 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 2541 2542 ! Swap src/dst since the code below is memcpy code 2543 ! and memcpy/bcopy have different calling sequences 2544 mov %i1, %i5 2545 mov %i0, %i1 2546 mov %i5, %i0 2547 2548!!! This code is nearly identical to the version in the sun4u 2549!!! libc_psr. Most bugfixes made to that file should be 2550!!! merged into this routine. 
2551 2552 andcc %i0, 7, %o3 2553 bz %ncc, copyout_blkcpy 2554 sub %o3, 8, %o3 2555 neg %o3 2556 sub %i2, %o3, %i2 2557 2558 ! Align Destination on double-word boundary 2559 25602: ldub [%i1], %o4 2561 inc %i1 2562 stba %o4, [%i0]ASI_USER 2563 deccc %o3 2564 bgu %ncc, 2b 2565 inc %i0 2566copyout_blkcpy: 2567 andcc %i0, 63, %i3 2568 bz,pn %ncc, copyout_blalign ! now block aligned 2569 sub %i3, 64, %i3 2570 neg %i3 ! bytes till block aligned 2571 sub %i2, %i3, %i2 ! update %i2 with new count 2572 2573 ! Copy %i3 bytes till dst is block (64 byte) aligned. use 2574 ! double word copies. 2575 2576 alignaddr %i1, %g0, %g1 2577 ldd [%g1], %d0 2578 add %g1, 8, %g1 25796: 2580 ldd [%g1], %d2 2581 add %g1, 8, %g1 2582 subcc %i3, 8, %i3 2583 faligndata %d0, %d2, %d8 2584 stda %d8, [%i0]ASI_USER 2585 add %i1, 8, %i1 2586 bz,pn %ncc, copyout_blalign 2587 add %i0, 8, %i0 2588 ldd [%g1], %d0 2589 add %g1, 8, %g1 2590 subcc %i3, 8, %i3 2591 faligndata %d2, %d0, %d8 2592 stda %d8, [%i0]ASI_USER 2593 add %i1, 8, %i1 2594 bgu,pn %ncc, 6b 2595 add %i0, 8, %i0 2596 2597copyout_blalign: 2598 membar #StoreLoad 2599 ! %i2 = total length 2600 ! %i3 = blocks (length - 64) / 64 2601 ! %i4 = doubles remaining (length - blocks) 2602 sub %i2, 64, %i3 2603 andn %i3, 63, %i3 2604 sub %i2, %i3, %i4 2605 andn %i4, 7, %i4 2606 sub %i4, 16, %i4 2607 sub %i2, %i4, %i2 2608 sub %i2, %i3, %i2 2609 2610 andn %i1, 0x3f, %l7 ! blk aligned address 2611 alignaddr %i1, %g0, %g0 ! gen %gsr 2612 2613 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5 2614 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3 2615 add %i1, %i4, %i1 2616 add %i1, %i3, %i1 2617 2618 ldda [%l7]ASI_BLK_P, %d0 2619 add %l7, 64, %l7 2620 ldda [%l7]ASI_BLK_P, %d16 2621 add %l7, 64, %l7 2622 ldda [%l7]ASI_BLK_P, %d32 2623 add %l7, 64, %l7 2624 sub %i3, 128, %i3 2625 2626 ! switch statement to get us to the right 8 byte blk within a 2627 ! 
64 byte block 2628 2629 cmp %i5, 4 2630 bgeu,a copyout_hlf 2631 cmp %i5, 6 2632 cmp %i5, 2 2633 bgeu,a copyout_sqtr 2634 nop 2635 cmp %i5, 1 2636 be,a copyout_seg1 2637 nop 2638 ba,pt %ncc, copyout_seg0 2639 nop 2640copyout_sqtr: 2641 be,a copyout_seg2 2642 nop 2643 ba,pt %ncc, copyout_seg3 2644 nop 2645 2646copyout_hlf: 2647 bgeu,a copyout_fqtr 2648 nop 2649 cmp %i5, 5 2650 be,a copyout_seg5 2651 nop 2652 ba,pt %ncc, copyout_seg4 2653 nop 2654copyout_fqtr: 2655 be,a copyout_seg6 2656 nop 2657 ba,pt %ncc, copyout_seg7 2658 nop 2659 2660copyout_seg0: 2661 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2662 FALIGN_D0 2663 ldda [%l7]ASI_BLK_P, %d0 2664 stda %d48, [%i0]ASI_BLK_AIUS 2665 add %l7, 64, %l7 2666 subcc %i3, 64, %i3 2667 bz,pn %ncc, 0f 2668 add %i0, 64, %i0 2669 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2670 FALIGN_D16 2671 ldda [%l7]ASI_BLK_P, %d16 2672 stda %d48, [%i0]ASI_BLK_AIUS 2673 add %l7, 64, %l7 2674 subcc %i3, 64, %i3 2675 bz,pn %ncc, 1f 2676 add %i0, 64, %i0 2677 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2678 FALIGN_D32 2679 ldda [%l7]ASI_BLK_P, %d32 2680 stda %d48, [%i0]ASI_BLK_AIUS 2681 add %l7, 64, %l7 2682 subcc %i3, 64, %i3 2683 bz,pn %ncc, 2f 2684 add %i0, 64, %i0 2685 ba,a,pt %ncc, copyout_seg0 2686 26870: 2688 FALIGN_D16 2689 stda %d48, [%i0]ASI_BLK_AIUS 2690 add %i0, 64, %i0 2691 membar #Sync 2692 FALIGN_D32 2693 stda %d48, [%i0]ASI_BLK_AIUS 2694 ba,pt %ncc, copyout_blkd0 2695 add %i0, 64, %i0 2696 26971: 2698 FALIGN_D32 2699 stda %d48, [%i0]ASI_BLK_AIUS 2700 add %i0, 64, %i0 2701 membar #Sync 2702 FALIGN_D0 2703 stda %d48, [%i0]ASI_BLK_AIUS 2704 ba,pt %ncc, copyout_blkd16 2705 add %i0, 64, %i0 2706 27072: 2708 FALIGN_D0 2709 stda %d48, [%i0]ASI_BLK_AIUS 2710 add %i0, 64, %i0 2711 membar #Sync 2712 FALIGN_D16 2713 stda %d48, [%i0]ASI_BLK_AIUS 2714 ba,pt %ncc, copyout_blkd32 2715 add %i0, 64, %i0 2716 2717copyout_seg1: 2718 ! 
1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2719 FALIGN_D2 2720 ldda [%l7]ASI_BLK_P, %d0 2721 stda %d48, [%i0]ASI_BLK_AIUS 2722 add %l7, 64, %l7 2723 subcc %i3, 64, %i3 2724 bz,pn %ncc, 0f 2725 add %i0, 64, %i0 2726 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2727 FALIGN_D18 2728 ldda [%l7]ASI_BLK_P, %d16 2729 stda %d48, [%i0]ASI_BLK_AIUS 2730 add %l7, 64, %l7 2731 subcc %i3, 64, %i3 2732 bz,pn %ncc, 1f 2733 add %i0, 64, %i0 2734 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2735 FALIGN_D34 2736 ldda [%l7]ASI_BLK_P, %d32 2737 stda %d48, [%i0]ASI_BLK_AIUS 2738 add %l7, 64, %l7 2739 subcc %i3, 64, %i3 2740 bz,pn %ncc, 2f 2741 add %i0, 64, %i0 2742 ba,a,pt %ncc, copyout_seg1 27430: 2744 FALIGN_D18 2745 stda %d48, [%i0]ASI_BLK_AIUS 2746 add %i0, 64, %i0 2747 membar #Sync 2748 FALIGN_D34 2749 stda %d48, [%i0]ASI_BLK_AIUS 2750 ba,pt %ncc, copyout_blkd2 2751 add %i0, 64, %i0 2752 27531: 2754 FALIGN_D34 2755 stda %d48, [%i0]ASI_BLK_AIUS 2756 add %i0, 64, %i0 2757 membar #Sync 2758 FALIGN_D2 2759 stda %d48, [%i0]ASI_BLK_AIUS 2760 ba,pt %ncc, copyout_blkd18 2761 add %i0, 64, %i0 2762 27632: 2764 FALIGN_D2 2765 stda %d48, [%i0]ASI_BLK_AIUS 2766 add %i0, 64, %i0 2767 membar #Sync 2768 FALIGN_D18 2769 stda %d48, [%i0]ASI_BLK_AIUS 2770 ba,pt %ncc, copyout_blkd34 2771 add %i0, 64, %i0 2772 2773copyout_seg2: 2774 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2775 FALIGN_D4 2776 ldda [%l7]ASI_BLK_P, %d0 2777 stda %d48, [%i0]ASI_BLK_AIUS 2778 add %l7, 64, %l7 2779 subcc %i3, 64, %i3 2780 bz,pn %ncc, 0f 2781 add %i0, 64, %i0 2782 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2783 FALIGN_D20 2784 ldda [%l7]ASI_BLK_P, %d16 2785 stda %d48, [%i0]ASI_BLK_AIUS 2786 add %l7, 64, %l7 2787 subcc %i3, 64, %i3 2788 bz,pn %ncc, 1f 2789 add %i0, 64, %i0 2790 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2791 FALIGN_D36 2792 ldda [%l7]ASI_BLK_P, %d32 2793 stda %d48, [%i0]ASI_BLK_AIUS 2794 add %l7, 64, %l7 2795 subcc %i3, 64, %i3 2796 bz,pn %ncc, 2f 2797 add %i0, 64, %i0 2798 ba,a,pt %ncc, copyout_seg2 2799 28000: 2801 FALIGN_D20 2802 stda %d48, [%i0]ASI_BLK_AIUS 2803 add %i0, 64, %i0 2804 membar #Sync 2805 FALIGN_D36 2806 stda %d48, [%i0]ASI_BLK_AIUS 2807 ba,pt %ncc, copyout_blkd4 2808 add %i0, 64, %i0 2809 28101: 2811 FALIGN_D36 2812 stda %d48, [%i0]ASI_BLK_AIUS 2813 add %i0, 64, %i0 2814 membar #Sync 2815 FALIGN_D4 2816 stda %d48, [%i0]ASI_BLK_AIUS 2817 ba,pt %ncc, copyout_blkd20 2818 add %i0, 64, %i0 2819 28202: 2821 FALIGN_D4 2822 stda %d48, [%i0]ASI_BLK_AIUS 2823 add %i0, 64, %i0 2824 membar #Sync 2825 FALIGN_D20 2826 stda %d48, [%i0]ASI_BLK_AIUS 2827 ba,pt %ncc, copyout_blkd36 2828 add %i0, 64, %i0 2829 2830copyout_seg3: 2831 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2832 FALIGN_D6 2833 ldda [%l7]ASI_BLK_P, %d0 2834 stda %d48, [%i0]ASI_BLK_AIUS 2835 add %l7, 64, %l7 2836 subcc %i3, 64, %i3 2837 bz,pn %ncc, 0f 2838 add %i0, 64, %i0 2839 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2840 FALIGN_D22 2841 ldda [%l7]ASI_BLK_P, %d16 2842 stda %d48, [%i0]ASI_BLK_AIUS 2843 add %l7, 64, %l7 2844 subcc %i3, 64, %i3 2845 bz,pn %ncc, 1f 2846 add %i0, 64, %i0 2847 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2848 FALIGN_D38 2849 ldda [%l7]ASI_BLK_P, %d32 2850 stda %d48, [%i0]ASI_BLK_AIUS 2851 add %l7, 64, %l7 2852 subcc %i3, 64, %i3 2853 bz,pn %ncc, 2f 2854 add %i0, 64, %i0 2855 ba,a,pt %ncc, copyout_seg3 2856 28570: 2858 FALIGN_D22 2859 stda %d48, [%i0]ASI_BLK_AIUS 2860 add %i0, 64, %i0 2861 membar #Sync 2862 FALIGN_D38 2863 stda %d48, [%i0]ASI_BLK_AIUS 2864 ba,pt %ncc, copyout_blkd6 2865 add %i0, 64, %i0 2866 28671: 2868 FALIGN_D38 2869 stda %d48, [%i0]ASI_BLK_AIUS 2870 add %i0, 64, %i0 2871 membar #Sync 2872 FALIGN_D6 2873 stda %d48, [%i0]ASI_BLK_AIUS 2874 ba,pt %ncc, copyout_blkd22 2875 add %i0, 64, %i0 2876 28772: 2878 FALIGN_D6 2879 stda %d48, [%i0]ASI_BLK_AIUS 2880 add %i0, 64, %i0 2881 membar #Sync 2882 FALIGN_D22 2883 stda %d48, [%i0]ASI_BLK_AIUS 2884 ba,pt %ncc, copyout_blkd38 2885 add %i0, 64, %i0 2886 2887copyout_seg4: 2888 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2889 FALIGN_D8 2890 ldda [%l7]ASI_BLK_P, %d0 2891 stda %d48, [%i0]ASI_BLK_AIUS 2892 add %l7, 64, %l7 2893 subcc %i3, 64, %i3 2894 bz,pn %ncc, 0f 2895 add %i0, 64, %i0 2896 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2897 FALIGN_D24 2898 ldda [%l7]ASI_BLK_P, %d16 2899 stda %d48, [%i0]ASI_BLK_AIUS 2900 add %l7, 64, %l7 2901 subcc %i3, 64, %i3 2902 bz,pn %ncc, 1f 2903 add %i0, 64, %i0 2904 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2905 FALIGN_D40 2906 ldda [%l7]ASI_BLK_P, %d32 2907 stda %d48, [%i0]ASI_BLK_AIUS 2908 add %l7, 64, %l7 2909 subcc %i3, 64, %i3 2910 bz,pn %ncc, 2f 2911 add %i0, 64, %i0 2912 ba,a,pt %ncc, copyout_seg4 2913 29140: 2915 FALIGN_D24 2916 stda %d48, [%i0]ASI_BLK_AIUS 2917 add %i0, 64, %i0 2918 membar #Sync 2919 FALIGN_D40 2920 stda %d48, [%i0]ASI_BLK_AIUS 2921 ba,pt %ncc, copyout_blkd8 2922 add %i0, 64, %i0 2923 29241: 2925 FALIGN_D40 2926 stda %d48, [%i0]ASI_BLK_AIUS 2927 add %i0, 64, %i0 2928 membar #Sync 2929 FALIGN_D8 2930 stda %d48, [%i0]ASI_BLK_AIUS 2931 ba,pt %ncc, copyout_blkd24 2932 add %i0, 64, %i0 2933 29342: 2935 FALIGN_D8 2936 stda %d48, [%i0]ASI_BLK_AIUS 2937 add %i0, 64, %i0 2938 membar #Sync 2939 FALIGN_D24 2940 stda %d48, [%i0]ASI_BLK_AIUS 2941 ba,pt %ncc, copyout_blkd40 2942 add %i0, 64, %i0 2943 2944copyout_seg5: 2945 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2946 FALIGN_D10 2947 ldda [%l7]ASI_BLK_P, %d0 2948 stda %d48, [%i0]ASI_BLK_AIUS 2949 add %l7, 64, %l7 2950 subcc %i3, 64, %i3 2951 bz,pn %ncc, 0f 2952 add %i0, 64, %i0 2953 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2954 FALIGN_D26 2955 ldda [%l7]ASI_BLK_P, %d16 2956 stda %d48, [%i0]ASI_BLK_AIUS 2957 add %l7, 64, %l7 2958 subcc %i3, 64, %i3 2959 bz,pn %ncc, 1f 2960 add %i0, 64, %i0 2961 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2962 FALIGN_D42 2963 ldda [%l7]ASI_BLK_P, %d32 2964 stda %d48, [%i0]ASI_BLK_AIUS 2965 add %l7, 64, %l7 2966 subcc %i3, 64, %i3 2967 bz,pn %ncc, 2f 2968 add %i0, 64, %i0 2969 ba,a,pt %ncc, copyout_seg5 2970 29710: 2972 FALIGN_D26 2973 stda %d48, [%i0]ASI_BLK_AIUS 2974 add %i0, 64, %i0 2975 membar #Sync 2976 FALIGN_D42 2977 stda %d48, [%i0]ASI_BLK_AIUS 2978 ba,pt %ncc, copyout_blkd10 2979 add %i0, 64, %i0 2980 29811: 2982 FALIGN_D42 2983 stda %d48, [%i0]ASI_BLK_AIUS 2984 add %i0, 64, %i0 2985 membar #Sync 2986 FALIGN_D10 2987 stda %d48, [%i0]ASI_BLK_AIUS 2988 ba,pt %ncc, copyout_blkd26 2989 add %i0, 64, %i0 2990 29912: 2992 FALIGN_D10 2993 stda %d48, [%i0]ASI_BLK_AIUS 2994 add %i0, 64, %i0 2995 membar #Sync 2996 FALIGN_D26 2997 stda %d48, [%i0]ASI_BLK_AIUS 2998 ba,pt %ncc, copyout_blkd42 2999 add %i0, 64, %i0 3000 3001copyout_seg6: 3002 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 3003 FALIGN_D12 3004 ldda [%l7]ASI_BLK_P, %d0 3005 stda %d48, [%i0]ASI_BLK_AIUS 3006 add %l7, 64, %l7 3007 subcc %i3, 64, %i3 3008 bz,pn %ncc, 0f 3009 add %i0, 64, %i0 3010 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3011 FALIGN_D28 3012 ldda [%l7]ASI_BLK_P, %d16 3013 stda %d48, [%i0]ASI_BLK_AIUS 3014 add %l7, 64, %l7 3015 subcc %i3, 64, %i3 3016 bz,pn %ncc, 1f 3017 add %i0, 64, %i0 3018 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3019 FALIGN_D44 3020 ldda [%l7]ASI_BLK_P, %d32 3021 stda %d48, [%i0]ASI_BLK_AIUS 3022 add %l7, 64, %l7 3023 subcc %i3, 64, %i3 3024 bz,pn %ncc, 2f 3025 add %i0, 64, %i0 3026 ba,a,pt %ncc, copyout_seg6 3027 30280: 3029 FALIGN_D28 3030 stda %d48, [%i0]ASI_BLK_AIUS 3031 add %i0, 64, %i0 3032 membar #Sync 3033 FALIGN_D44 3034 stda %d48, [%i0]ASI_BLK_AIUS 3035 ba,pt %ncc, copyout_blkd12 3036 add %i0, 64, %i0 3037 30381: 3039 FALIGN_D44 3040 stda %d48, [%i0]ASI_BLK_AIUS 3041 add %i0, 64, %i0 3042 membar #Sync 3043 FALIGN_D12 3044 stda %d48, [%i0]ASI_BLK_AIUS 3045 ba,pt %ncc, copyout_blkd28 3046 add %i0, 64, %i0 3047 30482: 3049 FALIGN_D12 3050 stda %d48, [%i0]ASI_BLK_AIUS 3051 add %i0, 64, %i0 3052 membar #Sync 3053 FALIGN_D28 3054 stda %d48, [%i0]ASI_BLK_AIUS 3055 ba,pt %ncc, copyout_blkd44 3056 add %i0, 64, %i0 3057 3058copyout_seg7: 3059 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 3060 FALIGN_D14 3061 ldda [%l7]ASI_BLK_P, %d0 3062 stda %d48, [%i0]ASI_BLK_AIUS 3063 add %l7, 64, %l7 3064 subcc %i3, 64, %i3 3065 bz,pn %ncc, 0f 3066 add %i0, 64, %i0 3067 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3068 FALIGN_D30 3069 ldda [%l7]ASI_BLK_P, %d16 3070 stda %d48, [%i0]ASI_BLK_AIUS 3071 add %l7, 64, %l7 3072 subcc %i3, 64, %i3 3073 bz,pn %ncc, 1f 3074 add %i0, 64, %i0 3075 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3076 FALIGN_D46 3077 ldda [%l7]ASI_BLK_P, %d32 3078 stda %d48, [%i0]ASI_BLK_AIUS 3079 add %l7, 64, %l7 3080 subcc %i3, 64, %i3 3081 bz,pn %ncc, 2f 3082 add %i0, 64, %i0 3083 ba,a,pt %ncc, copyout_seg7 3084 30850: 3086 FALIGN_D30 3087 stda %d48, [%i0]ASI_BLK_AIUS 3088 add %i0, 64, %i0 3089 membar #Sync 3090 FALIGN_D46 3091 stda %d48, [%i0]ASI_BLK_AIUS 3092 ba,pt %ncc, copyout_blkd14 3093 add %i0, 64, %i0 3094 30951: 3096 FALIGN_D46 3097 stda %d48, [%i0]ASI_BLK_AIUS 3098 add %i0, 64, %i0 3099 membar #Sync 3100 FALIGN_D14 3101 stda %d48, [%i0]ASI_BLK_AIUS 3102 ba,pt %ncc, copyout_blkd30 3103 add %i0, 64, %i0 3104 31052: 3106 FALIGN_D14 3107 stda %d48, [%i0]ASI_BLK_AIUS 3108 add %i0, 64, %i0 3109 membar #Sync 3110 FALIGN_D30 3111 stda %d48, [%i0]ASI_BLK_AIUS 3112 ba,pt %ncc, copyout_blkd46 3113 add %i0, 64, %i0 3114 3115 3116 ! 3117 ! dribble out the last partial block 3118 ! 3119copyout_blkd0: 3120 subcc %i4, 8, %i4 3121 blu,pn %ncc, copyout_blkdone 3122 faligndata %d0, %d2, %d48 3123 stda %d48, [%i0]ASI_USER 3124 add %i0, 8, %i0 3125copyout_blkd2: 3126 subcc %i4, 8, %i4 3127 blu,pn %ncc, copyout_blkdone 3128 faligndata %d2, %d4, %d48 3129 stda %d48, [%i0]ASI_USER 3130 add %i0, 8, %i0 3131copyout_blkd4: 3132 subcc %i4, 8, %i4 3133 blu,pn %ncc, copyout_blkdone 3134 faligndata %d4, %d6, %d48 3135 stda %d48, [%i0]ASI_USER 3136 add %i0, 8, %i0 3137copyout_blkd6: 3138 subcc %i4, 8, %i4 3139 blu,pn %ncc, copyout_blkdone 3140 faligndata %d6, %d8, %d48 3141 stda %d48, [%i0]ASI_USER 3142 add %i0, 8, %i0 3143copyout_blkd8: 3144 subcc %i4, 8, %i4 3145 blu,pn %ncc, copyout_blkdone 3146 faligndata %d8, %d10, %d48 3147 stda %d48, [%i0]ASI_USER 3148 add %i0, 8, %i0 3149copyout_blkd10: 3150 subcc %i4, 8, %i4 3151 blu,pn %ncc, copyout_blkdone 3152 faligndata %d10, %d12, %d48 3153 stda %d48, [%i0]ASI_USER 3154 add %i0, 8, %i0 3155copyout_blkd12: 3156 subcc %i4, 8, %i4 3157 blu,pn %ncc, copyout_blkdone 3158 faligndata %d12, %d14, %d48 
3159 stda %d48, [%i0]ASI_USER 3160 add %i0, 8, %i0 3161copyout_blkd14: 3162 subcc %i4, 8, %i4 3163 blu,pn %ncc, copyout_blkdone 3164 fsrc1 %d14, %d0 3165 ba,a,pt %ncc, copyout_blkleft 3166 3167copyout_blkd16: 3168 subcc %i4, 8, %i4 3169 blu,pn %ncc, copyout_blkdone 3170 faligndata %d16, %d18, %d48 3171 stda %d48, [%i0]ASI_USER 3172 add %i0, 8, %i0 3173copyout_blkd18: 3174 subcc %i4, 8, %i4 3175 blu,pn %ncc, copyout_blkdone 3176 faligndata %d18, %d20, %d48 3177 stda %d48, [%i0]ASI_USER 3178 add %i0, 8, %i0 3179copyout_blkd20: 3180 subcc %i4, 8, %i4 3181 blu,pn %ncc, copyout_blkdone 3182 faligndata %d20, %d22, %d48 3183 stda %d48, [%i0]ASI_USER 3184 add %i0, 8, %i0 3185copyout_blkd22: 3186 subcc %i4, 8, %i4 3187 blu,pn %ncc, copyout_blkdone 3188 faligndata %d22, %d24, %d48 3189 stda %d48, [%i0]ASI_USER 3190 add %i0, 8, %i0 3191copyout_blkd24: 3192 subcc %i4, 8, %i4 3193 blu,pn %ncc, copyout_blkdone 3194 faligndata %d24, %d26, %d48 3195 stda %d48, [%i0]ASI_USER 3196 add %i0, 8, %i0 3197copyout_blkd26: 3198 subcc %i4, 8, %i4 3199 blu,pn %ncc, copyout_blkdone 3200 faligndata %d26, %d28, %d48 3201 stda %d48, [%i0]ASI_USER 3202 add %i0, 8, %i0 3203copyout_blkd28: 3204 subcc %i4, 8, %i4 3205 blu,pn %ncc, copyout_blkdone 3206 faligndata %d28, %d30, %d48 3207 stda %d48, [%i0]ASI_USER 3208 add %i0, 8, %i0 3209copyout_blkd30: 3210 subcc %i4, 8, %i4 3211 blu,pn %ncc, copyout_blkdone 3212 fsrc1 %d30, %d0 3213 ba,a,pt %ncc, copyout_blkleft 3214copyout_blkd32: 3215 subcc %i4, 8, %i4 3216 blu,pn %ncc, copyout_blkdone 3217 faligndata %d32, %d34, %d48 3218 stda %d48, [%i0]ASI_USER 3219 add %i0, 8, %i0 3220copyout_blkd34: 3221 subcc %i4, 8, %i4 3222 blu,pn %ncc, copyout_blkdone 3223 faligndata %d34, %d36, %d48 3224 stda %d48, [%i0]ASI_USER 3225 add %i0, 8, %i0 3226copyout_blkd36: 3227 subcc %i4, 8, %i4 3228 blu,pn %ncc, copyout_blkdone 3229 faligndata %d36, %d38, %d48 3230 stda %d48, [%i0]ASI_USER 3231 add %i0, 8, %i0 3232copyout_blkd38: 3233 subcc %i4, 8, %i4 3234 blu,pn %ncc, 
copyout_blkdone 3235 faligndata %d38, %d40, %d48 3236 stda %d48, [%i0]ASI_USER 3237 add %i0, 8, %i0 3238copyout_blkd40: 3239 subcc %i4, 8, %i4 3240 blu,pn %ncc, copyout_blkdone 3241 faligndata %d40, %d42, %d48 3242 stda %d48, [%i0]ASI_USER 3243 add %i0, 8, %i0 3244copyout_blkd42: 3245 subcc %i4, 8, %i4 3246 blu,pn %ncc, copyout_blkdone 3247 faligndata %d42, %d44, %d48 3248 stda %d48, [%i0]ASI_USER 3249 add %i0, 8, %i0 3250copyout_blkd44: 3251 subcc %i4, 8, %i4 3252 blu,pn %ncc, copyout_blkdone 3253 faligndata %d44, %d46, %d48 3254 stda %d48, [%i0]ASI_USER 3255 add %i0, 8, %i0 3256copyout_blkd46: 3257 subcc %i4, 8, %i4 3258 blu,pn %ncc, copyout_blkdone 3259 fsrc1 %d46, %d0 3260 3261copyout_blkleft: 32621: 3263 ldd [%l7], %d2 3264 add %l7, 8, %l7 3265 subcc %i4, 8, %i4 3266 faligndata %d0, %d2, %d8 3267 stda %d8, [%i0]ASI_USER 3268 blu,pn %ncc, copyout_blkdone 3269 add %i0, 8, %i0 3270 ldd [%l7], %d0 3271 add %l7, 8, %l7 3272 subcc %i4, 8, %i4 3273 faligndata %d2, %d0, %d8 3274 stda %d8, [%i0]ASI_USER 3275 bgeu,pt %ncc, 1b 3276 add %i0, 8, %i0 3277 3278copyout_blkdone: 3279 tst %i2 3280 bz,pt %ncc, .copyout_exit 3281 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0 3282 32837: ldub [%i1], %i4 3284 inc %i1 3285 stba %i4, [%i0]ASI_USER 3286 inc %i0 3287 deccc %i2 3288 bgu %ncc, 7b 3289 nop 3290 3291.copyout_exit: 3292 membar #StoreLoad|#StoreStore 3293 btst FPUSED_FLAG, SAVED_LOFAULT 3294 bz 1f 3295 nop 3296 3297 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 3298 wr %o2, 0, %gsr ! restore gsr 3299 3300 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 3301 btst FPRS_FEF, %o3 3302 bz 4f 3303 nop 3304 3305 ! restore fpregs from stack 3306 membar #Sync 3307 add %fp, STACK_BIAS - 257, %o2 3308 and %o2, -64, %o2 3309 ldda [%o2]ASI_BLK_P, %d0 3310 add %o2, 64, %o2 3311 ldda [%o2]ASI_BLK_P, %d16 3312 add %o2, 64, %o2 3313 ldda [%o2]ASI_BLK_P, %d32 3314 add %o2, 64, %o2 3315 ldda [%o2]ASI_BLK_P, %d48 3316 membar #Sync 3317 3318 ba,pt %ncc, 1f 3319 wr %o3, 0, %fprs ! 
restore fprs 3320 33214: 3322 FZERO ! zero all of the fpregs 3323 wr %o3, 0, %fprs ! restore fprs 3324 33251: 3326 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 3327 membar #Sync ! sync error barrier 3328 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3329 ret 3330 restore %g0, 0, %o0 3331 3332.copyout_err: 3333 ldn [THREAD_REG + T_COPYOPS], %o4 3334 brz %o4, 2f 3335 nop 3336 ldn [%o4 + CP_COPYOUT], %g2 3337 jmp %g2 3338 nop 33392: 3340 retl 3341 mov -1, %o0 3342 SET_SIZE(copyout) 3343 3344 /* xcopyout: entry point that differs from copyout only in its error handler. It builds the address of .xcopyout_err in REAL_LOFAULT and branches to .do_copyout (label defined outside this view); the `or` that completes the address is the instruction in the branch delay slot. */ 3345 ENTRY(xcopyout) 3346 sethi %hi(.xcopyout_err), REAL_LOFAULT 3347 b .do_copyout 3348 or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT /* .xcopyout_err: load the T_COPYOPS field of the thread at THREAD_REG. If it is non-zero, jump to the routine stored at its CP_XCOPYOUT offset; if it is zero, return to the caller with %o0 = %g1, where .copyout_err above returns -1 instead. NOTE(review): %g1 is presumably the error value left by the fault path - confirm against the lofault handler. */ 3349.xcopyout_err: 3350 ldn [THREAD_REG + T_COPYOPS], %o4 3351 brz %o4, 2f 3352 nop 3353 ldn [%o4 + CP_XCOPYOUT], %g2 3354 jmp %g2 3355 nop 33562: 3357 retl 3358 mov %g1, %o0 3359 SET_SIZE(xcopyout) 3360 /* xcopyout_little: byte-at-a-time copy of %o2 bytes, loading from [%o0] with plain ldub and storing to [%o1] through ASI_AIUSL. .little_err (defined outside this view) is installed in t_lofault for the duration; the previous handler is held in %o5 and written back before returning 0. %o3 runs from -count up to 0 and is the shared index. %o1 is advanced to dst + count, so the store address %o1+%o3 walks forward from dst. %o0 is biased to src + 2*count - 1 and stepped down by 2 per iteration, so the load address %o0+%o3 starts at the last source byte and walks backward; the bytes therefore land in reversed order. The loop ends when inccc carries out (index reaches 0); the ldub in the annulled delay slot only executes when the branch is taken. A zero count branches straight to the exit path at 2. */ 3361 ENTRY(xcopyout_little) 3362 sethi %hi(.little_err), %o4 3363 ldn [THREAD_REG + T_LOFAULT], %o5 3364 or %o4, %lo(.little_err), %o4 3365 membar #Sync ! sync error barrier 3366 stn %o4, [THREAD_REG + T_LOFAULT] 3367 3368 subcc %g0, %o2, %o3 3369 add %o0, %o2, %o0 3370 bz,pn %ncc, 2f ! check for zero bytes 3371 sub %o2, 1, %o4 3372 add %o0, %o4, %o0 ! start w/last byte 3373 add %o1, %o2, %o1 3374 ldub [%o0+%o3], %o4 3375 33761: stba %o4, [%o1+%o3]ASI_AIUSL 3377 inccc %o3 3378 sub %o0, 2, %o0 ! get next byte 3379 bcc,a,pt %ncc, 1b 3380 ldub [%o0+%o3], %o4 3381 33822: membar #Sync ! sync error barrier 3383 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3384 retl 3385 mov %g0, %o0 ! return (0) 3386 SET_SIZE(xcopyout_little) 3387 3388/* 3389 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little) 3390 */ 3391 3392 ENTRY(copyin) 3393 sethi %hi(.copyin_err), REAL_LOFAULT 3394 or REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT 3395 3396.do_copyin: 3397 ! 3398 ! Check the length and bail if zero. 3399 ! 
3400 tst %o2 3401 bnz,pt %ncc, 1f 3402 nop 3403 retl 3404 clr %o0 34051: 3406 sethi %hi(copyio_fault), %o4 3407 or %o4, %lo(copyio_fault), %o4 3408 sethi %hi(copyio_fault_nowindow), %o3 3409 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT 3410 or %o3, %lo(copyio_fault_nowindow), %o3 3411 membar #Sync 3412 stn %o3, [THREAD_REG + T_LOFAULT] 3413 3414 mov %o0, SAVE_SRC 3415 mov %o1, SAVE_DST 3416 mov %o2, SAVE_COUNT 3417 3418 ! 3419 ! Check to see if we're more than SMALL_LIMIT. 3420 ! 3421 subcc %o2, SMALL_LIMIT, %o3 3422 bgu,a,pt %ncc, .dci_ns 3423 or %o0, %o1, %o3 3424 ! 3425 ! What was previously ".small_copyin" 3426 ! 3427.dcibcp: 3428 sub %g0, %o2, %o3 ! setup for copy loop 3429 add %o0, %o2, %o0 3430 add %o1, %o2, %o1 3431 ba,pt %ncc, .dcicl 3432 lduba [%o0 + %o3]ASI_USER, %o4 3433 ! 3434 ! %o0 and %o1 point at the end and remain pointing at the end 3435 ! of their buffers. We pull things out by adding %o3 (which is 3436 ! the negation of the length) to the buffer end which gives us 3437 ! the curent location in the buffers. By incrementing %o3 we walk 3438 ! through both buffers without having to bump each buffer's 3439 ! pointer. A very fast 4 instruction loop. 3440 ! 3441 .align 16 3442.dcicl: 3443 stb %o4, [%o1 + %o3] 3444 inccc %o3 3445 bl,a,pt %ncc, .dcicl 3446 lduba [%o0 + %o3]ASI_USER, %o4 3447 ! 3448 ! We're done. Go home. 3449 ! 3450 membar #Sync 3451 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 3452 retl 3453 clr %o0 3454 ! 3455 ! Try aligned copies from here. 3456 ! 3457.dci_ns: 3458 ! 3459 ! See if we're single byte aligned. If we are, check the 3460 ! limit for single byte copies. If we're smaller, or equal, 3461 ! bounce to the byte for byte copy loop. Otherwise do it in 3462 ! HW (if enabled). 3463 ! 3464 btst 1, %o3 3465 bz,a,pt %icc, .dcih8 3466 btst 7, %o3 3467 ! 3468 ! We're single byte aligned. 3469 ! 
3470 subcc %o2, VIS_COPY_THRESHOLD, %o3 3471 bleu,pt %ncc, .dcibcp 3472 sethi %hi(hw_copy_limit_1), %o3 3473 ld [%o3 + %lo(hw_copy_limit_1)], %o3 3474 ! 3475 ! Is HW copy on? If not do everything byte for byte. 3476 ! 3477 tst %o3 3478 bz,pn %icc, .dcibcp 3479 subcc %o3, %o2, %o3 3480 ! 3481 ! Are we bigger than the HW limit? If not 3482 ! go to byte for byte. 3483 ! 3484 bge,pt %ncc, .dcibcp 3485 nop 3486 ! 3487 ! We're big enough and copy is on. Do it with HW. 3488 ! 3489 ba,pt %ncc, .big_copyin 3490 nop 3491.dcih8: 3492 ! 3493 ! 8 byte aligned? 3494 ! 3495 bnz,a %ncc, .dcih4 3496 btst 3, %o3 3497 ! 3498 ! We're eight byte aligned. 3499 ! 3500 subcc %o2, VIS_COPY_THRESHOLD, %o3 3501 bleu,pt %ncc, .dcis8 3502 sethi %hi(hw_copy_limit_8), %o3 3503 ld [%o3 + %lo(hw_copy_limit_8)], %o3 3504 ! 3505 ! Is HW assist on? If not, do it with the aligned copy. 3506 ! 3507 tst %o3 3508 bz,pn %icc, .dcis8 3509 subcc %o3, %o2, %o3 3510 bge %ncc, .dcis8 3511 nop 3512 ba,pt %ncc, .big_copyin 3513 nop 3514.dcis8: 3515 ! 3516 ! Housekeeping for copy loops. Uses same idea as in the byte for 3517 ! byte copy loop above. 3518 ! 3519 add %o0, %o2, %o0 3520 add %o1, %o2, %o1 3521 sub %g0, %o2, %o3 3522 ba,pt %ncc, .didebc 3523 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy 3524 ! 3525 ! 4 byte aligned? 3526 ! 3527.dcih4: 3528 bnz %ncc, .dcih2 3529 subcc %o2, VIS_COPY_THRESHOLD, %o3 3530 bleu,pt %ncc, .dcis4 3531 sethi %hi(hw_copy_limit_4), %o3 3532 ld [%o3 + %lo(hw_copy_limit_4)], %o3 3533 ! 3534 ! Is HW assist on? If not, do it with the aligned copy. 3535 ! 3536 tst %o3 3537 bz,pn %icc, .dcis4 3538 subcc %o3, %o2, %o3 3539 ! 3540 ! We're negative if our size is less than or equal to hw_copy_limit_4. 3541 ! 3542 bge %ncc, .dcis4 3543 nop 3544 ba,pt %ncc, .big_copyin 3545 nop 3546.dcis4: 3547 ! 3548 ! Housekeeping for copy loops. Uses same idea as in the byte 3549 ! for byte copy loop above. 3550 ! 
3551 add %o0, %o2, %o0 3552 add %o1, %o2, %o1 3553 sub %g0, %o2, %o3 3554 ba,pt %ncc, .didfbc 3555 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy 3556.dcih2: 3557 ! 3558 ! We're two byte aligned. Check for "smallness" 3559 ! done in delay at .dcih4 3560 ! 3561 bleu,pt %ncc, .dcis2 3562 sethi %hi(hw_copy_limit_2), %o3 3563 ld [%o3 + %lo(hw_copy_limit_2)], %o3 3564 ! 3565 ! Is HW assist on? If not, do it with the aligned copy. 3566 ! 3567 tst %o3 3568 bz,pn %icc, .dcis2 3569 subcc %o3, %o2, %o3 3570 ! 3571 ! Are we larger than the HW limit? 3572 ! 3573 bge %ncc, .dcis2 3574 nop 3575 ! 3576 ! HW assist is on and we're large enough to use it. 3577 ! 3578 ba,pt %ncc, .big_copyin 3579 nop 3580 ! 3581 ! Housekeeping for copy loops. Uses same idea as in the byte 3582 ! for byte copy loop above. 3583 ! 3584.dcis2: 3585 add %o0, %o2, %o0 3586 add %o1, %o2, %o1 3587 sub %g0, %o2, %o3 3588 ba,pt %ncc, .didtbc 3589 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy 3590 ! 3591.small_copyin: 3592 ! 3593 ! Why are we doing this AGAIN? There are certain conditions in 3594 ! big copyin that will cause us to forgo the HW assisted copys 3595 ! and bounce back to a non-hw assisted copy. This dispatches 3596 ! those copies. Note that we branch around this in the main line 3597 ! code. 3598 ! 3599 ! We make no check for limits or HW enablement here. We've 3600 ! already been told that we're a poster child so just go off 3601 ! and do it. 3602 ! 3603 or %o0, %o1, %o3 3604 btst 1, %o3 3605 bnz %icc, .dcibcp ! Most likely 3606 btst 7, %o3 3607 bz %icc, .dcis8 3608 btst 3, %o3 3609 bz %icc, .dcis4 3610 nop 3611 ba,pt %ncc, .dcis2 3612 nop 3613 ! 3614 ! Eight byte aligned copies. A steal from the original .small_copyin 3615 ! with modifications. %o2 is number of 8 byte chunks to copy. When 3616 ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more 3617 ! to copy. 3618 ! 
3619 .align 32 3620.didebc: 3621 ldxa [%o0 + %o3]ASI_USER, %o4 3622 deccc %o2 3623 stx %o4, [%o1 + %o3] 3624 bg,pt %ncc, .didebc 3625 addcc %o3, 8, %o3 3626 ! 3627 ! End of copy loop. Most 8 byte aligned copies end here. 3628 ! 3629 bz,pt %ncc, .dcifh 3630 nop 3631 ! 3632 ! Something is left. Do it byte for byte. 3633 ! 3634 ba,pt %ncc, .dcicl 3635 lduba [%o0 + %o3]ASI_USER, %o4 3636 ! 3637 ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy. 3638 ! 3639 .align 32 3640.didfbc: 3641 lduwa [%o0 + %o3]ASI_USER, %o4 3642 deccc %o2 3643 st %o4, [%o1 + %o3] 3644 bg,pt %ncc, .didfbc 3645 addcc %o3, 4, %o3 3646 ! 3647 ! End of copy loop. Most 4 byte aligned copies end here. 3648 ! 3649 bz,pt %ncc, .dcifh 3650 nop 3651 ! 3652 ! Something is left. Do it byte for byte. 3653 ! 3654 ba,pt %ncc, .dcicl 3655 lduba [%o0 + %o3]ASI_USER, %o4 3656 ! 3657 ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to 3658 ! copy. 3659 ! 3660 .align 32 3661.didtbc: 3662 lduha [%o0 + %o3]ASI_USER, %o4 3663 deccc %o2 3664 sth %o4, [%o1 + %o3] 3665 bg,pt %ncc, .didtbc 3666 addcc %o3, 2, %o3 3667 ! 3668 ! End of copy loop. Most 2 byte aligned copies end here. 3669 ! 3670 bz,pt %ncc, .dcifh 3671 nop 3672 ! 3673 ! Deal with the last byte 3674 ! 3675 lduba [%o0 + %o3]ASI_USER, %o4 3676 stb %o4, [%o1 + %o3] 3677.dcifh: 3678 membar #Sync 3679 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3680 retl 3681 clr %o0 3682 3683.big_copyin: 3684 ! 3685 ! Are we using the FP registers? 3686 ! 3687 rd %fprs, %o3 ! check for unused fp 3688 btst FPRS_FEF, %o3 3689 bnz %ncc, .copyin_fpregs_inuse 3690 nop 3691 ! 3692 ! We're going off to do a block copy. 3693 ! Switch fault hendlers and grab a window. We 3694 ! don't do a membar #Sync since we've done only 3695 ! kernel data to this point. 3696 ! 3697 stn %o4, [THREAD_REG + T_LOFAULT] 3698 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3699 ! 3700 ! %o3 is %i3 after the save... 3701 ! 
3702 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] 3703 ba,pt %ncc, .do_blockcopyin 3704 wr %g0, FPRS_FEF, %fprs 3705.copyin_fpregs_inuse: 3706 ! 3707 ! We're here if the FP regs are in use. Need to see if the request 3708 ! exceeds our suddenly larger minimum. 3709 ! 3710 cmp %i2, VIS_COPY_THRESHOLD+(64*4) 3711 bl %ncc, .small_copyin 3712 nop 3713 ! 3714 ! We're going off and do a block copy. 3715 ! Change to the heavy duty fault handler and grab a window first. 3716 ! New handler is passed in 3717 ! 3718 stn %o4, [THREAD_REG + T_LOFAULT] 3719 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3720 ! 3721 ! %o3 is now %i3 3722 ! 3723 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] 3724 3725 ! save in-use fpregs on stack 3726 wr %g0, FPRS_FEF, %fprs 3727 membar #Sync 3728 add %fp, STACK_BIAS - 257, %o2 3729 and %o2, -64, %o2 3730 stda %d0, [%o2]ASI_BLK_P 3731 add %o2, 64, %o2 3732 stda %d16, [%o2]ASI_BLK_P 3733 add %o2, 64, %o2 3734 stda %d32, [%o2]ASI_BLK_P 3735 add %o2, 64, %o2 3736 stda %d48, [%o2]ASI_BLK_P 3737 membar #Sync 3738 3739.do_blockcopyin: 3740 membar #StoreStore|#StoreLoad|#LoadStore 3741 3742 rd %gsr, %o2 3743 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 3744 3745 ! Set the lower bit in the saved t_lofault to indicate 3746 ! that we need to clear the %fprs register on the way 3747 ! out 3748 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 3749 3750 ! Swap src/dst since the code below is memcpy code 3751 ! and memcpy/bcopy have different calling sequences 3752 mov %i1, %i5 3753 mov %i0, %i1 3754 mov %i5, %i0 3755 3756!!! This code is nearly identical to the version in the sun4u 3757!!! libc_psr. Most bugfixes made to that file should be 3758!!! merged into this routine. 3759 3760 andcc %i0, 7, %o3 3761 bz copyin_blkcpy 3762 sub %o3, 8, %o3 3763 neg %o3 3764 sub %i2, %o3, %i2 3765 3766 ! 
Align Destination on double-word boundary 3767 37682: lduba [%i1]ASI_USER, %o4 3769 inc %i1 3770 inc %i0 3771 deccc %o3 3772 bgu %ncc, 2b 3773 stb %o4, [%i0-1] 3774copyin_blkcpy: 3775 andcc %i0, 63, %i3 3776 bz,pn %ncc, copyin_blalign ! now block aligned 3777 sub %i3, 64, %i3 3778 neg %i3 ! bytes till block aligned 3779 sub %i2, %i3, %i2 ! update %i2 with new count 3780 3781 ! Copy %i3 bytes till dst is block (64 byte) aligned. use 3782 ! double word copies. 3783 3784 alignaddr %i1, %g0, %g1 3785 ldda [%g1]ASI_USER, %d0 3786 add %g1, 8, %g1 37876: 3788 ldda [%g1]ASI_USER, %d2 3789 add %g1, 8, %g1 3790 subcc %i3, 8, %i3 3791 faligndata %d0, %d2, %d8 3792 std %d8, [%i0] 3793 add %i1, 8, %i1 3794 bz,pn %ncc, copyin_blalign 3795 add %i0, 8, %i0 3796 ldda [%g1]ASI_USER, %d0 3797 add %g1, 8, %g1 3798 subcc %i3, 8, %i3 3799 faligndata %d2, %d0, %d8 3800 std %d8, [%i0] 3801 add %i1, 8, %i1 3802 bgu,pn %ncc, 6b 3803 add %i0, 8, %i0 3804 3805copyin_blalign: 3806 membar #StoreLoad 3807 ! %i2 = total length 3808 ! %i3 = blocks (length - 64) / 64 3809 ! %i4 = doubles remaining (length - blocks) 3810 sub %i2, 64, %i3 3811 andn %i3, 63, %i3 3812 sub %i2, %i3, %i4 3813 andn %i4, 7, %i4 3814 sub %i4, 16, %i4 3815 sub %i2, %i4, %i2 3816 sub %i2, %i3, %i2 3817 3818 andn %i1, 0x3f, %l7 ! blk aligned address 3819 alignaddr %i1, %g0, %g0 ! gen %gsr 3820 3821 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5 3822 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3 3823 add %i1, %i4, %i1 3824 add %i1, %i3, %i1 3825 3826 ldda [%l7]ASI_BLK_AIUS, %d0 3827 add %l7, 64, %l7 3828 ldda [%l7]ASI_BLK_AIUS, %d16 3829 add %l7, 64, %l7 3830 ldda [%l7]ASI_BLK_AIUS, %d32 3831 add %l7, 64, %l7 3832 sub %i3, 128, %i3 3833 3834 ! switch statement to get us to the right 8 byte blk within a 3835 ! 
64 byte block 3836 3837 cmp %i5, 4 3838 bgeu,a copyin_hlf 3839 cmp %i5, 6 3840 cmp %i5, 2 3841 bgeu,a copyin_sqtr 3842 nop 3843 cmp %i5, 1 3844 be,a copyin_seg1 3845 nop 3846 ba,pt %ncc, copyin_seg0 3847 nop 3848copyin_sqtr: 3849 be,a copyin_seg2 3850 nop 3851 ba,pt %ncc, copyin_seg3 3852 nop 3853 3854copyin_hlf: 3855 bgeu,a copyin_fqtr 3856 nop 3857 cmp %i5, 5 3858 be,a copyin_seg5 3859 nop 3860 ba,pt %ncc, copyin_seg4 3861 nop 3862copyin_fqtr: 3863 be,a copyin_seg6 3864 nop 3865 ba,pt %ncc, copyin_seg7 3866 nop 3867 3868copyin_seg0: 3869 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 3870 FALIGN_D0 3871 ldda [%l7]ASI_BLK_AIUS, %d0 3872 stda %d48, [%i0]ASI_BLK_P 3873 add %l7, 64, %l7 3874 subcc %i3, 64, %i3 3875 bz,pn %ncc, 0f 3876 add %i0, 64, %i0 3877 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3878 FALIGN_D16 3879 ldda [%l7]ASI_BLK_AIUS, %d16 3880 stda %d48, [%i0]ASI_BLK_P 3881 add %l7, 64, %l7 3882 subcc %i3, 64, %i3 3883 bz,pn %ncc, 1f 3884 add %i0, 64, %i0 3885 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3886 FALIGN_D32 3887 ldda [%l7]ASI_BLK_AIUS, %d32 3888 stda %d48, [%i0]ASI_BLK_P 3889 add %l7, 64, %l7 3890 subcc %i3, 64, %i3 3891 bz,pn %ncc, 2f 3892 add %i0, 64, %i0 3893 ba,a,pt %ncc, copyin_seg0 3894 38950: 3896 FALIGN_D16 3897 stda %d48, [%i0]ASI_BLK_P 3898 add %i0, 64, %i0 3899 membar #Sync 3900 FALIGN_D32 3901 stda %d48, [%i0]ASI_BLK_P 3902 ba,pt %ncc, copyin_blkd0 3903 add %i0, 64, %i0 3904 39051: 3906 FALIGN_D32 3907 stda %d48, [%i0]ASI_BLK_P 3908 add %i0, 64, %i0 3909 membar #Sync 3910 FALIGN_D0 3911 stda %d48, [%i0]ASI_BLK_P 3912 ba,pt %ncc, copyin_blkd16 3913 add %i0, 64, %i0 3914 39152: 3916 FALIGN_D0 3917 stda %d48, [%i0]ASI_BLK_P 3918 add %i0, 64, %i0 3919 membar #Sync 3920 FALIGN_D16 3921 stda %d48, [%i0]ASI_BLK_P 3922 ba,pt %ncc, copyin_blkd32 3923 add %i0, 64, %i0 3924 3925copyin_seg1: 3926 ! 
1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 3927 FALIGN_D2 3928 ldda [%l7]ASI_BLK_AIUS, %d0 3929 stda %d48, [%i0]ASI_BLK_P 3930 add %l7, 64, %l7 3931 subcc %i3, 64, %i3 3932 bz,pn %ncc, 0f 3933 add %i0, 64, %i0 3934 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3935 FALIGN_D18 3936 ldda [%l7]ASI_BLK_AIUS, %d16 3937 stda %d48, [%i0]ASI_BLK_P 3938 add %l7, 64, %l7 3939 subcc %i3, 64, %i3 3940 bz,pn %ncc, 1f 3941 add %i0, 64, %i0 3942 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3943 FALIGN_D34 3944 ldda [%l7]ASI_BLK_AIUS, %d32 3945 stda %d48, [%i0]ASI_BLK_P 3946 add %l7, 64, %l7 3947 subcc %i3, 64, %i3 3948 bz,pn %ncc, 2f 3949 add %i0, 64, %i0 3950 ba,a,pt %ncc, copyin_seg1 39510: 3952 FALIGN_D18 3953 stda %d48, [%i0]ASI_BLK_P 3954 add %i0, 64, %i0 3955 membar #Sync 3956 FALIGN_D34 3957 stda %d48, [%i0]ASI_BLK_P 3958 ba,pt %ncc, copyin_blkd2 3959 add %i0, 64, %i0 3960 39611: 3962 FALIGN_D34 3963 stda %d48, [%i0]ASI_BLK_P 3964 add %i0, 64, %i0 3965 membar #Sync 3966 FALIGN_D2 3967 stda %d48, [%i0]ASI_BLK_P 3968 ba,pt %ncc, copyin_blkd18 3969 add %i0, 64, %i0 3970 39712: 3972 FALIGN_D2 3973 stda %d48, [%i0]ASI_BLK_P 3974 add %i0, 64, %i0 3975 membar #Sync 3976 FALIGN_D18 3977 stda %d48, [%i0]ASI_BLK_P 3978 ba,pt %ncc, copyin_blkd34 3979 add %i0, 64, %i0 3980copyin_seg2: 3981 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 3982 FALIGN_D4 3983 ldda [%l7]ASI_BLK_AIUS, %d0 3984 stda %d48, [%i0]ASI_BLK_P 3985 add %l7, 64, %l7 3986 subcc %i3, 64, %i3 3987 bz,pn %ncc, 0f 3988 add %i0, 64, %i0 3989 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3990 FALIGN_D20 3991 ldda [%l7]ASI_BLK_AIUS, %d16 3992 stda %d48, [%i0]ASI_BLK_P 3993 add %l7, 64, %l7 3994 subcc %i3, 64, %i3 3995 bz,pn %ncc, 1f 3996 add %i0, 64, %i0 3997 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3998 FALIGN_D36 3999 ldda [%l7]ASI_BLK_AIUS, %d32 4000 stda %d48, [%i0]ASI_BLK_P 4001 add %l7, 64, %l7 4002 subcc %i3, 64, %i3 4003 bz,pn %ncc, 2f 4004 add %i0, 64, %i0 4005 ba,a,pt %ncc, copyin_seg2 4006 40070: 4008 FALIGN_D20 4009 stda %d48, [%i0]ASI_BLK_P 4010 add %i0, 64, %i0 4011 membar #Sync 4012 FALIGN_D36 4013 stda %d48, [%i0]ASI_BLK_P 4014 ba,pt %ncc, copyin_blkd4 4015 add %i0, 64, %i0 4016 40171: 4018 FALIGN_D36 4019 stda %d48, [%i0]ASI_BLK_P 4020 add %i0, 64, %i0 4021 membar #Sync 4022 FALIGN_D4 4023 stda %d48, [%i0]ASI_BLK_P 4024 ba,pt %ncc, copyin_blkd20 4025 add %i0, 64, %i0 4026 40272: 4028 FALIGN_D4 4029 stda %d48, [%i0]ASI_BLK_P 4030 add %i0, 64, %i0 4031 membar #Sync 4032 FALIGN_D20 4033 stda %d48, [%i0]ASI_BLK_P 4034 ba,pt %ncc, copyin_blkd36 4035 add %i0, 64, %i0 4036 4037copyin_seg3: 4038 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4039 FALIGN_D6 4040 ldda [%l7]ASI_BLK_AIUS, %d0 4041 stda %d48, [%i0]ASI_BLK_P 4042 add %l7, 64, %l7 4043 subcc %i3, 64, %i3 4044 bz,pn %ncc, 0f 4045 add %i0, 64, %i0 4046 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4047 FALIGN_D22 4048 ldda [%l7]ASI_BLK_AIUS, %d16 4049 stda %d48, [%i0]ASI_BLK_P 4050 add %l7, 64, %l7 4051 subcc %i3, 64, %i3 4052 bz,pn %ncc, 1f 4053 add %i0, 64, %i0 4054 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4055 FALIGN_D38 4056 ldda [%l7]ASI_BLK_AIUS, %d32 4057 stda %d48, [%i0]ASI_BLK_P 4058 add %l7, 64, %l7 4059 subcc %i3, 64, %i3 4060 bz,pn %ncc, 2f 4061 add %i0, 64, %i0 4062 ba,a,pt %ncc, copyin_seg3 4063 40640: 4065 FALIGN_D22 4066 stda %d48, [%i0]ASI_BLK_P 4067 add %i0, 64, %i0 4068 membar #Sync 4069 FALIGN_D38 4070 stda %d48, [%i0]ASI_BLK_P 4071 ba,pt %ncc, copyin_blkd6 4072 add %i0, 64, %i0 4073 40741: 4075 FALIGN_D38 4076 stda %d48, [%i0]ASI_BLK_P 4077 add %i0, 64, %i0 4078 membar #Sync 4079 FALIGN_D6 4080 stda %d48, [%i0]ASI_BLK_P 4081 ba,pt %ncc, copyin_blkd22 4082 add %i0, 64, %i0 4083 40842: 4085 FALIGN_D6 4086 stda %d48, [%i0]ASI_BLK_P 4087 add %i0, 64, %i0 4088 membar #Sync 4089 FALIGN_D22 4090 stda %d48, [%i0]ASI_BLK_P 4091 ba,pt %ncc, copyin_blkd38 4092 add %i0, 64, %i0 4093 4094copyin_seg4: 4095 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4096 FALIGN_D8 4097 ldda [%l7]ASI_BLK_AIUS, %d0 4098 stda %d48, [%i0]ASI_BLK_P 4099 add %l7, 64, %l7 4100 subcc %i3, 64, %i3 4101 bz,pn %ncc, 0f 4102 add %i0, 64, %i0 4103 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4104 FALIGN_D24 4105 ldda [%l7]ASI_BLK_AIUS, %d16 4106 stda %d48, [%i0]ASI_BLK_P 4107 add %l7, 64, %l7 4108 subcc %i3, 64, %i3 4109 bz,pn %ncc, 1f 4110 add %i0, 64, %i0 4111 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4112 FALIGN_D40 4113 ldda [%l7]ASI_BLK_AIUS, %d32 4114 stda %d48, [%i0]ASI_BLK_P 4115 add %l7, 64, %l7 4116 subcc %i3, 64, %i3 4117 bz,pn %ncc, 2f 4118 add %i0, 64, %i0 4119 ba,a,pt %ncc, copyin_seg4 4120 41210: 4122 FALIGN_D24 4123 stda %d48, [%i0]ASI_BLK_P 4124 add %i0, 64, %i0 4125 membar #Sync 4126 FALIGN_D40 4127 stda %d48, [%i0]ASI_BLK_P 4128 ba,pt %ncc, copyin_blkd8 4129 add %i0, 64, %i0 4130 41311: 4132 FALIGN_D40 4133 stda %d48, [%i0]ASI_BLK_P 4134 add %i0, 64, %i0 4135 membar #Sync 4136 FALIGN_D8 4137 stda %d48, [%i0]ASI_BLK_P 4138 ba,pt %ncc, copyin_blkd24 4139 add %i0, 64, %i0 4140 41412: 4142 FALIGN_D8 4143 stda %d48, [%i0]ASI_BLK_P 4144 add %i0, 64, %i0 4145 membar #Sync 4146 FALIGN_D24 4147 stda %d48, [%i0]ASI_BLK_P 4148 ba,pt %ncc, copyin_blkd40 4149 add %i0, 64, %i0 4150 4151copyin_seg5: 4152 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4153 FALIGN_D10 4154 ldda [%l7]ASI_BLK_AIUS, %d0 4155 stda %d48, [%i0]ASI_BLK_P 4156 add %l7, 64, %l7 4157 subcc %i3, 64, %i3 4158 bz,pn %ncc, 0f 4159 add %i0, 64, %i0 4160 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4161 FALIGN_D26 4162 ldda [%l7]ASI_BLK_AIUS, %d16 4163 stda %d48, [%i0]ASI_BLK_P 4164 add %l7, 64, %l7 4165 subcc %i3, 64, %i3 4166 bz,pn %ncc, 1f 4167 add %i0, 64, %i0 4168 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4169 FALIGN_D42 4170 ldda [%l7]ASI_BLK_AIUS, %d32 4171 stda %d48, [%i0]ASI_BLK_P 4172 add %l7, 64, %l7 4173 subcc %i3, 64, %i3 4174 bz,pn %ncc, 2f 4175 add %i0, 64, %i0 4176 ba,a,pt %ncc, copyin_seg5 4177 41780: 4179 FALIGN_D26 4180 stda %d48, [%i0]ASI_BLK_P 4181 add %i0, 64, %i0 4182 membar #Sync 4183 FALIGN_D42 4184 stda %d48, [%i0]ASI_BLK_P 4185 ba,pt %ncc, copyin_blkd10 4186 add %i0, 64, %i0 4187 41881: 4189 FALIGN_D42 4190 stda %d48, [%i0]ASI_BLK_P 4191 add %i0, 64, %i0 4192 membar #Sync 4193 FALIGN_D10 4194 stda %d48, [%i0]ASI_BLK_P 4195 ba,pt %ncc, copyin_blkd26 4196 add %i0, 64, %i0 4197 41982: 4199 FALIGN_D10 4200 stda %d48, [%i0]ASI_BLK_P 4201 add %i0, 64, %i0 4202 membar #Sync 4203 FALIGN_D26 4204 stda %d48, [%i0]ASI_BLK_P 4205 ba,pt %ncc, copyin_blkd42 4206 add %i0, 64, %i0 4207 4208copyin_seg6: 4209 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4210 FALIGN_D12 4211 ldda [%l7]ASI_BLK_AIUS, %d0 4212 stda %d48, [%i0]ASI_BLK_P 4213 add %l7, 64, %l7 4214 subcc %i3, 64, %i3 4215 bz,pn %ncc, 0f 4216 add %i0, 64, %i0 4217 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4218 FALIGN_D28 4219 ldda [%l7]ASI_BLK_AIUS, %d16 4220 stda %d48, [%i0]ASI_BLK_P 4221 add %l7, 64, %l7 4222 subcc %i3, 64, %i3 4223 bz,pn %ncc, 1f 4224 add %i0, 64, %i0 4225 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4226 FALIGN_D44 4227 ldda [%l7]ASI_BLK_AIUS, %d32 4228 stda %d48, [%i0]ASI_BLK_P 4229 add %l7, 64, %l7 4230 subcc %i3, 64, %i3 4231 bz,pn %ncc, 2f 4232 add %i0, 64, %i0 4233 ba,a,pt %ncc, copyin_seg6 4234 42350: 4236 FALIGN_D28 4237 stda %d48, [%i0]ASI_BLK_P 4238 add %i0, 64, %i0 4239 membar #Sync 4240 FALIGN_D44 4241 stda %d48, [%i0]ASI_BLK_P 4242 ba,pt %ncc, copyin_blkd12 4243 add %i0, 64, %i0 4244 42451: 4246 FALIGN_D44 4247 stda %d48, [%i0]ASI_BLK_P 4248 add %i0, 64, %i0 4249 membar #Sync 4250 FALIGN_D12 4251 stda %d48, [%i0]ASI_BLK_P 4252 ba,pt %ncc, copyin_blkd28 4253 add %i0, 64, %i0 4254 42552: 4256 FALIGN_D12 4257 stda %d48, [%i0]ASI_BLK_P 4258 add %i0, 64, %i0 4259 membar #Sync 4260 FALIGN_D28 4261 stda %d48, [%i0]ASI_BLK_P 4262 ba,pt %ncc, copyin_blkd44 4263 add %i0, 64, %i0 4264 4265copyin_seg7: 4266 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4267 FALIGN_D14 4268 ldda [%l7]ASI_BLK_AIUS, %d0 4269 stda %d48, [%i0]ASI_BLK_P 4270 add %l7, 64, %l7 4271 subcc %i3, 64, %i3 4272 bz,pn %ncc, 0f 4273 add %i0, 64, %i0 4274 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4275 FALIGN_D30 4276 ldda [%l7]ASI_BLK_AIUS, %d16 4277 stda %d48, [%i0]ASI_BLK_P 4278 add %l7, 64, %l7 4279 subcc %i3, 64, %i3 4280 bz,pn %ncc, 1f 4281 add %i0, 64, %i0 4282 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4283 FALIGN_D46 4284 ldda [%l7]ASI_BLK_AIUS, %d32 4285 stda %d48, [%i0]ASI_BLK_P 4286 add %l7, 64, %l7 4287 subcc %i3, 64, %i3 4288 bz,pn %ncc, 2f 4289 add %i0, 64, %i0 4290 ba,a,pt %ncc, copyin_seg7 4291 42920: 4293 FALIGN_D30 4294 stda %d48, [%i0]ASI_BLK_P 4295 add %i0, 64, %i0 4296 membar #Sync 4297 FALIGN_D46 4298 stda %d48, [%i0]ASI_BLK_P 4299 ba,pt %ncc, copyin_blkd14 4300 add %i0, 64, %i0 4301 43021: 4303 FALIGN_D46 4304 stda %d48, [%i0]ASI_BLK_P 4305 add %i0, 64, %i0 4306 membar #Sync 4307 FALIGN_D14 4308 stda %d48, [%i0]ASI_BLK_P 4309 ba,pt %ncc, copyin_blkd30 4310 add %i0, 64, %i0 4311 43122: 4313 FALIGN_D14 4314 stda %d48, [%i0]ASI_BLK_P 4315 add %i0, 64, %i0 4316 membar #Sync 4317 FALIGN_D30 4318 stda %d48, [%i0]ASI_BLK_P 4319 ba,pt %ncc, copyin_blkd46 4320 add %i0, 64, %i0 4321 4322 4323 ! 4324 ! dribble out the last partial block 4325 ! 4326copyin_blkd0: 4327 subcc %i4, 8, %i4 4328 blu,pn %ncc, copyin_blkdone 4329 faligndata %d0, %d2, %d48 4330 std %d48, [%i0] 4331 add %i0, 8, %i0 4332copyin_blkd2: 4333 subcc %i4, 8, %i4 4334 blu,pn %ncc, copyin_blkdone 4335 faligndata %d2, %d4, %d48 4336 std %d48, [%i0] 4337 add %i0, 8, %i0 4338copyin_blkd4: 4339 subcc %i4, 8, %i4 4340 blu,pn %ncc, copyin_blkdone 4341 faligndata %d4, %d6, %d48 4342 std %d48, [%i0] 4343 add %i0, 8, %i0 4344copyin_blkd6: 4345 subcc %i4, 8, %i4 4346 blu,pn %ncc, copyin_blkdone 4347 faligndata %d6, %d8, %d48 4348 std %d48, [%i0] 4349 add %i0, 8, %i0 4350copyin_blkd8: 4351 subcc %i4, 8, %i4 4352 blu,pn %ncc, copyin_blkdone 4353 faligndata %d8, %d10, %d48 4354 std %d48, [%i0] 4355 add %i0, 8, %i0 4356copyin_blkd10: 4357 subcc %i4, 8, %i4 4358 blu,pn %ncc, copyin_blkdone 4359 faligndata %d10, %d12, %d48 4360 std %d48, [%i0] 4361 add %i0, 8, %i0 4362copyin_blkd12: 4363 subcc %i4, 8, %i4 4364 blu,pn %ncc, copyin_blkdone 4365 faligndata %d12, %d14, %d48 4366 std %d48, [%i0] 4367 add %i0, 8, %i0 4368copyin_blkd14: 4369 subcc %i4, 8, %i4 4370 
blu,pn %ncc, copyin_blkdone 4371 fsrc1 %d14, %d0 4372 ba,a,pt %ncc, copyin_blkleft 4373 4374copyin_blkd16: 4375 subcc %i4, 8, %i4 4376 blu,pn %ncc, copyin_blkdone 4377 faligndata %d16, %d18, %d48 4378 std %d48, [%i0] 4379 add %i0, 8, %i0 4380copyin_blkd18: 4381 subcc %i4, 8, %i4 4382 blu,pn %ncc, copyin_blkdone 4383 faligndata %d18, %d20, %d48 4384 std %d48, [%i0] 4385 add %i0, 8, %i0 4386copyin_blkd20: 4387 subcc %i4, 8, %i4 4388 blu,pn %ncc, copyin_blkdone 4389 faligndata %d20, %d22, %d48 4390 std %d48, [%i0] 4391 add %i0, 8, %i0 4392copyin_blkd22: 4393 subcc %i4, 8, %i4 4394 blu,pn %ncc, copyin_blkdone 4395 faligndata %d22, %d24, %d48 4396 std %d48, [%i0] 4397 add %i0, 8, %i0 4398copyin_blkd24: 4399 subcc %i4, 8, %i4 4400 blu,pn %ncc, copyin_blkdone 4401 faligndata %d24, %d26, %d48 4402 std %d48, [%i0] 4403 add %i0, 8, %i0 4404copyin_blkd26: 4405 subcc %i4, 8, %i4 4406 blu,pn %ncc, copyin_blkdone 4407 faligndata %d26, %d28, %d48 4408 std %d48, [%i0] 4409 add %i0, 8, %i0 4410copyin_blkd28: 4411 subcc %i4, 8, %i4 4412 blu,pn %ncc, copyin_blkdone 4413 faligndata %d28, %d30, %d48 4414 std %d48, [%i0] 4415 add %i0, 8, %i0 4416copyin_blkd30: 4417 subcc %i4, 8, %i4 4418 blu,pn %ncc, copyin_blkdone 4419 fsrc1 %d30, %d0 4420 ba,a,pt %ncc, copyin_blkleft 4421copyin_blkd32: 4422 subcc %i4, 8, %i4 4423 blu,pn %ncc, copyin_blkdone 4424 faligndata %d32, %d34, %d48 4425 std %d48, [%i0] 4426 add %i0, 8, %i0 4427copyin_blkd34: 4428 subcc %i4, 8, %i4 4429 blu,pn %ncc, copyin_blkdone 4430 faligndata %d34, %d36, %d48 4431 std %d48, [%i0] 4432 add %i0, 8, %i0 4433copyin_blkd36: 4434 subcc %i4, 8, %i4 4435 blu,pn %ncc, copyin_blkdone 4436 faligndata %d36, %d38, %d48 4437 std %d48, [%i0] 4438 add %i0, 8, %i0 4439copyin_blkd38: 4440 subcc %i4, 8, %i4 4441 blu,pn %ncc, copyin_blkdone 4442 faligndata %d38, %d40, %d48 4443 std %d48, [%i0] 4444 add %i0, 8, %i0 4445copyin_blkd40: 4446 subcc %i4, 8, %i4 4447 blu,pn %ncc, copyin_blkdone 4448 faligndata %d40, %d42, %d48 4449 std %d48, [%i0] 
4450 add %i0, 8, %i0 4451copyin_blkd42: 4452 subcc %i4, 8, %i4 4453 blu,pn %ncc, copyin_blkdone 4454 faligndata %d42, %d44, %d48 4455 std %d48, [%i0] 4456 add %i0, 8, %i0 4457copyin_blkd44: 4458 subcc %i4, 8, %i4 4459 blu,pn %ncc, copyin_blkdone 4460 faligndata %d44, %d46, %d48 4461 std %d48, [%i0] 4462 add %i0, 8, %i0 4463copyin_blkd46: 4464 subcc %i4, 8, %i4 4465 blu,pn %ncc, copyin_blkdone 4466 fsrc1 %d46, %d0 4467 4468copyin_blkleft: 44691: 4470 ldda [%l7]ASI_USER, %d2 4471 add %l7, 8, %l7 4472 subcc %i4, 8, %i4 4473 faligndata %d0, %d2, %d8 4474 std %d8, [%i0] 4475 blu,pn %ncc, copyin_blkdone 4476 add %i0, 8, %i0 4477 ldda [%l7]ASI_USER, %d0 4478 add %l7, 8, %l7 4479 subcc %i4, 8, %i4 4480 faligndata %d2, %d0, %d8 4481 std %d8, [%i0] 4482 bgeu,pt %ncc, 1b 4483 add %i0, 8, %i0 4484 4485copyin_blkdone: 4486 tst %i2 4487 bz,pt %ncc, .copyin_exit 4488 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0 4489 44907: lduba [%i1]ASI_USER, %i4 4491 inc %i1 4492 inc %i0 4493 deccc %i2 4494 bgu %ncc, 7b 4495 stb %i4, [%i0 - 1] 4496 4497.copyin_exit: 4498 membar #StoreLoad|#StoreStore 4499 btst FPUSED_FLAG, SAVED_LOFAULT 4500 bz %icc, 1f 4501 nop 4502 4503 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr 4504 wr %o2, 0, %gsr 4505 4506 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 4507 btst FPRS_FEF, %o3 4508 bz %icc, 4f 4509 nop 4510 4511 ! restore fpregs from stack 4512 membar #Sync 4513 add %fp, STACK_BIAS - 257, %o2 4514 and %o2, -64, %o2 4515 ldda [%o2]ASI_BLK_P, %d0 4516 add %o2, 64, %o2 4517 ldda [%o2]ASI_BLK_P, %d16 4518 add %o2, 64, %o2 4519 ldda [%o2]ASI_BLK_P, %d32 4520 add %o2, 64, %o2 4521 ldda [%o2]ASI_BLK_P, %d48 4522 membar #Sync 4523 4524 ba,pt %ncc, 1f 4525 wr %o3, 0, %fprs ! restore fprs 4526 45274: 4528 FZERO ! zero all of the fpregs 4529 wr %o3, 0, %fprs ! restore fprs 4530 45311: 4532 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 4533 membar #Sync ! sync error barrier 4534 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! 
! restore old t_lofault
	ret
	  restore %g0, 0, %o0		! delay slot: pop the window, return (0)

	!
	! .copyin_err: if the thread addressed by THREAD_REG has a non-NULL
	! T_COPYOPS pointer, jump to the routine stored at offset CP_COPYIN
	! in that structure; otherwise return -1.
	! NOTE(review): presumably reached through REAL_LOFAULT after a
	! fault inside copyin - the code that installs it is outside this
	! view, confirm there.
	!
.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f			! no copyops structure: plain failure
	  nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2			! plain jump, no return address saved here
	  nop
2:
	retl
	  mov	-1, %o0			! delay slot: return (-1)
	SET_SIZE(copyin)

/*
 * xcopyin - enter the common copyin code at .do_copyin with the address
 * of .xcopyin_err loaded into REAL_LOFAULT.
 *
 * .xcopyin_err jumps to the routine stored at offset CP_XCOPYIN of the
 * thread's T_COPYOPS structure when that pointer is non-NULL; otherwise
 * it returns whatever value is in %g1.
 * NOTE(review): %g1 is presumably an errno left by the fault path -
 * that code is not visible here, confirm before relying on it.
 */
	ENTRY(xcopyin)
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	  or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT	! delay slot: low bits of handler address
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f			! no copyops structure: return %g1
	  nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2			! plain jump, no return address saved here
	  nop
2:
	retl
	  mov	%g1, %o0		! delay slot: return value taken from %g1
	SET_SIZE(xcopyin)

/*
 * xcopyin_little - byte-at-a-time copy that reverses the byte order.
 *
 *	%o0 - source address (read with lduba through ASI_AIUSL)
 *	%o1 - destination address (written with plain stb)
 *	%o2 - number of bytes
 *
 * Destination byte i receives source byte (count - 1 - i).
 * .little_err is installed in t_lofault for the duration of the copy;
 * the previous handler is kept in %o5 and written back on both exits.
 * Returns 0 on success. On a fault, .little_err returns the value found
 * in %g1.
 */
	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5	! remember the current handler
	or	%o4, %lo(.little_err), %o4
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3		! %o3 = -count; Z is set when count == 0
	add	%o0, %o2, %o0		! %o0 = src + count (condition codes untouched)
	bz,pn	%ncc, 2f		! check for zero bytes
	  sub	%o2, 1, %o4		! delay slot: %o4 = count - 1
	add	%o0, %o4, %o0		! start w/last byte: %o0 + %o3 == src + count - 1
	add	%o1, %o2, %o1		! %o1 + %o3 == dst
	lduba	[%o0+%o3]ASI_AIUSL, %o4

	! Each pass stores the byte already loaded, moves the store index
	! up by one and the load address down by one (-2 on the base,
	! +1 on the index).
1:	stb	%o4, [%o1+%o3]
	inccc	%o3			! carry is set when %o3 wraps to zero
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b		! loop while no carry
	  lduba	[%o0+%o3]ASI_AIUSL, %o4	! annulled: only runs when the branch is taken

2:	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	  mov	%g0, %o0		! return (0)

.little_err:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	  mov	%g1, %o0		! delay slot: return value taken from %g1
	SET_SIZE(xcopyin_little)


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 *
 * Enters .do_copyin with the address of .copyio_noerr in REAL_LOFAULT.
 * .copyio_noerr, which copyout_noerr uses as well, does nothing but jump
 * to the address held in SAVED_LOFAULT.
 */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT	! delay slot: low bits of handler address
.copyio_noerr:
	jmp	SAVED_LOFAULT		! hand control to the saved handler
	  nop
	SET_SIZE(copyin_noerr)

/*
 * Copy a block of storage - must not overlap (from + len <= to).
* No fault handler installed (to be called under on_fault())
 *
 * Enters .do_copyout with the address of .copyio_noerr in REAL_LOFAULT.
 */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT	! delay slot: low bits of handler address
	SET_SIZE(copyout_noerr)

	!
	! Global data words. The three use_hw_* words are initialised to 1
	! and the four hw_copy_limit_* words to 0.
	! NOTE(review): presumably tunables read by the copy/zero routines;
	! their consumers are outside this view.
	!
	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_copyio)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1
	DGDEF(hw_copy_limit_1)
	.word	0
	DGDEF(hw_copy_limit_2)
	.word	0
	DGDEF(hw_copy_limit_4)
	.word	0
	DGDEF(hw_copy_limit_8)
	.word	0

	.align	64
	.section ".text"		! back to text for the routines below


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * at least 256 bytes in length using spitfire's block stores. If
 * the criteria for using this routine are not met then it calls bzero
 * and returns 1. Otherwise 0 is returned indicating success.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 *
 * Only the %d0 block (%d0-%d14) is used. If FPRS_FEF was set on entry
 * that block is saved on the stack first and reloaded before returning;
 * %fprs is put back to its entry value on every block-store exit.
 */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)
	! %i3 - bytes cleared by the current pass
	! %l0 - saved fprs
	! %l1 - pointer to saved %d0 block
	! (%l2 is not referenced anywhere in this routine)

	ENTRY(hwblkclr)
	! get another window w/space for one aligned block of saved fpregs
	save	%sp, -SA(MINFRAME + 2*64), %sp

	! Must be block-aligned
	andcc	%i0, (64-1), %g0
	bnz,pn	%ncc, 1f
	  nop

	! ... and must be 256 bytes or more
	cmp	%i1, 256
	blu,pn	%ncc, 1f
	  nop

	! ... and length must be a multiple of 64
	andcc	%i1, (64-1), %g0
	bz,pn	%ncc, 2f
	  nop

1:	! punt, call bzero but notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	  mov	%i1, %o1		! delay slot: second argument (length)
	ret
	  restore %g0, 1, %o0		! return (1) - did not use block operations

2:	rd	%fprs, %l0		! check for unused fp
	btst	FPRS_FEF, %l0
	bz	1f			! fp not enabled: nothing to save
	  nop

	! save in-use fpregs on stack
	membar	#Sync
	add	%fp, STACK_BIAS - 65, %l1
	and	%l1, -64, %l1		! round down to a 64-byte boundary
	stda	%d0, [%l1]ASI_BLK_P

1:	membar	#StoreStore|#StoreLoad|#LoadStore
	wr	%g0, FPRS_FEF, %fprs	! enable the fp unit
	wr	%g0, ASI_BLK_P, %asi	! the stda ...%asi stores below use this ASI

	! Clear block
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fzero	%d8
	fzero	%d10
	fzero	%d12
	fzero	%d14

	mov	256, %i3		! a full pass clears 4 blocks = 256 bytes
	ba	.pz_doblock
	  nop

	!
	! Full pass: four block stores, the first of which (offset 192)
	! sits in the delay slot of the branch at .pz_doblock.
	! The instructions between .pz_blkstart and .pz_zinst are also the
	! target of the computed jump further down, so nothing may be
	! inserted, removed or reordered here.
	!
.pz_blkstart:
      ! stda	%d0, [%i0+192]%asi  ! in dly slot of branch that got us here
	stda	%d0, [%i0+128]%asi
	stda	%d0, [%i0+64]%asi
	stda	%d0, [%i0]%asi
.pz_zinst:
	add	%i0, %i3, %i0		! advance past what was just cleared
	sub	%i1, %i3, %i1
.pz_doblock:
	cmp	%i1, 256
	bgeu,a	%ncc, .pz_blkstart
	  stda	%d0, [%i0+192]%asi	! annulled: only runs when the branch is taken

	cmp	%i1, 64
	blu	%ncc, .pz_finish	! less than one block left: done

	  andn	%i1, (64-1), %i3	! delay slot (always runs): whole-block byte count
	!
	! 64, 128 or 192 bytes remain. Each stda is 4 bytes of code and
	! clears 64 bytes, so (bytes >> 4) is how far to back up from
	! .pz_zinst in order to execute exactly bytes/64 of the stores
	! above before falling into .pz_zinst.
	!
	srl	%i3, 4, %i2		! using blocks, 1 instr / 16 words
	set	.pz_zinst, %i4
	sub	%i4, %i2, %i4
	jmp	%i4
	  nop

.pz_finish:
	membar	#Sync
	btst	FPRS_FEF, %l0
	bz,a	.pz_finished		! fp was not enabled on entry: nothing to reload
	  wr	%l0, 0, %fprs		! restore fprs

	! restore fpregs from stack
	ldda	[%l1]ASI_BLK_P, %d0
	membar	#Sync
	wr	%l0, 0, %fprs		! restore fprs

.pz_finished:
	ret
	  restore %g0, 0, %o0		! return (0) - block stores were used
	SET_SIZE(hwblkclr)

	/*
	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
	 * using physical addresses.
*/
	/*
	 * PSTATE_IE is cleared for the duration of the copy and the
	 * entry value of %pstate (kept in %g1) is written back in the
	 * delay slot of the return.
	 * All four 8-byte loads are issued before the first store.
	 * On return %o0 and %o1 have each been advanced by 24, and
	 * %o2-%o5, %g1 and %g2 have been overwritten. No return value
	 * is set.
	 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1		! remember the entry %pstate
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g0, %g2, %pstate	! same state with PSTATE_IE cleared

	ldxa	[%o0]ASI_MEM, %o2	! src +  0
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3	! src +  8
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4	! src + 16
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5	! src + 24
	stxa	%o2, [%o1]ASI_MEM	! dst +  0
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM	! dst +  8
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM	! dst + 16
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM	! dst + 24

	membar	#Sync
	retl
	  wrpr	%g0, %g1, %pstate	! delay slot: put back the entry %pstate
	SET_SIZE(hw_pa_bcopy32)
