1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22/* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27#pragma ident "%Z%%M% %I% %E% SMI" 28 29#include <sys/param.h> 30#include <sys/errno.h> 31#include <sys/asm_linkage.h> 32#include <sys/vtrace.h> 33#include <sys/machthread.h> 34#include <sys/clock.h> 35#include <sys/asi.h> 36#include <sys/fsr.h> 37#include <sys/privregs.h> 38 39#if !defined(lint) 40#include "assym.h" 41#endif /* lint */ 42 43 44/* 45 * Pseudo-code to aid in understanding the control flow of the 46 * bcopy routine. 47 * 48 * On entry to bcopy: 49 * 50 * %l6 = curthread->t_lofault; 51 * used_block_copy = FALSE; ! %l6 |= 1 52 * if (%l6 != NULL) { 53 * curthread->t_lofault = .copyerr; 54 * caller_error_handler = TRUE ! %l6 |= 2 55 * } 56 * 57 * if (length < VIS_COPY) 58 * goto regular_copy; 59 * 60 * if (!use_vis) 61 * goto_regular_copy; 62 * 63 * if (curthread->t_lwp == NULL) { 64 * ! Kernel threads do not have pcb's in which to store 65 * ! the floating point state, disallow preemption during 66 * ! the copy. 
67 * kpreempt_disable(curthread); 68 * } 69 * 70 * old_fprs = %fprs; 71 * old_gsr = %gsr; 72 * if (%fprs.fef) { 73 * ! If we need to save 4 blocks of fpregs then make sure 74 * ! the length is still appropriate for that extra overhead. 75 * if (length < (large_length + (64 * 4))) { 76 * if (curthread->t_lwp == NULL) 77 * kpreempt_enable(curthread); 78 * goto regular_copy; 79 * } 80 * %fprs.fef = 1; 81 * save current fpregs on stack using blockstore 82 * } else { 83 * %fprs.fef = 1; 84 * } 85 * 86 * used_block_copy = 1; ! %l6 |= 1 87 * do_blockcopy_here; 88 * 89 * In lofault handler: 90 * curthread->t_lofault = .copyerr2; 91 * Continue on with the normal exit handler 92 * 93 * On exit: 94 * call_kpreempt = 0; 95 * if (used_block_copy) { ! %l6 & 1 96 * %gsr = old_gsr; 97 * if (old_fprs & FPRS_FEF) 98 * restore fpregs from stack using blockload 99 * else 100 * zero fpregs 101 * %fprs = old_fprs; 102 * if (curthread->t_lwp == NULL) { 103 * kpreempt_enable(curthread); 104 * call_kpreempt = 1; 105 * } 106 * } 107 * curthread->t_lofault = (%l6 & ~3); 108 * if (call_kpreempt) 109 * kpreempt(%pil); 110 * return (0) 111 * 112 * In second lofault handler (.copyerr2): 113 * We've tried to restore fp state from the stack and failed. To 114 * prevent from returning with a corrupted fp state, we will panic. 115 */ 116 117/* 118 * Notes on preserving existing fp state: 119 * 120 * When a copyOP decides to use fp we may have to preserve existing 121 * floating point state. It is not the caller's state that we need to 122 * preserve - the rest of the kernel does not use fp and, anyway, fp 123 * registers are volatile across a call. 
Some examples: 124 * 125 * - userland has fp state and is interrupted (device interrupt 126 * or trap) and within the interrupt/trap handling we use 127 * bcopy() 128 * - another (higher level) interrupt or trap handler uses bcopy 129 * while a bcopy from an earlier interrupt is still active 130 * - an asynchronous error trap occurs while fp state exists (in 131 * userland or in kernel copy) and the tl0 component of the handling 132 * uses bcopy 133 * - a user process with fp state incurs a copy-on-write fault and 134 * hwblkpagecopy always uses fp 135 * 136 * We therefore need a per-call place in which to preserve fp state - 137 * using our stack is ideal (and since fp copy cannot be leaf optimized 138 * because of calls it makes, this is no hardship). 139 * 140 * To make sure that floating point state is always saved and restored 141 * correctly, the following "big rules" must be followed when the floating 142 * point registers will be used: 143 * 144 * 1. %l6 always holds the caller's lofault handler. Also in this register, 145 * Bit 1 (FPUSED_FLAG) indicates that the floating point registers are in 146 * use. Bit 2 (BCOPY_FLAG) indicates that the call was to bcopy. 147 * 148 * 2. The FPUSED flag indicates that all FP state has been successfully stored 149 * on the stack. It should not be set until this save has been completed. 150 * 151 * 3. The FPUSED flag should not be cleared on exit until all FP state has 152 * been restored from the stack. If an error occurs while restoring 153 * data from the stack, the error handler can check this flag to see if 154 * a restore is necessary. 155 * 156 * 4. Code run under the new lofault handler must be kept to a minimum. In 157 * particular, any calls to kpreempt() should not be made until after the 158 * lofault handler has been restored. 159 */ 160 161/* 162 * This shadows sys/machsystm.h which can't be included due to the lack of 163 * _ASM guards in include files it references. Change it here, change it there. 
*/
#define VIS_COPY_THRESHOLD 900

/*
 * Less than or equal this number of bytes we will always copy byte-for-byte
 */
#define	SMALL_LIMIT	7

/*
 * Flags set in the lower bits of the t_lofault address:
 * FPUSED_FLAG: The FP registers were in use and must be restored
 * BCOPY_FLAG: Set for bcopy calls, cleared for kcopy calls
 * COPY_FLAGS: Both of the above
 *
 * Other flags:
 * KPREEMPT_FLAG: kpreempt needs to be called
 *
 * KPREEMPT_FLAG is not part of COPY_FLAGS; the error handler keeps it in a
 * scratch register next to a copy of BCOPY_FLAG rather than in the saved
 * t_lofault value.
 */
#define	FPUSED_FLAG	1
#define BCOPY_FLAG	2
#define	COPY_FLAGS	(FPUSED_FLAG | BCOPY_FLAG)
#define	KPREEMPT_FLAG	4

/*
 * Size of stack frame in order to accommodate a 64-byte aligned
 * floating-point register save area and 2 32-bit temp locations.
 *
 * The save area itself is four 64-byte blocks (256 bytes); the fifth
 * block is slack so that the start of the area can be rounded to a
 * 64-byte boundary.
 */
#define	HWCOPYFRAMESIZE	((64 * 5) + (2 * 4))

/*
 * Offsets of the two 32-bit temp locations.  They are subtracted from
 * %fp + STACK_BIAS, so the saved %fprs and %gsr words sit just below the
 * fp register save area.
 */
#define SAVED_FPREGS_OFFSET	(64 * 5)
#define	SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 4)
#define	SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 4)

/*
 * Common macros used by the various versions of the block copy
 * routines in this file.
*/

/*
 * FZERO: clear %f0 and %f2 with fzero, then fill %f4 through %f62 with
 * the sum or product of those two zeroed registers, so that every fp
 * register ends up holding zero.
 */
#define	FZERO				\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f16		;\
	fmuld	%f0, %f2, %f18		;\
	faddd	%f0, %f2, %f20		;\
	fmuld	%f0, %f2, %f22		;\
	faddd	%f0, %f2, %f24		;\
	fmuld	%f0, %f2, %f26		;\
	faddd	%f0, %f2, %f28		;\
	fmuld	%f0, %f2, %f30		;\
	faddd	%f0, %f2, %f32		;\
	fmuld	%f0, %f2, %f34		;\
	faddd	%f0, %f2, %f36		;\
	fmuld	%f0, %f2, %f38		;\
	faddd	%f0, %f2, %f40		;\
	fmuld	%f0, %f2, %f42		;\
	faddd	%f0, %f2, %f44		;\
	fmuld	%f0, %f2, %f46		;\
	faddd	%f0, %f2, %f48		;\
	fmuld	%f0, %f2, %f50		;\
	faddd	%f0, %f2, %f52		;\
	fmuld	%f0, %f2, %f54		;\
	faddd	%f0, %f2, %f56		;\
	fmuld	%f0, %f2, %f58		;\
	faddd	%f0, %f2, %f60		;\
	fmuld	%f0, %f2, %f62


/*
 * FALIGN_Dn: eight faligndata operations that build one 64-byte output
 * block in %d48-%d62 from nine consecutive source doubles starting at %dn.
 * The source registers %d0-%d46 are treated as a ring: the register
 * following %d46 is %d0.  The macros are grouped by starting offset within
 * a 64-byte block (n, n + 16, n + 32), which is how the seg0..seg7 copy
 * loops use them.
 */
#define	FALIGN_D0			\
	faligndata	%d0, %d2, %d48		;\
	faligndata	%d2, %d4, %d50		;\
	faligndata	%d4, %d6, %d52		;\
	faligndata	%d6, %d8, %d54		;\
	faligndata	%d8, %d10, %d56		;\
	faligndata	%d10, %d12, %d58	;\
	faligndata	%d12, %d14, %d60	;\
	faligndata	%d14, %d16, %d62

#define	FALIGN_D16			\
	faligndata	%d16, %d18, %d48	;\
	faligndata	%d18, %d20, %d50	;\
	faligndata	%d20, %d22, %d52	;\
	faligndata	%d22, %d24, %d54	;\
	faligndata	%d24, %d26, %d56	;\
	faligndata	%d26, %d28, %d58	;\
	faligndata	%d28, %d30, %d60	;\
	faligndata	%d30, %d32, %d62

#define	FALIGN_D32			\
	faligndata	%d32, %d34, %d48	;\
	faligndata	%d34, %d36, %d50	;\
	faligndata	%d36, %d38, %d52	;\
	faligndata	%d38, %d40, %d54	;\
	faligndata	%d40, %d42, %d56	;\
	faligndata	%d42, %d44, %d58	;\
	faligndata	%d44, %d46, %d60	;\
	faligndata	%d46, %d0, %d62

#define	FALIGN_D2			\
	faligndata	%d2, %d4, %d48		;\
	faligndata	%d4, %d6, %d50		;\
	faligndata	%d6, %d8, %d52		;\
	faligndata	%d8, %d10, %d54		;\
	faligndata	%d10, %d12, %d56	;\
	faligndata	%d12, %d14, %d58	;\
	faligndata	%d14, %d16, %d60	;\
	faligndata	%d16, %d18, %d62

#define	FALIGN_D18			\
	faligndata	%d18, %d20, %d48	;\
	faligndata	%d20, %d22, %d50	;\
	faligndata	%d22, %d24, %d52	;\
	faligndata	%d24, %d26, %d54	;\
	faligndata	%d26, %d28, %d56	;\
	faligndata	%d28, %d30, %d58	;\
	faligndata	%d30, %d32, %d60	;\
	faligndata	%d32, %d34, %d62

#define	FALIGN_D34			\
	faligndata	%d34, %d36, %d48	;\
	faligndata	%d36, %d38, %d50	;\
	faligndata	%d38, %d40, %d52	;\
	faligndata	%d40, %d42, %d54	;\
	faligndata	%d42, %d44, %d56	;\
	faligndata	%d44, %d46, %d58	;\
	faligndata	%d46, %d0, %d60		;\
	faligndata	%d0, %d2, %d62

#define	FALIGN_D4			\
	faligndata	%d4, %d6, %d48		;\
	faligndata	%d6, %d8, %d50		;\
	faligndata	%d8, %d10, %d52		;\
	faligndata	%d10, %d12, %d54	;\
	faligndata	%d12, %d14, %d56	;\
	faligndata	%d14, %d16, %d58	;\
	faligndata	%d16, %d18, %d60	;\
	faligndata	%d18, %d20, %d62

#define	FALIGN_D20			\
	faligndata	%d20, %d22, %d48	;\
	faligndata	%d22, %d24, %d50	;\
	faligndata	%d24, %d26, %d52	;\
	faligndata	%d26, %d28, %d54	;\
	faligndata	%d28, %d30, %d56	;\
	faligndata	%d30, %d32, %d58	;\
	faligndata	%d32, %d34, %d60	;\
	faligndata	%d34, %d36, %d62

#define	FALIGN_D36			\
	faligndata	%d36, %d38, %d48	;\
	faligndata	%d38, %d40, %d50	;\
	faligndata	%d40, %d42, %d52	;\
	faligndata	%d42, %d44, %d54	;\
	faligndata	%d44, %d46, %d56	;\
	faligndata	%d46, %d0, %d58		;\
	faligndata	%d0, %d2, %d60		;\
	faligndata	%d2, %d4, %d62

#define	FALIGN_D6			\
	faligndata	%d6, %d8, %d48		;\
	faligndata	%d8, %d10, %d50		;\
	faligndata	%d10, %d12, %d52	;\
	faligndata	%d12, %d14, %d54	;\
	faligndata	%d14, %d16, %d56	;\
	faligndata	%d16, %d18, %d58	;\
	faligndata	%d18, %d20, %d60	;\
	faligndata	%d20, %d22, %d62

#define	FALIGN_D22			\
	faligndata	%d22, %d24, %d48	;\
	faligndata	%d24, %d26, %d50	;\
	faligndata	%d26, %d28, %d52	;\
	faligndata	%d28, %d30, %d54	;\
	faligndata	%d30, %d32, %d56	;\
	faligndata	%d32, %d34, %d58	;\
	faligndata	%d34, %d36, %d60	;\
	faligndata	%d36, %d38, %d62

#define	FALIGN_D38			\
	faligndata	%d38, %d40, %d48	;\
	faligndata	%d40, %d42, %d50	;\
	faligndata	%d42, %d44, %d52	;\
	faligndata	%d44, %d46, %d54	;\
	faligndata	%d46, %d0, %d56		;\
	faligndata	%d0, %d2, %d58		;\
	faligndata	%d2, %d4, %d60		;\
	faligndata	%d4, %d6, %d62

#define	FALIGN_D8			\
	faligndata	%d8, %d10, %d48		;\
	faligndata	%d10, %d12, %d50	;\
	faligndata	%d12, %d14, %d52	;\
	faligndata	%d14, %d16, %d54	;\
	faligndata	%d16, %d18, %d56	;\
	faligndata	%d18, %d20, %d58	;\
	faligndata	%d20, %d22, %d60	;\
	faligndata	%d22, %d24, %d62

#define	FALIGN_D24			\
	faligndata	%d24, %d26, %d48	;\
	faligndata	%d26, %d28, %d50	;\
	faligndata	%d28, %d30, %d52	;\
	faligndata	%d30, %d32, %d54	;\
	faligndata	%d32, %d34, %d56	;\
	faligndata	%d34, %d36, %d58	;\
	faligndata	%d36, %d38, %d60	;\
	faligndata	%d38, %d40, %d62

#define	FALIGN_D40			\
	faligndata	%d40, %d42, %d48	;\
	faligndata	%d42, %d44, %d50	;\
	faligndata	%d44, %d46, %d52	;\
	faligndata	%d46, %d0, %d54		;\
	faligndata	%d0, %d2, %d56		;\
	faligndata	%d2, %d4, %d58		;\
	faligndata	%d4, %d6, %d60		;\
	faligndata	%d6, %d8, %d62

#define	FALIGN_D10			\
	faligndata	%d10, %d12, %d48	;\
	faligndata	%d12, %d14, %d50	;\
	faligndata	%d14, %d16, %d52	;\
	faligndata	%d16, %d18, %d54	;\
	faligndata	%d18, %d20, %d56	;\
	faligndata	%d20, %d22, %d58	;\
	faligndata	%d22, %d24, %d60	;\
	faligndata	%d24, %d26, %d62

#define	FALIGN_D26			\
	faligndata	%d26, %d28, %d48	;\
	faligndata	%d28, %d30, %d50	;\
	faligndata	%d30, %d32, %d52	;\
	faligndata	%d32, %d34, %d54	;\
	faligndata	%d34, %d36, %d56	;\
	faligndata	%d36, %d38, %d58	;\
	faligndata	%d38, %d40, %d60	;\
	faligndata	%d40, %d42, %d62

#define	FALIGN_D42			\
	faligndata	%d42, %d44, %d48	;\
	faligndata	%d44, %d46, %d50	;\
	faligndata	%d46, %d0, %d52		;\
	faligndata	%d0, %d2, %d54		;\
	faligndata	%d2, %d4, %d56		;\
	faligndata	%d4, %d6, %d58		;\
	faligndata	%d6, %d8, %d60		;\
	faligndata	%d8, %d10, %d62

#define	FALIGN_D12			\
	faligndata	%d12, %d14, %d48	;\
	faligndata	%d14, %d16, %d50	;\
	faligndata	%d16, %d18, %d52	;\
	faligndata	%d18, %d20, %d54	;\
	faligndata	%d20, %d22, %d56	;\
	faligndata	%d22, %d24, %d58	;\
	faligndata	%d24, %d26, %d60	;\
	faligndata	%d26, %d28, %d62

#define	FALIGN_D28			\
	faligndata	%d28, %d30, %d48	;\
	faligndata	%d30, %d32, %d50	;\
	faligndata	%d32, %d34, %d52	;\
	faligndata	%d34, %d36, %d54	;\
	faligndata	%d36, %d38, %d56	;\
	faligndata	%d38, %d40, %d58	;\
	faligndata	%d40, %d42, %d60	;\
	faligndata	%d42, %d44, %d62

#define	FALIGN_D44			\
	faligndata	%d44, %d46, %d48	;\
	faligndata	%d46, %d0, %d50		;\
	faligndata	%d0, %d2, %d52		;\
	faligndata	%d2, %d4, %d54		;\
	faligndata	%d4, %d6, %d56		;\
	faligndata	%d6, %d8, %d58		;\
	faligndata	%d8, %d10, %d60		;\
	faligndata	%d10, %d12, %d62

#define	FALIGN_D14			\
	faligndata	%d14, %d16, %d48	;\
	faligndata	%d16, %d18, %d50	;\
	faligndata	%d18, %d20, %d52	;\
	faligndata	%d20, %d22, %d54	;\
	faligndata	%d22, %d24, %d56	;\
	faligndata	%d24, %d26, %d58	;\
	faligndata	%d26, %d28, %d60	;\
	faligndata	%d28, %d30, %d62

#define	FALIGN_D30			\
	faligndata	%d30, %d32, %d48	;\
	faligndata	%d32, %d34, %d50	;\
	faligndata	%d34, %d36, %d52	;\
	faligndata	%d36, %d38, %d54	;\
	faligndata	%d38, %d40, %d56	;\
	faligndata	%d40, %d42, %d58	;\
	faligndata	%d42, %d44, %d60	;\
	faligndata	%d44, %d46, %d62

#define	FALIGN_D46			\
	faligndata	%d46, %d0, %d48		;\
	faligndata	%d0, %d2, %d50		;\
	faligndata	%d2, %d4, %d52		;\
	faligndata	%d4, \
%d6, %d54		;\
	faligndata	%d6, %d8, %d56		;\
	faligndata	%d8, %d10, %d58		;\
	faligndata	%d10, %d12, %d60	;\
	faligndata	%d12, %d14, %d62


/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 *
 * kcopy installs .copyerr as the thread's t_lofault handler and then
 * joins the common copy path at .do_copy (inside bcopy) with the
 * previous handler held in %l6.  BCOPY_FLAG is left clear in %l6, which
 * is how .copyerr tells a kcopy fault from a bcopy fault.
 */



#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return(0); }

#else	/* lint */

	.seg	".text"
	.align	4

	ENTRY(kcopy)

	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	set	.copyerr, %l6			! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %l7	! save existing handler
	membar	#Sync				! sync error barrier (see copy.s)
	stn	%l6, [THREAD_REG + T_LOFAULT]	! set t_lofault
	!
	! Note that we carefully do *not* flag the setting of
	! t_lofault.
	!
	ba,pt	%ncc, .do_copy			! common code
	  mov	%l7, %l6			! delay: %l6 = previous handler

/*
 * We got here because of a fault during kcopy or bcopy if a fault
 * handler existed when bcopy was called.
 * Errno value is in %g1.
 *
 * On entry %l6 holds the saved t_lofault value plus COPY_FLAGS bits.
 * %l1 is used as scratch for BCOPY_FLAG and KPREEMPT_FLAG.
 */
.copyerr:
	set	.copyerr2, %l1
	membar	#Sync				! sync error barrier
	stn	%l1, [THREAD_REG + T_LOFAULT]	! set t_lofault
	btst	FPUSED_FLAG, %l6
	bz	%icc, 1f			! fp never used: nothing to restore
	  and	%l6, BCOPY_FLAG, %l1		! copy flag to %l1

	membar	#Sync

	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz	%icc, 4f			! fp was not enabled on entry
	  nop

	! restore fpregs from stack
	! (same 64-byte aligned address computation as the save path)
	membar	#Sync
	add	%fp, STACK_BIAS - 257, %o2
	and	%o2, -64, %o2
	ldda	[%o2]ASI_BLK_P, %d0
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d16
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d32
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d48
	membar	#Sync

	ba,pt	%ncc, 2f
	  wr	%o3, 0, %fprs			! restore fprs

4:
	FZERO					! zero all of the fpregs
	wr	%o3, 0, %fprs			! restore fprs

	! Threads without an lwp had t_preempt incremented before the
	! block copy started; undo that here.
2:	ldn	[THREAD_REG + T_LWP], %o2
	tst	%o2
	bnz,pt	%ncc, 1f
	  nop

	ldsb	[THREAD_REG + T_PREEMPT], %l0
	deccc	%l0
	bnz,pn	%ncc, 1f			! still disabled: no preempt check
	  stb	%l0, [THREAD_REG + T_PREEMPT]	! delay: always store new count

	! Check for a kernel preemption request
	ldn	[THREAD_REG + T_CPU], %l0
	ldub	[%l0 + CPU_KPRUNRUN], %l0
	tst	%l0
	bnz,a,pt	%ncc, 1f		! Need to call kpreempt?
	  or	%l1, KPREEMPT_FLAG, %l1		! If so, set the flag

	!
	! Need to cater for the different expectations of kcopy
	! and bcopy. kcopy will *always* set a t_lofault handler
	! If it fires, we're expected to just return the error code
	! and *not* to invoke any existing error handler. As far as
	! bcopy is concerned, we only set t_lofault if there was an
	! existing lofault handler. In that case we're expected to
	! invoke the previously existing handler after resetting the
	! t_lofault value.
	!
1:
	andn	%l6, COPY_FLAGS, %l6	! remove flags from lofault address
	membar	#Sync				! sync error barrier
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	! call kpreempt if necessary
	! (only after t_lofault has been restored)
	btst	KPREEMPT_FLAG, %l1
	bz,pt	%icc, 2f
	  nop
	call	kpreempt
	  rdpr	%pil, %o0			! pass %pil
2:
	btst	BCOPY_FLAG, %l1
	bnz,pn	%ncc, 3f
	  nop
	ret					! kcopy: return the errno in %g1
	restore	%g1, 0, %o0

3:
	!
	! We're here via bcopy. There *must* have been an error handler
	! in place otherwise we would have died a nasty death already.
	!
	jmp	%l6				! goto real handler
	  restore	%g0, 0, %o0		! dispose of copy window

/*
 * We got here because of a fault in .copyerr.  We can't safely restore fp
 * state, so we panic.
 */
fp_panic_msg:
	.asciz	"Unable to restore fp state after copy operation"

	.align	4
.copyerr2:
	set	fp_panic_msg, %o0
	call	panic
	  nop
	SET_SIZE(kcopy)
#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * Registers: l6 - saved t_lofault
 *
 * Copy a page of memory.
 * Assumes double word alignment and a count >= 256.
 *
 * NOTE(review): the two lines above look stale.  The code below sends
 * counts under 12 to a byte copy and aligns the destination itself, so
 * it does not appear to rely on either assumption - confirm and prune.
 */
#if defined(lint)

/* ARGSUSED */
void
bcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(bcopy)

	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	ldn	[THREAD_REG + T_LOFAULT], %l6	! save t_lofault
	tst	%l6
	!
	! We've already captured whether t_lofault was zero on entry.
	! We need to mark ourselves as being from bcopy since both
	! kcopy and bcopy use the same code path. If BCOPY_FLAG is set
	! and the saved lofault was zero, we won't reset lofault on
	! returning.
	!
	or	%l6, BCOPY_FLAG, %l6
	bz,pt	%ncc, .do_copy
	sethi	%hi(.copyerr), %o2
	or	%o2, %lo(.copyerr), %o2
	membar	#Sync				! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

.do_copy:
	cmp	%i2, 12				! for small counts
	blu	%ncc, .bytecp			! just copy bytes
	  .empty

	cmp	%i2, VIS_COPY_THRESHOLD		! for large counts
	blu,pt	%ncc, .bcb_punt
	  .empty

	!
	! Check to see if VIS acceleration is enabled
	!
	sethi	%hi(use_hw_bcopy), %o2
	ld	[%o2 + %lo(use_hw_bcopy)], %o2
	tst	%o2
	bz,pn	%icc, .bcb_punt
	  nop

	subcc	%i1, %i0, %i3
	bneg,a,pn %ncc, 1f
	neg	%i3
1:
	/*
	 * Compare against 256 since we should be checking block addresses
	 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
	 * src = dest + (64 * 3) + 63.
	 */
	cmp	%i3, 256
	blu,pn	%ncc, .bcb_punt
	  nop

	ldn	[THREAD_REG + T_LWP], %o3
	tst	%o3
	bnz,pt	%ncc, 1f
	  nop

	!
kpreempt_disable(); 695 ldsb [THREAD_REG + T_PREEMPT], %o2 696 inc %o2 697 stb %o2, [THREAD_REG + T_PREEMPT] 698 6991: 700 rd %fprs, %o2 ! check for unused fp 701 st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs 702 btst FPRS_FEF, %o2 703 bz,a %icc, .do_blockcopy 704 wr %g0, FPRS_FEF, %fprs 705 706.bcb_fpregs_inuse: 707 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger 708 bgeu %ncc, 1f ! if we have to save the fpregs) 709 nop 710 711 tst %o3 712 bnz,pt %ncc, .bcb_punt 713 nop 714 715 ldsb [THREAD_REG + T_PREEMPT], %l0 716 deccc %l0 717 bnz,pn %icc, .bcb_punt 718 stb %l0, [THREAD_REG + T_PREEMPT] 719 720 ! Check for a kernel preemption request 721 ldn [THREAD_REG + T_CPU], %l0 722 ldub [%l0 + CPU_KPRUNRUN], %l0 723 tst %l0 724 bz,pt %icc, .bcb_punt 725 nop 726 727 ! Attempt to preempt 728 call kpreempt 729 rdpr %pil, %o0 ! pass %pil 730 731 ba,pt %ncc, .bcb_punt 732 nop 733 7341: 735 wr %g0, FPRS_FEF, %fprs 736 737 ! save in-use fpregs on stack 738 membar #Sync 739 add %fp, STACK_BIAS - 257, %o2 740 and %o2, -64, %o2 741 stda %d0, [%o2]ASI_BLK_P 742 add %o2, 64, %o2 743 stda %d16, [%o2]ASI_BLK_P 744 add %o2, 64, %o2 745 stda %d32, [%o2]ASI_BLK_P 746 add %o2, 64, %o2 747 stda %d48, [%o2]ASI_BLK_P 748 membar #Sync 749 750.do_blockcopy: 751 membar #StoreStore|#StoreLoad|#LoadStore 752 753 rd %gsr, %o2 754 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 755 756 ! Set the lower bit in the saved t_lofault to indicate 757 ! that we need to clear the %fprs register on the way 758 ! out 759 or %l6, FPUSED_FLAG, %l6 760 761 ! Swap src/dst since the code below is memcpy code 762 ! and memcpy/bcopy have different calling sequences 763 mov %i1, %i5 764 mov %i0, %i1 765 mov %i5, %i0 766 767!!! This code is nearly identical to the version in the sun4u 768!!! libc_psr. Most bugfixes made to that file should be 769!!! merged into this routine. 
770 771 andcc %i0, 7, %o3 772 bz,pt %ncc, blkcpy 773 sub %o3, 8, %o3 774 neg %o3 775 sub %i2, %o3, %i2 776 777 ! Align Destination on double-word boundary 778 7792: ldub [%i1], %o4 780 inc %i1 781 inc %i0 782 deccc %o3 783 bgu %ncc, 2b 784 stb %o4, [%i0 - 1] 785blkcpy: 786 andcc %i0, 63, %i3 787 bz,pn %ncc, blalign ! now block aligned 788 sub %i3, 64, %i3 789 neg %i3 ! bytes till block aligned 790 sub %i2, %i3, %i2 ! update %i2 with new count 791 792 ! Copy %i3 bytes till dst is block (64 byte) aligned. use 793 ! double word copies. 794 795 alignaddr %i1, %g0, %g1 796 ldd [%g1], %d0 797 add %g1, 8, %g1 7986: 799 ldd [%g1], %d2 800 add %g1, 8, %g1 801 subcc %i3, 8, %i3 802 faligndata %d0, %d2, %d8 803 std %d8, [%i0] 804 add %i1, 8, %i1 805 bz,pn %ncc, blalign 806 add %i0, 8, %i0 807 ldd [%g1], %d0 808 add %g1, 8, %g1 809 subcc %i3, 8, %i3 810 faligndata %d2, %d0, %d8 811 std %d8, [%i0] 812 add %i1, 8, %i1 813 bgu,pn %ncc, 6b 814 add %i0, 8, %i0 815 816blalign: 817 membar #StoreLoad 818 ! %i2 = total length 819 ! %i3 = blocks (length - 64) / 64 820 ! %i4 = doubles remaining (length - blocks) 821 sub %i2, 64, %i3 822 andn %i3, 63, %i3 823 sub %i2, %i3, %i4 824 andn %i4, 7, %i4 825 sub %i4, 16, %i4 826 sub %i2, %i4, %i2 827 sub %i2, %i3, %i2 828 829 andn %i1, 0x3f, %l7 ! blk aligned address 830 alignaddr %i1, %g0, %g0 ! gen %gsr 831 832 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5 833 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3 834 add %i1, %i4, %i1 835 add %i1, %i3, %i1 836 837 ldda [%l7]ASI_BLK_P, %d0 838 add %l7, 64, %l7 839 ldda [%l7]ASI_BLK_P, %d16 840 add %l7, 64, %l7 841 ldda [%l7]ASI_BLK_P, %d32 842 add %l7, 64, %l7 843 sub %i3, 128, %i3 844 845 ! switch statement to get us to the right 8 byte blk within a 846 ! 
64 byte block 847 cmp %i5, 4 848 bgeu,a hlf 849 cmp %i5, 6 850 cmp %i5, 2 851 bgeu,a sqtr 852 nop 853 cmp %i5, 1 854 be,a seg1 855 nop 856 ba,pt %ncc, seg0 857 nop 858sqtr: 859 be,a seg2 860 nop 861 ba,pt %ncc, seg3 862 nop 863 864hlf: 865 bgeu,a fqtr 866 nop 867 cmp %i5, 5 868 be,a seg5 869 nop 870 ba,pt %ncc, seg4 871 nop 872fqtr: 873 be,a seg6 874 nop 875 ba,pt %ncc, seg7 876 nop 877 878 879seg0: 880 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 881 FALIGN_D0 882 ldda [%l7]ASI_BLK_P, %d0 883 stda %d48, [%i0]ASI_BLK_P 884 add %l7, 64, %l7 885 subcc %i3, 64, %i3 886 bz,pn %ncc, 0f 887 add %i0, 64, %i0 888 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 889 FALIGN_D16 890 ldda [%l7]ASI_BLK_P, %d16 891 stda %d48, [%i0]ASI_BLK_P 892 add %l7, 64, %l7 893 subcc %i3, 64, %i3 894 bz,pn %ncc, 1f 895 add %i0, 64, %i0 896 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 897 FALIGN_D32 898 ldda [%l7]ASI_BLK_P, %d32 899 stda %d48, [%i0]ASI_BLK_P 900 add %l7, 64, %l7 901 subcc %i3, 64, %i3 902 bz,pn %ncc, 2f 903 add %i0, 64, %i0 904 ba,a,pt %ncc, seg0 905 9060: 907 FALIGN_D16 908 stda %d48, [%i0]ASI_BLK_P 909 add %i0, 64, %i0 910 membar #Sync 911 FALIGN_D32 912 stda %d48, [%i0]ASI_BLK_P 913 ba,pt %ncc, blkd0 914 add %i0, 64, %i0 915 9161: 917 FALIGN_D32 918 stda %d48, [%i0]ASI_BLK_P 919 add %i0, 64, %i0 920 membar #Sync 921 FALIGN_D0 922 stda %d48, [%i0]ASI_BLK_P 923 ba,pt %ncc, blkd16 924 add %i0, 64, %i0 925 9262: 927 FALIGN_D0 928 stda %d48, [%i0]ASI_BLK_P 929 add %i0, 64, %i0 930 membar #Sync 931 FALIGN_D16 932 stda %d48, [%i0]ASI_BLK_P 933 ba,pt %ncc, blkd32 934 add %i0, 64, %i0 935 936seg1: 937 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 938 FALIGN_D2 939 ldda [%l7]ASI_BLK_P, %d0 940 stda %d48, [%i0]ASI_BLK_P 941 add %l7, 64, %l7 942 subcc %i3, 64, %i3 943 bz,pn %ncc, 0f 944 add %i0, 64, %i0 945 ! 
2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 946 FALIGN_D18 947 ldda [%l7]ASI_BLK_P, %d16 948 stda %d48, [%i0]ASI_BLK_P 949 add %l7, 64, %l7 950 subcc %i3, 64, %i3 951 bz,pn %ncc, 1f 952 add %i0, 64, %i0 953 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 954 FALIGN_D34 955 ldda [%l7]ASI_BLK_P, %d32 956 stda %d48, [%i0]ASI_BLK_P 957 add %l7, 64, %l7 958 subcc %i3, 64, %i3 959 bz,pn %ncc, 2f 960 add %i0, 64, %i0 961 ba,a,pt %ncc, seg1 9620: 963 FALIGN_D18 964 stda %d48, [%i0]ASI_BLK_P 965 add %i0, 64, %i0 966 membar #Sync 967 FALIGN_D34 968 stda %d48, [%i0]ASI_BLK_P 969 ba,pt %ncc, blkd2 970 add %i0, 64, %i0 971 9721: 973 FALIGN_D34 974 stda %d48, [%i0]ASI_BLK_P 975 add %i0, 64, %i0 976 membar #Sync 977 FALIGN_D2 978 stda %d48, [%i0]ASI_BLK_P 979 ba,pt %ncc, blkd18 980 add %i0, 64, %i0 981 9822: 983 FALIGN_D2 984 stda %d48, [%i0]ASI_BLK_P 985 add %i0, 64, %i0 986 membar #Sync 987 FALIGN_D18 988 stda %d48, [%i0]ASI_BLK_P 989 ba,pt %ncc, blkd34 990 add %i0, 64, %i0 991 992seg2: 993 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 994 FALIGN_D4 995 ldda [%l7]ASI_BLK_P, %d0 996 stda %d48, [%i0]ASI_BLK_P 997 add %l7, 64, %l7 998 subcc %i3, 64, %i3 999 bz,pn %ncc, 0f 1000 add %i0, 64, %i0 1001 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1002 FALIGN_D20 1003 ldda [%l7]ASI_BLK_P, %d16 1004 stda %d48, [%i0]ASI_BLK_P 1005 add %l7, 64, %l7 1006 subcc %i3, 64, %i3 1007 bz,pn %ncc, 1f 1008 add %i0, 64, %i0 1009 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1010 FALIGN_D36 1011 ldda [%l7]ASI_BLK_P, %d32 1012 stda %d48, [%i0]ASI_BLK_P 1013 add %l7, 64, %l7 1014 subcc %i3, 64, %i3 1015 bz,pn %ncc, 2f 1016 add %i0, 64, %i0 1017 ba,a,pt %ncc, seg2 1018 10190: 1020 FALIGN_D20 1021 stda %d48, [%i0]ASI_BLK_P 1022 add %i0, 64, %i0 1023 membar #Sync 1024 FALIGN_D36 1025 stda %d48, [%i0]ASI_BLK_P 1026 ba,pt %ncc, blkd4 1027 add %i0, 64, %i0 1028 10291: 1030 FALIGN_D36 1031 stda %d48, [%i0]ASI_BLK_P 1032 add %i0, 64, %i0 1033 membar #Sync 1034 FALIGN_D4 1035 stda %d48, [%i0]ASI_BLK_P 1036 ba,pt %ncc, blkd20 1037 add %i0, 64, %i0 1038 10392: 1040 FALIGN_D4 1041 stda %d48, [%i0]ASI_BLK_P 1042 add %i0, 64, %i0 1043 membar #Sync 1044 FALIGN_D20 1045 stda %d48, [%i0]ASI_BLK_P 1046 ba,pt %ncc, blkd36 1047 add %i0, 64, %i0 1048 1049seg3: 1050 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1051 FALIGN_D6 1052 ldda [%l7]ASI_BLK_P, %d0 1053 stda %d48, [%i0]ASI_BLK_P 1054 add %l7, 64, %l7 1055 subcc %i3, 64, %i3 1056 bz,pn %ncc, 0f 1057 add %i0, 64, %i0 1058 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1059 FALIGN_D22 1060 ldda [%l7]ASI_BLK_P, %d16 1061 stda %d48, [%i0]ASI_BLK_P 1062 add %l7, 64, %l7 1063 subcc %i3, 64, %i3 1064 bz,pn %ncc, 1f 1065 add %i0, 64, %i0 1066 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1067 FALIGN_D38 1068 ldda [%l7]ASI_BLK_P, %d32 1069 stda %d48, [%i0]ASI_BLK_P 1070 add %l7, 64, %l7 1071 subcc %i3, 64, %i3 1072 bz,pn %ncc, 2f 1073 add %i0, 64, %i0 1074 ba,a,pt %ncc, seg3 1075 10760: 1077 FALIGN_D22 1078 stda %d48, [%i0]ASI_BLK_P 1079 add %i0, 64, %i0 1080 membar #Sync 1081 FALIGN_D38 1082 stda %d48, [%i0]ASI_BLK_P 1083 ba,pt %ncc, blkd6 1084 add %i0, 64, %i0 1085 10861: 1087 FALIGN_D38 1088 stda %d48, [%i0]ASI_BLK_P 1089 add %i0, 64, %i0 1090 membar #Sync 1091 FALIGN_D6 1092 stda %d48, [%i0]ASI_BLK_P 1093 ba,pt %ncc, blkd22 1094 add %i0, 64, %i0 1095 10962: 1097 FALIGN_D6 1098 stda %d48, [%i0]ASI_BLK_P 1099 add %i0, 64, %i0 1100 membar #Sync 1101 FALIGN_D22 1102 stda %d48, [%i0]ASI_BLK_P 1103 ba,pt %ncc, blkd38 1104 add %i0, 64, %i0 1105 1106seg4: 1107 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1108 FALIGN_D8 1109 ldda [%l7]ASI_BLK_P, %d0 1110 stda %d48, [%i0]ASI_BLK_P 1111 add %l7, 64, %l7 1112 subcc %i3, 64, %i3 1113 bz,pn %ncc, 0f 1114 add %i0, 64, %i0 1115 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1116 FALIGN_D24 1117 ldda [%l7]ASI_BLK_P, %d16 1118 stda %d48, [%i0]ASI_BLK_P 1119 add %l7, 64, %l7 1120 subcc %i3, 64, %i3 1121 bz,pn %ncc, 1f 1122 add %i0, 64, %i0 1123 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1124 FALIGN_D40 1125 ldda [%l7]ASI_BLK_P, %d32 1126 stda %d48, [%i0]ASI_BLK_P 1127 add %l7, 64, %l7 1128 subcc %i3, 64, %i3 1129 bz,pn %ncc, 2f 1130 add %i0, 64, %i0 1131 ba,a,pt %ncc, seg4 1132 11330: 1134 FALIGN_D24 1135 stda %d48, [%i0]ASI_BLK_P 1136 add %i0, 64, %i0 1137 membar #Sync 1138 FALIGN_D40 1139 stda %d48, [%i0]ASI_BLK_P 1140 ba,pt %ncc, blkd8 1141 add %i0, 64, %i0 1142 11431: 1144 FALIGN_D40 1145 stda %d48, [%i0]ASI_BLK_P 1146 add %i0, 64, %i0 1147 membar #Sync 1148 FALIGN_D8 1149 stda %d48, [%i0]ASI_BLK_P 1150 ba,pt %ncc, blkd24 1151 add %i0, 64, %i0 1152 11532: 1154 FALIGN_D8 1155 stda %d48, [%i0]ASI_BLK_P 1156 add %i0, 64, %i0 1157 membar #Sync 1158 FALIGN_D24 1159 stda %d48, [%i0]ASI_BLK_P 1160 ba,pt %ncc, blkd40 1161 add %i0, 64, %i0 1162 1163seg5: 1164 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1165 FALIGN_D10 1166 ldda [%l7]ASI_BLK_P, %d0 1167 stda %d48, [%i0]ASI_BLK_P 1168 add %l7, 64, %l7 1169 subcc %i3, 64, %i3 1170 bz,pn %ncc, 0f 1171 add %i0, 64, %i0 1172 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1173 FALIGN_D26 1174 ldda [%l7]ASI_BLK_P, %d16 1175 stda %d48, [%i0]ASI_BLK_P 1176 add %l7, 64, %l7 1177 subcc %i3, 64, %i3 1178 bz,pn %ncc, 1f 1179 add %i0, 64, %i0 1180 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1181 FALIGN_D42 1182 ldda [%l7]ASI_BLK_P, %d32 1183 stda %d48, [%i0]ASI_BLK_P 1184 add %l7, 64, %l7 1185 subcc %i3, 64, %i3 1186 bz,pn %ncc, 2f 1187 add %i0, 64, %i0 1188 ba,a,pt %ncc, seg5 1189 11900: 1191 FALIGN_D26 1192 stda %d48, [%i0]ASI_BLK_P 1193 add %i0, 64, %i0 1194 membar #Sync 1195 FALIGN_D42 1196 stda %d48, [%i0]ASI_BLK_P 1197 ba,pt %ncc, blkd10 1198 add %i0, 64, %i0 1199 12001: 1201 FALIGN_D42 1202 stda %d48, [%i0]ASI_BLK_P 1203 add %i0, 64, %i0 1204 membar #Sync 1205 FALIGN_D10 1206 stda %d48, [%i0]ASI_BLK_P 1207 ba,pt %ncc, blkd26 1208 add %i0, 64, %i0 1209 12102: 1211 FALIGN_D10 1212 stda %d48, [%i0]ASI_BLK_P 1213 add %i0, 64, %i0 1214 membar #Sync 1215 FALIGN_D26 1216 stda %d48, [%i0]ASI_BLK_P 1217 ba,pt %ncc, blkd42 1218 add %i0, 64, %i0 1219 1220seg6: 1221 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1222 FALIGN_D12 1223 ldda [%l7]ASI_BLK_P, %d0 1224 stda %d48, [%i0]ASI_BLK_P 1225 add %l7, 64, %l7 1226 subcc %i3, 64, %i3 1227 bz,pn %ncc, 0f 1228 add %i0, 64, %i0 1229 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1230 FALIGN_D28 1231 ldda [%l7]ASI_BLK_P, %d16 1232 stda %d48, [%i0]ASI_BLK_P 1233 add %l7, 64, %l7 1234 subcc %i3, 64, %i3 1235 bz,pn %ncc, 1f 1236 add %i0, 64, %i0 1237 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1238 FALIGN_D44 1239 ldda [%l7]ASI_BLK_P, %d32 1240 stda %d48, [%i0]ASI_BLK_P 1241 add %l7, 64, %l7 1242 subcc %i3, 64, %i3 1243 bz,pn %ncc, 2f 1244 add %i0, 64, %i0 1245 ba,a,pt %ncc, seg6 1246 12470: 1248 FALIGN_D28 1249 stda %d48, [%i0]ASI_BLK_P 1250 add %i0, 64, %i0 1251 membar #Sync 1252 FALIGN_D44 1253 stda %d48, [%i0]ASI_BLK_P 1254 ba,pt %ncc, blkd12 1255 add %i0, 64, %i0 1256 12571: 1258 FALIGN_D44 1259 stda %d48, [%i0]ASI_BLK_P 1260 add %i0, 64, %i0 1261 membar #Sync 1262 FALIGN_D12 1263 stda %d48, [%i0]ASI_BLK_P 1264 ba,pt %ncc, blkd28 1265 add %i0, 64, %i0 1266 12672: 1268 FALIGN_D12 1269 stda %d48, [%i0]ASI_BLK_P 1270 add %i0, 64, %i0 1271 membar #Sync 1272 FALIGN_D28 1273 stda %d48, [%i0]ASI_BLK_P 1274 ba,pt %ncc, blkd44 1275 add %i0, 64, %i0 1276 1277seg7: 1278 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 1279 FALIGN_D14 1280 ldda [%l7]ASI_BLK_P, %d0 1281 stda %d48, [%i0]ASI_BLK_P 1282 add %l7, 64, %l7 1283 subcc %i3, 64, %i3 1284 bz,pn %ncc, 0f 1285 add %i0, 64, %i0 1286 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 1287 FALIGN_D30 1288 ldda [%l7]ASI_BLK_P, %d16 1289 stda %d48, [%i0]ASI_BLK_P 1290 add %l7, 64, %l7 1291 subcc %i3, 64, %i3 1292 bz,pn %ncc, 1f 1293 add %i0, 64, %i0 1294 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 1295 FALIGN_D46 1296 ldda [%l7]ASI_BLK_P, %d32 1297 stda %d48, [%i0]ASI_BLK_P 1298 add %l7, 64, %l7 1299 subcc %i3, 64, %i3 1300 bz,pn %ncc, 2f 1301 add %i0, 64, %i0 1302 ba,a,pt %ncc, seg7 1303 13040: 1305 FALIGN_D30 1306 stda %d48, [%i0]ASI_BLK_P 1307 add %i0, 64, %i0 1308 membar #Sync 1309 FALIGN_D46 1310 stda %d48, [%i0]ASI_BLK_P 1311 ba,pt %ncc, blkd14 1312 add %i0, 64, %i0 1313 13141: 1315 FALIGN_D46 1316 stda %d48, [%i0]ASI_BLK_P 1317 add %i0, 64, %i0 1318 membar #Sync 1319 FALIGN_D14 1320 stda %d48, [%i0]ASI_BLK_P 1321 ba,pt %ncc, blkd30 1322 add %i0, 64, %i0 1323 13242: 1325 FALIGN_D14 1326 stda %d48, [%i0]ASI_BLK_P 1327 add %i0, 64, %i0 1328 membar #Sync 1329 FALIGN_D30 1330 stda %d48, [%i0]ASI_BLK_P 1331 ba,pt %ncc, blkd46 1332 add %i0, 64, %i0 1333 1334 1335 ! 1336 ! dribble out the last partial block 1337 ! 1338blkd0: 1339 subcc %i4, 8, %i4 1340 blu,pn %ncc, blkdone 1341 faligndata %d0, %d2, %d48 1342 std %d48, [%i0] 1343 add %i0, 8, %i0 1344blkd2: 1345 subcc %i4, 8, %i4 1346 blu,pn %ncc, blkdone 1347 faligndata %d2, %d4, %d48 1348 std %d48, [%i0] 1349 add %i0, 8, %i0 1350blkd4: 1351 subcc %i4, 8, %i4 1352 blu,pn %ncc, blkdone 1353 faligndata %d4, %d6, %d48 1354 std %d48, [%i0] 1355 add %i0, 8, %i0 1356blkd6: 1357 subcc %i4, 8, %i4 1358 blu,pn %ncc, blkdone 1359 faligndata %d6, %d8, %d48 1360 std %d48, [%i0] 1361 add %i0, 8, %i0 1362blkd8: 1363 subcc %i4, 8, %i4 1364 blu,pn %ncc, blkdone 1365 faligndata %d8, %d10, %d48 1366 std %d48, [%i0] 1367 add %i0, 8, %i0 1368blkd10: 1369 subcc %i4, 8, %i4 1370 blu,pn %ncc, blkdone 1371 faligndata %d10, %d12, %d48 1372 std %d48, [%i0] 1373 add %i0, 8, %i0 1374blkd12: 1375 subcc %i4, 8, %i4 1376 blu,pn %ncc, blkdone 1377 faligndata %d12, %d14, %d48 1378 std %d48, [%i0] 1379 add %i0, 8, %i0 1380blkd14: 1381 subcc %i4, 8, %i4 1382 blu,pn %ncc, blkdone 1383 fsrc1 %d14, %d0 1384 ba,a,pt %ncc, blkleft 1385 1386blkd16: 1387 subcc %i4, 8, %i4 1388 blu,pn %ncc, blkdone 
1389 faligndata %d16, %d18, %d48 1390 std %d48, [%i0] 1391 add %i0, 8, %i0 1392blkd18: 1393 subcc %i4, 8, %i4 1394 blu,pn %ncc, blkdone 1395 faligndata %d18, %d20, %d48 1396 std %d48, [%i0] 1397 add %i0, 8, %i0 1398blkd20: 1399 subcc %i4, 8, %i4 1400 blu,pn %ncc, blkdone 1401 faligndata %d20, %d22, %d48 1402 std %d48, [%i0] 1403 add %i0, 8, %i0 1404blkd22: 1405 subcc %i4, 8, %i4 1406 blu,pn %ncc, blkdone 1407 faligndata %d22, %d24, %d48 1408 std %d48, [%i0] 1409 add %i0, 8, %i0 1410blkd24: 1411 subcc %i4, 8, %i4 1412 blu,pn %ncc, blkdone 1413 faligndata %d24, %d26, %d48 1414 std %d48, [%i0] 1415 add %i0, 8, %i0 1416blkd26: 1417 subcc %i4, 8, %i4 1418 blu,pn %ncc, blkdone 1419 faligndata %d26, %d28, %d48 1420 std %d48, [%i0] 1421 add %i0, 8, %i0 1422blkd28: 1423 subcc %i4, 8, %i4 1424 blu,pn %ncc, blkdone 1425 faligndata %d28, %d30, %d48 1426 std %d48, [%i0] 1427 add %i0, 8, %i0 1428blkd30: 1429 subcc %i4, 8, %i4 1430 blu,pn %ncc, blkdone 1431 fsrc1 %d30, %d0 1432 ba,a,pt %ncc, blkleft 1433blkd32: 1434 subcc %i4, 8, %i4 1435 blu,pn %ncc, blkdone 1436 faligndata %d32, %d34, %d48 1437 std %d48, [%i0] 1438 add %i0, 8, %i0 1439blkd34: 1440 subcc %i4, 8, %i4 1441 blu,pn %ncc, blkdone 1442 faligndata %d34, %d36, %d48 1443 std %d48, [%i0] 1444 add %i0, 8, %i0 1445blkd36: 1446 subcc %i4, 8, %i4 1447 blu,pn %ncc, blkdone 1448 faligndata %d36, %d38, %d48 1449 std %d48, [%i0] 1450 add %i0, 8, %i0 1451blkd38: 1452 subcc %i4, 8, %i4 1453 blu,pn %ncc, blkdone 1454 faligndata %d38, %d40, %d48 1455 std %d48, [%i0] 1456 add %i0, 8, %i0 1457blkd40: 1458 subcc %i4, 8, %i4 1459 blu,pn %ncc, blkdone 1460 faligndata %d40, %d42, %d48 1461 std %d48, [%i0] 1462 add %i0, 8, %i0 1463blkd42: 1464 subcc %i4, 8, %i4 1465 blu,pn %ncc, blkdone 1466 faligndata %d42, %d44, %d48 1467 std %d48, [%i0] 1468 add %i0, 8, %i0 1469blkd44: 1470 subcc %i4, 8, %i4 1471 blu,pn %ncc, blkdone 1472 faligndata %d44, %d46, %d48 1473 std %d48, [%i0] 1474 add %i0, 8, %i0 1475blkd46: 1476 subcc %i4, 8, %i4 1477 blu,pn 
%ncc, blkdone 1478 fsrc1 %d46, %d0 1479 1480blkleft: 14811: 1482 ldd [%l7], %d2 1483 add %l7, 8, %l7 1484 subcc %i4, 8, %i4 1485 faligndata %d0, %d2, %d8 1486 std %d8, [%i0] 1487 blu,pn %ncc, blkdone 1488 add %i0, 8, %i0 1489 ldd [%l7], %d0 1490 add %l7, 8, %l7 1491 subcc %i4, 8, %i4 1492 faligndata %d2, %d0, %d8 1493 std %d8, [%i0] 1494 bgeu,pt %ncc, 1b 1495 add %i0, 8, %i0 1496 1497blkdone: 1498 tst %i2 1499 bz,pt %ncc, .bcb_exit 1500 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0 1501 15027: ldub [%i1], %i4 1503 inc %i1 1504 inc %i0 1505 deccc %i2 1506 bgu,pt %ncc, 7b 1507 stb %i4, [%i0 - 1] 1508 1509.bcb_exit: 1510 membar #StoreLoad|#StoreStore 1511 btst FPUSED_FLAG, %l6 1512 bz %icc, 1f 1513 and %l6, COPY_FLAGS, %l1 ! Store flags in %l1 1514 ! We can't clear the flags from %l6 yet. 1515 ! If there's an error, .copyerr will 1516 ! need them 1517 1518 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr 1519 wr %o2, 0, %gsr 1520 1521 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 1522 btst FPRS_FEF, %o3 1523 bz %icc, 4f 1524 nop 1525 1526 ! restore fpregs from stack 1527 membar #Sync 1528 add %fp, STACK_BIAS - 257, %o2 1529 and %o2, -64, %o2 1530 ldda [%o2]ASI_BLK_P, %d0 1531 add %o2, 64, %o2 1532 ldda [%o2]ASI_BLK_P, %d16 1533 add %o2, 64, %o2 1534 ldda [%o2]ASI_BLK_P, %d32 1535 add %o2, 64, %o2 1536 ldda [%o2]ASI_BLK_P, %d48 1537 membar #Sync 1538 1539 ba,pt %ncc, 2f 1540 wr %o3, 0, %fprs ! restore fprs 1541 15424: 1543 FZERO ! zero all of the fpregs 1544 wr %o3, 0, %fprs ! restore fprs 1545 15462: ldn [THREAD_REG + T_LWP], %o2 1547 tst %o2 1548 bnz,pt %ncc, 1f 1549 nop 1550 1551 ldsb [THREAD_REG + T_PREEMPT], %l0 1552 deccc %l0 1553 bnz,pn %ncc, 1f 1554 stb %l0, [THREAD_REG + T_PREEMPT] 1555 1556 ! Check for a kernel preemption request 1557 ldn [THREAD_REG + T_CPU], %l0 1558 ldub [%l0 + CPU_KPRUNRUN], %l0 1559 tst %l0 1560 bnz,a,pt %ncc, 1f ! Need to call kpreempt? 1561 or %l1, KPREEMPT_FLAG, %l1 ! 
If so, set the flag 1562 15631: 1564 btst BCOPY_FLAG, %l1 1565 bz,pn %icc, 3f 1566 andncc %l6, COPY_FLAGS, %l6 1567 1568 ! 1569 ! Here via bcopy. Check to see if the handler was NULL. 1570 ! If so, just return quietly. Otherwise, reset the 1571 ! handler and go home. 1572 ! 1573 bnz,pn %ncc, 3f 1574 nop 1575 1576 ! 1577 ! Null handler. Check for kpreempt flag, call if necessary, 1578 ! then return. 1579 ! 1580 btst KPREEMPT_FLAG, %l1 1581 bz,pt %icc, 2f 1582 nop 1583 call kpreempt 1584 rdpr %pil, %o0 ! pass %pil 15852: 1586 ret 1587 restore %g0, 0, %o0 1588 1589 ! 1590 ! Here via kcopy or bcopy with a handler.Reset the 1591 ! fault handler. 1592 ! 15933: 1594 membar #Sync 1595 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1596 1597 ! call kpreempt if necessary 1598 btst KPREEMPT_FLAG, %l1 1599 bz,pt %icc, 4f 1600 nop 1601 call kpreempt 1602 rdpr %pil, %o0 16034: 1604 ret 1605 restore %g0, 0, %o0 1606 1607.bcb_punt: 1608 ! 1609 ! use aligned transfers where possible 1610 ! 1611 xor %i0, %i1, %o4 ! xor from and to address 1612 btst 7, %o4 ! if lower three bits zero 1613 bz %icc, .aldoubcp ! can align on double boundary 1614 .empty ! assembler complaints about label 1615 1616 xor %i0, %i1, %o4 ! xor from and to address 1617 btst 3, %o4 ! if lower two bits zero 1618 bz %icc, .alwordcp ! can align on word boundary 1619 btst 3, %i0 ! delay slot, from address unaligned? 1620 ! 1621 ! use aligned reads and writes where possible 1622 ! this differs from wordcp in that it copes 1623 ! with odd alignment between source and destnation 1624 ! using word reads and writes with the proper shifts 1625 ! in between to align transfers to and from memory 1626 ! i0 - src address, i1 - dest address, i2 - count 1627 ! i3, i4 - tmps for used generating complete word 1628 ! i5 (word to write) 1629 ! l0 size in bits of upper part of source word (US) 1630 ! l1 size in bits of lower part of source word (LS = 32 - US) 1631 ! 
l2 size in bits of upper part of destination word (UD) 1632 ! l3 size in bits of lower part of destination word (LD = 32 - UD) 1633 ! l4 number of bytes leftover after aligned transfers complete 1634 ! l5 the number 32 1635 ! 1636 mov 32, %l5 ! load an oft-needed constant 1637 bz .align_dst_only 1638 btst 3, %i1 ! is destnation address aligned? 1639 clr %i4 ! clear registers used in either case 1640 bz %icc, .align_src_only 1641 clr %l0 1642 ! 1643 ! both source and destination addresses are unaligned 1644 ! 16451: ! align source 1646 ldub [%i0], %i3 ! read a byte from source address 1647 add %i0, 1, %i0 ! increment source address 1648 or %i4, %i3, %i4 ! or in with previous bytes (if any) 1649 btst 3, %i0 ! is source aligned? 1650 add %l0, 8, %l0 ! increment size of upper source (US) 1651 bnz,a 1b 1652 sll %i4, 8, %i4 ! make room for next byte 1653 1654 sub %l5, %l0, %l1 ! generate shift left count (LS) 1655 sll %i4, %l1, %i4 ! prepare to get rest 1656 ld [%i0], %i3 ! read a word 1657 add %i0, 4, %i0 ! increment source address 1658 srl %i3, %l0, %i5 ! upper src bits into lower dst bits 1659 or %i4, %i5, %i5 ! merge 1660 mov 24, %l3 ! align destination 16611: 1662 srl %i5, %l3, %i4 ! prepare to write a single byte 1663 stb %i4, [%i1] ! write a byte 1664 add %i1, 1, %i1 ! increment destination address 1665 sub %i2, 1, %i2 ! decrement count 1666 btst 3, %i1 ! is destination aligned? 1667 bnz,a 1b 1668 sub %l3, 8, %l3 ! delay slot, decrement shift count (LD) 1669 sub %l5, %l3, %l2 ! generate shift left count (UD) 1670 sll %i5, %l2, %i5 ! move leftover into upper bytes 1671 cmp %l2, %l0 ! cmp # reqd to fill dst w old src left 1672 bgu %ncc, .more_needed ! need more to fill than we have 1673 nop 1674 1675 sll %i3, %l1, %i3 ! clear upper used byte(s) 1676 srl %i3, %l1, %i3 1677 ! get the odd bytes between alignments 1678 sub %l0, %l2, %l0 ! regenerate shift count 1679 sub %l5, %l0, %l1 ! generate new shift left count (LS) 1680 and %i2, 3, %l4 ! 
must do remaining bytes if count%4 > 0 1681 andn %i2, 3, %i2 ! # of aligned bytes that can be moved 1682 srl %i3, %l0, %i4 1683 or %i5, %i4, %i5 1684 st %i5, [%i1] ! write a word 1685 subcc %i2, 4, %i2 ! decrement count 1686 bz %ncc, .unalign_out 1687 add %i1, 4, %i1 ! increment destination address 1688 1689 b 2f 1690 sll %i3, %l1, %i5 ! get leftover into upper bits 1691.more_needed: 1692 sll %i3, %l0, %i3 ! save remaining byte(s) 1693 srl %i3, %l0, %i3 1694 sub %l2, %l0, %l1 ! regenerate shift count 1695 sub %l5, %l1, %l0 ! generate new shift left count 1696 sll %i3, %l1, %i4 ! move to fill empty space 1697 b 3f 1698 or %i5, %i4, %i5 ! merge to complete word 1699 ! 1700 ! the source address is aligned and destination is not 1701 ! 1702.align_dst_only: 1703 ld [%i0], %i4 ! read a word 1704 add %i0, 4, %i0 ! increment source address 1705 mov 24, %l0 ! initial shift alignment count 17061: 1707 srl %i4, %l0, %i3 ! prepare to write a single byte 1708 stb %i3, [%i1] ! write a byte 1709 add %i1, 1, %i1 ! increment destination address 1710 sub %i2, 1, %i2 ! decrement count 1711 btst 3, %i1 ! is destination aligned? 1712 bnz,a 1b 1713 sub %l0, 8, %l0 ! delay slot, decrement shift count 1714.xfer: 1715 sub %l5, %l0, %l1 ! generate shift left count 1716 sll %i4, %l1, %i5 ! get leftover 17173: 1718 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0 1719 andn %i2, 3, %i2 ! # of aligned bytes that can be moved 17202: 1721 ld [%i0], %i3 ! read a source word 1722 add %i0, 4, %i0 ! increment source address 1723 srl %i3, %l0, %i4 ! upper src bits into lower dst bits 1724 or %i5, %i4, %i5 ! merge with upper dest bits (leftover) 1725 st %i5, [%i1] ! write a destination word 1726 subcc %i2, 4, %i2 ! decrement count 1727 bz %ncc, .unalign_out ! check if done 1728 add %i1, 4, %i1 ! increment destination address 1729 b 2b ! loop 1730 sll %i3, %l1, %i5 ! get leftover 1731.unalign_out: 1732 tst %l4 ! any bytes leftover? 1733 bz %ncc, .cpdone 1734 .empty ! 
allow next instruction in delay slot 17351: 1736 sub %l0, 8, %l0 ! decrement shift 1737 srl %i3, %l0, %i4 ! upper src byte into lower dst byte 1738 stb %i4, [%i1] ! write a byte 1739 subcc %l4, 1, %l4 ! decrement count 1740 bz %ncc, .cpdone ! done? 1741 add %i1, 1, %i1 ! increment destination 1742 tst %l0 ! any more previously read bytes 1743 bnz %ncc, 1b ! we have leftover bytes 1744 mov %l4, %i2 ! delay slot, mv cnt where dbytecp wants 1745 b .dbytecp ! let dbytecp do the rest 1746 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 1747 ! 1748 ! the destination address is aligned and the source is not 1749 ! 1750.align_src_only: 1751 ldub [%i0], %i3 ! read a byte from source address 1752 add %i0, 1, %i0 ! increment source address 1753 or %i4, %i3, %i4 ! or in with previous bytes (if any) 1754 btst 3, %i0 ! is source aligned? 1755 add %l0, 8, %l0 ! increment shift count (US) 1756 bnz,a .align_src_only 1757 sll %i4, 8, %i4 ! make room for next byte 1758 b,a .xfer 1759 ! 1760 ! if from address unaligned for double-word moves, 1761 ! move bytes till it is, if count is < 56 it could take 1762 ! longer to align the thing than to do the transfer 1763 ! in word size chunks right away 1764 ! 1765.aldoubcp: 1766 cmp %i2, 56 ! if count < 56, use wordcp, it takes 1767 blu,a %ncc, .alwordcp ! longer to align doubles than words 1768 mov 3, %o0 ! mask for word alignment 1769 call .alignit ! copy bytes until aligned 1770 mov 7, %o0 ! mask for double alignment 1771 ! 1772 ! source and destination are now double-word aligned 1773 ! i3 has aligned count returned by alignit 1774 ! 1775 and %i2, 7, %i2 ! unaligned leftover count 1776 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 17775: 1778 ldx [%i0+%i1], %o4 ! read from address 1779 stx %o4, [%i1] ! write at destination address 1780 subcc %i3, 8, %i3 ! dec count 1781 bgu %ncc, 5b 1782 add %i1, 8, %i1 ! delay slot, inc to address 1783 cmp %i2, 4 ! see if we can copy a word 1784 blu %ncc, .dbytecp ! 
if 3 or less bytes use bytecp 1785 .empty 1786 ! 1787 ! for leftover bytes we fall into wordcp, if needed 1788 ! 1789.wordcp: 1790 and %i2, 3, %i2 ! unaligned leftover count 17915: 1792 ld [%i0+%i1], %o4 ! read from address 1793 st %o4, [%i1] ! write at destination address 1794 subcc %i3, 4, %i3 ! dec count 1795 bgu %ncc, 5b 1796 add %i1, 4, %i1 ! delay slot, inc to address 1797 b,a .dbytecp 1798 1799 ! we come here to align copies on word boundaries 1800.alwordcp: 1801 call .alignit ! go word-align it 1802 mov 3, %o0 ! bits that must be zero to be aligned 1803 b .wordcp 1804 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst 1805 1806 ! 1807 ! byte copy, works with any alignment 1808 ! 1809.bytecp: 1810 b .dbytecp 1811 sub %i0, %i1, %i0 ! i0 gets difference of src and dst 1812 1813 ! 1814 ! differenced byte copy, works with any alignment 1815 ! assumes dest in %i1 and (source - dest) in %i0 1816 ! 18171: 1818 stb %o4, [%i1] ! write to address 1819 inc %i1 ! inc to address 1820.dbytecp: 1821 deccc %i2 ! dec count 1822 bgeu,a %ncc, 1b ! loop till done 1823 ldub [%i0+%i1], %o4 ! read from address 1824 ! 1825 ! FPUSED_FLAG will not have been set in any path leading to 1826 ! this point. No need to deal with it. 1827 ! 1828.cpdone: 1829 btst BCOPY_FLAG, %l6 1830 bz,pn %icc, 2f 1831 andncc %l6, BCOPY_FLAG, %l6 1832 ! 1833 ! Here via bcopy. Check to see if the handler was NULL. 1834 ! If so, just return quietly. Otherwise, reset the 1835 ! handler and go home. 1836 ! 1837 bnz,pn %ncc, 2f 1838 nop 1839 ! 1840 ! Null handler. 1841 ! 1842 ret 1843 restore %g0, 0, %o0 1844 ! 1845 ! Here via kcopy or bcopy with a handler.Reset the 1846 ! fault handler. 1847 ! 18482: 1849 membar #Sync 1850 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1851 ret 1852 restore %g0, 0, %o0 ! return (0) 1853 1854/* 1855 * Common code used to align transfers on word and doubleword 1856 * boudaries. 
Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 *
 * void ovbcopy(const void *from, void *to, size_t count)
 *	%o0 = from, %o1 = to, %o2 = count
 *
 * Leaf routine.  A zero count returns immediately.  Otherwise the
 * distance between the buffers, abs(from - to), is compared (unsigned)
 * with count.  When count <= abs(from - to) the regions cannot overlap,
 * so we branch (not call) to bcopy with %o0-%o2 untouched and bcopy
 * returns directly to our caller.  When the regions do overlap the copy
 * is done one byte at a time, in the direction that never overwrites
 * source bytes that have not been read yet: backwards (last byte first)
 * if from < to, forwards otherwise.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address
					! (annulled delay slot: only executed
					! when count != 0 and the branch is
					! taken)

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f		! flags are from the subcc above
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
					! (sits in the delay slot of the bleu
					! above; it only sets condition codes,
					! so it is harmless on the bcopy path)
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.  %o2 is decremented first and then used as the
	! index, so bytes count-1 down to 0 are moved.  The loop branch
	! tests the flags left by deccc (ldub does not change them).
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

#endif	/* lint */

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page. This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
*/
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else	/* lint */
	ENTRY(hwblkpagecopy)
	! get another window w/space for three aligned blocks of saved fpregs
	! (4*64 bytes: three 64-byte blocks plus the slack lost when the save
	! area address is rounded down to a 64-byte boundary below)
	save	%sp, -SA(MINFRAME + 4*64), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)
	! %l0 - saved fprs
	! %l1 - pointer to saved fpregs
	! %l3 - scratch pointer used to step through the save area

	rd	%fprs, %l0		! check for unused fp
	btst	FPRS_FEF, %l0
	bz	1f			! fp not enabled: nothing to preserve
	membar	#Sync			! delay slot, executed on both paths

	! save in-use fpregs on stack
	! Only %d0-%d46 are saved; the copy loop below does not touch %d48
	! and up.
	add	%fp, STACK_BIAS - 193, %l1
	and	%l1, -64, %l1		! block stores need 64-byte alignment
	stda	%d0, [%l1]ASI_BLK_P
	add	%l1, 64, %l3
	stda	%d16, [%l3]ASI_BLK_P
	add	%l3, 64, %l3
	stda	%d32, [%l3]ASI_BLK_P
	membar	#Sync

	! Enable the fpu, prime the pipeline with the first source block and
	! set %i2 to the number of source bytes that remain to be loaded.
1:	wr	%g0, FPRS_FEF, %fprs
	ldda	[%i0]ASI_BLK_P, %d0
	add	%i0, 64, %i0
	set	PAGESIZE - 64, %i2

	! Each pass of the loop copies two blocks.  While the next source
	! block is being loaded into %d16 (first half) or %d0 (second half),
	! the previously loaded block is moved into %d32-%d46 with fsrc1 and
	! block-stored from there.
	!
	! Only the first half tests the remaining count, so the loop relies
	! on PAGESIZE being an even number of 64-byte blocks.  On exit the
	! last block loaded is still sitting in %d16 and is stored at 3:.
2:	ldda	[%i0]ASI_BLK_P, %d16
	fsrc1	%d0, %d32
	fsrc1	%d2, %d34
	fsrc1	%d4, %d36
	fsrc1	%d6, %d38
	fsrc1	%d8, %d40
	fsrc1	%d10, %d42
	fsrc1	%d12, %d44
	fsrc1	%d14, %d46
	stda	%d32, [%i1]ASI_BLK_P
	add	%i0, 64, %i0
	subcc	%i2, 64, %i2
	bz,pn	%ncc, 3f		! that was the last load
	add	%i1, 64, %i1
	ldda	[%i0]ASI_BLK_P, %d0
	fsrc1	%d16, %d32
	fsrc1	%d18, %d34
	fsrc1	%d20, %d36
	fsrc1	%d22, %d38
	fsrc1	%d24, %d40
	fsrc1	%d26, %d42
	fsrc1	%d28, %d44
	fsrc1	%d30, %d46
	stda	%d32, [%i1]ASI_BLK_P
	add	%i0, 64, %i0
	sub	%i2, 64, %i2		! no test here, see above
	ba,pt	%ncc, 2b
	add	%i1, 64, %i1

3:	membar	#Sync
	btst	FPRS_FEF, %l0
	bz	4f			! fp was not in use: skip the reload
	stda	%d16, [%i1]ASI_BLK_P	! delay slot (always executed):
					! store the final block of the page

	! restore fpregs from stack
	membar	#Sync
	ldda	[%l1]ASI_BLK_P, %d0
	add	%l1, 64, %l3
	ldda	[%l3]ASI_BLK_P, %d16
	add	%l3, 64, %l3
	ldda	[%l3]ASI_BLK_P, %d32

	! NOTE(review): when fp was not in use on entry, %d0-%d46 are left
	! holding page data here; the bcopy exit path in this file runs FZERO
	! in the equivalent case.  Confirm that this is intentional.
4:	wr	%l0, 0, %fprs		! restore fprs
	membar	#Sync
	ret
	restore	%g0, 0, %o0		! %o0 is cleared for the caller
	SET_SIZE(hwblkpagecopy)
#endif	/* lint */


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there's two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed. This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */

/*
 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * The only difference between default_copy{in,out} and
 * default_xcopy{in,out} is in the error handling routine they invoke
 * when a memory access error is seen. default_xcopyOP returns the errno
 * while default_copyOP returns -1 (see above). copy{in,out}_noerr set
 * a special flag (by oring the value 2 into the fault handler address)
 * if they are called with a fault handler already in place. That flag
 * causes the default handlers to trampoline to the previous handler
 * upon an error.
 *
 * None of the copyops routines grab a window until it's decided that
 * we need to do a HW block copy operation. This saves a window
 * spill/fill when we're called during socket ops.
The typical IO
 * path won't cause spill/fill traps.
 *
 * This code uses a set of 4 limits for the maximum size that will
 * be copied by the copy loops (rather than VIS) given a particular
 * input/output address alignment.
 * The default limits are:
 *
 * single byte aligned - 900 (hw_copy_limit_1)
 * two byte aligned - 1800 (hw_copy_limit_2)
 * four byte aligned - 3600 (hw_copy_limit_4)
 * eight byte aligned - 7200 (hw_copy_limit_8)
 *
 * If the value for a particular limit is zero, the copy will be done
 * via the copy loops rather than VIS.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous lo_fault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our nowindow fault handler into %o3.
 * Place the address of the windowed fault handler into %o4.
 * --> We'll use this handler if we end up grabbing a window
 * --> before we use VIS instructions.
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers. Based on the alignment we check count
 * against a soft limit of VIS_COPY_THRESHOLD (900 on spitfire). If
 * we're larger than VIS_COPY_THRESHOLD, we check against a limit based
 * on detected alignment. If we exceed the alignment value we copy
 * via VIS instructions.
 *
 * If we don't exceed one of the limits, we store -count in %o3,
 * we store the number of chunks (8, 4, 2 or 1 byte) operated
 * on in our basic copy loop in %o2. Following this we branch
 * to the appropriate copy loop and copy that many chunks.
 * Since we've been adding the chunk size to %o3 each time through
 * as well as decrementing %o2, we can tell if any data is
 * left to be copied by examining %o3. If that is zero, we're
 * done and can go home.
If not, we figure out what the largest
 * chunk size left to be copied is and branch to that copy loop
 * unless there's only one byte left. We load that as we're
 * branching to code that stores it just before we return.
 *
 * There is one potential situation in which we start to do a VIS
 * copy but decide to punt and return to the copy loops. There is
 * (in the default configuration) a window of 256 bytes between
 * the single byte aligned copy limit and what VIS treats as its
 * minimum if floating point is in use in the calling app. We need
 * to be prepared to handle this. See the .small_copyOP label for
 * details.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping. All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation. The handlers
 * for default_copyOP and copyOP_noerr are found at the end of
 * default_copyout. The handlers for default_xcopyOP are found at the
 * end of xdefault_copyin.
 */

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

/*
 * REAL_LOFAULT holds the address of the operation-specific error handler
 * that the generic copyio fault handlers jump to.  SAVED_LOFAULT holds
 * the t_lofault value that was in place when the copy routine was
 * entered (its low bits are also used to carry flags such as
 * FPUSED_FLAG).
 */
#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler. This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.
In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT. So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 *
 * copyio_fault is the variant used once a register window has been taken:
 * if FPUSED_FLAG is set in SAVED_LOFAULT it first puts %gsr, the fp
 * registers and %fprs back the way they were, then pops the window with
 * "restore" before jumping to REAL_LOFAULT.
 *
 * NOTE(review): unlike copyio_fault_nowindow below, this path does not
 * store SAVED_LOFAULT back into t_lofault itself; presumably that is
 * left to the REAL_LOFAULT handler - confirm.
 */
	ENTRY(copyio_fault)
	btst	FPUSED_FLAG, SAVED_LOFAULT
	bz	1f			! fp was never touched
	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
					! delay slot (always executed):
					! strip the flag from the saved handler

	membar	#Sync

	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
	wr	%o2, 0, %gsr		! restore gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz	4f			! fp was not in use before the copy
	nop

	! restore fpregs from stack
	! (four 64-byte blocks from the 64-byte aligned save area)
	membar	#Sync
	add	%fp, STACK_BIAS - 257, %o2
	and	%o2, -64, %o2
	ldda	[%o2]ASI_BLK_P, %d0
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d16
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d32
	add	%o2, 64, %o2
	ldda	[%o2]ASI_BLK_P, %d48
	membar	#Sync

	ba,pt	%ncc, 1f
	wr	%o3, 0, %fprs		! restore fprs

4:
	FZERO				! zero all of the fpregs
	wr	%o3, 0, %fprs		! restore fprs

1:

	restore				! drop the window; SAVE_* live in
					! globals and survive this

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2		! delay slot
	SET_SIZE(copyio_fault)

/*
 * Fault handler installed while a copy routine is still running without
 * a register window of its own.  It puts the caller's t_lofault back,
 * reloads the original arguments and jumps to REAL_LOFAULT.
 */
	ENTRY(copyio_fault_nowindow)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2		! delay slot
	SET_SIZE(copyio_fault_nowindow)

	ENTRY(copyout)
	! REAL_LOFAULT = address of .copyout_err
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	!
	! %o4 = windowed fault handler, %o3 = nowindow fault handler.
	! Remember the current t_lofault in SAVED_LOFAULT and install the
	! nowindow handler while we run as a leaf routine.
	!
	sethi	%hi(copyio_fault), %o4
	or	%o4, %lo(copyio_fault), %o4
	sethi	%hi(copyio_fault_nowindow), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault_nowindow), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	! keep the original arguments for the fault handlers
	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3		! annulled delay slot: alignment bits
					! of both addresses, only computed
					! when the branch is taken
	!
	! What was previously ".small_copyout"
	! Do full differenced copy.
	!
.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl		! loop while the offset is negative
	ldub	[%o0 + %o3], %o4	! annulled delay slot: next byte
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o0 | %o1 (used for alignment checking)
	! %o4 is alternate lo_fault
	! SAVED_LOFAULT is original lo_fault
	!
	! See if we're single byte aligned. If we are, check the
	!
limit for single byte copies. If we're smaller or equal, 2299 ! bounce to the byte for byte copy loop. Otherwise do it in 2300 ! HW (if enabled). 2301 ! 2302 btst 1, %o3 2303 bz,pt %icc, .dcoh8 2304 btst 7, %o3 2305 ! 2306 ! Single byte aligned. Do we do it via HW or via 2307 ! byte for byte? Do a quick no memory reference 2308 ! check to pick up small copies. 2309 ! 2310 subcc %o2, VIS_COPY_THRESHOLD, %o3 2311 bleu,pt %ncc, .dcobcp 2312 sethi %hi(hw_copy_limit_1), %o3 2313 ! 2314 ! Big enough that we need to check the HW limit for 2315 ! this size copy. 2316 ! 2317 ld [%o3 + %lo(hw_copy_limit_1)], %o3 2318 ! 2319 ! Is HW copy on? If not, do everything byte for byte. 2320 ! 2321 tst %o3 2322 bz,pn %icc, .dcobcp 2323 subcc %o3, %o2, %o3 2324 ! 2325 ! If we're less than or equal to the single byte copy limit, 2326 ! bop to the copy loop. 2327 ! 2328 bge,pt %ncc, .dcobcp 2329 nop 2330 ! 2331 ! We're big enough and copy is on. Do it with HW. 2332 ! 2333 ba,pt %ncc, .big_copyout 2334 nop 2335.dcoh8: 2336 ! 2337 ! 8 byte aligned? 2338 ! 2339 bnz,a %ncc, .dcoh4 2340 btst 3, %o3 2341 ! 2342 ! See if we're in the "small range". 2343 ! If so, go off and do the copy. 2344 ! If not, load the hard limit. %o3 is 2345 ! available for reuse. 2346 ! 2347 subcc %o2, VIS_COPY_THRESHOLD, %o3 2348 bleu,pt %ncc, .dcos8 2349 sethi %hi(hw_copy_limit_8), %o3 2350 ld [%o3 + %lo(hw_copy_limit_8)], %o3 2351 ! 2352 ! If it's zero, there's no HW bcopy. 2353 ! Bop off to the aligned copy. 2354 ! 2355 tst %o3 2356 bz,pn %icc, .dcos8 2357 subcc %o3, %o2, %o3 2358 ! 2359 ! We're negative if our size is larger than hw_copy_limit_8. 2360 ! 2361 bge,pt %ncc, .dcos8 2362 nop 2363 ! 2364 ! HW assist is on and we're large enough. Do it. 2365 ! 2366 ba,pt %ncc, .big_copyout 2367 nop 2368.dcos8: 2369 ! 2370 ! Housekeeping for copy loops. Uses same idea as in the byte for 2371 ! byte copy loop above. 2372 ! 
2373 add %o0, %o2, %o0 2374 add %o1, %o2, %o1 2375 sub %g0, %o2, %o3 2376 ba,pt %ncc, .dodebc 2377 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy 2378 ! 2379 ! 4 byte aligned? 2380 ! 2381.dcoh4: 2382 bnz,pn %ncc, .dcoh2 2383 ! 2384 ! See if we're in the "small range". 2385 ! If so, go off an do the copy. 2386 ! If not, load the hard limit. %o3 is 2387 ! available for reuse. 2388 ! 2389 subcc %o2, VIS_COPY_THRESHOLD, %o3 2390 bleu,pt %ncc, .dcos4 2391 sethi %hi(hw_copy_limit_4), %o3 2392 ld [%o3 + %lo(hw_copy_limit_4)], %o3 2393 ! 2394 ! If it's zero, there's no HW bcopy. 2395 ! Bop off to the aligned copy. 2396 ! 2397 tst %o3 2398 bz,pn %icc, .dcos4 2399 subcc %o3, %o2, %o3 2400 ! 2401 ! We're negative if our size is larger than hw_copy_limit_4. 2402 ! 2403 bge,pt %ncc, .dcos4 2404 nop 2405 ! 2406 ! HW assist is on and we're large enough. Do it. 2407 ! 2408 ba,pt %ncc, .big_copyout 2409 nop 2410.dcos4: 2411 add %o0, %o2, %o0 2412 add %o1, %o2, %o1 2413 sub %g0, %o2, %o3 2414 ba,pt %ncc, .dodfbc 2415 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy 2416 ! 2417 ! We must be 2 byte aligned. Off we go. 2418 ! The check for small copies was done in the 2419 ! delay at .dcoh4 2420 ! 2421.dcoh2: 2422 ble %ncc, .dcos2 2423 sethi %hi(hw_copy_limit_2), %o3 2424 ld [%o3 + %lo(hw_copy_limit_2)], %o3 2425 tst %o3 2426 bz,pn %icc, .dcos2 2427 subcc %o3, %o2, %o3 2428 bge,pt %ncc, .dcos2 2429 nop 2430 ! 2431 ! HW is on and we're big enough. Do it. 2432 ! 2433 ba,pt %ncc, .big_copyout 2434 nop 2435.dcos2: 2436 add %o0, %o2, %o0 2437 add %o1, %o2, %o1 2438 sub %g0, %o2, %o3 2439 ba,pt %ncc, .dodtbc 2440 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy 2441.small_copyout: 2442 ! 2443 ! Why are we doing this AGAIN? There are certain conditions in 2444 ! big_copyout that will cause us to forego the HW assisted copies 2445 ! and bounce back to a non-HW assisted copy. This dispatches those 2446 ! copies. Note that we branch around this in the main line code. 2447 ! 2448 ! 
We make no check for limits or HW enablement here. We've 2449 ! already been told that we're a poster child so just go off 2450 ! and do it. 2451 ! 2452 or %o0, %o1, %o3 2453 btst 1, %o3 2454 bnz %icc, .dcobcp ! Most likely 2455 btst 7, %o3 2456 bz %icc, .dcos8 2457 btst 3, %o3 2458 bz %icc, .dcos4 2459 nop 2460 ba,pt %ncc, .dcos2 2461 nop 2462 .align 32 2463.dodebc: 2464 ldx [%o0 + %o3], %o4 2465 deccc %o2 2466 stxa %o4, [%o1 + %o3]ASI_USER 2467 bg,pt %ncc, .dodebc 2468 addcc %o3, 8, %o3 2469 ! 2470 ! End of copy loop. Check to see if we're done. Most 2471 ! eight byte aligned copies end here. 2472 ! 2473 bz,pt %ncc, .dcofh 2474 nop 2475 ! 2476 ! Something is left - do it byte for byte. 2477 ! 2478 ba,pt %ncc, .dcocl 2479 ldub [%o0 + %o3], %o4 ! load next byte 2480 ! 2481 ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy. 2482 ! 2483 .align 32 2484.dodfbc: 2485 lduw [%o0 + %o3], %o4 2486 deccc %o2 2487 sta %o4, [%o1 + %o3]ASI_USER 2488 bg,pt %ncc, .dodfbc 2489 addcc %o3, 4, %o3 2490 ! 2491 ! End of copy loop. Check to see if we're done. Most 2492 ! four byte aligned copies end here. 2493 ! 2494 bz,pt %ncc, .dcofh 2495 nop 2496 ! 2497 ! Something is left. Do it byte for byte. 2498 ! 2499 ba,pt %ncc, .dcocl 2500 ldub [%o0 + %o3], %o4 ! load next byte 2501 ! 2502 ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to 2503 ! copy. 2504 ! 2505 .align 32 2506.dodtbc: 2507 lduh [%o0 + %o3], %o4 2508 deccc %o2 2509 stha %o4, [%o1 + %o3]ASI_USER 2510 bg,pt %ncc, .dodtbc 2511 addcc %o3, 2, %o3 2512 ! 2513 ! End of copy loop. Anything left? 2514 ! 2515 bz,pt %ncc, .dcofh 2516 nop 2517 ! 2518 ! Deal with the last byte 2519 ! 2520 ldub [%o0 + %o3], %o4 2521 stba %o4, [%o1 + %o3]ASI_USER 2522.dcofh: 2523 membar #Sync 2524 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2525 retl 2526 clr %o0 2527 2528.big_copyout: 2529 ! 2530 ! Are we using the FP registers? 2531 ! 2532 rd %fprs, %o3 ! 
check for unused fp 2533 btst FPRS_FEF, %o3 2534 bnz %icc, .copyout_fpregs_inuse 2535 nop 2536 ! 2537 ! We're going to go off and do a block copy. 2538 ! Switch fault handlers and grab a window. We 2539 ! don't do a membar #Sync since we've done only 2540 ! kernel data to this point. 2541 ! 2542 stn %o4, [THREAD_REG + T_LOFAULT] 2543 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 2544 ! 2545 ! %o3 is now %i3. Save original %fprs. 2546 ! 2547 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] 2548 ba,pt %ncc, .do_block_copyout ! Not in use. Go off and do it. 2549 wr %g0, FPRS_FEF, %fprs ! clear %fprs 2550 ! 2551.copyout_fpregs_inuse: 2552 ! 2553 ! We're here if the FP regs are in use. Need to see if the request 2554 ! exceeds our suddenly larger minimum. 2555 ! 2556 /*
 * No register window has been taken yet on this path (the save is a
 * few instructions below), so the byte count is still in %o2. The
 * compare used to read %i2, which at this point belongs to the
 * caller's window and is unrelated to the length. Requests below the
 * raised threshold go back to .small_copyout, which also works on the
 * %o registers.
 */ cmp %o2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger 2557 bl %ncc, .small_copyout 2558 nop 2559 ! 2560 ! We're going to go off and do a block copy. 2561 ! Change to the heavy duty fault handler and grab a window first. 2562 ! 2563 stn %o4, [THREAD_REG + T_LOFAULT] 2564 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 2565 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] 2566 ! 2567 ! save in-use fpregs on stack 2568 ! 2569 wr %g0, FPRS_FEF, %fprs 2570 membar #Sync 2571 /*
 * %o2 (a fresh out register after the save) becomes
 * (%fp + STACK_BIAS - 257) rounded down to a 64-byte boundary; the
 * four block stores below write %d0, %d16, %d32 and %d48 to
 * consecutive 64-byte slots starting there.
 */ add %fp, STACK_BIAS - 257, %o2 2572 and %o2, -64, %o2 2573 stda %d0, [%o2]ASI_BLK_P 2574 add %o2, 64, %o2 2575 stda %d16, [%o2]ASI_BLK_P 2576 add %o2, 64, %o2 2577 stda %d32, [%o2]ASI_BLK_P 2578 add %o2, 64, %o2 2579 stda %d48, [%o2]ASI_BLK_P 2580 membar #Sync 2581 2582.do_block_copyout: 2583 membar #StoreStore|#StoreLoad|#LoadStore 2584 2585 rd %gsr, %o2 2586 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 2587 2588 ! Set the lower bit in the saved t_lofault to indicate 2589 ! that we need to clear the %fprs register on the way 2590 ! out 2591 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 2592 2593 ! Swap src/dst since the code below is memcpy code 2594 !
and memcpy/bcopy have different calling sequences 2595 mov %i1, %i5 2596 mov %i0, %i1 2597 mov %i5, %i0 2598 2599!!! This code is nearly identical to the version in the sun4u 2600!!! libc_psr. Most bugfixes made to that file should be 2601!!! merged into this routine. 2602 2603 andcc %i0, 7, %o3 2604 bz %ncc, copyout_blkcpy 2605 sub %o3, 8, %o3 2606 neg %o3 2607 sub %i2, %o3, %i2 2608 2609 ! Align Destination on double-word boundary 2610 26112: ldub [%i1], %o4 2612 inc %i1 2613 stba %o4, [%i0]ASI_USER 2614 deccc %o3 2615 bgu %ncc, 2b 2616 inc %i0 2617copyout_blkcpy: 2618 andcc %i0, 63, %i3 2619 bz,pn %ncc, copyout_blalign ! now block aligned 2620 sub %i3, 64, %i3 2621 neg %i3 ! bytes till block aligned 2622 sub %i2, %i3, %i2 ! update %i2 with new count 2623 2624 ! Copy %i3 bytes till dst is block (64 byte) aligned. use 2625 ! double word copies. 2626 2627 alignaddr %i1, %g0, %g1 2628 ldd [%g1], %d0 2629 add %g1, 8, %g1 26306: 2631 ldd [%g1], %d2 2632 add %g1, 8, %g1 2633 subcc %i3, 8, %i3 2634 faligndata %d0, %d2, %d8 2635 stda %d8, [%i0]ASI_USER 2636 add %i1, 8, %i1 2637 bz,pn %ncc, copyout_blalign 2638 add %i0, 8, %i0 2639 ldd [%g1], %d0 2640 add %g1, 8, %g1 2641 subcc %i3, 8, %i3 2642 faligndata %d2, %d0, %d8 2643 stda %d8, [%i0]ASI_USER 2644 add %i1, 8, %i1 2645 bgu,pn %ncc, 6b 2646 add %i0, 8, %i0 2647 2648copyout_blalign: 2649 membar #StoreLoad 2650 ! %i2 = total length 2651 ! %i3 = blocks (length - 64) / 64 2652 ! %i4 = doubles remaining (length - blocks) 2653 sub %i2, 64, %i3 2654 andn %i3, 63, %i3 2655 sub %i2, %i3, %i4 2656 andn %i4, 7, %i4 2657 sub %i4, 16, %i4 2658 sub %i2, %i4, %i2 2659 sub %i2, %i3, %i2 2660 2661 andn %i1, 0x3f, %l7 ! blk aligned address 2662 alignaddr %i1, %g0, %g0 ! gen %gsr 2663 2664 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5 2665 andcc %l5, 7, %i5 ! 
mask everything except bits 1,2 3 2666 add %i1, %i4, %i1 2667 add %i1, %i3, %i1 2668 2669 ldda [%l7]ASI_BLK_P, %d0 2670 add %l7, 64, %l7 2671 ldda [%l7]ASI_BLK_P, %d16 2672 add %l7, 64, %l7 2673 ldda [%l7]ASI_BLK_P, %d32 2674 add %l7, 64, %l7 2675 sub %i3, 128, %i3 2676 2677 ! switch statement to get us to the right 8 byte blk within a 2678 ! 64 byte block 2679 2680 cmp %i5, 4 2681 bgeu,a copyout_hlf 2682 cmp %i5, 6 2683 cmp %i5, 2 2684 bgeu,a copyout_sqtr 2685 nop 2686 cmp %i5, 1 2687 be,a copyout_seg1 2688 nop 2689 ba,pt %ncc, copyout_seg0 2690 nop 2691copyout_sqtr: 2692 be,a copyout_seg2 2693 nop 2694 ba,pt %ncc, copyout_seg3 2695 nop 2696 2697copyout_hlf: 2698 bgeu,a copyout_fqtr 2699 nop 2700 cmp %i5, 5 2701 be,a copyout_seg5 2702 nop 2703 ba,pt %ncc, copyout_seg4 2704 nop 2705copyout_fqtr: 2706 be,a copyout_seg6 2707 nop 2708 ba,pt %ncc, copyout_seg7 2709 nop 2710 2711copyout_seg0: 2712 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2713 FALIGN_D0 2714 ldda [%l7]ASI_BLK_P, %d0 2715 stda %d48, [%i0]ASI_BLK_AIUS 2716 add %l7, 64, %l7 2717 subcc %i3, 64, %i3 2718 bz,pn %ncc, 0f 2719 add %i0, 64, %i0 2720 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2721 FALIGN_D16 2722 ldda [%l7]ASI_BLK_P, %d16 2723 stda %d48, [%i0]ASI_BLK_AIUS 2724 add %l7, 64, %l7 2725 subcc %i3, 64, %i3 2726 bz,pn %ncc, 1f 2727 add %i0, 64, %i0 2728 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2729 FALIGN_D32 2730 ldda [%l7]ASI_BLK_P, %d32 2731 stda %d48, [%i0]ASI_BLK_AIUS 2732 add %l7, 64, %l7 2733 subcc %i3, 64, %i3 2734 bz,pn %ncc, 2f 2735 add %i0, 64, %i0 2736 ba,a,pt %ncc, copyout_seg0 2737 27380: 2739 FALIGN_D16 2740 stda %d48, [%i0]ASI_BLK_AIUS 2741 add %i0, 64, %i0 2742 membar #Sync 2743 FALIGN_D32 2744 stda %d48, [%i0]ASI_BLK_AIUS 2745 ba,pt %ncc, copyout_blkd0 2746 add %i0, 64, %i0 2747 27481: 2749 FALIGN_D32 2750 stda %d48, [%i0]ASI_BLK_AIUS 2751 add %i0, 64, %i0 2752 membar #Sync 2753 FALIGN_D0 2754 stda %d48, [%i0]ASI_BLK_AIUS 2755 ba,pt %ncc, copyout_blkd16 2756 add %i0, 64, %i0 2757 27582: 2759 FALIGN_D0 2760 stda %d48, [%i0]ASI_BLK_AIUS 2761 add %i0, 64, %i0 2762 membar #Sync 2763 FALIGN_D16 2764 stda %d48, [%i0]ASI_BLK_AIUS 2765 ba,pt %ncc, copyout_blkd32 2766 add %i0, 64, %i0 2767 2768copyout_seg1: 2769 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2770 FALIGN_D2 2771 ldda [%l7]ASI_BLK_P, %d0 2772 stda %d48, [%i0]ASI_BLK_AIUS 2773 add %l7, 64, %l7 2774 subcc %i3, 64, %i3 2775 bz,pn %ncc, 0f 2776 add %i0, 64, %i0 2777 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2778 FALIGN_D18 2779 ldda [%l7]ASI_BLK_P, %d16 2780 stda %d48, [%i0]ASI_BLK_AIUS 2781 add %l7, 64, %l7 2782 subcc %i3, 64, %i3 2783 bz,pn %ncc, 1f 2784 add %i0, 64, %i0 2785 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2786 FALIGN_D34 2787 ldda [%l7]ASI_BLK_P, %d32 2788 stda %d48, [%i0]ASI_BLK_AIUS 2789 add %l7, 64, %l7 2790 subcc %i3, 64, %i3 2791 bz,pn %ncc, 2f 2792 add %i0, 64, %i0 2793 ba,a,pt %ncc, copyout_seg1 27940: 2795 FALIGN_D18 2796 stda %d48, [%i0]ASI_BLK_AIUS 2797 add %i0, 64, %i0 2798 membar #Sync 2799 FALIGN_D34 2800 stda %d48, [%i0]ASI_BLK_AIUS 2801 ba,pt %ncc, copyout_blkd2 2802 add %i0, 64, %i0 2803 28041: 2805 FALIGN_D34 2806 stda %d48, [%i0]ASI_BLK_AIUS 2807 add %i0, 64, %i0 2808 membar #Sync 2809 FALIGN_D2 2810 stda %d48, [%i0]ASI_BLK_AIUS 2811 ba,pt %ncc, copyout_blkd18 2812 add %i0, 64, %i0 2813 28142: 2815 FALIGN_D2 2816 stda %d48, [%i0]ASI_BLK_AIUS 2817 add %i0, 64, %i0 2818 membar #Sync 2819 FALIGN_D18 2820 stda %d48, [%i0]ASI_BLK_AIUS 2821 ba,pt %ncc, copyout_blkd34 2822 add %i0, 64, %i0 2823 2824copyout_seg2: 2825 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2826 FALIGN_D4 2827 ldda [%l7]ASI_BLK_P, %d0 2828 stda %d48, [%i0]ASI_BLK_AIUS 2829 add %l7, 64, %l7 2830 subcc %i3, 64, %i3 2831 bz,pn %ncc, 0f 2832 add %i0, 64, %i0 2833 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2834 FALIGN_D20 2835 ldda [%l7]ASI_BLK_P, %d16 2836 stda %d48, [%i0]ASI_BLK_AIUS 2837 add %l7, 64, %l7 2838 subcc %i3, 64, %i3 2839 bz,pn %ncc, 1f 2840 add %i0, 64, %i0 2841 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2842 FALIGN_D36 2843 ldda [%l7]ASI_BLK_P, %d32 2844 stda %d48, [%i0]ASI_BLK_AIUS 2845 add %l7, 64, %l7 2846 subcc %i3, 64, %i3 2847 bz,pn %ncc, 2f 2848 add %i0, 64, %i0 2849 ba,a,pt %ncc, copyout_seg2 2850 28510: 2852 FALIGN_D20 2853 stda %d48, [%i0]ASI_BLK_AIUS 2854 add %i0, 64, %i0 2855 membar #Sync 2856 FALIGN_D36 2857 stda %d48, [%i0]ASI_BLK_AIUS 2858 ba,pt %ncc, copyout_blkd4 2859 add %i0, 64, %i0 2860 28611: 2862 FALIGN_D36 2863 stda %d48, [%i0]ASI_BLK_AIUS 2864 add %i0, 64, %i0 2865 membar #Sync 2866 FALIGN_D4 2867 stda %d48, [%i0]ASI_BLK_AIUS 2868 ba,pt %ncc, copyout_blkd20 2869 add %i0, 64, %i0 2870 28712: 2872 FALIGN_D4 2873 stda %d48, [%i0]ASI_BLK_AIUS 2874 add %i0, 64, %i0 2875 membar #Sync 2876 FALIGN_D20 2877 stda %d48, [%i0]ASI_BLK_AIUS 2878 ba,pt %ncc, copyout_blkd36 2879 add %i0, 64, %i0 2880 2881copyout_seg3: 2882 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2883 FALIGN_D6 2884 ldda [%l7]ASI_BLK_P, %d0 2885 stda %d48, [%i0]ASI_BLK_AIUS 2886 add %l7, 64, %l7 2887 subcc %i3, 64, %i3 2888 bz,pn %ncc, 0f 2889 add %i0, 64, %i0 2890 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2891 FALIGN_D22 2892 ldda [%l7]ASI_BLK_P, %d16 2893 stda %d48, [%i0]ASI_BLK_AIUS 2894 add %l7, 64, %l7 2895 subcc %i3, 64, %i3 2896 bz,pn %ncc, 1f 2897 add %i0, 64, %i0 2898 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2899 FALIGN_D38 2900 ldda [%l7]ASI_BLK_P, %d32 2901 stda %d48, [%i0]ASI_BLK_AIUS 2902 add %l7, 64, %l7 2903 subcc %i3, 64, %i3 2904 bz,pn %ncc, 2f 2905 add %i0, 64, %i0 2906 ba,a,pt %ncc, copyout_seg3 2907 29080: 2909 FALIGN_D22 2910 stda %d48, [%i0]ASI_BLK_AIUS 2911 add %i0, 64, %i0 2912 membar #Sync 2913 FALIGN_D38 2914 stda %d48, [%i0]ASI_BLK_AIUS 2915 ba,pt %ncc, copyout_blkd6 2916 add %i0, 64, %i0 2917 29181: 2919 FALIGN_D38 2920 stda %d48, [%i0]ASI_BLK_AIUS 2921 add %i0, 64, %i0 2922 membar #Sync 2923 FALIGN_D6 2924 stda %d48, [%i0]ASI_BLK_AIUS 2925 ba,pt %ncc, copyout_blkd22 2926 add %i0, 64, %i0 2927 29282: 2929 FALIGN_D6 2930 stda %d48, [%i0]ASI_BLK_AIUS 2931 add %i0, 64, %i0 2932 membar #Sync 2933 FALIGN_D22 2934 stda %d48, [%i0]ASI_BLK_AIUS 2935 ba,pt %ncc, copyout_blkd38 2936 add %i0, 64, %i0 2937 2938copyout_seg4: 2939 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2940 FALIGN_D8 2941 ldda [%l7]ASI_BLK_P, %d0 2942 stda %d48, [%i0]ASI_BLK_AIUS 2943 add %l7, 64, %l7 2944 subcc %i3, 64, %i3 2945 bz,pn %ncc, 0f 2946 add %i0, 64, %i0 2947 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 2948 FALIGN_D24 2949 ldda [%l7]ASI_BLK_P, %d16 2950 stda %d48, [%i0]ASI_BLK_AIUS 2951 add %l7, 64, %l7 2952 subcc %i3, 64, %i3 2953 bz,pn %ncc, 1f 2954 add %i0, 64, %i0 2955 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 2956 FALIGN_D40 2957 ldda [%l7]ASI_BLK_P, %d32 2958 stda %d48, [%i0]ASI_BLK_AIUS 2959 add %l7, 64, %l7 2960 subcc %i3, 64, %i3 2961 bz,pn %ncc, 2f 2962 add %i0, 64, %i0 2963 ba,a,pt %ncc, copyout_seg4 2964 29650: 2966 FALIGN_D24 2967 stda %d48, [%i0]ASI_BLK_AIUS 2968 add %i0, 64, %i0 2969 membar #Sync 2970 FALIGN_D40 2971 stda %d48, [%i0]ASI_BLK_AIUS 2972 ba,pt %ncc, copyout_blkd8 2973 add %i0, 64, %i0 2974 29751: 2976 FALIGN_D40 2977 stda %d48, [%i0]ASI_BLK_AIUS 2978 add %i0, 64, %i0 2979 membar #Sync 2980 FALIGN_D8 2981 stda %d48, [%i0]ASI_BLK_AIUS 2982 ba,pt %ncc, copyout_blkd24 2983 add %i0, 64, %i0 2984 29852: 2986 FALIGN_D8 2987 stda %d48, [%i0]ASI_BLK_AIUS 2988 add %i0, 64, %i0 2989 membar #Sync 2990 FALIGN_D24 2991 stda %d48, [%i0]ASI_BLK_AIUS 2992 ba,pt %ncc, copyout_blkd40 2993 add %i0, 64, %i0 2994 2995copyout_seg5: 2996 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 2997 FALIGN_D10 2998 ldda [%l7]ASI_BLK_P, %d0 2999 stda %d48, [%i0]ASI_BLK_AIUS 3000 add %l7, 64, %l7 3001 subcc %i3, 64, %i3 3002 bz,pn %ncc, 0f 3003 add %i0, 64, %i0 3004 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3005 FALIGN_D26 3006 ldda [%l7]ASI_BLK_P, %d16 3007 stda %d48, [%i0]ASI_BLK_AIUS 3008 add %l7, 64, %l7 3009 subcc %i3, 64, %i3 3010 bz,pn %ncc, 1f 3011 add %i0, 64, %i0 3012 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3013 FALIGN_D42 3014 ldda [%l7]ASI_BLK_P, %d32 3015 stda %d48, [%i0]ASI_BLK_AIUS 3016 add %l7, 64, %l7 3017 subcc %i3, 64, %i3 3018 bz,pn %ncc, 2f 3019 add %i0, 64, %i0 3020 ba,a,pt %ncc, copyout_seg5 3021 30220: 3023 FALIGN_D26 3024 stda %d48, [%i0]ASI_BLK_AIUS 3025 add %i0, 64, %i0 3026 membar #Sync 3027 FALIGN_D42 3028 stda %d48, [%i0]ASI_BLK_AIUS 3029 ba,pt %ncc, copyout_blkd10 3030 add %i0, 64, %i0 3031 30321: 3033 FALIGN_D42 3034 stda %d48, [%i0]ASI_BLK_AIUS 3035 add %i0, 64, %i0 3036 membar #Sync 3037 FALIGN_D10 3038 stda %d48, [%i0]ASI_BLK_AIUS 3039 ba,pt %ncc, copyout_blkd26 3040 add %i0, 64, %i0 3041 30422: 3043 FALIGN_D10 3044 stda %d48, [%i0]ASI_BLK_AIUS 3045 add %i0, 64, %i0 3046 membar #Sync 3047 FALIGN_D26 3048 stda %d48, [%i0]ASI_BLK_AIUS 3049 ba,pt %ncc, copyout_blkd42 3050 add %i0, 64, %i0 3051 3052copyout_seg6: 3053 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 3054 FALIGN_D12 3055 ldda [%l7]ASI_BLK_P, %d0 3056 stda %d48, [%i0]ASI_BLK_AIUS 3057 add %l7, 64, %l7 3058 subcc %i3, 64, %i3 3059 bz,pn %ncc, 0f 3060 add %i0, 64, %i0 3061 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3062 FALIGN_D28 3063 ldda [%l7]ASI_BLK_P, %d16 3064 stda %d48, [%i0]ASI_BLK_AIUS 3065 add %l7, 64, %l7 3066 subcc %i3, 64, %i3 3067 bz,pn %ncc, 1f 3068 add %i0, 64, %i0 3069 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3070 FALIGN_D44 3071 ldda [%l7]ASI_BLK_P, %d32 3072 stda %d48, [%i0]ASI_BLK_AIUS 3073 add %l7, 64, %l7 3074 subcc %i3, 64, %i3 3075 bz,pn %ncc, 2f 3076 add %i0, 64, %i0 3077 ba,a,pt %ncc, copyout_seg6 3078 30790: 3080 FALIGN_D28 3081 stda %d48, [%i0]ASI_BLK_AIUS 3082 add %i0, 64, %i0 3083 membar #Sync 3084 FALIGN_D44 3085 stda %d48, [%i0]ASI_BLK_AIUS 3086 ba,pt %ncc, copyout_blkd12 3087 add %i0, 64, %i0 3088 30891: 3090 FALIGN_D44 3091 stda %d48, [%i0]ASI_BLK_AIUS 3092 add %i0, 64, %i0 3093 membar #Sync 3094 FALIGN_D12 3095 stda %d48, [%i0]ASI_BLK_AIUS 3096 ba,pt %ncc, copyout_blkd28 3097 add %i0, 64, %i0 3098 30992: 3100 FALIGN_D12 3101 stda %d48, [%i0]ASI_BLK_AIUS 3102 add %i0, 64, %i0 3103 membar #Sync 3104 FALIGN_D28 3105 stda %d48, [%i0]ASI_BLK_AIUS 3106 ba,pt %ncc, copyout_blkd44 3107 add %i0, 64, %i0 3108 3109copyout_seg7: 3110 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 3111 FALIGN_D14 3112 ldda [%l7]ASI_BLK_P, %d0 3113 stda %d48, [%i0]ASI_BLK_AIUS 3114 add %l7, 64, %l7 3115 subcc %i3, 64, %i3 3116 bz,pn %ncc, 0f 3117 add %i0, 64, %i0 3118 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3119 FALIGN_D30 3120 ldda [%l7]ASI_BLK_P, %d16 3121 stda %d48, [%i0]ASI_BLK_AIUS 3122 add %l7, 64, %l7 3123 subcc %i3, 64, %i3 3124 bz,pn %ncc, 1f 3125 add %i0, 64, %i0 3126 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3127 FALIGN_D46 3128 ldda [%l7]ASI_BLK_P, %d32 3129 stda %d48, [%i0]ASI_BLK_AIUS 3130 add %l7, 64, %l7 3131 subcc %i3, 64, %i3 3132 bz,pn %ncc, 2f 3133 add %i0, 64, %i0 3134 ba,a,pt %ncc, copyout_seg7 3135 31360: 3137 FALIGN_D30 3138 stda %d48, [%i0]ASI_BLK_AIUS 3139 add %i0, 64, %i0 3140 membar #Sync 3141 FALIGN_D46 3142 stda %d48, [%i0]ASI_BLK_AIUS 3143 ba,pt %ncc, copyout_blkd14 3144 add %i0, 64, %i0 3145 31461: 3147 FALIGN_D46 3148 stda %d48, [%i0]ASI_BLK_AIUS 3149 add %i0, 64, %i0 3150 membar #Sync 3151 FALIGN_D14 3152 stda %d48, [%i0]ASI_BLK_AIUS 3153 ba,pt %ncc, copyout_blkd30 3154 add %i0, 64, %i0 3155 31562: 3157 FALIGN_D14 3158 stda %d48, [%i0]ASI_BLK_AIUS 3159 add %i0, 64, %i0 3160 membar #Sync 3161 FALIGN_D30 3162 stda %d48, [%i0]ASI_BLK_AIUS 3163 ba,pt %ncc, copyout_blkd46 3164 add %i0, 64, %i0 3165 3166 3167 ! 3168 ! dribble out the last partial block 3169 ! 3170copyout_blkd0: 3171 subcc %i4, 8, %i4 3172 blu,pn %ncc, copyout_blkdone 3173 faligndata %d0, %d2, %d48 3174 stda %d48, [%i0]ASI_USER 3175 add %i0, 8, %i0 3176copyout_blkd2: 3177 subcc %i4, 8, %i4 3178 blu,pn %ncc, copyout_blkdone 3179 faligndata %d2, %d4, %d48 3180 stda %d48, [%i0]ASI_USER 3181 add %i0, 8, %i0 3182copyout_blkd4: 3183 subcc %i4, 8, %i4 3184 blu,pn %ncc, copyout_blkdone 3185 faligndata %d4, %d6, %d48 3186 stda %d48, [%i0]ASI_USER 3187 add %i0, 8, %i0 3188copyout_blkd6: 3189 subcc %i4, 8, %i4 3190 blu,pn %ncc, copyout_blkdone 3191 faligndata %d6, %d8, %d48 3192 stda %d48, [%i0]ASI_USER 3193 add %i0, 8, %i0 3194copyout_blkd8: 3195 subcc %i4, 8, %i4 3196 blu,pn %ncc, copyout_blkdone 3197 faligndata %d8, %d10, %d48 3198 stda %d48, [%i0]ASI_USER 3199 add %i0, 8, %i0 3200copyout_blkd10: 3201 subcc %i4, 8, %i4 3202 blu,pn %ncc, copyout_blkdone 3203 faligndata %d10, %d12, %d48 3204 stda %d48, [%i0]ASI_USER 3205 add %i0, 8, %i0 3206copyout_blkd12: 3207 subcc %i4, 8, %i4 3208 blu,pn %ncc, copyout_blkdone 3209 faligndata %d12, %d14, %d48 
3210 stda %d48, [%i0]ASI_USER 3211 add %i0, 8, %i0 3212copyout_blkd14: 3213 subcc %i4, 8, %i4 3214 blu,pn %ncc, copyout_blkdone 3215 fsrc1 %d14, %d0 3216 ba,a,pt %ncc, copyout_blkleft 3217 3218copyout_blkd16: 3219 subcc %i4, 8, %i4 3220 blu,pn %ncc, copyout_blkdone 3221 faligndata %d16, %d18, %d48 3222 stda %d48, [%i0]ASI_USER 3223 add %i0, 8, %i0 3224copyout_blkd18: 3225 subcc %i4, 8, %i4 3226 blu,pn %ncc, copyout_blkdone 3227 faligndata %d18, %d20, %d48 3228 stda %d48, [%i0]ASI_USER 3229 add %i0, 8, %i0 3230copyout_blkd20: 3231 subcc %i4, 8, %i4 3232 blu,pn %ncc, copyout_blkdone 3233 faligndata %d20, %d22, %d48 3234 stda %d48, [%i0]ASI_USER 3235 add %i0, 8, %i0 3236copyout_blkd22: 3237 subcc %i4, 8, %i4 3238 blu,pn %ncc, copyout_blkdone 3239 faligndata %d22, %d24, %d48 3240 stda %d48, [%i0]ASI_USER 3241 add %i0, 8, %i0 3242copyout_blkd24: 3243 subcc %i4, 8, %i4 3244 blu,pn %ncc, copyout_blkdone 3245 faligndata %d24, %d26, %d48 3246 stda %d48, [%i0]ASI_USER 3247 add %i0, 8, %i0 3248copyout_blkd26: 3249 subcc %i4, 8, %i4 3250 blu,pn %ncc, copyout_blkdone 3251 faligndata %d26, %d28, %d48 3252 stda %d48, [%i0]ASI_USER 3253 add %i0, 8, %i0 3254copyout_blkd28: 3255 subcc %i4, 8, %i4 3256 blu,pn %ncc, copyout_blkdone 3257 faligndata %d28, %d30, %d48 3258 stda %d48, [%i0]ASI_USER 3259 add %i0, 8, %i0 3260copyout_blkd30: 3261 subcc %i4, 8, %i4 3262 blu,pn %ncc, copyout_blkdone 3263 fsrc1 %d30, %d0 3264 ba,a,pt %ncc, copyout_blkleft 3265copyout_blkd32: 3266 subcc %i4, 8, %i4 3267 blu,pn %ncc, copyout_blkdone 3268 faligndata %d32, %d34, %d48 3269 stda %d48, [%i0]ASI_USER 3270 add %i0, 8, %i0 3271copyout_blkd34: 3272 subcc %i4, 8, %i4 3273 blu,pn %ncc, copyout_blkdone 3274 faligndata %d34, %d36, %d48 3275 stda %d48, [%i0]ASI_USER 3276 add %i0, 8, %i0 3277copyout_blkd36: 3278 subcc %i4, 8, %i4 3279 blu,pn %ncc, copyout_blkdone 3280 faligndata %d36, %d38, %d48 3281 stda %d48, [%i0]ASI_USER 3282 add %i0, 8, %i0 3283copyout_blkd38: 3284 subcc %i4, 8, %i4 3285 blu,pn %ncc, 
copyout_blkdone 3286 faligndata %d38, %d40, %d48 3287 stda %d48, [%i0]ASI_USER 3288 add %i0, 8, %i0 3289copyout_blkd40: 3290 subcc %i4, 8, %i4 3291 blu,pn %ncc, copyout_blkdone 3292 faligndata %d40, %d42, %d48 3293 stda %d48, [%i0]ASI_USER 3294 add %i0, 8, %i0 3295copyout_blkd42: 3296 subcc %i4, 8, %i4 3297 blu,pn %ncc, copyout_blkdone 3298 faligndata %d42, %d44, %d48 3299 stda %d48, [%i0]ASI_USER 3300 add %i0, 8, %i0 3301copyout_blkd44: 3302 subcc %i4, 8, %i4 3303 blu,pn %ncc, copyout_blkdone 3304 faligndata %d44, %d46, %d48 3305 stda %d48, [%i0]ASI_USER 3306 add %i0, 8, %i0 3307copyout_blkd46: 3308 subcc %i4, 8, %i4 3309 blu,pn %ncc, copyout_blkdone 3310 fsrc1 %d46, %d0 3311 3312copyout_blkleft: 33131: 3314 ldd [%l7], %d2 3315 add %l7, 8, %l7 3316 subcc %i4, 8, %i4 3317 faligndata %d0, %d2, %d8 3318 stda %d8, [%i0]ASI_USER 3319 blu,pn %ncc, copyout_blkdone 3320 add %i0, 8, %i0 3321 ldd [%l7], %d0 3322 add %l7, 8, %l7 3323 subcc %i4, 8, %i4 3324 faligndata %d2, %d0, %d8 3325 stda %d8, [%i0]ASI_USER 3326 bgeu,pt %ncc, 1b 3327 add %i0, 8, %i0 3328 3329copyout_blkdone: 3330 tst %i2 3331 bz,pt %ncc, .copyout_exit 3332 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0 3333 33347: ldub [%i1], %i4 3335 inc %i1 3336 stba %i4, [%i0]ASI_USER 3337 inc %i0 3338 deccc %i2 3339 bgu %ncc, 7b 3340 nop 3341 3342.copyout_exit: 3343 membar #StoreLoad|#StoreStore 3344 btst FPUSED_FLAG, SAVED_LOFAULT 3345 bz 1f 3346 nop 3347 3348 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 3349 wr %o2, 0, %gsr ! restore gsr 3350 3351 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 3352 btst FPRS_FEF, %o3 3353 bz 4f 3354 nop 3355 3356 ! restore fpregs from stack 3357 membar #Sync 3358 add %fp, STACK_BIAS - 257, %o2 3359 and %o2, -64, %o2 3360 ldda [%o2]ASI_BLK_P, %d0 3361 add %o2, 64, %o2 3362 ldda [%o2]ASI_BLK_P, %d16 3363 add %o2, 64, %o2 3364 ldda [%o2]ASI_BLK_P, %d32 3365 add %o2, 64, %o2 3366 ldda [%o2]ASI_BLK_P, %d48 3367 membar #Sync 3368 3369 ba,pt %ncc, 1f 3370 wr %o3, 0, %fprs ! 
restore fprs 3371 33724: 3373 FZERO ! zero all of the fpregs 3374 wr %o3, 0, %fprs ! restore fprs 3375 33761: 3377 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 3378 membar #Sync ! sync error barrier 3379 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3380 ret 3381 restore %g0, 0, %o0 3382 3383.copyout_err: 3384 ldn [THREAD_REG + T_COPYOPS], %o4 3385 brz %o4, 2f 3386 nop 3387 ldn [%o4 + CP_COPYOUT], %g2 3388 jmp %g2 3389 nop 33902: 3391 retl 3392 mov -1, %o0 3393 SET_SIZE(copyout) 3394 3395#endif /* lint */ 3396 3397 /*
 * xcopyout(kaddr, uaddr, count)
 *
 * Alternate entry into the copyout code: loads the address of
 * .xcopyout_err into REAL_LOFAULT and branches to .do_copyout (a label
 * defined outside this span), so the copy itself is the shared
 * .do_copyout code. The lint build sees a stub that returns 0.
 */ 3398#ifdef lint 3399 3400/*ARGSUSED*/ 3401int 3402xcopyout(const void *kaddr, void *uaddr, size_t count) 3403{ return (0); } 3404 3405#else /* lint */ 3406 3407 ENTRY(xcopyout) 3408 sethi %hi(.xcopyout_err), REAL_LOFAULT 3409 b .do_copyout 3410 or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT /* branch delay slot: completes the handler address */ 3411.xcopyout_err: /*
 * Error path: if the thread's t_copyops pointer is non-NULL, jump to
 * its CP_XCOPYOUT entry; otherwise return the value found in %g1
 * (presumably the error code left by the fault path -- confirm
 * against copyio_fault).
 */ 3412 ldn [THREAD_REG + T_COPYOPS], %o4 3413 brz %o4, 2f 3414 nop 3415 ldn [%o4 + CP_XCOPYOUT], %g2 3416 jmp %g2 3417 nop 34182: 3419 retl 3420 mov %g1, %o0 3421 SET_SIZE(xcopyout) 3422 3423#endif /* lint */ 3424 /*
 * xcopyout_little(kaddr, uaddr, count)
 *
 * Byte-at-a-time copy of count bytes from kaddr to uaddr in reversed
 * order: the first byte stored at uaddr is the last byte of the
 * source. Stores go through ASI_AIUSL. .little_err (defined outside
 * this span) is installed as t_lofault for the duration; the previous
 * handler is held in %o5 and put back before returning 0. A zero
 * count skips the loop. The lint build sees a stub that returns 0.
 */ 3425#ifdef lint 3426 3427/*ARGSUSED*/ 3428int 3429xcopyout_little(const void *kaddr, void *uaddr, size_t count) 3430{ return (0); } 3431 3432#else /* lint */ 3433 3434 ENTRY(xcopyout_little) 3435 sethi %hi(.little_err), %o4 3436 ldn [THREAD_REG + T_LOFAULT], %o5 3437 or %o4, %lo(.little_err), %o4 3438 membar #Sync ! sync error barrier 3439 stn %o4, [THREAD_REG + T_LOFAULT] 3440 /*
 * Set-up: %o3 = -count, %o0 = kaddr + 2*count - 1, %o1 = uaddr + count.
 * The first load therefore reads kaddr[count - 1] and the first store
 * lands on uaddr[0]. The sub sits in the delay slot of the zero-count
 * branch and is executed either way.
 */ 3441 subcc %g0, %o2, %o3 3442 add %o0, %o2, %o0 3443 bz,pn %ncc, 2f ! check for zero bytes 3444 sub %o2, 1, %o4 3445 add %o0, %o4, %o0 ! start w/last byte 3446 add %o1, %o2, %o1 3447 ldub [%o0+%o3], %o4 3448 /*
 * Each pass bumps the index %o3 by one and pulls %o0 back by two, so
 * the store address advances one byte while the load address retreats
 * one byte. inccc sets carry only when %o3 wraps to zero; bcc,a keeps
 * looping until then, and the annulled delay-slot load is skipped on
 * the final pass.
 */ 34491: stba %o4, [%o1+%o3]ASI_AIUSL 3450 inccc %o3 3451 sub %o0, 2, %o0 ! get next byte 3452 bcc,a,pt %ncc, 1b 3453 ldub [%o0+%o3], %o4 3454 34552: membar #Sync ! sync error barrier 3456 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3457 retl 3458 mov %g0, %o0 !
return (0) 3459 SET_SIZE(xcopyout_little) 3460 3461#endif /* lint */ 3462 3463/* 3464 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little) 3465 */ 3466 3467#if defined(lint) 3468 3469/*ARGSUSED*/ 3470int 3471copyin(const void *uaddr, void *kaddr, size_t count) 3472{ return (0); } 3473 3474#else /* lint */ 3475 3476 ENTRY(copyin) 3477 sethi %hi(.copyin_err), REAL_LOFAULT 3478 or REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT 3479 3480.do_copyin: 3481 ! 3482 ! Check the length and bail if zero. 3483 ! 3484 tst %o2 3485 bnz,pt %ncc, 1f 3486 nop 3487 retl 3488 clr %o0 34891: 3490 sethi %hi(copyio_fault), %o4 3491 or %o4, %lo(copyio_fault), %o4 3492 sethi %hi(copyio_fault_nowindow), %o3 3493 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT 3494 or %o3, %lo(copyio_fault_nowindow), %o3 3495 membar #Sync 3496 stn %o3, [THREAD_REG + T_LOFAULT] 3497 3498 mov %o0, SAVE_SRC 3499 mov %o1, SAVE_DST 3500 mov %o2, SAVE_COUNT 3501 3502 ! 3503 ! Check to see if we're more than SMALL_LIMIT. 3504 ! 3505 subcc %o2, SMALL_LIMIT, %o3 3506 bgu,a,pt %ncc, .dci_ns 3507 or %o0, %o1, %o3 3508 ! 3509 ! What was previously ".small_copyin" 3510 ! 3511.dcibcp: 3512 sub %g0, %o2, %o3 ! setup for copy loop 3513 add %o0, %o2, %o0 3514 add %o1, %o2, %o1 3515 ba,pt %ncc, .dcicl 3516 lduba [%o0 + %o3]ASI_USER, %o4 3517 ! 3518 ! %o0 and %o1 point at the end and remain pointing at the end 3519 ! of their buffers. We pull things out by adding %o3 (which is 3520 ! the negation of the length) to the buffer end which gives us 3521 ! the curent location in the buffers. By incrementing %o3 we walk 3522 ! through both buffers without having to bump each buffer's 3523 ! pointer. A very fast 4 instruction loop. 3524 ! 3525 .align 16 3526.dcicl: 3527 stb %o4, [%o1 + %o3] 3528 inccc %o3 3529 bl,a,pt %ncc, .dcicl 3530 lduba [%o0 + %o3]ASI_USER, %o4 3531 ! 3532 ! We're done. Go home. 3533 ! 3534 membar #Sync 3535 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] 3536 retl 3537 clr %o0 3538 ! 3539 ! 
Try aligned copies from here. 3540 ! 3541.dci_ns: 3542 ! 3543 ! See if we're single byte aligned. If we are, check the 3544 ! limit for single byte copies. If we're smaller, or equal, 3545 ! bounce to the byte for byte copy loop. Otherwise do it in 3546 ! HW (if enabled). 3547 ! 3548 btst 1, %o3 3549 bz,a,pt %icc, .dcih8 3550 btst 7, %o3 3551 ! 3552 ! We're single byte aligned. 3553 ! 3554 subcc %o2, VIS_COPY_THRESHOLD, %o3 3555 bleu,pt %ncc, .dcibcp 3556 sethi %hi(hw_copy_limit_1), %o3 3557 ld [%o3 + %lo(hw_copy_limit_1)], %o3 3558 ! 3559 ! Is HW copy on? If not do everything byte for byte. 3560 ! 3561 tst %o3 3562 bz,pn %icc, .dcibcp 3563 subcc %o3, %o2, %o3 3564 ! 3565 ! Are we bigger than the HW limit? If not 3566 ! go to byte for byte. 3567 ! 3568 bge,pt %ncc, .dcibcp 3569 nop 3570 ! 3571 ! We're big enough and copy is on. Do it with HW. 3572 ! 3573 ba,pt %ncc, .big_copyin 3574 nop 3575.dcih8: 3576 ! 3577 ! 8 byte aligned? 3578 ! 3579 bnz,a %ncc, .dcih4 3580 btst 3, %o3 3581 ! 3582 ! We're eight byte aligned. 3583 ! 3584 subcc %o2, VIS_COPY_THRESHOLD, %o3 3585 bleu,pt %ncc, .dcis8 3586 sethi %hi(hw_copy_limit_8), %o3 3587 ld [%o3 + %lo(hw_copy_limit_8)], %o3 3588 ! 3589 ! Is HW assist on? If not, do it with the aligned copy. 3590 ! 3591 tst %o3 3592 bz,pn %icc, .dcis8 3593 subcc %o3, %o2, %o3 3594 bge %ncc, .dcis8 3595 nop 3596 ba,pt %ncc, .big_copyin 3597 nop 3598.dcis8: 3599 ! 3600 ! Housekeeping for copy loops. Uses same idea as in the byte for 3601 ! byte copy loop above. 3602 ! 3603 add %o0, %o2, %o0 3604 add %o1, %o2, %o1 3605 sub %g0, %o2, %o3 3606 ba,pt %ncc, .didebc 3607 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy 3608 ! 3609 ! 4 byte aligned? 3610 ! 3611.dcih4: 3612 bnz %ncc, .dcih2 3613 subcc %o2, VIS_COPY_THRESHOLD, %o3 3614 bleu,pt %ncc, .dcis4 3615 sethi %hi(hw_copy_limit_4), %o3 3616 ld [%o3 + %lo(hw_copy_limit_4)], %o3 3617 ! 3618 ! Is HW assist on? If not, do it with the aligned copy. 3619 ! 
3620 tst %o3 3621 bz,pn %icc, .dcis4 3622 subcc %o3, %o2, %o3 3623 ! 3624 ! We're negative if our size is less than or equal to hw_copy_limit_4. 3625 ! 3626 bge %ncc, .dcis4 3627 nop 3628 ba,pt %ncc, .big_copyin 3629 nop 3630.dcis4: 3631 ! 3632 ! Housekeeping for copy loops. Uses same idea as in the byte 3633 ! for byte copy loop above. 3634 ! 3635 add %o0, %o2, %o0 3636 add %o1, %o2, %o1 3637 sub %g0, %o2, %o3 3638 ba,pt %ncc, .didfbc 3639 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy 3640.dcih2: 3641 ! 3642 ! We're two byte aligned. Check for "smallness" 3643 ! done in delay at .dcih4 3644 ! 3645 bleu,pt %ncc, .dcis2 3646 sethi %hi(hw_copy_limit_2), %o3 3647 ld [%o3 + %lo(hw_copy_limit_2)], %o3 3648 ! 3649 ! Is HW assist on? If not, do it with the aligned copy. 3650 ! 3651 tst %o3 3652 bz,pn %icc, .dcis2 3653 subcc %o3, %o2, %o3 3654 ! 3655 ! Are we larger than the HW limit? 3656 ! 3657 bge %ncc, .dcis2 3658 nop 3659 ! 3660 ! HW assist is on and we're large enough to use it. 3661 ! 3662 ba,pt %ncc, .big_copyin 3663 nop 3664 ! 3665 ! Housekeeping for copy loops. Uses same idea as in the byte 3666 ! for byte copy loop above. 3667 ! 3668.dcis2: 3669 add %o0, %o2, %o0 3670 add %o1, %o2, %o1 3671 sub %g0, %o2, %o3 3672 ba,pt %ncc, .didtbc 3673 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy 3674 ! 3675.small_copyin: 3676 ! 3677 ! Why are we doing this AGAIN? There are certain conditions in 3678 ! big copyin that will cause us to forgo the HW assisted copys 3679 ! and bounce back to a non-hw assisted copy. This dispatches 3680 ! those copies. Note that we branch around this in the main line 3681 ! code. 3682 ! 3683 ! We make no check for limits or HW enablement here. We've 3684 ! already been told that we're a poster child so just go off 3685 ! and do it. 3686 ! 3687 or %o0, %o1, %o3 3688 btst 1, %o3 3689 bnz %icc, .dcibcp ! Most likely 3690 btst 7, %o3 3691 bz %icc, .dcis8 3692 btst 3, %o3 3693 bz %icc, .dcis4 3694 nop 3695 ba,pt %ncc, .dcis2 3696 nop 3697 ! 
3698 ! Eight byte aligned copies. A steal from the original .small_copyin 3699 ! with modifications. %o2 is number of 8 byte chunks to copy. When 3700 ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more 3701 ! to copy. 3702 ! 3703 .align 32 3704.didebc: 3705 ldxa [%o0 + %o3]ASI_USER, %o4 3706 deccc %o2 3707 stx %o4, [%o1 + %o3] 3708 bg,pt %ncc, .didebc 3709 addcc %o3, 8, %o3 3710 ! 3711 ! End of copy loop. Most 8 byte aligned copies end here. 3712 ! 3713 bz,pt %ncc, .dcifh 3714 nop 3715 ! 3716 ! Something is left. Do it byte for byte. 3717 ! 3718 ba,pt %ncc, .dcicl 3719 lduba [%o0 + %o3]ASI_USER, %o4 3720 ! 3721 ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy. 3722 ! 3723 .align 32 3724.didfbc: 3725 lduwa [%o0 + %o3]ASI_USER, %o4 3726 deccc %o2 3727 st %o4, [%o1 + %o3] 3728 bg,pt %ncc, .didfbc 3729 addcc %o3, 4, %o3 3730 ! 3731 ! End of copy loop. Most 4 byte aligned copies end here. 3732 ! 3733 bz,pt %ncc, .dcifh 3734 nop 3735 ! 3736 ! Something is left. Do it byte for byte. 3737 ! 3738 ba,pt %ncc, .dcicl 3739 lduba [%o0 + %o3]ASI_USER, %o4 3740 ! 3741 ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to 3742 ! copy. 3743 ! 3744 .align 32 3745.didtbc: 3746 lduha [%o0 + %o3]ASI_USER, %o4 3747 deccc %o2 3748 sth %o4, [%o1 + %o3] 3749 bg,pt %ncc, .didtbc 3750 addcc %o3, 2, %o3 3751 ! 3752 ! End of copy loop. Most 2 byte aligned copies end here. 3753 ! 3754 bz,pt %ncc, .dcifh 3755 nop 3756 ! 3757 ! Deal with the last byte 3758 ! 3759 lduba [%o0 + %o3]ASI_USER, %o4 3760 stb %o4, [%o1 + %o3] 3761.dcifh: 3762 membar #Sync 3763 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3764 retl 3765 clr %o0 3766 3767.big_copyin: 3768 ! 3769 ! Are we using the FP registers? 3770 ! 3771 rd %fprs, %o3 ! check for unused fp 3772 btst FPRS_FEF, %o3 3773 bnz %ncc, .copyin_fpregs_inuse 3774 nop 3775 ! 3776 ! We're going off to do a block copy. 3777 ! Switch fault hendlers and grab a window. We 3778 ! 
don't do a membar #Sync since we've done only 3779 ! kernel data to this point. 3780 ! 3781 stn %o4, [THREAD_REG + T_LOFAULT] 3782 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3783 ! 3784 ! %o3 is %i3 after the save... 3785 ! 3786 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] 3787 ba,pt %ncc, .do_blockcopyin 3788 wr %g0, FPRS_FEF, %fprs 3789.copyin_fpregs_inuse: 3790 ! 3791 ! We're here if the FP regs are in use. Need to see if the request 3792 ! exceeds our suddenly larger minimum. 3793 ! 3794 cmp %i2, VIS_COPY_THRESHOLD+(64*4) 3795 bl %ncc, .small_copyin 3796 nop 3797 ! 3798 ! We're going off and do a block copy. 3799 ! Change to the heavy duty fault handler and grab a window first. 3800 ! New handler is passed in 3801 ! 3802 stn %o4, [THREAD_REG + T_LOFAULT] 3803 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3804 ! 3805 ! %o3 is now %i3 3806 ! 3807 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] 3808 3809 ! save in-use fpregs on stack 3810 wr %g0, FPRS_FEF, %fprs 3811 membar #Sync 3812 add %fp, STACK_BIAS - 257, %o2 3813 and %o2, -64, %o2 3814 stda %d0, [%o2]ASI_BLK_P 3815 add %o2, 64, %o2 3816 stda %d16, [%o2]ASI_BLK_P 3817 add %o2, 64, %o2 3818 stda %d32, [%o2]ASI_BLK_P 3819 add %o2, 64, %o2 3820 stda %d48, [%o2]ASI_BLK_P 3821 membar #Sync 3822 3823.do_blockcopyin: 3824 membar #StoreStore|#StoreLoad|#LoadStore 3825 3826 rd %gsr, %o2 3827 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 3828 3829 ! Set the lower bit in the saved t_lofault to indicate 3830 ! that we need to clear the %fprs register on the way 3831 ! out 3832 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 3833 3834 ! Swap src/dst since the code below is memcpy code 3835 ! and memcpy/bcopy have different calling sequences 3836 mov %i1, %i5 3837 mov %i0, %i1 3838 mov %i5, %i0 3839 3840!!! This code is nearly identical to the version in the sun4u 3841!!! libc_psr. Most bugfixes made to that file should be 3842!!! merged into this routine. 
3843 3844 andcc %i0, 7, %o3 3845 bz copyin_blkcpy 3846 sub %o3, 8, %o3 3847 neg %o3 3848 sub %i2, %o3, %i2 3849 3850 ! Align Destination on double-word boundary 3851 38522: lduba [%i1]ASI_USER, %o4 3853 inc %i1 3854 inc %i0 3855 deccc %o3 3856 bgu %ncc, 2b 3857 stb %o4, [%i0-1] 3858copyin_blkcpy: 3859 andcc %i0, 63, %i3 3860 bz,pn %ncc, copyin_blalign ! now block aligned 3861 sub %i3, 64, %i3 3862 neg %i3 ! bytes till block aligned 3863 sub %i2, %i3, %i2 ! update %i2 with new count 3864 3865 ! Copy %i3 bytes till dst is block (64 byte) aligned. use 3866 ! double word copies. 3867 3868 alignaddr %i1, %g0, %g1 3869 ldda [%g1]ASI_USER, %d0 3870 add %g1, 8, %g1 38716: 3872 ldda [%g1]ASI_USER, %d2 3873 add %g1, 8, %g1 3874 subcc %i3, 8, %i3 3875 faligndata %d0, %d2, %d8 3876 std %d8, [%i0] 3877 add %i1, 8, %i1 3878 bz,pn %ncc, copyin_blalign 3879 add %i0, 8, %i0 3880 ldda [%g1]ASI_USER, %d0 3881 add %g1, 8, %g1 3882 subcc %i3, 8, %i3 3883 faligndata %d2, %d0, %d8 3884 std %d8, [%i0] 3885 add %i1, 8, %i1 3886 bgu,pn %ncc, 6b 3887 add %i0, 8, %i0 3888 3889copyin_blalign: 3890 membar #StoreLoad 3891 ! %i2 = total length 3892 ! %i3 = blocks (length - 64) / 64 3893 ! %i4 = doubles remaining (length - blocks) 3894 sub %i2, 64, %i3 3895 andn %i3, 63, %i3 3896 sub %i2, %i3, %i4 3897 andn %i4, 7, %i4 3898 sub %i4, 16, %i4 3899 sub %i2, %i4, %i2 3900 sub %i2, %i3, %i2 3901 3902 andn %i1, 0x3f, %l7 ! blk aligned address 3903 alignaddr %i1, %g0, %g0 ! gen %gsr 3904 3905 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5 3906 andcc %l5, 7, %i5 ! mask everything except bits 1,2 3 3907 add %i1, %i4, %i1 3908 add %i1, %i3, %i1 3909 3910 ldda [%l7]ASI_BLK_AIUS, %d0 3911 add %l7, 64, %l7 3912 ldda [%l7]ASI_BLK_AIUS, %d16 3913 add %l7, 64, %l7 3914 ldda [%l7]ASI_BLK_AIUS, %d32 3915 add %l7, 64, %l7 3916 sub %i3, 128, %i3 3917 3918 ! switch statement to get us to the right 8 byte blk within a 3919 ! 
64 byte block 3920 3921 cmp %i5, 4 3922 bgeu,a copyin_hlf 3923 cmp %i5, 6 3924 cmp %i5, 2 3925 bgeu,a copyin_sqtr 3926 nop 3927 cmp %i5, 1 3928 be,a copyin_seg1 3929 nop 3930 ba,pt %ncc, copyin_seg0 3931 nop 3932copyin_sqtr: 3933 be,a copyin_seg2 3934 nop 3935 ba,pt %ncc, copyin_seg3 3936 nop 3937 3938copyin_hlf: 3939 bgeu,a copyin_fqtr 3940 nop 3941 cmp %i5, 5 3942 be,a copyin_seg5 3943 nop 3944 ba,pt %ncc, copyin_seg4 3945 nop 3946copyin_fqtr: 3947 be,a copyin_seg6 3948 nop 3949 ba,pt %ncc, copyin_seg7 3950 nop 3951 3952copyin_seg0: 3953 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 3954 FALIGN_D0 3955 ldda [%l7]ASI_BLK_AIUS, %d0 3956 stda %d48, [%i0]ASI_BLK_P 3957 add %l7, 64, %l7 3958 subcc %i3, 64, %i3 3959 bz,pn %ncc, 0f 3960 add %i0, 64, %i0 3961 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 3962 FALIGN_D16 3963 ldda [%l7]ASI_BLK_AIUS, %d16 3964 stda %d48, [%i0]ASI_BLK_P 3965 add %l7, 64, %l7 3966 subcc %i3, 64, %i3 3967 bz,pn %ncc, 1f 3968 add %i0, 64, %i0 3969 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 3970 FALIGN_D32 3971 ldda [%l7]ASI_BLK_AIUS, %d32 3972 stda %d48, [%i0]ASI_BLK_P 3973 add %l7, 64, %l7 3974 subcc %i3, 64, %i3 3975 bz,pn %ncc, 2f 3976 add %i0, 64, %i0 3977 ba,a,pt %ncc, copyin_seg0 3978 39790: 3980 FALIGN_D16 3981 stda %d48, [%i0]ASI_BLK_P 3982 add %i0, 64, %i0 3983 membar #Sync 3984 FALIGN_D32 3985 stda %d48, [%i0]ASI_BLK_P 3986 ba,pt %ncc, copyin_blkd0 3987 add %i0, 64, %i0 3988 39891: 3990 FALIGN_D32 3991 stda %d48, [%i0]ASI_BLK_P 3992 add %i0, 64, %i0 3993 membar #Sync 3994 FALIGN_D0 3995 stda %d48, [%i0]ASI_BLK_P 3996 ba,pt %ncc, copyin_blkd16 3997 add %i0, 64, %i0 3998 39992: 4000 FALIGN_D0 4001 stda %d48, [%i0]ASI_BLK_P 4002 add %i0, 64, %i0 4003 membar #Sync 4004 FALIGN_D16 4005 stda %d48, [%i0]ASI_BLK_P 4006 ba,pt %ncc, copyin_blkd32 4007 add %i0, 64, %i0 4008 4009copyin_seg1: 4010 ! 
1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4011 FALIGN_D2 4012 ldda [%l7]ASI_BLK_AIUS, %d0 4013 stda %d48, [%i0]ASI_BLK_P 4014 add %l7, 64, %l7 4015 subcc %i3, 64, %i3 4016 bz,pn %ncc, 0f 4017 add %i0, 64, %i0 4018 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4019 FALIGN_D18 4020 ldda [%l7]ASI_BLK_AIUS, %d16 4021 stda %d48, [%i0]ASI_BLK_P 4022 add %l7, 64, %l7 4023 subcc %i3, 64, %i3 4024 bz,pn %ncc, 1f 4025 add %i0, 64, %i0 4026 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4027 FALIGN_D34 4028 ldda [%l7]ASI_BLK_AIUS, %d32 4029 stda %d48, [%i0]ASI_BLK_P 4030 add %l7, 64, %l7 4031 subcc %i3, 64, %i3 4032 bz,pn %ncc, 2f 4033 add %i0, 64, %i0 4034 ba,a,pt %ncc, copyin_seg1 40350: 4036 FALIGN_D18 4037 stda %d48, [%i0]ASI_BLK_P 4038 add %i0, 64, %i0 4039 membar #Sync 4040 FALIGN_D34 4041 stda %d48, [%i0]ASI_BLK_P 4042 ba,pt %ncc, copyin_blkd2 4043 add %i0, 64, %i0 4044 40451: 4046 FALIGN_D34 4047 stda %d48, [%i0]ASI_BLK_P 4048 add %i0, 64, %i0 4049 membar #Sync 4050 FALIGN_D2 4051 stda %d48, [%i0]ASI_BLK_P 4052 ba,pt %ncc, copyin_blkd18 4053 add %i0, 64, %i0 4054 40552: 4056 FALIGN_D2 4057 stda %d48, [%i0]ASI_BLK_P 4058 add %i0, 64, %i0 4059 membar #Sync 4060 FALIGN_D18 4061 stda %d48, [%i0]ASI_BLK_P 4062 ba,pt %ncc, copyin_blkd34 4063 add %i0, 64, %i0 4064copyin_seg2: 4065 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4066 FALIGN_D4 4067 ldda [%l7]ASI_BLK_AIUS, %d0 4068 stda %d48, [%i0]ASI_BLK_P 4069 add %l7, 64, %l7 4070 subcc %i3, 64, %i3 4071 bz,pn %ncc, 0f 4072 add %i0, 64, %i0 4073 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4074 FALIGN_D20 4075 ldda [%l7]ASI_BLK_AIUS, %d16 4076 stda %d48, [%i0]ASI_BLK_P 4077 add %l7, 64, %l7 4078 subcc %i3, 64, %i3 4079 bz,pn %ncc, 1f 4080 add %i0, 64, %i0 4081 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4082 FALIGN_D36 4083 ldda [%l7]ASI_BLK_AIUS, %d32 4084 stda %d48, [%i0]ASI_BLK_P 4085 add %l7, 64, %l7 4086 subcc %i3, 64, %i3 4087 bz,pn %ncc, 2f 4088 add %i0, 64, %i0 4089 ba,a,pt %ncc, copyin_seg2 4090 40910: 4092 FALIGN_D20 4093 stda %d48, [%i0]ASI_BLK_P 4094 add %i0, 64, %i0 4095 membar #Sync 4096 FALIGN_D36 4097 stda %d48, [%i0]ASI_BLK_P 4098 ba,pt %ncc, copyin_blkd4 4099 add %i0, 64, %i0 4100 41011: 4102 FALIGN_D36 4103 stda %d48, [%i0]ASI_BLK_P 4104 add %i0, 64, %i0 4105 membar #Sync 4106 FALIGN_D4 4107 stda %d48, [%i0]ASI_BLK_P 4108 ba,pt %ncc, copyin_blkd20 4109 add %i0, 64, %i0 4110 41112: 4112 FALIGN_D4 4113 stda %d48, [%i0]ASI_BLK_P 4114 add %i0, 64, %i0 4115 membar #Sync 4116 FALIGN_D20 4117 stda %d48, [%i0]ASI_BLK_P 4118 ba,pt %ncc, copyin_blkd36 4119 add %i0, 64, %i0 4120 4121copyin_seg3: 4122 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4123 FALIGN_D6 4124 ldda [%l7]ASI_BLK_AIUS, %d0 4125 stda %d48, [%i0]ASI_BLK_P 4126 add %l7, 64, %l7 4127 subcc %i3, 64, %i3 4128 bz,pn %ncc, 0f 4129 add %i0, 64, %i0 4130 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4131 FALIGN_D22 4132 ldda [%l7]ASI_BLK_AIUS, %d16 4133 stda %d48, [%i0]ASI_BLK_P 4134 add %l7, 64, %l7 4135 subcc %i3, 64, %i3 4136 bz,pn %ncc, 1f 4137 add %i0, 64, %i0 4138 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4139 FALIGN_D38 4140 ldda [%l7]ASI_BLK_AIUS, %d32 4141 stda %d48, [%i0]ASI_BLK_P 4142 add %l7, 64, %l7 4143 subcc %i3, 64, %i3 4144 bz,pn %ncc, 2f 4145 add %i0, 64, %i0 4146 ba,a,pt %ncc, copyin_seg3 4147 41480: 4149 FALIGN_D22 4150 stda %d48, [%i0]ASI_BLK_P 4151 add %i0, 64, %i0 4152 membar #Sync 4153 FALIGN_D38 4154 stda %d48, [%i0]ASI_BLK_P 4155 ba,pt %ncc, copyin_blkd6 4156 add %i0, 64, %i0 4157 41581: 4159 FALIGN_D38 4160 stda %d48, [%i0]ASI_BLK_P 4161 add %i0, 64, %i0 4162 membar #Sync 4163 FALIGN_D6 4164 stda %d48, [%i0]ASI_BLK_P 4165 ba,pt %ncc, copyin_blkd22 4166 add %i0, 64, %i0 4167 41682: 4169 FALIGN_D6 4170 stda %d48, [%i0]ASI_BLK_P 4171 add %i0, 64, %i0 4172 membar #Sync 4173 FALIGN_D22 4174 stda %d48, [%i0]ASI_BLK_P 4175 ba,pt %ncc, copyin_blkd38 4176 add %i0, 64, %i0 4177 4178copyin_seg4: 4179 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4180 FALIGN_D8 4181 ldda [%l7]ASI_BLK_AIUS, %d0 4182 stda %d48, [%i0]ASI_BLK_P 4183 add %l7, 64, %l7 4184 subcc %i3, 64, %i3 4185 bz,pn %ncc, 0f 4186 add %i0, 64, %i0 4187 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4188 FALIGN_D24 4189 ldda [%l7]ASI_BLK_AIUS, %d16 4190 stda %d48, [%i0]ASI_BLK_P 4191 add %l7, 64, %l7 4192 subcc %i3, 64, %i3 4193 bz,pn %ncc, 1f 4194 add %i0, 64, %i0 4195 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4196 FALIGN_D40 4197 ldda [%l7]ASI_BLK_AIUS, %d32 4198 stda %d48, [%i0]ASI_BLK_P 4199 add %l7, 64, %l7 4200 subcc %i3, 64, %i3 4201 bz,pn %ncc, 2f 4202 add %i0, 64, %i0 4203 ba,a,pt %ncc, copyin_seg4 4204 42050: 4206 FALIGN_D24 4207 stda %d48, [%i0]ASI_BLK_P 4208 add %i0, 64, %i0 4209 membar #Sync 4210 FALIGN_D40 4211 stda %d48, [%i0]ASI_BLK_P 4212 ba,pt %ncc, copyin_blkd8 4213 add %i0, 64, %i0 4214 42151: 4216 FALIGN_D40 4217 stda %d48, [%i0]ASI_BLK_P 4218 add %i0, 64, %i0 4219 membar #Sync 4220 FALIGN_D8 4221 stda %d48, [%i0]ASI_BLK_P 4222 ba,pt %ncc, copyin_blkd24 4223 add %i0, 64, %i0 4224 42252: 4226 FALIGN_D8 4227 stda %d48, [%i0]ASI_BLK_P 4228 add %i0, 64, %i0 4229 membar #Sync 4230 FALIGN_D24 4231 stda %d48, [%i0]ASI_BLK_P 4232 ba,pt %ncc, copyin_blkd40 4233 add %i0, 64, %i0 4234 4235copyin_seg5: 4236 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4237 FALIGN_D10 4238 ldda [%l7]ASI_BLK_AIUS, %d0 4239 stda %d48, [%i0]ASI_BLK_P 4240 add %l7, 64, %l7 4241 subcc %i3, 64, %i3 4242 bz,pn %ncc, 0f 4243 add %i0, 64, %i0 4244 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4245 FALIGN_D26 4246 ldda [%l7]ASI_BLK_AIUS, %d16 4247 stda %d48, [%i0]ASI_BLK_P 4248 add %l7, 64, %l7 4249 subcc %i3, 64, %i3 4250 bz,pn %ncc, 1f 4251 add %i0, 64, %i0 4252 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4253 FALIGN_D42 4254 ldda [%l7]ASI_BLK_AIUS, %d32 4255 stda %d48, [%i0]ASI_BLK_P 4256 add %l7, 64, %l7 4257 subcc %i3, 64, %i3 4258 bz,pn %ncc, 2f 4259 add %i0, 64, %i0 4260 ba,a,pt %ncc, copyin_seg5 4261 42620: 4263 FALIGN_D26 4264 stda %d48, [%i0]ASI_BLK_P 4265 add %i0, 64, %i0 4266 membar #Sync 4267 FALIGN_D42 4268 stda %d48, [%i0]ASI_BLK_P 4269 ba,pt %ncc, copyin_blkd10 4270 add %i0, 64, %i0 4271 42721: 4273 FALIGN_D42 4274 stda %d48, [%i0]ASI_BLK_P 4275 add %i0, 64, %i0 4276 membar #Sync 4277 FALIGN_D10 4278 stda %d48, [%i0]ASI_BLK_P 4279 ba,pt %ncc, copyin_blkd26 4280 add %i0, 64, %i0 4281 42822: 4283 FALIGN_D10 4284 stda %d48, [%i0]ASI_BLK_P 4285 add %i0, 64, %i0 4286 membar #Sync 4287 FALIGN_D26 4288 stda %d48, [%i0]ASI_BLK_P 4289 ba,pt %ncc, copyin_blkd42 4290 add %i0, 64, %i0 4291 4292copyin_seg6: 4293 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4294 FALIGN_D12 4295 ldda [%l7]ASI_BLK_AIUS, %d0 4296 stda %d48, [%i0]ASI_BLK_P 4297 add %l7, 64, %l7 4298 subcc %i3, 64, %i3 4299 bz,pn %ncc, 0f 4300 add %i0, 64, %i0 4301 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4302 FALIGN_D28 4303 ldda [%l7]ASI_BLK_AIUS, %d16 4304 stda %d48, [%i0]ASI_BLK_P 4305 add %l7, 64, %l7 4306 subcc %i3, 64, %i3 4307 bz,pn %ncc, 1f 4308 add %i0, 64, %i0 4309 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4310 FALIGN_D44 4311 ldda [%l7]ASI_BLK_AIUS, %d32 4312 stda %d48, [%i0]ASI_BLK_P 4313 add %l7, 64, %l7 4314 subcc %i3, 64, %i3 4315 bz,pn %ncc, 2f 4316 add %i0, 64, %i0 4317 ba,a,pt %ncc, copyin_seg6 4318 43190: 4320 FALIGN_D28 4321 stda %d48, [%i0]ASI_BLK_P 4322 add %i0, 64, %i0 4323 membar #Sync 4324 FALIGN_D44 4325 stda %d48, [%i0]ASI_BLK_P 4326 ba,pt %ncc, copyin_blkd12 4327 add %i0, 64, %i0 4328 43291: 4330 FALIGN_D44 4331 stda %d48, [%i0]ASI_BLK_P 4332 add %i0, 64, %i0 4333 membar #Sync 4334 FALIGN_D12 4335 stda %d48, [%i0]ASI_BLK_P 4336 ba,pt %ncc, copyin_blkd28 4337 add %i0, 64, %i0 4338 43392: 4340 FALIGN_D12 4341 stda %d48, [%i0]ASI_BLK_P 4342 add %i0, 64, %i0 4343 membar #Sync 4344 FALIGN_D28 4345 stda %d48, [%i0]ASI_BLK_P 4346 ba,pt %ncc, copyin_blkd44 4347 add %i0, 64, %i0 4348 4349copyin_seg7: 4350 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst 4351 FALIGN_D14 4352 ldda [%l7]ASI_BLK_AIUS, %d0 4353 stda %d48, [%i0]ASI_BLK_P 4354 add %l7, 64, %l7 4355 subcc %i3, 64, %i3 4356 bz,pn %ncc, 0f 4357 add %i0, 64, %i0 4358 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst 4359 FALIGN_D30 4360 ldda [%l7]ASI_BLK_AIUS, %d16 4361 stda %d48, [%i0]ASI_BLK_P 4362 add %l7, 64, %l7 4363 subcc %i3, 64, %i3 4364 bz,pn %ncc, 1f 4365 add %i0, 64, %i0 4366 ! 
3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst 4367 FALIGN_D46 4368 ldda [%l7]ASI_BLK_AIUS, %d32 4369 stda %d48, [%i0]ASI_BLK_P 4370 add %l7, 64, %l7 4371 subcc %i3, 64, %i3 4372 bz,pn %ncc, 2f 4373 add %i0, 64, %i0 4374 ba,a,pt %ncc, copyin_seg7 4375 43760: 4377 FALIGN_D30 4378 stda %d48, [%i0]ASI_BLK_P 4379 add %i0, 64, %i0 4380 membar #Sync 4381 FALIGN_D46 4382 stda %d48, [%i0]ASI_BLK_P 4383 ba,pt %ncc, copyin_blkd14 4384 add %i0, 64, %i0 4385 43861: 4387 FALIGN_D46 4388 stda %d48, [%i0]ASI_BLK_P 4389 add %i0, 64, %i0 4390 membar #Sync 4391 FALIGN_D14 4392 stda %d48, [%i0]ASI_BLK_P 4393 ba,pt %ncc, copyin_blkd30 4394 add %i0, 64, %i0 4395 43962: 4397 FALIGN_D14 4398 stda %d48, [%i0]ASI_BLK_P 4399 add %i0, 64, %i0 4400 membar #Sync 4401 FALIGN_D30 4402 stda %d48, [%i0]ASI_BLK_P 4403 ba,pt %ncc, copyin_blkd46 4404 add %i0, 64, %i0 4405 4406 4407 ! 4408 ! dribble out the last partial block 4409 ! 4410copyin_blkd0: 4411 subcc %i4, 8, %i4 4412 blu,pn %ncc, copyin_blkdone 4413 faligndata %d0, %d2, %d48 4414 std %d48, [%i0] 4415 add %i0, 8, %i0 4416copyin_blkd2: 4417 subcc %i4, 8, %i4 4418 blu,pn %ncc, copyin_blkdone 4419 faligndata %d2, %d4, %d48 4420 std %d48, [%i0] 4421 add %i0, 8, %i0 4422copyin_blkd4: 4423 subcc %i4, 8, %i4 4424 blu,pn %ncc, copyin_blkdone 4425 faligndata %d4, %d6, %d48 4426 std %d48, [%i0] 4427 add %i0, 8, %i0 4428copyin_blkd6: 4429 subcc %i4, 8, %i4 4430 blu,pn %ncc, copyin_blkdone 4431 faligndata %d6, %d8, %d48 4432 std %d48, [%i0] 4433 add %i0, 8, %i0 4434copyin_blkd8: 4435 subcc %i4, 8, %i4 4436 blu,pn %ncc, copyin_blkdone 4437 faligndata %d8, %d10, %d48 4438 std %d48, [%i0] 4439 add %i0, 8, %i0 4440copyin_blkd10: 4441 subcc %i4, 8, %i4 4442 blu,pn %ncc, copyin_blkdone 4443 faligndata %d10, %d12, %d48 4444 std %d48, [%i0] 4445 add %i0, 8, %i0 4446copyin_blkd12: 4447 subcc %i4, 8, %i4 4448 blu,pn %ncc, copyin_blkdone 4449 faligndata %d12, %d14, %d48 4450 std %d48, [%i0] 4451 add %i0, 8, %i0 4452copyin_blkd14: 4453 subcc %i4, 8, %i4 4454 
blu,pn %ncc, copyin_blkdone 4455 fsrc1 %d14, %d0 4456 ba,a,pt %ncc, copyin_blkleft 4457 4458copyin_blkd16: 4459 subcc %i4, 8, %i4 4460 blu,pn %ncc, copyin_blkdone 4461 faligndata %d16, %d18, %d48 4462 std %d48, [%i0] 4463 add %i0, 8, %i0 4464copyin_blkd18: 4465 subcc %i4, 8, %i4 4466 blu,pn %ncc, copyin_blkdone 4467 faligndata %d18, %d20, %d48 4468 std %d48, [%i0] 4469 add %i0, 8, %i0 4470copyin_blkd20: 4471 subcc %i4, 8, %i4 4472 blu,pn %ncc, copyin_blkdone 4473 faligndata %d20, %d22, %d48 4474 std %d48, [%i0] 4475 add %i0, 8, %i0 4476copyin_blkd22: 4477 subcc %i4, 8, %i4 4478 blu,pn %ncc, copyin_blkdone 4479 faligndata %d22, %d24, %d48 4480 std %d48, [%i0] 4481 add %i0, 8, %i0 4482copyin_blkd24: 4483 subcc %i4, 8, %i4 4484 blu,pn %ncc, copyin_blkdone 4485 faligndata %d24, %d26, %d48 4486 std %d48, [%i0] 4487 add %i0, 8, %i0 4488copyin_blkd26: 4489 subcc %i4, 8, %i4 4490 blu,pn %ncc, copyin_blkdone 4491 faligndata %d26, %d28, %d48 4492 std %d48, [%i0] 4493 add %i0, 8, %i0 4494copyin_blkd28: 4495 subcc %i4, 8, %i4 4496 blu,pn %ncc, copyin_blkdone 4497 faligndata %d28, %d30, %d48 4498 std %d48, [%i0] 4499 add %i0, 8, %i0 4500copyin_blkd30: 4501 subcc %i4, 8, %i4 4502 blu,pn %ncc, copyin_blkdone 4503 fsrc1 %d30, %d0 4504 ba,a,pt %ncc, copyin_blkleft 4505copyin_blkd32: 4506 subcc %i4, 8, %i4 4507 blu,pn %ncc, copyin_blkdone 4508 faligndata %d32, %d34, %d48 4509 std %d48, [%i0] 4510 add %i0, 8, %i0 4511copyin_blkd34: 4512 subcc %i4, 8, %i4 4513 blu,pn %ncc, copyin_blkdone 4514 faligndata %d34, %d36, %d48 4515 std %d48, [%i0] 4516 add %i0, 8, %i0 4517copyin_blkd36: 4518 subcc %i4, 8, %i4 4519 blu,pn %ncc, copyin_blkdone 4520 faligndata %d36, %d38, %d48 4521 std %d48, [%i0] 4522 add %i0, 8, %i0 4523copyin_blkd38: 4524 subcc %i4, 8, %i4 4525 blu,pn %ncc, copyin_blkdone 4526 faligndata %d38, %d40, %d48 4527 std %d48, [%i0] 4528 add %i0, 8, %i0 4529copyin_blkd40: 4530 subcc %i4, 8, %i4 4531 blu,pn %ncc, copyin_blkdone 4532 faligndata %d40, %d42, %d48 4533 std %d48, [%i0] 
4534 add %i0, 8, %i0 4535copyin_blkd42: 4536 subcc %i4, 8, %i4 4537 blu,pn %ncc, copyin_blkdone 4538 faligndata %d42, %d44, %d48 4539 std %d48, [%i0] 4540 add %i0, 8, %i0 4541copyin_blkd44: 4542 subcc %i4, 8, %i4 4543 blu,pn %ncc, copyin_blkdone 4544 faligndata %d44, %d46, %d48 4545 std %d48, [%i0] 4546 add %i0, 8, %i0 4547copyin_blkd46: 4548 subcc %i4, 8, %i4 4549 blu,pn %ncc, copyin_blkdone 4550 fsrc1 %d46, %d0 4551 4552copyin_blkleft: 45531: 4554 ldda [%l7]ASI_USER, %d2 4555 add %l7, 8, %l7 4556 subcc %i4, 8, %i4 4557 faligndata %d0, %d2, %d8 4558 std %d8, [%i0] 4559 blu,pn %ncc, copyin_blkdone 4560 add %i0, 8, %i0 4561 ldda [%l7]ASI_USER, %d0 4562 add %l7, 8, %l7 4563 subcc %i4, 8, %i4 4564 faligndata %d2, %d0, %d8 4565 std %d8, [%i0] 4566 bgeu,pt %ncc, 1b 4567 add %i0, 8, %i0 4568 4569copyin_blkdone: 4570 tst %i2 4571 bz,pt %ncc, .copyin_exit 4572 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0 4573 45747: lduba [%i1]ASI_USER, %i4 4575 inc %i1 4576 inc %i0 4577 deccc %i2 4578 bgu %ncc, 7b 4579 stb %i4, [%i0 - 1] 4580 4581.copyin_exit: 4582 membar #StoreLoad|#StoreStore 4583 btst FPUSED_FLAG, SAVED_LOFAULT 4584 bz %icc, 1f 4585 nop 4586 4587 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr 4588 wr %o2, 0, %gsr 4589 4590 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 4591 btst FPRS_FEF, %o3 4592 bz %icc, 4f 4593 nop 4594 4595 ! restore fpregs from stack 4596 membar #Sync 4597 add %fp, STACK_BIAS - 257, %o2 4598 and %o2, -64, %o2 4599 ldda [%o2]ASI_BLK_P, %d0 4600 add %o2, 64, %o2 4601 ldda [%o2]ASI_BLK_P, %d16 4602 add %o2, 64, %o2 4603 ldda [%o2]ASI_BLK_P, %d32 4604 add %o2, 64, %o2 4605 ldda [%o2]ASI_BLK_P, %d48 4606 membar #Sync 4607 4608 ba,pt %ncc, 1f 4609 wr %o3, 0, %fprs ! restore fprs 4610 46114: 4612 FZERO ! zero all of the fpregs 4613 wr %o3, 0, %fprs ! restore fprs 4614 46151: 4616 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 4617 membar #Sync ! sync error barrier 4618 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! 
restore old t_lofault
	ret
	  restore %g0, 0, %o0
	!
	! Fault exit for copyin. If the current thread has a copyops
	! vector (T_COPYOPS is non-NULL), jump to the routine whose
	! address is stored at offset CP_COPYIN of that vector.
	! Otherwise return -1.
	!
.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	  nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2
	  nop
2:
	retl
	  mov	-1, %o0
	SET_SIZE(copyin)

#endif	/* lint */

/*
 * xcopyin - enters the common copy-in code at .do_copyin with
 * .xcopyin_err supplied in REAL_LOFAULT as the fault handler.
 *
 * On a fault, .xcopyin_err jumps to the CP_XCOPYIN entry of the
 * thread's copyops vector when T_COPYOPS is non-NULL; otherwise it
 * returns whatever value is in %g1.
 * NOTE(review): %g1 is presumably the error code left by the fault
 * path - confirm against the lofault trap code.
 */
#ifdef lint

/*ARGSUSED*/
int
xcopyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyin)
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	  or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT	! (delay slot)
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f			! no copyops vector: plain error return
	  nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2
	  nop
2:
	retl
	  mov	%g1, %o0		! return the value held in %g1
	SET_SIZE(xcopyin)

#endif	/* lint */

/*
 * xcopyin_little - copy count bytes from uaddr to kaddr one byte at a
 * time, reversing the byte order: the last source byte is stored at
 * kaddr[0] and the first source byte at kaddr[count - 1]. Source bytes
 * are loaded through ASI_AIUSL.
 *
 * .little_err is installed in t_lofault for the duration of the copy
 * and the previous handler (kept in %o5) is put back on both exit
 * paths. Returns 0 on success, including count == 0; on a fault the
 * value in %g1 is returned.
 * NOTE(review): %g1 is presumably the error code left by the fault
 * path - confirm against the lofault trap code.
 */
#ifdef lint

/*ARGSUSED*/
int
xcopyin_little(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5	! %o5 = previous t_lofault
	or	%o4, %lo(.little_err), %o4	! %o4 = address of .little_err
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3		! %o3 = -count; Z set if count == 0
	add	%o0, %o2, %o0		! %o0 = uaddr + count
	bz,pn	%ncc, 2f		! check for zero bytes
	  sub	%o2, 1, %o4		! (delay slot) %o4 = count - 1
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1		! %o1 = kaddr + count
	lduba	[%o0+%o3]ASI_AIUSL, %o4	! first load: uaddr + count - 1

	!
	! Each pass stores to %o1 + %o3, which ascends from kaddr, and
	! loads from %o0 + %o3. %o0 drops by 2 while %o3 rises by 1, so
	! the source address descends one byte per pass. inccc sets the
	! carry when %o3 wraps from -1 to 0, which ends the loop. The
	! load sits in an annulled delay slot, so it is only executed
	! when the branch is taken.
	!
1:	stb	%o4, [%o1+%o3]
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	  lduba	[%o0+%o3]ASI_AIUSL, %o4

2:	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	  mov	%g0, %o0		! return (0)

	! Fault path: put the previous handler back and return %g1.
.little_err:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	  mov	%g1, %o0
	SET_SIZE(xcopyin_little)

#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 *
 * copyin_noerr enters the common copy-in code at .do_copyin with
 * .copyio_noerr supplied in REAL_LOFAULT. .copyio_noerr does no
 * cleanup of its own; it jumps straight to the address held in
 * SAVED_LOFAULT.
 * NOTE(review): SAVED_LOFAULT is presumably the t_lofault value that
 * was current when the copy started - confirm in .do_copyin.
 */
#if defined(lint)

/* ARGSUSED */
void
copyin_noerr(const void *ufrom, void *kto, size_t count)
{}

#else	/* lint */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT	! (delay slot)
	! Shared by copyin_noerr and copyout_noerr.
.copyio_noerr:
	jmp	SAVED_LOFAULT
	  nop
	SET_SIZE(copyin_noerr)

#endif	/* lint */

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 *
 * copyout_noerr enters the common copy-out code at .do_copyout and
 * uses the same .copyio_noerr fault stub as copyin_noerr.
 */

#if defined(lint)

/* ARGSUSED */
void
copyout_noerr(const void *kfrom, void *uto, size_t count)
{}

#else	/* lint */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT	! (delay slot)
	SET_SIZE(copyout_noerr)

#endif	/* lint */

/*
 * Tunables for the hardware-assisted copy and zero paths.
 *
 * hw_copy_limit_N is consulted for requests whose operands are N-byte
 * aligned. As used by the copyin dispatch code earlier in this file,
 * a limit of zero means the hardware path is never taken for that
 * alignment; otherwise only requests longer than the limit use it.
 * All four limits are defined as zero here.
 * NOTE(review): they are presumably given real values elsewhere
 * (e.g. at startup) - confirm where they are set.
 *
 * use_hw_bzero is the switch that callers of hwblkclr are required to
 * check. How use_hw_bcopy and use_hw_copyio are consumed is not
 * visible in this part of the file.
 */
#if defined(lint)

int use_hw_bcopy = 1;
int use_hw_copyio = 1;
int use_hw_bzero = 1;
uint_t hw_copy_limit_1 = 0;
uint_t hw_copy_limit_2 = 0;
uint_t hw_copy_limit_4 = 0;
uint_t hw_copy_limit_8 = 0;

#else /* !lint */

	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_copyio)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1
	DGDEF(hw_copy_limit_1)
	.word	0
	DGDEF(hw_copy_limit_2)
	.word	0
	DGDEF(hw_copy_limit_4)
	.word	0
	DGDEF(hw_copy_limit_8)
	.word	0

	! Back to the text section, 64-byte aligned, for the code below.
	.align	64
	.section ".text"
#endif /* !lint */


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * at least 256 bytes in length using spitfire's block stores. If
 * the criteria for using this routine are not met then it calls bzero
 * and returns 1. Otherwise 0 is returned indicating success.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
#ifdef lint
/*ARGSUSED*/
int
hwblkclr(void *addr, size_t len)
{
	return(0);
}
#else	/* lint */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)
	! %i3 - number of bytes cleared by the current pass
	! %l0 - saved fprs
	! %l1 - pointer to saved %d0 block
	! %l2 - documented as saved curthread->t_lwp, but not referenced
	!	by the code below

	ENTRY(hwblkclr)
	! get another window w/space for one aligned block of saved fpregs
	save	%sp, -SA(MINFRAME + 2*64), %sp

	! Must be block-aligned
	andcc	%i0, (64-1), %g0
	bnz,pn	%ncc, 1f
	  nop

	! ... and must be 256 bytes or more
	cmp	%i1, 256
	blu,pn	%ncc, 1f
	  nop

	! ... and length must be a multiple of 64
	andcc	%i1, (64-1), %g0
	bz,pn	%ncc, 2f
	  nop

1:	! punt, call bzero but notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	  mov	%i1, %o1
	ret
	  restore	%g0, 1, %o0 ! return (1) - did not use block operations

2:	rd	%fprs, %l0		! check for unused fp
	btst	FPRS_FEF, %l0
	bz	1f			! fp not in use: nothing to save
	  nop

	! save in-use fpregs on stack
	! Only %d0-%d14 (one 64-byte block) are used as the zero source,
	! so a single block store to a 64-byte aligned slot in the frame
	! is all that is saved.
	membar	#Sync
	add	%fp, STACK_BIAS - 65, %l1
	and	%l1, -64, %l1
	stda	%d0, [%l1]ASI_BLK_P

1:	membar	#StoreStore|#StoreLoad|#LoadStore
	wr	%g0, FPRS_FEF, %fprs	! enable the fp unit
	wr	%g0, ASI_BLK_P, %asi	! the %asi stores below are block stores

	! Clear block
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fzero	%d8
	fzero	%d10
	fzero	%d12
	fzero	%d14

	mov	256, %i3		! the main loop clears 256 bytes per pass
	ba	.pz_doblock
	  nop

	!
	! Main loop: four block stores clear 256 bytes per pass. The
	! store to offset 192 is issued from the annulled delay slot of
	! the branch at .pz_doblock.
	!
.pz_blkstart:
      ! stda	%d0, [%i0+192]%asi  ! in dly slot of branch that got us here
	stda	%d0, [%i0+128]%asi
	stda	%d0, [%i0+64]%asi
	stda	%d0, [%i0]%asi
.pz_zinst:
	add	%i0, %i3, %i0		! advance by the bytes just cleared
	sub	%i1, %i3, %i1
.pz_doblock:
	cmp	%i1, 256
	bgeu,a	%ncc, .pz_blkstart
	  stda	%d0, [%i0+192]%asi

	!
	! Fewer than 256 bytes remain. %i3 becomes the remaining length
	! rounded down to a multiple of 64 (computed in the delay slot
	! of the blu below). Each stda above is one 4-byte instruction
	! that clears 64 bytes, so backing up %i3 / 16 bytes from
	! .pz_zinst executes exactly %i3 / 64 of them. The code at
	! .pz_zinst then adjusts %i0 and %i1 by %i3.
	!
	cmp	%i1, 64
	blu	%ncc, .pz_finish

	  andn	%i1, (64-1), %i3
	srl	%i3, 4, %i2		! using blocks, 1 instr / 16 words
	set	.pz_zinst, %i4
	sub	%i4, %i2, %i4
	jmp	%i4
	  nop

.pz_finish:
	membar	#Sync
	btst	FPRS_FEF, %l0
	! fp was not in use on entry: just put %fprs back. The write is
	! in an annulled delay slot, so it runs only if the branch is taken.
	bz,a	.pz_finished
	  wr	%l0, 0, %fprs		! restore fprs

	! restore fpregs from stack
	ldda	[%l1]ASI_BLK_P, %d0
	membar	#Sync
	wr	%l0, 0, %fprs		! restore fprs

.pz_finished:
	ret
	  restore	%g0, 0, %o0		! return (0) - block stores were used
	SET_SIZE(hwblkclr)
#endif	/* lint */

#ifdef lint
/* Copy 32 bytes of data from src to dst using physical addresses */
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else /*!lint */

	/*
	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
	 * using physical addresses.
	 *
	 * PSTATE_IE is cleared in %pstate for the duration of the copy
	 * and the original %pstate (kept in %g1) is written back in the
	 * delay slot of the return. All four 8-byte loads are issued
	 * before the first store. %o0 and %o1 are advanced as the copy
	 * proceeds and %o2-%o5 are used as the data registers.
	 * NOTE(review): PSTATE_IE is taken to be the interrupt-enable
	 * bit from its name - confirm in the privregs header.
	 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1		! %g1 = entry %pstate
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g0, %g2, %pstate	! run with PSTATE_IE clear

	ldxa	[%o0]ASI_MEM, %o2
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5
	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	membar	#Sync
	retl
	  wrpr	%g0, %g1, %pstate	! (delay slot) restore entry %pstate
	SET_SIZE(hw_pa_bcopy32)
#endif /* lint */