/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>
#include <sys/machasi.h>
#include <sys/niagaraasi.h>

#if !defined(lint)
#include "assym.h"
#endif	/* lint */


/*
 * Pseudo-code to aid in understanding the control flow of the
 * bcopy/kcopy routine.
 *
 * ! WARNING : <Register usage convention>
 * ! In kcopy() %o5 holds the previous error handler and a flag
 * ! LOFAULT_SET (low bits). %o5 is null in bcopy().
 * ! %o5 is not available for any other use.
 *
 * kcopy():
 *	%o5 = curthread->t_lofault;	! save existing handler in %o5
 *	%o5 |= LOFAULT_SET;		! OR in LOFAULT_SET flag
 *	curthread->t_lofault = .copyerr;
 *	Call bcopy();
 *
 * bcopy():
 *	if (length < 128)
 *		goto regular_copy;
 *
 *	if (!use_hw_bcopy)
 *		goto regular_copy;
 *
 *	blockcopy;
 *	restore t_lofault handler if we came from kcopy();
 *
 *	regular_copy;
 *	restore t_lofault handler if we came from kcopy();
 *
 * In lofault handler:
 *	curthread->t_lofault = (%o5 & ~LOFAULT_SET);	! restore old t_lofault
 *	return (errno)
 *
 */

/*
 * Less than or equal to this number of bytes we will always copy byte-for-byte
 */
#define	SMALL_LIMIT	7

/*
 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
 * handler was set
 */
#define	LOFAULT_SET 2

/*
 * This define is to align data for the unaligned source cases.
 * data1, data2 and data3 are merged into data1 and data2.
 * data3 is preserved for the next merge.
 */
#define	ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp)	\
	sllx	data1, lshift, data1				;\
	srlx	data2, rshift, tmp				;\
	or	data1, tmp, data1				;\
	sllx	data2, lshift, data2				;\
	srlx	data3, rshift, tmp				;\
	or	data2, tmp, data2
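
/*
 * For illustration only (not part of the build): a minimal C model of
 * what ALIGN_DATA computes, assuming 64-bit big-endian doublewords and
 * lshift + rshift == 64.
 *
 *	typedef unsigned long long u64;
 *
 *	// Merge three source doublewords into two aligned doublewords.
 *	// data3 is left untouched so the next merge can reuse it.
 *	static void
 *	align_data(u64 *data1, u64 *data2, u64 data3, int lshift, int rshift)
 *	{
 *		*data1 = (*data1 << lshift) | (*data2 >> rshift);
 *		*data2 = (*data2 << lshift) | (data3 >> rshift);
 *	}
 */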
/*
 * This macro is to align the data. Basically it merges
 * data1 and data2 to form a double word.
 */
#define	ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp)	\
	sllx	data1, lshift, data1				;\
	srlx	data2, rshift, tmp				;\
	or	data1, tmp, data1

#if !defined(NIAGARA_IMPL)
/*
 * Flags set in the lower bits of the t_lofault address:
 * FPUSED_FLAG: The FP registers were in use and must be restored
 * BCOPY_FLAG: Set for bcopy calls, cleared for kcopy calls
 * COPY_FLAGS: Both of the above
 *
 * Other flags:
 * KPREEMPT_FLAG: kpreempt needs to be called
 */
#define	FPUSED_FLAG	1
#define	BCOPY_FLAG	2
#define	COPY_FLAGS	(FPUSED_FLAG | BCOPY_FLAG)
#define	KPREEMPT_FLAG	4

#define	ALIGN_OFF_1_7			\
	faligndata %d0, %d2, %d48	;\
	faligndata %d2, %d4, %d50	;\
	faligndata %d4, %d6, %d52	;\
	faligndata %d6, %d8, %d54	;\
	faligndata %d8, %d10, %d56	;\
	faligndata %d10, %d12, %d58	;\
	faligndata %d12, %d14, %d60	;\
	faligndata %d14, %d16, %d62

#define	ALIGN_OFF_8_15			\
	faligndata %d2, %d4, %d48	;\
	faligndata %d4, %d6, %d50	;\
	faligndata %d6, %d8, %d52	;\
	faligndata %d8, %d10, %d54	;\
	faligndata %d10, %d12, %d56	;\
	faligndata %d12, %d14, %d58	;\
	faligndata %d14, %d16, %d60	;\
	faligndata %d16, %d18, %d62

#define	ALIGN_OFF_16_23			\
	faligndata %d4, %d6, %d48	;\
	faligndata %d6, %d8, %d50	;\
	faligndata %d8, %d10, %d52	;\
	faligndata %d10, %d12, %d54	;\
	faligndata %d12, %d14, %d56	;\
	faligndata %d14, %d16, %d58	;\
	faligndata %d16, %d18, %d60	;\
	faligndata %d18, %d20, %d62

#define	ALIGN_OFF_24_31			\
	faligndata %d6, %d8, %d48	;\
	faligndata %d8, %d10, %d50	;\
	faligndata %d10, %d12, %d52	;\
	faligndata %d12, %d14, %d54	;\
	faligndata %d14, %d16, %d56	;\
	faligndata %d16, %d18, %d58	;\
	faligndata %d18, %d20, %d60	;\
	faligndata %d20, %d22, %d62

#define	ALIGN_OFF_32_39			\
	faligndata %d8, %d10, %d48	;\
	faligndata %d10, %d12, %d50	;\
	faligndata %d12, %d14, %d52	;\
	faligndata %d14, %d16, %d54	;\
	faligndata %d16, %d18, %d56	;\
	faligndata %d18, %d20, %d58	;\
	faligndata %d20, %d22, %d60	;\
	faligndata %d22, %d24, %d62

#define	ALIGN_OFF_40_47			\
	faligndata %d10, %d12, %d48	;\
	faligndata %d12, %d14, %d50	;\
	faligndata %d14, %d16, %d52	;\
	faligndata %d16, %d18, %d54	;\
	faligndata %d18, %d20, %d56	;\
	faligndata %d20, %d22, %d58	;\
	faligndata %d22, %d24, %d60	;\
	faligndata %d24, %d26, %d62

#define	ALIGN_OFF_48_55			\
	faligndata %d12, %d14, %d48	;\
	faligndata %d14, %d16, %d50	;\
	faligndata %d16, %d18, %d52	;\
	faligndata %d18, %d20, %d54	;\
	faligndata %d20, %d22, %d56	;\
	faligndata %d22, %d24, %d58	;\
	faligndata %d24, %d26, %d60	;\
	faligndata %d26, %d28, %d62

#define	ALIGN_OFF_56_63			\
	faligndata %d14, %d16, %d48	;\
	faligndata %d16, %d18, %d50	;\
	faligndata %d18, %d20, %d52	;\
	faligndata %d20, %d22, %d54	;\
	faligndata %d22, %d24, %d56	;\
	faligndata %d24, %d26, %d58	;\
	faligndata %d26, %d28, %d60	;\
	faligndata %d28, %d30, %d62

#define	VIS_BLOCKSIZE		64
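
/*
 * For illustration only: a rough C model of the shuffle each ALIGN_OFF_*
 * macro performs. Each faligndata concatenates two doublewords and
 * extracts eight bytes starting at the GSR.align offset that alignaddr
 * computed earlier ('off' below, the source offset within a doubleword).
 *
 *	typedef unsigned long long u64;
 *
 *	static u64
 *	falign(u64 hi, u64 lo, int off)		// big-endian view
 *	{
 *		return (off == 0 ? hi :
 *		    (hi << (off * 8)) | (lo >> ((8 - off) * 8)));
 *	}
 */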
/*
 * Size of stack frame in order to accommodate a 64-byte aligned
 * floating-point register save area and 2 64-bit temp locations.
 * All copy functions use three quadrants of fp registers; to assure a
 * block-aligned three block buffer in which to save we must reserve
 * four blocks on stack.
 *
 *    _______________________________________ <-- %fp + STACK_BIAS
 *    | We may need to preserve 3 quadrants |
 *    | of fp regs, but since we do so with |
 *    | BST/BLD we need room in which to    |
 *    | align to VIS_BLOCKSIZE bytes.  So   |
 *    | this area is 4 * VIS_BLOCKSIZE.     | <--  - SAVED_FPREGS_OFFSET
 *    |-------------------------------------|
 *    | 8 bytes to save %fprs               | <--  - SAVED_FPRS_OFFSET
 *    |-------------------------------------|
 *    | 8 bytes to save %gsr                | <--  - SAVED_GSR_OFFSET
 *    ---------------------------------------
 */
#define	HWCOPYFRAMESIZE		((VIS_BLOCKSIZE * (3 + 1)) + (2 * 8))
#define	SAVED_FPREGS_OFFSET	(VIS_BLOCKSIZE * 4)
#define	SAVED_FPREGS_ADJUST	((VIS_BLOCKSIZE * 3) + 1)
#define	SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 8)
#define	SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 8)

/*
 * In FP copies if we do not have preserved data to restore over
 * the fp regs we used then we must zero those regs to avoid
 * exposing portions of the data to later threads (data security).
 */
#define	FZERO				\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f16		;\
	fmuld	%f0, %f2, %f18		;\
	faddd	%f0, %f2, %f20		;\
	fmuld	%f0, %f2, %f22		;\
	faddd	%f0, %f2, %f24		;\
	fmuld	%f0, %f2, %f26		;\
	faddd	%f0, %f2, %f28		;\
	fmuld	%f0, %f2, %f30		;\
	faddd	%f0, %f2, %f48		;\
	fmuld	%f0, %f2, %f50		;\
	faddd	%f0, %f2, %f52		;\
	fmuld	%f0, %f2, %f54		;\
	faddd	%f0, %f2, %f56		;\
	fmuld	%f0, %f2, %f58		;\
	faddd	%f0, %f2, %f60		;\
	fmuld	%f0, %f2, %f62

/*
 * Macros to save and restore fp registers to/from the stack.
 * Used to save and restore in-use fp registers when we want to use FP.
 */
#define	BST_FP_TOSTACK(tmp1)				\
	/* membar #Sync	*/				;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
	stda	%f0, [tmp1]ASI_BLK_P			;\
	add	tmp1, VIS_BLOCKSIZE, tmp1		;\
	stda	%f16, [tmp1]ASI_BLK_P			;\
	add	tmp1, VIS_BLOCKSIZE, tmp1		;\
	stda	%f48, [tmp1]ASI_BLK_P			;\
	membar	#Sync

#define	BLD_FP_FROMSTACK(tmp1)				\
	/* membar #Sync - provided at copy completion */ ;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
	ldda	[tmp1]ASI_BLK_P, %f0			;\
	add	tmp1, VIS_BLOCKSIZE, tmp1		;\
	ldda	[tmp1]ASI_BLK_P, %f16			;\
	add	tmp1, VIS_BLOCKSIZE, tmp1		;\
	ldda	[tmp1]ASI_BLK_P, %f48			;\
	membar	#Sync
#endif	/* NIAGARA_IMPL */
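
/*
 * For illustration only: the pointer math used by BST_FP_TOSTACK and
 * BLD_FP_FROMSTACK above, in C (assumes <stdint.h> types). Rounding
 * down by VIS_BLOCKSIZE guarantees a 64-byte aligned save area whose
 * three blocks still lie inside the 4 * VIS_BLOCKSIZE bytes reserved.
 *
 *	static char *
 *	fp_save_area(char *fp_plus_bias)	// %fp + STACK_BIAS
 *	{
 *		uintptr_t p = (uintptr_t)fp_plus_bias - SAVED_FPREGS_ADJUST;
 *		return ((char *)(p & ~(uintptr_t)(VIS_BLOCKSIZE - 1)));
 *	}
 */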
/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */

#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return (0); }

#else	/* lint */

	.seg	".text"
	.align	4

	ENTRY(kcopy)

#if !defined(NIAGARA_IMPL)
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
	or	%l7, %lo(.copyerr), %l7
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	! Note that we carefully do *not* flag the setting of
	! t_lofault.
	membar	#Sync				! sync error barrier
	b	.do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

/*
 * We got here because of a fault during kcopy, or during bcopy if a
 * fault handler existed when bcopy was called.
 * Errno value is in %g1.
 */
.copyerr:
	sethi	%hi(.copyerr2), %l1
	or	%l1, %lo(.copyerr2), %l1
	membar	#Sync				! sync error barrier
	stn	%l1, [THREAD_REG + T_LOFAULT]	! set t_lofault
	btst	FPUSED_FLAG, %o5
	bz,pt	%xcc, 1f
	and	%o5, BCOPY_FLAG, %l1		! copy flag to %l1

	membar	#Sync				! sync error barrier
	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	! restore fpregs from stack
	BLD_FP_FROMSTACK(%o2)

	ba,pt	%ncc, 2f
	wr	%o3, 0, %fprs			! restore fprs

4:
	FZERO
	wr	%o3, 0, %fprs			! restore fprs

2:
	ldn	[THREAD_REG + T_LWP], %o2
	brnz,pt	%o2, 1f
	nop

	ldsb	[THREAD_REG + T_PREEMPT], %l0
	deccc	%l0
	bnz,pn	%ncc, 1f
	stb	%l0, [THREAD_REG + T_PREEMPT]

	! Check for a kernel preemption request
	ldn	[THREAD_REG + T_CPU], %l0
	ldub	[%l0 + CPU_KPRUNRUN], %l0
	brnz,a,pt %l0, 1f			! Need to call kpreempt?
	or	%l1, KPREEMPT_FLAG, %l1		! If so, set the flag

	! kcopy will always set a t_lofault handler. If it fires,
	! we're expected to just return the error code and not to
	! invoke any existing error handler. As far as bcopy is concerned,
	! we only set t_lofault if there was an existing lofault handler.
	! In that case we're expected to invoke the previously existing
	! handler after resetting the t_lofault value.
1:
	andn	%o5, COPY_FLAGS, %o5	! remove flags from lofault address
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	! call kpreempt if necessary
	btst	KPREEMPT_FLAG, %l1
	bz,pt	%icc, 2f
	nop
	call	kpreempt
	rdpr	%pil, %o0		! pass %pil
2:
	btst	BCOPY_FLAG, %l1
	bnz,pn	%ncc, 3f
	nop
	ret
	restore	%g1, 0, %o0

3:
	! We're here via bcopy. There must have been an error handler
	! in place otherwise we would have died a nasty death already.
	jmp	%o5			! goto real handler
	restore	%g0, 0, %o0		! dispose of copy window

/*
 * We got here because of a fault in .copyerr.  We can't safely restore fp
 * state, so we panic.
 */
fp_panic_msg:
	.asciz	"Unable to restore fp state after copy operation"

	.align	4
.copyerr2:
	set	fp_panic_msg, %o0
	call	panic
	nop
#else	/* NIAGARA_IMPL */
	save	%sp, -SA(MINFRAME), %sp
	set	.copyerr, %l7			! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	or	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	b	.do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
.copyerr:
	! kcopy() *always* sets a t_lofault handler, and it ORs LOFAULT_SET
	! into %o5 to indicate it has set the t_lofault handler. We need to
	! clear the LOFAULT_SET flag before restoring the error handler.
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g1, 0, %o0
#endif	/* NIAGARA_IMPL */

	SET_SIZE(kcopy)
#endif	/* lint */
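
/*
 * For reference, the flag protocol kcopy/bcopy use on the saved
 * t_lofault value, as a C sketch (illustrative only; t_lofault is
 * word aligned, so its low bits are free to carry flags):
 *
 *	uintptr_t saved = (uintptr_t)curthread->t_lofault;
 *	saved |= BCOPY_FLAG;		// bcopy entered with a live handler
 *	saved |= FPUSED_FLAG;		// set once fp regs are in use
 *	...
 *	// the epilogue and .copyerr both strip the flags before restoring:
 *	curthread->t_lofault = (void *)(saved & ~COPY_FLAGS);
 */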

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 */
#if defined(lint)

/* ARGSUSED */
void
bcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(bcopy)

#if !defined(NIAGARA_IMPL)
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	brz,pt	%o5, .do_copy
	nop
	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
	or	%l7, %lo(.copyerr), %l7
	membar	#Sync				! sync error barrier
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
	! We've already captured whether t_lofault was zero on entry.
	! We need to mark ourselves as being from bcopy since both
	! kcopy and bcopy use the same code path. If BCOPY_FLAG is
	! set and the saved lofault was zero, we won't reset lofault on
	! returning.
	or	%o5, BCOPY_FLAG, %o5
#else	/* NIAGARA_IMPL */
	save	%sp, -SA(MINFRAME), %sp
	clr	%o5		! flag LOFAULT_SET is not set for bcopy
#endif	/* NIAGARA_IMPL */

.do_copy:
	cmp	%i2, 12			! for small counts
	blu	%ncc, .bytecp		! just copy bytes
	.empty

	cmp	%i2, 128		! for less than 128 bytes
	blu,pn	%ncc, .bcb_punt		! no block st/quad ld
	nop

	set	use_hw_bcopy, %o2
	ld	[%o2], %o2
	brz,pn	%o2, .bcb_punt
	nop

	subcc	%i1, %i0, %i3
	bneg,a,pn %ncc, 1f
	neg	%i3
1:
	/*
	 * Compare against 256 since we should be checking block addresses
	 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
	 * src = dest + (64 * 3) + 63.
	 */
	cmp	%i3, 256
	blu,pn	%ncc, .bcb_punt
	nop

	/*
	 * Copies that reach here have at least 2 blocks of data to copy.
	 */
#if !defined(NIAGARA_IMPL)
	ldn	[THREAD_REG + T_LWP], %o3
	brnz,pt	%o3, 1f
	nop

	! kpreempt_disable();
	ldsb	[THREAD_REG + T_PREEMPT], %o2
	inc	%o2
	stb	%o2, [THREAD_REG + T_PREEMPT]

1:
	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]	! save orig %fprs
	btst	FPRS_FEF, %o2
	bz,a,pt	%icc, .do_blockcopy
	wr	%g0, FPRS_FEF, %fprs

	! save in-use fpregs on stack
	BST_FP_TOSTACK(%o2)
#endif	/* NIAGARA_IMPL */

.do_blockcopy:

#if !defined(NIAGARA_IMPL)
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	or	%o5, FPUSED_FLAG, %o5		! fp regs are in use
#endif	/* NIAGARA_IMPL */

	! Swap src/dst since the code below is memcpy code
	! and memcpy/bcopy have different calling sequences
	mov	%i1, %i5
	mov	%i0, %i1
	mov	%i5, %i0

	! Block (64 bytes) align the destination.
	andcc	%i0, 0x3f, %i3		! is dst aligned on a 64-byte boundary
	bz	%xcc, .chksrc		! dst is already block aligned
	sub	%i3, 0x40, %i3
	neg	%i3			! bytes till dst 64 bytes aligned
	sub	%i2, %i3, %i2		! update i2 with new count

	! Based on source and destination alignment do
	! either 8 bytes, 4 bytes, 2 bytes or byte copy.

	! Is dst & src 8B aligned
	or	%i0, %i1, %o2
	andcc	%o2, 0x7, %g0
	bz	%ncc, .alewdcp
	nop

	! Is dst & src 4B aligned
	andcc	%o2, 0x3, %g0
	bz	%ncc, .alwdcp
	nop

	! Is dst & src 2B aligned
	andcc	%o2, 0x1, %g0
	bz	%ncc, .alhlfwdcp
	nop

	! 1B aligned
1:	ldub	[%i1], %o2
	stb	%o2, [%i0]
	inc	%i1
	deccc	%i3
	bgu,pt	%ncc, 1b
	inc	%i0

	ba	.chksrc
	nop

	! dst & src 4B aligned
.alwdcp:
	ld	[%i1], %o2
	st	%o2, [%i0]
	add	%i1, 0x4, %i1
	subcc	%i3, 0x4, %i3
	bgu,pt	%ncc, .alwdcp
	add	%i0, 0x4, %i0

	ba	.chksrc
	nop

	! dst & src 2B aligned
.alhlfwdcp:
	lduh	[%i1], %o2
	stuh	%o2, [%i0]
	add	%i1, 0x2, %i1
	subcc	%i3, 0x2, %i3
	bgu,pt	%ncc, .alhlfwdcp
	add	%i0, 0x2, %i0

	ba	.chksrc
	nop

	! dst & src 8B aligned
.alewdcp:
	ldx	[%i1], %o2
	stx	%o2, [%i0]
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, .alewdcp
	add	%i0, 0x8, %i0
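
/*
 * The four loops above pick the widest transfer both pointers can
 * support while bringing the destination up to a 64-byte boundary.
 * In C terms (illustrative sketch only):
 *
 *	unsigned int both = (uintptr_t)dst | (uintptr_t)src;
 *	int unit = ((both & 7) == 0) ? 8 :
 *	    ((both & 3) == 0) ? 4 :
 *	    ((both & 1) == 0) ? 2 : 1;
 *	// then copy 'head' bytes, 'unit' bytes at a time
 */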

	! Now Destination is block (64 bytes) aligned
.chksrc:
	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
	sub	%i2, %i3, %i2		! Residue bytes in %i2

	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi

#if !defined(NIAGARA_IMPL)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned src address
	prefetch [%l0+0x0], #one_read
	andcc	%i1, 0x3f, %g0		! is src 64B aligned
	bz,pn	%ncc, .blkcpy
	nop

	! handle misaligned source cases
	alignaddr %i1, %g0, %g0		! generate %gsr

	srl	%i1, 0x3, %l1		! src addr bits 3, 4, 5 are now least
					! significant in %l1
	andcc	%l1, 0x7, %l2		! mask for the low 3 bits (src addr
					! bits 3, 4, 5)
	add	%i1, %i3, %i1

	! switch statement to get to right 8 byte block within
	! 64 byte block
	cmp	%l2, 0x4
	bgeu,a	hlf
	cmp	%l2, 0x6
	cmp	%l2, 0x2
	bgeu,a	sqtr
	nop
	cmp	%l2, 0x1
	be,a	off15
	nop
	ba	off7
	nop
sqtr:
	be,a	off23
	nop
	ba,a	off31
	nop

hlf:
	bgeu,a	fqtr
	nop
	cmp	%l2, 0x5
	be,a	off47
	nop
	ba	off39
	nop
fqtr:
	be,a	off55
	nop

	! Falls through when the source offset is greater than 56
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
7:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_56_63
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 7b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This is the copy case for source offsets between 1 and 7
off7:
	ldda	[%l0]ASI_BLK_P, %d0
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
0:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_1_7
	fmovd	%d16, %d0
	fmovd	%d18, %d2
	fmovd	%d20, %d4
	fmovd	%d22, %d6
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 0b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This is the copy case for source offsets between 8 and 15
off15:
	ldd	[%l0+0x8], %d2
	ldd	[%l0+0x10], %d4
	ldd	[%l0+0x18], %d6
	ldd	[%l0+0x20], %d8
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
1:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_8_15
	fmovd	%d18, %d2
	fmovd	%d20, %d4
	fmovd	%d22, %d6
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 1b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This is the copy case for source offsets between 16 and 23
off23:
	ldd	[%l0+0x10], %d4
	ldd	[%l0+0x18], %d6
	ldd	[%l0+0x20], %d8
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
2:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_16_23
	fmovd	%d20, %d4
	fmovd	%d22, %d6
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 2b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This is the copy case for source offsets between 24 and 31
off31:
	ldd	[%l0+0x18], %d6
	ldd	[%l0+0x20], %d8
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
3:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_24_31
	fmovd	%d22, %d6
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 3b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This is the copy case for source offsets between 32 and 39
off39:
	ldd	[%l0+0x20], %d8
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
4:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_32_39
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 4b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This is the copy case for source offsets between 40 and 47
off47:
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
5:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_40_47
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 5b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This is the copy case for source offsets between 48 and 55
off55:
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
6:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_48_55
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 6b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync
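
/*
 * The branch tree above is a 3-bit switch on which doubleword of its
 * 64-byte block the source starts in. As a C sketch (illustrative):
 *
 *	switch ((src >> 3) & 7) {
 *	case 0: goto off7;	// source offset 1 - 7
 *	case 1: goto off15;	// 8 - 15
 *	case 2: goto off23;	// 16 - 23
 *	case 3: goto off31;	// 24 - 31
 *	case 4: goto off39;	// 32 - 39
 *	case 5: goto off47;	// 40 - 47
 *	case 6: goto off55;	// 48 - 55
 *	case 7: break;		// 56 - 63, handled by the fall-through code
 *	}
 */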

	! Both source and destination are block aligned.
.blkcpy:
	prefetch [%i1+0x40], #one_read
	prefetch [%i1+0x80], #one_read
8:
	stxa	%g0, [%i0]%asi		! initialize the cache line
	ldda	[%i1]ASI_BLK_P, %d0
	stda	%d0, [%i0]ASI_BLK_P

	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 8b
	prefetch [%i1+0x80], #one_read
	membar	#Sync

.blkdone:
#else	/* NIAGARA_IMPL */
	andcc	%i1, 0xf, %o2		! is src quadword aligned
	bz,pn	%xcc, .blkcpy		! src offset in %o2
	nop
	cmp	%o2, 0x8
	bg	.cpy_upper_double
	nop
	bl	.cpy_lower_double
	nop

	! Falls through when the source offset is equal to 8, i.e.
	! the source is double word aligned.
	! In this case no shift/merge of data is required.
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2
loop0:
	ldda	[%i1+0x10]%asi, %l4
	prefetch [%l0+0x40], #one_read

	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	stxa	%l5, [%i0+0x10]%asi
	stxa	%l2, [%i0+0x18]%asi

	ldda	[%i1+0x30]%asi, %l4
	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop0
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2

.cpy_lower_double:
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	sll	%o2, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2	! partial data in %l2 and %l3 has
					! complete data
loop1:
	ldda	[%i1+0x10]%asi, %l4	! %l4 has partial data for this read.
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
							! into %l2 and %l3
	prefetch [%l0+0x40], #one_read
	stxa	%l2, [%i0+0x0]%asi
	stxa	%l3, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)	! merge %l2 with %l5 and
	stxa	%l4, [%i0+0x10]%asi			! %l4 from previous read
	stxa	%l5, [%i0+0x18]%asi			! into %l4 and %l5

	! Repeat the same for next 32 bytes.

	ldda	[%i1+0x30]%asi, %l4
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)
	stxa	%l2, [%i0+0x20]%asi
	stxa	%l3, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)
	stxa	%l4, [%i0+0x30]%asi
	stxa	%l5, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop1
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2

.cpy_upper_double:
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	mov	0x8, %o0
	sub	%o2, %o0, %o0
	sll	%o0, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2	! partial data in %l3 for this read and
					! no data in %l2
loop2:
	ldda	[%i1+0x10]%asi, %l4	! %l4 has complete data and %l5 has
					! partial
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
							! into %l3 and %l4
	prefetch [%l0+0x40], #one_read
	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)	! merge %l2 and %l3 with
	stxa	%l5, [%i0+0x10]%asi			! %l5 from previous read
	stxa	%l2, [%i0+0x18]%asi			! into %l5 and %l2

	! Repeat the same for next 32 bytes.

	ldda	[%i1+0x30]%asi, %l4
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)
	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)
	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop2
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2


	! Both Source and Destination are block aligned.
	! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P
.blkcpy:
	prefetch [%i1+0x0], #one_read
1:
	ldda	[%i1+0x0]%asi, %l0
	ldda	[%i1+0x10]%asi, %l2
	prefetch [%i1+0x40], #one_read

	stxa	%l0, [%i0+0x0]%asi
	ldda	[%i1+0x20]%asi, %l4
	ldda	[%i1+0x30]%asi, %l6

	stxa	%l1, [%i0+0x8]%asi
	stxa	%l2, [%i0+0x10]%asi
	stxa	%l3, [%i0+0x18]%asi
	stxa	%l4, [%i0+0x20]%asi
	stxa	%l5, [%i0+0x28]%asi
	stxa	%l6, [%i0+0x30]%asi
	stxa	%l7, [%i0+0x38]%asi

	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, 1b
	add	%i0, 0x40, %i0

.blkdone:
	membar	#Sync
#endif	/* NIAGARA_IMPL */
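
/*
 * Either way, the steady state of the block loops above has this shape
 * in C (illustrative only; the memcpy stands in for the ldda/stda or
 * quad-load/stxa pairs, and __builtin_prefetch for the prefetches):
 *
 *	while (nblocks-- != 0) {
 *		__builtin_prefetch(src + 0x80, 0, 0);	// two blocks ahead
 *		(void) memcpy(dst, src, 64);
 *		src += 64;
 *		dst += 64;
 *	}
 */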

	brz,pt	%i2, .blkexit
	nop

	! Handle trailing bytes
	cmp	%i2, 0x8
	blu,pt	%ncc, .residue
	nop

	! Can we do some 8B ops
	or	%i1, %i0, %o2
	andcc	%o2, 0x7, %g0
	bnz	%ncc, .last4
	nop

	! Do 8byte ops as long as possible
.last8:
	ldx	[%i1], %o2
	stx	%o2, [%i0]
	add	%i1, 0x8, %i1
	sub	%i2, 0x8, %i2
	cmp	%i2, 0x8
	bgu,pt	%ncc, .last8
	add	%i0, 0x8, %i0

	brz,pt	%i2, .blkexit
	nop

	ba	.residue
	nop

.last4:
	! Can we do 4B ops
	andcc	%o2, 0x3, %g0
	bnz	%ncc, .last2
	nop
1:
	ld	[%i1], %o2
	st	%o2, [%i0]
	add	%i1, 0x4, %i1
	sub	%i2, 0x4, %i2
	cmp	%i2, 0x4
	bgu,pt	%ncc, 1b
	add	%i0, 0x4, %i0

	brz,pt	%i2, .blkexit
	nop

	ba	.residue
	nop

.last2:
	! Can we do 2B ops
	andcc	%o2, 0x1, %g0
	bnz	%ncc, .residue
	nop

1:
	lduh	[%i1], %o2
	stuh	%o2, [%i0]
	add	%i1, 0x2, %i1
	sub	%i2, 0x2, %i2
	cmp	%i2, 0x2
	bgu,pt	%ncc, 1b
	add	%i0, 0x2, %i0

	brz,pt	%i2, .blkexit
	nop

.residue:
	ldub	[%i1], %o2
	stb	%o2, [%i0]
	inc	%i1
	deccc	%i2
	bgu,pt	%ncc, .residue
	inc	%i0
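
/*
 * The trailing-byte cascade above, restated as a C sketch (illustrative;
 * copy8()..copy1() are hypothetical helpers that advance src/dst):
 *
 *	unsigned int both = (uintptr_t)src | (uintptr_t)dst;
 *	if ((both & 7) == 0)
 *		while (rem > 8) copy8();
 *	else if ((both & 3) == 0)
 *		while (rem > 4) copy4();
 *	else if ((both & 1) == 0)
 *		while (rem > 2) copy2();
 *	while (rem > 0) copy1();	// .residue
 */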

.blkexit:
#if !defined(NIAGARA_IMPL)
	btst	FPUSED_FLAG, %o5
	bz	%icc, 1f
	and	%o5, COPY_FLAGS, %l1	! Store flags in %l1
					! We can't clear the flags from %o5 yet.
					! If there's an error, .copyerr will
					! need them

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	! restore fpregs from stack
	BLD_FP_FROMSTACK(%o2)

	ba,pt	%ncc, 2f
	wr	%o3, 0, %fprs		! restore fprs

4:
	FZERO
	wr	%o3, 0, %fprs		! restore fprs

2:
	ldn	[THREAD_REG + T_LWP], %o2
	brnz,pt	%o2, 1f
	nop

	ldsb	[THREAD_REG + T_PREEMPT], %l0
	deccc	%l0
	bnz,pn	%ncc, 1f
	stb	%l0, [THREAD_REG + T_PREEMPT]

	! Check for a kernel preemption request
	ldn	[THREAD_REG + T_CPU], %l0
	ldub	[%l0 + CPU_KPRUNRUN], %l0
	brnz,a,pt %l0, 1f		! Need to call kpreempt?
	or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag

1:
	btst	BCOPY_FLAG, %l1
	bz,pn	%icc, 3f
	andncc	%o5, COPY_FLAGS, %o5

	! Here via bcopy. Check to see if the handler was NULL.
	! If so, just return quietly. Otherwise, reset the
	! handler and go home.
	bnz,pn	%ncc, 3f
	nop

	! Null handler.
	btst	KPREEMPT_FLAG, %l1
	bz,pt	%icc, 2f
	nop
	call	kpreempt
	rdpr	%pil, %o0		! pass %pil
2:

	ret
	restore	%g0, 0, %o0

	! Here via kcopy or bcopy with a handler.
	! Reset the fault handler.
3:
	membar	#Sync
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	! call kpreempt if necessary
	btst	KPREEMPT_FLAG, %l1
	bz,pt	%icc, 4f
	nop
	call	kpreempt
	rdpr	%pil, %o0
4:
#else	/* NIAGARA_IMPL */
	membar	#Sync				! sync error barrier
	! Restore t_lofault handler, if we came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
#endif	/* NIAGARA_IMPL */
	ret
	restore	%g0, 0, %o0

.bcb_punt:
	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	.aldoubcp		! can align on double boundary
	.empty				! assembler complains about label

	xor	%i0, %i1, %o4		! xor from and to address
	btst	3, %o4			! if lower two bits zero
	bz	.alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating a complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
	mov	32, %l5			! load an oft-needed constant
	bz	.align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	.align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
	bgu	%ncc, .more_needed	! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
.more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
.align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
.xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out	! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
.unalign_out:
	tst	%l4			! any bytes leftover?
	bz	%ncc, .cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	%ncc, .cpdone		! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	%ncc, 1b		! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	.dbytecp		! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
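
/*
 * The word-merging above, roughly in C (illustrative; US is the number
 * of bits of the current source word already consumed, LS = 32 - US):
 *
 *	typedef unsigned int u32;
 *
 *	u32 have = leftover << LS;	// pending bits (%i5)
 *	while (n >= 4) {
 *		u32 w = *src++;			// aligned read
 *		*dst++ = have | (w >> US);	// aligned write
 *		have = w << LS;
 *		n -= 4;
 *	}
 */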

	!
	! the destination address is aligned and the source is not
	!
.align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	.align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	.xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is; if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
.aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	blu,a	%ncc, .alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	.alignit		! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldx	[%i0+%i1], %o4		! read from address
	stx	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 8, %i1		! delay slot, inc to address
	cmp	%i2, 4			! see if we can copy a word
	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
.wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	.dbytecp

	! we come here to align copies on word boundaries
.alwordcp:
	call	.alignit		! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	.wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
.bytecp:
	b	.dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
.dbytecp:
	deccc	%i2			! dec count
	bgeu,a	%ncc, 1b		! loop till done
	ldub	[%i0+%i1], %o4		! read from address
.cpdone:
#if !defined(NIAGARA_IMPL)
	! FPUSED_FLAG will not have been set in any path leading to
	! this point. No need to deal with it.
	btst	BCOPY_FLAG, %o5
	bz,pn	%icc, 2f
	andncc	%o5, BCOPY_FLAG, %o5
	! Here via bcopy. Check to see if the handler was NULL.
	! If so, just return quietly. Otherwise, reset the
	! handler and go home.
	bnz,pn	%ncc, 2f
	nop
	!
	! Null handler.
	!
	ret
	restore	%g0, 0, %o0
	! Here via kcopy or bcopy with a handler.
	! Reset the fault handler.
2:
	membar	#Sync
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
#else	/* NIAGARA_IMPL */
	membar	#Sync				! sync error barrier
	! Restore t_lofault handler, if we came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
#endif	/* NIAGARA_IMPL */
	ret
	restore	%g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

#endif	/* lint */
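
/*
 * The .dbytecp loop above keeps (src - dst) in one register so a single
 * induction variable indexes both buffers. As a C sketch (illustrative):
 *
 *	ptrdiff_t diff = src - dst;	// %i0 after the sub
 *	while (count-- != 0) {
 *		*dst = dst[diff];	// ldub [%i0 + %i1]
 *		dst++;			// one increment serves both
 *	}
 */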

/*
 * Block copy with possibly overlapped operands.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				! no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

#endif	/* lint */
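
/*
 * ovbcopy's dispatch, restated in C (illustrative only):
 *
 *	if (count == 0)
 *		return;
 *	if (count <= abs(from - to))
 *		bcopy(from, to, count);		// no overlap possible
 *	else if (from >= to)
 *		copy_forward(from, to, count);	// .ov_fwd
 *	else
 *		copy_backward(from, to, count);	// .ov_bkwd
 */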

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else	/* lint */
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)

	set	PAGESIZE, %i2

	/*
	 * We copy exactly one page; PAGESIZE is a multiple of 0x80.
	 */
	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
	prefetch [%i0+0x0], #one_read
	prefetch [%i0+0x40], #one_read
1:
	prefetch [%i0+0x80], #one_read
	prefetch [%i0+0xc0], #one_read
	ldda	[%i0+0x0]%asi, %l0
	ldda	[%i0+0x10]%asi, %l2
	ldda	[%i0+0x20]%asi, %l4
	ldda	[%i0+0x30]%asi, %l6
	stxa	%l0, [%i1+0x0]%asi
	stxa	%l1, [%i1+0x8]%asi
	stxa	%l2, [%i1+0x10]%asi
	stxa	%l3, [%i1+0x18]%asi
	stxa	%l4, [%i1+0x20]%asi
	stxa	%l5, [%i1+0x28]%asi
	stxa	%l6, [%i1+0x30]%asi
	stxa	%l7, [%i1+0x38]%asi
	ldda	[%i0+0x40]%asi, %l0
	ldda	[%i0+0x50]%asi, %l2
	ldda	[%i0+0x60]%asi, %l4
	ldda	[%i0+0x70]%asi, %l6
	stxa	%l0, [%i1+0x40]%asi
	stxa	%l1, [%i1+0x48]%asi
	stxa	%l2, [%i1+0x50]%asi
	stxa	%l3, [%i1+0x58]%asi
	stxa	%l4, [%i1+0x60]%asi
	stxa	%l5, [%i1+0x68]%asi
	stxa	%l6, [%i1+0x70]%asi
	stxa	%l7, [%i1+0x78]%asi

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	add	%i1, 0x80, %i1

	membar	#Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)
#endif	/* lint */
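
/*
 * Shape of the loop above in C (illustrative only; it relies on
 * PAGESIZE being a multiple of 0x80 exactly as the assembly does):
 *
 *	for (size_t n = PAGESIZE; n != 0; n -= 0x80) {
 *		__builtin_prefetch(src + 0x80, 0, 0);
 *		__builtin_prefetch(src + 0xc0, 0, 0);
 *		(void) memcpy(dst, src, 0x80);	// really quad ldda/stxa pairs
 *		src += 0x80;
 *		dst += 0x80;
 *	}
 */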


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults.
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */

/*
 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * None of the copyops routines grab a window until it's decided that
 * we need to do a HW block copy operation. This saves a window
 * spill/fill when we're called during socket ops. The typical IO
 * path won't cause spill/fill traps.
 *
 * This code uses a set of 4 limits for the maximum size that will
 * be copied given a particular input/output address alignment.
 * The default limits are:
 *
 * single byte aligned - 256 (hw_copy_limit_1)
 * two byte aligned - 512 (hw_copy_limit_2)
 * four byte aligned - 1024 (hw_copy_limit_4)
 * eight byte aligned - 1024 (hw_copy_limit_8)
 *
 * If the value for a particular limit is zero, the copy will be done
 * via the copy loops rather than block store/quad load instructions.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous lofault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our nowindow fault handler into %o3.
 * Place the address of the windowed fault handler into %o4.
 * --> We'll use this handler if we end up grabbing a window
 * --> before we use block initializing store and quad load ASIs
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers. Based on the alignment we check count
 * against a limit based on detected alignment.  If we exceed the
 * alignment value we copy via block initializing store and quad
 * load instructions.
 *
 * If we don't exceed one of the limits, we store -count in %o3,
 * and we store the number of chunks (8, 4, 2 or 1 byte) operated
 * on in our basic copy loop in %o2. Following this we branch
 * to the appropriate copy loop and copy that many chunks.
 * Since we've been adding the chunk size to %o3 each time through
 * as well as decrementing %o2, we can tell if any data is
 * left to be copied by examining %o3. If that is zero, we're
 * done and can go home. If not, we figure out what the largest
 * chunk size left to be copied is and branch to that copy loop
 * unless there's only one byte left. We load that as we're
 * branching to code that stores it just before we return.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping.  All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handlers
 * for copyOP and xcopyOP are found at the end of each individual function.
 * The handlers for xcopyOP_little are found at the end of xcopyin_little.
 * The handlers for copyOP_noerr are found at the end of copyin_noerr.
 */
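
/*
 * The limit dispatch described above, as a C sketch (illustrative; a
 * hw_copy_limit_N of zero disables the block path for that alignment):
 *
 *	unsigned int both = (uintptr_t)src | (uintptr_t)dst;
 *	uint_t lim = (both & 1) ? hw_copy_limit_1 :
 *	    ((both & 7) == 0) ? hw_copy_limit_8 :
 *	    ((both & 3) == 0) ? hw_copy_limit_4 : hw_copy_limit_2;
 *	if (lim != 0 && count > lim)
 *		return (block_copy(src, dst, count));	// HW assisted
 *	return (loop_copy(src, dst, count));		// simple copy loops
 */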

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in
 * REAL_LOFAULT.  This allows us to share common code for all the flavors
 * of the copy operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
	ENTRY(copyio_fault)
#if !defined(NIAGARA_IMPL)
	btst	FPUSED_FLAG, SAVED_LOFAULT
	bz	1f
	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT

	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
	wr	%o2, 0, %gsr		! restore gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz	%icc, 4f
	nop

	! restore fpregs from stack
	BLD_FP_FROMSTACK(%o2)

	ba,pt	%ncc, 1f
	wr	%o3, 0, %fprs		! restore fprs

4:
	FZERO				! zero all of the fpregs
	wr	%o3, 0, %fprs		! restore fprs

1:
#else	/* NIAGARA_IMPL */
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
#endif	/* NIAGARA_IMPL */

	restore

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault)

	ENTRY(copyio_fault_nowindow)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault_nowindow)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o4
	or	%o4, %lo(copyio_fault), %o4
	sethi	%hi(copyio_fault_nowindow), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault_nowindow), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3
	!
	! What was previously ".small_copyout"
	! Do full differenced copy.
	!
.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o0 | %o1 (used for alignment checking)
	! %o4 is alternate lo_fault
	! %o5 is original lo_fault
	!
	! See if we're single byte aligned. If we are, check the
	! limit for single byte copies. If we're smaller or equal,
	! bounce to the byte for byte copy loop. Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3
	!
	! Single byte aligned. Do we do it via HW or via
	! byte for byte? Do a quick no memory reference
	! check to pick up small copies.
	!
	sethi	%hi(hw_copy_limit_1), %o3
	!
	! Big enough that we need to check the HW limit for
	! this size copy.
	!
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	!
	! Is HW copy on? If not, do everything byte for byte.
	!
	tst	%o3
	bz,pn	%icc, .dcobcp
	subcc	%o3, %o2, %o3
	!
	! If we're less than or equal to the single byte copy limit,
	! bop to the copy loop.
	!
	bge,pt	%ncc, .dcobcp
	nop
	!
	! We're big enough and copy is on. Do it with HW.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcoh8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcoh4
	btst	3, %o3
	!
	! See if we're in the "small range".
	! If so, go off and do the copy.
	! If not, load the hard limit. %o3 is
	! available for reuse.
	!
	sethi	%hi(hw_copy_limit_8), %o3
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	!
	! If it's zero, there's no HW bcopy.
	! Bop off to the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcos8
	subcc	%o3, %o2, %o3
	!
	! We're negative if our size is larger than hw_copy_limit_8.
	!
	bge,pt	%ncc, .dcos8
	nop
	!
	! HW assist is on and we're large enough. Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos8:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcoh4:
	bnz,pn	%ncc, .dcoh2
	!
	! See if we're in the "small range".
	! If so, go off and do the copy.
	! If not, load the hard limit. %o3 is
	! available for reuse.
	!
	sethi	%hi(hw_copy_limit_4), %o3
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	!
	! If it's zero, there's no HW bcopy.
	! Bop off to the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcos4
	subcc	%o3, %o2, %o3
	!
	! We're negative if our size is larger than hw_copy_limit_4.
	!
	bge,pt	%ncc, .dcos4
	nop
	!
	! HW assist is on and we're large enough. Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos4:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
	!
	! We must be 2 byte aligned. Off we go.
	! The check for small copies was done in the
	! delay at .dcoh4
	!
.dcoh2:
	ble	%ncc, .dcos2
	sethi	%hi(hw_copy_limit_2), %o3
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .dcos2
	subcc	%o3, %o2, %o3
	bge,pt	%ncc, .dcos2
	nop
	!
	! HW is on and we're big enough. Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
.small_copyout:
	!
	! Why are we doing this AGAIN? There are certain conditions in
	! big_copyout that will cause us to forego the HW assisted copies
	! and bounce back to a non-HW assisted copy. This dispatches those
	! copies. Note that we branch around this in the main line code.
	!
	! We make no check for limits or HW enablement here. We've
	! already been told that we're a poster child so just go off
	! and do it.
	!
	or	%o0, %o1, %o3
	btst	1, %o3
	bnz	%icc, .dcobcp		! Most likely
	btst	7, %o3
	bz	%icc, .dcos8
	btst	3, %o3
	bz	%icc, .dcos4
	nop
	ba,pt	%ncc, .dcos2
	nop
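
/*
 * The chunked loops below (and the byte loop at .dcocl above) run on a
 * negated count so a single add is both the index update and the
 * termination test. In C terms (illustrative):
 *
 *	src += count;			// point at the ends
 *	dst += count;
 *	ssize_t off = -(ssize_t)count;	// %o3
 *	do {
 *		dst[off] = src[off];	// one index, both buffers
 *	} while (++off < 0);
 */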

	.align 32
.dodebc:
	ldx	[%o0 + %o3], %o4
	deccc	%o2
	stxa	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop. Check to see if we're done. Most
	! eight byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left - do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
	!
	.align 32
.dodfbc:
	lduw	[%o0 + %o3], %o4
	deccc	%o2
	sta	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop. Check to see if we're done. Most
	! four byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
	! copy.
	!
	.align 32
.dodtbc:
	lduh	[%o0 + %o3], %o4
	deccc	%o2
	stha	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop. Anything left?
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Deal with the last byte
	!
	ldub	[%o0 + %o3], %o4
	stba	%o4, [%o1 + %o3]ASI_USER
.dcofh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0
    !
    ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
    !
    .align 32
.dodfbc:
    lduw [%o0 + %o3], %o4
    deccc %o2
    sta %o4, [%o1 + %o3]ASI_USER
    bg,pt %ncc, .dodfbc
    addcc %o3, 4, %o3
    !
    ! End of copy loop. Check to see if we're done. Most
    ! four byte aligned copies end here.
    !
    bz,pt %ncc, .dcofh
    nop
    !
    ! Something is left. Do it byte for byte.
    !
    ba,pt %ncc, .dcocl
    ldub [%o0 + %o3], %o4 ! load next byte
    !
    ! Two byte aligned copy loop. %o2 is the number of 2 byte chunks to
    ! copy.
    !
    .align 32
.dodtbc:
    lduh [%o0 + %o3], %o4
    deccc %o2
    stha %o4, [%o1 + %o3]ASI_USER
    bg,pt %ncc, .dodtbc
    addcc %o3, 2, %o3
    !
    ! End of copy loop. Anything left?
    !
    bz,pt %ncc, .dcofh
    nop
    !
    ! Deal with the last byte
    !
    ldub [%o0 + %o3], %o4
    stba %o4, [%o1 + %o3]ASI_USER
.dcofh:
    membar #Sync
    stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
    retl
    clr %o0

.big_copyout:
    ! We're going to go off and do a block copy.
    ! Switch fault handlers and grab a window. We
    ! don't do a membar #Sync since we've done only
    ! kernel data to this point.
    stn %o4, [THREAD_REG + T_LOFAULT]

    ! Copyouts that reach here are larger than 256 bytes. The
    ! hw_copy_limit_1 is set to 256. Never set this limit to less
    ! than 128 bytes.
#if !defined(NIAGARA_IMPL)
    save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp

    rd %fprs, %o2 ! check for unused fp
    st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save %fprs
    btst FPRS_FEF, %o2
    bz,a,pt %icc, .do_block_copyout
    wr %g0, FPRS_FEF, %fprs

    ! save in-use fpregs on stack
    BST_FP_TOSTACK(%o2)
#else /* NIAGARA_IMPL */
    save %sp, -SA(MINFRAME), %sp
#endif /* NIAGARA_IMPL */

.do_block_copyout:

#if !defined(NIAGARA_IMPL)
    rd %gsr, %o2
    stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
    ! set the lower bit of the saved t_lofault to indicate that we
    ! need to clear the %fprs register on the way out
    or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
#endif /* NIAGARA_IMPL */

    ! Swap src/dst since the code below is memcpy code
    ! and memcpy/bcopy have different calling sequences
    mov %i1, %i5
    mov %i0, %i1
    mov %i5, %i0

    ! Block (64 bytes) align the destination.
    andcc %i0, 0x3f, %i3 ! is dst block aligned
    bz %ncc, copyout_blalign ! dst already block aligned
    sub %i3, 0x40, %i3
    neg %i3 ! bytes till dst 64 bytes aligned
    sub %i2, %i3, %i2 ! update i2 with new count

    ! Based on source and destination alignment do
    ! either 8 bytes, 4 bytes, 2 bytes or byte copy.

    ! Is dst & src 8B aligned
    or %i0, %i1, %o2
    andcc %o2, 0x7, %g0
    bz %ncc, .co_alewdcp
    nop

    ! Is dst & src 4B aligned
    andcc %o2, 0x3, %g0
    bz %ncc, .co_alwdcp
    nop

    ! Is dst & src 2B aligned
    andcc %o2, 0x1, %g0
    bz %ncc, .co_alhlfwdcp
    nop

    ! 1B aligned
1:  ldub [%i1], %o2
    stba %o2, [%i0]ASI_USER
    inc %i1
    deccc %i3
    bgu,pt %ncc, 1b
    inc %i0

    ba copyout_blalign
    nop

    !
dst & src 4B aligned 2175.co_alwdcp: 2176 ld [%i1], %o2 2177 sta %o2, [%i0]ASI_USER 2178 add %i1, 0x4, %i1 2179 subcc %i3, 0x4, %i3 2180 bgu,pt %ncc, .co_alwdcp 2181 add %i0, 0x4, %i0 2182 2183 ba copyout_blalign 2184 nop 2185 2186 ! dst & src 2B aligned 2187.co_alhlfwdcp: 2188 lduh [%i1], %o2 2189 stuha %o2, [%i0]ASI_USER 2190 add %i1, 0x2, %i1 2191 subcc %i3, 0x2, %i3 2192 bgu,pt %ncc, .co_alhlfwdcp 2193 add %i0, 0x2, %i0 2194 2195 ba copyout_blalign 2196 nop 2197 2198 ! dst & src 8B aligned 2199.co_alewdcp: 2200 ldx [%i1], %o2 2201 stxa %o2, [%i0]ASI_USER 2202 add %i1, 0x8, %i1 2203 subcc %i3, 0x8, %i3 2204 bgu,pt %ncc, .co_alewdcp 2205 add %i0, 0x8, %i0 2206 2207 ! Now Destination is block (64 bytes) aligned 2208copyout_blalign: 2209 andn %i2, 0x3f, %i3 ! %i3 count is multiple of block size 2210 sub %i2, %i3, %i2 ! Residue bytes in %i2 2211 2212 mov ASI_BLK_INIT_QUAD_LDD_AIUS, %asi 2213 2214#if !defined(NIAGARA_IMPL) 2215 andn %i1, 0x3f, %l0 ! %l0 has block aligned src address 2216 prefetch [%l0+0x0], #one_read 2217 andcc %i1, 0x3f, %g0 ! is src 64B aligned 2218 bz,pn %ncc, .co_blkcpy 2219 nop 2220 2221 ! handle misaligned source cases 2222 alignaddr %i1, %g0, %g0 ! generate %gsr 2223 2224 srl %i1, 0x3, %l1 ! src add bits 3, 4, 5 are now least 2225 ! significant in %l1 2226 andcc %l1, 0x7, %l2 ! mask everything except bits 1, 2, 3 2227 add %i1, %i3, %i1 2228 2229 ! switch statement to get to right 8 byte block within 2230 ! 64 byte block 2231 cmp %l2, 0x4 2232 bgeu,a co_hlf 2233 cmp %l2, 0x6 2234 cmp %l2, 0x2 2235 bgeu,a co_sqtr 2236 nop 2237 cmp %l2, 0x1 2238 be,a co_off15 2239 nop 2240 ba co_off7 2241 nop 2242co_sqtr: 2243 be,a co_off23 2244 nop 2245 ba,a co_off31 2246 nop 2247 2248co_hlf: 2249 bgeu,a co_fqtr 2250 nop 2251 cmp %l2, 0x5 2252 be,a co_off47 2253 nop 2254 ba co_off39 2255 nop 2256co_fqtr: 2257 be,a co_off55 2258 nop 2259 2260 ldd [%l0+0x38], %d14 2261 prefetch [%l0+0x40], #one_read 2262 prefetch [%l0+0x80], #one_read 22637: 2264 add %l0, 0x40, %l0 2265 stxa %g0, [%i0]%asi ! initialize the cache line 2266 2267 ldda [%l0]ASI_BLK_P, %d16 2268 ALIGN_OFF_56_63 2269 fmovd %d30, %d14 2270 2271 stda %d48, [%i0]ASI_BLK_AIUS 2272 subcc %i3, 0x40, %i3 2273 add %i0, 0x40, %i0 2274 bgu,pt %ncc, 7b 2275 prefetch [%l0+0x80], #one_read 2276 ba .co_blkdone 2277 membar #Sync 2278 2279co_off7: 2280 ldda [%l0]ASI_BLK_P, %d0 2281 prefetch [%l0+0x40], #one_read 2282 prefetch [%l0+0x80], #one_read 22830: 2284 add %l0, 0x40, %l0 2285 stxa %g0, [%i0]%asi ! initialize the cache line 2286 2287 ldda [%l0]ASI_BLK_P, %d16 2288 ALIGN_OFF_1_7 2289 fmovd %d16, %d0 2290 fmovd %d18, %d2 2291 fmovd %d20, %d4 2292 fmovd %d22, %d6 2293 fmovd %d24, %d8 2294 fmovd %d26, %d10 2295 fmovd %d28, %d12 2296 fmovd %d30, %d14 2297 2298 stda %d48, [%i0]ASI_BLK_AIUS 2299 subcc %i3, 0x40, %i3 2300 add %i0, 0x40, %i0 2301 bgu,pt %ncc, 0b 2302 prefetch [%l0+0x80], #one_read 2303 ba .co_blkdone 2304 membar #Sync 2305 2306co_off15: 2307 ldd [%l0+0x8], %d2 2308 ldd [%l0+0x10], %d4 2309 ldd [%l0+0x18], %d6 2310 ldd [%l0+0x20], %d8 2311 ldd [%l0+0x28], %d10 2312 ldd [%l0+0x30], %d12 2313 ldd [%l0+0x38], %d14 2314 prefetch [%l0+0x40], #one_read 2315 prefetch [%l0+0x80], #one_read 23161: 2317 add %l0, 0x40, %l0 2318 stxa %g0, [%i0]%asi ! 
initialize the cache line 2319 2320 ldda [%l0]ASI_BLK_P, %d16 2321 ALIGN_OFF_8_15 2322 fmovd %d18, %d2 2323 fmovd %d20, %d4 2324 fmovd %d22, %d6 2325 fmovd %d24, %d8 2326 fmovd %d26, %d10 2327 fmovd %d28, %d12 2328 fmovd %d30, %d14 2329 2330 stda %d48, [%i0]ASI_BLK_AIUS 2331 subcc %i3, 0x40, %i3 2332 add %i0, 0x40, %i0 2333 bgu,pt %ncc, 1b 2334 prefetch [%l0+0x80], #one_read 2335 ba .co_blkdone 2336 membar #Sync 2337 2338co_off23: 2339 ldd [%l0+0x10], %d4 2340 ldd [%l0+0x18], %d6 2341 ldd [%l0+0x20], %d8 2342 ldd [%l0+0x28], %d10 2343 ldd [%l0+0x30], %d12 2344 ldd [%l0+0x38], %d14 2345 prefetch [%l0+0x40], #one_read 2346 prefetch [%l0+0x80], #one_read 23472: 2348 add %l0, 0x40, %l0 2349 stxa %g0, [%i0]%asi ! initialize the cache line 2350 2351 ldda [%l0]ASI_BLK_P, %d16 2352 ALIGN_OFF_16_23 2353 fmovd %d20, %d4 2354 fmovd %d22, %d6 2355 fmovd %d24, %d8 2356 fmovd %d26, %d10 2357 fmovd %d28, %d12 2358 fmovd %d30, %d14 2359 2360 stda %d48, [%i0]ASI_BLK_AIUS 2361 subcc %i3, 0x40, %i3 2362 add %i0, 0x40, %i0 2363 bgu,pt %ncc, 2b 2364 prefetch [%l0+0x80], #one_read 2365 ba .co_blkdone 2366 membar #Sync 2367 2368co_off31: 2369 ldd [%l0+0x18], %d6 2370 ldd [%l0+0x20], %d8 2371 ldd [%l0+0x28], %d10 2372 ldd [%l0+0x30], %d12 2373 ldd [%l0+0x38], %d14 2374 prefetch [%l0+0x40], #one_read 2375 prefetch [%l0+0x80], #one_read 23763: 2377 add %l0, 0x40, %l0 2378 stxa %g0, [%i0]%asi ! initialize the cache line 2379 2380 ldda [%l0]ASI_BLK_P, %d16 2381 ALIGN_OFF_24_31 2382 fmovd %d22, %d6 2383 fmovd %d24, %d8 2384 fmovd %d26, %d10 2385 fmovd %d28, %d12 2386 fmovd %d30, %d14 2387 2388 stda %d48, [%i0]ASI_BLK_AIUS 2389 subcc %i3, 0x40, %i3 2390 add %i0, 0x40, %i0 2391 bgu,pt %ncc, 3b 2392 prefetch [%l0+0x80], #one_read 2393 ba .co_blkdone 2394 membar #Sync 2395 2396co_off39: 2397 ldd [%l0+0x20], %d8 2398 ldd [%l0+0x28], %d10 2399 ldd [%l0+0x30], %d12 2400 ldd [%l0+0x38], %d14 2401 prefetch [%l0+0x40], #one_read 2402 prefetch [%l0+0x80], #one_read 24034: 2404 add %l0, 0x40, %l0 2405 stxa %g0, [%i0]%asi ! initialize the cache line 2406 2407 ldda [%l0]ASI_BLK_P, %d16 2408 ALIGN_OFF_32_39 2409 fmovd %d24, %d8 2410 fmovd %d26, %d10 2411 fmovd %d28, %d12 2412 fmovd %d30, %d14 2413 2414 stda %d48, [%i0]ASI_BLK_AIUS 2415 subcc %i3, 0x40, %i3 2416 add %i0, 0x40, %i0 2417 bgu,pt %ncc, 4b 2418 prefetch [%l0+0x80], #one_read 2419 ba .co_blkdone 2420 membar #Sync 2421 2422co_off47: 2423 ldd [%l0+0x28], %d10 2424 ldd [%l0+0x30], %d12 2425 ldd [%l0+0x38], %d14 2426 prefetch [%l0+0x40], #one_read 2427 prefetch [%l0+0x80], #one_read 24285: 2429 add %l0, 0x40, %l0 2430 stxa %g0, [%i0]%asi ! initialize the cache line 2431 2432 ldda [%l0]ASI_BLK_P, %d16 2433 ALIGN_OFF_40_47 2434 fmovd %d26, %d10 2435 fmovd %d28, %d12 2436 fmovd %d30, %d14 2437 2438 stda %d48, [%i0]ASI_BLK_AIUS 2439 subcc %i3, 0x40, %i3 2440 add %i0, 0x40, %i0 2441 bgu,pt %ncc, 5b 2442 prefetch [%l0+0x80], #one_read 2443 ba .co_blkdone 2444 membar #Sync 2445 2446co_off55: 2447 ldd [%l0+0x30], %d12 2448 ldd [%l0+0x38], %d14 2449 prefetch [%l0+0x40], #one_read 2450 prefetch [%l0+0x80], #one_read 24516: 2452 add %l0, 0x40, %l0 2453 stxa %g0, [%i0]%asi ! 
initialize the cache line 2454 2455 ldda [%l0]ASI_BLK_P, %d16 2456 ALIGN_OFF_48_55 2457 fmovd %d28, %d12 2458 fmovd %d30, %d14 2459 2460 stda %d48, [%i0]ASI_BLK_AIUS 2461 subcc %i3, 0x40, %i3 2462 add %i0, 0x40, %i0 2463 bgu,pt %ncc, 6b 2464 prefetch [%l0+0x80], #one_read 2465 ba .co_blkdone 2466 membar #Sync 2467 2468.co_blkcpy: 2469 prefetch [%i1+0x40], #one_read 2470 prefetch [%i1+0x80], #one_read 24718: 2472 stxa %g0, [%i0]%asi ! initialize the cache line 2473 ldda [%i1]ASI_BLK_P, %d0 2474 stda %d0, [%i0]ASI_BLK_AIUS 2475 2476 add %i1, 0x40, %i1 2477 subcc %i3, 0x40, %i3 2478 add %i0, 0x40, %i0 2479 bgu,pt %ncc, 8b 2480 prefetch [%i1+0x80], #one_read 2481 membar #Sync 2482 2483.co_blkdone: 2484#else /* NIAGARA_IMPL */ 2485 andcc %i1, 0xf, %o2 ! is src quadword aligned 2486 bz,pn %xcc, .co_blkcpy ! src offset in %o2 (last 4-bits) 2487 nop 2488 cmp %o2, 0x8 2489 bg .co_upper_double 2490 nop 2491 bl .co_lower_double 2492 nop 2493 2494 ! Falls through when source offset is equal to 8 i.e. 2495 ! source is double word aligned. 2496 ! In this case no shift/merge of data is required 2497 2498 sub %i1, %o2, %i1 ! align the src at 16 bytes. 2499 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 2500 prefetch [%l0+0x0], #one_read 2501 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2502.co_loop0: 2503 add %i1, 0x10, %i1 2504 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2505 prefetch [%l0+0x40], #one_read 2506 2507 stxa %l3, [%i0+0x0]%asi 2508 stxa %l4, [%i0+0x8]%asi 2509 2510 add %i1, 0x10, %i1 2511 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2512 2513 stxa %l5, [%i0+0x10]%asi 2514 stxa %l2, [%i0+0x18]%asi 2515 2516 add %i1, 0x10, %i1 2517 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2518 2519 stxa %l3, [%i0+0x20]%asi 2520 stxa %l4, [%i0+0x28]%asi 2521 2522 add %i1, 0x10, %i1 2523 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2524 2525 stxa %l5, [%i0+0x30]%asi 2526 stxa %l2, [%i0+0x38]%asi 2527 2528 add %l0, 0x40, %l0 2529 subcc %i3, 0x40, %i3 2530 bgu,pt %xcc, .co_loop0 2531 add %i0, 0x40, %i0 2532 ba .co_blkdone 2533 add %i1, %o2, %i1 ! increment the source by src offset 2534 ! the src offset was stored in %o2 2535 2536.co_lower_double: 2537 2538 sub %i1, %o2, %i1 ! align the src at 16 bytes. 2539 sll %o2, 3, %o0 ! %o0 left shift 2540 mov 0x40, %o1 2541 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 2542 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 2543 prefetch [%l0+0x0], #one_read 2544 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 ! partial data in %l2 and %l3 has 2545 ! complete data 2546.co_loop1: 2547 add %i1, 0x10, %i1 2548 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 ! %l4 has partial data 2549 ! for this read. 2550 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4 2551 ! into %l2 and %l3 2552 prefetch [%l0+0x40], #one_read 2553 2554 stxa %l2, [%i0+0x0]%asi 2555 stxa %l3, [%i0+0x8]%asi 2556 2557 add %i1, 0x10, %i1 2558 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2559 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and 2560 ! %l4 from previous read 2561 ! into %l4 and %l5 2562 stxa %l4, [%i0+0x10]%asi 2563 stxa %l5, [%i0+0x18]%asi 2564 2565 ! Repeat the same for next 32 bytes. 
2566 2567 add %i1, 0x10, %i1 2568 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2569 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) 2570 2571 stxa %l2, [%i0+0x20]%asi 2572 stxa %l3, [%i0+0x28]%asi 2573 2574 add %i1, 0x10, %i1 2575 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2576 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) 2577 2578 stxa %l4, [%i0+0x30]%asi 2579 stxa %l5, [%i0+0x38]%asi 2580 2581 add %l0, 0x40, %l0 2582 subcc %i3, 0x40, %i3 2583 bgu,pt %xcc, .co_loop1 2584 add %i0, 0x40, %i0 2585 ba .co_blkdone 2586 add %i1, %o2, %i1 ! increment the source by src offset 2587 ! the src offset was stored in %o2 2588 2589.co_upper_double: 2590 2591 sub %i1, %o2, %i1 ! align the src at 16 bytes. 2592 sub %o2, 0x8, %o0 2593 sll %o0, 3, %o0 ! %o0 left shift 2594 mov 0x40, %o1 2595 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 2596 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 2597 prefetch [%l0+0x0], #one_read 2598 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 ! partial data in %l3 2599 ! for this read and 2600 ! no data in %l2 2601.co_loop2: 2602 add %i1, 0x10, %i1 2603 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 ! %l4 has complete data 2604 ! and %l5 has partial 2605 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! merge %l3, %l4 and %l5 2606 ! into %l3 and %l4 2607 prefetch [%l0+0x40], #one_read 2608 2609 stxa %l3, [%i0+0x0]%asi 2610 stxa %l4, [%i0+0x8]%asi 2611 2612 add %i1, 0x10, %i1 2613 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2614 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with 2615 ! %l5 from previous read 2616 ! into %l5 and %l2 2617 2618 stxa %l5, [%i0+0x10]%asi 2619 stxa %l2, [%i0+0x18]%asi 2620 2621 ! Repeat the same for next 32 bytes. 2622 2623 add %i1, 0x10, %i1 2624 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2625 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) 2626 2627 stxa %l3, [%i0+0x20]%asi 2628 stxa %l4, [%i0+0x28]%asi 2629 2630 add %i1, 0x10, %i1 2631 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2632 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) 2633 2634 stxa %l5, [%i0+0x30]%asi 2635 stxa %l2, [%i0+0x38]%asi 2636 2637 add %l0, 0x40, %l0 2638 subcc %i3, 0x40, %i3 2639 bgu,pt %xcc, .co_loop2 2640 add %i0, 0x40, %i0 2641 ba .co_blkdone 2642 add %i1, %o2, %i1 ! increment the source by src offset 2643 ! the src offset was stored in %o2 2644 2645 2646 ! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P 2647.co_blkcpy: 2648 2649 andn %i1, 0x3f, %o0 ! %o0 has block aligned source 2650 prefetch [%o0+0x0], #one_read 26511: 2652 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l0 2653 add %i1, 0x10, %i1 2654 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2655 add %i1, 0x10, %i1 2656 2657 prefetch [%o0+0x40], #one_read 2658 2659 stxa %l0, [%i0+0x0]%asi 2660 2661 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2662 add %i1, 0x10, %i1 2663 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l6 2664 add %i1, 0x10, %i1 2665 2666 stxa %l1, [%i0+0x8]%asi 2667 stxa %l2, [%i0+0x10]%asi 2668 stxa %l3, [%i0+0x18]%asi 2669 stxa %l4, [%i0+0x20]%asi 2670 stxa %l5, [%i0+0x28]%asi 2671 stxa %l6, [%i0+0x30]%asi 2672 stxa %l7, [%i0+0x38]%asi 2673 2674 add %o0, 0x40, %o0 2675 subcc %i3, 0x40, %i3 2676 bgu,pt %xcc, 1b 2677 add %i0, 0x40, %i0 2678 2679.co_blkdone: 2680 membar #Sync 2681#endif /* NIAGARA_IMPL */ 2682 2683 brz,pt %i2, .copyout_exit 2684 nop 2685 2686 ! Handle trailing bytes 2687 cmp %i2, 0x8 2688 blu,pt %ncc, .co_residue 2689 nop 2690 2691 ! Can we do some 8B ops 2692 or %i1, %i0, %o2 2693 andcc %o2, 0x7, %g0 2694 bnz %ncc, .co_last4 2695 nop 2696 2697 ! 
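/*
 * Residue sketch (illustrative): at most 63 bytes remain in %i2 after
 * the block loop above. The code below picks the widest lane that the
 * combined src|dst alignment allows and then finishes byte by byte:
 *
 *	if (((src | dst) & 7) == 0)
 *		copy 8 bytes at a time while more than 8 remain;
 *	else if (((src | dst) & 3) == 0)
 *		copy 4 bytes at a time while more than 4 remain;
 *	else if (((src | dst) & 1) == 0)
 *		copy 2 bytes at a time while more than 2 remain;
 *	copy whatever is left a byte at a time (.co_residue);
 */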
    ! Do 8-byte ops as long as possible
.co_last8:
    ldx [%i1], %o2
    stxa %o2, [%i0]ASI_USER
    add %i1, 0x8, %i1
    sub %i2, 0x8, %i2
    cmp %i2, 0x8
    bgu,pt %ncc, .co_last8
    add %i0, 0x8, %i0

    brz,pt %i2, .copyout_exit
    nop

    ba .co_residue
    nop

.co_last4:
    ! Can we do 4B ops
    andcc %o2, 0x3, %g0
    bnz %ncc, .co_last2
    nop
1:
    ld [%i1], %o2
    sta %o2, [%i0]ASI_USER
    add %i1, 0x4, %i1
    sub %i2, 0x4, %i2
    cmp %i2, 0x4
    bgu,pt %ncc, 1b
    add %i0, 0x4, %i0

    brz,pt %i2, .copyout_exit
    nop

    ba .co_residue
    nop

.co_last2:
    ! Can we do 2B ops
    andcc %o2, 0x1, %g0
    bnz %ncc, .co_residue
    nop

1:
    lduh [%i1], %o2
    stuha %o2, [%i0]ASI_USER
    add %i1, 0x2, %i1
    sub %i2, 0x2, %i2
    cmp %i2, 0x2
    bgu,pt %ncc, 1b
    add %i0, 0x2, %i0

    brz,pt %i2, .copyout_exit
    nop

    ! Copy the residue as byte copy
.co_residue:
    ldub [%i1], %i4
    stba %i4, [%i0]ASI_USER
    inc %i1
    deccc %i2
    bgu,pt %xcc, .co_residue
    inc %i0

.copyout_exit:
#if !defined(NIAGARA_IMPL)
    ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
    wr %o2, 0, %gsr ! restore gsr

    ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
    btst FPRS_FEF, %o3
    bz %icc, 4f
    nop

    ! restore fpregs from stack
    BLD_FP_FROMSTACK(%o2)

    ba,pt %ncc, 2f
    wr %o3, 0, %fprs ! restore fprs

4:
    FZERO ! zero all of the fpregs
    wr %o3, 0, %fprs ! restore fprs

2:
    membar #Sync
    andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
#else /* NIAGARA_IMPL */
    membar #Sync
#endif /* NIAGARA_IMPL */
    stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
    ret
    restore %g0, 0, %o0

.copyout_err:
    ldn [THREAD_REG + T_COPYOPS], %o4
    brz %o4, 2f
    nop
    ldn [%o4 + CP_COPYOUT], %g2
    jmp %g2
    nop
2:
    retl
    mov -1, %o0
    SET_SIZE(copyout)

#endif /* lint */


#ifdef lint

/*ARGSUSED*/
int
xcopyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else /* lint */

    ENTRY(xcopyout)
    sethi %hi(.xcopyout_err), REAL_LOFAULT
    b .do_copyout
    or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
.xcopyout_err:
    ldn [THREAD_REG + T_COPYOPS], %o4
    brz %o4, 2f
    nop
    ldn [%o4 + CP_XCOPYOUT], %g2
    jmp %g2
    nop
2:
    retl
    mov %g1, %o0
    SET_SIZE(xcopyout)

#endif /* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyout_little(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else /* lint */

    ENTRY(xcopyout_little)
    sethi %hi(.little_err), %o4
    ldn [THREAD_REG + T_LOFAULT], %o5
    or %o4, %lo(.little_err), %o4
    membar #Sync ! sync error barrier
    stn %o4, [THREAD_REG + T_LOFAULT]

    subcc %g0, %o2, %o3
    add %o0, %o2, %o0
    bz,pn %ncc, 2f ! check for zero bytes
    sub %o2, 1, %o4
    add %o0, %o4, %o0 ! start w/last byte
    add %o1, %o2, %o1
    ldub [%o0+%o3], %o4

1:  stba %o4, [%o1+%o3]ASI_AIUSL
    inccc %o3
    sub %o0, 2, %o0 ! get next byte
    bcc,a,pt %ncc, 1b
    ldub [%o0+%o3], %o4

2:  membar #Sync ! sync error barrier
    stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
    retl
    mov %g0, %o0 ! return (0)
    SET_SIZE(xcopyout_little)

#endif /* lint */
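/*
 * Fault-path sketch (illustrative): .copyout_err and .xcopyout_err
 * above, like their copyin counterparts below, defer to the per-thread
 * copyops vector when one is installed:
 *
 *	if (curthread->t_copyops != NULL)
 *		tail-call the matching handler, e.g.
 *		    curthread->t_copyops->cp_copyout(kaddr, uaddr, len);
 *	else
 *		return (-1);	! the xcopy* variants return the errno
 */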
/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

#if defined(lint)

/*ARGSUSED*/
int
copyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else /* lint */

    ENTRY(copyin)
    sethi %hi(.copyin_err), REAL_LOFAULT
    or REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
    !
    ! Check the length and bail if zero.
    !
    tst %o2
    bnz,pt %ncc, 1f
    nop
    retl
    clr %o0
1:
    sethi %hi(copyio_fault), %o4
    or %o4, %lo(copyio_fault), %o4
    sethi %hi(copyio_fault_nowindow), %o3
    ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
    or %o3, %lo(copyio_fault_nowindow), %o3
    membar #Sync
    stn %o3, [THREAD_REG + T_LOFAULT]

    mov %o0, SAVE_SRC
    mov %o1, SAVE_DST
    mov %o2, SAVE_COUNT

    !
    ! Check to see if we're more than SMALL_LIMIT.
    !
    subcc %o2, SMALL_LIMIT, %o3
    bgu,a,pt %ncc, .dci_ns
    or %o0, %o1, %o3
    !
    ! What was previously ".small_copyin"
    !
.dcibcp:
    sub %g0, %o2, %o3 ! setup for copy loop
    add %o0, %o2, %o0
    add %o1, %o2, %o1
    ba,pt %ncc, .dcicl
    lduba [%o0 + %o3]ASI_USER, %o4
    !
    ! %o0 and %o1 point at the end and remain pointing at the end
    ! of their buffers. We pull things out by adding %o3 (which is
    ! the negation of the length) to the buffer end which gives us
    ! the current location in the buffers. By incrementing %o3 we walk
    ! through both buffers without having to bump each buffer's
    ! pointer. A very fast 4 instruction loop.
    !
    .align 16
.dcicl:
    stb %o4, [%o1 + %o3]
    inccc %o3
    bl,a,pt %ncc, .dcicl
    lduba [%o0 + %o3]ASI_USER, %o4
    !
    ! We're done. Go home.
    !
    membar #Sync
    stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
    retl
    clr %o0
    !
    ! Try aligned copies from here.
    !
.dci_ns:
    !
    ! See if we're single byte aligned. If we are, check the
    ! limit for single byte copies. If we're smaller or equal,
    ! bounce to the byte for byte copy loop. Otherwise do it in
    ! HW (if enabled).
    !
    btst 1, %o3
    bz,a,pt %icc, .dcih8
    btst 7, %o3
    !
    ! We're single byte aligned.
    !
    sethi %hi(hw_copy_limit_1), %o3
    ld [%o3 + %lo(hw_copy_limit_1)], %o3
    !
    ! Is HW copy on? If not, do everything byte for byte.
    !
    tst %o3
    bz,pn %icc, .dcibcp
    subcc %o3, %o2, %o3
    !
    ! Are we bigger than the HW limit? If not
    ! go to byte for byte.
    !
    bge,pt %ncc, .dcibcp
    nop
    !
    ! We're big enough and copy is on. Do it with HW.
    !
    ba,pt %ncc, .big_copyin
    nop
.dcih8:
    !
    ! 8 byte aligned?
    !
    bnz,a %ncc, .dcih4
    btst 3, %o3
    !
    ! We're eight byte aligned.
    !
    sethi %hi(hw_copy_limit_8), %o3
    ld [%o3 + %lo(hw_copy_limit_8)], %o3
    !
    ! Is HW assist on? If not, do it with the aligned copy.
    !
    tst %o3
    bz,pn %icc, .dcis8
    subcc %o3, %o2, %o3
    bge %ncc, .dcis8
    nop
    ba,pt %ncc, .big_copyin
    nop
.dcis8:
    !
    ! Housekeeping for copy loops. Uses same idea as in the byte for
    ! byte copy loop above.
    !
    add %o0, %o2, %o0
    add %o1, %o2, %o1
    sub %g0, %o2, %o3
    ba,pt %ncc, .didebc
    srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
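/*
 * Access sketch (illustrative): the copyin loops below mirror the
 * copyout loops above; only the side that touches user memory goes
 * through the user ASI. For the single byte case:
 *
 *	copyout:	ldub	[kaddr], %o4
 *			stba	%o4, [uaddr]ASI_USER
 *	copyin:		lduba	[uaddr]ASI_USER, %o4
 *			stb	%o4, [kaddr]
 */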
    !
    ! 4 byte aligned?
    !
.dcih4:
    bnz %ncc, .dcih2
    sethi %hi(hw_copy_limit_4), %o3
    ld [%o3 + %lo(hw_copy_limit_4)], %o3
    !
    ! Is HW assist on? If not, do it with the aligned copy.
    !
    tst %o3
    bz,pn %icc, .dcis4
    subcc %o3, %o2, %o3
    !
    ! We're negative if our size is larger than hw_copy_limit_4.
    !
    bge %ncc, .dcis4
    nop
    ba,pt %ncc, .big_copyin
    nop
.dcis4:
    !
    ! Housekeeping for copy loops. Uses same idea as in the byte
    ! for byte copy loop above.
    !
    add %o0, %o2, %o0
    add %o1, %o2, %o1
    sub %g0, %o2, %o3
    ba,pt %ncc, .didfbc
    srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
.dcih2:
    !
    ! We're two byte aligned. Check for "smallness"
    ! done in delay at .dcih4
    !
    bleu,pt %ncc, .dcis2
    sethi %hi(hw_copy_limit_2), %o3
    ld [%o3 + %lo(hw_copy_limit_2)], %o3
    !
    ! Is HW assist on? If not, do it with the aligned copy.
    !
    tst %o3
    bz,pn %icc, .dcis2
    subcc %o3, %o2, %o3
    !
    ! Are we larger than the HW limit?
    !
    bge %ncc, .dcis2
    nop
    !
    ! HW assist is on and we're large enough to use it.
    !
    ba,pt %ncc, .big_copyin
    nop
    !
    ! Housekeeping for copy loops. Uses same idea as in the byte
    ! for byte copy loop above.
    !
.dcis2:
    add %o0, %o2, %o0
    add %o1, %o2, %o1
    sub %g0, %o2, %o3
    ba,pt %ncc, .didtbc
    srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
    !
.small_copyin:
    !
    ! Why are we doing this AGAIN? There are certain conditions in
    ! big_copyin that will cause us to forgo the HW-assisted copies
    ! and bounce back to a non-HW assisted copy. This dispatches
    ! those copies. Note that we branch around this in the main line
    ! code.
    !
    ! We make no check for limits or HW enablement here. We've
    ! already been told that we're a poster child so just go off
    ! and do it.
    !
    or %o0, %o1, %o3
    btst 1, %o3
    bnz %icc, .dcibcp ! Most likely
    btst 7, %o3
    bz %icc, .dcis8
    btst 3, %o3
    bz %icc, .dcis4
    nop
    ba,pt %ncc, .dcis2
    nop
    !
    ! Eight byte aligned copies. A steal from the original .small_copyin
    ! with modifications. %o2 is number of 8 byte chunks to copy. When
    ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
    ! to copy.
    !
    .align 32
.didebc:
    ldxa [%o0 + %o3]ASI_USER, %o4
    deccc %o2
    stx %o4, [%o1 + %o3]
    bg,pt %ncc, .didebc
    addcc %o3, 8, %o3
    !
    ! End of copy loop. Most 8 byte aligned copies end here.
    !
    bz,pt %ncc, .dcifh
    nop
    !
    ! Something is left. Do it byte for byte.
    !
    ba,pt %ncc, .dcicl
    lduba [%o0 + %o3]ASI_USER, %o4
    !
    ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
    !
    .align 32
.didfbc:
    lduwa [%o0 + %o3]ASI_USER, %o4
    deccc %o2
    st %o4, [%o1 + %o3]
    bg,pt %ncc, .didfbc
    addcc %o3, 4, %o3
    !
    ! End of copy loop. Most 4 byte aligned copies end here.
    !
    bz,pt %ncc, .dcifh
    nop
    !
    ! Something is left. Do it byte for byte.
    !
    ba,pt %ncc, .dcicl
    lduba [%o0 + %o3]ASI_USER, %o4
    !
    ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
    ! copy.
    !
    .align 32
.didtbc:
    lduha [%o0 + %o3]ASI_USER, %o4
    deccc %o2
    sth %o4, [%o1 + %o3]
    bg,pt %ncc, .didtbc
    addcc %o3, 2, %o3
    !
    ! End of copy loop. Most 2 byte aligned copies end here.
    !
    bz,pt %ncc, .dcifh
    nop
    !
    ! Deal with the last byte
    !
    lduba [%o0 + %o3]ASI_USER, %o4
    stb %o4, [%o1 + %o3]
.dcifh:
    membar #Sync
    stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
    retl
    clr %o0
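/*
 * FP bookkeeping sketch (illustrative, non-NIAGARA_IMPL path only):
 * the block copy at .big_copyin below uses the floating point
 * registers, so live FP state is spilled first and FPUSED_FLAG is
 * folded into the saved t_lofault so the exit and fault paths know to
 * restore it:
 *
 *	fprs = %fprs;
 *	if (fprs & FPRS_FEF)		! somebody's FP state is live
 *		BST_FP_TOSTACK();	! spill it to the stack
 *	else
 *		%fprs = FPRS_FEF;	! enable FP for our own use
 *	SAVED_LOFAULT |= FPUSED_FLAG;
 */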
.big_copyin:
    ! We're going off to do a block copy.
    ! Switch fault handlers and grab a window. We
    ! don't do a membar #Sync since we've done only
    ! kernel data to this point.
    stn %o4, [THREAD_REG + T_LOFAULT]

    ! Copyins that reach here are larger than 256 bytes. The
    ! hw_copy_limit_1 is set to 256. Never set this limit to less
    ! than 128 bytes.
#if !defined(NIAGARA_IMPL)
    save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp

    rd %fprs, %o2 ! check for unused fp
    st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save %fprs
    btst FPRS_FEF, %o2
    bz,a,pt %icc, .do_blockcopyin
    wr %g0, FPRS_FEF, %fprs

    ! save in-use fpregs on stack
    BST_FP_TOSTACK(%o2)
#else /* NIAGARA_IMPL */
    save %sp, -SA(MINFRAME), %sp
#endif /* NIAGARA_IMPL */

.do_blockcopyin:

#if !defined(NIAGARA_IMPL)
    rd %gsr, %o2
    stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
    ! set the lower bit of the saved t_lofault to indicate that we
    ! need to clear the %fprs register on the way out
    or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
#endif /* NIAGARA_IMPL */

    ! Swap src/dst since the code below is memcpy code
    ! and memcpy/bcopy have different calling sequences
    mov %i1, %i5
    mov %i0, %i1
    mov %i5, %i0

    ! Block (64 bytes) align the destination.
    andcc %i0, 0x3f, %i3 ! is dst block aligned
    bz %ncc, copyin_blalign ! dst already block aligned
    sub %i3, 0x40, %i3
    neg %i3 ! bytes till dst 64 bytes aligned
    sub %i2, %i3, %i2 ! update i2 with new count

    ! Based on source and destination alignment do
    ! either 8 bytes, 4 bytes, 2 bytes or byte copy.

    ! Is dst & src 8B aligned
    or %i0, %i1, %o2
    andcc %o2, 0x7, %g0
    bz %ncc, .ci_alewdcp
    nop

    ! Is dst & src 4B aligned
    andcc %o2, 0x3, %g0
    bz %ncc, .ci_alwdcp
    nop

    ! Is dst & src 2B aligned
    andcc %o2, 0x1, %g0
    bz %ncc, .ci_alhlfwdcp
    nop

    ! 1B aligned
1:  lduba [%i1]ASI_USER, %o2
    stb %o2, [%i0]
    inc %i1
    deccc %i3
    bgu,pt %ncc, 1b
    inc %i0

    ba copyin_blalign
    nop

    ! dst & src 4B aligned
.ci_alwdcp:
    lda [%i1]ASI_USER, %o2
    st %o2, [%i0]
    add %i1, 0x4, %i1
    subcc %i3, 0x4, %i3
    bgu,pt %ncc, .ci_alwdcp
    add %i0, 0x4, %i0

    ba copyin_blalign
    nop

    ! dst & src 2B aligned
.ci_alhlfwdcp:
    lduha [%i1]ASI_USER, %o2
    stuh %o2, [%i0]
    add %i1, 0x2, %i1
    subcc %i3, 0x2, %i3
    bgu,pt %ncc, .ci_alhlfwdcp
    add %i0, 0x2, %i0

    ba copyin_blalign
    nop

    ! dst & src 8B aligned
.ci_alewdcp:
    ldxa [%i1]ASI_USER, %o2
    stx %o2, [%i0]
    add %i1, 0x8, %i1
    subcc %i3, 0x8, %i3
    bgu,pt %ncc, .ci_alewdcp
    add %i0, 0x8, %i0

copyin_blalign:
    andn %i2, 0x3f, %i3 ! %i3 count is multiple of block size
    sub %i2, %i3, %i2 ! Residue bytes in %i2

#if !defined(NIAGARA_IMPL)
    mov ASI_USER, %asi

    andn %i1, 0x3f, %l0 ! %l0 has block aligned src address
    prefetch [%l0+0x0], #one_read
    andcc %i1, 0x3f, %g0 !
is src 64B aligned 3289 bz,pn %ncc, .ci_blkcpy 3290 nop 3291 3292 ! handle misaligned source cases 3293 alignaddr %i1, %g0, %g0 ! generate %gsr 3294 3295 srl %i1, 0x3, %l1 ! src add bits 3, 4, 5 are now least 3296 ! significant in %l1 3297 andcc %l1, 0x7, %l2 ! mask everything except bits 1, 2, 3 3298 add %i1, %i3, %i1 3299 3300 ! switch statement to get to right 8 byte block within 3301 ! 64 byte block 3302 cmp %l2, 0x4 3303 bgeu,a ci_hlf 3304 cmp %l2, 0x6 3305 cmp %l2, 0x2 3306 bgeu,a ci_sqtr 3307 nop 3308 cmp %l2, 0x1 3309 be,a ci_off15 3310 nop 3311 ba ci_off7 3312 nop 3313ci_sqtr: 3314 be,a ci_off23 3315 nop 3316 ba,a ci_off31 3317 nop 3318 3319ci_hlf: 3320 bgeu,a ci_fqtr 3321 nop 3322 cmp %l2, 0x5 3323 be,a ci_off47 3324 nop 3325 ba ci_off39 3326 nop 3327ci_fqtr: 3328 be,a ci_off55 3329 nop 3330 3331 ldda [%l0+0x38]%asi, %d14 3332 prefetch [%l0+0x40], #one_read 3333 prefetch [%l0+0x80], #one_read 33347: 3335 add %l0, 0x40, %l0 3336 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3337 3338 ldda [%l0]ASI_BLK_AIUS, %d16 3339 ALIGN_OFF_56_63 3340 fmovd %d30, %d14 3341 3342 stda %d48, [%i0]ASI_BLK_P 3343 subcc %i3, 0x40, %i3 3344 add %i0, 0x40, %i0 3345 bgu,pt %ncc, 7b 3346 prefetch [%l0+0x80], #one_read 3347 ba .ci_blkdone 3348 membar #Sync 3349 3350ci_off7: 3351 ldda [%l0]ASI_BLK_AIUS, %d0 3352 prefetch [%l0+0x40], #one_read 3353 prefetch [%l0+0x80], #one_read 33540: 3355 add %l0, 0x40, %l0 3356 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3357 3358 ldda [%l0]ASI_BLK_AIUS, %d16 3359 ALIGN_OFF_1_7 3360 fmovd %d16, %d0 3361 fmovd %d18, %d2 3362 fmovd %d20, %d4 3363 fmovd %d22, %d6 3364 fmovd %d24, %d8 3365 fmovd %d26, %d10 3366 fmovd %d28, %d12 3367 fmovd %d30, %d14 3368 3369 stda %d48, [%i0]ASI_BLK_P 3370 subcc %i3, 0x40, %i3 3371 add %i0, 0x40, %i0 3372 bgu,pt %ncc, 0b 3373 prefetch [%l0+0x80], #one_read 3374 ba .ci_blkdone 3375 membar #Sync 3376 3377ci_off15: 3378 ldda [%l0+0x8]%asi, %d2 3379 ldda [%l0+0x10]%asi, %d4 3380 ldda [%l0+0x18]%asi, %d6 3381 ldda [%l0+0x20]%asi, %d8 3382 ldda [%l0+0x28]%asi, %d10 3383 ldda [%l0+0x30]%asi, %d12 3384 ldda [%l0+0x38]%asi, %d14 3385 prefetch [%l0+0x40], #one_read 3386 prefetch [%l0+0x80], #one_read 33871: 3388 add %l0, 0x40, %l0 3389 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3390 3391 ldda [%l0]ASI_BLK_AIUS, %d16 3392 ALIGN_OFF_8_15 3393 fmovd %d18, %d2 3394 fmovd %d20, %d4 3395 fmovd %d22, %d6 3396 fmovd %d24, %d8 3397 fmovd %d26, %d10 3398 fmovd %d28, %d12 3399 fmovd %d30, %d14 3400 3401 stda %d48, [%i0]ASI_BLK_P 3402 subcc %i3, 0x40, %i3 3403 add %i0, 0x40, %i0 3404 bgu,pt %ncc, 1b 3405 prefetch [%l0+0x80], #one_read 3406 ba .ci_blkdone 3407 membar #Sync 3408 3409ci_off23: 3410 ldda [%l0+0x10]%asi, %d4 3411 ldda [%l0+0x18]%asi, %d6 3412 ldda [%l0+0x20]%asi, %d8 3413 ldda [%l0+0x28]%asi, %d10 3414 ldda [%l0+0x30]%asi, %d12 3415 ldda [%l0+0x38]%asi, %d14 3416 prefetch [%l0+0x40], #one_read 3417 prefetch [%l0+0x80], #one_read 34182: 3419 add %l0, 0x40, %l0 3420 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! 
initialize the cache line 3421 3422 ldda [%l0]ASI_BLK_AIUS, %d16 3423 ALIGN_OFF_16_23 3424 fmovd %d20, %d4 3425 fmovd %d22, %d6 3426 fmovd %d24, %d8 3427 fmovd %d26, %d10 3428 fmovd %d28, %d12 3429 fmovd %d30, %d14 3430 3431 stda %d48, [%i0]ASI_BLK_P 3432 subcc %i3, 0x40, %i3 3433 add %i0, 0x40, %i0 3434 bgu,pt %ncc, 2b 3435 prefetch [%l0+0x80], #one_read 3436 ba .ci_blkdone 3437 membar #Sync 3438 3439ci_off31: 3440 ldda [%l0+0x18]%asi, %d6 3441 ldda [%l0+0x20]%asi, %d8 3442 ldda [%l0+0x28]%asi, %d10 3443 ldda [%l0+0x30]%asi, %d12 3444 ldda [%l0+0x38]%asi, %d14 3445 prefetch [%l0+0x40], #one_read 3446 prefetch [%l0+0x80], #one_read 34473: 3448 add %l0, 0x40, %l0 3449 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3450 3451 ldda [%l0]ASI_BLK_AIUS, %d16 3452 ALIGN_OFF_24_31 3453 fmovd %d22, %d6 3454 fmovd %d24, %d8 3455 fmovd %d26, %d10 3456 fmovd %d28, %d12 3457 fmovd %d30, %d14 3458 3459 stda %d48, [%i0]ASI_BLK_P 3460 subcc %i3, 0x40, %i3 3461 add %i0, 0x40, %i0 3462 bgu,pt %ncc, 3b 3463 prefetch [%l0+0x80], #one_read 3464 ba .ci_blkdone 3465 membar #Sync 3466 3467ci_off39: 3468 ldda [%l0+0x20]%asi, %d8 3469 ldda [%l0+0x28]%asi, %d10 3470 ldda [%l0+0x30]%asi, %d12 3471 ldda [%l0+0x38]%asi, %d14 3472 prefetch [%l0+0x40], #one_read 3473 prefetch [%l0+0x80], #one_read 34744: 3475 add %l0, 0x40, %l0 3476 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3477 3478 ldda [%l0]ASI_BLK_AIUS, %d16 3479 ALIGN_OFF_32_39 3480 fmovd %d24, %d8 3481 fmovd %d26, %d10 3482 fmovd %d28, %d12 3483 fmovd %d30, %d14 3484 3485 stda %d48, [%i0]ASI_BLK_P 3486 subcc %i3, 0x40, %i3 3487 add %i0, 0x40, %i0 3488 bgu,pt %ncc, 4b 3489 prefetch [%l0+0x80], #one_read 3490 ba .ci_blkdone 3491 membar #Sync 3492 3493ci_off47: 3494 ldda [%l0+0x28]%asi, %d10 3495 ldda [%l0+0x30]%asi, %d12 3496 ldda [%l0+0x38]%asi, %d14 3497 prefetch [%l0+0x40], #one_read 3498 prefetch [%l0+0x80], #one_read 34995: 3500 add %l0, 0x40, %l0 3501 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3502 3503 ldda [%l0]ASI_BLK_AIUS, %d16 3504 ALIGN_OFF_40_47 3505 fmovd %d26, %d10 3506 fmovd %d28, %d12 3507 fmovd %d30, %d14 3508 3509 stda %d48, [%i0]ASI_BLK_P 3510 subcc %i3, 0x40, %i3 3511 add %i0, 0x40, %i0 3512 bgu,pt %ncc, 5b 3513 prefetch [%l0+0x80], #one_read 3514 ba .ci_blkdone 3515 membar #Sync 3516 3517ci_off55: 3518 ldda [%l0+0x30]%asi, %d12 3519 ldda [%l0+0x38]%asi, %d14 3520 prefetch [%l0+0x40], #one_read 3521 prefetch [%l0+0x80], #one_read 35226: 3523 add %l0, 0x40, %l0 3524 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3525 3526 ldda [%l0]ASI_BLK_AIUS, %d16 3527 ALIGN_OFF_48_55 3528 fmovd %d28, %d12 3529 fmovd %d30, %d14 3530 3531 stda %d48, [%i0]ASI_BLK_P 3532 subcc %i3, 0x40, %i3 3533 add %i0, 0x40, %i0 3534 bgu,pt %ncc, 6b 3535 prefetch [%l0+0x80], #one_read 3536 ba .ci_blkdone 3537 membar #Sync 3538 3539.ci_blkcpy: 3540 prefetch [%i1+0x40], #one_read 3541 prefetch [%i1+0x80], #one_read 35428: 3543 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3544 ldda [%i1]ASI_BLK_AIUS, %d0 3545 stda %d0, [%i0]ASI_BLK_P 3546 3547 add %i1, 0x40, %i1 3548 subcc %i3, 0x40, %i3 3549 add %i0, 0x40, %i0 3550 bgu,pt %ncc, 8b 3551 prefetch [%i1+0x80], #one_read 3552 membar #Sync 3553 3554.ci_blkdone: 3555#else /* NIAGARA_IMPL */ 3556 mov ASI_BLK_INIT_ST_QUAD_LDD_P, %asi 3557 3558 andcc %i1, 0xf, %o2 ! is src quadword aligned 3559 bz,pn %xcc, .ci_blkcpy ! 
src offset in %o2 (last 4-bits) 3560 nop 3561 cmp %o2, 0x8 3562 bg .ci_upper_double 3563 nop 3564 bl .ci_lower_double 3565 nop 3566 3567 ! Falls through when source offset is equal to 8 i.e. 3568 ! source is double word aligned. 3569 ! In this case no shift/merge of data is required 3570 3571 sub %i1, %o2, %i1 ! align the src at 16 bytes. 3572 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 3573 prefetch [%l0+0x0], #one_read 3574 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3575.ci_loop0: 3576 add %i1, 0x10, %i1 3577 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3578 3579 prefetch [%l0+0x40], #one_read 3580 3581 stxa %l3, [%i0+0x0]%asi 3582 stxa %l4, [%i0+0x8]%asi 3583 3584 add %i1, 0x10, %i1 3585 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3586 3587 stxa %l5, [%i0+0x10]%asi 3588 stxa %l2, [%i0+0x18]%asi 3589 3590 add %i1, 0x10, %i1 3591 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3592 3593 stxa %l3, [%i0+0x20]%asi 3594 stxa %l4, [%i0+0x28]%asi 3595 3596 add %i1, 0x10, %i1 3597 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3598 3599 stxa %l5, [%i0+0x30]%asi 3600 stxa %l2, [%i0+0x38]%asi 3601 3602 add %l0, 0x40, %l0 3603 subcc %i3, 0x40, %i3 3604 bgu,pt %xcc, .ci_loop0 3605 add %i0, 0x40, %i0 3606 ba .ci_blkdone 3607 add %i1, %o2, %i1 ! increment the source by src offset 3608 ! the src offset was stored in %o2 3609 3610.ci_lower_double: 3611 3612 sub %i1, %o2, %i1 ! align the src at 16 bytes. 3613 sll %o2, 3, %o0 ! %o0 left shift 3614 mov 0x40, %o1 3615 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 3616 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 3617 prefetch [%l0+0x0], #one_read 3618 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 ! partial data in %l2 3619 ! and %l3 has complete 3620 ! data 3621.ci_loop1: 3622 add %i1, 0x10, %i1 3623 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 ! %l4 has partial data 3624 ! for this read. 3625 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4 3626 ! into %l2 and %l3 3627 3628 prefetch [%l0+0x40], #one_read 3629 3630 stxa %l2, [%i0+0x0]%asi 3631 stxa %l3, [%i0+0x8]%asi 3632 3633 add %i1, 0x10, %i1 3634 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3635 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and 3636 ! %l4 from previous read 3637 ! into %l4 and %l5 3638 stxa %l4, [%i0+0x10]%asi 3639 stxa %l5, [%i0+0x18]%asi 3640 3641 ! Repeat the same for next 32 bytes. 3642 3643 add %i1, 0x10, %i1 3644 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3645 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) 3646 3647 stxa %l2, [%i0+0x20]%asi 3648 stxa %l3, [%i0+0x28]%asi 3649 3650 add %i1, 0x10, %i1 3651 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3652 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) 3653 3654 stxa %l4, [%i0+0x30]%asi 3655 stxa %l5, [%i0+0x38]%asi 3656 3657 add %l0, 0x40, %l0 3658 subcc %i3, 0x40, %i3 3659 bgu,pt %xcc, .ci_loop1 3660 add %i0, 0x40, %i0 3661 ba .ci_blkdone 3662 add %i1, %o2, %i1 ! increment the source by src offset 3663 ! the src offset was stored in %o2 3664 3665.ci_upper_double: 3666 3667 sub %i1, %o2, %i1 ! align the src at 16 bytes. 3668 sub %o2, 0x8, %o0 3669 sll %o0, 3, %o0 ! %o0 left shift 3670 mov 0x40, %o1 3671 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 3672 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 3673 prefetch [%l0+0x0], #one_read 3674 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 ! partial data in %l3 3675 ! for this read and 3676 ! no data in %l2 3677.ci_loop2: 3678 add %i1, 0x10, %i1 3679 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 ! %l4 has complete data 3680 ! 
and %l5 has partial 3681 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! merge %l3, %l4 and %l5 3682 ! into %l3 and %l4 3683 prefetch [%l0+0x40], #one_read 3684 3685 stxa %l3, [%i0+0x0]%asi 3686 stxa %l4, [%i0+0x8]%asi 3687 3688 add %i1, 0x10, %i1 3689 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3690 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with 3691 ! %l5 from previous read 3692 ! into %l5 and %l2 3693 3694 stxa %l5, [%i0+0x10]%asi 3695 stxa %l2, [%i0+0x18]%asi 3696 3697 ! Repeat the same for next 32 bytes. 3698 3699 add %i1, 0x10, %i1 3700 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3701 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) 3702 3703 stxa %l3, [%i0+0x20]%asi 3704 stxa %l4, [%i0+0x28]%asi 3705 3706 add %i1, 0x10, %i1 3707 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3708 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) 3709 3710 stxa %l5, [%i0+0x30]%asi 3711 stxa %l2, [%i0+0x38]%asi 3712 3713 add %l0, 0x40, %l0 3714 subcc %i3, 0x40, %i3 3715 bgu,pt %xcc, .ci_loop2 3716 add %i0, 0x40, %i0 3717 ba .ci_blkdone 3718 add %i1, %o2, %i1 ! increment the source by src offset 3719 ! the src offset was stored in %o2 3720 3721 3722 ! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P 3723.ci_blkcpy: 3724 3725 andn %i1, 0x3f, %o0 ! %o0 has block aligned source 3726 prefetch [%o0+0x0], #one_read 37271: 3728 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l0 3729 add %i1, 0x10, %i1 3730 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3731 add %i1, 0x10, %i1 3732 3733 prefetch [%o0+0x40], #one_read 3734 3735 stxa %l0, [%i0+0x0]%asi 3736 3737 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3738 add %i1, 0x10, %i1 3739 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l6 3740 add %i1, 0x10, %i1 3741 3742 stxa %l1, [%i0+0x8]%asi 3743 stxa %l2, [%i0+0x10]%asi 3744 stxa %l3, [%i0+0x18]%asi 3745 stxa %l4, [%i0+0x20]%asi 3746 stxa %l5, [%i0+0x28]%asi 3747 stxa %l6, [%i0+0x30]%asi 3748 stxa %l7, [%i0+0x38]%asi 3749 3750 add %o0, 0x40, %o0 3751 subcc %i3, 0x40, %i3 3752 bgu,pt %xcc, 1b 3753 add %i0, 0x40, %i0 3754 3755.ci_blkdone: 3756 membar #Sync 3757#endif /* NIAGARA_IMPL */ 3758 3759 brz,pt %i2, .copyin_exit 3760 nop 3761 3762 ! Handle trailing bytes 3763 cmp %i2, 0x8 3764 blu,pt %ncc, .ci_residue 3765 nop 3766 3767 ! Can we do some 8B ops 3768 or %i1, %i0, %o2 3769 andcc %o2, 0x7, %g0 3770 bnz %ncc, .ci_last4 3771 nop 3772 3773 ! Do 8byte ops as long as possible 3774.ci_last8: 3775 ldxa [%i1]ASI_USER, %o2 3776 stx %o2, [%i0] 3777 add %i1, 0x8, %i1 3778 sub %i2, 0x8, %i2 3779 cmp %i2, 0x8 3780 bgu,pt %ncc, .ci_last8 3781 add %i0, 0x8, %i0 3782 3783 brz,pt %i2, .copyin_exit 3784 nop 3785 3786 ba .ci_residue 3787 nop 3788 3789.ci_last4: 3790 ! Can we do 4B ops 3791 andcc %o2, 0x3, %g0 3792 bnz %ncc, .ci_last2 3793 nop 37941: 3795 lda [%i1]ASI_USER, %o2 3796 st %o2, [%i0] 3797 add %i1, 0x4, %i1 3798 sub %i2, 0x4, %i2 3799 cmp %i2, 0x4 3800 bgu,pt %ncc, 1b 3801 add %i0, 0x4, %i0 3802 3803 brz,pt %i2, .copyin_exit 3804 nop 3805 3806 ba .ci_residue 3807 nop 3808 3809.ci_last2: 3810 ! Can we do 2B ops 3811 andcc %o2, 0x1, %g0 3812 bnz %ncc, .ci_residue 3813 nop 3814 38151: 3816 lduha [%i1]ASI_USER, %o2 3817 stuh %o2, [%i0] 3818 add %i1, 0x2, %i1 3819 sub %i2, 0x2, %i2 3820 cmp %i2, 0x2 3821 bgu,pt %ncc, 1b 3822 add %i0, 0x2, %i0 3823 3824 brz,pt %i2, .copyin_exit 3825 nop 3826 3827 ! 
Copy the residue as byte copy 3828.ci_residue: 3829 lduba [%i1]ASI_USER, %i4 3830 stb %i4, [%i0] 3831 inc %i1 3832 deccc %i2 3833 bgu,pt %xcc, .ci_residue 3834 inc %i0 3835 3836.copyin_exit: 3837#if !defined(NIAGARA_IMPL) 3838 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 3839 wr %o2, 0, %gsr ! restore gsr 3840 3841 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 3842 btst FPRS_FEF, %o3 3843 bz %icc, 4f 3844 nop 3845 3846 ! restore fpregs from stack 3847 BLD_FP_FROMSTACK(%o2) 3848 3849 ba,pt %ncc, 2f 3850 wr %o3, 0, %fprs ! restore fprs 3851 38524: 3853 FZERO ! zero all of the fpregs 3854 wr %o3, 0, %fprs ! restore fprs 3855 38562: 3857 membar #Sync ! sync error barrier 3858 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 3859#else /* NIAGARA_IMPL */ 3860 membar #Sync 3861#endif /* NIAGARA_IMPL */ 3862 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3863 ret 3864 restore %g0, 0, %o0 3865.copyin_err: 3866 ldn [THREAD_REG + T_COPYOPS], %o4 3867 brz %o4, 2f 3868 nop 3869 ldn [%o4 + CP_COPYIN], %g2 3870 jmp %g2 3871 nop 38722: 3873 retl 3874 mov -1, %o0 3875 SET_SIZE(copyin) 3876 3877#endif /* lint */ 3878 3879#ifdef lint 3880 3881/*ARGSUSED*/ 3882int 3883xcopyin(const void *uaddr, void *kaddr, size_t count) 3884{ return (0); } 3885 3886#else /* lint */ 3887 3888 ENTRY(xcopyin) 3889 sethi %hi(.xcopyin_err), REAL_LOFAULT 3890 b .do_copyin 3891 or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT 3892.xcopyin_err: 3893 ldn [THREAD_REG + T_COPYOPS], %o4 3894 brz %o4, 2f 3895 nop 3896 ldn [%o4 + CP_XCOPYIN], %g2 3897 jmp %g2 3898 nop 38992: 3900 retl 3901 mov %g1, %o0 3902 SET_SIZE(xcopyin) 3903 3904#endif /* lint */ 3905 3906#ifdef lint 3907 3908/*ARGSUSED*/ 3909int 3910xcopyin_little(const void *uaddr, void *kaddr, size_t count) 3911{ return (0); } 3912 3913#else /* lint */ 3914 3915 ENTRY(xcopyin_little) 3916 sethi %hi(.little_err), %o4 3917 ldn [THREAD_REG + T_LOFAULT], %o5 3918 or %o4, %lo(.little_err), %o4 3919 membar #Sync ! sync error barrier 3920 stn %o4, [THREAD_REG + T_LOFAULT] 3921 3922 subcc %g0, %o2, %o3 3923 add %o0, %o2, %o0 3924 bz,pn %ncc, 2f ! check for zero bytes 3925 sub %o2, 1, %o4 3926 add %o0, %o4, %o0 ! start w/last byte 3927 add %o1, %o2, %o1 3928 lduba [%o0+%o3]ASI_AIUSL, %o4 3929 39301: stb %o4, [%o1+%o3] 3931 inccc %o3 3932 sub %o0, 2, %o0 ! get next byte 3933 bcc,a,pt %ncc, 1b 3934 lduba [%o0+%o3]ASI_AIUSL, %o4 3935 39362: membar #Sync ! sync error barrier 3937 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3938 retl 3939 mov %g0, %o0 ! return (0) 3940 3941.little_err: 3942 membar #Sync ! sync error barrier 3943 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3944 retl 3945 mov %g1, %o0 3946 SET_SIZE(xcopyin_little) 3947 3948#endif /* lint */ 3949 3950 3951/* 3952 * Copy a block of storage - must not overlap (from + len <= to). 3953 * No fault handler installed (to be called under on_fault()) 3954 */ 3955#if defined(lint) 3956 3957/* ARGSUSED */ 3958void 3959copyin_noerr(const void *ufrom, void *kto, size_t count) 3960{} 3961 3962#else /* lint */ 3963 3964 ENTRY(copyin_noerr) 3965 sethi %hi(.copyio_noerr), REAL_LOFAULT 3966 b .do_copyin 3967 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT 3968.copyio_noerr: 3969 jmp SAVED_LOFAULT 3970 nop 3971 SET_SIZE(copyin_noerr) 3972 3973#endif /* lint */ 3974 3975/* 3976 * Copy a block of storage - must not overlap (from + len <= to). 
3977 * No fault handler installed (to be called under on_fault()) 3978 */ 3979 3980#if defined(lint) 3981 3982/* ARGSUSED */ 3983void 3984copyout_noerr(const void *kfrom, void *uto, size_t count) 3985{} 3986 3987#else /* lint */ 3988 3989 ENTRY(copyout_noerr) 3990 sethi %hi(.copyio_noerr), REAL_LOFAULT 3991 b .do_copyout 3992 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT 3993 SET_SIZE(copyout_noerr) 3994 3995#endif /* lint */ 3996 3997#if defined(lint) 3998 3999int use_hw_bcopy = 1; 4000int use_hw_bzero = 1; 4001uint_t hw_copy_limit_1 = 0x100; 4002uint_t hw_copy_limit_2 = 0x200; 4003uint_t hw_copy_limit_4 = 0x400; 4004uint_t hw_copy_limit_8 = 0x400; 4005 4006#else /* !lint */ 4007 4008 .align 4 4009 DGDEF(use_hw_bcopy) 4010 .word 1 4011 DGDEF(use_hw_bzero) 4012 .word 1 4013 DGDEF(hw_copy_limit_1) 4014 .word 0x100 4015 DGDEF(hw_copy_limit_2) 4016 .word 0x200 4017 DGDEF(hw_copy_limit_4) 4018 .word 0x400 4019 DGDEF(hw_copy_limit_8) 4020 .word 0x400 4021 4022 .align 64 4023 .section ".text" 4024#endif /* !lint */ 4025 4026/* 4027 * hwblkclr - clears block-aligned, block-multiple-sized regions that are 4028 * longer than 256 bytes in length using Niagara's block stores/quad store. 4029 * If the criteria for using this routine are not met then it calls bzero 4030 * and returns 1. Otherwise 0 is returned indicating success. 4031 * Caller is responsible for ensuring use_hw_bzero is true and that 4032 * kpreempt_disable() has been called. 4033 */ 4034#ifdef lint 4035/*ARGSUSED*/ 4036int 4037hwblkclr(void *addr, size_t len) 4038{ 4039 return(0); 4040} 4041#else /* lint */ 4042 ! %i0 - start address 4043 ! %i1 - length of region (multiple of 64) 4044 4045 ENTRY(hwblkclr) 4046 save %sp, -SA(MINFRAME), %sp 4047 4048 ! Must be block-aligned 4049 andcc %i0, 0x3f, %g0 4050 bnz,pn %ncc, 1f 4051 nop 4052 4053 ! ... and must be 256 bytes or more 4054 cmp %i1, 0x100 4055 blu,pn %ncc, 1f 4056 nop 4057 4058 ! ... and length must be a multiple of 64 4059 andcc %i1, 0x3f, %g0 4060 bz,pn %ncc, .pz_doblock 4061 mov ASI_BLK_INIT_ST_QUAD_LDD_P, %asi 4062 40631: ! punt, call bzero but notify the caller that bzero was used 4064 mov %i0, %o0 4065 call bzero 4066 mov %i1, %o1 4067 ret 4068 restore %g0, 1, %o0 ! return (1) - did not use block operations 4069 4070 ! Already verified that there are at least 256 bytes to set 4071.pz_doblock: 4072 stxa %g0, [%i0+0x0]%asi 4073 stxa %g0, [%i0+0x40]%asi 4074 stxa %g0, [%i0+0x80]%asi 4075 stxa %g0, [%i0+0xc0]%asi 4076 4077 stxa %g0, [%i0+0x8]%asi 4078 stxa %g0, [%i0+0x10]%asi 4079 stxa %g0, [%i0+0x18]%asi 4080 stxa %g0, [%i0+0x20]%asi 4081 stxa %g0, [%i0+0x28]%asi 4082 stxa %g0, [%i0+0x30]%asi 4083 stxa %g0, [%i0+0x38]%asi 4084 4085 stxa %g0, [%i0+0x48]%asi 4086 stxa %g0, [%i0+0x50]%asi 4087 stxa %g0, [%i0+0x58]%asi 4088 stxa %g0, [%i0+0x60]%asi 4089 stxa %g0, [%i0+0x68]%asi 4090 stxa %g0, [%i0+0x70]%asi 4091 stxa %g0, [%i0+0x78]%asi 4092 4093 stxa %g0, [%i0+0x88]%asi 4094 stxa %g0, [%i0+0x90]%asi 4095 stxa %g0, [%i0+0x98]%asi 4096 stxa %g0, [%i0+0xa0]%asi 4097 stxa %g0, [%i0+0xa8]%asi 4098 stxa %g0, [%i0+0xb0]%asi 4099 stxa %g0, [%i0+0xb8]%asi 4100 4101 stxa %g0, [%i0+0xc8]%asi 4102 stxa %g0, [%i0+0xd0]%asi 4103 stxa %g0, [%i0+0xd8]%asi 4104 stxa %g0, [%i0+0xe0]%asi 4105 stxa %g0, [%i0+0xe8]%asi 4106 stxa %g0, [%i0+0xf0]%asi 4107 stxa %g0, [%i0+0xf8]%asi 4108 4109 sub %i1, 0x100, %i1 4110 cmp %i1, 0x100 4111 bgu,pt %ncc, .pz_doblock 4112 add %i0, 0x100, %i0 4113 41142: 4115 ! 
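/*
 * Shape sketch (illustrative): taken as a whole, hwblkclr behaves
 * roughly like
 *
 *	if (!block_aligned(addr) || len < 0x100 || (len & 0x3f) != 0) {
 *		bzero(addr, len);
 *		return (1);	! punted, block stores were not used
 *	}
 *	clear 256 bytes at a time while at least that many remain;
 *	clear 64 bytes at a time while at least that many remain;
 *	return (0);
 */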
    ! Check if more than 64 bytes to set
    cmp %i1, 0x40
    blu %ncc, .pz_finish
    nop

3:
    stxa %g0, [%i0+0x0]%asi
    stxa %g0, [%i0+0x8]%asi
    stxa %g0, [%i0+0x10]%asi
    stxa %g0, [%i0+0x18]%asi
    stxa %g0, [%i0+0x20]%asi
    stxa %g0, [%i0+0x28]%asi
    stxa %g0, [%i0+0x30]%asi
    stxa %g0, [%i0+0x38]%asi

    subcc %i1, 0x40, %i1
    bgu,pt %ncc, 3b
    add %i0, 0x40, %i0

.pz_finish:
    membar #Sync
    ret
    restore %g0, 0, %o0 ! return (bzero or not)
    SET_SIZE(hwblkclr)
#endif /* lint */

#ifdef lint
/* Copy 32 bytes of data from src to dst using physical addresses */
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else /*!lint */

    /*
     * Copy 32 bytes of data from src (%o0) to dst (%o1)
     * using physical addresses.
     */
    ENTRY_NP(hw_pa_bcopy32)
    rdpr %pstate, %g1
    andn %g1, PSTATE_IE, %g2
    wrpr %g0, %g2, %pstate

    ldxa [%o0]ASI_MEM, %o2
    add %o0, 8, %o0
    ldxa [%o0]ASI_MEM, %o3
    add %o0, 8, %o0
    ldxa [%o0]ASI_MEM, %o4
    add %o0, 8, %o0
    ldxa [%o0]ASI_MEM, %o5
    stxa %o2, [%o1]ASI_MEM
    add %o1, 8, %o1
    stxa %o3, [%o1]ASI_MEM
    add %o1, 8, %o1
    stxa %o4, [%o1]ASI_MEM
    add %o1, 8, %o1
    stxa %o5, [%o1]ASI_MEM

    membar #Sync
    retl
    wrpr %g0, %g1, %pstate
    SET_SIZE(hw_pa_bcopy32)
#endif /* lint */

/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */

/*
 * Control flow of the bzero/kzero/uzero routine.
 *
 * For stores of fewer than 7 bytes, the bytes are zeroed one at a time.
 *
 * For stores of fewer than 15 bytes, align the address on a 4 byte
 * boundary, then store as many 4-byte chunks as possible, followed by
 * any trailing bytes.
 *
 * For sizes greater than 15 bytes, align the address on an 8 byte
 * boundary.
 * if (count > 128) {
 *	store as many 8-byte chunks as needed to block align the address
 *	store using ASI_BLK_INIT_ST_QUAD_LDD_P (bzero/kzero) OR
 *	store using ASI_BLK_INIT_QUAD_LDD_AIUS (uzero)
 * }
 * Store as many 8-byte chunks as possible, followed by any trailing bytes.
 */

#if defined(lint)

/* ARGSUSED */
int
kzero(void *addr, size_t count)
{ return(0); }

/* ARGSUSED */
void
uzero(void *addr, size_t count)
{}

#else /* lint */

    ENTRY(uzero)
    !
    ! Set a new lo_fault handler only if we came in with one
    ! already specified.
    !
    wr %g0, ASI_USER, %asi
    ldn [THREAD_REG + T_LOFAULT], %o5
    tst %o5
    bz,pt %ncc, .do_zero
    sethi %hi(.zeroerr), %o2
    or %o2, %lo(.zeroerr), %o2
    membar #Sync
    ba,pt %ncc, .do_zero
    stn %o2, [THREAD_REG + T_LOFAULT]

    ENTRY(kzero)
    !
    ! Always set a lo_fault handler
    !
    wr %g0, ASI_P, %asi
    ldn [THREAD_REG + T_LOFAULT], %o5
    sethi %hi(.zeroerr), %o2
    or %o5, LOFAULT_SET, %o5
    or %o2, %lo(.zeroerr), %o2
    membar #Sync
    ba,pt %ncc, .do_zero
    stn %o2, [THREAD_REG + T_LOFAULT]
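/*
 * Handler bookkeeping sketch (illustrative) for the entry points
 * above, undone later by .zeroerr and .bzero_exit:
 *
 *	saved = curthread->t_lofault;	! kept in %o5
 *	if (kzero) {			! always installs a handler
 *		saved |= LOFAULT_SET;
 *		curthread->t_lofault = .zeroerr;
 *	} else if (saved != 0) {	! uzero/bzero: install only when
 *		curthread->t_lofault = .zeroerr;	! already under
 *	}					! an existing handler
 */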
/*
 * We got here because of a fault during kzero or if
 * uzero or bzero was called with t_lofault non-zero.
 * Otherwise we've already run screaming from the room.
 * Errno value is in %g1. Note that we're here iff
 * we did set t_lofault.
 */
.zeroerr:
    !
    ! Undo asi register setting. Just set it to be the
    ! kernel default without checking.
    !
    wr %g0, ASI_P, %asi

    !
    ! We did set t_lofault. It may well have been zero coming in.
    !
1:
    tst %o5
    membar #Sync
    bne,pn %ncc, 3f
    andncc %o5, LOFAULT_SET, %o5
2:
    !
    ! Old handler was zero. Just return the error.
    !
    retl ! return
    mov %g1, %o0 ! error code from %g1
3:
    !
    ! We're here because %o5 was non-zero. It was non-zero
    ! because either LOFAULT_SET was present, a previous fault
    ! handler was present or both. In all cases we need to reset
    ! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
    ! before we either simply return the error or we invoke the
    ! previously specified handler.
    !
    be %ncc, 2b
    stn %o5, [THREAD_REG + T_LOFAULT]
    jmp %o5 ! goto real handler
    nop
    SET_SIZE(kzero)
    SET_SIZE(uzero)

#endif /* lint */

/*
 * Zero a block of storage.
 */

#if defined(lint)

/* ARGSUSED */
void
bzero(void *addr, size_t count)
{}

#else /* lint */

    ENTRY(bzero)
    wr %g0, ASI_P, %asi

    ldn [THREAD_REG + T_LOFAULT], %o5 ! save old vector
    tst %o5
    bz,pt %ncc, .do_zero
    sethi %hi(.zeroerr), %o2
    or %o2, %lo(.zeroerr), %o2
    membar #Sync ! sync error barrier
    stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector

.do_zero:
    cmp %o1, 7
    blu,pn %ncc, .byteclr
    nop

    cmp %o1, 15
    blu,pn %ncc, .wdalign
    nop

    andcc %o0, 7, %o3 ! is address aligned on an 8 byte boundary
    bz,pt %ncc, .blkalign ! already double aligned
    sub %o3, 8, %o3 ! -(bytes till double aligned)
    add %o1, %o3, %o1 ! update %o1 with new count

1:
    stba %g0, [%o0]%asi
    inccc %o3
    bl,pt %ncc, 1b
    inc %o0

    ! Now address is double aligned
.blkalign:
    cmp %o1, 0x80 ! check if there are 128 bytes to set
    blu,pn %ncc, .bzero_small
    mov %o1, %o3

    sethi %hi(use_hw_bzero), %o2
    ld [%o2 + %lo(use_hw_bzero)], %o2
    tst %o2
    bz %ncc, .bzero_small
    mov %o1, %o3

    rd %asi, %o3
    wr %g0, ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
    cmp %o3, ASI_P
    bne,a %ncc, .algnblk
    wr %g0, ASI_BLK_INIT_QUAD_LDD_AIUS, %asi

.algnblk:
    andcc %o0, 0x3f, %o3 ! is block aligned?
    bz,pt %ncc, .bzero_blk
    sub %o3, 0x40, %o3 ! -(bytes till block aligned)
    add %o1, %o3, %o1 ! %o1 is the remainder

    ! Clear -(%o3) bytes till block aligned
1:
    stxa %g0, [%o0]%asi
    addcc %o3, 8, %o3
    bl,pt %ncc, 1b
    add %o0, 8, %o0

.bzero_blk:
    and %o1, 0x3f, %o3 ! calc bytes left after blk clear
    andn %o1, 0x3f, %o4 ! calc size of blocks in bytes
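/*
 * ASI selection sketch (illustrative): .do_zero runs with %asi = ASI_P
 * for bzero/kzero and ASI_USER for uzero. The code above swaps in the
 * matching block-init ASI for the block loop, and .bzero_blk_done
 * below maps it back afterwards:
 *
 *	ASI_P    <-> ASI_BLK_INIT_ST_QUAD_LDD_P
 *	ASI_USER <-> ASI_BLK_INIT_QUAD_LDD_AIUS
 */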
/*
 * Zero a block of storage.
 */

#if defined(lint)

/* ARGSUSED */
void
bzero(void *addr, size_t count)
{}

#else	/* lint */

	ENTRY(bzero)
	wr	%g0, ASI_P, %asi

	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync				! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

.do_zero:
	cmp	%o1, 7
	blu,pn	%ncc, .byteclr
	nop

	cmp	%o1, 15
	blu,pn	%ncc, .wdalign
	nop

	andcc	%o0, 7, %o3		! is addr aligned on an 8 byte bound?
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o1, %o3, %o1		! update o1 with new count

1:
	stba	%g0, [%o0]%asi
	inccc	%o3
	bl,pt	%ncc, 1b
	inc	%o0

	! Now address is double aligned
.blkalign:
	cmp	%o1, 0x80		! check if there are 128 bytes to set
	blu,pn	%ncc, .bzero_small
	mov	%o1, %o3

	sethi	%hi(use_hw_bzero), %o2
	ld	[%o2 + %lo(use_hw_bzero)], %o2
	tst	%o2
	bz	%ncc, .bzero_small
	mov	%o1, %o3

	rd	%asi, %o3
	wr	%g0, ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
	cmp	%o3, ASI_P
	bne,a	%ncc, .algnblk
	wr	%g0, ASI_BLK_INIT_QUAD_LDD_AIUS, %asi

.algnblk:
	andcc	%o0, 0x3f, %o3		! is block aligned?
	bz,pt	%ncc, .bzero_blk
	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
	add	%o1, %o3, %o1		! o1 is the remainder

	! Clear -(%o3) bytes till block aligned
1:
	stxa	%g0, [%o0]%asi
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o0, 8, %o0

.bzero_blk:
	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! 256 bytes or more?
	blu,pn	%ncc, 3f
	nop

2:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x40]%asi
	stxa	%g0, [%o0+0x80]%asi
	stxa	%g0, [%o0+0xc0]%asi

	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	stxa	%g0, [%o0+0x48]%asi
	stxa	%g0, [%o0+0x50]%asi
	stxa	%g0, [%o0+0x58]%asi
	stxa	%g0, [%o0+0x60]%asi
	stxa	%g0, [%o0+0x68]%asi
	stxa	%g0, [%o0+0x70]%asi
	stxa	%g0, [%o0+0x78]%asi

	stxa	%g0, [%o0+0x88]%asi
	stxa	%g0, [%o0+0x90]%asi
	stxa	%g0, [%o0+0x98]%asi
	stxa	%g0, [%o0+0xa0]%asi
	stxa	%g0, [%o0+0xa8]%asi
	stxa	%g0, [%o0+0xb0]%asi
	stxa	%g0, [%o0+0xb8]%asi

	stxa	%g0, [%o0+0xc8]%asi
	stxa	%g0, [%o0+0xd0]%asi
	stxa	%g0, [%o0+0xd8]%asi
	stxa	%g0, [%o0+0xe0]%asi
	stxa	%g0, [%o0+0xe8]%asi
	stxa	%g0, [%o0+0xf0]%asi
	stxa	%g0, [%o0+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100
	bgu,pt	%ncc, 2b
	add	%o0, 0x100, %o0

3:
	cmp	%o4, 0x40		! check if 64 bytes to set
	blu	%ncc, .bzero_blk_done
	nop

4:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 3b
	add	%o0, 0x40, %o0

.bzero_blk_done:
	membar	#Sync
	!
	! Undo asi register setting.
	!
	rd	%asi, %o4
	wr	%g0, ASI_P, %asi
	cmp	%o4, ASI_BLK_INIT_ST_QUAD_LDD_P
	bne,a	%ncc, .bzero_small
	wr	%g0, ASI_USER, %asi

.bzero_small:
	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .byteclr
	and	%o1, 7, %o1		! calc bytes left after doubles

.dbclr:
	stxa	%g0, [%o0]%asi		! Clear the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .dbclr
	add	%o0, 8, %o0

	ba	.byteclr
	nop

.wdalign:
	andcc	%o0, 3, %o3		! is addr aligned on a word boundary?
	bz,pn	%ncc, .wdclr
	andn	%o1, 3, %o3		! create word sized count in %o3

	dec	%o1			! decrement count
	stba	%g0, [%o0]%asi		! clear a byte
	ba	.wdalign
	inc	%o0			! next byte

.wdclr:
	sta	%g0, [%o0]%asi		! 4-byte clearing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wdclr
	inc	4, %o0

	and	%o1, 3, %o1		! leftover count, if any

.byteclr:
	! Set the leftover bytes
	brz	%o1, .bzero_exit
	nop

7:
	deccc	%o1			! byte clearing loop
	stba	%g0, [%o0]%asi
	bgu,pt	%ncc, 7b
	inc	%o0

.bzero_exit:
	!
	! We're just concerned with whether t_lofault was set
	! when we came in. We end up here from either kzero()
	! or bzero(). kzero() *always* sets a lofault handler.
	! It ors LOFAULT_SET into %o5 to indicate it has done
	! this even if the value of %o5 is otherwise zero.
	! bzero() sets a lofault handler *only* if one was
	! previously set. Accordingly we need to examine
	! %o5 and, if it is non-zero, be sure to clear LOFAULT_SET
	! before restoring the old error handler.
	!
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	retl
	clr	%o0			! return (0)

	SET_SIZE(bzero)
#endif	/* lint */
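/*
 * The t_lofault bookkeeping shared by the zeroing routines above,
 * restated as an illustrative C-style sketch (explanatory only;
 * "install" stands in for the membar/stn sequence, and a fault while
 * zeroing transfers control to .zeroerr):
 *
 *	saved = curthread->t_lofault;
 *	if (kzero)				// kzero: always install
 *		install(.zeroerr), %o5 = saved | LOFAULT_SET;
 *	else if (saved != 0)			// bzero/uzero: only if set
 *		install(.zeroerr), %o5 = saved;
 *	zero the block;
 *	if (%o5 != 0)				// restore previous handler
 *		curthread->t_lofault = %o5 & ~LOFAULT_SET;
 *	return (0);
 */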