/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>
#include <sys/machasi.h>
#include <sys/niagaraasi.h>

#if !defined(lint)
#include "assym.h"
#endif	/* lint */


/*
 * Pseudo-code to aid in understanding the control flow of the
 * bcopy/kcopy routine.
 *
 *	! WARNING : <Register usage convention>
 *	! In kcopy() %o5 holds the previous error handler and a flag
 *	! LOFAULT_SET (low bits). %o5 is null in bcopy().
 *	! %o5 is not available for any other use.
 *
 * kcopy():
 *	%o5 = curthread->t_lofault;		! save existing handler in %o5
 *	%o5 |= LOFAULT_SET;			! OR in LOFAULT_SET flag
 *	curthread->t_lofault = .copyerr;
 *	Call bcopy();
 *
 * bcopy():
 *	if (length < 128)
 *		goto regular_copy;
 *
 *	if (!use_hw_bcopy)
 *		goto regular_copy;
 *
 *	blockcopy;
 *	restore t_lofault handler if came from kcopy();
 *
 *	regular_copy;
 *	restore t_lofault handler if came from kcopy();
 *
 * In lofault handler:
 *	curthread->t_lofault = (%o5 & ~LOFAULT_SET);	! restore old t_lofault
 *	return (errno)
 *
 */

/*
 * Less than or equal to this number of bytes we will always copy byte-for-byte
 */
#define	SMALL_LIMIT	7

/*
 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
 * handler was set
 */
#define	LOFAULT_SET 2

/*
 * This define is to align data for the unaligned source cases.
 * The data1, data2 and data3 are merged into data1 and data2.
 * The data3 is preserved for the next merge.
 */
#define	ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp)	\
	sllx	data1, lshift, data1	;\
	srlx	data2, rshift, tmp	;\
	or	data1, tmp, data1	;\
	sllx	data2, lshift, data2	;\
	srlx	data3, rshift, tmp	;\
	or	data2, tmp, data2
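
/*
 * Illustration only (not part of the build): a rough C sketch of one
 * ALIGN_DATA expansion. With "off" the source byte offset within a
 * doubleword, lshift == off * 8 and rshift == 64 - lshift.
 *
 *	uint64_t d1, d2, d3;	d3 arrives with the newest data
 *	d1 = (d1 << lshift) | (d2 >> rshift);
 *	d2 = (d2 << lshift) | (d3 >> rshift);
 *	d3 is left untouched, preserved for the next merge
 */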
/*
 * This macro is to align the data. Basically it merges
 * data1 and data2 to form a double word.
 */
#define	ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp)	\
	sllx	data1, lshift, data1	;\
	srlx	data2, rshift, tmp	;\
	or	data1, tmp, data1

#if !defined(NIAGARA_IMPL)
/*
 * Flags set in the lower bits of the t_lofault address:
 * FPUSED_FLAG: The FP registers were in use and must be restored
 * BCOPY_FLAG: Set for bcopy calls, cleared for kcopy calls
 * COPY_FLAGS: Both of the above
 *
 * Other flags:
 * KPREEMPT_FLAG: kpreempt needs to be called
 */
#define	FPUSED_FLAG	1
#define	BCOPY_FLAG	2
#define	COPY_FLAGS	(FPUSED_FLAG | BCOPY_FLAG)
#define	KPREEMPT_FLAG	4

#define	ALIGN_OFF_1_7			\
	faligndata %d0, %d2, %d48	;\
	faligndata %d2, %d4, %d50	;\
	faligndata %d4, %d6, %d52	;\
	faligndata %d6, %d8, %d54	;\
	faligndata %d8, %d10, %d56	;\
	faligndata %d10, %d12, %d58	;\
	faligndata %d12, %d14, %d60	;\
	faligndata %d14, %d16, %d62

#define	ALIGN_OFF_8_15			\
	faligndata %d2, %d4, %d48	;\
	faligndata %d4, %d6, %d50	;\
	faligndata %d6, %d8, %d52	;\
	faligndata %d8, %d10, %d54	;\
	faligndata %d10, %d12, %d56	;\
	faligndata %d12, %d14, %d58	;\
	faligndata %d14, %d16, %d60	;\
	faligndata %d16, %d18, %d62

#define	ALIGN_OFF_16_23			\
	faligndata %d4, %d6, %d48	;\
	faligndata %d6, %d8, %d50	;\
	faligndata %d8, %d10, %d52	;\
	faligndata %d10, %d12, %d54	;\
	faligndata %d12, %d14, %d56	;\
	faligndata %d14, %d16, %d58	;\
	faligndata %d16, %d18, %d60	;\
	faligndata %d18, %d20, %d62

#define	ALIGN_OFF_24_31			\
	faligndata %d6, %d8, %d48	;\
	faligndata %d8, %d10, %d50	;\
	faligndata %d10, %d12, %d52	;\
	faligndata %d12, %d14, %d54	;\
	faligndata %d14, %d16, %d56	;\
	faligndata %d16, %d18, %d58	;\
	faligndata %d18, %d20, %d60	;\
	faligndata %d20, %d22, %d62

#define	ALIGN_OFF_32_39			\
	faligndata %d8, %d10, %d48	;\
	faligndata %d10, %d12, %d50	;\
	faligndata %d12, %d14, %d52	;\
	faligndata %d14, %d16, %d54	;\
	faligndata %d16, %d18, %d56	;\
	faligndata %d18, %d20, %d58	;\
	faligndata %d20, %d22, %d60	;\
	faligndata %d22, %d24, %d62

#define	ALIGN_OFF_40_47			\
	faligndata %d10, %d12, %d48	;\
	faligndata %d12, %d14, %d50	;\
	faligndata %d14, %d16, %d52	;\
	faligndata %d16, %d18, %d54	;\
	faligndata %d18, %d20, %d56	;\
	faligndata %d20, %d22, %d58	;\
	faligndata %d22, %d24, %d60	;\
	faligndata %d24, %d26, %d62

#define	ALIGN_OFF_48_55			\
	faligndata %d12, %d14, %d48	;\
	faligndata %d14, %d16, %d50	;\
	faligndata %d16, %d18, %d52	;\
	faligndata %d18, %d20, %d54	;\
	faligndata %d20, %d22, %d56	;\
	faligndata %d22, %d24, %d58	;\
	faligndata %d24, %d26, %d60	;\
	faligndata %d26, %d28, %d62

#define	ALIGN_OFF_56_63			\
	faligndata %d14, %d16, %d48	;\
	faligndata %d16, %d18, %d50	;\
	faligndata %d18, %d20, %d52	;\
	faligndata %d20, %d22, %d54	;\
	faligndata %d22, %d24, %d56	;\
	faligndata %d24, %d26, %d58	;\
	faligndata %d26, %d28, %d60	;\
	faligndata %d28, %d30, %d62

#define	VIS_BLOCKSIZE	64

/*
 * Size of stack frame in order to accommodate a 64-byte aligned
 * floating-point register save area and 2 64-bit temp locations.
 * All copy functions use three quadrants of fp registers; to assure a
 * block-aligned three block buffer in which to save we must reserve
 * four blocks on stack.
 *
 *    _______________________________________ <-- %fp + STACK_BIAS
 *    | We may need to preserve 3 quadrants |
 *    | of fp regs, but since we do so with |
 *    | BST/BLD we need room in which to    |
 *    | align to VIS_BLOCKSIZE bytes.  So   |
 *    | this area is 4 * VIS_BLOCKSIZE.     | <--  - SAVED_FPREGS_OFFSET
 *    |-------------------------------------|
 *    | 8 bytes to save %fprs               | <--  - SAVED_FPRS_OFFSET
 *    |-------------------------------------|
 *    | 8 bytes to save %gsr                | <--  - SAVED_GSR_OFFSET
 *    ---------------------------------------
 */
#define	HWCOPYFRAMESIZE		((VIS_BLOCKSIZE * (3 + 1)) + (2 * 8))
#define	SAVED_FPREGS_OFFSET	(VIS_BLOCKSIZE * 4)
#define	SAVED_FPREGS_ADJUST	((VIS_BLOCKSIZE * 3) + 1)
#define	SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 8)
#define	SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 8)
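
/*
 * Worked out (illustration only): with VIS_BLOCKSIZE == 64 the frame
 * reserves (64 * 4) + (2 * 8) = 272 bytes below %fp + STACK_BIAS:
 * 256 bytes so that a 192-byte (3-block) save area can always be
 * placed on a 64-byte boundary inside it, plus 8 bytes each for
 * %fprs and %gsr.
 */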
/*
 * In FP copies if we do not have preserved data to restore over
 * the fp regs we used then we must zero those regs to avoid
 * exposing portions of the data to later threads (data security).
 */
#define	FZERO				\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f16		;\
	fmuld	%f0, %f2, %f18		;\
	faddd	%f0, %f2, %f20		;\
	fmuld	%f0, %f2, %f22		;\
	faddd	%f0, %f2, %f24		;\
	fmuld	%f0, %f2, %f26		;\
	faddd	%f0, %f2, %f28		;\
	fmuld	%f0, %f2, %f30		;\
	faddd	%f0, %f2, %f48		;\
	fmuld	%f0, %f2, %f50		;\
	faddd	%f0, %f2, %f52		;\
	fmuld	%f0, %f2, %f54		;\
	faddd	%f0, %f2, %f56		;\
	fmuld	%f0, %f2, %f58		;\
	faddd	%f0, %f2, %f60		;\
	fmuld	%f0, %f2, %f62

#if !defined(lint)

/*
 * Macros to save and restore fp registers to/from the stack.
 * Used to save and restore in-use fp registers when we want to use FP.
 */
#define	BST_FP_TOSTACK(tmp1)					\
	/* membar #Sync	*/					;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	stda	%f0, [tmp1]ASI_BLK_P				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	stda	%f16, [tmp1]ASI_BLK_P				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	stda	%f48, [tmp1]ASI_BLK_P				;\
	membar	#Sync

#define	BLD_FP_FROMSTACK(tmp1)					\
	/* membar #Sync - provided at copy completion */	;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	ldda	[tmp1]ASI_BLK_P, %f0				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	ldda	[tmp1]ASI_BLK_P, %f16				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	ldda	[tmp1]ASI_BLK_P, %f48				;\
	membar	#Sync
#endif	/* NIAGARA_IMPL */

#endif	/* lint */
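
/*
 * Illustration only: the lofault hand-off kcopy() performs, as a
 * rough C sketch (types simplified; the real flow is in the assembly
 * below, and the flag bits ride in the low bits of the saved value).
 *
 *	int
 *	kcopy(const void *from, void *to, size_t count)
 *	{
 *		saved = curthread->t_lofault;	may carry flag bits
 *		curthread->t_lofault = .copyerr;
 *		bcopy(from, to, count);
 *		curthread->t_lofault = saved;	on the way out
 *		return (0);			or errno if .copyerr ran
 *	}
 */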
/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */

#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return(0); }

#else	/* lint */

	.seg	".text"
	.align	4

	ENTRY(kcopy)

#if !defined(NIAGARA_IMPL)
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
	or	%l7, %lo(.copyerr), %l7
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	! Note that we carefully do *not* flag the setting of
	! t_lofault.
	membar	#Sync				! sync error barrier
	b	.do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

/*
 * We got here because of a fault during kcopy or bcopy if a fault
 * handler existed when bcopy was called.
 * Errno value is in %g1.
 */
.copyerr:
	sethi	%hi(.copyerr2), %l1
	or	%l1, %lo(.copyerr2), %l1
	membar	#Sync				! sync error barrier
	stn	%l1, [THREAD_REG + T_LOFAULT]	! set t_lofault
	btst	FPUSED_FLAG, %o5
	bz,pt	%xcc, 1f
	and	%o5, BCOPY_FLAG, %l1		! copy flag to %l1

	membar	#Sync				! sync error barrier
	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	! restore fpregs from stack
	BLD_FP_FROMSTACK(%o2)

	ba,pt	%ncc, 2f
	wr	%o3, 0, %fprs		! restore fprs

4:
	FZERO
	wr	%o3, 0, %fprs		! restore fprs

2:
	ldn	[THREAD_REG + T_LWP], %o2
	brnz,pt	%o2, 1f
	nop

	ldsb	[THREAD_REG + T_PREEMPT], %l0
	deccc	%l0
	bnz,pn	%ncc, 1f
	stb	%l0, [THREAD_REG + T_PREEMPT]

	! Check for a kernel preemption request
	ldn	[THREAD_REG + T_CPU], %l0
	ldub	[%l0 + CPU_KPRUNRUN], %l0
	brnz,a,pt %l0, 1f		! Need to call kpreempt?
	or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag

	! The kcopy will always set a t_lofault handler. If it fires,
	! we're expected to just return the error code and not to
	! invoke any existing error handler. As far as bcopy is concerned,
	! we only set t_lofault if there was an existing lofault handler.
	! In that case we're expected to invoke the previously existing
	! handler after resetting the t_lofault value.
1:
	andn	%o5, COPY_FLAGS, %o5	! remove flags from lofault address
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	! call kpreempt if necessary
	btst	KPREEMPT_FLAG, %l1
	bz,pt	%icc, 2f
	nop
	call	kpreempt
	rdpr	%pil, %o0		! pass %pil
2:
	btst	BCOPY_FLAG, %l1
	bnz,pn	%ncc, 3f
	nop
	ret
	restore	%g1, 0, %o0

3:
	! We're here via bcopy. There must have been an error handler
	! in place otherwise we would have died a nasty death already.
	jmp	%o5			! goto real handler
	restore	%g0, 0, %o0		! dispose of copy window

/*
 * We got here because of a fault in .copyerr. We can't safely restore fp
 * state, so we panic.
 */
fp_panic_msg:
	.asciz	"Unable to restore fp state after copy operation"

	.align	4
.copyerr2:
	set	fp_panic_msg, %o0
	call	panic
	nop
#else	/* NIAGARA_IMPL */
	save	%sp, -SA(MINFRAME), %sp
	set	.copyerr, %l7			! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	or	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	b	.do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
.copyerr:
	! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
	! into %o5 to indicate it has set t_lofault handler. Need to clear
	! LOFAULT_SET flag before restoring the error handler.
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g1, 0, %o0
#endif	/* NIAGARA_IMPL */

	SET_SIZE(kcopy)
#endif	/* lint */
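
/*
 * Illustration only: how the low bits of the saved t_lofault value in
 * %o5 are used on the non-Niagara path (a rough C sketch; the names
 * match the defines above).
 *
 *	uintptr_t o5 = saved_lofault | BCOPY_FLAG | FPUSED_FLAG;
 *	...
 *	uintptr_t handler = o5 & ~COPY_FLAGS;	recover the address
 *	int from_bcopy = (o5 & BCOPY_FLAG) != 0;
 *	int fp_in_use = (o5 & FPUSED_FLAG) != 0;
 *
 * This works because handler addresses are instruction-aligned
 * (4-byte) on SPARC, leaving the two low bits free for flags.
 */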

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 */
#if defined(lint)

/* ARGSUSED */
void
bcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(bcopy)

#if !defined(NIAGARA_IMPL)
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	brz,pt	%o5, .do_copy
	nop
	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
	or	%l7, %lo(.copyerr), %l7
	membar	#Sync				! sync error barrier
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
	! We've already captured whether t_lofault was zero on entry.
	! We need to mark ourselves as being from bcopy since both
	! kcopy and bcopy use the same code path. If BCOPY_FLAG is
	! set and the saved lofault was zero, we won't reset lofault on
	! returning.
	or	%o5, BCOPY_FLAG, %o5
#else	/* NIAGARA_IMPL */
	save	%sp, -SA(MINFRAME), %sp
	clr	%o5			! flag LOFAULT_SET is not set for bcopy
#endif	/* NIAGARA_IMPL */

.do_copy:
	cmp	%i2, 12			! for small counts
	blu	%ncc, .bytecp		! just copy bytes
	.empty

	cmp	%i2, 128		! for less than 128 bytes
	blu,pn	%ncc, .bcb_punt		! no block st/quad ld
	nop

	set	use_hw_bcopy, %o2
	ld	[%o2], %o2
	brz,pn	%o2, .bcb_punt
	nop

	subcc	%i1, %i0, %i3
	bneg,a,pn %ncc, 1f
	neg	%i3
1:
	/*
	 * Compare against 256 since we should be checking block addresses
	 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
	 * src = dest + (64 * 3) + 63.
	 */
	cmp	%i3, 256
	blu,pn	%ncc, .bcb_punt
	nop

	/*
	 * Copies that reach here have at least 2 blocks of data to copy.
	 */
#if !defined(NIAGARA_IMPL)
	ldn	[THREAD_REG + T_LWP], %o3
	brnz,pt	%o3, 1f
	nop

	! kpreempt_disable();
	ldsb	[THREAD_REG + T_PREEMPT], %o2
	inc	%o2
	stb	%o2, [THREAD_REG + T_PREEMPT]

1:
	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
	btst	FPRS_FEF, %o2
	bz,a,pt	%icc, .do_blockcopy
	wr	%g0, FPRS_FEF, %fprs

	! save in-use fpregs on stack
	BST_FP_TOSTACK(%o2)
#endif	/* NIAGARA_IMPL */

.do_blockcopy:

#if !defined(NIAGARA_IMPL)
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	or	%o5, FPUSED_FLAG, %o5	! fp regs are in use
#endif	/* NIAGARA_IMPL */

	! Swap src/dst since the code below is memcpy code
	! and memcpy/bcopy have different calling sequences
	mov	%i1, %i5
	mov	%i0, %i1
	mov	%i5, %i0

	! Block (64 bytes) align the destination.
	andcc	%i0, 0x3f, %i3		! is dst aligned on a 64-byte boundary
	bz	%xcc, .chksrc		! dst is already block aligned
	sub	%i3, 0x40, %i3
	neg	%i3			! bytes till dst 64 bytes aligned
	sub	%i2, %i3, %i2		! update i2 with new count

	! Based on source and destination alignment do
	! either 8 bytes, 4 bytes, 2 bytes or byte copy.

	! Is dst & src 8B aligned
	or	%i0, %i1, %o2
	andcc	%o2, 0x7, %g0
	bz	%ncc, .alewdcp
	nop

	! Is dst & src 4B aligned
	andcc	%o2, 0x3, %g0
	bz	%ncc, .alwdcp
	nop

	! Is dst & src 2B aligned
	andcc	%o2, 0x1, %g0
	bz	%ncc, .alhlfwdcp
	nop

	! 1B aligned
1:	ldub	[%i1], %o2
	stb	%o2, [%i0]
	inc	%i1
	deccc	%i3
	bgu,pt	%ncc, 1b
	inc	%i0

	ba	.chksrc
	nop

	! dst & src 4B aligned
.alwdcp:
	ld	[%i1], %o2
	st	%o2, [%i0]
	add	%i1, 0x4, %i1
	subcc	%i3, 0x4, %i3
	bgu,pt	%ncc, .alwdcp
	add	%i0, 0x4, %i0

	ba	.chksrc
	nop
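
/*
 * Illustration only: the destination block-alignment arithmetic above,
 * as a rough C sketch.
 *
 *	size_t off = (uintptr_t)dst & 0x3f;	low 6 bits
 *	if (off != 0) {
 *		size_t head = 0x40 - off;	bytes until 64B boundary
 *		count -= head;			head copied by loops below
 *	}
 */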
	! dst & src 2B aligned
.alhlfwdcp:
	lduh	[%i1], %o2
	stuh	%o2, [%i0]
	add	%i1, 0x2, %i1
	subcc	%i3, 0x2, %i3
	bgu,pt	%ncc, .alhlfwdcp
	add	%i0, 0x2, %i0

	ba	.chksrc
	nop

	! dst & src 8B aligned
.alewdcp:
	ldx	[%i1], %o2
	stx	%o2, [%i0]
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, .alewdcp
	add	%i0, 0x8, %i0

	! Now Destination is block (64 bytes) aligned
.chksrc:
	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
	sub	%i2, %i3, %i2		! Residue bytes in %i2

	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi

#if !defined(NIAGARA_IMPL)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned src address
	prefetch [%l0+0x0], #one_read
	andcc	%i1, 0x3f, %g0		! is src 64B aligned
	bz,pn	%ncc, .blkcpy
	nop

	! handle misaligned source cases
	alignaddr %i1, %g0, %g0		! generate %gsr

	srl	%i1, 0x3, %l1		! src addr bits 3, 4, 5 are now least
					! significant in %l1
	andcc	%l1, 0x7, %l2		! mask everything except bits 1, 2, 3
	add	%i1, %i3, %i1

	! switch statement to get to right 8 byte block within
	! 64 byte block
	cmp	%l2, 0x4
	bgeu,a	hlf
	cmp	%l2, 0x6
	cmp	%l2, 0x2
	bgeu,a	sqtr
	nop
	cmp	%l2, 0x1
	be,a	off15
	nop
	ba	off7
	nop
sqtr:
	be,a	off23
	nop
	ba,a	off31
	nop

hlf:
	bgeu,a	fqtr
	nop
	cmp	%l2, 0x5
	be,a	off47
	nop
	ba	off39
	nop
fqtr:
	be,a	off55
	nop

	! Falls through when the source offset is greater than 56
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
7:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_56_63
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 7b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This copy case for source offset between 1 and 7
off7:
	ldda	[%l0]ASI_BLK_P, %d0
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
0:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_1_7
	fmovd	%d16, %d0
	fmovd	%d18, %d2
	fmovd	%d20, %d4
	fmovd	%d22, %d6
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 0b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This copy case for source offset between 8 and 15
off15:
	ldd	[%l0+0x8], %d2
	ldd	[%l0+0x10], %d4
	ldd	[%l0+0x18], %d6
	ldd	[%l0+0x20], %d8
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
1:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_8_15
	fmovd	%d18, %d2
	fmovd	%d20, %d4
	fmovd	%d22, %d6
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 1b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync
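
/*
 * Illustration only: the "switch" above picks one of the offN loops
 * from the source's doubleword index within its 64-byte block; a
 * rough C sketch of the selection.
 *
 *	int dw = ((uintptr_t)src >> 3) & 0x7;	0..7
 *	switch (dw) {
 *	case 0: goto off7;	source offset 1..7
 *	case 1: goto off15;	source offset 8..15
 *	...
 *	case 7: fall through;	source offset 57..63
 *	}
 */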
	! This copy case for source offset between 16 and 23
off23:
	ldd	[%l0+0x10], %d4
	ldd	[%l0+0x18], %d6
	ldd	[%l0+0x20], %d8
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
2:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_16_23
	fmovd	%d20, %d4
	fmovd	%d22, %d6
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 2b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This copy case for source offset between 24 and 31
off31:
	ldd	[%l0+0x18], %d6
	ldd	[%l0+0x20], %d8
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
3:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_24_31
	fmovd	%d22, %d6
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 3b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This copy case for source offset between 32 and 39
off39:
	ldd	[%l0+0x20], %d8
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
4:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_32_39
	fmovd	%d24, %d8
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 4b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This copy case for source offset between 40 and 47
off47:
	ldd	[%l0+0x28], %d10
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
5:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_40_47
	fmovd	%d26, %d10
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 5b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync

	! This copy case for source offset between 48 and 55
off55:
	ldd	[%l0+0x30], %d12
	ldd	[%l0+0x38], %d14
	prefetch [%l0+0x40], #one_read
	prefetch [%l0+0x80], #one_read
6:
	add	%l0, 0x40, %l0
	stxa	%g0, [%i0]%asi		! initialize the cache line

	ldda	[%l0]ASI_BLK_P, %d16
	ALIGN_OFF_48_55
	fmovd	%d28, %d12
	fmovd	%d30, %d14

	stda	%d48, [%i0]ASI_BLK_P
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 6b
	prefetch [%l0+0x80], #one_read
	ba	.blkdone
	membar	#Sync
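
/*
 * Note on the "stxa %g0, [%i0]%asi" above and below (illustration
 * only): with a block-initializing store ASI selected into %asi, a
 * store to the first word of a cache line allocates and zeroes the
 * line rather than fetching its old contents from memory, avoiding
 * the read-for-ownership traffic a normal store would generate. See
 * the Niagara processor documentation for the precise semantics.
 */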
	! Both source and destination are block aligned.
.blkcpy:
	prefetch [%i1+0x40], #one_read
	prefetch [%i1+0x80], #one_read
8:
	stxa	%g0, [%i0]%asi		! initialize the cache line
	ldda	[%i1]ASI_BLK_P, %d0
	stda	%d0, [%i0]ASI_BLK_P

	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	add	%i0, 0x40, %i0
	bgu,pt	%ncc, 8b
	prefetch [%i1+0x80], #one_read
	membar	#Sync

.blkdone:
#else	/* NIAGARA_IMPL */
	andcc	%i1, 0xf, %o2		! is src quadword aligned
	bz,pn	%xcc, .blkcpy		! src offset in %o2
	nop
	cmp	%o2, 0x8
	bg	.cpy_upper_double
	nop
	bl	.cpy_lower_double
	nop

	! Falls through when source offset is equal to 8 i.e.
	! source is double word aligned.
	! In this case no shift/merge of data is required
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2
loop0:
	ldda	[%i1+0x10]%asi, %l4
	prefetch [%l0+0x40], #one_read

	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	stxa	%l5, [%i0+0x10]%asi
	stxa	%l2, [%i0+0x18]%asi

	ldda	[%i1+0x30]%asi, %l4
	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop0
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2

.cpy_lower_double:
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	sll	%o2, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2	! partial data in %l2 and %l3 has
					! complete data
loop1:
	ldda	[%i1+0x10]%asi, %l4	! %l4 has partial data for this read.
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
							! into %l2 and %l3
	prefetch [%l0+0x40], #one_read
	stxa	%l2, [%i0+0x0]%asi
	stxa	%l3, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)	! merge %l2 with %l5 and
	stxa	%l4, [%i0+0x10]%asi			! %l4 from previous read
	stxa	%l5, [%i0+0x18]%asi			! into %l4 and %l5

	! Repeat the same for next 32 bytes.

	ldda	[%i1+0x30]%asi, %l4
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)
	stxa	%l2, [%i0+0x20]%asi
	stxa	%l3, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)
	stxa	%l4, [%i0+0x30]%asi
	stxa	%l5, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop1
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2
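
/*
 * Worked example (illustration only): for a source offset of 3 within
 * a quadword, the shift counts set up above are
 *
 *	lshift (%o0) = 3 * 8  = 24
 *	rshift (%o1) = 64 - 24 = 40
 *
 * so each ALIGN_DATA merge shifts the 5 remaining (low) bytes of the
 * current doubleword to the top and fills the low 24 bits with the
 * first 3 bytes of the next, reconstructing aligned 64-bit stores
 * from the misaligned stream.
 */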
.cpy_upper_double:
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	mov	0x8, %o0
	sub	%o2, %o0, %o0
	sll	%o0, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2	! partial data in %l3 for this read and
					! no data in %l2
loop2:
	ldda	[%i1+0x10]%asi, %l4	! %l4 has complete data and %l5 has
					! partial
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
							! into %l3 and %l4
	prefetch [%l0+0x40], #one_read
	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)	! merge %l2 and %l3 with
	stxa	%l5, [%i0+0x10]%asi			! %l5 from previous read
	stxa	%l2, [%i0+0x18]%asi			! into %l5 and %l2

	! Repeat the same for next 32 bytes.

	ldda	[%i1+0x30]%asi, %l4
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)
	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)
	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop2
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2


	! Both Source and Destination are block aligned.
	! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P
.blkcpy:
	prefetch [%i1+0x0], #one_read
1:
	ldda	[%i1+0x0]%asi, %l0
	ldda	[%i1+0x10]%asi, %l2
	prefetch [%i1+0x40], #one_read

	stxa	%l0, [%i0+0x0]%asi
	ldda	[%i1+0x20]%asi, %l4
	ldda	[%i1+0x30]%asi, %l6

	stxa	%l1, [%i0+0x8]%asi
	stxa	%l2, [%i0+0x10]%asi
	stxa	%l3, [%i0+0x18]%asi
	stxa	%l4, [%i0+0x20]%asi
	stxa	%l5, [%i0+0x28]%asi
	stxa	%l6, [%i0+0x30]%asi
	stxa	%l7, [%i0+0x38]%asi

	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, 1b
	add	%i0, 0x40, %i0

.blkdone:
	membar	#Sync
#endif	/* NIAGARA_IMPL */

	brz,pt	%i2, .blkexit
	nop

	! Handle trailing bytes
	cmp	%i2, 0x8
	blu,pt	%ncc, .residue
	nop

	! Can we do some 8B ops
	or	%i1, %i0, %o2
	andcc	%o2, 0x7, %g0
	bnz	%ncc, .last4
	nop

	! Do 8byte ops as long as possible
.last8:
	ldx	[%i1], %o2
	stx	%o2, [%i0]
	add	%i1, 0x8, %i1
	sub	%i2, 0x8, %i2
	cmp	%i2, 0x8
	bgu,pt	%ncc, .last8
	add	%i0, 0x8, %i0

	brz,pt	%i2, .blkexit
	nop

	ba	.residue
	nop

.last4:
	! Can we do 4B ops
	andcc	%o2, 0x3, %g0
	bnz	%ncc, .last2
	nop
1:
	ld	[%i1], %o2
	st	%o2, [%i0]
	add	%i1, 0x4, %i1
	sub	%i2, 0x4, %i2
	cmp	%i2, 0x4
	bgu,pt	%ncc, 1b
	add	%i0, 0x4, %i0

	brz,pt	%i2, .blkexit
	nop

	ba	.residue
	nop

.last2:
	! Can we do 2B ops
	andcc	%o2, 0x1, %g0
	bnz	%ncc, .residue
	nop

1:
	lduh	[%i1], %o2
	stuh	%o2, [%i0]
	add	%i1, 0x2, %i1
	sub	%i2, 0x2, %i2
	cmp	%i2, 0x2
	bgu,pt	%ncc, 1b
	add	%i0, 0x2, %i0

	brz,pt	%i2, .blkexit
	nop

.residue:
	ldub	[%i1], %o2
	stb	%o2, [%i0]
	inc	%i1
	deccc	%i2
	bgu,pt	%ncc, .residue
	inc	%i0
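
/*
 * Illustration only: the trailing-byte handling above, as a rough C
 * sketch. The widest mutually aligned access is used first, then the
 * remainder falls through to the byte loop.
 *
 *	while (n >= 8 && (((uintptr_t)s | (uintptr_t)d) & 7) == 0)
 *		{ *(uint64_t *)d = *(uint64_t *)s; s += 8; d += 8; n -= 8; }
 *	... likewise for 4- and 2-byte chunks ...
 *	while (n > 0)
 *		{ *d++ = *s++; n--; }
 */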
.blkexit:
#if !defined(NIAGARA_IMPL)
	btst	FPUSED_FLAG, %o5
	bz	%icc, 1f
	and	%o5, COPY_FLAGS, %l1	! Store flags in %l1
					! We can't clear the flags from %o5 yet
					! If there's an error, .copyerr will
					! need them

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	! restore fpregs from stack
	BLD_FP_FROMSTACK(%o2)

	ba,pt	%ncc, 2f
	wr	%o3, 0, %fprs		! restore fprs

4:
	FZERO
	wr	%o3, 0, %fprs		! restore fprs

2:
	ldn	[THREAD_REG + T_LWP], %o2
	brnz,pt	%o2, 1f
	nop

	ldsb	[THREAD_REG + T_PREEMPT], %l0
	deccc	%l0
	bnz,pn	%ncc, 1f
	stb	%l0, [THREAD_REG + T_PREEMPT]

	! Check for a kernel preemption request
	ldn	[THREAD_REG + T_CPU], %l0
	ldub	[%l0 + CPU_KPRUNRUN], %l0
	brnz,a,pt %l0, 1f		! Need to call kpreempt?
	or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag

1:
	btst	BCOPY_FLAG, %l1
	bz,pn	%icc, 3f
	andncc	%o5, COPY_FLAGS, %o5

	! Here via bcopy. Check to see if the handler was NULL.
	! If so, just return quietly. Otherwise, reset the
	! handler and go home.
	bnz,pn	%ncc, 3f
	nop

	! Null handler.
	btst	KPREEMPT_FLAG, %l1
	bz,pt	%icc, 2f
	nop
	call	kpreempt
	rdpr	%pil, %o0	! pass %pil
2:

	ret
	restore	%g0, 0, %o0

	! Here via kcopy or bcopy with a handler.
	! Reset the fault handler.
3:
	membar	#Sync
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	! call kpreempt if necessary
	btst	KPREEMPT_FLAG, %l1
	bz,pt	%icc, 4f
	nop
	call	kpreempt
	rdpr	%pil, %o0
4:
#else	/* NIAGARA_IMPL */
	membar	#Sync				! sync error barrier
	! Restore t_lofault handler, if came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
#endif	/* NIAGARA_IMPL */
	ret
	restore	%g0, 0, %o0

.bcb_punt:
	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	.aldoubcp		! can align on double boundary
	.empty				! assembler complains about label

	xor	%i0, %i1, %o4		! xor from and to address
	btst	3, %o4			! if lower two bits zero
	bz	.alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
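/*
 * Illustration only: the word-merge technique used below, as a rough
 * C sketch. US is how many upper bits of the source word have already
 * been consumed; each aligned source read supplies the low LS bits of
 * one destination word and the high US bits of the next.
 *
 *	uint32_t carry = 0;	leftover source bits, already shifted
 *	for (;;) {
 *		uint32_t w = *src++;		aligned 32-bit read
 *		*dst++ = carry | (w >> US);	aligned 32-bit write
 *		carry = w << LS;		remainder for the next word
 *	}
 */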
	mov	32, %l5			! load an oft-needed constant
	bz	.align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	.align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
	bgu	%ncc, .more_needed	! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
.more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
.align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
.xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out	! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
.unalign_out:
	tst	%l4			! any bytes leftover?
	bz	%ncc, .cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	%ncc, .cpdone		! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	%ncc, 1b		! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	.dbytecp		! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
	!
	! the destination address is aligned and the source is not
	!
.align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	.align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	.xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is, if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
.aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	blu,a	%ncc, .alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	.alignit		! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldx	[%i0+%i1], %o4		! read from address
	stx	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 8, %i1		! delay slot, inc to address
	cmp	%i2, 4			! see if we can copy a word
	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
.wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	.dbytecp

	! we come here to align copies on word boundaries
.alwordcp:
	call	.alignit		! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	.wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
.bytecp:
	b	.dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
.dbytecp:
	deccc	%i2			! dec count
	bgeu,a	%ncc, 1b		! loop till done
	ldub	[%i0+%i1], %o4		! read from address
.cpdone:
#if !defined(NIAGARA_IMPL)
	! FPUSED_FLAG will not have been set in any path leading to
	! this point. No need to deal with it.
	btst	BCOPY_FLAG, %o5
	bz,pn	%icc, 2f
	andcc	%o5, BCOPY_FLAG, %o5
	! Here via bcopy. Check to see if the handler was NULL.
	! If so, just return quietly. Otherwise, reset the
	! handler and go home.
	bnz,pn	%ncc, 2f
	nop
	!
	! Null handler.
	!
	ret
	restore	%g0, 0, %o0
	! Here via kcopy or bcopy with a handler.
	! Reset the fault handler.
2:
	membar	#Sync
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
#else	/* NIAGARA_IMPL */
	membar	#Sync				! sync error barrier
	! Restore t_lofault handler, if came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
#endif	/* NIAGARA_IMPL */
	ret
	restore	%g0, 0, %o0		! return (0)
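
/*
 * Illustration only: the "differenced" copy trick used by .dbytecp
 * and friends, as a rough C sketch. Keeping (src - dst) in one
 * register means only the destination pointer is advanced, saving an
 * increment per iteration.
 *
 *	ptrdiff_t diff = src - dst;
 *	while (count-- > 0) {
 *		*dst = *(dst + diff);	read from dst + (src - dst)
 *		dst++;
 *	}
 */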

/*
 * Common code used to align transfers on word and doubleword
 * boundaries. Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				! no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

#endif	/* lint */
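
/*
 * Illustration only: ovbcopy's overlap decision, as a rough C sketch.
 *
 *	if (count == 0)
 *		return;
 *	if (abs_diff(from, to) >= count)
 *		bcopy(from, to, count);		regions don't overlap
 *	else if (from > to)
 *		copy forwards, byte by byte;
 *	else
 *		copy backwards, byte by byte;
 */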
/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else	/* lint */
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)

	set	PAGESIZE, %i2

	/*
	 * Copying exactly one page and PAGESIZE is a multiple of 0x80.
	 */
	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
	prefetch [%i0+0x0], #one_read
	prefetch [%i0+0x40], #one_read
1:
	prefetch [%i0+0x80], #one_read
	prefetch [%i0+0xc0], #one_read
	ldda	[%i0+0x0]%asi, %l0
	ldda	[%i0+0x10]%asi, %l2
	ldda	[%i0+0x20]%asi, %l4
	ldda	[%i0+0x30]%asi, %l6
	stxa	%l0, [%i1+0x0]%asi
	stxa	%l1, [%i1+0x8]%asi
	stxa	%l2, [%i1+0x10]%asi
	stxa	%l3, [%i1+0x18]%asi
	stxa	%l4, [%i1+0x20]%asi
	stxa	%l5, [%i1+0x28]%asi
	stxa	%l6, [%i1+0x30]%asi
	stxa	%l7, [%i1+0x38]%asi
	ldda	[%i0+0x40]%asi, %l0
	ldda	[%i0+0x50]%asi, %l2
	ldda	[%i0+0x60]%asi, %l4
	ldda	[%i0+0x70]%asi, %l6
	stxa	%l0, [%i1+0x40]%asi
	stxa	%l1, [%i1+0x48]%asi
	stxa	%l2, [%i1+0x50]%asi
	stxa	%l3, [%i1+0x58]%asi
	stxa	%l4, [%i1+0x60]%asi
	stxa	%l5, [%i1+0x68]%asi
	stxa	%l6, [%i1+0x70]%asi
	stxa	%l7, [%i1+0x78]%asi

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	add	%i1, 0x80, %i1

	membar	#Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)
#endif	/* lint */
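
/*
 * Illustration only: the structure of the hwblkpagecopy loop above,
 * as a rough C sketch. Each iteration moves 0x80 bytes via 16-byte
 * quad loads and 8-byte stores through the block-initializing ASI.
 *
 *	for (off = 0; off < PAGESIZE; off += 0x80) {
 *		prefetch two cache lines ahead;
 *		copy bytes [off, off + 0x80);
 *	}
 *	membar #Sync;	make the copy visible before returning
 */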


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */

/*
 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * None of the copyops routines grab a window until it's decided that
 * we need to do a HW block copy operation. This saves a window
 * spill/fill when we're called during socket ops. The typical IO
 * path won't cause spill/fill traps.
 *
 * This code uses a set of 4 limits for the maximum size that will
 * be copied given a particular input/output address alignment.
 * The default limits are:
 *
 * single byte aligned - 256 (hw_copy_limit_1)
 * two byte aligned - 512 (hw_copy_limit_2)
 * four byte aligned - 1024 (hw_copy_limit_4)
 * eight byte aligned - 1024 (hw_copy_limit_8)
 *
 * If the value for a particular limit is zero, the copy will be done
 * via the copy loops rather than block store/quad load instructions.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous lo_fault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our nowindow fault handler into %o3.
 * Place the address of the windowed fault handler into %o4.
 * --> We'll use this handler if we end up grabbing a window
 * --> before we use block initializing store and quad load ASIs
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers. Based on the alignment we check count
 * against a limit based on detected alignment.  If we exceed the
 * alignment value we copy via block initializing store and quad
 * load instructions.
 *
 * If we don't exceed one of the limits, we store -count in %o3,
 * we store the number of chunks (8, 4, 2 or 1 byte) operated
 * on in our basic copy loop in %o2. Following this we branch
 * to the appropriate copy loop and copy that many chunks.
 * Since we've been adding the chunk size to %o3 each time through
 * as well as decrementing %o2, we can tell if any data is
 * left to be copied by examining %o3. If that is zero, we're
 * done and can go home. If not, we figure out what the largest
 * chunk size left to be copied is and branch to that copy loop
 * unless there's only one byte left. We load that as we're
 * branching to code that stores it just before we return.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping.  All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handlers
 * for copyOP and xcopyOP are found at the end of individual function.
 * The handlers for xcopyOP_little are found at the end of xcopyin_little.
 * The handlers for copyOP_noerr are found at the end of copyin_noerr.
 */
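
/*
 * Illustration only: the alignment/limit dispatch described above, as
 * a rough C sketch.
 *
 *	uintptr_t both = (uintptr_t)src | (uintptr_t)dst;
 *	uint_t lim;
 *	if (both & 1)		lim = hw_copy_limit_1;
 *	else if (!(both & 7))	lim = hw_copy_limit_8;
 *	else if (!(both & 3))	lim = hw_copy_limit_4;
 *	else			lim = hw_copy_limit_2;
 *	if (lim != 0 && count > lim)
 *		use block-initializing store / quad load copy;
 *	else
 *		use the simple copy loops;
 */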

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 * kaddr - %g2
 * uaddr - %g3
 * count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in
 * REAL_LOFAULT.  This allows us to share common code for all the flavors
 * of the copy operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
	ENTRY(copyio_fault)
#if !defined(NIAGARA_IMPL)
	btst	FPUSED_FLAG, SAVED_LOFAULT
	bz	1f
	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT

	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
	wr	%o2, 0, %gsr		! restore gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz	%icc, 4f
	nop

	! restore fpregs from stack
	BLD_FP_FROMSTACK(%o2)

	ba,pt	%ncc, 1f
	wr	%o3, 0, %fprs		! restore fprs

4:
	FZERO				! zero all of the fpregs
	wr	%o3, 0, %fprs		! restore fprs

1:
#else	/* NIAGARA_IMPL */
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
#endif	/* NIAGARA_IMPL */

	restore

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault)

	ENTRY(copyio_fault_nowindow)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault_nowindow)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o4
	or	%o4, %lo(copyio_fault), %o4
	sethi	%hi(copyio_fault_nowindow), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault_nowindow), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3
	!
	! What was previously ".small_copyout"
	! Do full differenced copy.
	!
.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
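
/*
 * Illustration only: the negative-index loop above, as a rough C
 * sketch.
 *
 *	ssize_t i = -(ssize_t)count;	negated count
 *	src += count;			both pointers park at the end
 *	dst += count;
 *	while (i < 0) {
 *		dst[i] = src[i];	one index walks both buffers
 *		i++;
 *	}
 */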
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o1 | %o2 (used for alignment checking)
	! %o4 is alternate lo_fault
	! %o5 is original lo_fault
	!
	! See if we're single byte aligned. If we are, check the
	! limit for single byte copies. If we're smaller or equal,
	! bounce to the byte for byte copy loop. Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3
	!
	! Single byte aligned. Do we do it via HW or via
	! byte for byte? Do a quick no memory reference
	! check to pick up small copies.
	!
	sethi	%hi(hw_copy_limit_1), %o3
	!
	! Big enough that we need to check the HW limit for
	! this size copy.
	!
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	!
	! Is HW copy on? If not, do everything byte for byte.
	!
	tst	%o3
	bz,pn	%icc, .dcobcp
	subcc	%o3, %o2, %o3
	!
	! If we're less than or equal to the single byte copy limit,
	! bop to the copy loop.
	!
	bge,pt	%ncc, .dcobcp
	nop
	!
	! We're big enough and copy is on. Do it with HW.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcoh8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcoh4
	btst	3, %o3
	!
	! See if we're in the "small range".
	! If so, go off and do the copy.
	! If not, load the hard limit. %o3 is
	! available for reuse.
	!
	sethi	%hi(hw_copy_limit_8), %o3
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	!
	! If it's zero, there's no HW bcopy.
	! Bop off to the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcos8
	subcc	%o3, %o2, %o3
	!
	! We're negative if our size is larger than hw_copy_limit_8.
	!
	bge,pt	%ncc, .dcos8
	nop
	!
	! HW assist is on and we're large enough. Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos8:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcoh4:
	bnz,pn	%ncc, .dcoh2
	!
	! See if we're in the "small range".
	! If so, go off and do the copy.
	! If not, load the hard limit. %o3 is
	! available for reuse.
	!
	sethi	%hi(hw_copy_limit_4), %o3
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	!
	! If it's zero, there's no HW bcopy.
	! Bop off to the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcos4
	subcc	%o3, %o2, %o3
	!
	! We're negative if our size is larger than hw_copy_limit_4.
	!
	bge,pt	%ncc, .dcos4
	nop
	!
	! HW assist is on and we're large enough. Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos4:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
	!
	! We must be 2 byte aligned. Off we go.
	! The check for small copies was done in the
	! delay at .dcoh4
	!
.dcoh2:
	ble	%ncc, .dcos2
	sethi	%hi(hw_copy_limit_2), %o3
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .dcos2
	subcc	%o3, %o2, %o3
	bge,pt	%ncc, .dcos2
	nop
	!
	! HW is on and we're big enough. Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
.small_copyout:
	!
	! Why are we doing this AGAIN? There are certain conditions in
	! big_copyout that will cause us to forego the HW assisted copies
	! and bounce back to a non-HW assisted copy. This dispatches those
	! copies. Note that we branch around this in the main line code.
	!
	! We make no check for limits or HW enablement here. We've
	! already been told that we're a poster child so just go off
	! and do it.
	!
	or	%o0, %o1, %o3
	btst	1, %o3
	bnz	%icc, .dcobcp		! Most likely
	btst	7, %o3
	bz	%icc, .dcos8
	btst	3, %o3
	bz	%icc, .dcos4
	nop
	ba,pt	%ncc, .dcos2
	nop
	.align 32
.dodebc:
	ldx	[%o0 + %o3], %o4
	deccc	%o2
	stxa	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop. Check to see if we're done. Most
	! eight byte aligned copies end here.
	!
	! eight byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left - do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
	!
	.align 32
.dodfbc:
	lduw	[%o0 + %o3], %o4
	deccc	%o2
	sta	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop. Check to see if we're done. Most
	! four byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
	! copy.
	!
	.align 32
.dodtbc:
	lduh	[%o0 + %o3], %o4
	deccc	%o2
	stha	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop. Anything left?
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Deal with the last byte
	!
	ldub	[%o0 + %o3], %o4
	stba	%o4, [%o1 + %o3]ASI_USER
.dcofh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.big_copyout:
	! We're going to go off and do a block copy.
	! Switch fault handlers and grab a window. We
	! don't do a membar #Sync since we've done only
	! kernel data to this point.
	stn	%o4, [THREAD_REG + T_LOFAULT]

	! Copies that reach here are larger than 256 bytes. The
	! hw_copy_limit_1 is set to 256. Never set this limit to
	! less than 128 bytes.
#if !defined(NIAGARA_IMPL)
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp

	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]	! save %fprs
	btst	FPRS_FEF, %o2
	bz,a,pt	%icc, .do_block_copyout
	wr	%g0, FPRS_FEF, %fprs

	! save in-use fpregs on stack
	BST_FP_TOSTACK(%o2)
#else	/* NIAGARA_IMPL */
	save	%sp, -SA(MINFRAME), %sp
#endif	/* NIAGARA_IMPL */

.do_block_copyout:

#if !defined(NIAGARA_IMPL)
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	! set the lower bit of the saved t_lofault to indicate that we
	! need to clear the %fprs register on the way out
	or	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
#endif	/* NIAGARA_IMPL */

	! Swap src/dst since the code below is memcpy code
	! and memcpy/bcopy have different calling sequences
	mov	%i1, %i5
	mov	%i0, %i1
	mov	%i5, %i0

	! Block (64 bytes) align the destination.
	andcc	%i0, 0x3f, %i3		! is dst block aligned
	bz	%ncc, copyout_blalign	! dst already block aligned
	sub	%i3, 0x40, %i3
	neg	%i3			! bytes till dst 64 bytes aligned
	sub	%i2, %i3, %i2		! update i2 with new count

	! Based on source and destination alignment do
	! either 8 bytes, 4 bytes, 2 bytes or byte copy.

	! Is dst & src 8B aligned
	or	%i0, %i1, %o2
	andcc	%o2, 0x7, %g0
	bz	%ncc, .co_alewdcp
	nop

	! Is dst & src 4B aligned
	andcc	%o2, 0x3, %g0
	bz	%ncc, .co_alwdcp
	nop

	! Is dst & src 2B aligned
	andcc	%o2, 0x1, %g0
	bz	%ncc, .co_alhlfwdcp
	nop

	! 1B aligned
1:	ldub	[%i1], %o2
	stba	%o2, [%i0]ASI_USER
	inc	%i1
	deccc	%i3
	bgu,pt	%ncc, 1b
	inc	%i0

	ba	copyout_blalign
	nop

	!
dst & src 4B aligned 2178.co_alwdcp: 2179 ld [%i1], %o2 2180 sta %o2, [%i0]ASI_USER 2181 add %i1, 0x4, %i1 2182 subcc %i3, 0x4, %i3 2183 bgu,pt %ncc, .co_alwdcp 2184 add %i0, 0x4, %i0 2185 2186 ba copyout_blalign 2187 nop 2188 2189 ! dst & src 2B aligned 2190.co_alhlfwdcp: 2191 lduh [%i1], %o2 2192 stuha %o2, [%i0]ASI_USER 2193 add %i1, 0x2, %i1 2194 subcc %i3, 0x2, %i3 2195 bgu,pt %ncc, .co_alhlfwdcp 2196 add %i0, 0x2, %i0 2197 2198 ba copyout_blalign 2199 nop 2200 2201 ! dst & src 8B aligned 2202.co_alewdcp: 2203 ldx [%i1], %o2 2204 stxa %o2, [%i0]ASI_USER 2205 add %i1, 0x8, %i1 2206 subcc %i3, 0x8, %i3 2207 bgu,pt %ncc, .co_alewdcp 2208 add %i0, 0x8, %i0 2209 2210 ! Now Destination is block (64 bytes) aligned 2211copyout_blalign: 2212 andn %i2, 0x3f, %i3 ! %i3 count is multiple of block size 2213 sub %i2, %i3, %i2 ! Residue bytes in %i2 2214 2215 mov ASI_BLK_INIT_QUAD_LDD_AIUS, %asi 2216 2217#if !defined(NIAGARA_IMPL) 2218 andn %i1, 0x3f, %l0 ! %l0 has block aligned src address 2219 prefetch [%l0+0x0], #one_read 2220 andcc %i1, 0x3f, %g0 ! is src 64B aligned 2221 bz,pn %ncc, .co_blkcpy 2222 nop 2223 2224 ! handle misaligned source cases 2225 alignaddr %i1, %g0, %g0 ! generate %gsr 2226 2227 srl %i1, 0x3, %l1 ! src add bits 3, 4, 5 are now least 2228 ! significant in %l1 2229 andcc %l1, 0x7, %l2 ! mask everything except bits 1, 2, 3 2230 add %i1, %i3, %i1 2231 2232 ! switch statement to get to right 8 byte block within 2233 ! 64 byte block 2234 cmp %l2, 0x4 2235 bgeu,a co_hlf 2236 cmp %l2, 0x6 2237 cmp %l2, 0x2 2238 bgeu,a co_sqtr 2239 nop 2240 cmp %l2, 0x1 2241 be,a co_off15 2242 nop 2243 ba co_off7 2244 nop 2245co_sqtr: 2246 be,a co_off23 2247 nop 2248 ba,a co_off31 2249 nop 2250 2251co_hlf: 2252 bgeu,a co_fqtr 2253 nop 2254 cmp %l2, 0x5 2255 be,a co_off47 2256 nop 2257 ba co_off39 2258 nop 2259co_fqtr: 2260 be,a co_off55 2261 nop 2262 2263 ldd [%l0+0x38], %d14 2264 prefetch [%l0+0x40], #one_read 2265 prefetch [%l0+0x80], #one_read 22667: 2267 add %l0, 0x40, %l0 2268 stxa %g0, [%i0]%asi ! initialize the cache line 2269 2270 ldda [%l0]ASI_BLK_P, %d16 2271 ALIGN_OFF_56_63 2272 fmovd %d30, %d14 2273 2274 stda %d48, [%i0]ASI_BLK_AIUS 2275 subcc %i3, 0x40, %i3 2276 add %i0, 0x40, %i0 2277 bgu,pt %ncc, 7b 2278 prefetch [%l0+0x80], #one_read 2279 ba .co_blkdone 2280 membar #Sync 2281 2282co_off7: 2283 ldda [%l0]ASI_BLK_P, %d0 2284 prefetch [%l0+0x40], #one_read 2285 prefetch [%l0+0x80], #one_read 22860: 2287 add %l0, 0x40, %l0 2288 stxa %g0, [%i0]%asi ! initialize the cache line 2289 2290 ldda [%l0]ASI_BLK_P, %d16 2291 ALIGN_OFF_1_7 2292 fmovd %d16, %d0 2293 fmovd %d18, %d2 2294 fmovd %d20, %d4 2295 fmovd %d22, %d6 2296 fmovd %d24, %d8 2297 fmovd %d26, %d10 2298 fmovd %d28, %d12 2299 fmovd %d30, %d14 2300 2301 stda %d48, [%i0]ASI_BLK_AIUS 2302 subcc %i3, 0x40, %i3 2303 add %i0, 0x40, %i0 2304 bgu,pt %ncc, 0b 2305 prefetch [%l0+0x80], #one_read 2306 ba .co_blkdone 2307 membar #Sync 2308 2309co_off15: 2310 ldd [%l0+0x8], %d2 2311 ldd [%l0+0x10], %d4 2312 ldd [%l0+0x18], %d6 2313 ldd [%l0+0x20], %d8 2314 ldd [%l0+0x28], %d10 2315 ldd [%l0+0x30], %d12 2316 ldd [%l0+0x38], %d14 2317 prefetch [%l0+0x40], #one_read 2318 prefetch [%l0+0x80], #one_read 23191: 2320 add %l0, 0x40, %l0 2321 stxa %g0, [%i0]%asi ! 
initialize the cache line 2322 2323 ldda [%l0]ASI_BLK_P, %d16 2324 ALIGN_OFF_8_15 2325 fmovd %d18, %d2 2326 fmovd %d20, %d4 2327 fmovd %d22, %d6 2328 fmovd %d24, %d8 2329 fmovd %d26, %d10 2330 fmovd %d28, %d12 2331 fmovd %d30, %d14 2332 2333 stda %d48, [%i0]ASI_BLK_AIUS 2334 subcc %i3, 0x40, %i3 2335 add %i0, 0x40, %i0 2336 bgu,pt %ncc, 1b 2337 prefetch [%l0+0x80], #one_read 2338 ba .co_blkdone 2339 membar #Sync 2340 2341co_off23: 2342 ldd [%l0+0x10], %d4 2343 ldd [%l0+0x18], %d6 2344 ldd [%l0+0x20], %d8 2345 ldd [%l0+0x28], %d10 2346 ldd [%l0+0x30], %d12 2347 ldd [%l0+0x38], %d14 2348 prefetch [%l0+0x40], #one_read 2349 prefetch [%l0+0x80], #one_read 23502: 2351 add %l0, 0x40, %l0 2352 stxa %g0, [%i0]%asi ! initialize the cache line 2353 2354 ldda [%l0]ASI_BLK_P, %d16 2355 ALIGN_OFF_16_23 2356 fmovd %d20, %d4 2357 fmovd %d22, %d6 2358 fmovd %d24, %d8 2359 fmovd %d26, %d10 2360 fmovd %d28, %d12 2361 fmovd %d30, %d14 2362 2363 stda %d48, [%i0]ASI_BLK_AIUS 2364 subcc %i3, 0x40, %i3 2365 add %i0, 0x40, %i0 2366 bgu,pt %ncc, 2b 2367 prefetch [%l0+0x80], #one_read 2368 ba .co_blkdone 2369 membar #Sync 2370 2371co_off31: 2372 ldd [%l0+0x18], %d6 2373 ldd [%l0+0x20], %d8 2374 ldd [%l0+0x28], %d10 2375 ldd [%l0+0x30], %d12 2376 ldd [%l0+0x38], %d14 2377 prefetch [%l0+0x40], #one_read 2378 prefetch [%l0+0x80], #one_read 23793: 2380 add %l0, 0x40, %l0 2381 stxa %g0, [%i0]%asi ! initialize the cache line 2382 2383 ldda [%l0]ASI_BLK_P, %d16 2384 ALIGN_OFF_24_31 2385 fmovd %d22, %d6 2386 fmovd %d24, %d8 2387 fmovd %d26, %d10 2388 fmovd %d28, %d12 2389 fmovd %d30, %d14 2390 2391 stda %d48, [%i0]ASI_BLK_AIUS 2392 subcc %i3, 0x40, %i3 2393 add %i0, 0x40, %i0 2394 bgu,pt %ncc, 3b 2395 prefetch [%l0+0x80], #one_read 2396 ba .co_blkdone 2397 membar #Sync 2398 2399co_off39: 2400 ldd [%l0+0x20], %d8 2401 ldd [%l0+0x28], %d10 2402 ldd [%l0+0x30], %d12 2403 ldd [%l0+0x38], %d14 2404 prefetch [%l0+0x40], #one_read 2405 prefetch [%l0+0x80], #one_read 24064: 2407 add %l0, 0x40, %l0 2408 stxa %g0, [%i0]%asi ! initialize the cache line 2409 2410 ldda [%l0]ASI_BLK_P, %d16 2411 ALIGN_OFF_32_39 2412 fmovd %d24, %d8 2413 fmovd %d26, %d10 2414 fmovd %d28, %d12 2415 fmovd %d30, %d14 2416 2417 stda %d48, [%i0]ASI_BLK_AIUS 2418 subcc %i3, 0x40, %i3 2419 add %i0, 0x40, %i0 2420 bgu,pt %ncc, 4b 2421 prefetch [%l0+0x80], #one_read 2422 ba .co_blkdone 2423 membar #Sync 2424 2425co_off47: 2426 ldd [%l0+0x28], %d10 2427 ldd [%l0+0x30], %d12 2428 ldd [%l0+0x38], %d14 2429 prefetch [%l0+0x40], #one_read 2430 prefetch [%l0+0x80], #one_read 24315: 2432 add %l0, 0x40, %l0 2433 stxa %g0, [%i0]%asi ! initialize the cache line 2434 2435 ldda [%l0]ASI_BLK_P, %d16 2436 ALIGN_OFF_40_47 2437 fmovd %d26, %d10 2438 fmovd %d28, %d12 2439 fmovd %d30, %d14 2440 2441 stda %d48, [%i0]ASI_BLK_AIUS 2442 subcc %i3, 0x40, %i3 2443 add %i0, 0x40, %i0 2444 bgu,pt %ncc, 5b 2445 prefetch [%l0+0x80], #one_read 2446 ba .co_blkdone 2447 membar #Sync 2448 2449co_off55: 2450 ldd [%l0+0x30], %d12 2451 ldd [%l0+0x38], %d14 2452 prefetch [%l0+0x40], #one_read 2453 prefetch [%l0+0x80], #one_read 24546: 2455 add %l0, 0x40, %l0 2456 stxa %g0, [%i0]%asi ! 
initialize the cache line 2457 2458 ldda [%l0]ASI_BLK_P, %d16 2459 ALIGN_OFF_48_55 2460 fmovd %d28, %d12 2461 fmovd %d30, %d14 2462 2463 stda %d48, [%i0]ASI_BLK_AIUS 2464 subcc %i3, 0x40, %i3 2465 add %i0, 0x40, %i0 2466 bgu,pt %ncc, 6b 2467 prefetch [%l0+0x80], #one_read 2468 ba .co_blkdone 2469 membar #Sync 2470 2471.co_blkcpy: 2472 prefetch [%i1+0x40], #one_read 2473 prefetch [%i1+0x80], #one_read 24748: 2475 stxa %g0, [%i0]%asi ! initialize the cache line 2476 ldda [%i1]ASI_BLK_P, %d0 2477 stda %d0, [%i0]ASI_BLK_AIUS 2478 2479 add %i1, 0x40, %i1 2480 subcc %i3, 0x40, %i3 2481 add %i0, 0x40, %i0 2482 bgu,pt %ncc, 8b 2483 prefetch [%i1+0x80], #one_read 2484 membar #Sync 2485 2486.co_blkdone: 2487#else /* NIAGARA_IMPL */ 2488 andcc %i1, 0xf, %o2 ! is src quadword aligned 2489 bz,pn %xcc, .co_blkcpy ! src offset in %o2 (last 4-bits) 2490 nop 2491 cmp %o2, 0x8 2492 bg .co_upper_double 2493 nop 2494 bl .co_lower_double 2495 nop 2496 2497 ! Falls through when source offset is equal to 8 i.e. 2498 ! source is double word aligned. 2499 ! In this case no shift/merge of data is required 2500 2501 sub %i1, %o2, %i1 ! align the src at 16 bytes. 2502 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 2503 prefetch [%l0+0x0], #one_read 2504 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2505.co_loop0: 2506 add %i1, 0x10, %i1 2507 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2508 prefetch [%l0+0x40], #one_read 2509 2510 stxa %l3, [%i0+0x0]%asi 2511 stxa %l4, [%i0+0x8]%asi 2512 2513 add %i1, 0x10, %i1 2514 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2515 2516 stxa %l5, [%i0+0x10]%asi 2517 stxa %l2, [%i0+0x18]%asi 2518 2519 add %i1, 0x10, %i1 2520 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2521 2522 stxa %l3, [%i0+0x20]%asi 2523 stxa %l4, [%i0+0x28]%asi 2524 2525 add %i1, 0x10, %i1 2526 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2527 2528 stxa %l5, [%i0+0x30]%asi 2529 stxa %l2, [%i0+0x38]%asi 2530 2531 add %l0, 0x40, %l0 2532 subcc %i3, 0x40, %i3 2533 bgu,pt %xcc, .co_loop0 2534 add %i0, 0x40, %i0 2535 ba .co_blkdone 2536 add %i1, %o2, %i1 ! increment the source by src offset 2537 ! the src offset was stored in %o2 2538 2539.co_lower_double: 2540 2541 sub %i1, %o2, %i1 ! align the src at 16 bytes. 2542 sll %o2, 3, %o0 ! %o0 left shift 2543 mov 0x40, %o1 2544 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 2545 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 2546 prefetch [%l0+0x0], #one_read 2547 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 ! partial data in %l2 and %l3 has 2548 ! complete data 2549.co_loop1: 2550 add %i1, 0x10, %i1 2551 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 ! %l4 has partial data 2552 ! for this read. 2553 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4 2554 ! into %l2 and %l3 2555 prefetch [%l0+0x40], #one_read 2556 2557 stxa %l2, [%i0+0x0]%asi 2558 stxa %l3, [%i0+0x8]%asi 2559 2560 add %i1, 0x10, %i1 2561 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2562 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and 2563 ! %l4 from previous read 2564 ! into %l4 and %l5 2565 stxa %l4, [%i0+0x10]%asi 2566 stxa %l5, [%i0+0x18]%asi 2567 2568 ! Repeat the same for next 32 bytes. 
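	!
	! The ALIGN_DATA merges above are, as a rough C sketch (where
	! lshift = (src offset & 7) * 8 and rshift = 64 - lshift, the
	! shift counts computed into %o0 and %o1 above):
	!
	!	d1 = (d1 << lshift) | (d2 >> rshift);
	!	d2 = (d2 << lshift) | (d3 >> rshift);
	!
	! so each 16-byte quad load supplies the bytes that complete one
	! output doubleword and begin the next.
	!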
2569 2570 add %i1, 0x10, %i1 2571 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2572 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) 2573 2574 stxa %l2, [%i0+0x20]%asi 2575 stxa %l3, [%i0+0x28]%asi 2576 2577 add %i1, 0x10, %i1 2578 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2579 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) 2580 2581 stxa %l4, [%i0+0x30]%asi 2582 stxa %l5, [%i0+0x38]%asi 2583 2584 add %l0, 0x40, %l0 2585 subcc %i3, 0x40, %i3 2586 bgu,pt %xcc, .co_loop1 2587 add %i0, 0x40, %i0 2588 ba .co_blkdone 2589 add %i1, %o2, %i1 ! increment the source by src offset 2590 ! the src offset was stored in %o2 2591 2592.co_upper_double: 2593 2594 sub %i1, %o2, %i1 ! align the src at 16 bytes. 2595 sub %o2, 0x8, %o0 2596 sll %o0, 3, %o0 ! %o0 left shift 2597 mov 0x40, %o1 2598 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 2599 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 2600 prefetch [%l0+0x0], #one_read 2601 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 ! partial data in %l3 2602 ! for this read and 2603 ! no data in %l2 2604.co_loop2: 2605 add %i1, 0x10, %i1 2606 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 ! %l4 has complete data 2607 ! and %l5 has partial 2608 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! merge %l3, %l4 and %l5 2609 ! into %l3 and %l4 2610 prefetch [%l0+0x40], #one_read 2611 2612 stxa %l3, [%i0+0x0]%asi 2613 stxa %l4, [%i0+0x8]%asi 2614 2615 add %i1, 0x10, %i1 2616 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2617 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with 2618 ! %l5 from previous read 2619 ! into %l5 and %l2 2620 2621 stxa %l5, [%i0+0x10]%asi 2622 stxa %l2, [%i0+0x18]%asi 2623 2624 ! Repeat the same for next 32 bytes. 2625 2626 add %i1, 0x10, %i1 2627 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2628 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) 2629 2630 stxa %l3, [%i0+0x20]%asi 2631 stxa %l4, [%i0+0x28]%asi 2632 2633 add %i1, 0x10, %i1 2634 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2635 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) 2636 2637 stxa %l5, [%i0+0x30]%asi 2638 stxa %l2, [%i0+0x38]%asi 2639 2640 add %l0, 0x40, %l0 2641 subcc %i3, 0x40, %i3 2642 bgu,pt %xcc, .co_loop2 2643 add %i0, 0x40, %i0 2644 ba .co_blkdone 2645 add %i1, %o2, %i1 ! increment the source by src offset 2646 ! the src offset was stored in %o2 2647 2648 2649 ! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P 2650.co_blkcpy: 2651 2652 andn %i1, 0x3f, %o0 ! %o0 has block aligned source 2653 prefetch [%o0+0x0], #one_read 26541: 2655 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l0 2656 add %i1, 0x10, %i1 2657 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2 2658 add %i1, 0x10, %i1 2659 2660 prefetch [%o0+0x40], #one_read 2661 2662 stxa %l0, [%i0+0x0]%asi 2663 2664 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4 2665 add %i1, 0x10, %i1 2666 ldda [%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l6 2667 add %i1, 0x10, %i1 2668 2669 stxa %l1, [%i0+0x8]%asi 2670 stxa %l2, [%i0+0x10]%asi 2671 stxa %l3, [%i0+0x18]%asi 2672 stxa %l4, [%i0+0x20]%asi 2673 stxa %l5, [%i0+0x28]%asi 2674 stxa %l6, [%i0+0x30]%asi 2675 stxa %l7, [%i0+0x38]%asi 2676 2677 add %o0, 0x40, %o0 2678 subcc %i3, 0x40, %i3 2679 bgu,pt %xcc, 1b 2680 add %i0, 0x40, %i0 2681 2682.co_blkdone: 2683 membar #Sync 2684#endif /* NIAGARA_IMPL */ 2685 2686 brz,pt %i2, .copyout_exit 2687 nop 2688 2689 ! Handle trailing bytes 2690 cmp %i2, 0x8 2691 blu,pt %ncc, .co_residue 2692 nop 2693 2694 ! Can we do some 8B ops 2695 or %i1, %i0, %o2 2696 andcc %o2, 0x7, %g0 2697 bnz %ncc, .co_last4 2698 nop 2699 2700 ! 
Do 8byte ops as long as possible 2701.co_last8: 2702 ldx [%i1], %o2 2703 stxa %o2, [%i0]ASI_USER 2704 add %i1, 0x8, %i1 2705 sub %i2, 0x8, %i2 2706 cmp %i2, 0x8 2707 bgu,pt %ncc, .co_last8 2708 add %i0, 0x8, %i0 2709 2710 brz,pt %i2, .copyout_exit 2711 nop 2712 2713 ba .co_residue 2714 nop 2715 2716.co_last4: 2717 ! Can we do 4B ops 2718 andcc %o2, 0x3, %g0 2719 bnz %ncc, .co_last2 2720 nop 27211: 2722 ld [%i1], %o2 2723 sta %o2, [%i0]ASI_USER 2724 add %i1, 0x4, %i1 2725 sub %i2, 0x4, %i2 2726 cmp %i2, 0x4 2727 bgu,pt %ncc, 1b 2728 add %i0, 0x4, %i0 2729 2730 brz,pt %i2, .copyout_exit 2731 nop 2732 2733 ba .co_residue 2734 nop 2735 2736.co_last2: 2737 ! Can we do 2B ops 2738 andcc %o2, 0x1, %g0 2739 bnz %ncc, .co_residue 2740 nop 2741 27421: 2743 lduh [%i1], %o2 2744 stuha %o2, [%i0]ASI_USER 2745 add %i1, 0x2, %i1 2746 sub %i2, 0x2, %i2 2747 cmp %i2, 0x2 2748 bgu,pt %ncc, 1b 2749 add %i0, 0x2, %i0 2750 2751 brz,pt %i2, .copyout_exit 2752 nop 2753 2754 ! Copy the residue as byte copy 2755.co_residue: 2756 ldub [%i1], %i4 2757 stba %i4, [%i0]ASI_USER 2758 inc %i1 2759 deccc %i2 2760 bgu,pt %xcc, .co_residue 2761 inc %i0 2762 2763.copyout_exit: 2764#if !defined(NIAGARA_IMPL) 2765 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 2766 wr %o2, 0, %gsr ! restore gsr 2767 2768 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 2769 btst FPRS_FEF, %o3 2770 bz %icc, 4f 2771 nop 2772 2773 ! restore fpregs from stack 2774 BLD_FP_FROMSTACK(%o2) 2775 2776 ba,pt %ncc, 2f 2777 wr %o3, 0, %fprs ! restore fprs 2778 27794: 2780 FZERO ! zero all of the fpregs 2781 wr %o3, 0, %fprs ! restore fprs 2782 27832: 2784 membar #Sync 2785 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 2786#else /* NIAGARA_IMPL */ 2787 membar #Sync 2788#endif /* NIAGARA_IMPL */ 2789 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2790 ret 2791 restore %g0, 0, %o0 2792 2793.copyout_err: 2794 ldn [THREAD_REG + T_COPYOPS], %o4 2795 brz %o4, 2f 2796 nop 2797 ldn [%o4 + CP_COPYOUT], %g2 2798 jmp %g2 2799 nop 28002: 2801 retl 2802 mov -1, %o0 2803 SET_SIZE(copyout) 2804 2805#endif /* lint */ 2806 2807 2808#ifdef lint 2809 2810/*ARGSUSED*/ 2811int 2812xcopyout(const void *kaddr, void *uaddr, size_t count) 2813{ return (0); } 2814 2815#else /* lint */ 2816 2817 ENTRY(xcopyout) 2818 sethi %hi(.xcopyout_err), REAL_LOFAULT 2819 b .do_copyout 2820 or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT 2821.xcopyout_err: 2822 ldn [THREAD_REG + T_COPYOPS], %o4 2823 brz %o4, 2f 2824 nop 2825 ldn [%o4 + CP_XCOPYOUT], %g2 2826 jmp %g2 2827 nop 28282: 2829 retl 2830 mov %g1, %o0 2831 SET_SIZE(xcopyout) 2832 2833#endif /* lint */ 2834 2835#ifdef lint 2836 2837/*ARGSUSED*/ 2838int 2839xcopyout_little(const void *kaddr, void *uaddr, size_t count) 2840{ return (0); } 2841 2842#else /* lint */ 2843 2844 ENTRY(xcopyout_little) 2845 sethi %hi(.little_err), %o4 2846 ldn [THREAD_REG + T_LOFAULT], %o5 2847 or %o4, %lo(.little_err), %o4 2848 membar #Sync ! sync error barrier 2849 stn %o4, [THREAD_REG + T_LOFAULT] 2850 2851 subcc %g0, %o2, %o3 2852 add %o0, %o2, %o0 2853 bz,pn %ncc, 2f ! check for zero bytes 2854 sub %o2, 1, %o4 2855 add %o0, %o4, %o0 ! start w/last byte 2856 add %o1, %o2, %o1 2857 ldub [%o0+%o3], %o4 2858 28591: stba %o4, [%o1+%o3]ASI_AIUSL 2860 inccc %o3 2861 sub %o0, 2, %o0 ! get next byte 2862 bcc,a,pt %ncc, 1b 2863 ldub [%o0+%o3], %o4 2864 28652: membar #Sync ! sync error barrier 2866 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2867 retl 2868 mov %g0, %o0 ! 
return (0)
	SET_SIZE(xcopyout_little)

#endif	/* lint */

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

#if defined(lint)

/*ARGSUSED*/
int
copyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(copyin)
	sethi	%hi(.copyin_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o4
	or	%o4, %lo(copyio_fault), %o4
	sethi	%hi(copyio_fault_nowindow), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault_nowindow), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dci_ns
	or	%o0, %o1, %o3
	!
	! What was previously ".small_copyin"
	!
.dcibcp:
	sub	%g0, %o2, %o3		! setup for copy loop
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcicl:
	stb	%o4, [%o1 + %o3]
	inccc	%o3
	bl,a,pt %ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dci_ns:
	!
	! See if we're single byte aligned. If we are, check the
	! limit for single byte copies. If we're smaller, or equal,
	! bounce to the byte for byte copy loop. Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,a,pt	%icc, .dcih8
	btst	7, %o3
	!
	! We're single byte aligned.
	!
	sethi	%hi(hw_copy_limit_1), %o3
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	!
	! Is HW copy on? If not, do everything byte for byte.
	!
	tst	%o3
	bz,pn	%icc, .dcibcp
	subcc	%o3, %o2, %o3
	!
	! Are we bigger than the HW limit? If not
	! go to byte for byte.
	!
	bge,pt	%ncc, .dcibcp
	nop
	!
	! We're big enough and copy is on. Do it with HW.
	!
	ba,pt	%ncc, .big_copyin
	nop
.dcih8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcih4
	btst	3, %o3
	!
	! We're eight byte aligned.
	!
	sethi	%hi(hw_copy_limit_8), %o3
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	!
	! Is HW assist on? If not, do it with the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcis8
	subcc	%o3, %o2, %o3
	bge	%ncc, .dcis8
	nop
	ba,pt	%ncc, .big_copyin
	nop
.dcis8:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
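	!
	! The dispatch pattern used at .dcih8/.dcih4/.dcih2 (and at
	! their copyout twins above) is, as a C-like sketch:
	!
	!	if (hw_copy_limit_N == 0)	! HW copy disabled
	!		goto aligned_loop;
	!	if (count <= hw_copy_limit_N)	! too small to bother
	!		goto aligned_loop;
	!	goto big_copyin;		! block copy path
	!
	! The subcc leaves hw_copy_limit_N - count in %o3, so the bge
	! takes the aligned loop on the non-negative (small) cases.
	!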
.dcih4:
	bnz	%ncc, .dcih2
	sethi	%hi(hw_copy_limit_4), %o3
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	!
	! Is HW assist on? If not, do it with the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcis4
	subcc	%o3, %o2, %o3
	!
	! We're negative if our size is larger than hw_copy_limit_4.
	!
	bge	%ncc, .dcis4
	nop
	ba,pt	%ncc, .big_copyin
	nop
.dcis4:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte
	! for byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
.dcih2:
	!
	! We're two byte aligned. Check for "smallness"
	! done in delay at .dcih4
	!
	bleu,pt	%ncc, .dcis2
	sethi	%hi(hw_copy_limit_2), %o3
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	!
	! Is HW assist on? If not, do it with the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcis2
	subcc	%o3, %o2, %o3
	!
	! Are we larger than the HW limit?
	!
	bge	%ncc, .dcis2
	nop
	!
	! HW assist is on and we're large enough to use it.
	!
	ba,pt	%ncc, .big_copyin
	nop
	!
	! Housekeeping for copy loops. Uses same idea as in the byte
	! for byte copy loop above.
	!
.dcis2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
	!
.small_copyin:
	!
	! Why are we doing this AGAIN? There are certain conditions in
	! big copyin that will cause us to forgo the HW assisted copies
	! and bounce back to a non-hw assisted copy. This dispatches
	! those copies. Note that we branch around this in the main line
	! code.
	!
	! We make no check for limits or HW enablement here. We've
	! already been told that we're a poster child so just go off
	! and do it.
	!
	or	%o0, %o1, %o3
	btst	1, %o3
	bnz	%icc, .dcibcp		! Most likely
	btst	7, %o3
	bz	%icc, .dcis8
	btst	3, %o3
	bz	%icc, .dcis4
	nop
	ba,pt	%ncc, .dcis2
	nop
	!
	! Eight byte aligned copies. A steal from the original .small_copyin
	! with modifications. %o2 is number of 8 byte chunks to copy. When
	! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
	! to copy.
	!
	.align 32
.didebc:
	ldxa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	stx	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop. Most 8 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
	!
	.align 32
.didfbc:
	lduwa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	st	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop. Most 4 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
	! copy.
	!
	.align 32
.didtbc:
	lduha	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	sth	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop. Most
	! 2 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Deal with the last byte
	!
	lduba	[%o0 + %o3]ASI_USER, %o4
	stb	%o4, [%o1 + %o3]
.dcifh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.big_copyin:
	! We're going off to do a block copy.
	! Switch fault handlers and grab a window. We
	! don't do a membar #Sync since we've done only
	! kernel data to this point.
	stn	%o4, [THREAD_REG + T_LOFAULT]

	! Copies that reach here are larger than 256 bytes. The
	! hw_copy_limit_1 is set to 256. Never set this limit to
	! less than 128 bytes.
#if !defined(NIAGARA_IMPL)
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp

	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]	! save %fprs
	btst	FPRS_FEF, %o2
	bz,a,pt	%icc, .do_blockcopyin
	wr	%g0, FPRS_FEF, %fprs

	! save in-use fpregs on stack
	BST_FP_TOSTACK(%o2)
#else	/* NIAGARA_IMPL */
	save	%sp, -SA(MINFRAME), %sp
#endif	/* NIAGARA_IMPL */

.do_blockcopyin:

#if !defined(NIAGARA_IMPL)
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	! set the lower bit of the saved t_lofault to indicate that we
	! need to clear the %fprs register on the way out
	or	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
#endif	/* NIAGARA_IMPL */

	! Swap src/dst since the code below is memcpy code
	! and memcpy/bcopy have different calling sequences
	mov	%i1, %i5
	mov	%i0, %i1
	mov	%i5, %i0

	! Block (64 bytes) align the destination.
	andcc	%i0, 0x3f, %i3		! is dst block aligned
	bz	%ncc, copyin_blalign	! dst already block aligned
	sub	%i3, 0x40, %i3
	neg	%i3			! bytes till dst 64 bytes aligned
	sub	%i2, %i3, %i2		! update i2 with new count

	! Based on source and destination alignment do
	! either 8 bytes, 4 bytes, 2 bytes or byte copy.

	! Is dst & src 8B aligned
	or	%i0, %i1, %o2
	andcc	%o2, 0x7, %g0
	bz	%ncc, .ci_alewdcp
	nop

	! Is dst & src 4B aligned
	andcc	%o2, 0x3, %g0
	bz	%ncc, .ci_alwdcp
	nop

	! Is dst & src 2B aligned
	andcc	%o2, 0x1, %g0
	bz	%ncc, .ci_alhlfwdcp
	nop

	! 1B aligned
1:	lduba	[%i1]ASI_USER, %o2
	stb	%o2, [%i0]
	inc	%i1
	deccc	%i3
	bgu,pt	%ncc, 1b
	inc	%i0

	ba	copyin_blalign
	nop

	! dst & src 4B aligned
.ci_alwdcp:
	lda	[%i1]ASI_USER, %o2
	st	%o2, [%i0]
	add	%i1, 0x4, %i1
	subcc	%i3, 0x4, %i3
	bgu,pt	%ncc, .ci_alwdcp
	add	%i0, 0x4, %i0

	ba	copyin_blalign
	nop

	! dst & src 2B aligned
.ci_alhlfwdcp:
	lduha	[%i1]ASI_USER, %o2
	stuh	%o2, [%i0]
	add	%i1, 0x2, %i1
	subcc	%i3, 0x2, %i3
	bgu,pt	%ncc, .ci_alhlfwdcp
	add	%i0, 0x2, %i0

	ba	copyin_blalign
	nop

	! dst & src 8B aligned
.ci_alewdcp:
	ldxa	[%i1]ASI_USER, %o2
	stx	%o2, [%i0]
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, .ci_alewdcp
	add	%i0, 0x8, %i0

copyin_blalign:
	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
	sub	%i2, %i3, %i2		! Residue bytes in %i2

#if !defined(NIAGARA_IMPL)
	mov	ASI_USER, %asi

	andn	%i1, 0x3f, %l0		! %l0 has block aligned src address
	prefetcha [%l0+0x0]%asi, #one_read
	andcc	%i1, 0x3f, %g0		!
is src 64B aligned 3292 bz,pn %ncc, .ci_blkcpy 3293 nop 3294 3295 ! handle misaligned source cases 3296 alignaddr %i1, %g0, %g0 ! generate %gsr 3297 3298 srl %i1, 0x3, %l1 ! src add bits 3, 4, 5 are now least 3299 ! significant in %l1 3300 andcc %l1, 0x7, %l2 ! mask everything except bits 1, 2, 3 3301 add %i1, %i3, %i1 3302 3303 ! switch statement to get to right 8 byte block within 3304 ! 64 byte block 3305 cmp %l2, 0x4 3306 bgeu,a ci_hlf 3307 cmp %l2, 0x6 3308 cmp %l2, 0x2 3309 bgeu,a ci_sqtr 3310 nop 3311 cmp %l2, 0x1 3312 be,a ci_off15 3313 nop 3314 ba ci_off7 3315 nop 3316ci_sqtr: 3317 be,a ci_off23 3318 nop 3319 ba,a ci_off31 3320 nop 3321 3322ci_hlf: 3323 bgeu,a ci_fqtr 3324 nop 3325 cmp %l2, 0x5 3326 be,a ci_off47 3327 nop 3328 ba ci_off39 3329 nop 3330ci_fqtr: 3331 be,a ci_off55 3332 nop 3333 3334 ldda [%l0+0x38]%asi, %d14 3335 prefetcha [%l0+0x40]%asi, #one_read 3336 prefetcha [%l0+0x80]%asi, #one_read 33377: 3338 add %l0, 0x40, %l0 3339 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3340 3341 ldda [%l0]ASI_BLK_AIUS, %d16 3342 ALIGN_OFF_56_63 3343 fmovd %d30, %d14 3344 3345 stda %d48, [%i0]ASI_BLK_P 3346 subcc %i3, 0x40, %i3 3347 add %i0, 0x40, %i0 3348 bgu,pt %ncc, 7b 3349 prefetcha [%l0+0x80]%asi, #one_read 3350 ba .ci_blkdone 3351 membar #Sync 3352 3353ci_off7: 3354 ldda [%l0]ASI_BLK_AIUS, %d0 3355 prefetcha [%l0+0x40]%asi, #one_read 3356 prefetcha [%l0+0x80]%asi, #one_read 33570: 3358 add %l0, 0x40, %l0 3359 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3360 3361 ldda [%l0]ASI_BLK_AIUS, %d16 3362 ALIGN_OFF_1_7 3363 fmovd %d16, %d0 3364 fmovd %d18, %d2 3365 fmovd %d20, %d4 3366 fmovd %d22, %d6 3367 fmovd %d24, %d8 3368 fmovd %d26, %d10 3369 fmovd %d28, %d12 3370 fmovd %d30, %d14 3371 3372 stda %d48, [%i0]ASI_BLK_P 3373 subcc %i3, 0x40, %i3 3374 add %i0, 0x40, %i0 3375 bgu,pt %ncc, 0b 3376 prefetcha [%l0+0x80]%asi, #one_read 3377 ba .ci_blkdone 3378 membar #Sync 3379 3380ci_off15: 3381 ldda [%l0+0x8]%asi, %d2 3382 ldda [%l0+0x10]%asi, %d4 3383 ldda [%l0+0x18]%asi, %d6 3384 ldda [%l0+0x20]%asi, %d8 3385 ldda [%l0+0x28]%asi, %d10 3386 ldda [%l0+0x30]%asi, %d12 3387 ldda [%l0+0x38]%asi, %d14 3388 prefetcha [%l0+0x40]%asi, #one_read 3389 prefetcha [%l0+0x80]%asi, #one_read 33901: 3391 add %l0, 0x40, %l0 3392 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3393 3394 ldda [%l0]ASI_BLK_AIUS, %d16 3395 ALIGN_OFF_8_15 3396 fmovd %d18, %d2 3397 fmovd %d20, %d4 3398 fmovd %d22, %d6 3399 fmovd %d24, %d8 3400 fmovd %d26, %d10 3401 fmovd %d28, %d12 3402 fmovd %d30, %d14 3403 3404 stda %d48, [%i0]ASI_BLK_P 3405 subcc %i3, 0x40, %i3 3406 add %i0, 0x40, %i0 3407 bgu,pt %ncc, 1b 3408 prefetcha [%l0+0x80]%asi, #one_read 3409 ba .ci_blkdone 3410 membar #Sync 3411 3412ci_off23: 3413 ldda [%l0+0x10]%asi, %d4 3414 ldda [%l0+0x18]%asi, %d6 3415 ldda [%l0+0x20]%asi, %d8 3416 ldda [%l0+0x28]%asi, %d10 3417 ldda [%l0+0x30]%asi, %d12 3418 ldda [%l0+0x38]%asi, %d14 3419 prefetcha [%l0+0x40]%asi, #one_read 3420 prefetcha [%l0+0x80]%asi, #one_read 34212: 3422 add %l0, 0x40, %l0 3423 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! 
initialize the cache line 3424 3425 ldda [%l0]ASI_BLK_AIUS, %d16 3426 ALIGN_OFF_16_23 3427 fmovd %d20, %d4 3428 fmovd %d22, %d6 3429 fmovd %d24, %d8 3430 fmovd %d26, %d10 3431 fmovd %d28, %d12 3432 fmovd %d30, %d14 3433 3434 stda %d48, [%i0]ASI_BLK_P 3435 subcc %i3, 0x40, %i3 3436 add %i0, 0x40, %i0 3437 bgu,pt %ncc, 2b 3438 prefetcha [%l0+0x80]%asi, #one_read 3439 ba .ci_blkdone 3440 membar #Sync 3441 3442ci_off31: 3443 ldda [%l0+0x18]%asi, %d6 3444 ldda [%l0+0x20]%asi, %d8 3445 ldda [%l0+0x28]%asi, %d10 3446 ldda [%l0+0x30]%asi, %d12 3447 ldda [%l0+0x38]%asi, %d14 3448 prefetcha [%l0+0x40]%asi, #one_read 3449 prefetcha [%l0+0x80]%asi, #one_read 34503: 3451 add %l0, 0x40, %l0 3452 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3453 3454 ldda [%l0]ASI_BLK_AIUS, %d16 3455 ALIGN_OFF_24_31 3456 fmovd %d22, %d6 3457 fmovd %d24, %d8 3458 fmovd %d26, %d10 3459 fmovd %d28, %d12 3460 fmovd %d30, %d14 3461 3462 stda %d48, [%i0]ASI_BLK_P 3463 subcc %i3, 0x40, %i3 3464 add %i0, 0x40, %i0 3465 bgu,pt %ncc, 3b 3466 prefetcha [%l0+0x80]%asi, #one_read 3467 ba .ci_blkdone 3468 membar #Sync 3469 3470ci_off39: 3471 ldda [%l0+0x20]%asi, %d8 3472 ldda [%l0+0x28]%asi, %d10 3473 ldda [%l0+0x30]%asi, %d12 3474 ldda [%l0+0x38]%asi, %d14 3475 prefetcha [%l0+0x40]%asi, #one_read 3476 prefetcha [%l0+0x80]%asi, #one_read 34774: 3478 add %l0, 0x40, %l0 3479 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3480 3481 ldda [%l0]ASI_BLK_AIUS, %d16 3482 ALIGN_OFF_32_39 3483 fmovd %d24, %d8 3484 fmovd %d26, %d10 3485 fmovd %d28, %d12 3486 fmovd %d30, %d14 3487 3488 stda %d48, [%i0]ASI_BLK_P 3489 subcc %i3, 0x40, %i3 3490 add %i0, 0x40, %i0 3491 bgu,pt %ncc, 4b 3492 prefetcha [%l0+0x80]%asi, #one_read 3493 ba .ci_blkdone 3494 membar #Sync 3495 3496ci_off47: 3497 ldda [%l0+0x28]%asi, %d10 3498 ldda [%l0+0x30]%asi, %d12 3499 ldda [%l0+0x38]%asi, %d14 3500 prefetcha [%l0+0x40]%asi, #one_read 3501 prefetcha [%l0+0x80]%asi, #one_read 35025: 3503 add %l0, 0x40, %l0 3504 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3505 3506 ldda [%l0]ASI_BLK_AIUS, %d16 3507 ALIGN_OFF_40_47 3508 fmovd %d26, %d10 3509 fmovd %d28, %d12 3510 fmovd %d30, %d14 3511 3512 stda %d48, [%i0]ASI_BLK_P 3513 subcc %i3, 0x40, %i3 3514 add %i0, 0x40, %i0 3515 bgu,pt %ncc, 5b 3516 prefetcha [%l0+0x80]%asi, #one_read 3517 ba .ci_blkdone 3518 membar #Sync 3519 3520ci_off55: 3521 ldda [%l0+0x30]%asi, %d12 3522 ldda [%l0+0x38]%asi, %d14 3523 prefetcha [%l0+0x40]%asi, #one_read 3524 prefetcha [%l0+0x80]%asi, #one_read 35256: 3526 add %l0, 0x40, %l0 3527 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3528 3529 ldda [%l0]ASI_BLK_AIUS, %d16 3530 ALIGN_OFF_48_55 3531 fmovd %d28, %d12 3532 fmovd %d30, %d14 3533 3534 stda %d48, [%i0]ASI_BLK_P 3535 subcc %i3, 0x40, %i3 3536 add %i0, 0x40, %i0 3537 bgu,pt %ncc, 6b 3538 prefetcha [%l0+0x80]%asi, #one_read 3539 ba .ci_blkdone 3540 membar #Sync 3541 3542.ci_blkcpy: 3543 prefetcha [%i1+0x40]%asi, #one_read 3544 prefetcha [%i1+0x80]%asi, #one_read 35458: 3546 stxa %g0, [%i0]ASI_BLK_INIT_ST_QUAD_LDD_P ! initialize the cache line 3547 ldda [%i1]ASI_BLK_AIUS, %d0 3548 stda %d0, [%i0]ASI_BLK_P 3549 3550 add %i1, 0x40, %i1 3551 subcc %i3, 0x40, %i3 3552 add %i0, 0x40, %i0 3553 bgu,pt %ncc, 8b 3554 prefetcha [%i1+0x80]%asi, #one_read 3555 membar #Sync 3556 3557.ci_blkdone: 3558#else /* NIAGARA_IMPL */ 3559 mov ASI_BLK_INIT_ST_QUAD_LDD_P, %asi 3560 3561 andcc %i1, 0xf, %o2 ! is src quadword aligned 3562 bz,pn %xcc, .ci_blkcpy ! 
src offset in %o2 (last 4-bits) 3563 nop 3564 cmp %o2, 0x8 3565 bg .ci_upper_double 3566 nop 3567 bl .ci_lower_double 3568 nop 3569 3570 ! Falls through when source offset is equal to 8 i.e. 3571 ! source is double word aligned. 3572 ! In this case no shift/merge of data is required 3573 3574 sub %i1, %o2, %i1 ! align the src at 16 bytes. 3575 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 3576 prefetcha [%l0]ASI_USER, #one_read 3577 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3578 add %l0, 0x40, %l0 3579.ci_loop0: 3580 add %i1, 0x10, %i1 3581 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3582 3583 prefetcha [%l0]ASI_USER, #one_read 3584 3585 stxa %l3, [%i0+0x0]%asi 3586 stxa %l4, [%i0+0x8]%asi 3587 3588 add %i1, 0x10, %i1 3589 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3590 3591 stxa %l5, [%i0+0x10]%asi 3592 stxa %l2, [%i0+0x18]%asi 3593 3594 add %i1, 0x10, %i1 3595 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3596 3597 stxa %l3, [%i0+0x20]%asi 3598 stxa %l4, [%i0+0x28]%asi 3599 3600 add %i1, 0x10, %i1 3601 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3602 3603 stxa %l5, [%i0+0x30]%asi 3604 stxa %l2, [%i0+0x38]%asi 3605 3606 add %l0, 0x40, %l0 3607 subcc %i3, 0x40, %i3 3608 bgu,pt %xcc, .ci_loop0 3609 add %i0, 0x40, %i0 3610 ba .ci_blkdone 3611 add %i1, %o2, %i1 ! increment the source by src offset 3612 ! the src offset was stored in %o2 3613 3614.ci_lower_double: 3615 3616 sub %i1, %o2, %i1 ! align the src at 16 bytes. 3617 sll %o2, 3, %o0 ! %o0 left shift 3618 mov 0x40, %o1 3619 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 3620 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 3621 prefetcha [%l0]ASI_USER, #one_read 3622 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 ! partial data in %l2 3623 ! and %l3 has complete 3624 ! data 3625 add %l0, 0x40, %l0 3626.ci_loop1: 3627 add %i1, 0x10, %i1 3628 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 ! %l4 has partial data 3629 ! for this read. 3630 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) ! merge %l2, %l3 and %l4 3631 ! into %l2 and %l3 3632 3633 prefetcha [%l0]ASI_USER, #one_read 3634 3635 stxa %l2, [%i0+0x0]%asi 3636 stxa %l3, [%i0+0x8]%asi 3637 3638 add %i1, 0x10, %i1 3639 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3640 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) ! merge %l2 with %l5 and 3641 ! %l4 from previous read 3642 ! into %l4 and %l5 3643 stxa %l4, [%i0+0x10]%asi 3644 stxa %l5, [%i0+0x18]%asi 3645 3646 ! Repeat the same for next 32 bytes. 3647 3648 add %i1, 0x10, %i1 3649 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3650 ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6) 3651 3652 stxa %l2, [%i0+0x20]%asi 3653 stxa %l3, [%i0+0x28]%asi 3654 3655 add %i1, 0x10, %i1 3656 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3657 ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6) 3658 3659 stxa %l4, [%i0+0x30]%asi 3660 stxa %l5, [%i0+0x38]%asi 3661 3662 add %l0, 0x40, %l0 3663 subcc %i3, 0x40, %i3 3664 bgu,pt %xcc, .ci_loop1 3665 add %i0, 0x40, %i0 3666 ba .ci_blkdone 3667 add %i1, %o2, %i1 ! increment the source by src offset 3668 ! the src offset was stored in %o2 3669 3670.ci_upper_double: 3671 3672 sub %i1, %o2, %i1 ! align the src at 16 bytes. 3673 sub %o2, 0x8, %o0 3674 sll %o0, 3, %o0 ! %o0 left shift 3675 mov 0x40, %o1 3676 sub %o1, %o0, %o1 ! %o1 right shift = (64 - left shift) 3677 andn %i1, 0x3f, %l0 ! %l0 has block aligned source 3678 prefetcha [%l0]ASI_USER, #one_read 3679 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 ! partial data in %l3 3680 ! for this read and 3681 ! 
no data in %l2 3682 add %l0, 0x40, %l0 3683.ci_loop2: 3684 add %i1, 0x10, %i1 3685 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 ! %l4 has complete data 3686 ! and %l5 has partial 3687 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) ! merge %l3, %l4 and %l5 3688 ! into %l3 and %l4 3689 prefetcha [%l0]ASI_USER, #one_read 3690 3691 stxa %l3, [%i0+0x0]%asi 3692 stxa %l4, [%i0+0x8]%asi 3693 3694 add %i1, 0x10, %i1 3695 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3696 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) ! merge %l2 and %l3 with 3697 ! %l5 from previous read 3698 ! into %l5 and %l2 3699 3700 stxa %l5, [%i0+0x10]%asi 3701 stxa %l2, [%i0+0x18]%asi 3702 3703 ! Repeat the same for next 32 bytes. 3704 3705 add %i1, 0x10, %i1 3706 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3707 ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6) 3708 3709 stxa %l3, [%i0+0x20]%asi 3710 stxa %l4, [%i0+0x28]%asi 3711 3712 add %i1, 0x10, %i1 3713 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3714 ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6) 3715 3716 stxa %l5, [%i0+0x30]%asi 3717 stxa %l2, [%i0+0x38]%asi 3718 3719 add %l0, 0x40, %l0 3720 subcc %i3, 0x40, %i3 3721 bgu,pt %xcc, .ci_loop2 3722 add %i0, 0x40, %i0 3723 ba .ci_blkdone 3724 add %i1, %o2, %i1 ! increment the source by src offset 3725 ! the src offset was stored in %o2 3726 3727 3728 ! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P 3729.ci_blkcpy: 3730 3731 andn %i1, 0x3f, %o0 ! %o0 has block aligned source 3732 prefetcha [%o0]ASI_USER, #one_read 3733 add %o0, 0x40, %o0 37341: 3735 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l0 3736 add %i1, 0x10, %i1 3737 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2 3738 add %i1, 0x10, %i1 3739 3740 prefetcha [%o0]ASI_USER, #one_read 3741 3742 stxa %l0, [%i0+0x0]%asi 3743 3744 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4 3745 add %i1, 0x10, %i1 3746 ldda [%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l6 3747 add %i1, 0x10, %i1 3748 3749 stxa %l1, [%i0+0x8]%asi 3750 stxa %l2, [%i0+0x10]%asi 3751 stxa %l3, [%i0+0x18]%asi 3752 stxa %l4, [%i0+0x20]%asi 3753 stxa %l5, [%i0+0x28]%asi 3754 stxa %l6, [%i0+0x30]%asi 3755 stxa %l7, [%i0+0x38]%asi 3756 3757 add %o0, 0x40, %o0 3758 subcc %i3, 0x40, %i3 3759 bgu,pt %xcc, 1b 3760 add %i0, 0x40, %i0 3761 3762.ci_blkdone: 3763 membar #Sync 3764#endif /* NIAGARA_IMPL */ 3765 3766 brz,pt %i2, .copyin_exit 3767 nop 3768 3769 ! Handle trailing bytes 3770 cmp %i2, 0x8 3771 blu,pt %ncc, .ci_residue 3772 nop 3773 3774 ! Can we do some 8B ops 3775 or %i1, %i0, %o2 3776 andcc %o2, 0x7, %g0 3777 bnz %ncc, .ci_last4 3778 nop 3779 3780 ! Do 8byte ops as long as possible 3781.ci_last8: 3782 ldxa [%i1]ASI_USER, %o2 3783 stx %o2, [%i0] 3784 add %i1, 0x8, %i1 3785 sub %i2, 0x8, %i2 3786 cmp %i2, 0x8 3787 bgu,pt %ncc, .ci_last8 3788 add %i0, 0x8, %i0 3789 3790 brz,pt %i2, .copyin_exit 3791 nop 3792 3793 ba .ci_residue 3794 nop 3795 3796.ci_last4: 3797 ! Can we do 4B ops 3798 andcc %o2, 0x3, %g0 3799 bnz %ncc, .ci_last2 3800 nop 38011: 3802 lda [%i1]ASI_USER, %o2 3803 st %o2, [%i0] 3804 add %i1, 0x4, %i1 3805 sub %i2, 0x4, %i2 3806 cmp %i2, 0x4 3807 bgu,pt %ncc, 1b 3808 add %i0, 0x4, %i0 3809 3810 brz,pt %i2, .copyin_exit 3811 nop 3812 3813 ba .ci_residue 3814 nop 3815 3816.ci_last2: 3817 ! Can we do 2B ops 3818 andcc %o2, 0x1, %g0 3819 bnz %ncc, .ci_residue 3820 nop 3821 38221: 3823 lduha [%i1]ASI_USER, %o2 3824 stuh %o2, [%i0] 3825 add %i1, 0x2, %i1 3826 sub %i2, 0x2, %i2 3827 cmp %i2, 0x2 3828 bgu,pt %ncc, 1b 3829 add %i0, 0x2, %i0 3830 3831 brz,pt %i2, .copyin_exit 3832 nop 3833 3834 ! 
Copy the residue as byte copy 3835.ci_residue: 3836 lduba [%i1]ASI_USER, %i4 3837 stb %i4, [%i0] 3838 inc %i1 3839 deccc %i2 3840 bgu,pt %xcc, .ci_residue 3841 inc %i0 3842 3843.copyin_exit: 3844#if !defined(NIAGARA_IMPL) 3845 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 3846 wr %o2, 0, %gsr ! restore gsr 3847 3848 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 3849 btst FPRS_FEF, %o3 3850 bz %icc, 4f 3851 nop 3852 3853 ! restore fpregs from stack 3854 BLD_FP_FROMSTACK(%o2) 3855 3856 ba,pt %ncc, 2f 3857 wr %o3, 0, %fprs ! restore fprs 3858 38594: 3860 FZERO ! zero all of the fpregs 3861 wr %o3, 0, %fprs ! restore fprs 3862 38632: 3864 membar #Sync ! sync error barrier 3865 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT 3866#else /* NIAGARA_IMPL */ 3867 membar #Sync 3868#endif /* NIAGARA_IMPL */ 3869 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3870 ret 3871 restore %g0, 0, %o0 3872.copyin_err: 3873 ldn [THREAD_REG + T_COPYOPS], %o4 3874 brz %o4, 2f 3875 nop 3876 ldn [%o4 + CP_COPYIN], %g2 3877 jmp %g2 3878 nop 38792: 3880 retl 3881 mov -1, %o0 3882 SET_SIZE(copyin) 3883 3884#endif /* lint */ 3885 3886#ifdef lint 3887 3888/*ARGSUSED*/ 3889int 3890xcopyin(const void *uaddr, void *kaddr, size_t count) 3891{ return (0); } 3892 3893#else /* lint */ 3894 3895 ENTRY(xcopyin) 3896 sethi %hi(.xcopyin_err), REAL_LOFAULT 3897 b .do_copyin 3898 or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT 3899.xcopyin_err: 3900 ldn [THREAD_REG + T_COPYOPS], %o4 3901 brz %o4, 2f 3902 nop 3903 ldn [%o4 + CP_XCOPYIN], %g2 3904 jmp %g2 3905 nop 39062: 3907 retl 3908 mov %g1, %o0 3909 SET_SIZE(xcopyin) 3910 3911#endif /* lint */ 3912 3913#ifdef lint 3914 3915/*ARGSUSED*/ 3916int 3917xcopyin_little(const void *uaddr, void *kaddr, size_t count) 3918{ return (0); } 3919 3920#else /* lint */ 3921 3922 ENTRY(xcopyin_little) 3923 sethi %hi(.little_err), %o4 3924 ldn [THREAD_REG + T_LOFAULT], %o5 3925 or %o4, %lo(.little_err), %o4 3926 membar #Sync ! sync error barrier 3927 stn %o4, [THREAD_REG + T_LOFAULT] 3928 3929 subcc %g0, %o2, %o3 3930 add %o0, %o2, %o0 3931 bz,pn %ncc, 2f ! check for zero bytes 3932 sub %o2, 1, %o4 3933 add %o0, %o4, %o0 ! start w/last byte 3934 add %o1, %o2, %o1 3935 lduba [%o0+%o3]ASI_AIUSL, %o4 3936 39371: stb %o4, [%o1+%o3] 3938 inccc %o3 3939 sub %o0, 2, %o0 ! get next byte 3940 bcc,a,pt %ncc, 1b 3941 lduba [%o0+%o3]ASI_AIUSL, %o4 3942 39432: membar #Sync ! sync error barrier 3944 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3945 retl 3946 mov %g0, %o0 ! return (0) 3947 3948.little_err: 3949 membar #Sync ! sync error barrier 3950 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3951 retl 3952 mov %g1, %o0 3953 SET_SIZE(xcopyin_little) 3954 3955#endif /* lint */ 3956 3957 3958/* 3959 * Copy a block of storage - must not overlap (from + len <= to). 3960 * No fault handler installed (to be called under on_fault()) 3961 */ 3962#if defined(lint) 3963 3964/* ARGSUSED */ 3965void 3966copyin_noerr(const void *ufrom, void *kto, size_t count) 3967{} 3968 3969#else /* lint */ 3970 3971 ENTRY(copyin_noerr) 3972 sethi %hi(.copyio_noerr), REAL_LOFAULT 3973 b .do_copyin 3974 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT 3975.copyio_noerr: 3976 jmp SAVED_LOFAULT 3977 nop 3978 SET_SIZE(copyin_noerr) 3979 3980#endif /* lint */ 3981 3982/* 3983 * Copy a block of storage - must not overlap (from + len <= to). 
3984 * No fault handler installed (to be called under on_fault()) 3985 */ 3986 3987#if defined(lint) 3988 3989/* ARGSUSED */ 3990void 3991copyout_noerr(const void *kfrom, void *uto, size_t count) 3992{} 3993 3994#else /* lint */ 3995 3996 ENTRY(copyout_noerr) 3997 sethi %hi(.copyio_noerr), REAL_LOFAULT 3998 b .do_copyout 3999 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT 4000 SET_SIZE(copyout_noerr) 4001 4002#endif /* lint */ 4003 4004#if defined(lint) 4005 4006int use_hw_bcopy = 1; 4007int use_hw_bzero = 1; 4008uint_t hw_copy_limit_1 = 0x100; 4009uint_t hw_copy_limit_2 = 0x200; 4010uint_t hw_copy_limit_4 = 0x400; 4011uint_t hw_copy_limit_8 = 0x400; 4012 4013#else /* !lint */ 4014 4015 .align 4 4016 DGDEF(use_hw_bcopy) 4017 .word 1 4018 DGDEF(use_hw_bzero) 4019 .word 1 4020 DGDEF(hw_copy_limit_1) 4021 .word 0x100 4022 DGDEF(hw_copy_limit_2) 4023 .word 0x200 4024 DGDEF(hw_copy_limit_4) 4025 .word 0x400 4026 DGDEF(hw_copy_limit_8) 4027 .word 0x400 4028 4029 .align 64 4030 .section ".text" 4031#endif /* !lint */ 4032 4033/* 4034 * hwblkclr - clears block-aligned, block-multiple-sized regions that are 4035 * longer than 256 bytes in length using Niagara's block stores/quad store. 4036 * If the criteria for using this routine are not met then it calls bzero 4037 * and returns 1. Otherwise 0 is returned indicating success. 4038 * Caller is responsible for ensuring use_hw_bzero is true and that 4039 * kpreempt_disable() has been called. 4040 */ 4041#ifdef lint 4042/*ARGSUSED*/ 4043int 4044hwblkclr(void *addr, size_t len) 4045{ 4046 return(0); 4047} 4048#else /* lint */ 4049 ! %i0 - start address 4050 ! %i1 - length of region (multiple of 64) 4051 4052 ENTRY(hwblkclr) 4053 save %sp, -SA(MINFRAME), %sp 4054 4055 ! Must be block-aligned 4056 andcc %i0, 0x3f, %g0 4057 bnz,pn %ncc, 1f 4058 nop 4059 4060 ! ... and must be 256 bytes or more 4061 cmp %i1, 0x100 4062 blu,pn %ncc, 1f 4063 nop 4064 4065 ! ... and length must be a multiple of 64 4066 andcc %i1, 0x3f, %g0 4067 bz,pn %ncc, .pz_doblock 4068 mov ASI_BLK_INIT_ST_QUAD_LDD_P, %asi 4069 40701: ! punt, call bzero but notify the caller that bzero was used 4071 mov %i0, %o0 4072 call bzero 4073 mov %i1, %o1 4074 ret 4075 restore %g0, 1, %o0 ! return (1) - did not use block operations 4076 4077 ! Already verified that there are at least 256 bytes to set 4078.pz_doblock: 4079 stxa %g0, [%i0+0x0]%asi 4080 stxa %g0, [%i0+0x40]%asi 4081 stxa %g0, [%i0+0x80]%asi 4082 stxa %g0, [%i0+0xc0]%asi 4083 4084 stxa %g0, [%i0+0x8]%asi 4085 stxa %g0, [%i0+0x10]%asi 4086 stxa %g0, [%i0+0x18]%asi 4087 stxa %g0, [%i0+0x20]%asi 4088 stxa %g0, [%i0+0x28]%asi 4089 stxa %g0, [%i0+0x30]%asi 4090 stxa %g0, [%i0+0x38]%asi 4091 4092 stxa %g0, [%i0+0x48]%asi 4093 stxa %g0, [%i0+0x50]%asi 4094 stxa %g0, [%i0+0x58]%asi 4095 stxa %g0, [%i0+0x60]%asi 4096 stxa %g0, [%i0+0x68]%asi 4097 stxa %g0, [%i0+0x70]%asi 4098 stxa %g0, [%i0+0x78]%asi 4099 4100 stxa %g0, [%i0+0x88]%asi 4101 stxa %g0, [%i0+0x90]%asi 4102 stxa %g0, [%i0+0x98]%asi 4103 stxa %g0, [%i0+0xa0]%asi 4104 stxa %g0, [%i0+0xa8]%asi 4105 stxa %g0, [%i0+0xb0]%asi 4106 stxa %g0, [%i0+0xb8]%asi 4107 4108 stxa %g0, [%i0+0xc8]%asi 4109 stxa %g0, [%i0+0xd0]%asi 4110 stxa %g0, [%i0+0xd8]%asi 4111 stxa %g0, [%i0+0xe0]%asi 4112 stxa %g0, [%i0+0xe8]%asi 4113 stxa %g0, [%i0+0xf0]%asi 4114 stxa %g0, [%i0+0xf8]%asi 4115 4116 sub %i1, 0x100, %i1 4117 cmp %i1, 0x100 4118 bgu,pt %ncc, .pz_doblock 4119 add %i0, 0x100, %i0 4120 41212: 4122 ! 
Check if more than 64 bytes to set 4123 cmp %i1,0x40 4124 blu %ncc, .pz_finish 4125 nop 4126 41273: 4128 stxa %g0, [%i0+0x0]%asi 4129 stxa %g0, [%i0+0x8]%asi 4130 stxa %g0, [%i0+0x10]%asi 4131 stxa %g0, [%i0+0x18]%asi 4132 stxa %g0, [%i0+0x20]%asi 4133 stxa %g0, [%i0+0x28]%asi 4134 stxa %g0, [%i0+0x30]%asi 4135 stxa %g0, [%i0+0x38]%asi 4136 4137 subcc %i1, 0x40, %i1 4138 bgu,pt %ncc, 3b 4139 add %i0, 0x40, %i0 4140 4141.pz_finish: 4142 membar #Sync 4143 ret 4144 restore %g0, 0, %o0 ! return (bzero or not) 4145 SET_SIZE(hwblkclr) 4146#endif /* lint */ 4147 4148#ifdef lint 4149/* Copy 32 bytes of data from src to dst using physical addresses */ 4150/*ARGSUSED*/ 4151void 4152hw_pa_bcopy32(uint64_t src, uint64_t dst) 4153{} 4154#else /*!lint */ 4155 4156 /* 4157 * Copy 32 bytes of data from src (%o0) to dst (%o1) 4158 * using physical addresses. 4159 */ 4160 ENTRY_NP(hw_pa_bcopy32) 4161 rdpr %pstate, %g1 4162 andn %g1, PSTATE_IE, %g2 4163 wrpr %g0, %g2, %pstate 4164 4165 ldxa [%o0]ASI_MEM, %o2 4166 add %o0, 8, %o0 4167 ldxa [%o0]ASI_MEM, %o3 4168 add %o0, 8, %o0 4169 ldxa [%o0]ASI_MEM, %o4 4170 add %o0, 8, %o0 4171 ldxa [%o0]ASI_MEM, %o5 4172 stxa %o2, [%o1]ASI_MEM 4173 add %o1, 8, %o1 4174 stxa %o3, [%o1]ASI_MEM 4175 add %o1, 8, %o1 4176 stxa %o4, [%o1]ASI_MEM 4177 add %o1, 8, %o1 4178 stxa %o5, [%o1]ASI_MEM 4179 4180 membar #Sync 4181 retl 4182 wrpr %g0, %g1, %pstate 4183 SET_SIZE(hw_pa_bcopy32) 4184#endif /* lint */ 4185 4186/* 4187 * Zero a block of storage. 4188 * 4189 * uzero is used by the kernel to zero a block in user address space. 4190 */ 4191 4192/* 4193 * Control flow of the bzero/kzero/uzero routine. 4194 * 4195 * For fewer than 7 bytes stores, bytes will be zeroed. 4196 * 4197 * For less than 15 bytes stores, align the address on 4 byte boundary. 4198 * Then store as many 4-byte chunks, followed by trailing bytes. 4199 * 4200 * For sizes greater than 15 bytes, align the address on 8 byte boundary. 4201 * if (count > 128) { 4202 * store as many 8-bytes chunks to block align the address 4203 * store using ASI_BLK_INIT_ST_QUAD_LDD_P (bzero/kzero) OR 4204 * store using ASI_BLK_INIT_QUAD_LDD_AIUS (uzero) 4205 * } 4206 * Store as many 8-byte chunks, followed by trailing bytes. 4207 */ 4208 4209#if defined(lint) 4210 4211/* ARGSUSED */ 4212int 4213kzero(void *addr, size_t count) 4214{ return(0); } 4215 4216/* ARGSUSED */ 4217void 4218uzero(void *addr, size_t count) 4219{} 4220 4221#else /* lint */ 4222 4223 ENTRY(uzero) 4224 ! 4225 ! Set a new lo_fault handler only if we came in with one 4226 ! already specified. 4227 ! 4228 wr %g0, ASI_USER, %asi 4229 ldn [THREAD_REG + T_LOFAULT], %o5 4230 tst %o5 4231 bz,pt %ncc, .do_zero 4232 sethi %hi(.zeroerr), %o2 4233 or %o2, %lo(.zeroerr), %o2 4234 membar #Sync 4235 ba,pt %ncc, .do_zero 4236 stn %o2, [THREAD_REG + T_LOFAULT] 4237 4238 ENTRY(kzero) 4239 ! 4240 ! Always set a lo_fault handler 4241 ! 4242 wr %g0, ASI_P, %asi 4243 ldn [THREAD_REG + T_LOFAULT], %o5 4244 sethi %hi(.zeroerr), %o2 4245 or %o5, LOFAULT_SET, %o5 4246 or %o2, %lo(.zeroerr), %o2 4247 membar #Sync 4248 ba,pt %ncc, .do_zero 4249 stn %o2, [THREAD_REG + T_LOFAULT] 4250 4251/* 4252 * We got here because of a fault during kzero or if 4253 * uzero or bzero was called with t_lofault non-zero. 4254 * Otherwise we've already run screaming from the room. 4255 * Errno value is in %g1. Note that we're here iff 4256 * we did set t_lofault. 4257 */ 4258.zeroerr: 4259 ! 4260 ! Undo asi register setting. Just set it to be the 4261 ! kernel default without checking. 4262 ! 
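	!
	! As a C-like sketch, the error exit below behaves as follows
	! (the errno arrives in %g1):
	!
	!	if (o5 == 0)
	!		return (errno);		! we installed no handler
	!	o5 &= ~LOFAULT_SET;
	!	curthread->t_lofault = o5;	! restore, possibly to zero
	!	if (o5 == 0)
	!		return (errno);		! kzero with no old handler
	!	goto *o5;			! chain to the old handler
	!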
4263 wr %g0, ASI_P, %asi 4264 4265 ! 4266 ! We did set t_lofault. It may well have been zero coming in. 4267 ! 42681: 4269 tst %o5 4270 membar #Sync 4271 bne,pn %ncc, 3f 4272 andncc %o5, LOFAULT_SET, %o5 42732: 4274 ! 4275 ! Old handler was zero. Just return the error. 4276 ! 4277 retl ! return 4278 mov %g1, %o0 ! error code from %g1 42793: 4280 ! 4281 ! We're here because %o5 was non-zero. It was non-zero 4282 ! because either LOFAULT_SET was present, a previous fault 4283 ! handler was present or both. In all cases we need to reset 4284 ! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET 4285 ! before we either simply return the error or we invoke the 4286 ! previously specified handler. 4287 ! 4288 be %ncc, 2b 4289 stn %o5, [THREAD_REG + T_LOFAULT] 4290 jmp %o5 ! goto real handler 4291 nop 4292 SET_SIZE(kzero) 4293 SET_SIZE(uzero) 4294 4295#endif /* lint */ 4296 4297/* 4298 * Zero a block of storage. 4299 */ 4300 4301#if defined(lint) 4302 4303/* ARGSUSED */ 4304void 4305bzero(void *addr, size_t count) 4306{} 4307 4308#else /* lint */ 4309 4310 ENTRY(bzero) 4311 wr %g0, ASI_P, %asi 4312 4313 ldn [THREAD_REG + T_LOFAULT], %o5 ! save old vector 4314 tst %o5 4315 bz,pt %ncc, .do_zero 4316 sethi %hi(.zeroerr), %o2 4317 or %o2, %lo(.zeroerr), %o2 4318 membar #Sync ! sync error barrier 4319 stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector 4320 4321.do_zero: 4322 cmp %o1, 7 4323 blu,pn %ncc, .byteclr 4324 nop 4325 4326 cmp %o1, 15 4327 blu,pn %ncc, .wdalign 4328 nop 4329 4330 andcc %o0, 7, %o3 ! is add aligned on a 8 byte bound 4331 bz,pt %ncc, .blkalign ! already double aligned 4332 sub %o3, 8, %o3 ! -(bytes till double aligned) 4333 add %o1, %o3, %o1 ! update o1 with new count 4334 43351: 4336 stba %g0, [%o0]%asi 4337 inccc %o3 4338 bl,pt %ncc, 1b 4339 inc %o0 4340 4341 ! Now address is double aligned 4342.blkalign: 4343 cmp %o1, 0x80 ! check if there are 128 bytes to set 4344 blu,pn %ncc, .bzero_small 4345 mov %o1, %o3 4346 4347 sethi %hi(use_hw_bzero), %o2 4348 ld [%o2 + %lo(use_hw_bzero)], %o2 4349 tst %o2 4350 bz %ncc, .bzero_small 4351 mov %o1, %o3 4352 4353 rd %asi, %o3 4354 wr %g0, ASI_BLK_INIT_ST_QUAD_LDD_P, %asi 4355 cmp %o3, ASI_P 4356 bne,a %ncc, .algnblk 4357 wr %g0, ASI_BLK_INIT_QUAD_LDD_AIUS, %asi 4358 4359.algnblk: 4360 andcc %o0, 0x3f, %o3 ! is block aligned? 4361 bz,pt %ncc, .bzero_blk 4362 sub %o3, 0x40, %o3 ! -(bytes till block aligned) 4363 add %o1, %o3, %o1 ! o1 is the remainder 4364 4365 ! Clear -(%o3) bytes till block aligned 43661: 4367 stxa %g0, [%o0]%asi 4368 addcc %o3, 8, %o3 4369 bl,pt %ncc, 1b 4370 add %o0, 8, %o0 4371 4372.bzero_blk: 4373 and %o1, 0x3f, %o3 ! calc bytes left after blk clear 4374 andn %o1, 0x3f, %o4 ! calc size of blocks in bytes 4375 4376 cmp %o4, 0x100 ! 
/*
 * Zero a block of storage.
 */

#if defined(lint)

/* ARGSUSED */
void
bzero(void *addr, size_t count)
{}

#else	/* lint */

	ENTRY(bzero)
	wr	%g0, ASI_P, %asi

	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync				! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

.do_zero:
	cmp	%o1, 7
	blu,pn	%ncc, .byteclr
	nop

	cmp	%o1, 15
	blu,pn	%ncc, .wdalign
	nop

	andcc	%o0, 7, %o3		! is address aligned on an 8-byte bound
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o1, %o3, %o1		! update o1 with new count

1:
	stba	%g0, [%o0]%asi
	inccc	%o3
	bl,pt	%ncc, 1b
	inc	%o0

	! Now the address is double aligned
.blkalign:
	cmp	%o1, 0x80		! check if there are 128 bytes to set
	blu,pn	%ncc, .bzero_small
	mov	%o1, %o3

	sethi	%hi(use_hw_bzero), %o2
	ld	[%o2 + %lo(use_hw_bzero)], %o2
	tst	%o2
	bz	%ncc, .bzero_small
	mov	%o1, %o3

	rd	%asi, %o3
	wr	%g0, ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
	cmp	%o3, ASI_P
	bne,a	%ncc, .algnblk
	wr	%g0, ASI_BLK_INIT_QUAD_LDD_AIUS, %asi

.algnblk:
	andcc	%o0, 0x3f, %o3		! is block aligned?
	bz,pt	%ncc, .bzero_blk
	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
	add	%o1, %o3, %o1		! o1 is the remainder

	! Clear -(%o3) bytes till block aligned
1:
	stxa	%g0, [%o0]%asi
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o0, 8, %o0

.bzero_blk:
	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! 256 bytes or more
	blu,pn	%ncc, 3f
	nop
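
	!
	! The loop below clears 256 bytes (four 64-byte cache lines) per
	! iteration. The first four stores touch offsets 0x0, 0x40, 0x80
	! and 0xc0 -- the first doubleword of each line. With the
	! block-init ASIs selected above, a store to the first doubleword
	! of a line can allocate the line without fetching its old
	! contents from memory, so opening all four lines up front is
	! presumably intended to avoid read-before-write traffic while
	! the remaining doublewords are filled in.
	!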
2:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x40]%asi
	stxa	%g0, [%o0+0x80]%asi
	stxa	%g0, [%o0+0xc0]%asi

	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	stxa	%g0, [%o0+0x48]%asi
	stxa	%g0, [%o0+0x50]%asi
	stxa	%g0, [%o0+0x58]%asi
	stxa	%g0, [%o0+0x60]%asi
	stxa	%g0, [%o0+0x68]%asi
	stxa	%g0, [%o0+0x70]%asi
	stxa	%g0, [%o0+0x78]%asi

	stxa	%g0, [%o0+0x88]%asi
	stxa	%g0, [%o0+0x90]%asi
	stxa	%g0, [%o0+0x98]%asi
	stxa	%g0, [%o0+0xa0]%asi
	stxa	%g0, [%o0+0xa8]%asi
	stxa	%g0, [%o0+0xb0]%asi
	stxa	%g0, [%o0+0xb8]%asi

	stxa	%g0, [%o0+0xc8]%asi
	stxa	%g0, [%o0+0xd0]%asi
	stxa	%g0, [%o0+0xd8]%asi
	stxa	%g0, [%o0+0xe0]%asi
	stxa	%g0, [%o0+0xe8]%asi
	stxa	%g0, [%o0+0xf0]%asi
	stxa	%g0, [%o0+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100
	bgu,pt	%ncc, 2b
	add	%o0, 0x100, %o0

3:
	! ... check if 64 bytes to set
	cmp	%o4, 0x40
	blu	%ncc, .bzero_blk_done
	nop

4:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 3b
	add	%o0, 0x40, %o0

.bzero_blk_done:
	membar	#Sync
	!
	! Undo the asi register setting.
	!
	rd	%asi, %o4
	wr	%g0, ASI_P, %asi
	cmp	%o4, ASI_BLK_INIT_ST_QUAD_LDD_P
	bne,a	%ncc, .bzero_small
	wr	%g0, ASI_USER, %asi

.bzero_small:
	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .byteclr
	and	%o1, 7, %o1		! calc bytes left after doubles

.dbclr:
	stxa	%g0, [%o0]%asi		! Clear the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .dbclr
	add	%o0, 8, %o0

	ba	.byteclr
	nop

.wdalign:
	andcc	%o0, 3, %o3		! is address aligned on a word boundary
	bz,pn	%ncc, .wdclr
	andn	%o1, 3, %o3		! create word sized count in %o3

	dec	%o1			! decrement count
	stba	%g0, [%o0]%asi		! clear a byte
	ba	.wdalign
	inc	%o0			! next byte

.wdclr:
	sta	%g0, [%o0]%asi		! 4-byte clearing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wdclr
	inc	4, %o0

	and	%o1, 3, %o1		! leftover count, if any

.byteclr:
	! Set the leftover bytes
	brz	%o1, .bzero_exit
	nop

7:
	deccc	%o1			! byte clearing loop
	stba	%g0, [%o0]%asi
	bgu,pt	%ncc, 7b
	inc	%o0

.bzero_exit:
	!
	! We're just concerned with whether t_lofault was set
	! when we came in. We end up here from either kzero()
	! or bzero(). kzero() *always* sets a lofault handler.
	! It ors LOFAULT_SET into %o5 to indicate it has done
	! this even if the value of %o5 is otherwise zero.
	! bzero() sets a lofault handler *only* if one was
	! previously set. Accordingly we need to examine
	! %o5 and, if it is non-zero, be sure to clear LOFAULT_SET
	! before resetting the error handler.
	!
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	retl
	clr	%o0			! return (0)

	SET_SIZE(bzero)
#endif	/* lint */
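
/*
 * Caller-side contract of the zeroing entry points, as a hedged C
 * sketch (illustrative only; the authoritative prototypes live in
 * the kernel headers):
 *
 *	bzero(kaddr, len);		// kernel address; no return value
 *	uzero(uaddr, len);		// user address, via ASI_USER
 *	err = kzero(kaddr, len);	// returns 0, or an errno value if
 *					// a fault was taken mid-clear
 */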