1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22/* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27#pragma ident "%Z%%M% %I% %E% SMI" 28 29#include <sys/param.h> 30#include <sys/errno.h> 31#include <sys/asm_linkage.h> 32#include <sys/vtrace.h> 33#include <sys/machthread.h> 34#include <sys/clock.h> 35#include <sys/asi.h> 36#include <sys/fsr.h> 37#include <sys/privregs.h> 38 39#if !defined(lint) 40#include "assym.h" 41#endif /* lint */ 42 43 44/* 45 * Less then or equal this number of bytes we will always copy byte-for-byte 46 */ 47#define SMALL_LIMIT 7 48 49/* 50 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault 51 * handler was set 52 */ 53#define LOFAULT_SET 2 54 55 56/* 57 * Copy a block of storage, returning an error code if `from' or 58 * `to' takes a kernel pagefault which cannot be resolved. 
 * Returns errno value on pagefault error, 0 if all ok
 */



#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return(0); }

#else	/* lint */

	.seg	".text"
	.align	4

/*
 * kcopy(from, to, count)
 *
 * Same copy engine as bcopy (falls into .do_copy below), but installs
 * .copyerr as the t_lofault handler first: on a kernel pagefault in
 * either buffer the trap code delivers errno in %g1 and we return it;
 * on success the common code returns 0.
 */
	ENTRY(kcopy)

	save	%sp, -SA(MINFRAME), %sp
	set	.copyerr, %l7			! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	or	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	b	.do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault (delay slot)

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
.copyerr:
	! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
	! into %o5 to indicate it has set t_lofault handler. Need to clear
	! LOFAULT_SET flag before restoring the error handler.
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g1, 0, %o0			! return errno from %g1

	SET_SIZE(kcopy)
#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 */
#if defined(lint)

/* ARGSUSED */
void
bcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(bcopy)

	save	%sp, -SA(MINFRAME), %sp
	clr	%o5			! flag LOFAULT_SET is not set for bcopy

.do_copy:
	cmp	%i2, 12			! for small counts
	blu	%ncc, .bytecp		! just copy bytes
	.empty

	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	.aldoubcp		! can align on double boundary
	.empty				! assembler complains about label

	xor	%i0, %i1, %o4		! xor from and to address
	btst	3, %o4			! if lower two bits zero
	bz	.alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! 
! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
	mov	32, %l5			! load an oft-needed constant
	bz	.align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	.align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
	bgu	%ncc, .more_needed	! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
.more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
.align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
.xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out	! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
.unalign_out:
	tst	%l4			! any bytes leftover?
	bz	%ncc, .cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	%ncc, .cpdone		! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	%ncc, 1b		! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	.dbytecp		! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
	!
	! the destination address is aligned and the source is not
	!
.align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	.align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	.xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is, if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
.aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	blu,a	%ncc, .alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	.alignit		! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldx	[%i0+%i1], %o4		! read from address
	stx	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 8, %i1		! delay slot, inc to address
	cmp	%i2, 4			! see if we can copy a word
	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
.wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	.dbytecp

	! we come here to align copies on word boundaries
.alwordcp:
	call	.alignit		! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	.wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
.bytecp:
	b	.dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
.dbytecp:
	deccc	%i2			! dec count
	bgeu,a	%ncc, 1b		! loop till done
	ldub	[%i0+%i1], %o4		! read from address
.cpdone:
	membar	#Sync			! sync error barrier
	! Restore t_lofault handler, if came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	ret
	restore	%g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

#endif	/* lint */

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else /* lint */
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)
	set	PAGESIZE, %i2

	/*
	 * Copying exactly one page and PAGESIZE is a multiple of 0x80.
	 * Each iteration moves one 128-byte chunk via sixteen 8-byte
	 * load/store pairs.
	 */
1:
	ldx	[%i0+0x0], %l0
	ldx	[%i0+0x8], %l1
	ldx	[%i0+0x10], %l2
	ldx	[%i0+0x18], %l3
	ldx	[%i0+0x20], %l4
	ldx	[%i0+0x28], %l5
	ldx	[%i0+0x30], %l6
	ldx	[%i0+0x38], %l7
	stx	%l0, [%i1+0x0]
	stx	%l1, [%i1+0x8]
	stx	%l2, [%i1+0x10]
	stx	%l3, [%i1+0x18]
	stx	%l4, [%i1+0x20]
	stx	%l5, [%i1+0x28]
	stx	%l6, [%i1+0x30]
	stx	%l7, [%i1+0x38]

	ldx	[%i0+0x40], %l0
	ldx	[%i0+0x48], %l1
	ldx	[%i0+0x50], %l2
	ldx	[%i0+0x58], %l3
	ldx	[%i0+0x60], %l4
	ldx	[%i0+0x68], %l5
	ldx	[%i0+0x70], %l6
	ldx	[%i0+0x78], %l7
	stx	%l0, [%i1+0x40]
	stx	%l1, [%i1+0x48]
	stx	%l2, [%i1+0x50]
	stx	%l3, [%i1+0x58]
	stx	%l4, [%i1+0x60]
	stx	%l5, [%i1+0x68]
	stx	%l6, [%i1+0x70]
	stx	%l7, [%i1+0x78]

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	add	%i1, 0x80, %i1

	membar	#Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)
#endif	/* lint */


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there's two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
519 * 520 * There are also stub routines for xcopyout_little and xcopyin_little, 521 * which currently are intended to handle requests of <= 16 bytes from 522 * do_unaligned. Future enhancement to make them handle 8k pages efficiently 523 * is left as an exercise... 524 */ 525 526/* 527 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr) 528 * 529 * General theory of operation: 530 * 531 * None of the copyops routines grab a window. 532 * 533 * Flow: 534 * 535 * If count == zero return zero. 536 * 537 * Store the previous lo_fault handler into %g6. 538 * Place our secondary lofault handler into %g5. 539 * Place the address of our fault handler into %o3. 540 * 541 * If count is less than or equal to SMALL_LIMIT (7) we 542 * always do a byte for byte copy. 543 * 544 * If count is > SMALL_LIMIT, we check the alignment of the input 545 * and output pointers. We store -count in %o3, we store the number 546 * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop 547 * in %o2. Following this we branch to the appropriate copy loop and 548 * copy that many chunks. Since we've been adding the chunk size 549 * to %o3 each time through as well as decrementing %o2, we can tell 550 * if any data is is left to be copied by examining %o3. If that is 551 * zero, we're done and can go home. If not, we figure out what the 552 * largest chunk size left to be copied is and branch to that copy 553 * loop unless there's only one byte left. We load that as we're 554 * branching to code that stores it just before we return. 555 * 556 * Fault handlers are invoked if we reference memory that has no 557 * current mapping. All forms share the same copyio_fault handler. 558 * This routine handles fixing up the stack and general housecleaning. 559 * Each copy operation has a simple fault handler that is then called 560 * to do the work specific to the invidual operation. The handler 561 * for copyOP and xcopyOP are found at the end of individual function. 
 * The handlers for xcopyOP_little are found at the end of xcopyin_little.
 * The handlers for copyOP_noerr are found at the end of copyin_noerr.
 */

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in
 * REAL_LOFAULT.  This allows us to share common code for all the flavors
 * of the copy operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
	ENTRY(copyio_fault)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0			! restore original arguments
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT			! vector to op-specific handler
	mov	SAVE_COUNT, %o2			! (delay slot)
	SET_SIZE(copyio_fault)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3

.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o0 | %o1 (used for alignment checking)
	! %o4 is alternate lo_fault
	! %o5 is original lo_fault
	!
	! See if we're single byte aligned. If we are, check the
	! limit for single byte copies. If we're smaller or equal,
	! bounce to the byte for byte copy loop. Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3

	ba	.dcobcp
	nop
.dcoh8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcoh4
	btst	3, %o3
.dcos8:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcoh4:
	bnz,pn	%ncc, .dcoh2
	nop
.dcos4:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
	!
	! We must be 2 byte aligned. Off we go.
	! The check for small copies was done in the
	! delay at .dcoh4
	!
.dcoh2:
.dcos2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.dodebc:
	ldx	[%o0 + %o3], %o4
	deccc	%o2
	stxa	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop. Check to see if we're done. Most
	! eight byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left - do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
	!
	.align 32
.dodfbc:
	lduw	[%o0 + %o3], %o4
	deccc	%o2
	sta	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop. Check to see if we're done. Most
	! four byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
	! copy.
	!
	.align 32
.dodtbc:
	lduh	[%o0 + %o3], %o4
	deccc	%o2
	stha	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop.  Anything left?
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Deal with the last byte
	!
	ldub	[%o0 + %o3], %o4
	stba	%o4, [%o1 + %o3]ASI_USER
.dcofh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyout)

#endif	/* lint */


#ifdef lint

/*ARGSUSED*/
int
xcopyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyout)
	sethi	%hi(.xcopyout_err), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
.xcopyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0		! return errno computed by trap code
	SET_SIZE(xcopyout)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyout_little(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyout_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	ldub	[%o0+%o3], %o4

1:	stba	%o4, [%o1+%o3]ASI_AIUSL
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	ldub	[%o0+%o3], %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)
	SET_SIZE(xcopyout_little)

#endif	/* lint */

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

#if defined(lint)

/*ARGSUSED*/
int
copyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(copyin)
	sethi	%hi(.copyin_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dci_ns
	or	%o0, %o1, %o3

.dcibcp:
	sub	%g0, %o2, %o3		! setup for copy loop
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcicl:
	stb	%o4, [%o1 + %o3]
	inccc	%o3
	bl,a,pt %ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dci_ns:
	!
	! See if we're single byte aligned. If we are, check the
	! limit for single byte copies. If we're smaller, or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,a,pt	%icc, .dcih8
	btst	7, %o3
	ba	.dcibcp
	nop

.dcih8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcih4
	btst	3, %o3
.dcis8:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcih4:
	bnz	%ncc, .dcih2
	nop
.dcis4:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte
	! for byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
.dcih2:
.dcis2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.didebc:
	ldxa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	stx	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop. Most 8 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
	!
	.align 32
.didfbc:
	lduwa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	st	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop. Most 4 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
	! copy.
	!
	.align 32
.didtbc:
	lduha	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	sth	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop. Most 2 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Deal with the last byte
	!
	lduba	[%o0 + %o3]ASI_USER, %o4
	stb	%o4, [%o1 + %o3]
.dcifh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyin)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyin)
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0		! return errno computed by trap code
	SET_SIZE(xcopyin)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin_little(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	lduba	[%o0+%o3]ASI_AIUSL, %o4

1:	stb	%o4, [%o1+%o3]
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	lduba	[%o0+%o3]ASI_AIUSL, %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)

.little_err:
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0		! return errno from %g1

	SET_SIZE(xcopyin_little)

#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */
#if defined(lint)

/* ARGSUSED */
void
copyin_noerr(const void *ufrom, void *kto, size_t count)
{}

#else	/* lint */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
.copyio_noerr:
	jmp	SAVED_LOFAULT		! chain to the on_fault() handler
	nop
	SET_SIZE(copyin_noerr)

#endif	/* lint */

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

#if defined(lint)

/* ARGSUSED */
void
copyout_noerr(const void *kfrom, void *uto, size_t count)
{}

#else	/* lint */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
	SET_SIZE(copyout_noerr)

#endif	/* lint */

#if defined(lint)

int use_hw_bcopy = 1;
int use_hw_bzero = 1;

#else /* !lint */

	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1

	.align	64
	.section ".text"
#endif /* !lint */


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * longer than 256 bytes in length using load/stores.  If
If
 * the criteria for using this routine are not met then it calls bzero
 * and returns 1. Otherwise 0 is returned indicating success.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
#ifdef lint
/*ARGSUSED*/
int
hwblkclr(void *addr, size_t len)
{
	return(0);
}
#else /* lint */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)

	ENTRY(hwblkclr)
	save	%sp, -SA(MINFRAME), %sp

	! Must be block-aligned
	andcc	%i0, 0x3f, %g0
	bnz,pn	%ncc, 1f
	nop

	! ... and must be 256 bytes or more
	cmp	%i1, 0x100
	blu,pn	%ncc, 1f
	nop

	! ... and length must be a multiple of 64
	andcc	%i1, 0x3f, %g0
	bz,pn	%ncc, .pz_doblock
	nop

1:	! punt, call bzero but notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1			! delay slot: second bzero arg
	ret
	restore	%g0, 1, %o0	! return (1) - did not use block operations

	! Already verified that there are at least 256 bytes to set
.pz_doblock:
	!
	! Clear 256 bytes per iteration with 8-byte stores.  The first four
	! stores touch the start of each of the four 64-byte blocks; the
	! remaining stores fill in the rest of each block.
	!
	stx	%g0, [%i0+0x0]
	stx	%g0, [%i0+0x40]
	stx	%g0, [%i0+0x80]
	stx	%g0, [%i0+0xc0]

	stx	%g0, [%i0+0x8]
	stx	%g0, [%i0+0x10]
	stx	%g0, [%i0+0x18]
	stx	%g0, [%i0+0x20]
	stx	%g0, [%i0+0x28]
	stx	%g0, [%i0+0x30]
	stx	%g0, [%i0+0x38]

	stx	%g0, [%i0+0x48]
	stx	%g0, [%i0+0x50]
	stx	%g0, [%i0+0x58]
	stx	%g0, [%i0+0x60]
	stx	%g0, [%i0+0x68]
	stx	%g0, [%i0+0x70]
	stx	%g0, [%i0+0x78]

	stx	%g0, [%i0+0x88]
	stx	%g0, [%i0+0x90]
	stx	%g0, [%i0+0x98]
	stx	%g0, [%i0+0xa0]
	stx	%g0, [%i0+0xa8]
	stx	%g0, [%i0+0xb0]
	stx	%g0, [%i0+0xb8]

	stx	%g0, [%i0+0xc8]
	stx	%g0, [%i0+0xd0]
	stx	%g0, [%i0+0xd8]
	stx	%g0, [%i0+0xe0]
	stx	%g0, [%i0+0xe8]
	stx	%g0, [%i0+0xf0]
	stx	%g0, [%i0+0xf8]

	sub	%i1, 0x100, %i1
	cmp	%i1, 0x100			! loop while > 256 bytes remain
	bgu,pt	%ncc, .pz_doblock
	add	%i0, 0x100, %i0			! delay slot: advance pointer

2:
	! Check if more than 64 bytes to set
	cmp	%i1,0x40
	blu	%ncc, .pz_finish
	nop

3:
	! Clear one 64-byte block per iteration
	stx	%g0, [%i0+0x0]
	stx	%g0, [%i0+0x8]
	stx	%g0, [%i0+0x10]
	stx	%g0, [%i0+0x18]
	stx	%g0, [%i0+0x20]
	stx	%g0, [%i0+0x28]
	stx	%g0, [%i0+0x30]
	stx	%g0, [%i0+0x38]

	subcc	%i1, 0x40, %i1
	bgu,pt	%ncc, 3b
	add	%i0, 0x40, %i0			! delay slot: advance pointer

.pz_finish:
	membar	#Sync
	ret
	restore	%g0, 0, %o0		! return (bzero or not)
	SET_SIZE(hwblkclr)
#endif /* lint */

#ifdef lint
/* Copy 32 bytes of data from src to dst using physical addresses */
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else /*!lint */

	/*
	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
	 * using physical addresses.
	 */
	ENTRY_NP(hw_pa_bcopy32)
	! Disable interrupts while referencing physical memory through
	! ASI_MEM; %g1 holds the original %pstate for restore on exit.
	rdpr	%pstate, %g1
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g0, %g2, %pstate

	! Load all four doublewords before storing any of them.
	ldxa	[%o0]ASI_MEM, %o2
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5
	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	membar	#Sync
	retl
	wrpr	%g0, %g1, %pstate	! delay slot: restore interrupt state
	SET_SIZE(hw_pa_bcopy32)
#endif /* lint */

/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */


#if defined(lint)

/* ARGSUSED */
int
kzero(void *addr, size_t count)
{ return(0); }

/* ARGSUSED */
void
uzero(void *addr, size_t count)
{}

#else /* lint */

	!
	! uzero - zero user memory: select ASI_USER for the stores in
	! .do_zero.  %o5 carries the saved t_lofault value into the common
	! code (zero here means no handler was installed on entry).
	!
	ENTRY(uzero)
	!
	! Set a new lo_fault handler only if we came in with one
	! already specified.
	!
	wr	%g0, ASI_USER, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]	! delay slot: install handler

	!
	! kzero - zero kernel memory via ASI_P.  Unlike uzero/bzero it always
	! installs .zeroerr, and ORs LOFAULT_SET into the saved handler value
	! so the error/exit paths know the handler must be restored.
	!
	ENTRY(kzero)
	!
	! Always set a lo_fault handler
	!
	wr	%g0, ASI_P, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	sethi	%hi(.zeroerr), %o2
	or	%o5, LOFAULT_SET, %o5		! flag: kzero set the handler
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]	! delay slot: install handler

/*
 * We got here because of a fault during kzero or if
 * uzero or bzero was called with t_lofault non-zero.
 * Otherwise we've already run screaming from the room.
 * Errno value is in %g1. Note that we're here iff
 * we did set t_lofault.
 */
.zeroerr:
	!
	! Undo asi register setting. Just set it to be the
	! kernel default without checking.
	!
	wr	%g0, ASI_P, %asi

	!
	! We did set t_lofault. It may well have been zero coming in.
	!
1:
	tst	%o5
	membar	#Sync
	bne,pn	%ncc, 3f
	andncc	%o5, LOFAULT_SET, %o5	! delay slot: strip flag, set cc for 3:
2:
	!
	! Old handler was zero. Just return the error.
	!
	retl				! return
	mov	%g1, %o0		! error code from %g1
3:
	!
	! We're here because %o5 was non-zero. It was non-zero
	! because either LOFAULT_SET was present, a previous fault
	! handler was present or both. In all cases we need to reset
	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
	! before we either simply return the error or we invoke the
	! previously specified handler.
	!
	be	%ncc, 2b		! cc from andncc above: no prior handler
	stn	%o5, [THREAD_REG + T_LOFAULT]	! delay slot: restore handler
	jmp	%o5			! goto real handler
	nop
	SET_SIZE(kzero)
	SET_SIZE(uzero)

#endif /* lint */

/*
 * Zero a block of storage.
 */

#if defined(lint)

/* ARGSUSED */
void
bzero(void *addr, size_t count)
{}

#else /* lint */

	!
	! bzero - zero kernel memory (%o0 = addr, %o1 = count) via ASI_P.
	! Installs .zeroerr only if a t_lofault handler already existed
	! (saved in %o5); falls into the common .do_zero body shared with
	! uzero/kzero.
	!
	ENTRY(bzero)
	wr	%g0, ASI_P, %asi

	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync				! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

.do_zero:
	! count < 8: not worth aligning, clear byte-by-byte
	cmp	%o1, 7
	blu,pn	%ncc, .byteclr
	nop

	! count < 15: cannot guarantee a full doubleword after aligning,
	! so use the word-aligned path instead
	cmp	%o1, 15
	blu,pn	%ncc, .wdalign
	nop

	andcc	%o0, 7, %o3		! is add aligned on a 8 byte bound
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o1, %o3, %o1		! update o1 with new count

	! Clear -(%o3) leading bytes to reach 8-byte alignment
1:
	stba	%g0, [%o0]%asi
	inccc	%o3
	bl,pt	%ncc, 1b
	inc	%o0			! delay slot: next byte

	! Now address is double aligned
.blkalign:
	cmp	%o1, 0x80		! check if there are 128 bytes to set
	blu,pn	%ncc, .bzero_small
	mov	%o1, %o3		! delay slot: %o3 = remaining count

	andcc	%o0, 0x3f, %o3		! is block aligned?
	bz,pt	%ncc, .bzero_blk
	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
	add	%o1, %o3, %o1		! o1 is the remainder

	! Clear -(%o3) bytes till block aligned
1:
	stxa	%g0, [%o0]%asi
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o0, 8, %o0		! delay slot: advance pointer

.bzero_blk:
	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! 256 bytes or more
	blu,pn	%ncc, 3f
	nop

	!
	! Clear 256 bytes per iteration: touch the start of each of the
	! four 64-byte blocks first, then fill in the remainder of each.
	!
2:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x40]%asi
	stxa	%g0, [%o0+0x80]%asi
	stxa	%g0, [%o0+0xc0]%asi

	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	stxa	%g0, [%o0+0x48]%asi
	stxa	%g0, [%o0+0x50]%asi
	stxa	%g0, [%o0+0x58]%asi
	stxa	%g0, [%o0+0x60]%asi
	stxa	%g0, [%o0+0x68]%asi
	stxa	%g0, [%o0+0x70]%asi
	stxa	%g0, [%o0+0x78]%asi

	stxa	%g0, [%o0+0x88]%asi
	stxa	%g0, [%o0+0x90]%asi
	stxa	%g0, [%o0+0x98]%asi
	stxa	%g0, [%o0+0xa0]%asi
	stxa	%g0, [%o0+0xa8]%asi
	stxa	%g0, [%o0+0xb0]%asi
	stxa	%g0, [%o0+0xb8]%asi

	stxa	%g0, [%o0+0xc8]%asi
	stxa	%g0, [%o0+0xd0]%asi
	stxa	%g0, [%o0+0xd8]%asi
	stxa	%g0, [%o0+0xe0]%asi
	stxa	%g0, [%o0+0xe8]%asi
	stxa	%g0, [%o0+0xf0]%asi
	stxa	%g0, [%o0+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100		! loop while > 256 bytes remain
	bgu,pt	%ncc, 2b
	add	%o0, 0x100, %o0		! delay slot: advance pointer

3:
	! Check if 64 bytes to set
	cmp	%o4, 0x40
	blu	%ncc, .bzero_blk_done
	nop

4:
	! Clear one 64-byte block, then re-run the check at 3:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 3b
	add	%o0, 0x40, %o0		! delay slot: advance pointer

.bzero_blk_done:
	membar	#Sync

.bzero_small:
	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .byteclr
	and	%o1, 7, %o1		! calc bytes left after doubles

.dbclr:
	stxa	%g0, [%o0]%asi		! Clear the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .dbclr
	add	%o0, 8, %o0		! delay slot: advance pointer

	ba	.byteclr
	nop

.wdalign:
	andcc	%o0, 3, %o3		! is add aligned on a word boundary
	bz,pn	%ncc, .wdclr
	andn	%o1, 3, %o3		! create word sized count in %o3

	dec	%o1			! decrement count
	stba	%g0, [%o0]%asi		! clear a byte
	ba	.wdalign
	inc	%o0			! next byte

.wdclr:
	sta	%g0, [%o0]%asi		! 4-byte clearing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wdclr
	inc	4, %o0			! delay slot: advance pointer

	and	%o1, 3, %o1		! leftover count, if any

.byteclr:
	! Set the leftover bytes
	brz	%o1, .bzero_exit
	nop

7:
	deccc	%o1			! byte clearing loop
	stba	%g0, [%o0]%asi
	bgu,pt	%ncc, 7b
	inc	%o0			! delay slot: next byte

.bzero_exit:
	!
	! We're just concerned with whether t_lofault was set
	! when we came in. We end up here from either kzero()
	! or bzero(). kzero() *always* sets a lofault handler.
	! It ors LOFAULT_SET into %o5 to indicate it has done
	! this even if the value of %o5 is otherwise zero.
	! bzero() sets a lofault handler *only* if one was
	! previously set. Accordingly we need to examine
	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
	! before resetting the error handler.
	!
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5	! delay slot: strip LOFAULT_SET flag
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	retl
	clr	%o0			! return (0)

	SET_SIZE(bzero)
#endif /* lint */