/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>
#include <sys/machasi.h>
#include <sys/niagaraasi.h>

#if !defined(lint)
#include "assym.h"
#endif	/* lint */


/*
 * Pseudo-code to aid in understanding the control flow of the
 * bcopy/kcopy routine.
 *
 * ! WARNING : <Register usage convention>
 * ! In kcopy(), %o5 holds the previous error handler and a flag
 * ! LOFAULT_SET (low bits).  %o5 is null in bcopy().
 * ! %o5 is not available for any other use.
 *
 * kcopy():
 *	%o5 = curthread->t_lofault;	! save existing handler in %o5
 *	%o5 |= LOFAULT_SET;		! OR in the LOFAULT_SET flag
 *	curthread->t_lofault = .copyerr;
 *	Call bcopy();
 *
 * bcopy():
 *	if (length < 128)
 *		goto regular_copy;
 *
 *	if (!use_hw_bcopy)
 *		goto regular_copy;
 *
 *	blockcopy;
 *	restore t_lofault handler if came from kcopy();
 *
 *	regular_copy;
 *	restore t_lofault handler if came from kcopy();
 *
 * In lofault handler:
 *	curthread->t_lofault = (%o5 & ~LOFAULT_SET);	! restore old t_lofault
 *	return (errno)
 */

/*
 * Less than or equal to this number of bytes we will always copy byte-for-byte
 */
#define	SMALL_LIMIT	7

/*
 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that a t_lofault
 * handler was set
 */
#define	LOFAULT_SET 2

/*
 * This define is to align data for the unaligned source cases.
 * data1, data2 and data3 are merged into data1 and data2.
 * data3 is preserved for the next merge.
 */
#define	ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp)	\
	sllx	data1, lshift, data1				;\
	srlx	data2, rshift, tmp				;\
	or	data1, tmp, data1				;\
	sllx	data2, lshift, data2				;\
	srlx	data3, rshift, tmp				;\
	or	data2, tmp, data2
/*
 * This macro is to align the data.  Basically it merges
 * data1 and data2 to form a double word.
 */
#define	ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp)	\
	sllx	data1, lshift, data1				;\
	srlx	data2, rshift, tmp				;\
	or	data1, tmp, data1
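
/*
 * Illustrative C sketch of the shift/merge the two macros above perform
 * (explanation only, not compiled; align_ew is a hypothetical name).
 * For a source misaligned by `off' bytes, lshift = off * 8 and
 * rshift = 64 - lshift, and each aligned double word of output is built
 * from two adjacent source double words:
 *
 *	uint64_t
 *	align_ew(uint64_t data1, uint64_t data2, int lshift, int rshift)
 *	{
 *		return ((data1 << lshift) | (data2 >> rshift));
 *	}
 *
 * ALIGN_DATA applies the same merge to two double words at once and
 * leaves data3 intact so the next iteration can reuse it.
 */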

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */

#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return(0); }

#else	/* lint */

	.seg	".text"
	.align	4

	ENTRY(kcopy)

	save	%sp, -SA(MINFRAME), %sp
	set	.copyerr, %l7			! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	or	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	b	.do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
.copyerr:
	! kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
	! into %o5 to indicate it has set a t_lofault handler.  Need to clear
	! the LOFAULT_SET flag before restoring the error handler.
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g1, 0, %o0

	SET_SIZE(kcopy)
#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 */
#if defined(lint)

/* ARGSUSED */
void
bcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(bcopy)

	save	%sp, -SA(MINFRAME), %sp
	clr	%o5			! flag LOFAULT_SET is not set for bcopy

.do_copy:
	cmp	%i2, 12			! for small counts
	blu	%ncc, .bytecp		! just copy bytes
	.empty

	cmp	%i2, 128		! for less than 128 bytes
	blu,pn	%ncc, .bcb_punt		! no block st/quad ld
	nop

	set	use_hw_bcopy, %o2
	ld	[%o2], %o2
	tst	%o2
	bz	.bcb_punt
	nop

	subcc	%i1, %i0, %i3
	bneg,a,pn %ncc, 1f
	neg	%i3
1:
	/*
	 * Compare against 256 since we should be checking block addresses
	 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
	 * src = dest + (64 * 3) + 63: with dest = 0 and src = 255, for
	 * example, the addresses are 255 bytes but only 3 blocks apart.
	 */
	cmp	%i3, 256
	blu,pn	%ncc, .bcb_punt
	nop

	/*
	 * Copies that reach here have at least 2 blocks of data to copy.
	 */
.do_blockcopy:
	! Swap src/dst since the code below is memcpy code
	! and memcpy/bcopy have different calling sequences
	mov	%i1, %i5
	mov	%i0, %i1
	mov	%i5, %i0

	andcc	%i0, 0x3f, %i3		! is dst aligned on a 64 byte boundary
	bz	%xcc, .chksrc		! dst is already block aligned
	sub	%i3, 0x40, %i3
	neg	%i3			! bytes till dst 64 bytes aligned
	sub	%i2, %i3, %i2		! update i2 with new count

1:	ldub	[%i1], %i4
	stb	%i4, [%i0]
	inc	%i1
	deccc	%i3
	bgu	%xcc, 1b
	inc	%i0

	! Now Destination is block (64 bytes) aligned
.chksrc:
	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
	sub	%i2, %i3, %i2		! Residue bytes in %i2

	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi

	andcc	%i1, 0xf, %o2		! is src quadword aligned
	bz,pn	%xcc, .blkcpy		! src offset in %o2
	nop
	cmp	%o2, 0x8
	bg	.cpy_upper_double
	nop
	bl	.cpy_lower_double
	nop

	! Falls through when source offset is equal to 8 i.e.
	! source is double word aligned.
	! In this case no shift/merge of data is required
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2
loop0:
	ldda	[%i1+0x10]%asi, %l4
	prefetch [%l0+0x40], #one_read

	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	stxa	%l5, [%i0+0x10]%asi
	stxa	%l2, [%i0+0x18]%asi

	ldda	[%i1+0x30]%asi, %l4
	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop0
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2

.cpy_lower_double:
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	sll	%o2, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2	! partial data in %l2 and %l3 has
					! complete data
loop1:
	ldda	[%i1+0x10]%asi, %l4	! %l4 has partial data for this read.
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
							! into %l2 and %l3
	prefetch [%l0+0x40], #one_read
	stxa	%l2, [%i0+0x0]%asi
	stxa	%l3, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)	! merge %l2 with %l5 and
	stxa	%l4, [%i0+0x10]%asi			! %l4 from previous read
	stxa	%l5, [%i0+0x18]%asi			! into %l4 and %l5

	! Repeat the same for next 32 bytes.

	ldda	[%i1+0x30]%asi, %l4
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)
	stxa	%l2, [%i0+0x20]%asi
	stxa	%l3, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)
	stxa	%l4, [%i0+0x30]%asi
	stxa	%l5, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop1
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2

.cpy_upper_double:
	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	mov	0x8, %o0
	sub	%o2, %o0, %o0
	sll	%o0, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1+0x0]%asi, %l2	! partial data in %l3 for this read and
					! no data in %l2
loop2:
	ldda	[%i1+0x10]%asi, %l4	! %l4 has complete data and %l5 has
					! partial
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
							! into %l3 and %l4
	prefetch [%l0+0x40], #one_read
	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	ldda	[%i1+0x20]%asi, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)	! merge %l2 and %l3 with
	stxa	%l5, [%i0+0x10]%asi			! %l5 from previous read
	stxa	%l2, [%i0+0x18]%asi			! into %l5 and %l2

	! Repeat the same for next 32 bytes.

	ldda	[%i1+0x30]%asi, %l4
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)
	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	ldda	[%i1+0x40]%asi, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)
	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, loop2
	add	%i0, 0x40, %i0
	ba	.blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2
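
	/*
	 * Worked example of the shift counts above (illustrative only):
	 * for a source offset of 3 within the quadword, .cpy_lower_double
	 * uses left shift = 3 * 8 = 24 and right shift = 64 - 24 = 40, so
	 * each output double word takes the low 5 bytes of one input
	 * double word (shifted up) and the high 3 bytes of the next
	 * (shifted down).  For an offset of 11, .cpy_upper_double first
	 * subtracts 8 and then applies the same shifts relative to the
	 * second double word of the quadword.
	 */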

	! Both Source and Destination are block aligned.
	! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P
.blkcpy:
	prefetch [%i1+0x0], #one_read
1:
	ldda	[%i1+0x0]%asi, %l0
	ldda	[%i1+0x10]%asi, %l2
	prefetch [%i1+0x40], #one_read

	stxa	%l0, [%i0+0x0]%asi
	ldda	[%i1+0x20]%asi, %l4
	ldda	[%i1+0x30]%asi, %l6

	stxa	%l1, [%i0+0x8]%asi
	stxa	%l2, [%i0+0x10]%asi
	stxa	%l3, [%i0+0x18]%asi
	stxa	%l4, [%i0+0x20]%asi
	stxa	%l5, [%i0+0x28]%asi
	stxa	%l6, [%i0+0x30]%asi
	stxa	%l7, [%i0+0x38]%asi

	add	%i1, 0x40, %i1
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, 1b
	add	%i0, 0x40, %i0

.blkdone:
	tst	%i2
	bz,pt	%xcc, .blkexit
	nop

.residue:
	ldub	[%i1], %i4
	stb	%i4, [%i0]
	inc	%i1
	deccc	%i2
	bgu	%xcc, .residue
	inc	%i0

.blkexit:
	membar	#Sync				! sync error barrier
	! Restore t_lofault handler, if we came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	ret
	restore	%g0, 0, %o0

.bcb_punt:
	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	.aldoubcp		! can align on double boundary
	.empty				! assembler complains about label

	xor	%i0, %i1, %o4		! xor from and to address
	btst	3, %o4			! if lower two bits zero
	bz	.alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
	mov	32, %l5			! load an oft-needed constant
	bz	.align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	.align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
	bgu	%ncc, .more_needed	! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
.more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
.align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
.xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out	! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
.unalign_out:
	tst	%l4			! any bytes leftover?
	bz	%ncc, .cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	%ncc, .cpdone		! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	%ncc, 1b		! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	.dbytecp		! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
	!
	! the destination address is aligned and the source is not
	!
.align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	.align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	.xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is, if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
.aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	blu,a	%ncc, .alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	.alignit		! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldx	[%i0+%i1], %o4		! read from address
	stx	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 8, %i1		! delay slot, inc to address
	cmp	%i2, 4			! see if we can copy a word
	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
.wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	.dbytecp

	! we come here to align copies on word boundaries
.alwordcp:
	call	.alignit		! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	.wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
.bytecp:
	b	.dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
.dbytecp:
	deccc	%i2			! dec count
	bgeu,a	%ncc, 1b		! loop till done
	ldub	[%i0+%i1], %o4		! read from address
.cpdone:
	membar	#Sync			! sync error barrier
	! Restore t_lofault handler, if we came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	ret
	restore	%g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				! no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

#endif	/* lint */
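
/*
 * Illustrative C sketch of the ovbcopy() decision above (explanation
 * only, not compiled): if the regions cannot overlap, fall through to
 * the faster bcopy(); otherwise pick the copy direction that never
 * clobbers unread source bytes.
 *
 *	if (count <= labs(from - to))
 *		bcopy(from, to, count);
 *	else if (from >= to)
 *		while (count--)		// copy forwards
 *			*to++ = *from++;
 *	else
 *		while (count--)		// copy backwards
 *			to[count] = from[count];
 */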

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else	/* lint */
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME + 4*64), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)

	set	PAGESIZE, %i2

	/*
	 * Copying exactly one page; PAGESIZE is a multiple of 0x80.
	 */
	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
	prefetch [%i0+0x0], #one_read
	prefetch [%i0+0x40], #one_read
1:
	prefetch [%i0+0x80], #one_read
	prefetch [%i0+0xc0], #one_read
	ldda	[%i0+0x0]%asi, %l0
	ldda	[%i0+0x10]%asi, %l2
	ldda	[%i0+0x20]%asi, %l4
	ldda	[%i0+0x30]%asi, %l6
	stxa	%l0, [%i1+0x0]%asi
	stxa	%l1, [%i1+0x8]%asi
	stxa	%l2, [%i1+0x10]%asi
	stxa	%l3, [%i1+0x18]%asi
	stxa	%l4, [%i1+0x20]%asi
	stxa	%l5, [%i1+0x28]%asi
	stxa	%l6, [%i1+0x30]%asi
	stxa	%l7, [%i1+0x38]%asi
	ldda	[%i0+0x40]%asi, %l0
	ldda	[%i0+0x50]%asi, %l2
	ldda	[%i0+0x60]%asi, %l4
	ldda	[%i0+0x70]%asi, %l6
	stxa	%l0, [%i1+0x40]%asi
	stxa	%l1, [%i1+0x48]%asi
	stxa	%l2, [%i1+0x50]%asi
	stxa	%l3, [%i1+0x58]%asi
	stxa	%l4, [%i1+0x60]%asi
	stxa	%l5, [%i1+0x68]%asi
	stxa	%l6, [%i1+0x70]%asi
	stxa	%l7, [%i1+0x78]%asi

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	add	%i1, 0x80, %i1

	membar	#Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)
#endif	/* lint */


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults.
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned.  Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */

/*
 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * None of the copyops routines grab a window until it's decided that
 * we need to do a HW block copy operation.  This saves a window
 * spill/fill when we're called during socket ops.  The typical IO
 * path won't cause spill/fill traps.
 *
 * This code uses a set of 4 limits for the maximum size that will
 * be copied given a particular input/output address alignment.
 * The default limits are:
 *
 * single byte aligned - 256 (hw_copy_limit_1)
 * two byte aligned - 512 (hw_copy_limit_2)
 * four byte aligned - 1024 (hw_copy_limit_4)
 * eight byte aligned - 1024 (hw_copy_limit_8)
 *
 * If the value for a particular limit is zero, the copy will be done
 * via the copy loops rather than block store/quad load instructions.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous lo_fault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our nowindow fault handler into %o3.
 * Place the address of the windowed fault handler into %o4.
 * --> We'll use this handler if we end up grabbing a window
 * --> before we use block initializing store and quad load ASIs
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers.  Based on the alignment we check count
 * against a limit based on detected alignment.  If count exceeds
 * the limit, we copy via block initializing store and quad load
 * instructions.
 *
 * If we don't exceed one of the limits, we store -count in %o3,
 * we store the number of chunks (8, 4, 2 or 1 byte) operated
 * on in our basic copy loop in %o2.  Following this we branch
 * to the appropriate copy loop and copy that many chunks.
 * Since we've been adding the chunk size to %o3 each time through
 * as well as decrementing %o2, we can tell if any data is
 * left to be copied by examining %o3.  If that is zero, we're
 * done and can go home.  If not, we figure out what the largest
 * chunk size left to be copied is and branch to that copy loop
 * unless there's only one byte left.  We load that as we're
 * branching to code that stores it just before we return.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping.  All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handlers
 * for copyOP and xcopyOP are found at the end of each individual function.
 * The handlers for xcopyOP_little are found at the end of xcopyin_little.
 * The handlers for copyOP_noerr are found at the end of copyin_noerr.
 */
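
/*
 * Illustrative C sketch of the size/alignment dispatch just described
 * (explanation only, not compiled; hw_copy_limit_N are the tunables
 * listed above):
 *
 *	align = kaddr | uaddr;
 *	if (count <= SMALL_LIMIT)
 *		byte_for_byte_loop();
 *	else if (align & 1)
 *		(hw_copy_limit_1 && count > hw_copy_limit_1) ?
 *		    hw_block_copy() : byte_for_byte_loop();
 *	else if ((align & 7) == 0)
 *		(hw_copy_limit_8 && count > hw_copy_limit_8) ?
 *		    hw_block_copy() : eight_byte_loop();
 *	else if ((align & 3) == 0)
 *		(hw_copy_limit_4 && count > hw_copy_limit_4) ?
 *		    hw_block_copy() : four_byte_loop();
 *	else
 *		(hw_copy_limit_2 && count > hw_copy_limit_2) ?
 *		    hw_block_copy() : two_byte_loop();
 */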

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
	ENTRY(copyio_fault)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	restore

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault)

	ENTRY(copyio_fault_nowindow)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault_nowindow)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o4
	or	%o4, %lo(copyio_fault), %o4
	sethi	%hi(copyio_fault_nowindow), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault_nowindow), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3
	!
	! What was previously ".small_copyout"
	! Do full differenced copy.
	!
.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers.  We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers.  By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer.  A very fast 4 instruction loop.
	!
	.align 16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4
	!
	! We're done.  Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
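
	/*
	 * Illustrative C equivalent of the loop above (explanation only,
	 * not compiled):
	 *
	 *	off = -count;
	 *	from += count;
	 *	to += count;
	 *	do {
	 *		to[off] = from[off];	// store via ASI_USER
	 *	} while (++off != 0);
	 *
	 * One load, one store, one increment and one branch per byte,
	 * with no separate pointer updates.
	 */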
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o0 | %o1 (used for alignment checking)
	! %o4 is alternate lo_fault
	! %o5 is original lo_fault
	!
	! See if we're single byte aligned.  If we are, check the
	! limit for single byte copies.  If we're smaller or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3
	!
	! Single byte aligned.  Do we do it via HW or via
	! byte for byte?  Do a quick no memory reference
	! check to pick up small copies.
	!
	sethi	%hi(hw_copy_limit_1), %o3
	!
	! Big enough that we need to check the HW limit for
	! this size copy.
	!
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	!
	! Is HW copy on?  If not, do everything byte for byte.
	!
	tst	%o3
	bz,pn	%icc, .dcobcp
	subcc	%o3, %o2, %o3
	!
	! If we're less than or equal to the single byte copy limit,
	! bop to the copy loop.
	!
	bge,pt	%ncc, .dcobcp
	nop
	!
	! We're big enough and copy is on.  Do it with HW.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcoh8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcoh4
	btst	3, %o3
	!
	! See if we're in the "small range".
	! If so, go off and do the copy.
	! If not, load the hard limit.  %o3 is
	! available for reuse.
	!
	sethi	%hi(hw_copy_limit_8), %o3
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	!
	! If it's zero, there's no HW bcopy.
	! Bop off to the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcos8
	subcc	%o3, %o2, %o3
	!
	! We're negative if our size is larger than hw_copy_limit_8.
	!
	bge,pt	%ncc, .dcos8
	nop
	!
	! HW assist is on and we're large enough.  Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos8:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcoh4:
	bnz,pn	%ncc, .dcoh2
	!
	! See if we're in the "small range".
	! If so, go off and do the copy.
	! If not, load the hard limit.  %o3 is
	! available for reuse.
	!
	sethi	%hi(hw_copy_limit_4), %o3
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	!
	! If it's zero, there's no HW bcopy.
	! Bop off to the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcos4
	subcc	%o3, %o2, %o3
	!
	! We're negative if our size is larger than hw_copy_limit_4.
	!
	bge,pt	%ncc, .dcos4
	nop
	!
	! HW assist is on and we're large enough.  Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos4:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
	!
	! We must be 2 byte aligned.  Off we go.
	! The check for small copies was done in the
	! delay at .dcoh4
	!
.dcoh2:
	ble	%ncc, .dcos2
	sethi	%hi(hw_copy_limit_2), %o3
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .dcos2
	subcc	%o3, %o2, %o3
	bge,pt	%ncc, .dcos2
	nop
	!
	! HW is on and we're big enough.  Do it.
	!
	ba,pt	%ncc, .big_copyout
	nop
.dcos2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
.small_copyout:
	!
	! Why are we doing this AGAIN?  There are certain conditions in
	! big_copyout that will cause us to forego the HW assisted copies
	! and bounce back to a non-HW assisted copy.  This dispatches those
	! copies.  Note that we branch around this in the main line code.
	!
	! We make no check for limits or HW enablement here.  We've
	! already been told that we're a poster child so just go off
	! and do it.
	!
	or	%o0, %o1, %o3
	btst	1, %o3
	bnz	%icc, .dcobcp		! Most likely
	btst	7, %o3
	bz	%icc, .dcos8
	btst	3, %o3
	bz	%icc, .dcos4
	nop
	ba,pt	%ncc, .dcos2
	nop
	.align 32
.dodebc:
	ldx	[%o0 + %o3], %o4
	deccc	%o2
	stxa	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop.  Check to see if we're done.  Most
	! eight byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left - do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! Four byte copy loop.  %o2 is the number of 4 byte chunks to copy.
	!
	.align 32
.dodfbc:
	lduw	[%o0 + %o3], %o4
	deccc	%o2
	sta	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop.  Check to see if we're done.  Most
	! four byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! two byte aligned copy loop.  %o2 is the number of 2 byte chunks to
	! copy.
	!
	.align 32
.dodtbc:
	lduh	[%o0 + %o3], %o4
	deccc	%o2
	stha	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop.  Anything left?
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Deal with the last byte
	!
	ldub	[%o0 + %o3], %o4
	stba	%o4, [%o1 + %o3]ASI_USER
.dcofh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.big_copyout:
	!
	! We're going to go off and do a block copy.
	! Switch fault handlers and grab a window.  We
	! don't do a membar #Sync since we've done only
	! kernel data to this point.
	!
	stn	%o4, [THREAD_REG + T_LOFAULT]
	save	%sp, -SA(MINFRAME), %sp

	! Copies that reach here are larger than 256 bytes.  The
	! hw_copy_limit_1 is set to 256.  Never set this limit to less
	! than 128 bytes.
.do_block_copyout:

	! Swap src/dst since the code below is memcpy code
	! and memcpy/bcopy have different calling sequences
	mov	%i1, %i5
	mov	%i0, %i1
	mov	%i5, %i0

	andcc	%i0, 7, %i3		! is dst double aligned
	bz	%ncc, copyout_blkcpy
	sub	%i3, 8, %i3
	neg	%i3			! bytes till double aligned
	sub	%i2, %i3, %i2		! update %i2 with new count

	! Align Destination on double-word boundary

1:	ldub	[%i1], %i4
	inc	%i1
	stba	%i4, [%i0]ASI_USER
	deccc	%i3
	bgu	%ncc, 1b
	inc	%i0

copyout_blkcpy:
	andcc	%i0, 63, %i3
	bz,pn	%ncc, copyout_blalign	! now block aligned
	sub	%i3, 64, %i3
	neg	%i3			! bytes till block aligned
	sub	%i2, %i3, %i2		! update %i2 with new count

	! Copy %i3 bytes till dst is block (64 byte) aligned.  Use
	! double word copies.

	andcc	%i1, 7, %g1		! is src aligned on 8 bytes
	bz	%ncc, .co_dbcopy	! %g1 has source offset (last 3 bits)
	sll	%g1, 3, %l1		! left shift
	mov	0x40, %l2
	sub	%l2, %l1, %l2		! right shift = (64 - left shift)

	! Now use double word copies to align destination.
.co_double:
	sub	%i1, %g1, %i1		! align the src at 8 bytes.
	ldx	[%i1], %o2
2:
	ldx	[%i1+8], %o4
	ALIGN_DATA_EW(%o2, %o4, %l1, %l2, %o3)
	stxa	%o2, [%i0]ASI_USER
	mov	%o4, %o2
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, 2b
	add	%i0, 0x8, %i0
	ba	copyout_blalign
	add	%i1, %g1, %i1

	! Both source and destination are double aligned.
	! No shift and merge of data required in this case.
.co_dbcopy:
	ldx	[%i1], %o2
	stxa	%o2, [%i0]ASI_USER
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, .co_dbcopy
	add	%i0, 0x8, %i0

copyout_blalign:
	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
	sub	%i2, %i3, %i2		! Residue bytes in %i2

	mov	ASI_BLK_INIT_QUAD_LDD_AIUS, %asi

	andcc	%i1, 0xf, %o2		! is src quadword aligned
	bz,pn	%xcc, .co_blkcpy	! src offset in %o2 (last 4 bits)
	nop
	cmp	%o2, 0x8
	bg	.co_upper_double
	nop
	bl	.co_lower_double
	nop

	! Falls through when source offset is equal to 8 i.e.
	! source is double word aligned.
	! In this case no shift/merge of data is required

	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
.co_loop0:
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
	prefetch [%l0+0x40], #one_read

	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2

	stxa	%l5, [%i0+0x10]%asi
	stxa	%l2, [%i0+0x18]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4

	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2

	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, .co_loop0
	add	%i0, 0x40, %i0
	ba	.co_blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2

.co_lower_double:

	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	sll	%o2, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2	! partial data in %l2
							! and %l3 has complete
							! data
.co_loop1:
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4	! %l4 has partial data
							! for this read.
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
							! into %l2 and %l3
	prefetch [%l0+0x40], #one_read

	stxa	%l2, [%i0+0x0]%asi
	stxa	%l3, [%i0+0x8]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)	! merge %l2 with %l5 and
							! %l4 from previous read
							! into %l4 and %l5
	stxa	%l4, [%i0+0x10]%asi
	stxa	%l5, [%i0+0x18]%asi

	! Repeat the same for next 32 bytes.

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)

	stxa	%l2, [%i0+0x20]%asi
	stxa	%l3, [%i0+0x28]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)

	stxa	%l4, [%i0+0x30]%asi
	stxa	%l5, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, .co_loop1
	add	%i0, 0x40, %i0
	ba	.co_blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2

.co_upper_double:

	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	sub	%o2, 0x8, %o0
	sll	%o0, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2	! partial data in %l3
							! for this read and
							! no data in %l2
.co_loop2:
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4	! %l4 has complete data
							! and %l5 has partial
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
							! into %l3 and %l4
	prefetch [%l0+0x40], #one_read

	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)	! merge %l2 and %l3 with
							! %l5 from previous read
							! into %l5 and %l2

	stxa	%l5, [%i0+0x10]%asi
	stxa	%l2, [%i0+0x18]%asi

	! Repeat the same for next 32 bytes.

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)

	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)

	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, .co_loop2
	add	%i0, 0x40, %i0
	ba	.co_blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2


	! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P
.co_blkcpy:

	andn	%i1, 0x3f, %o0		! %o0 has block aligned source
	prefetch [%o0+0x0], #one_read
1:
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l0
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l2
	add	%i1, 0x10, %i1

	prefetch [%o0+0x40], #one_read

	stxa	%l0, [%i0+0x0]%asi

	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l4
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_ST_QUAD_LDD_P, %l6
	add	%i1, 0x10, %i1

	stxa	%l1, [%i0+0x8]%asi
	stxa	%l2, [%i0+0x10]%asi
	stxa	%l3, [%i0+0x18]%asi
	stxa	%l4, [%i0+0x20]%asi
	stxa	%l5, [%i0+0x28]%asi
	stxa	%l6, [%i0+0x30]%asi
	stxa	%l7, [%i0+0x38]%asi

	add	%o0, 0x40, %o0
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, 1b
	add	%i0, 0x40, %i0

.co_blkdone:
	membar	#Sync

	! Copy the remaining data using double word copies where possible.
.co_dwcp:
	cmp	%i2, 0x8		! Not enough bytes to copy as double
	blu	%ncc, .co_dbdone
	nop

	andn	%i2, 0x7, %i3		! %i3 count is multiple of 8 bytes
	sub	%i2, %i3, %i2		! Residue bytes in %i2

	andcc	%i1, 7, %g1		! is src aligned on 8 bytes
	bz	%ncc, .co_cpy_db
	nop

	sll	%g1, 3, %l0		! left shift
	mov	0x40, %l1
	sub	%l1, %l0, %l1		! right shift = (64 - left shift)

.co_cpy_wd:
	sub	%i1, %g1, %i1		! align the src at 8 bytes.
	ldx	[%i1], %o2
3:
	ldx	[%i1+8], %o4
	ALIGN_DATA_EW(%o2, %o4, %l0, %l1, %o3)
	stxa	%o2, [%i0]ASI_USER
	mov	%o4, %o2
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, 3b
	add	%i0, 0x8, %i0
	ba	.co_dbdone
	add	%i1, %g1, %i1

.co_cpy_db:
	ldx	[%i1], %o2
	stxa	%o2, [%i0]ASI_USER
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, .co_cpy_db
	add	%i0, 0x8, %i0

.co_dbdone:
	tst	%i2
	bz,pt	%xcc, .copyout_exit
	nop

	! Copy the residue as byte copy
.co_residue:
	ldub	[%i1], %i4
	stba	%i4, [%i0]ASI_USER
	inc	%i1
	deccc	%i2
	bgu	%xcc, .co_residue
	inc	%i0

.copyout_exit:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g0, 0, %o0

.copyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyout)

#endif	/* lint */


#ifdef lint

/*ARGSUSED*/
int
xcopyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyout)
	sethi	%hi(.xcopyout_err), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
.xcopyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyout)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyout_little(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(xcopyout_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	ldub	[%o0+%o3], %o4

1:	stba	%o4, [%o1+%o3]ASI_AIUSL
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	ldub	[%o0+%o3], %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)
	SET_SIZE(xcopyout_little)

#endif	/* lint */
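
/*
 * Illustrative C equivalent of the xcopyout_little loop above
 * (explanation only, not compiled): the source is read from its last
 * byte towards its first while the destination is written from its
 * first byte towards its last, so the buffer lands byte-reversed,
 * swapping the endianness of a single datum.
 *
 *	for (i = 0; i < count; i++)
 *		uaddr[i] = kaddr[count - 1 - i];	// via ASI_AIUSL
 */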

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

#if defined(lint)

/*ARGSUSED*/
int
copyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(copyin)
	sethi	%hi(.copyin_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o4
	or	%o4, %lo(copyio_fault), %o4
	sethi	%hi(copyio_fault_nowindow), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault_nowindow), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dci_ns
	or	%o0, %o1, %o3
	!
	! What was previously ".small_copyin"
	!
.dcibcp:
	sub	%g0, %o2, %o3		! setup for copy loop
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers.  We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers.  By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer.  A very fast 4 instruction loop.
	!
	.align 16
.dcicl:
	stb	%o4, [%o1 + %o3]
	inccc	%o3
	bl,a,pt %ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! We're done.  Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dci_ns:
	!
	! See if we're single byte aligned.  If we are, check the
	! limit for single byte copies.  If we're smaller, or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,a,pt	%icc, .dcih8
	btst	7, %o3
	!
	! We're single byte aligned.
	!
	sethi	%hi(hw_copy_limit_1), %o3
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	!
	! Is HW copy on?  If not do everything byte for byte.
	!
	tst	%o3
	bz,pn	%icc, .dcibcp
	subcc	%o3, %o2, %o3
	!
	! Are we bigger than the HW limit?  If not
	! go to byte for byte.
	!
	bge,pt	%ncc, .dcibcp
	nop
	!
	! We're big enough and copy is on.  Do it with HW.
	!
	ba,pt	%ncc, .big_copyin
	nop
.dcih8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcih4
	btst	3, %o3
	!
	! We're eight byte aligned.
	!
	sethi	%hi(hw_copy_limit_8), %o3
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	!
	! Is HW assist on?  If not, do it with the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcis8
	subcc	%o3, %o2, %o3
	bge	%ncc, .dcis8
	nop
	ba,pt	%ncc, .big_copyin
	nop
.dcis8:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcih4:
	bnz	%ncc, .dcih2
	sethi	%hi(hw_copy_limit_4), %o3
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	!
	! Is HW assist on?  If not, do it with the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcis4
	subcc	%o3, %o2, %o3
	!
	! We're negative if our size is larger than hw_copy_limit_4.
	!
	bge	%ncc, .dcis4
	nop
	ba,pt	%ncc, .big_copyin
	nop
.dcis4:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte
	! for byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
.dcih2:
	!
	! We're two byte aligned.  Check for "smallness"
	! done in delay at .dcih4
	!
	bleu,pt	%ncc, .dcis2
	sethi	%hi(hw_copy_limit_2), %o3
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	!
	! Is HW assist on?  If not, do it with the aligned copy.
	!
	tst	%o3
	bz,pn	%icc, .dcis2
	subcc	%o3, %o2, %o3
	!
	! Are we larger than the HW limit?
	!
	bge	%ncc, .dcis2
	nop
	!
	! HW assist is on and we're large enough to use it.
	!
	ba,pt	%ncc, .big_copyin
	nop
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte
	! for byte copy loop above.
	!
.dcis2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
	!
.small_copyin:
	!
	! Why are we doing this AGAIN?  There are certain conditions in
	! big_copyin that will cause us to forgo the HW assisted copies
	! and bounce back to a non-HW assisted copy.  This dispatches
	! those copies.  Note that we branch around this in the main line
	! code.
	!
	! We make no check for limits or HW enablement here.  We've
	! already been told that we're a poster child so just go off
	! and do it.
	!
	or	%o0, %o1, %o3
	btst	1, %o3
	bnz	%icc, .dcibcp		! Most likely
	btst	7, %o3
	bz	%icc, .dcis8
	btst	3, %o3
	bz	%icc, .dcis4
	nop
	ba,pt	%ncc, .dcis2
	nop
	!
	! Eight byte aligned copies.  A steal from the original .small_copyin
	! with modifications.  %o2 is number of 8 byte chunks to copy.  When
	! done, we examine %o3.  If this is < 0, we have 1 - 7 bytes more
	! to copy.
	!
	.align 32
.didebc:
	ldxa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	stx	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop.  Most 8 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 4 byte copy loop.  %o2 is number of 4 byte chunks to copy.
	!
	.align 32
.didfbc:
	lduwa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	st	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop.  Most 4 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 2 byte aligned copy loop.  %o2 is number of 2 byte chunks to
	! copy.
	!
	.align 32
.didtbc:
	lduha	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	sth	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop.  Most 2 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Deal with the last byte
	!
	lduba	[%o0 + %o3]ASI_USER, %o4
	stb	%o4, [%o1 + %o3]
.dcifh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.big_copyin:
	!
	! We're going off to do a block copy.
	! Switch fault handlers and grab a window.  We
	! don't do a membar #Sync since we've done only
	! kernel data to this point.
	!
	stn	%o4, [THREAD_REG + T_LOFAULT]
	save	%sp, -SA(MINFRAME), %sp

	! Copies that reach here are larger than 256 bytes.  The
	! hw_copy_limit_1 is set to 256.  Never set this limit to less
	! than 128 bytes.
.do_blockcopyin:

	! Swap src/dst since the code below is memcpy code
	! and memcpy/bcopy have different calling sequences
	mov	%i1, %i5
	mov	%i0, %i1
	mov	%i5, %i0

	andcc	%i0, 7, %i3		! is dst double aligned
	bz	%ncc, copyin_blkcpy
	sub	%i3, 8, %i3
	neg	%i3			! bytes till double aligned
	sub	%i2, %i3, %i2		! update %i2 with new count

	! Align Destination on double-word boundary

1:	lduba	[%i1]ASI_USER, %i4
	inc	%i1
	stb	%i4, [%i0]
	deccc	%i3
	bgu	%ncc, 1b
	inc	%i0

copyin_blkcpy:
	andcc	%i0, 63, %i3
	bz,pn	%ncc, copyin_blalign	! now block aligned
	sub	%i3, 64, %i3
	neg	%i3			! bytes till block aligned
	sub	%i2, %i3, %i2		! update %i2 with new count

	! Copy %i3 bytes till dst is block (64 byte) aligned.  Use
	! double word copies.

	andcc	%i1, 7, %g1		! is src aligned on 8 bytes
	bz	%ncc, .ci_dbcopy	! %g1 has source offset (last 3 bits)
	sll	%g1, 3, %l1		! left shift
	mov	0x40, %l2
	sub	%l2, %l1, %l2		! right shift = (64 - left shift)

	! Now use double word copies to align destination.
.ci_double:
	sub	%i1, %g1, %i1		! align the src at 8 bytes.
	ldxa	[%i1]ASI_USER, %o2
2:
	add	%i1, 0x8, %i1
	ldxa	[%i1]ASI_USER, %o4
	ALIGN_DATA_EW(%o2, %o4, %l1, %l2, %o3)
	stx	%o2, [%i0]
	mov	%o4, %o2
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, 2b
	add	%i0, 0x8, %i0
	ba	copyin_blalign
	add	%i1, %g1, %i1

	! Both source and destination are double aligned.
	! No shift and merge of data required in this case.
.ci_dbcopy:
	ldxa	[%i1]ASI_USER, %o2
	stx	%o2, [%i0]
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, .ci_dbcopy
	add	%i0, 0x8, %i0

copyin_blalign:
	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
	sub	%i2, %i3, %i2		! Residue bytes in %i2

	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi

	andcc	%i1, 0xf, %o2		! is src quadword aligned
	bz,pn	%xcc, .ci_blkcpy	! src offset in %o2 (last 4 bits)
	nop
	cmp	%o2, 0x8
	bg	.ci_upper_double
	nop
	bl	.ci_lower_double
	nop

	! Falls through when source offset is equal to 8 i.e.
	! source is double word aligned.
	! In this case no shift/merge of data is required

	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
copyin_blalign:
	andn	%i2, 0x3f, %i3		! %i3 count is multiple of block size
	sub	%i2, %i3, %i2		! Residue bytes in %i2

	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi

	andcc	%i1, 0xf, %o2		! is src quadword aligned
	bz,pn	%xcc, .ci_blkcpy	! src offset in %o2 (last 4-bits)
	nop
	cmp	%o2, 0x8
	bg	.ci_upper_double
	nop
	bl	.ci_lower_double
	nop

	! Falls through when source offset is equal to 8 i.e.
	! source is double word aligned.
	! In this case no shift/merge of data is required

	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
.ci_loop0:
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4

	prefetch [%l0+0x40], #one_read

	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2

	stxa	%l5, [%i0+0x10]%asi
	stxa	%l2, [%i0+0x18]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4

	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2

	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, .ci_loop0
	add	%i0, 0x40, %i0
	ba	.ci_blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2

.ci_lower_double:

	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	sll	%o2, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2	! partial data in %l2
							! and %l3 has complete
							! data
.ci_loop1:
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4	! %l4 has partial data
							! for this read.
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)	! merge %l2, %l3 and %l4
							! into %l2 and %l3

	prefetch [%l0+0x40], #one_read

	stxa	%l2, [%i0+0x0]%asi
	stxa	%l3, [%i0+0x8]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)	! merge %l2 with %l5 and
							! %l4 from previous read
							! into %l4 and %l5
	stxa	%l4, [%i0+0x10]%asi
	stxa	%l5, [%i0+0x18]%asi

	! Repeat the same for next 32 bytes.

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
	ALIGN_DATA(%l2, %l3, %l4, %o0, %o1, %l6)

	stxa	%l2, [%i0+0x20]%asi
	stxa	%l3, [%i0+0x28]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
	ALIGN_DATA(%l4, %l5, %l2, %o0, %o1, %l6)

	stxa	%l4, [%i0+0x30]%asi
	stxa	%l5, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, .ci_loop1
	add	%i0, 0x40, %i0
	ba	.ci_blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2
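/*
 * .ci_lower_double above handles source offsets 1-7: the first quad
 * load returns partial data in %l2 and a complete doubleword in %l3,
 * so the left shift is (offset * 8).  .ci_upper_double below handles
 * offsets 9-15, where %l2 holds nothing useful and %l3 is partial, so
 * the left shift becomes ((offset - 8) * 8).  In both cases the right
 * shift is (64 - left shift).
 */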
.ci_upper_double:

	sub	%i1, %o2, %i1		! align the src at 16 bytes.
	sub	%o2, 0x8, %o0
	sll	%o0, 3, %o0		! %o0 left shift
	mov	0x40, %o1
	sub	%o1, %o0, %o1		! %o1 right shift = (64 - left shift)
	andn	%i1, 0x3f, %l0		! %l0 has block aligned source
	prefetch [%l0+0x0], #one_read
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2	! partial data in %l3
							! for this read and
							! no data in %l2
.ci_loop2:
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4	! %l4 has complete data
							! and %l5 has partial
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)	! merge %l3, %l4 and %l5
							! into %l3 and %l4
	prefetch [%l0+0x40], #one_read

	stxa	%l3, [%i0+0x0]%asi
	stxa	%l4, [%i0+0x8]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)	! merge %l2 and %l3 with
							! %l5 from previous read
							! into %l5 and %l2

	stxa	%l5, [%i0+0x10]%asi
	stxa	%l2, [%i0+0x18]%asi

	! Repeat the same for next 32 bytes.

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
	ALIGN_DATA(%l3, %l4, %l5, %o0, %o1, %l6)

	stxa	%l3, [%i0+0x20]%asi
	stxa	%l4, [%i0+0x28]%asi

	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
	ALIGN_DATA(%l5, %l2, %l3, %o0, %o1, %l6)

	stxa	%l5, [%i0+0x30]%asi
	stxa	%l2, [%i0+0x38]%asi

	add	%l0, 0x40, %l0
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, .ci_loop2
	add	%i0, 0x40, %i0
	ba	.ci_blkdone
	add	%i1, %o2, %i1		! increment the source by src offset
					! the src offset was stored in %o2


	! Do fast copy using ASI_BLK_INIT_ST_QUAD_LDD_P
.ci_blkcpy:

	andn	%i1, 0x3f, %o0		! %o0 has block aligned source
	prefetch [%o0+0x0], #one_read
1:
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l0
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l2
	add	%i1, 0x10, %i1

	prefetch [%o0+0x40], #one_read

	stxa	%l0, [%i0+0x0]%asi

	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l4
	add	%i1, 0x10, %i1
	ldda	[%i1]ASI_BLK_INIT_QUAD_LDD_AIUS, %l6
	add	%i1, 0x10, %i1

	stxa	%l1, [%i0+0x8]%asi
	stxa	%l2, [%i0+0x10]%asi
	stxa	%l3, [%i0+0x18]%asi
	stxa	%l4, [%i0+0x20]%asi
	stxa	%l5, [%i0+0x28]%asi
	stxa	%l6, [%i0+0x30]%asi
	stxa	%l7, [%i0+0x38]%asi

	add	%o0, 0x40, %o0
	subcc	%i3, 0x40, %i3
	bgu,pt	%xcc, 1b
	add	%i0, 0x40, %i0

.ci_blkdone:
	membar	#Sync

	! Copy as much of the remaining data as possible using double
	! word copies.
.ci_dwcp:
	cmp	%i2, 0x8		! Not enough bytes to copy as double
	blu	%ncc, .ci_dbdone
	nop

	andn	%i2, 0x7, %i3		! %i3 count is multiple of 8 bytes size
	sub	%i2, %i3, %i2		! Residue bytes in %i2

	andcc	%i1, 7, %g1		! is src aligned on a 8 bytes
	bz	%ncc, .ci_cpy_db
	nop

	sll	%g1, 3, %l0		! left shift
	mov	0x40, %l1
	sub	%l1, %l0, %l1		! right shift = (64 - left shift)

.ci_cpy_dbwd:
	sub	%i1, %g1, %i1		! align the src at 8 bytes.
	ldxa	[%i1]ASI_USER, %o2
3:
	add	%i1, 0x8, %i1
	ldxa	[%i1]ASI_USER, %o4
	ALIGN_DATA_EW(%o2, %o4, %l0, %l1, %o3)
	stx	%o2, [%i0]
	mov	%o4, %o2
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, 3b
	add	%i0, 0x8, %i0
	ba	.ci_dbdone
	add	%i1, %g1, %i1

.ci_cpy_db:
	ldxa	[%i1]ASI_USER, %o2
	stx	%o2, [%i0]
	add	%i1, 0x8, %i1
	subcc	%i3, 0x8, %i3
	bgu,pt	%ncc, .ci_cpy_db
	add	%i0, 0x8, %i0

.ci_dbdone:
	tst	%i2
	bz,pt	%xcc, .copyin_exit
	nop

	! Copy the residue as byte copy
.ci_residue:
	lduba	[%i1]ASI_USER, %i4
	stb	%i4, [%i0]
	inc	%i1
	deccc	%i2
	bgu	%xcc, .ci_residue
	inc	%i0

.copyin_exit:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g0, 0, %o0
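/*
 * On fault, .copyin_err below defers to any copyops vector installed
 * on the thread (used when copies must be intercepted, e.g. for
 * watched pages) before failing with -1.  In rough, illustrative C:
 *
 *	if (curthread->t_copyops != NULL)
 *		return (curthread->t_copyops->cp_copyin(from, to, cnt));
 *	return (-1);
 */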
.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyin)

#endif /* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else /* lint */

	ENTRY(xcopyin)
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin)

#endif /* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin_little(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else /* lint */

	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f			! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0			! start w/last byte
	add	%o1, %o2, %o1
	lduba	[%o0+%o3]ASI_AIUSL, %o4

1:	stb	%o4, [%o1+%o3]
	inccc	%o3
	sub	%o0, 2, %o0			! get next byte
	bcc,a,pt %ncc, 1b
	lduba	[%o0+%o3]ASI_AIUSL, %o4

2:	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0			! return (0)

.little_err:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin_little)

#endif /* lint */
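/*
 * A note on xcopyin_little above: the pointer setup before the loop
 * arranges for the source to be read from its last byte down while the
 * destination is written from its first byte up, using the
 * little-endian user ASI.  The traversal is roughly (illustrative C):
 *
 *	for (size_t i = 0; i < cnt; i++)
 *		kaddr[i] = uaddr[cnt - 1 - i];
 */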
/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */
#if defined(lint)

/* ARGSUSED */
void
copyin_noerr(const void *ufrom, void *kto, size_t count)
{}

#else /* lint */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
.copyio_noerr:
	jmp	SAVED_LOFAULT
	nop
	SET_SIZE(copyin_noerr)

#endif /* lint */

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

#if defined(lint)

/* ARGSUSED */
void
copyout_noerr(const void *kfrom, void *uto, size_t count)
{}

#else /* lint */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
	SET_SIZE(copyout_noerr)

#endif /* lint */

#if defined(lint)

int use_hw_bcopy = 1;
int use_hw_bzero = 1;
uint_t hw_copy_limit_1 = 0x100;
uint_t hw_copy_limit_2 = 0x200;
uint_t hw_copy_limit_4 = 0x400;
uint_t hw_copy_limit_8 = 0x400;

#else /* !lint */

	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1
	DGDEF(hw_copy_limit_1)
	.word	0x100
	DGDEF(hw_copy_limit_2)
	.word	0x200
	DGDEF(hw_copy_limit_4)
	.word	0x400
	DGDEF(hw_copy_limit_8)
	.word	0x400

	.align	64
	.section ".text"
#endif /* !lint */
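/*
 * The tunables above gate the hardware-assisted paths: use_hw_bcopy
 * and use_hw_bzero turn the block-move and block-zero code on or off
 * entirely, while hw_copy_limit_N is the size threshold above which an
 * N-byte-aligned copy switches to HW assist (a value of 0 disables HW
 * assist for that alignment, as checked in the .dcihN dispatchers).
 */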
/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that
 * are at least 256 bytes long, using Niagara's block stores/quad store.
 * If the criteria for using this routine are not met then it calls
 * bzero and returns 1.  Otherwise 0 is returned indicating success.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
#ifdef lint
/*ARGSUSED*/
int
hwblkclr(void *addr, size_t len)
{
	return(0);
}
#else /* lint */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)

	ENTRY(hwblkclr)
	save	%sp, -SA(MINFRAME), %sp

	! Must be block-aligned
	andcc	%i0, 0x3f, %g0
	bnz,pn	%ncc, 1f
	nop

	! ... and must be 256 bytes or more
	cmp	%i1, 0x100
	blu,pn	%ncc, 1f
	nop

	! ... and length must be a multiple of 64
	andcc	%i1, 0x3f, %g0
	bz,pn	%ncc, .pz_doblock
	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi

1:	! punt, call bzero but notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1
	ret
	restore	%g0, 1, %o0	! return (1) - did not use block operations

	! Already verified that there are at least 256 bytes to set
.pz_doblock:
	stxa	%g0, [%i0+0x0]%asi
	stxa	%g0, [%i0+0x40]%asi
	stxa	%g0, [%i0+0x80]%asi
	stxa	%g0, [%i0+0xc0]%asi

	stxa	%g0, [%i0+0x8]%asi
	stxa	%g0, [%i0+0x10]%asi
	stxa	%g0, [%i0+0x18]%asi
	stxa	%g0, [%i0+0x20]%asi
	stxa	%g0, [%i0+0x28]%asi
	stxa	%g0, [%i0+0x30]%asi
	stxa	%g0, [%i0+0x38]%asi

	stxa	%g0, [%i0+0x48]%asi
	stxa	%g0, [%i0+0x50]%asi
	stxa	%g0, [%i0+0x58]%asi
	stxa	%g0, [%i0+0x60]%asi
	stxa	%g0, [%i0+0x68]%asi
	stxa	%g0, [%i0+0x70]%asi
	stxa	%g0, [%i0+0x78]%asi

	stxa	%g0, [%i0+0x88]%asi
	stxa	%g0, [%i0+0x90]%asi
	stxa	%g0, [%i0+0x98]%asi
	stxa	%g0, [%i0+0xa0]%asi
	stxa	%g0, [%i0+0xa8]%asi
	stxa	%g0, [%i0+0xb0]%asi
	stxa	%g0, [%i0+0xb8]%asi

	stxa	%g0, [%i0+0xc8]%asi
	stxa	%g0, [%i0+0xd0]%asi
	stxa	%g0, [%i0+0xd8]%asi
	stxa	%g0, [%i0+0xe0]%asi
	stxa	%g0, [%i0+0xe8]%asi
	stxa	%g0, [%i0+0xf0]%asi
	stxa	%g0, [%i0+0xf8]%asi

	sub	%i1, 0x100, %i1
	cmp	%i1, 0x100
	bgu,pt	%ncc, .pz_doblock
	add	%i0, 0x100, %i0

2:
	! Check if at least 64 bytes are left to set
	cmp	%i1, 0x40
	blu	%ncc, .pz_finish
	nop

3:
	stxa	%g0, [%i0+0x0]%asi
	stxa	%g0, [%i0+0x8]%asi
	stxa	%g0, [%i0+0x10]%asi
	stxa	%g0, [%i0+0x18]%asi
	stxa	%g0, [%i0+0x20]%asi
	stxa	%g0, [%i0+0x28]%asi
	stxa	%g0, [%i0+0x30]%asi
	stxa	%g0, [%i0+0x38]%asi

	subcc	%i1, 0x40, %i1
	bgu,pt	%ncc, 3b
	add	%i0, 0x40, %i0

.pz_finish:
	membar	#Sync
	ret
	restore	%g0, 0, %o0	! return (0) - block operations were used
	SET_SIZE(hwblkclr)
#endif /* lint */

#ifdef lint
/* Copy 32 bytes of data from src to dst using physical addresses */
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else /*!lint */

	/*
	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
	 * using physical addresses.
	 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g0, %g2, %pstate

	ldxa	[%o0]ASI_MEM, %o2
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5
	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	membar	#Sync
	retl
	wrpr	%g0, %g1, %pstate
	SET_SIZE(hw_pa_bcopy32)
#endif /* lint */
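/*
 * Note that hw_pa_bcopy32 above runs with PSTATE.IE clear, so the four
 * load/store pairs through ASI_MEM (physical addresses, MMU bypass)
 * cannot be split by an interrupt; the previous %pstate is kept in %g1
 * and restored in the delay slot of the retl.
 */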
/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */

/*
 * Control flow of the bzero/kzero/uzero routine.
 *
 *	For stores of fewer than 7 bytes, the bytes are zeroed one at
 *	a time.
 *
 *	For stores of fewer than 15 bytes, align the address on a 4 byte
 *	boundary, then store as many 4-byte chunks as possible, followed
 *	by the trailing bytes.
 *
 *	For sizes greater than 15 bytes, align the address on an 8 byte
 *	boundary, then:
 *	if (count > 128) {
 *		store as many 8-byte chunks as needed to block align
 *		    the address
 *		store using ASI_BLK_INIT_ST_QUAD_LDD_P (bzero/kzero) OR
 *		store using ASI_BLK_INIT_QUAD_LDD_AIUS (uzero)
 *	}
 *	Store as many 8-byte chunks as possible, followed by the
 *	trailing bytes.
 */

#if defined(lint)

/* ARGSUSED */
int
kzero(void *addr, size_t count)
{ return(0); }

/* ARGSUSED */
void
uzero(void *addr, size_t count)
{}

#else /* lint */

	ENTRY(uzero)
	!
	! Set a new lo_fault handler only if we came in with one
	! already specified.
	!
	wr	%g0, ASI_USER, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]

	ENTRY(kzero)
	!
	! Always set a lo_fault handler
	!
	wr	%g0, ASI_P, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	sethi	%hi(.zeroerr), %o2
	or	%o5, LOFAULT_SET, %o5
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]

/*
 * We got here because of a fault during kzero or if
 * uzero or bzero was called with t_lofault non-zero.
 * Otherwise we've already run screaming from the room.
 * Errno value is in %g1.  Note that we're here iff
 * we did set t_lofault.
 */
.zeroerr:
	!
	! Undo asi register setting. Just set it to be the
	! kernel default without checking.
	!
	wr	%g0, ASI_P, %asi

	!
	! We did set t_lofault. It may well have been zero coming in.
	!
1:
	tst	%o5
	membar	#Sync
	bne,pn	%ncc, 3f
	andncc	%o5, LOFAULT_SET, %o5
2:
	!
	! Old handler was zero. Just return the error.
	!
	retl				! return
	mov	%g1, %o0		! error code from %g1
3:
	!
	! We're here because %o5 was non-zero. It was non-zero
	! because either LOFAULT_SET was present, a previous fault
	! handler was present or both. In all cases we need to reset
	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
	! before we either simply return the error or we invoke the
	! previously specified handler.
	!
	be	%ncc, 2b
	stn	%o5, [THREAD_REG + T_LOFAULT]
	jmp	%o5			! goto real handler
	nop
	SET_SIZE(kzero)
	SET_SIZE(uzero)

#endif /* lint */

/*
 * Zero a block of storage.
 */

#if defined(lint)

/* ARGSUSED */
void
bzero(void *addr, size_t count)
{}

#else /* lint */

	ENTRY(bzero)
	wr	%g0, ASI_P, %asi

	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync				! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

.do_zero:
	cmp	%o1, 7
	blu,pn	%ncc, .byteclr
	nop

	cmp	%o1, 15
	blu,pn	%ncc, .wdalign
	nop

	andcc	%o0, 7, %o3		! is address aligned on a 8 byte bound
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o1, %o3, %o1		! update o1 with new count

1:
	stba	%g0, [%o0]%asi
	inccc	%o3
	bl,pt	%ncc, 1b
	inc	%o0

	! Now address is double aligned
.blkalign:
	cmp	%o1, 0x80		! check if there are 128 bytes to set
	blu,pn	%ncc, .bzero_small
	mov	%o1, %o3

	sethi	%hi(use_hw_bzero), %o2
	ld	[%o2 + %lo(use_hw_bzero)], %o2
	tst	%o2
	bz	%ncc, .bzero_small
	mov	%o1, %o3

	rd	%asi, %o3
	wr	%g0, ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
	cmp	%o3, ASI_P
	bne,a	%ncc, .algnblk
	wr	%g0, ASI_BLK_INIT_QUAD_LDD_AIUS, %asi

.algnblk:
	andcc	%o0, 0x3f, %o3		! is block aligned?
	bz,pt	%ncc, .bzero_blk
	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
	add	%o1, %o3, %o1		! o1 is the remainder

	! Clear -(%o3) bytes till block aligned
1:
	stxa	%g0, [%o0]%asi
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o0, 8, %o0
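/*
 * The %asi dance above lets one block-clear body serve two callers:
 * bzero/kzero enter with ASI_P and get the primary block-init ASI,
 * while uzero enters with ASI_USER and gets the AIUS variant.  The
 * same comparison runs in reverse at .bzero_blk_done to put the
 * original setting back.
 */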
.bzero_blk:
	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! 256 bytes or more
	blu,pn	%ncc, 3f
	nop

2:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x40]%asi
	stxa	%g0, [%o0+0x80]%asi
	stxa	%g0, [%o0+0xc0]%asi

	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	stxa	%g0, [%o0+0x48]%asi
	stxa	%g0, [%o0+0x50]%asi
	stxa	%g0, [%o0+0x58]%asi
	stxa	%g0, [%o0+0x60]%asi
	stxa	%g0, [%o0+0x68]%asi
	stxa	%g0, [%o0+0x70]%asi
	stxa	%g0, [%o0+0x78]%asi

	stxa	%g0, [%o0+0x88]%asi
	stxa	%g0, [%o0+0x90]%asi
	stxa	%g0, [%o0+0x98]%asi
	stxa	%g0, [%o0+0xa0]%asi
	stxa	%g0, [%o0+0xa8]%asi
	stxa	%g0, [%o0+0xb0]%asi
	stxa	%g0, [%o0+0xb8]%asi

	stxa	%g0, [%o0+0xc8]%asi
	stxa	%g0, [%o0+0xd0]%asi
	stxa	%g0, [%o0+0xd8]%asi
	stxa	%g0, [%o0+0xe0]%asi
	stxa	%g0, [%o0+0xe8]%asi
	stxa	%g0, [%o0+0xf0]%asi
	stxa	%g0, [%o0+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100
	bgu,pt	%ncc, 2b
	add	%o0, 0x100, %o0

3:
	! ... check if at least 64 bytes are left to set
	cmp	%o4, 0x40
	blu	%ncc, .bzero_blk_done
	nop

4:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 3b
	add	%o0, 0x40, %o0

.bzero_blk_done:
	membar	#Sync
	!
	! Undo asi register setting.
	!
	rd	%asi, %o4
	wr	%g0, ASI_P, %asi
	cmp	%o4, ASI_BLK_INIT_ST_QUAD_LDD_P
	bne,a	%ncc, .bzero_small
	wr	%g0, ASI_USER, %asi

.bzero_small:
	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .byteclr
	and	%o1, 7, %o1		! calc bytes left after doubles

.dbclr:
	stxa	%g0, [%o0]%asi		! Clear the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .dbclr
	add	%o0, 8, %o0

	ba	.byteclr
	nop

.wdalign:
	andcc	%o0, 3, %o3		! is address aligned on a word boundary
	bz,pn	%ncc, .wdclr
	andn	%o1, 3, %o3		! create word sized count in %o3

	dec	%o1			! decrement count
	stba	%g0, [%o0]%asi		! clear a byte
	ba	.wdalign
	inc	%o0			! next byte

.wdclr:
	sta	%g0, [%o0]%asi		! 4-byte clearing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wdclr
	inc	4, %o0

	and	%o1, 3, %o1		! leftover count, if any

.byteclr:
	! Set the leftover bytes
	brz	%o1, .bzero_exit
	nop

7:
	deccc	%o1			! byte clearing loop
	stba	%g0, [%o0]%asi
	bgu,pt	%ncc, 7b
	inc	%o0

.bzero_exit:
	!
	! We're just concerned with whether t_lofault was set
	! when we came in. We end up here from either kzero()
	! or bzero(). kzero() *always* sets a lofault handler.
	! It ors LOFAULT_SET into %o5 to indicate it has done
	! this even if the value of %o5 is otherwise zero.
	! bzero() sets a lofault handler *only* if one was
	! previously set. Accordingly we need to examine
	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
	! before resetting the error handler.
	!
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	retl
	clr	%o0			! return (0)

	SET_SIZE(bzero)
#endif /* lint */