/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/asm_linkage.h>
#include <sys/regset.h>
#include <sys/privregs.h>

#if defined(__lint)
#include <sys/types.h>
#include <sys/archsystm.h>
#else
#include "assym.h"
#endif

/*
 * Do block operations using Streaming SIMD extensions
 */

#if defined(DEBUG)
#if defined(__amd64)
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)	\
	movq	%gs:CPU_THREAD, t;		\
	movsbl	T_PREEMPT(t), r32;		\
	testl	r32, r32;			\
	jne	5f;				\
	pushq	%rbp;				\
	movq	%rsp, %rbp;			\
	leaq	msg(%rip), %rdi;		\
	xorl	%eax, %eax;			\
	call	panic;				\
5:
#elif defined(__i386)
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)	\
	movl	%gs:CPU_THREAD, t;		\
	movsbl	T_PREEMPT(t), r32;		\
	testl	r32, r32;			\
	jne	5f;				\
	pushl	%ebp;				\
	movl	%esp, %ebp;			\
	pushl	$msg;				\
	call	panic;				\
5:
#endif	/* __i386 */
#else	/* DEBUG */
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)
#endif	/* DEBUG */

#define	BLOCKSHIFT	6
#define	BLOCKSIZE	64	/* (1 << BLOCKSHIFT) */
#define	BLOCKMASK	63	/* (BLOCKSIZE - 1) */

#if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1)
#error	"mucked up constants"
#endif

#if defined(__lint)

/*ARGSUSED*/
void
hwblkclr(void *addr, size_t size)
{}

#else	/* __lint */

#if defined(__amd64)
#define	ADD	addq
#define	SUB	subq
#else
#define	ADD	addl
#define	SUB	subl
#endif

#define	SAVE_XMM0(r)				\
	SAVE_XMM_PROLOG(r, 1);			\
	movdqa	%xmm0, (r)

#define	ZERO_LOOP_INIT_XMM(dst)			\
	pxor	%xmm0, %xmm0

#define	ZERO_LOOP_BODY_XMM(dst, cnt)		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm0, 0x10(dst);		\
	movntdq	%xmm0, 0x20(dst);		\
	movntdq	%xmm0, 0x30(dst);		\
	ADD	$BLOCKSIZE, dst;		\
	SUB	$1, cnt

#define	ZERO_LOOP_FINI_XMM(dst)			\
	mfence

#define	RSTOR_XMM0(r)				\
	movdqa	0x0(r), %xmm0;			\
	RSTOR_XMM_EPILOG(r, 1)
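
/*
 * Zeroing strategy: ZERO_LOOP_BODY_XMM writes each 64-byte block with
 * movntdq non-temporal stores, so the zeroed data goes straight to
 * memory instead of displacing live data from the caches, and the
 * mfence in ZERO_LOOP_FINI_XMM makes those weakly-ordered stores
 * globally visible before hwblkclr() returns.  clts clears CR0.TS so
 * the SSE instructions below do not trap; if TS was set on entry the
 * XMM registers hold no live thread state, so SAVE_XMM0/RSTOR_XMM0
 * are skipped and TS is re-set when %cr0 is restored.  Because %xmm0
 * is borrowed without a full FPU context switch, callers must run
 * with kernel preemption disabled (asserted under DEBUG).  A sketch
 * of the expected calling pattern, assuming the usual
 * kpreempt_disable()/kpreempt_enable() interfaces:
 *
 *	kpreempt_disable();
 *	hwblkclr(addr, size);	(addr BLOCKSIZE-aligned, size a nonzero
 *				 multiple of BLOCKSIZE; anything else is
 *				 handed off to bzero())
 *	kpreempt_enable();
 */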

#if defined(__amd64)

	/*
	 * %rdi		dst
	 * %rsi		size
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_preempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkclr)
	pushq	%rbp
	movq	%rsp, %rbp
	testl	$BLOCKMASK, %edi	/* address must be BLOCKSIZE aligned */
	jne	.dobzero
	cmpq	$BLOCKSIZE, %rsi	/* size must be at least BLOCKSIZE */
	jl	.dobzero
	testq	$BLOCKMASK, %rsi	/* .. and be a multiple of BLOCKSIZE */
	jne	.dobzero
	shrq	$BLOCKSHIFT, %rsi

	ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled)
	movq	%cr0, %rax
	clts
	testl	$CR0_TS, %eax
	jnz	1f

	SAVE_XMM0(%r8)
1:	ZERO_LOOP_INIT_XMM(%rdi)
9:	ZERO_LOOP_BODY_XMM(%rdi, %rsi)
	jnz	9b
	ZERO_LOOP_FINI_XMM(%rdi)

	testl	$CR0_TS, %eax
	jnz	2f
	RSTOR_XMM0(%r8)
2:	movq	%rax, %cr0
	leave
	ret
.dobzero:
	leave
	jmp	bzero
	SET_SIZE(hwblkclr)

#elif defined(__i386)

	/*
	 * %eax		dst
	 * %ecx		size in bytes, loop count
	 * %ebx		saved %cr0 (#if DEBUG then t->t_preempt)
	 * %edi		pointer to %xmm register save area
	 */
	ENTRY(hwblkclr)
	movl	4(%esp), %eax
	movl	8(%esp), %ecx
	testl	$BLOCKMASK, %eax	/* address must be BLOCKSIZE aligned */
	jne	.dobzero
	cmpl	$BLOCKSIZE, %ecx	/* size must be at least BLOCKSIZE */
	jl	.dobzero
	testl	$BLOCKMASK, %ecx	/* .. and be a multiple of BLOCKSIZE */
	jne	.dobzero
	shrl	$BLOCKSHIFT, %ecx
	movl	0xc(%esp), %edx
	pushl	%ebx

	pushl	%esi
	ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
	popl	%esi
	movl	%cr0, %ebx
	clts
	testl	$CR0_TS, %ebx
	jnz	1f

	pushl	%edi
	SAVE_XMM0(%edi)
1:	ZERO_LOOP_INIT_XMM(%eax)
9:	ZERO_LOOP_BODY_XMM(%eax, %ecx)
	jnz	9b
	ZERO_LOOP_FINI_XMM(%eax)

	testl	$CR0_TS, %ebx
	jnz	2f
	RSTOR_XMM0(%edi)
	popl	%edi
2:	movl	%ebx, %cr0
	popl	%ebx
	ret
.dobzero:
	jmp	bzero
	SET_SIZE(hwblkclr)

#endif	/* __i386 */
#endif	/* __lint */


#if defined(__lint)

/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{}

#else	/* __lint */

#define	PREFETCH_START(src)			\
	prefetchnta	0x0(src);		\
	prefetchnta	0x40(src)

#define	SAVE_XMMS(r)				\
	SAVE_XMM_PROLOG(r, 8);			\
	movdqa	%xmm0, (r);			\
	movdqa	%xmm1, 0x10(r);			\
	movdqa	%xmm2, 0x20(r);			\
	movdqa	%xmm3, 0x30(r);			\
	movdqa	%xmm4, 0x40(r);			\
	movdqa	%xmm5, 0x50(r);			\
	movdqa	%xmm6, 0x60(r);			\
	movdqa	%xmm7, 0x70(r)

#define	COPY_LOOP_INIT_XMM(src)			\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	ADD	$0x80, src

#define	COPY_LOOP_BODY_XMM(src, dst, cnt)	\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	prefetchnta	0x100(src);		\
	prefetchnta	0x140(src);		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst);		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	ADD	$0x80, dst;			\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	ADD	$0x80, src;			\
	subl	$1, cnt

#define	COPY_LOOP_FINI_XMM(dst)			\
	movntdq	%xmm0, 0x0(dst);		\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst)

#define	RSTOR_XMMS(r)				\
	movdqa	0x0(r), %xmm0;			\
	movdqa	0x10(r), %xmm1;			\
	movdqa	0x20(r), %xmm2;			\
	movdqa	0x30(r), %xmm3;			\
	movdqa	0x40(r), %xmm4;			\
	movdqa	0x50(r), %xmm5;			\
	movdqa	0x60(r), %xmm6;			\
	movdqa	0x70(r), %xmm7;			\
	RSTOR_XMM_EPILOG(r, 8)
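
/*
 * Copy strategy: COPY_LOOP_INIT_XMM primes %xmm0-%xmm7 with the first
 * 128 bytes of the source page, and each pass through COPY_LOOP_BODY_XMM
 * streams those 128 bytes to the destination with movntdq while
 * refilling the registers from the next 128 source bytes, overlapping
 * the loads with the non-temporal stores.  The prefetchnta instructions
 * keep the source a few cache lines ahead of the loads, using a
 * non-temporal hint to limit cache pollution.  Since the first load
 * (INIT) and the last store (FINI) happen outside the loop, a 4096-byte
 * page takes 4096/128 - 1 = 31 passes through the body, which is why
 * the loop count below is $_CONST(32 - 1).  Both implementations issue
 * mfence before returning so the weakly-ordered stores are visible to
 * other CPUs, and, as with hwblkclr(), callers must have kernel
 * preemption disabled; the %xmm save/restore is skipped when CR0.TS
 * was set on entry.
 */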

#if defined(__amd64)

	/*
	 * %rdi		src
	 * %rsi		dst
	 * %rdx		#if DEBUG then curthread
	 * %ecx		loop count
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_preempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkpagecopy)
	pushq	%rbp
	movq	%rsp, %rbp
	PREFETCH_START(%rdi)
	/*
	 * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
	 * load and final store save us one loop count
	 */
	movl	$_CONST(32 - 1), %ecx
	ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled)
	movq	%cr0, %rax
	clts
	testl	$CR0_TS, %eax
	jnz	3f
	SAVE_XMMS(%r8)
3:	COPY_LOOP_INIT_XMM(%rdi)
4:	COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx)
	jnz	4b
	COPY_LOOP_FINI_XMM(%rsi)
	testl	$CR0_TS, %eax
	jnz	5f
	RSTOR_XMMS(%r8)
5:	movq	%rax, %cr0
	mfence
	leave
	ret
	SET_SIZE(hwblkpagecopy)

#elif defined(__i386)

	/*
	 * %eax		src
	 * %edx		dst
	 * %ecx		loop count
	 * %ebx		saved %cr0 (#if DEBUG then t->t_preempt)
	 * %edi		pointer to %xmm register save area
	 * %esi		#if DEBUG temporary thread pointer
	 */
	ENTRY(hwblkpagecopy)
	movl	4(%esp), %eax
	movl	8(%esp), %edx
	PREFETCH_START(%eax)
	pushl	%ebx
	/*
	 * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
	 * load and final store save us one loop count
	 */
	movl	$_CONST(32 - 1), %ecx
	pushl	%esi
	ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
	popl	%esi
	movl	%cr0, %ebx
	clts
	testl	$CR0_TS, %ebx
	jnz	3f
	pushl	%edi
	SAVE_XMMS(%edi)
3:	COPY_LOOP_INIT_XMM(%eax)
4:	COPY_LOOP_BODY_XMM(%eax, %edx, %ecx)
	jnz	4b
	COPY_LOOP_FINI_XMM(%edx)
	testl	$CR0_TS, %ebx
	jnz	5f
	RSTOR_XMMS(%edi)
	popl	%edi
5:	movl	%ebx, %cr0
	popl	%ebx
	mfence
	ret
	SET_SIZE(hwblkpagecopy)

#endif	/* __i386 */
#endif	/* __lint */


#if defined(__lint)

/*ARGSUSED*/
void
hat_pte_zero(void *dst, size_t len)
{}

#else

#if defined(__amd64)

	/*
	 * %rdi		dst
	 * %rsi		len (bytes; the loop assumes a nonzero multiple of 8)
	 */
	ENTRY(hat_pte_zero)
	xorl	%eax, %eax
1:
	movnti	%rax, (%rdi)
	addq	$8, %rdi
	subq	$8, %rsi
	jnz	1b
	mfence
	ret
	SET_SIZE(hat_pte_zero)

#elif defined(__i386)

	/*
	 * %edx		dst
	 * %ecx		len (bytes; the loop assumes a nonzero multiple of 4)
	 */
	ENTRY(hat_pte_zero)
	xorl	%eax, %eax
	movl	4(%esp), %edx
	movl	8(%esp), %ecx
1:
	movnti	%eax, (%edx)
	addl	$4, %edx
	subl	$4, %ecx
	jnz	1b
	mfence
	ret
	SET_SIZE(hat_pte_zero)

#endif	/* __i386 */

#endif	/* __lint */

#if defined(DEBUG) && !defined(__lint)
	.text
.not_disabled:
	.string	"sseblk: preemption not disabled!"
#endif