/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/asm_linkage.h>
#include <sys/regset.h>
#include <sys/privregs.h>

#include "assym.h"

/*
 * Do block operations using Streaming SIMD extensions
 */

#if defined(DEBUG)
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)	\
	movq	%gs:CPU_THREAD, t;		\
	movsbl	T_PREEMPT(t), r32;		\
	testl	r32, r32;			\
	jne	5f;				\
	pushq	%rbp;				\
	movq	%rsp, %rbp;			\
	leaq	msg(%rip), %rdi;		\
	xorl	%eax, %eax;			\
	call	panic;				\
5:
#else	/* DEBUG */
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)
#endif	/* DEBUG */

#define	BLOCKSHIFT	6
#define	BLOCKSIZE	64	/* (1 << BLOCKSHIFT) */
#define	BLOCKMASK	63	/* (BLOCKSIZE - 1) */

#if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1)
#error	"mucked up constants"
#endif

#define	SAVE_XMM0(r)				\
	SAVE_XMM_PROLOG(r, 1);			\
	movdqa	%xmm0, (r)

#define	ZERO_LOOP_INIT_XMM(dst)			\
	pxor	%xmm0, %xmm0

#define	ZERO_LOOP_BODY_XMM(dst, cnt)		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm0, 0x10(dst);		\
	movntdq	%xmm0, 0x20(dst);		\
	movntdq	%xmm0, 0x30(dst);		\
	addq	$BLOCKSIZE, dst;		\
	subq	$1, cnt

#define	ZERO_LOOP_FINI_XMM(dst)			\
	mfence

#define	RSTOR_XMM0(r)				\
	movdqa	0x0(r), %xmm0;			\
	RSTOR_XMM_EPILOG(r, 1)

	/*
	 * %rdi		dst
	 * %rsi		size
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_preempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkclr)
	pushq	%rbp
	movq	%rsp, %rbp
	testl	$BLOCKMASK, %edi	/* address must be BLOCKSIZE aligned */
	jne	.dobzero
	cmpq	$BLOCKSIZE, %rsi	/* size must be at least BLOCKSIZE */
	jl	.dobzero
	testq	$BLOCKMASK, %rsi	/* .. and be a multiple of BLOCKSIZE */
	jne	.dobzero
	shrq	$BLOCKSHIFT, %rsi	/* convert size to block count */

	ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled)

	movq	%cr0, %rax
	clts
	testl	$CR0_TS, %eax
	jnz	1f

	SAVE_XMM0(%r8)

1:	ZERO_LOOP_INIT_XMM(%rdi)
9:	ZERO_LOOP_BODY_XMM(%rdi, %rsi)
	jnz	9b
	ZERO_LOOP_FINI_XMM(%rdi)

	testl	$CR0_TS, %eax
	jnz	2f
	RSTOR_XMM0(%r8)
2:	movq	%rax, %cr0
	leave
	ret
.dobzero:
	leave
	jmp	bzero
	SET_SIZE(hwblkclr)
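
/*
 * For reference, a minimal C-level sketch of the contract hwblkclr()
 * implements.  This is an illustration, not code from this file: the
 * parameter names "addr" and "size" are invented, and IS_P2ALIGNED()
 * merely stands in for the bit tests performed above:
 *
 *	void
 *	hwblkclr(void *addr, size_t size)
 *	{
 *		if (!IS_P2ALIGNED((uintptr_t)addr, BLOCKSIZE) ||
 *		    size < BLOCKSIZE || !IS_P2ALIGNED(size, BLOCKSIZE)) {
 *			bzero(addr, size);	// slow path
 *			return;
 *		}
 *		// fast path: size / BLOCKSIZE iterations, each issuing
 *		// four 16-byte non-temporal (cache-bypassing) stores
 *	}
 *
 * The %cr0 handling exists because CR0.TS may be set (lazy FPU
 * switching): clts clears it so the %xmm stores don't fault, and if
 * TS was originally clear the live %xmm0 contents belong to some
 * thread and must be saved and restored around the loop.
 */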

#define	PREFETCH_START(src)			\
	prefetchnta	0x0(src);		\
	prefetchnta	0x40(src)

#define	SAVE_XMMS(r)				\
	SAVE_XMM_PROLOG(r, 8);			\
	movdqa	%xmm0, (r);			\
	movdqa	%xmm1, 0x10(r);			\
	movdqa	%xmm2, 0x20(r);			\
	movdqa	%xmm3, 0x30(r);			\
	movdqa	%xmm4, 0x40(r);			\
	movdqa	%xmm5, 0x50(r);			\
	movdqa	%xmm6, 0x60(r);			\
	movdqa	%xmm7, 0x70(r)

#define	COPY_LOOP_INIT_XMM(src)			\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	addq	$0x80, src

#define	COPY_LOOP_BODY_XMM(src, dst, cnt)	\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	prefetchnta	0x100(src);		\
	prefetchnta	0x140(src);		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst);		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	addq	$0x80, dst;			\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	addq	$0x80, src;			\
	subl	$1, cnt

#define	COPY_LOOP_FINI_XMM(dst)			\
	movntdq	%xmm0, 0x0(dst);		\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst)

#define	RSTOR_XMMS(r)				\
	movdqa	0x0(r), %xmm0;			\
	movdqa	0x10(r), %xmm1;			\
	movdqa	0x20(r), %xmm2;			\
	movdqa	0x30(r), %xmm3;			\
	movdqa	0x40(r), %xmm4;			\
	movdqa	0x50(r), %xmm5;			\
	movdqa	0x60(r), %xmm6;			\
	movdqa	0x70(r), %xmm7;			\
	RSTOR_XMM_EPILOG(r, 8)

	/*
	 * %rdi		src
	 * %rsi		dst
	 * %rdx		#if DEBUG then curthread
	 * %ecx		loop count
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_preempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkpagecopy)
	pushq	%rbp
	movq	%rsp, %rbp
	PREFETCH_START(%rdi)
	/*
	 * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
	 * load and final store save us one loop count
	 */
	movl	$_CONST(32 - 1), %ecx

	ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled)

	movq	%cr0, %rax
	clts
	testl	$CR0_TS, %eax
	jnz	3f
	SAVE_XMMS(%r8)
3:	COPY_LOOP_INIT_XMM(%rdi)
4:	COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx)
	jnz	4b
	COPY_LOOP_FINI_XMM(%rsi)
	testl	$CR0_TS, %eax
	jnz	5f
	RSTOR_XMMS(%r8)
5:	movq	%rax, %cr0
	mfence
	leave
	ret
	SET_SIZE(hwblkpagecopy)

	/*
	 * %rdi		dst
	 * %rsi		len (nonzero multiple of 32)
	 * Zeroes with 64-bit non-temporal stores; no %xmm use.
	 */
	ENTRY(block_zero_no_xmm)
	pushq	%rbp
	movq	%rsp, %rbp
	xorl	%eax, %eax
	addq	%rsi, %rdi
	negq	%rsi
1:
	movnti	%rax, (%rdi, %rsi)
	movnti	%rax, 8(%rdi, %rsi)
	movnti	%rax, 16(%rdi, %rsi)
	movnti	%rax, 24(%rdi, %rsi)
	addq	$32, %rsi
	jnz	1b
	mfence
	leave
	ret
	SET_SIZE(block_zero_no_xmm)

	/*
	 * %rdi		dst
	 * %rsi		src
	 * Copies one MMU_STD_PAGESIZE page; no %xmm use.
	 */
	ENTRY(page_copy_no_xmm)
	movq	$MMU_STD_PAGESIZE, %rcx
	addq	%rcx, %rdi
	addq	%rcx, %rsi
	negq	%rcx
1:
	movq	(%rsi, %rcx), %rax
	movnti	%rax, (%rdi, %rcx)
	movq	8(%rsi, %rcx), %rax
	movnti	%rax, 8(%rdi, %rcx)
	movq	16(%rsi, %rcx), %rax
	movnti	%rax, 16(%rdi, %rcx)
	movq	24(%rsi, %rcx), %rax
	movnti	%rax, 24(%rdi, %rcx)
	addq	$32, %rcx
	jnz	1b
	mfence
	ret
	SET_SIZE(page_copy_no_xmm)
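
/*
 * A hedged sketch of the expected calling sequence (the variable names
 * and call site are assumptions drawn from the DEBUG assertion above,
 * not copied from a real caller):
 *
 *	kpreempt_disable();
 *	hwblkpagecopy(src, dst);	// one full, aligned page
 *	kpreempt_enable();
 *
 * Preemption must stay disabled for the duration: the routines borrow
 * the %xmm registers (saving them only when CR0.TS was clear), so a
 * thread switched in mid-copy could otherwise observe clobbered FPU
 * state.
 */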
"sseblk: preemption not disabled!" #endif