/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/asm_linkage.h>
#include <sys/regset.h>
#include <sys/privregs.h>

#include "assym.h"

/*
 * Do block operations using Streaming SIMD extensions
 */

#if defined(DEBUG)
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)	\
	movq	%gs:CPU_THREAD, t;		\
	movsbl	T_PREEMPT(t), r32;		\
	testl	r32, r32;			\
	jne	5f;				\
	pushq	%rbp;				\
	movq	%rsp, %rbp;			\
	leaq	msg(%rip), %rdi;		\
	xorl	%eax, %eax;			\
	call	panic;				\
5:
#else	/* DEBUG */
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)
#endif	/* DEBUG */

#define	BLOCKSHIFT	6
#define	BLOCKSIZE	64	/* (1 << BLOCKSHIFT) */
#define	BLOCKMASK	63	/* (BLOCKSIZE - 1) */

#if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1)
#error	"mucked up constants"
#endif

#define	SAVE_XMM0(r)				\
	SAVE_XMM_PROLOG(r, 1);			\
	movdqa	%xmm0, (r)

#define	ZERO_LOOP_INIT_XMM(dst)			\
	pxor	%xmm0, %xmm0

#define	ZERO_LOOP_BODY_XMM(dst, cnt)		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm0, 0x10(dst);		\
	movntdq	%xmm0, 0x20(dst);		\
	movntdq	%xmm0, 0x30(dst);		\
	addq	$BLOCKSIZE, dst;		\
	subq	$1, cnt

#define	ZERO_LOOP_FINI_XMM(dst)			\
	mfence

#define	RSTOR_XMM0(r)				\
	movdqa	0x0(r), %xmm0;			\
	RSTOR_XMM_EPILOG(r, 1)
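/*
 * For orientation, a rough C-level sketch of what hwblkclr() below does
 * (illustrative pseudo-code only, not the actual implementation):
 *
 *	void
 *	hwblkclr(void *addr, size_t size)
 *	{
 *		if (((uintptr_t)addr & BLOCKMASK) != 0 ||
 *		    size < BLOCKSIZE || (size & BLOCKMASK) != 0) {
 *			bzero(addr, size);	<- unsuitable, fall back
 *			return;
 *		}
 *		-- save %cr0 and clts; if CR0_TS was clear, %xmm0 is
 *		--   live, so save it first
 *		-- zero BLOCKSIZE bytes per iteration with non-temporal
 *		--   (movntdq) stores, bypassing the caches
 *		-- mfence, then restore %xmm0 (if saved) and %cr0
 *	}
 */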
	/*
	 * %rdi		dst
	 * %rsi		size
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_preempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkclr)
	pushq	%rbp
	movq	%rsp, %rbp
	testl	$BLOCKMASK, %edi	/* address must be BLOCKSIZE aligned */
	jne	.dobzero
	cmpq	$BLOCKSIZE, %rsi	/* size must be at least BLOCKSIZE */
	jl	.dobzero
	testq	$BLOCKMASK, %rsi	/* .. and be a multiple of BLOCKSIZE */
	jne	.dobzero
	shrq	$BLOCKSHIFT, %rsi

	ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled)
	movq	%cr0, %rax
	clts
	testl	$CR0_TS, %eax
	jnz	1f

	SAVE_XMM0(%r8)
1:	ZERO_LOOP_INIT_XMM(%rdi)
9:	ZERO_LOOP_BODY_XMM(%rdi, %rsi)
	jnz	9b
	ZERO_LOOP_FINI_XMM(%rdi)

	testl	$CR0_TS, %eax
	jnz	2f
	RSTOR_XMM0(%r8)
2:	movq	%rax, %cr0
	leave
	ret
.dobzero:
	leave
	jmp	bzero
	SET_SIZE(hwblkclr)


#define	PREFETCH_START(src)			\
	prefetchnta	0x0(src);		\
	prefetchnta	0x40(src)

#define	SAVE_XMMS(r)				\
	SAVE_XMM_PROLOG(r, 8);			\
	movdqa	%xmm0, (r);			\
	movdqa	%xmm1, 0x10(r);			\
	movdqa	%xmm2, 0x20(r);			\
	movdqa	%xmm3, 0x30(r);			\
	movdqa	%xmm4, 0x40(r);			\
	movdqa	%xmm5, 0x50(r);			\
	movdqa	%xmm6, 0x60(r);			\
	movdqa	%xmm7, 0x70(r)

#define	COPY_LOOP_INIT_XMM(src)			\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	addq	$0x80, src

#define	COPY_LOOP_BODY_XMM(src, dst, cnt)	\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	prefetchnta	0x100(src);		\
	prefetchnta	0x140(src);		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst);		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	addq	$0x80, dst;			\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	addq	$0x80, src;			\
	subl	$1, cnt

#define	COPY_LOOP_FINI_XMM(dst)			\
	movntdq	%xmm0, 0x0(dst);		\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst)

#define	RSTOR_XMMS(r)				\
	movdqa	0x0(r), %xmm0;			\
	movdqa	0x10(r), %xmm1;		\
	movdqa	0x20(r), %xmm2;		\
	movdqa	0x30(r), %xmm3;		\
	movdqa	0x40(r), %xmm4;		\
	movdqa	0x50(r), %xmm5;		\
	movdqa	0x60(r), %xmm6;		\
	movdqa	0x70(r), %xmm7;		\
	RSTOR_XMM_EPILOG(r, 8)

	/*
	 * %rdi		src
	 * %rsi		dst
	 * %rdx		#if DEBUG then curthread
	 * %ecx		loop count
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_preempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkpagecopy)
	pushq	%rbp
	movq	%rsp, %rbp
	PREFETCH_START(%rdi)
	/*
	 * PAGESIZE is 4096 and each loop iteration moves 128 bytes, so
	 * 32 iterations would cover a page; but the initial load
	 * (COPY_LOOP_INIT_XMM) and the final store (COPY_LOOP_FINI_XMM)
	 * handle one block outside the loop, leaving 31 iterations.
	 */
	movl	$_CONST(32 - 1), %ecx
	ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled)
	movq	%cr0, %rax
	clts
	testl	$CR0_TS, %eax
	jnz	3f
	SAVE_XMMS(%r8)
3:	COPY_LOOP_INIT_XMM(%rdi)
4:	COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx)
	jnz	4b
	COPY_LOOP_FINI_XMM(%rsi)
	testl	$CR0_TS, %eax
	jnz	5f
	RSTOR_XMMS(%r8)
5:	movq	%rax, %cr0
	mfence
	leave
	ret
	SET_SIZE(hwblkpagecopy)

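/*
 * The two no_xmm variants below count a negative offset up toward zero:
 * the pointers are advanced past the end of the buffer, the byte count
 * is negated, and the loop exits when the index reaches zero.  A rough
 * C-level sketch of block_zero_no_xmm() (illustrative pseudo-code only;
 * the routine assumes size is a nonzero multiple of 32):
 *
 *	void
 *	block_zero_no_xmm(void *dst, size_t size)
 *	{
 *		char *end = (char *)dst + size;
 *		for (ssize_t i = -(ssize_t)size; i != 0; i += 32)
 *			store four zero 8-byte words at end[i] via movnti;
 *		mfence();
 *	}
 *
 * page_copy_no_xmm() uses the same indexing scheme with a fixed count
 * of MMU_STD_PAGESIZE bytes.
 */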
	ENTRY(block_zero_no_xmm)
	pushq	%rbp
	movq	%rsp, %rbp
	xorl	%eax, %eax
	addq	%rsi, %rdi
	negq	%rsi
1:
	movnti	%rax, (%rdi, %rsi)
	movnti	%rax, 8(%rdi, %rsi)
	movnti	%rax, 16(%rdi, %rsi)
	movnti	%rax, 24(%rdi, %rsi)
	addq	$32, %rsi
	jnz	1b
	mfence
	leave
	ret
	SET_SIZE(block_zero_no_xmm)


	ENTRY(page_copy_no_xmm)
	movq	$MMU_STD_PAGESIZE, %rcx
	addq	%rcx, %rdi
	addq	%rcx, %rsi
	negq	%rcx
1:
	movq	(%rsi, %rcx), %rax
	movnti	%rax, (%rdi, %rcx)
	movq	8(%rsi, %rcx), %rax
	movnti	%rax, 8(%rdi, %rcx)
	movq	16(%rsi, %rcx), %rax
	movnti	%rax, 16(%rdi, %rcx)
	movq	24(%rsi, %rcx), %rax
	movnti	%rax, 24(%rdi, %rcx)
	addq	$32, %rcx
	jnz	1b
	mfence
	ret
	SET_SIZE(page_copy_no_xmm)

#if defined(DEBUG)
	.text
.not_disabled:
	.string	"sseblk: preemption not disabled!"
#endif
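/*
 * Note on calling context (illustrative, inferred from the
 * ASSERT_KPREEMPT_DISABLED checks above): hwblkclr() and
 * hwblkpagecopy() manipulate %cr0 and the %xmm registers outside the
 * normal FPU context-switch path, so callers must hold off kernel
 * preemption for the duration of the call, e.g.:
 *
 *	kpreempt_disable();
 *	hwblkpagecopy(src, dst);
 *	kpreempt_enable();
 */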