/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*	  All Rights Reserved						*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*	  All Rights Reserved						*/

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#include "assym.h"

	/*
	 * Returns zero if x87 "chip" is present(!)
	 */
	ENTRY_NP(fpu_initial_probe)
	CLTS
	fninit
	fnstsw	%ax
	movzbl	%al, %eax
	ret
	SET_SIZE(fpu_initial_probe)

	ENTRY_NP(fxsave_insn)
	fxsaveq	(%rdi)
	ret
	SET_SIZE(fxsave_insn)

/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch.
 */

/*
 * These three functions define the Intel "xsave" handling for CPUs with
 * different features. Newer AMD CPUs can also use these functions. See the
 * 'exception pointers' comment below.
 */
	ENTRY_NP(fpxsave_ctxt)			/* %rdi is a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi	/* fpu_regs.kfpu_u.kfpu_fx ptr */
	fxsaveq	(%rdi)
	STTS(%rsi)				/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)

	ENTRY_NP(xsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsave	(%rsi)
	STTS(%rsi)				/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)

	ENTRY_NP(xsaveopt_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsaveopt	(%rsi)
	STTS(%rsi)				/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsaveopt_ctxt)

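/*
 * For orientation: the C side of the kernel installs one of the three
 * routines above (or one of the _excp_clr_ variants below) as the
 * context-save hook for an lwp. A minimal sketch of that selection,
 * assuming a hook pointer named fpsave_ctxt and an xsaveopt capability
 * flag have_xsaveopt (both names are illustrative, not taken from this
 * file):
 *
 *	void (*fpsave_ctxt)(struct fpu_ctx *);
 *
 *	if (fp_save_mech == FP_XSAVE)
 *		fpsave_ctxt = have_xsaveopt ? xsaveopt_ctxt : xsave_ctxt;
 *	else
 *		fpsave_ctxt = fpxsave_ctxt;
 */
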
/*
 * On certain AMD processors, the "exception pointers" (i.e. the last
 * instruction pointer, last data pointer, and last opcode) are saved by the
 * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is
 * set.
 *
 * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior.
 * We can detect this via an AMD specific cpuid feature bit
 * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions.
 * Otherwise we use these more complex functions on AMD CPUs. All three follow
 * the same logic after the xsave* instruction.
 */
	ENTRY_NP(fpxsave_excp_clr_ctxt)		/* %rdi is a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi	/* fpu_regs.kfpu_u.kfpu_fx ptr */
	fxsaveq	(%rdi)
	/*
	 * To ensure that we don't leak these values into the next context
	 * on the cpu, we could just issue an fninit here, but that's
	 * rather slow and so we issue an instruction sequence that
	 * clears them more quickly, if a little obscurely.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_excp_clr_ctxt)

	ENTRY_NP(xsave_excp_clr_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsave	(%rsi)
	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)	/* dummy load changes all excp. pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_excp_clr_ctxt)

	ENTRY_NP(xsaveopt_excp_clr_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsaveopt	(%rsi)
	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)	/* dummy load changes all excp. pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsaveopt_excp_clr_ctxt)

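/*
 * Tying the two families together: the _excp_clr_ routines above are only
 * needed on AMD CPUs that do not advertise CPUID_AMD_EBX_ERR_PTR_ZERO.
 * A rough sketch of that decision, using hypothetical helpers cpu_is_amd
 * and has_err_ptr_zero in place of whatever the probe code really checks:
 *
 *	if (cpu_is_amd && !has_err_ptr_zero)
 *		fpsave_ctxt = xsaveopt_excp_clr_ctxt;	(or the xsave/fxsave
 *							 _excp_clr_ variant)
 *	else
 *		fpsave_ctxt = xsaveopt_ctxt;		(or xsave/fpxsave)
 */
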
	.align	8
.fpzero_const:
	.4byte	0x0
	.4byte	0x0


	ENTRY_NP(fpxsave)
	CLTS
	fxsaveq	(%rdi)
	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

	ENTRY_NP(xsave)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	xsave	(%rdi)

	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)

	ENTRY_NP(xsaveopt)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	xsaveopt	(%rdi)

	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsaveopt)

/*
 * These functions are used when restoring the FPU as part of the epilogue of a
 * context switch.
 */

	ENTRY(fpxrestore_ctxt)
	cmpl	$_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi	/* fpu_regs.kfpu_u.kfpu_fx ptr */
	CLTS
	fxrstorq	(%rdi)
1:
	ret
	SET_SIZE(fpxrestore_ctxt)

	ENTRY(xrestore_ctxt)
	cmpl	$_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	CLTS
	xrstor	(%rdi)
1:
	ret
	SET_SIZE(xrestore_ctxt)


	ENTRY_NP(fpxrestore)
	CLTS
	fxrstorq	(%rdi)
	ret
	SET_SIZE(fpxrestore)

	ENTRY_NP(xrestore)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	xrstor	(%rdi)
	ret
	SET_SIZE(xrestore)

/*
 * Disable the floating point unit.
 */

	ENTRY_NP(fpdisable)
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)

/*
 * Initialize the fpu hardware.
 */

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_XSAVE, fp_save_mech
	je	1f

	/* fxsave */
	leaq	sse_initial(%rip), %rax
	fxrstorq	(%rax)		/* load clean initial state */
	ret

1:	/* xsave */
	leaq	avx_initial(%rip), %rcx
	xorl	%edx, %edx
	movl	$XFEATURE_AVX, %eax
	btl	$X86FSET_AVX, x86_featureset
	cmovael	%edx, %eax
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	xrstor	(%rcx)
	ret
	SET_SIZE(fpinit)

/*
 * Clears FPU exception state.
 * Returns the FP status word.
 */

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax
	fnstsw	%ax
	fnclex
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* clear processor exceptions */
	leave
	ret
	SET_SIZE(fpxerr_reset)

	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	fnstsw	(%rsp)			/* store the status word */
	fnstcw	2(%rsp)			/* store the control word */
	movl	(%rsp), %eax		/* put both in %eax */
	leave
	ret
	SET_SIZE(fpgetcwsw)

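/*
 * Note on fpgetcwsw(): both x87 words come back packed in the 32-bit
 * return value, the status word (fnstsw) in the low 16 bits and the
 * control word (fnstcw, stored at offset 2) in the high 16 bits. An
 * illustrative C-side split (variable names are not from this file):
 *
 *	uint32_t cwsw = fpgetcwsw();
 *	uint16_t fsw = cwsw & 0xffff;	// x87 status word
 *	uint16_t fcw = cwsw >> 16;	// x87 control word
 */
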
/*
 * Returns the MXCSR register.
 */

	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax
	leave
	ret
	SET_SIZE(fpgetmxcsr)