/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/* All Rights Reserved */

/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

/*
 * When built for lint the routines in this file are presented as empty
 * C stubs, so the C type headers are pulled in; otherwise the generated
 * assym.h supplies the symbolic offsets used by the assembly bodies.
 */
#if defined(__lint)
#include <sys/types.h>
#include <sys/fp.h>
#else
#include "assym.h"
#endif

#if defined(__lint)

/*
 * lint-only C stand-in for the assembly routine in the #else branch
 * below; the stub itself always returns 0.
 */
uint_t
fpu_initial_probe(void)
{ return (0); }

#else	/* __lint */

	/*
	 * Returns zero if x87 "chip" is present(!)
59 */ 60 ENTRY_NP(fpu_initial_probe) 61 CLTS 62 fninit 63 fnstsw %ax 64 movzbl %al, %eax 65 ret 66 SET_SIZE(fpu_initial_probe) 67 68#endif /* __lint */ 69 70#if defined(__lint) 71 72/*ARGSUSED*/ 73void 74fxsave_insn(struct fxsave_state *fx) 75{} 76 77#else /* __lint */ 78 79#if defined(__amd64) 80 81 ENTRY_NP(fxsave_insn) 82 fxsave (%rdi) 83 ret 84 SET_SIZE(fxsave_insn) 85 86#elif defined(__i386) 87 88 ENTRY_NP(fxsave_insn) 89 movl 4(%esp), %eax 90 fxsave (%eax) 91 ret 92 SET_SIZE(fxsave_insn) 93 94#endif 95 96#endif /* __lint */ 97 98#if defined(__i386) 99 100/* 101 * If (num1/num2 > num1/num3) the FPU has the FDIV bug. 102 */ 103 104#if defined(__lint) 105 106int 107fpu_probe_pentium_fdivbug(void) 108{ return (0); } 109 110#else /* __lint */ 111 112 ENTRY_NP(fpu_probe_pentium_fdivbug) 113 fldl .num1 114 fldl .num2 115 fdivr %st(1), %st 116 fxch %st(1) 117 fdivl .num3 118 fcompp 119 fstsw %ax 120 sahf 121 jae 0f 122 movl $1, %eax 123 ret 124 1250: xorl %eax, %eax 126 ret 127 128 .align 4 129.num1: .4byte 0xbce4217d /* 4.999999 */ 130 .4byte 0x4013ffff 131.num2: .4byte 0x0 /* 15.0 */ 132 .4byte 0x402e0000 133.num3: .4byte 0xde7210bf /* 14.999999 */ 134 .4byte 0x402dffff 135 SET_SIZE(fpu_probe_pentium_fdivbug) 136 137#endif /* __lint */ 138 139/* 140 * To cope with processors that do not implement fxsave/fxrstor 141 * instructions, patch hot paths in the kernel to use them only 142 * when that feature has been detected. 
143 */ 144 145#if defined(__lint) 146 147void 148patch_sse(void) 149{} 150 151void 152patch_sse2(void) 153{} 154 155#else /* __lint */ 156 157 ENTRY_NP(patch_sse) 158 _HOT_PATCH_PROLOG 159 / 160 / frstor (%ebx); nop -> fxrstor (%ebx) 161 / 162 _HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3) 163 / 164 / lock; xorl $0, (%esp) -> sfence; ret 165 / 166 _HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4) 167 _HOT_PATCH_EPILOG 168 ret 169_fxrstor_ebx_insn: / see ndptrap_frstor() 170 fxrstor (%ebx) 171_ldmxcsr_ebx_insn: / see resume_from_zombie() 172 ldmxcsr (%ebx) 173_sfence_ret_insn: / see membar_producer() 174 .byte 0xf, 0xae, 0xf8 / [sfence instruction] 175 ret 176 SET_SIZE(patch_sse) 177 178 ENTRY_NP(patch_sse2) 179 _HOT_PATCH_PROLOG 180 / 181 / lock; xorl $0, (%esp) -> lfence; ret 182 / 183 _HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4) 184 _HOT_PATCH_EPILOG 185 ret 186_lfence_ret_insn: / see membar_consumer() 187 .byte 0xf, 0xae, 0xe8 / [lfence instruction] 188 ret 189 SET_SIZE(patch_sse2) 190 191#endif /* __lint */ 192#endif /* __i386 */ 193 194 195/* 196 * One of these routines is called from any lwp with floating 197 * point context as part of the prolog of a context switch. 198 */ 199 200#if defined(__lint) 201 202/*ARGSUSED*/ 203void 204fpxsave_ctxt(void *arg) 205{} 206 207/*ARGSUSED*/ 208void 209fpnsave_ctxt(void *arg) 210{} 211 212#else /* __lint */ 213 214#if defined(__amd64) 215 216 ENTRY_NP(fpxsave_ctxt) 217 cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) 218 jne 1f 219 220 movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) 221 fxsave FPU_CTX_FPU_REGS(%rdi) 222 /* 223 * On certain AMD processors, the "exception pointers" i.e. the last 224 * instruction pointer, last data pointer, and last opcode 225 * are saved by the fxsave instruction ONLY if the exception summary 226 * bit is set. 
227 * 228 * To ensure that we don't leak these values into the next context 229 * on the cpu, we could just issue an fninit here, but that's 230 * rather slow and so we issue an instruction sequence that 231 * clears them more quickly, if a little obscurely. 232 */ 233 btw $7, FXSAVE_STATE_FSW(%rdi) /* Test saved ES bit */ 234 jnc 0f /* jump if ES = 0 */ 235 fnclex /* clear pending x87 exceptions */ 2360: ffree %st(7) /* clear tag bit to remove possible stack overflow */ 237 fildl .fpzero_const(%rip) 238 /* dummy load changes all exception pointers */ 239 STTS(%rsi) /* trap on next fpu touch */ 2401: rep; ret /* use 2 byte return instruction when branch target */ 241 /* AMD Software Optimization Guide - Section 6.2 */ 242 SET_SIZE(fpxsave_ctxt) 243 244#elif defined(__i386) 245 246 ENTRY_NP(fpnsave_ctxt) 247 movl 4(%esp), %eax /* a struct fpu_ctx */ 248 cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax) 249 jne 1f 250 251 movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax) 252 fnsave FPU_CTX_FPU_REGS(%eax) 253 /* (fnsave also reinitializes x87 state) */ 254 STTS(%edx) /* trap on next fpu touch */ 2551: rep; ret /* use 2 byte return instruction when branch target */ 256 /* AMD Software Optimization Guide - Section 6.2 */ 257 SET_SIZE(fpnsave_ctxt) 258 259 ENTRY_NP(fpxsave_ctxt) 260 movl 4(%esp), %eax /* a struct fpu_ctx */ 261 cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax) 262 jne 1f 263 264 movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax) 265 fxsave FPU_CTX_FPU_REGS(%eax) 266 /* (see notes above about "exception pointers") */ 267 btw $7, FXSAVE_STATE_FSW(%eax) /* Test saved ES bit */ 268 jnc 0f /* jump if ES = 0 */ 269 fnclex /* clear pending x87 exceptions */ 2700: ffree %st(7) /* clear tag bit to remove possible stack overflow */ 271 fildl .fpzero_const 272 /* dummy load changes all exception pointers */ 273 STTS(%edx) /* trap on next fpu touch */ 2741: rep; ret /* use 2 byte return instruction when branch target */ 275 /* AMD Software Optimization Guide - Section 6.2 */ 276 
SET_SIZE(fpxsave_ctxt) 277 278#endif /* __i386 */ 279 280 .align 8 281.fpzero_const: 282 .4byte 0x0 283 .4byte 0x0 284 285#endif /* __lint */ 286 287 288#if defined(__lint) 289 290/*ARGSUSED*/ 291void 292fpsave(struct fnsave_state *f) 293{} 294 295/*ARGSUSED*/ 296void 297fpxsave(struct fxsave_state *f) 298{} 299 300#else /* __lint */ 301 302#if defined(__amd64) 303 304 ENTRY_NP(fpxsave) 305 CLTS 306 fxsave (%rdi) 307 fninit /* clear exceptions, init x87 tags */ 308 STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ 309 ret 310 SET_SIZE(fpxsave) 311 312#elif defined(__i386) 313 314 ENTRY_NP(fpsave) 315 CLTS 316 movl 4(%esp), %eax 317 fnsave (%eax) 318 STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ 319 ret 320 SET_SIZE(fpsave) 321 322 ENTRY_NP(fpxsave) 323 CLTS 324 movl 4(%esp), %eax 325 fxsave (%eax) 326 fninit /* clear exceptions, init x87 tags */ 327 STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ 328 ret 329 SET_SIZE(fpxsave) 330 331#endif /* __i386 */ 332#endif /* __lint */ 333 334#if defined(__lint) 335 336/*ARGSUSED*/ 337void 338fprestore(struct fnsave_state *f) 339{} 340 341/*ARGSUSED*/ 342void 343fpxrestore(struct fxsave_state *f) 344{} 345 346#else /* __lint */ 347 348#if defined(__amd64) 349 350 ENTRY_NP(fpxrestore) 351 CLTS 352 fxrstor (%rdi) 353 ret 354 SET_SIZE(fpxrestore) 355 356#elif defined(__i386) 357 358 ENTRY_NP(fprestore) 359 CLTS 360 movl 4(%esp), %eax 361 frstor (%eax) 362 ret 363 SET_SIZE(fprestore) 364 365 ENTRY_NP(fpxrestore) 366 CLTS 367 movl 4(%esp), %eax 368 fxrstor (%eax) 369 ret 370 SET_SIZE(fpxrestore) 371 372#endif /* __i386 */ 373#endif /* __lint */ 374 375/* 376 * Disable the floating point unit. 
377 */ 378 379#if defined(__lint) 380 381void 382fpdisable(void) 383{} 384 385#else /* __lint */ 386 387#if defined(__amd64) 388 389 ENTRY_NP(fpdisable) 390 STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ 391 ret 392 SET_SIZE(fpdisable) 393 394#elif defined(__i386) 395 396 ENTRY_NP(fpdisable) 397 STTS(%eax) 398 ret 399 SET_SIZE(fpdisable) 400 401#endif /* __i386 */ 402#endif /* __lint */ 403 404/* 405 * Initialize the fpu hardware. 406 */ 407 408#if defined(__lint) 409 410void 411fpinit(void) 412{} 413 414#else /* __lint */ 415 416#if defined(__amd64) 417 418 ENTRY_NP(fpinit) 419 CLTS 420 leaq sse_initial(%rip), %rax 421 fxrstor (%rax) /* load clean initial state */ 422 ret 423 SET_SIZE(fpinit) 424 425#elif defined(__i386) 426 427 ENTRY_NP(fpinit) 428 CLTS 429 cmpl $__FP_SSE, fp_kind 430 je 1f 431 432 fninit 433 movl $x87_initial, %eax 434 frstor (%eax) /* load clean initial state */ 435 ret 4361: 437 movl $sse_initial, %eax 438 fxrstor (%eax) /* load clean initial state */ 439 ret 440 SET_SIZE(fpinit) 441 442#endif /* __i386 */ 443#endif /* __lint */ 444 445/* 446 * Clears FPU exception state. 447 * Returns the FP status word. 
448 */ 449 450#if defined(__lint) 451 452uint32_t 453fperr_reset(void) 454{ return (0); } 455 456uint32_t 457fpxerr_reset(void) 458{ return (0); } 459 460#else /* __lint */ 461 462#if defined(__amd64) 463 464 ENTRY_NP(fperr_reset) 465 CLTS 466 xorl %eax, %eax 467 fnstsw %ax 468 fnclex 469 ret 470 SET_SIZE(fperr_reset) 471 472 ENTRY_NP(fpxerr_reset) 473 pushq %rbp 474 movq %rsp, %rbp 475 subq $0x10, %rsp /* make some temporary space */ 476 CLTS 477 stmxcsr (%rsp) 478 movl (%rsp), %eax 479 andl $_BITNOT(SSE_MXCSR_EFLAGS), (%rsp) 480 ldmxcsr (%rsp) /* clear processor exceptions */ 481 leave 482 ret 483 SET_SIZE(fpxerr_reset) 484 485#elif defined(__i386) 486 487 ENTRY_NP(fperr_reset) 488 CLTS 489 xorl %eax, %eax 490 fnstsw %ax 491 fnclex 492 ret 493 SET_SIZE(fperr_reset) 494 495 ENTRY_NP(fpxerr_reset) 496 CLTS 497 subl $4, %esp /* make some temporary space */ 498 stmxcsr (%esp) 499 movl (%esp), %eax 500 andl $_BITNOT(SSE_MXCSR_EFLAGS), (%esp) 501 ldmxcsr (%esp) /* clear processor exceptions */ 502 addl $4, %esp 503 ret 504 SET_SIZE(fpxerr_reset) 505 506#endif /* __i386 */ 507#endif /* __lint */ 508 509#if defined(__lint) 510 511uint32_t 512fpgetcwsw(void) 513{ 514 return (0); 515} 516 517#else /* __lint */ 518 519#if defined(__amd64) 520 521 ENTRY_NP(fpgetcwsw) 522 pushq %rbp 523 movq %rsp, %rbp 524 subq $0x10, %rsp /* make some temporary space */ 525 CLTS 526 fnstsw (%rsp) /* store the status word */ 527 fnstcw 2(%rsp) /* store the control word */ 528 movl (%rsp), %eax /* put both in %eax */ 529 leave 530 ret 531 SET_SIZE(fpgetcwsw) 532 533#elif defined(__i386) 534 535 ENTRY_NP(fpgetcwsw) 536 CLTS 537 subl $4, %esp /* make some temporary space */ 538 fnstsw (%esp) /* store the status word */ 539 fnstcw 2(%esp) /* store the control word */ 540 movl (%esp), %eax /* put both in %eax */ 541 addl $4, %esp 542 ret 543 SET_SIZE(fpgetcwsw) 544 545#endif /* __i386 */ 546#endif /* __lint */ 547 548/* 549 * Returns the MXCSR register. 
550 */ 551 552#if defined(__lint) 553 554uint32_t 555fpgetmxcsr(void) 556{ 557 return (0); 558} 559 560#else /* __lint */ 561 562#if defined(__amd64) 563 564 ENTRY_NP(fpgetmxcsr) 565 pushq %rbp 566 movq %rsp, %rbp 567 subq $0x10, %rsp /* make some temporary space */ 568 CLTS 569 stmxcsr (%rsp) 570 movl (%rsp), %eax 571 leave 572 ret 573 SET_SIZE(fpgetmxcsr) 574 575#elif defined(__i386) 576 577 ENTRY_NP(fpgetmxcsr) 578 CLTS 579 subl $4, %esp /* make some temporary space */ 580 stmxcsr (%esp) 581 movl (%esp), %eax 582 addl $4, %esp 583 ret 584 SET_SIZE(fpgetmxcsr) 585 586#endif /* __i386 */ 587#endif /* __lint */ 588