/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
/*	  All Rights Reserved	*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
/*	  All Rights Reserved	*/

/*
 * Low-level floating point unit (x87/SSE) support routines for the
 * x86 kernel: probing, context save/restore, enable/disable, and
 * control/status register access.  Each routine appears twice: a C
 * stub under __lint (so lint can type-check callers) and the real
 * assembly under the matching architecture #ifdef.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#if defined(__lint)
#include <sys/types.h>
#include <sys/fp.h>
#else
#include "assym.h"
#endif

#if defined(__lint)

uint_t
fpu_initial_probe(void)
{ return (0); }

#else	/* __lint */

	/*
	 * Returns zero if x87 "chip" is present(!)
	 *
	 * After fninit, a functioning x87 leaves the low byte of the
	 * status word clear; a nonzero result means no usable FPU.
	 */
	ENTRY_NP(fpu_initial_probe)
	CLTS				/* clear CR0.TS so FP insns don't trap */
	fninit				/* reset x87 to a known state */
	fnstsw	%ax			/* store status word (no wait form) */
	movzbl	%al, %eax		/* return low byte of status word */
	ret
	SET_SIZE(fpu_initial_probe)

#endif	/* __lint */

#if defined(__lint)

/*ARGSUSED*/
void
fxsave_insn(struct fxsave_state *fx)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * Save the full x87/SSE state into *fx with a single fxsave.
	 */
	ENTRY_NP(fxsave_insn)
	FXSAVEQ	((%rdi))
	ret
	SET_SIZE(fxsave_insn)

#elif defined(__i386)

	ENTRY_NP(fxsave_insn)
	movl	4(%esp), %eax		/* fx argument */
	fxsave	(%eax)
	ret
	SET_SIZE(fxsave_insn)

#endif

#endif	/* __lint */

#if defined(__i386)

/*
 * If (num1/num2 > num1/num3) the FPU has the FDIV bug.
 */

#if defined(__lint)

int
fpu_probe_pentium_fdivbug(void)
{ return (0); }

#else	/* __lint */

	/*
	 * Probe for the original Pentium FDIV erratum: divide the same
	 * numerator by two nearly-equal divisors and compare.  On a
	 * correct FPU num1/num2 <= num1/num3; a flawed divider returns
	 * results out of order.  Returns 1 if the bug is present.
	 */
	ENTRY_NP(fpu_probe_pentium_fdivbug)
	fldl	.num1			/* st = num1 */
	fldl	.num2			/* st = num2, st(1) = num1 */
	fdivr	%st(1), %st		/* st = num1/num2 */
	fxch	%st(1)			/* st = num1, st(1) = num1/num2 */
	fdivl	.num3			/* st = num1/num3 */
	fcompp				/* compare the two quotients, pop both */
	fstsw	%ax			/* condition codes -> %ax */
	sahf				/* ... -> EFLAGS */
	jae	0f			/* no bug: num1/num2 <= num1/num3 */
	movl	$1, %eax		/* FDIV bug present */
	ret

0:	xorl	%eax, %eax		/* FPU divides correctly */
	ret

	.align	4
.num1:	.4byte	0xbce4217d	/* 4.999999 */
	.4byte	0x4013ffff
.num2:	.4byte	0x0		/* 15.0 */
	.4byte	0x402e0000
.num3:	.4byte	0xde7210bf	/* 14.999999 */
	.4byte	0x402dffff
	SET_SIZE(fpu_probe_pentium_fdivbug)

#endif	/* __lint */

/*
 * To cope with processors that do not implement fxsave/fxrstor
 * instructions, patch hot paths in the kernel to use them only
 * when that feature has been detected.
 */

#if defined(__lint)

void
patch_sse(void)
{}

void
patch_sse2(void)
{}

#else	/* __lint */

	/*
	 * Rewrite hot kernel paths to use SSE-era instructions once
	 * fxsr/sse support has been detected.  The instruction templates
	 * after the function body supply the replacement bytes; the
	 * byte counts passed to _HOT_PATCH must match the template
	 * lengths exactly.
	 */
	ENTRY_NP(patch_sse)
	_HOT_PATCH_PROLOG
	/
	/	frstor (%ebx); nop	-> fxrstor (%ebx)
	/
	_HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
	/
	/	lock; xorl $0, (%esp)	-> sfence; ret
	/
	_HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
	_HOT_PATCH_EPILOG
	ret
_fxrstor_ebx_insn:			/ see ndptrap_frstor()
	fxrstor	(%ebx)
_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
	ldmxcsr	(%ebx)
_sfence_ret_insn:			/ see membar_producer()
	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
	ret
	SET_SIZE(patch_sse)

	ENTRY_NP(patch_sse2)
	_HOT_PATCH_PROLOG
	/
	/	lock; xorl $0, (%esp)	-> lfence; ret
	/
	_HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
	_HOT_PATCH_EPILOG
	ret
_lfence_ret_insn:			/ see membar_consumer()
	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
	ret
	SET_SIZE(patch_sse2)

#endif	/* __lint */
#endif	/* __i386 */


/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch.
 */

#if defined(__lint)

/*ARGSUSED*/
void
fpxsave_ctxt(void *arg)
{}

/*ARGSUSED*/
void
fpnsave_ctxt(void *arg)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * Save FPU state (fxsave flavor) at context switch.  arg (%rdi)
	 * is a struct fpu_ctx.  Only acts if the context has exactly
	 * FPU_EN set (i.e. enabled but not already saved); afterwards
	 * marks the state valid and re-arms the CR0.TS trap so the next
	 * FP touch faults and reloads state.
	 */
	ENTRY_NP(fpxsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	FXSAVEQ	(FPU_CTX_FPU_REGS(%rdi))

	/*
	 * On certain AMD processors, the "exception pointers" i.e. the last
	 * instruction pointer, last data pointer, and last opcode
	 * are saved by the fxsave instruction ONLY if the exception summary
	 * bit is set.
	 *
	 * To ensure that we don't leak these values into the next context
	 * on the cpu, we could just issue an fninit here, but that's
	 * rather slow and so we issue an instruction sequence that
	 * clears them more quickly, if a little obscurely.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)

#elif defined(__i386)

	/*
	 * fnsave flavor for CPUs without fxsave; arg is on the stack.
	 */
	ENTRY_NP(fpnsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fnsave	FPU_CTX_FPU_REGS(%eax)
			/* (fnsave also reinitializes x87 state) */
	STTS(%edx)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpnsave_ctxt)

	ENTRY_NP(fpxsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fxsave	FPU_CTX_FPU_REGS(%eax)
			/* (see notes above about "exception pointers") */
	btw	$7, FXSAVE_STATE_FSW(%eax)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const
			/* dummy load changes all exception pointers */
	STTS(%edx)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)

#endif	/* __i386 */

	.align	8
.fpzero_const:
	.4byte	0x0
	.4byte	0x0

#endif	/* __lint */


#if defined(__lint)

/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * Save FPU state into *f unconditionally, then disable the FPU.
	 */
	ENTRY_NP(fpxsave)
	CLTS
	FXSAVEQ	((%rdi))
	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

#elif defined(__i386)

	ENTRY_NP(fpsave)
	CLTS
	movl	4(%esp), %eax
	fnsave	(%eax)
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpsave)

	ENTRY_NP(fpxsave)
	CLTS
	movl	4(%esp), %eax
	fxsave	(%eax)
	fninit				/* clear exceptions, init x87 tags */
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * Reload FPU state from *f, leaving the FPU enabled (TS clear).
	 */
	ENTRY_NP(fpxrestore)
	CLTS
	FXRSTORQ	((%rdi))
	ret
	SET_SIZE(fpxrestore)

#elif defined(__i386)

	ENTRY_NP(fprestore)
	CLTS
	movl	4(%esp), %eax
	frstor	(%eax)
	ret
	SET_SIZE(fprestore)

	ENTRY_NP(fpxrestore)
	CLTS
	movl	4(%esp), %eax
	fxrstor	(%eax)
	ret
	SET_SIZE(fpxrestore)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Disable the floating point unit.
 */

#if defined(__lint)

void
fpdisable(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpdisable)
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)

#elif defined(__i386)

	ENTRY_NP(fpdisable)
	STTS(%eax)
	ret
	SET_SIZE(fpdisable)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Initialize the fpu hardware.
 */

#if defined(__lint)

void
fpinit(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * Load a pristine initial state image into the FPU.
	 * sse_initial is a template defined elsewhere in the kernel.
	 */
	ENTRY_NP(fpinit)
	CLTS
	leaq	sse_initial(%rip), %rax
	FXRSTORQ	((%rax))	/* load clean initial state */
	ret
	SET_SIZE(fpinit)

#elif defined(__i386)

	/*
	 * Pick the x87-only or SSE initial-state image based on the
	 * detected fp_kind.
	 */
	ENTRY_NP(fpinit)
	CLTS
	cmpl	$__FP_SSE, fp_kind
	je	1f

	fninit
	movl	$x87_initial, %eax
	frstor	(%eax)			/* load clean initial state */
	ret
1:
	movl	$sse_initial, %eax
	fxrstor	(%eax)			/* load clean initial state */
	ret
	SET_SIZE(fpinit)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Clears FPU exception state.
 * Returns the FP status word.
 */

#if defined(__lint)

uint32_t
fperr_reset(void)
{ return (0); }

uint32_t
fpxerr_reset(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax		/* clear high bits of return value */
	fnstsw	%ax			/* return x87 status word */
	fnclex				/* then clear pending exceptions */
	ret
	SET_SIZE(fperr_reset)

	/*
	 * Return MXCSR and clear its exception flag bits.
	 */
	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax		/* return original MXCSR value */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* clear processor exceptions */
	leave
	ret
	SET_SIZE(fpxerr_reset)

#elif defined(__i386)

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax
	fnstsw	%ax
	fnclex
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	CLTS
	subl	$4, %esp		/* make some temporary space */
	stmxcsr	(%esp)
	movl	(%esp), %eax
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
	ldmxcsr	(%esp)			/* clear processor exceptions */
	addl	$4, %esp
	ret
	SET_SIZE(fpxerr_reset)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

uint32_t
fpgetcwsw(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * Return the x87 control word in the high 16 bits of %eax and
	 * the status word in the low 16 bits (packed via the two
	 * adjacent 16-bit stores below).
	 */
	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	fnstsw	(%rsp)			/* store the status word */
	fnstcw	2(%rsp)			/* store the control word */
	movl	(%rsp), %eax		/* put both in %eax */
	leave
	ret
	SET_SIZE(fpgetcwsw)

#elif defined(__i386)

	ENTRY_NP(fpgetcwsw)
	CLTS
	subl	$4, %esp		/* make some temporary space */
	fnstsw	(%esp)			/* store the status word */
	fnstcw	2(%esp)			/* store the control word */
	movl	(%esp), %eax		/* put both in %eax */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetcwsw)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Returns the MXCSR register.
 */

#if defined(__lint)

uint32_t
fpgetmxcsr(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax
	leave
	ret
	SET_SIZE(fpgetmxcsr)

#elif defined(__i386)

	ENTRY_NP(fpgetmxcsr)
	CLTS
	subl	$4, %esp		/* make some temporary space */
	stmxcsr	(%esp)
	movl	(%esp), %eax
	addl	$4, %esp
	ret
	SET_SIZE(fpgetmxcsr)

#endif	/* __i386 */
#endif	/* __lint */