xref: /titanic_51/usr/src/uts/i86pc/os/fpu_subr.c (revision 7af88ac71631ebf259c6c4c22a9f649ddff3e270)
1ae115bc7Smrj /*
2ae115bc7Smrj  * CDDL HEADER START
3ae115bc7Smrj  *
4ae115bc7Smrj  * The contents of this file are subject to the terms of the
5ae115bc7Smrj  * Common Development and Distribution License (the "License").
6ae115bc7Smrj  * You may not use this file except in compliance with the License.
7ae115bc7Smrj  *
8ae115bc7Smrj  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ae115bc7Smrj  * or http://www.opensolaris.org/os/licensing.
10ae115bc7Smrj  * See the License for the specific language governing permissions
11ae115bc7Smrj  * and limitations under the License.
12ae115bc7Smrj  *
13ae115bc7Smrj  * When distributing Covered Code, include this CDDL HEADER in each
14ae115bc7Smrj  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ae115bc7Smrj  * If applicable, add the following below this CDDL HEADER, with the
16ae115bc7Smrj  * fields enclosed by brackets "[]" replaced with your own identifying
17ae115bc7Smrj  * information: Portions Copyright [yyyy] [name of copyright owner]
18ae115bc7Smrj  *
19ae115bc7Smrj  * CDDL HEADER END
20ae115bc7Smrj  */
21ae115bc7Smrj 
22ae115bc7Smrj /*
237417cfdeSKuriakose Kuruvilla  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24ae115bc7Smrj  */
25ae115bc7Smrj 
26ae115bc7Smrj /*
27ae115bc7Smrj  * Floating point configuration.
28ae115bc7Smrj  */
29ae115bc7Smrj 
30ae115bc7Smrj #include <sys/types.h>
31ae115bc7Smrj #include <sys/regset.h>
32ae115bc7Smrj #include <sys/privregs.h>
33ae115bc7Smrj #include <sys/x86_archext.h>
34ae115bc7Smrj #include <sys/archsystm.h>
35ae115bc7Smrj #include <sys/fp.h>
36ae115bc7Smrj #include <sys/cmn_err.h>
37ae115bc7Smrj 
/*
 * Byte alignment used by fpu_probe() when it carves an aligned
 * struct fxsave_state out of its over-sized on-stack buffer.
 */
38ae115bc7Smrj #define	XMM_ALIGN	16
39ae115bc7Smrj 
40ae115bc7Smrj /*
41ae115bc7Smrj  * If fpu_exists is non-zero, fpu_probe will attempt to use any
42ae115bc7Smrj  * hardware FPU (subject to other constraints, see below).  If
43ae115bc7Smrj  * fpu_exists is zero, fpu_probe will report that there is no
44ae115bc7Smrj  * FPU even if there is one.
 *
 * fpu_probe also clears fpu_exists itself whenever it ends up on its
 * "no FPU" path.
45ae115bc7Smrj  */
46ae115bc7Smrj int fpu_exists = 1;
47ae115bc7Smrj 
/*
 * Kind of floating point support in use.  Starts out as FP_387;
 * fpu_probe ORs in __FP_SSE and __FP_AVX as it enables those features,
 * or overwrites the value with FP_NO when it decides there is no FPU.
 */
48ae115bc7Smrj int fp_kind = FP_387;
49ae115bc7Smrj 
50ae115bc7Smrj /*
51*7af88ac7SKuriakose Kuruvilla  * Mechanism to save FPU state.
 *
 * The compile-time default is FP_FXSAVE on amd64 and FP_FNSAVE on i386.
 * fpu_probe may change it at run time: to FP_FXSAVE on i386 when SSE is
 * present, and to FP_XSAVE (either architecture) when SSE is usable and
 * the XSAVE feature is also present.
52*7af88ac7SKuriakose Kuruvilla  */
53*7af88ac7SKuriakose Kuruvilla #if defined(__amd64)
54*7af88ac7SKuriakose Kuruvilla int fp_save_mech = FP_FXSAVE;
55*7af88ac7SKuriakose Kuruvilla #elif defined(__i386)
56*7af88ac7SKuriakose Kuruvilla int fp_save_mech = FP_FNSAVE;
57*7af88ac7SKuriakose Kuruvilla #endif
58*7af88ac7SKuriakose Kuruvilla 
59*7af88ac7SKuriakose Kuruvilla /*
60ae115bc7Smrj  * The variable fpu_ignored is provided to allow other code to
61ae115bc7Smrj  * determine whether emulation is being done because there is
62ae115bc7Smrj  * no FPU or because of an override requested via /etc/system.
 *
 * fpu_probe sets it to 1 when fpu_exists is zero and, on i386 builds,
 * also when the Pentium FDIV bug is detected.
63ae115bc7Smrj  */
64ae115bc7Smrj int fpu_ignored = 0;
65ae115bc7Smrj 
66ae115bc7Smrj /*
67ae115bc7Smrj  * Used by ppcopy and ppzero to determine whether or not to use the
68ae115bc7Smrj  * SSE-based pagecopy and pagezero routines.
 *
 * All three flags start at 0; fpu_probe sets them to 1 together when
 * SSE2 is (still) present in x86_featureset.
69ae115bc7Smrj  */
70ae115bc7Smrj int use_sse_pagecopy = 0;
71ae115bc7Smrj int use_sse_pagezero = 0;
72ae115bc7Smrj int use_sse_copy = 0;
73ae115bc7Smrj 
74ae115bc7Smrj #if defined(__i386)
75ae115bc7Smrj 
76ae115bc7Smrj /*
77ae115bc7Smrj  * The variable fpu_pentium_fdivbug is provided to allow other code to
78ae115bc7Smrj  * determine whether the system contains a Pentium with the FDIV problem.
 *
 * fpu_probe fills it in from fpu_probe_pentium_fdivbug(); a non-zero
 * value makes fpu_probe ignore the FPU.  Only defined on i386 builds.
79ae115bc7Smrj  */
80ae115bc7Smrj int fpu_pentium_fdivbug = 0;
81ae115bc7Smrj 
82ae115bc7Smrj #endif	/* __i386 */
83ae115bc7Smrj 
84843e1988Sjohnlev #if defined(__xpv)
85843e1988Sjohnlev 
86843e1988Sjohnlev /*
87843e1988Sjohnlev  * Use of SSE or otherwise is forcibly configured for us by the hypervisor.
 * Both macros therefore expand to nothing in this configuration.
88843e1988Sjohnlev  */
89843e1988Sjohnlev 
90843e1988Sjohnlev #define	ENABLE_SSE()
91843e1988Sjohnlev #define	DISABLE_SSE()
92843e1988Sjohnlev 
93843e1988Sjohnlev #else	/* __xpv */
94ae115bc7Smrj 
/*
 * Read %cr4, run the value through the CR4_ENABLE_SSE_FLAGS /
 * CR4_DISABLE_SSE_FLAGS macro, and write the result back.
 */
95ae115bc7Smrj #define	ENABLE_SSE()	setcr4(CR4_ENABLE_SSE_FLAGS(getcr4()))
96ae115bc7Smrj #define	DISABLE_SSE()	setcr4(CR4_DISABLE_SSE_FLAGS(getcr4()))
97ae115bc7Smrj 
98843e1988Sjohnlev #endif	/* __xpv */
99843e1988Sjohnlev 
100ae115bc7Smrj /*
101ae115bc7Smrj  * Try and figure out what kind of FP capabilities we have, and
102ae115bc7Smrj  * set up the control registers accordingly.
 *
 * The body is wrapped in do { } while (0) so that "continue" acts as a
 * jump to the "no FPU" tail at the bottom of the function; the success
 * path returns from inside the loop instead.
 *
 * On the success path this routine may:
 *   - OR __FP_SSE / __FP_AVX into fp_kind,
 *   - switch fp_save_mech and fpsave_ctxt to the FXSAVE or XSAVE variants,
 *   - set use_sse_pagecopy, use_sse_pagezero and use_sse_copy,
 *   - replace sse_mxcsr_mask with the mask reported by an fxsave image,
 *   - update %cr0 via CR0_ENABLE_FPU_FLAGS.
 *
 * On the "no FPU" path it updates %cr0 via CR0_DISABLE_FPU_FLAGS, calls
 * DISABLE_SSE(), and sets fp_kind to FP_NO and fpu_exists to 0.
103ae115bc7Smrj  */
104ae115bc7Smrj void
105ae115bc7Smrj fpu_probe(void)
106ae115bc7Smrj {
107ae115bc7Smrj 	do {
		/*
		 * A non-zero result from the initial probe sends us
		 * straight to the "no FPU" tail.
		 */
108ae115bc7Smrj 		if (fpu_initial_probe() != 0)
109ae115bc7Smrj 			continue;
110ae115bc7Smrj 
		/*
		 * fpu_exists was cleared before we ran: behave as if there
		 * is no FPU, but record that it was ignored, not absent.
		 */
111ae115bc7Smrj 		if (fpu_exists == 0) {
112ae115bc7Smrj 			fpu_ignored = 1;
113ae115bc7Smrj 			continue;
114ae115bc7Smrj 		}
115ae115bc7Smrj 
116ae115bc7Smrj #if defined(__i386)
117ae115bc7Smrj 		fpu_pentium_fdivbug = fpu_probe_pentium_fdivbug();
118ae115bc7Smrj 		/*
119ae115bc7Smrj 		 * The test does some real floating point operations.
120ae115bc7Smrj 		 * Reset it back to previous state.
121ae115bc7Smrj 		 */
122ae115bc7Smrj 		(void) fpu_initial_probe();
123ae115bc7Smrj 
		/*
		 * An FPU with the FDIV bug is ignored, just like the
		 * fpu_exists == 0 override above.
		 */
124ae115bc7Smrj 		if (fpu_pentium_fdivbug != 0) {
125ae115bc7Smrj 			fpu_ignored = 1;
126ae115bc7Smrj 			continue;
127ae115bc7Smrj 		}
128ae115bc7Smrj #endif
129ae115bc7Smrj 
130843e1988Sjohnlev #ifndef __xpv
131ae115bc7Smrj 		/*
132ae115bc7Smrj 		 * Check and see if the fpu is present by looking
133ae115bc7Smrj 		 * at the "extension type" bit.  (While this used to
134ae115bc7Smrj 		 * indicate a 387DX coprocessor in days gone by,
135ae115bc7Smrj 		 * it's forced on by modern implementations for
136ae115bc7Smrj 		 * compatibility.)
137ae115bc7Smrj 		 */
138ae115bc7Smrj 		if ((getcr0() & CR0_ET) == 0)
139ae115bc7Smrj 			continue;
140843e1988Sjohnlev #endif
141ae115bc7Smrj 
142ae115bc7Smrj #if defined(__amd64)
143ae115bc7Smrj 		/*
144ae115bc7Smrj 		 * SSE and SSE2 are required for the 64-bit ABI.
145ae115bc7Smrj 		 *
146ae115bc7Smrj 		 * If they're not present, we can in principle run
147ae115bc7Smrj 		 * 32-bit userland, though 64-bit processes will be hosed.
148ae115bc7Smrj 		 *
149ae115bc7Smrj 		 * (Perhaps we should complain more about this case!)
		 *
		 * In that case fp_kind and fp_save_mech are simply left
		 * at their current values.
150ae115bc7Smrj 		 */
1517417cfdeSKuriakose Kuruvilla 		if (is_x86_feature(x86_featureset, X86FSET_SSE) &&
1527417cfdeSKuriakose Kuruvilla 		    is_x86_feature(x86_featureset, X86FSET_SSE2)) {
153*7af88ac7SKuriakose Kuruvilla 			fp_kind |= __FP_SSE;
154ae115bc7Smrj 			ENABLE_SSE();
155*7af88ac7SKuriakose Kuruvilla 
			/* AVX is only expected on CPUs that also have XSAVE. */
156*7af88ac7SKuriakose Kuruvilla 			if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
157*7af88ac7SKuriakose Kuruvilla 				ASSERT(is_x86_feature(x86_featureset,
158*7af88ac7SKuriakose Kuruvilla 				    X86FSET_XSAVE));
159*7af88ac7SKuriakose Kuruvilla 				fp_kind |= __FP_AVX;
160*7af88ac7SKuriakose Kuruvilla 			}
161*7af88ac7SKuriakose Kuruvilla 
			/*
			 * Prefer XSAVE over the default FXSAVE mechanism.
			 * NOTE(review): patch_xsave() presumably patches
			 * kernel code to use xsave/xrstor -- confirm.
			 */
162*7af88ac7SKuriakose Kuruvilla 			if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
163*7af88ac7SKuriakose Kuruvilla 				fp_save_mech = FP_XSAVE;
164*7af88ac7SKuriakose Kuruvilla 				fpsave_ctxt = xsave_ctxt;
165*7af88ac7SKuriakose Kuruvilla 				patch_xsave();
166*7af88ac7SKuriakose Kuruvilla 			}
167ae115bc7Smrj 		}
168ae115bc7Smrj #elif defined(__i386)
169ae115bc7Smrj 		/*
170ae115bc7Smrj 		 * SSE and SSE2 are both optional, and we patch kernel
171ae115bc7Smrj 		 * code to exploit it when present.
172ae115bc7Smrj 		 */
1737417cfdeSKuriakose Kuruvilla 		if (is_x86_feature(x86_featureset, X86FSET_SSE)) {
174*7af88ac7SKuriakose Kuruvilla 			fp_kind |= __FP_SSE;
175ae115bc7Smrj 			ENABLE_SSE();
			/* With SSE, move from the FNSAVE default to FXSAVE. */
176*7af88ac7SKuriakose Kuruvilla 			fp_save_mech = FP_FXSAVE;
177*7af88ac7SKuriakose Kuruvilla 			fpsave_ctxt = fpxsave_ctxt;
178*7af88ac7SKuriakose Kuruvilla 
179*7af88ac7SKuriakose Kuruvilla 			if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
180*7af88ac7SKuriakose Kuruvilla 				patch_sse2();
181*7af88ac7SKuriakose Kuruvilla 			}
182*7af88ac7SKuriakose Kuruvilla 
			/* AVX is only expected on CPUs that also have XSAVE. */
183*7af88ac7SKuriakose Kuruvilla 			if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
184*7af88ac7SKuriakose Kuruvilla 				ASSERT(is_x86_feature(x86_featureset,
185*7af88ac7SKuriakose Kuruvilla 				    X86FSET_XSAVE));
186*7af88ac7SKuriakose Kuruvilla 				fp_kind |= __FP_AVX;
187*7af88ac7SKuriakose Kuruvilla 			}
188*7af88ac7SKuriakose Kuruvilla 
			/*
			 * XSAVE, when present, overrides the FXSAVE choice
			 * made above; otherwise patch for FXSAVE.
			 */
189*7af88ac7SKuriakose Kuruvilla 			if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
190*7af88ac7SKuriakose Kuruvilla 				fp_save_mech = FP_XSAVE;
191*7af88ac7SKuriakose Kuruvilla 				fpsave_ctxt = xsave_ctxt;
192*7af88ac7SKuriakose Kuruvilla 				patch_xsave();
193*7af88ac7SKuriakose Kuruvilla 			} else {
194*7af88ac7SKuriakose Kuruvilla 				patch_sse();	/* use fxrstor */
195*7af88ac7SKuriakose Kuruvilla 			}
196ae115bc7Smrj 		} else {
			/* No SSE: make sure SSE2 is not advertised either. */
1977417cfdeSKuriakose Kuruvilla 			remove_x86_feature(x86_featureset, X86FSET_SSE2);
198ae115bc7Smrj 			/*
199*7af88ac7SKuriakose Kuruvilla 			 * We are unlikely to have a chip with AVX but not
200*7af88ac7SKuriakose Kuruvilla 			 * SSE. But to be safe we disable AVX if SSE is not
201*7af88ac7SKuriakose Kuruvilla 			 * enabled.
202*7af88ac7SKuriakose Kuruvilla 			 */
203*7af88ac7SKuriakose Kuruvilla 			remove_x86_feature(x86_featureset, X86FSET_AVX);
204*7af88ac7SKuriakose Kuruvilla 			/*
205ae115bc7Smrj 			 * (Just in case the BIOS decided we wanted SSE
206ae115bc7Smrj 			 * enabled when we didn't. See 4965674.)
207ae115bc7Smrj 			 */
208ae115bc7Smrj 			DISABLE_SSE();
209ae115bc7Smrj 		}
210ae115bc7Smrj #endif
		/*
		 * Checked after the per-architecture block so that an SSE2
		 * bit removed above (i386 without SSE) is not honoured.
		 */
2117417cfdeSKuriakose Kuruvilla 		if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
212ae115bc7Smrj 			use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1;
213ae115bc7Smrj 		}
214ae115bc7Smrj 
215*7af88ac7SKuriakose Kuruvilla 		if (fp_kind & __FP_SSE) {
216ae115bc7Smrj 			struct fxsave_state *fx;
			/*
			 * Raw byte buffer, XMM_ALIGN bytes larger than the
			 * structure so an aligned copy fits inside it.
			 */
217ae115bc7Smrj 			uint8_t fxsave_state[sizeof (struct fxsave_state) +
218ae115bc7Smrj 			    XMM_ALIGN];
219ae115bc7Smrj 
220ae115bc7Smrj 			/*
221ae115bc7Smrj 			 * Extract the mxcsr mask from our first fxsave
			 *
			 * Round the buffer address up to the next XMM_ALIGN
			 * boundary (fxsave wants a 16-byte aligned operand).
222ae115bc7Smrj 			 */
223ae115bc7Smrj 			fx = (void *)(((uintptr_t)(&fxsave_state[0]) +
224ae115bc7Smrj 			    XMM_ALIGN) & ~(XMM_ALIGN - 1ul));
225ae115bc7Smrj 
			/*
			 * Pre-clear the field so that a zero after the save
			 * means no mask was reported.
			 */
226ae115bc7Smrj 			fx->fx_mxcsr_mask = 0;
227ae115bc7Smrj 			fxsave_insn(fx);
228ae115bc7Smrj 			if (fx->fx_mxcsr_mask != 0) {
229ae115bc7Smrj 				/*
230ae115bc7Smrj 				 * Override default mask initialized in fpu.c
231ae115bc7Smrj 				 */
232ae115bc7Smrj 				sse_mxcsr_mask = fx->fx_mxcsr_mask;
233ae115bc7Smrj 			}
234ae115bc7Smrj 		}
235ae115bc7Smrj 
		/* Success: enable the FPU in %cr0 and skip the tail below. */
236ae115bc7Smrj 		setcr0(CR0_ENABLE_FPU_FLAGS(getcr0()));
237ae115bc7Smrj 		return;
238ae115bc7Smrj 		/*CONSTANTCONDITION*/
239ae115bc7Smrj 	} while (0);
240ae115bc7Smrj 
241ae115bc7Smrj 	/*
242ae115bc7Smrj 	 * No FPU hardware present
	 * (or it is being ignored; every "continue" above lands here).
243ae115bc7Smrj 	 */
244ae115bc7Smrj 	setcr0(CR0_DISABLE_FPU_FLAGS(getcr0()));
245ae115bc7Smrj 	DISABLE_SSE();
246ae115bc7Smrj 	fp_kind = FP_NO;
247ae115bc7Smrj 	fpu_exists = 0;
248ae115bc7Smrj }
249