xref: /freebsd/sys/amd64/amd64/initcpu.c (revision 401ab69cff8fa2320a9f8ea4baa114a6da6c952b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) KATO Takenori, 1997, 1998.
5  *
6  * All rights reserved.  Unpublished rights reserved under the copyright
7  * laws of Japan.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer as
15  *    the first lines of this file unmodified.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #include "opt_cpu.h"
34 
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/pcpu.h>
38 #include <sys/systm.h>
39 #include <sys/sysctl.h>
40 
41 #include <machine/cputypes.h>
42 #include <machine/md_var.h>
43 #include <machine/psl.h>
44 #include <machine/specialreg.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 
49 static int	hw_instruction_sse;
50 SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
51     &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
52 static int	lower_sharedpage_init;
53 int		hw_lower_amd64_sharedpage;
54 SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN,
55     &hw_lower_amd64_sharedpage, 0,
56    "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory");
57 /*
58  * -1: automatic (default)
59  *  0: keep enable CLFLUSH
60  *  1: force disable CLFLUSH
61  */
62 static int	hw_clflush_disable = -1;
63 
64 static void
65 init_amd(void)
66 {
67 	uint64_t msr;
68 
69 	/*
70 	 * C1E renders the local APIC timer dead, so we disable it by
71 	 * reading the Interrupt Pending Message register and clearing
72 	 * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
73 	 *
74 	 * Reference:
75 	 *   "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
76 	 *   #32559 revision 3.00+
77 	 *
78 	 * Detect the presence of C1E capability mostly on latest
79 	 * dual-cores (or future) k8 family.  Affected models range is
80 	 * taken from Linux sources.
81 	 */
82 	if ((CPUID_TO_FAMILY(cpu_id) == 0xf ||
83 	    CPUID_TO_FAMILY(cpu_id) == 0x10) && (cpu_feature2 & CPUID2_HV) == 0)
84 		cpu_amdc1e_bug = 1;
85 
86 	/*
87 	 * Work around Erratum 721 for Family 10h and 12h processors.
88 	 * These processors may incorrectly update the stack pointer
89 	 * after a long series of push and/or near-call instructions,
90 	 * or a long series of pop and/or near-return instructions.
91 	 *
92 	 * http://support.amd.com/us/Processor_TechDocs/41322_10h_Rev_Gd.pdf
93 	 * http://support.amd.com/us/Processor_TechDocs/44739_12h_Rev_Gd.pdf
94 	 *
95 	 * Hypervisors do not provide access to the errata MSR,
96 	 * causing #GP exception on attempt to apply the errata.  The
97 	 * MSR write shall be done on host and persist globally
98 	 * anyway, so do not try to do it when under virtualization.
99 	 */
100 	switch (CPUID_TO_FAMILY(cpu_id)) {
101 	case 0x10:
102 	case 0x12:
103 		if ((cpu_feature2 & CPUID2_HV) == 0)
104 			wrmsr(MSR_DE_CFG, rdmsr(MSR_DE_CFG) | 1);
105 		break;
106 	}
107 
108 	/*
109 	 * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should per BKDG.
110 	 * So, do it here or otherwise some tools could be confused by
111 	 * Initial Local APIC ID reported with CPUID Function 1 in EBX.
112 	 */
113 	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
114 		if ((cpu_feature2 & CPUID2_HV) == 0) {
115 			msr = rdmsr(MSR_NB_CFG1);
116 			msr |= (uint64_t)1 << 54;
117 			wrmsr(MSR_NB_CFG1, msr);
118 		}
119 	}
120 
121 	/*
122 	 * BIOS may configure Family 10h processors to convert WC+ cache type
123 	 * to CD.  That can hurt performance of guest VMs using nested paging.
124 	 * The relevant MSR bit is not documented in the BKDG,
125 	 * the fix is borrowed from Linux.
126 	 */
127 	if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
128 		if ((cpu_feature2 & CPUID2_HV) == 0) {
129 			msr = rdmsr(0xc001102a);
130 			msr &= ~((uint64_t)1 << 24);
131 			wrmsr(0xc001102a, msr);
132 		}
133 	}
134 
135 	/*
136 	 * Work around Erratum 793: Specific Combination of Writes to Write
137 	 * Combined Memory Types and Locked Instructions May Cause Core Hang.
138 	 * See Revision Guide for AMD Family 16h Models 00h-0Fh Processors,
139 	 * revision 3.04 or later, publication 51810.
140 	 */
141 	if (CPUID_TO_FAMILY(cpu_id) == 0x16 && CPUID_TO_MODEL(cpu_id) <= 0xf) {
142 		if ((cpu_feature2 & CPUID2_HV) == 0) {
143 			msr = rdmsr(MSR_LS_CFG);
144 			msr |= (uint64_t)1 << 15;
145 			wrmsr(MSR_LS_CFG, msr);
146 		}
147 	}
148 
149 	/* Ryzen erratas. */
150 	if (CPUID_TO_FAMILY(cpu_id) == 0x17 && CPUID_TO_MODEL(cpu_id) == 0x1 &&
151 	    (cpu_feature2 & CPUID2_HV) == 0) {
152 		/* 1021 */
153 		msr = rdmsr(MSR_DE_CFG);
154 		msr |= 0x2000;
155 		wrmsr(MSR_DE_CFG, msr);
156 
157 		/* 1033 */
158 		msr = rdmsr(MSR_LS_CFG);
159 		msr |= 0x10;
160 		wrmsr(MSR_LS_CFG, msr);
161 
162 		/* 1049 */
163 		msr = rdmsr(0xc0011028);
164 		msr |= 0x10;
165 		wrmsr(0xc0011028, msr);
166 
167 		/* 1095 */
168 		msr = rdmsr(MSR_LS_CFG);
169 		msr |= 0x200000000000000;
170 		wrmsr(MSR_LS_CFG, msr);
171 	}
172 
173 	/*
174 	 * Work around a problem on Ryzen that is triggered by executing
175 	 * code near the top of user memory, in our case the signal
176 	 * trampoline code in the shared page on amd64.
177 	 *
178 	 * This function is executed once for the BSP before tunables take
179 	 * effect so the value determined here can be overridden by the
180 	 * tunable.  This function is then executed again for each AP and
181 	 * also on resume.  Set a flag the first time so that value set by
182 	 * the tunable is not overwritten.
183 	 *
184 	 * The stepping and/or microcode versions should be checked after
185 	 * this issue is fixed by AMD so that we don't use this mode if not
186 	 * needed.
187 	 */
188 	if (lower_sharedpage_init == 0) {
189 		lower_sharedpage_init = 1;
190 		if (CPUID_TO_FAMILY(cpu_id) == 0x17 ||
191 		    CPUID_TO_FAMILY(cpu_id) == 0x18) {
192 			hw_lower_amd64_sharedpage = 1;
193 		}
194 	}
195 }
196 
197 /*
198  * Initialize special VIA features
199  */
200 static void
201 init_via(void)
202 {
203 	u_int regs[4], val;
204 
205 	/*
206 	 * Check extended CPUID for PadLock features.
207 	 *
208 	 * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf
209 	 */
210 	do_cpuid(0xc0000000, regs);
211 	if (regs[0] >= 0xc0000001) {
212 		do_cpuid(0xc0000001, regs);
213 		val = regs[3];
214 	} else
215 		return;
216 
217 	/* Enable RNG if present. */
218 	if ((val & VIA_CPUID_HAS_RNG) != 0) {
219 		via_feature_rng = VIA_HAS_RNG;
220 		wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG);
221 	}
222 
223 	/* Enable PadLock if present. */
224 	if ((val & VIA_CPUID_HAS_ACE) != 0)
225 		via_feature_xcrypt |= VIA_HAS_AES;
226 	if ((val & VIA_CPUID_HAS_ACE2) != 0)
227 		via_feature_xcrypt |= VIA_HAS_AESCTR;
228 	if ((val & VIA_CPUID_HAS_PHE) != 0)
229 		via_feature_xcrypt |= VIA_HAS_SHA;
230 	if ((val & VIA_CPUID_HAS_PMM) != 0)
231 		via_feature_xcrypt |= VIA_HAS_MM;
232 	if (via_feature_xcrypt != 0)
233 		wrmsr(0x1107, rdmsr(0x1107) | (1 << 28));
234 }
235 
236 /*
237  * The value for the TSC_AUX MSR and rdtscp/rdpid on the invoking CPU.
238  *
239  * Caller should prevent CPU migration.
240  */
241 u_int
242 cpu_auxmsr(void)
243 {
244 	KASSERT((read_rflags() & PSL_I) == 0, ("context switch possible"));
245 	return (PCPU_GET(cpuid));
246 }
247 
248 void
249 cpu_init_small_core(void)
250 {
251 	u_int r[4];
252 
253 	if (cpu_high < 0x1a)
254 		return;
255 
256 	cpuid_count(0x1a, 0, r);
257 	if ((r[0] & CPUID_HYBRID_CORE_MASK) != CPUID_HYBRID_SMALL_CORE)
258 		return;
259 
260 	PCPU_SET(small_core, 1);
261 	if (pmap_pcid_enabled && invpcid_works &&
262 	    pmap_pcid_invlpg_workaround_uena) {
263 		PCPU_SET(pcid_invlpg_workaround, 1);
264 		pmap_pcid_invlpg_workaround = 1;
265 	}
266 }
267 
268 /*
269  * Initialize CPU control registers
270  */
271 void
272 initializecpu(void)
273 {
274 	uint64_t msr;
275 	uint32_t cr4;
276 
277 	TSENTER();
278 	cr4 = rcr4();
279 	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
280 		cr4 |= CR4_FXSR | CR4_XMM;
281 		hw_instruction_sse = 1;
282 	}
283 	if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
284 		cr4 |= CR4_FSGSBASE;
285 
286 	if (cpu_stdext_feature2 & CPUID_STDEXT2_PKU)
287 		cr4 |= CR4_PKE;
288 
289 	/*
290 	 * If SMEP is present, we only need to flush RSB (by default)
291 	 * on context switches, to prevent cross-process ret2spec
292 	 * attacks.  Do it automatically if ibrs_disable is set, to
293 	 * complete the mitigation.
294 	 *
295 	 * Postpone enabling the SMEP on the boot CPU until the page
296 	 * tables are switched from the boot loader identity mapping
297 	 * to the kernel tables.  The boot loader enables the U bit in
298 	 * its tables.
299 	 */
300 	if (IS_BSP()) {
301 		if (cpu_stdext_feature & CPUID_STDEXT_SMEP &&
302 		    !TUNABLE_INT_FETCH(
303 		    "machdep.mitigations.cpu_flush_rsb_ctxsw",
304 		    &cpu_flush_rsb_ctxsw) &&
305 		    hw_ibrs_disable)
306 			cpu_flush_rsb_ctxsw = 1;
307 	} else {
308 		if (cpu_stdext_feature & CPUID_STDEXT_SMEP)
309 			cr4 |= CR4_SMEP;
310 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
311 			cr4 |= CR4_SMAP;
312 	}
313 	TSENTER2("load_cr4");
314 	load_cr4(cr4);
315 	TSEXIT2("load_cr4");
316 	/* Reload cpu ext features to reflect cr4 changes */
317 	if (IS_BSP() && cold)
318 		identify_cpu_ext_features();
319 	if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
320 		msr = rdmsr(MSR_EFER) | EFER_NXE;
321 		wrmsr(MSR_EFER, msr);
322 		pg_nx = PG_NX;
323 	}
324 	hw_ibrs_recalculate(false);
325 	hw_ssb_recalculate(false);
326 	amd64_syscall_ret_flush_l1d_recalc();
327 	x86_rngds_mitg_recalculate(false);
328 	switch (cpu_vendor_id) {
329 	case CPU_VENDOR_AMD:
330 	case CPU_VENDOR_HYGON:
331 		init_amd();
332 		break;
333 	case CPU_VENDOR_CENTAUR:
334 		init_via();
335 		break;
336 	}
337 
338 	if ((amd_feature & AMDID_RDTSCP) != 0 ||
339 	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
340 		wrmsr(MSR_TSC_AUX, cpu_auxmsr());
341 
342 	if (!IS_BSP())
343 		cpu_init_small_core();
344 	TSEXIT();
345 }
346 
347 void
348 initializecpucache(void)
349 {
350 
351 	/*
352 	 * CPUID with %eax = 1, %ebx returns
353 	 * Bits 15-8: CLFLUSH line size
354 	 * 	(Value * 8 = cache line size in bytes)
355 	 */
356 	if ((cpu_feature & CPUID_CLFSH) != 0)
357 		cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
358 	/*
359 	 * XXXKIB: (temporary) hack to work around traps generated
360 	 * when CLFLUSHing APIC register window under virtualization
361 	 * environments.  These environments tend to disable the
362 	 * CPUID_SS feature even though the native CPU supports it.
363 	 */
364 	TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
365 	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
366 		cpu_feature &= ~CPUID_CLFSH;
367 		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
368 	}
369 
370 	/*
371 	 * The kernel's use of CLFLUSH{,OPT} can be disabled manually
372 	 * by setting the hw.clflush_disable tunable.
373 	 */
374 	if (hw_clflush_disable == 1) {
375 		cpu_feature &= ~CPUID_CLFSH;
376 		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
377 	}
378 }
379