1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) KATO Takenori, 1997, 1998. 5 * 6 * All rights reserved. Unpublished rights reserved under the copyright 7 * laws of Japan. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer as 15 * the first lines of this file unmodified. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #include "opt_cpu.h" 34 35 #include <sys/param.h> 36 #include <sys/kernel.h> 37 #include <sys/pcpu.h> 38 #include <sys/systm.h> 39 #include <sys/sysctl.h> 40 41 #include <machine/cputypes.h> 42 #include <machine/md_var.h> 43 #include <machine/psl.h> 44 #include <machine/specialreg.h> 45 46 #include <vm/vm.h> 47 #include <vm/pmap.h> 48 49 static int hw_instruction_sse; 50 SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, 51 &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); 52 static int lower_sharedpage_init; 53 int hw_lower_amd64_sharedpage; 54 SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN, 55 &hw_lower_amd64_sharedpage, 0, 56 "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory"); 57 /* 58 * -1: automatic (default) 59 * 0: keep enable CLFLUSH 60 * 1: force disable CLFLUSH 61 */ 62 static int hw_clflush_disable = -1; 63 64 static void 65 init_amd(void) 66 { 67 uint64_t msr; 68 69 /* 70 * C1E renders the local APIC timer dead, so we disable it by 71 * reading the Interrupt Pending Message register and clearing 72 * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). 73 * 74 * Reference: 75 * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" 76 * #32559 revision 3.00+ 77 * 78 * Detect the presence of C1E capability mostly on latest 79 * dual-cores (or future) k8 family. Affected models range is 80 * taken from Linux sources. 81 */ 82 if ((CPUID_TO_FAMILY(cpu_id) == 0xf || 83 CPUID_TO_FAMILY(cpu_id) == 0x10) && (cpu_feature2 & CPUID2_HV) == 0) 84 cpu_amdc1e_bug = 1; 85 86 /* 87 * Work around Erratum 721 for Family 10h and 12h processors. 88 * These processors may incorrectly update the stack pointer 89 * after a long series of push and/or near-call instructions, 90 * or a long series of pop and/or near-return instructions. 91 * 92 * http://support.amd.com/us/Processor_TechDocs/41322_10h_Rev_Gd.pdf 93 * http://support.amd.com/us/Processor_TechDocs/44739_12h_Rev_Gd.pdf 94 * 95 * Hypervisors do not provide access to the errata MSR, 96 * causing #GP exception on attempt to apply the errata. The 97 * MSR write shall be done on host and persist globally 98 * anyway, so do not try to do it when under virtualization. 99 */ 100 switch (CPUID_TO_FAMILY(cpu_id)) { 101 case 0x10: 102 case 0x12: 103 if ((cpu_feature2 & CPUID2_HV) == 0) 104 wrmsr(MSR_DE_CFG, rdmsr(MSR_DE_CFG) | 105 DE_CFG_10H_12H_STACK_POINTER_JUMP_FIX_BIT); 106 break; 107 } 108 109 /* 110 * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should per BKDG. 111 * So, do it here or otherwise some tools could be confused by 112 * Initial Local APIC ID reported with CPUID Function 1 in EBX. 113 */ 114 if (CPUID_TO_FAMILY(cpu_id) == 0x10) { 115 if ((cpu_feature2 & CPUID2_HV) == 0) { 116 msr = rdmsr(MSR_NB_CFG1); 117 msr |= (uint64_t)1 << 54; 118 wrmsr(MSR_NB_CFG1, msr); 119 } 120 } 121 122 /* 123 * BIOS may configure Family 10h processors to convert WC+ cache type 124 * to CD. That can hurt performance of guest VMs using nested paging. 125 * The relevant MSR bit is not documented in the BKDG, 126 * the fix is borrowed from Linux. 127 */ 128 if (CPUID_TO_FAMILY(cpu_id) == 0x10) { 129 if ((cpu_feature2 & CPUID2_HV) == 0) { 130 msr = rdmsr(0xc001102a); 131 msr &= ~((uint64_t)1 << 24); 132 wrmsr(0xc001102a, msr); 133 } 134 } 135 136 /* 137 * Work around Erratum 793: Specific Combination of Writes to Write 138 * Combined Memory Types and Locked Instructions May Cause Core Hang. 139 * See Revision Guide for AMD Family 16h Models 00h-0Fh Processors, 140 * revision 3.04 or later, publication 51810. 141 */ 142 if (CPUID_TO_FAMILY(cpu_id) == 0x16 && CPUID_TO_MODEL(cpu_id) <= 0xf) { 143 if ((cpu_feature2 & CPUID2_HV) == 0) { 144 msr = rdmsr(MSR_LS_CFG); 145 msr |= (uint64_t)1 << 15; 146 wrmsr(MSR_LS_CFG, msr); 147 } 148 } 149 150 /* Ryzen erratas. */ 151 if (CPUID_TO_FAMILY(cpu_id) == 0x17 && CPUID_TO_MODEL(cpu_id) == 0x1 && 152 (cpu_feature2 & CPUID2_HV) == 0) { 153 /* 1021 */ 154 msr = rdmsr(MSR_DE_CFG); 155 msr |= DE_CFG_ZEN_LOAD_STALE_DATA_FIX_BIT; 156 wrmsr(MSR_DE_CFG, msr); 157 158 /* 1033 */ 159 msr = rdmsr(MSR_LS_CFG); 160 msr |= 0x10; 161 wrmsr(MSR_LS_CFG, msr); 162 163 /* 1049 */ 164 msr = rdmsr(0xc0011028); 165 msr |= 0x10; 166 wrmsr(0xc0011028, msr); 167 168 /* 1095 */ 169 msr = rdmsr(MSR_LS_CFG); 170 msr |= 0x200000000000000; 171 wrmsr(MSR_LS_CFG, msr); 172 } 173 174 /* 175 * Work around a problem on Ryzen that is triggered by executing 176 * code near the top of user memory, in our case the signal 177 * trampoline code in the shared page on amd64. 178 * 179 * This function is executed once for the BSP before tunables take 180 * effect so the value determined here can be overridden by the 181 * tunable. This function is then executed again for each AP and 182 * also on resume. Set a flag the first time so that value set by 183 * the tunable is not overwritten. 184 * 185 * The stepping and/or microcode versions should be checked after 186 * this issue is fixed by AMD so that we don't use this mode if not 187 * needed. 188 */ 189 if (lower_sharedpage_init == 0) { 190 lower_sharedpage_init = 1; 191 if (CPUID_TO_FAMILY(cpu_id) == 0x17 || 192 CPUID_TO_FAMILY(cpu_id) == 0x18) { 193 hw_lower_amd64_sharedpage = 1; 194 } 195 } 196 197 /* Zenbleed. See the comments in 'cpu_machdep.c'. */ 198 zenbleed_check_and_apply(false); 199 } 200 201 /* 202 * Initialize special VIA features 203 */ 204 static void 205 init_via(void) 206 { 207 u_int regs[4], val; 208 209 /* 210 * Check extended CPUID for PadLock features. 211 * 212 * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf 213 */ 214 do_cpuid(0xc0000000, regs); 215 if (regs[0] >= 0xc0000001) { 216 do_cpuid(0xc0000001, regs); 217 val = regs[3]; 218 } else 219 return; 220 221 /* Enable RNG if present. */ 222 if ((val & VIA_CPUID_HAS_RNG) != 0) { 223 via_feature_rng = VIA_HAS_RNG; 224 wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG); 225 } 226 227 /* Enable PadLock if present. */ 228 if ((val & VIA_CPUID_HAS_ACE) != 0) 229 via_feature_xcrypt |= VIA_HAS_AES; 230 if ((val & VIA_CPUID_HAS_ACE2) != 0) 231 via_feature_xcrypt |= VIA_HAS_AESCTR; 232 if ((val & VIA_CPUID_HAS_PHE) != 0) 233 via_feature_xcrypt |= VIA_HAS_SHA; 234 if ((val & VIA_CPUID_HAS_PMM) != 0) 235 via_feature_xcrypt |= VIA_HAS_MM; 236 if (via_feature_xcrypt != 0) 237 wrmsr(0x1107, rdmsr(0x1107) | (1 << 28)); 238 } 239 240 /* 241 * The value for the TSC_AUX MSR and rdtscp/rdpid on the invoking CPU. 242 * 243 * Caller should prevent CPU migration. 244 */ 245 u_int 246 cpu_auxmsr(void) 247 { 248 KASSERT((read_rflags() & PSL_I) == 0, ("context switch possible")); 249 return (PCPU_GET(cpuid)); 250 } 251 252 void 253 cpu_init_small_core(void) 254 { 255 u_int r[4]; 256 257 if (cpu_high < 0x1a) 258 return; 259 260 cpuid_count(0x1a, 0, r); 261 if ((r[0] & CPUID_HYBRID_CORE_MASK) != CPUID_HYBRID_SMALL_CORE) 262 return; 263 264 PCPU_SET(small_core, 1); 265 if (pmap_pcid_enabled && invpcid_works && 266 pmap_pcid_invlpg_workaround_uena) { 267 PCPU_SET(pcid_invlpg_workaround, 1); 268 pmap_pcid_invlpg_workaround = 1; 269 } 270 } 271 272 /* 273 * Initialize CPU control registers 274 */ 275 void 276 initializecpu(void) 277 { 278 uint64_t msr; 279 uint32_t cr4; 280 281 TSENTER(); 282 cr4 = rcr4(); 283 if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { 284 cr4 |= CR4_FXSR | CR4_XMM; 285 hw_instruction_sse = 1; 286 } 287 if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) 288 cr4 |= CR4_FSGSBASE; 289 290 if (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) 291 cr4 |= CR4_PKE; 292 293 /* 294 * If SMEP is present, we only need to flush RSB (by default) 295 * on context switches, to prevent cross-process ret2spec 296 * attacks. Do it automatically if ibrs_disable is set, to 297 * complete the mitigation. 298 * 299 * Postpone enabling the SMEP on the boot CPU until the page 300 * tables are switched from the boot loader identity mapping 301 * to the kernel tables. The boot loader enables the U bit in 302 * its tables. 303 */ 304 if (IS_BSP()) { 305 if (cpu_stdext_feature & CPUID_STDEXT_SMEP && 306 !TUNABLE_INT_FETCH( 307 "machdep.mitigations.cpu_flush_rsb_ctxsw", 308 &cpu_flush_rsb_ctxsw) && 309 hw_ibrs_disable) 310 cpu_flush_rsb_ctxsw = 1; 311 } else { 312 if (cpu_stdext_feature & CPUID_STDEXT_SMEP) 313 cr4 |= CR4_SMEP; 314 if (cpu_stdext_feature & CPUID_STDEXT_SMAP) 315 cr4 |= CR4_SMAP; 316 } 317 TSENTER2("load_cr4"); 318 load_cr4(cr4); 319 TSEXIT2("load_cr4"); 320 /* Reload cpu ext features to reflect cr4 changes */ 321 if (IS_BSP() && cold) 322 identify_cpu_ext_features(); 323 if (IS_BSP() && (amd_feature & AMDID_NX) != 0) { 324 msr = rdmsr(MSR_EFER) | EFER_NXE; 325 wrmsr(MSR_EFER, msr); 326 pg_nx = PG_NX; 327 } 328 hw_ibrs_recalculate(false); 329 hw_ssb_recalculate(false); 330 amd64_syscall_ret_flush_l1d_recalc(); 331 x86_rngds_mitg_recalculate(false); 332 switch (cpu_vendor_id) { 333 case CPU_VENDOR_AMD: 334 case CPU_VENDOR_HYGON: 335 init_amd(); 336 break; 337 case CPU_VENDOR_CENTAUR: 338 init_via(); 339 break; 340 } 341 342 if ((amd_feature & AMDID_RDTSCP) != 0 || 343 (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0) 344 wrmsr(MSR_TSC_AUX, cpu_auxmsr()); 345 346 if (!IS_BSP()) 347 cpu_init_small_core(); 348 TSEXIT(); 349 } 350 351 void 352 initializecpucache(void) 353 { 354 355 /* 356 * CPUID with %eax = 1, %ebx returns 357 * Bits 15-8: CLFLUSH line size 358 * (Value * 8 = cache line size in bytes) 359 */ 360 if ((cpu_feature & CPUID_CLFSH) != 0) 361 cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8; 362 /* 363 * XXXKIB: (temporary) hack to work around traps generated 364 * when CLFLUSHing APIC register window under virtualization 365 * environments. These environments tend to disable the 366 * CPUID_SS feature even though the native CPU supports it. 367 */ 368 TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable); 369 if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) { 370 cpu_feature &= ~CPUID_CLFSH; 371 cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT; 372 } 373 374 /* 375 * The kernel's use of CLFLUSH{,OPT} can be disabled manually 376 * by setting the hw.clflush_disable tunable. 377 */ 378 if (hw_clflush_disable == 1) { 379 cpu_feature &= ~CPUID_CLFSH; 380 cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT; 381 } 382 } 383