/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>

#include "vmx.h"
#include "vmx_msr.h"
#include "x86.h"

/*
 * The VMX capability MSRs encode each control bit's allowed settings in
 * two halves: the high 32 bits report which control bits are allowed to
 * be 1, and the low 32 bits report which bits must be 1 (i.e. a clear
 * low bit means the control may be 0).
 */
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{

	/* Bit 'bitpos + 32' set => control bit may be set to 1. */
	return ((msr_val & (1UL << (bitpos + 32))) != 0);
}

static bool
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{

	/* Bit 'bitpos' clear => control bit may be set to 0. */
	return ((msr_val & (1UL << bitpos)) == 0);
}

/*
 * Return the VMCS revision identifier, encoded in the low 32 bits of
 * MSR_VMX_BASIC.
 */
uint32_t
vmx_revision(void)
{

	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
}

/*
 * Generate a bitmask to be used for the VMCS execution control fields.
 *
 * The caller specifies what bits should be set to one in 'ones_mask'
 * and what bits should be set to zero in 'zeros_mask'. The don't-care
 * bits are set to the default value. The default values are obtained
 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
 * VMX Capabilities".
 *
 * Returns zero on success and non-zero on error.
 */
int
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
    uint32_t zeros_mask, uint32_t *retval)
{
	int i;
	uint64_t val, trueval;
	bool true_ctls_avail, one_allowed, zero_allowed;

	/* We cannot ask the same bit to be set to both '1' and '0' */
	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
		return (EINVAL);

	/* Bit 55 of MSR_VMX_BASIC advertises the "true" control MSRs. */
	true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;

	val = rdmsr(ctl_reg);
	if (true_ctls_avail)
		trueval = rdmsr(true_ctl_reg);		/* step c */
	else
		trueval = val;				/* step a */

	/* Resolve each of the 32 control bits independently. */
	for (i = 0; i < 32; i++) {
		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);

		KASSERT(one_allowed || zero_allowed,
			("invalid zero/one setting for bit %d of ctl 0x%0x, "
			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));

		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
			/* Bit is fixed to 0; caller must not demand 1. */
			if (ones_mask & (1 << i))
				return (EINVAL);
			*retval &= ~(1 << i);
		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
			/* Bit is fixed to 1; caller must not demand 0. */
			if (zeros_mask & (1 << i))
				return (EINVAL);
			*retval |= 1 << i;
		} else {
			/* Bit is flexible: honor the caller's preference. */
			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
				*retval &= ~(1 << i);
			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
				*retval |= 1 << i;
			else if (!true_ctls_avail)
				*retval &= ~(1 << i);	/* b(iii) */
			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
				*retval &= ~(1 << i);
			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
				*retval |= 1 << i;
			else {
				panic("vmx_set_ctlreg: unable to determine "
				      "correct value of ctl bit %d for msr "
				      "0x%0x and true msr 0x%0x", i, ctl_reg,
				      true_ctl_reg);
			}
		}
	}

	return (0);
}

/*
 * Initialize the 4KB MSR permission bitmap with all bits set so that
 * every guest MSR access is intercepted by default.
 */
void
msr_bitmap_initialize(char *bitmap)
{

	memset(bitmap, 0xff, PAGE_SIZE);
}

/*
 * Grant or revoke direct guest access to 'msr' in the MSR bitmap.
 *
 * Bitmap layout (per the VMX specification):
 *   bytes    0-1023: read bitmap for MSRs  0x00000000 - 0x00001FFF
 *   bytes 1024-2047: read bitmap for MSRs  0xC0000000 - 0xC0001FFF
 *   bytes 2048-3071: write bitmap for MSRs 0x00000000 - 0x00001FFF
 *   bytes 3072-4095: write bitmap for MSRs 0xC0000000 - 0xC0001FFF
 *
 * A set bit intercepts the access; a clear bit allows it.  Returns 0 on
 * success or EINVAL if 'msr' is outside the ranges the bitmap covers.
 */
int
msr_bitmap_change_access(char *bitmap, u_int msr, int access)
{
	int byte, bit;

	if (msr <= 0x00001FFF)
		byte = msr / 8;
	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
		byte = 1024 + (msr - 0xC0000000) / 8;
	else
		return (EINVAL);

	bit = msr & 0x7;

	/* Read permission: clear the bit to allow, set it to intercept. */
	if (access & MSR_BITMAP_ACCESS_READ)
		bitmap[byte] &= ~(1 << bit);
	else
		bitmap[byte] |= 1 << bit;

	/* Write permission lives 2KB further into the bitmap. */
	byte += 2048;
	if (access & MSR_BITMAP_ACCESS_WRITE)
		bitmap[byte] &= ~(1 << bit);
	else
		bitmap[byte] |= 1 << bit;

	return (0);
}

static uint64_t misc_enable;		/* emulated MSR_IA32_MISC_ENABLE */
static uint64_t platform_info;		/* emulated MSR_PLATFORM_INFO */
static uint64_t turbo_ratio_limit;	/* emulated MSR_TURBO_RATIO_LIMIT */
static uint64_t host_msrs[GUEST_MSR_NUM]; /* host values saved at init */

static bool
nehalem_cpu(void)
{
	u_int family, model;

	/*
	 * The family:model numbers belonging to the Nehalem microarchitecture
	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
	 */
	family = CPUID_TO_FAMILY(cpu_id);
	model = CPUID_TO_MODEL(cpu_id);
	if (family == 0x6) {
		switch (model) {
		case 0x1A:
		case 0x1E:
		case 0x1F:
		case 0x2E:
			return (true);
		default:
			break;
		}
	}
	return (false);
}

static bool
westmere_cpu(void)
{
	u_int family, model;

	/*
	 * The family:model numbers belonging to the Westmere microarchitecture
	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
	 */
	family = CPUID_TO_FAMILY(cpu_id);
	model = CPUID_TO_MODEL(cpu_id);
	if (family == 0x6) {
		switch (model) {
		case 0x25:
		case 0x2C:
			return (true);
		default:
			break;
		}
	}
	return (false);
}

/*
 * Return true if 'val' is a legal value for the IA32_PAT MSR, i.e.
 * every one of its eight 8-bit PAT entries encodes a valid memory type.
 */
static bool
pat_valid(uint64_t val)
{
	int i, pa;

	/*
	 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
	 *
	 * Extract PA0 through PA7 and validate that each one encodes a
	 * valid memory type.
	 */
	for (i = 0; i < 8; i++) {
		pa = (val >> (i * 8)) & 0xff;
		/* 2, 3 and anything >= 8 are reserved encodings. */
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}

/*
 * One-time module initialization: snapshot the host MSRs that are
 * identical across CPUs and compute the values presented by the
 * emulated read-mostly MSRs (MISC_ENABLE, PLATFORM_INFO,
 * TURBO_RATIO_LIMIT).
 */
void
vmx_msr_init(void)
{
	uint64_t bus_freq, ratio;
	int i;

	/*
	 * It is safe to cache the values of the following MSRs because
	 * they don't change based on curcpu, curproc or curthread.
	 */
	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);

	/*
	 * Initialize emulated MSRs
	 */
	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
	/*
	 * Set mandatory bits
	 *  11:   branch trace disabled
	 *  12:   PEBS unavailable
	 * Clear unsupported features
	 *  16:   SpeedStep enable
	 *  18:   enable MONITOR FSM
	 */
	misc_enable |= (1 << 12) | (1 << 11);
	misc_enable &= ~((1 << 18) | (1 << 16));

	if (nehalem_cpu() || westmere_cpu())
		bus_freq = 133330000;		/* 133Mhz */
	else
		bus_freq = 100000000;		/* 100Mhz */

	/*
	 * XXXtime
	 * The ratio should really be based on the virtual TSC frequency as
	 * opposed to the host TSC.
	 */
	ratio = (tsc_freq / bus_freq) & 0xff;

	/*
	 * The register definition is based on the micro-architecture
	 * but the following bits are always the same:
	 * [15:8]  Maximum Non-Turbo Ratio
	 * [28]    Programmable Ratio Limit for Turbo Mode
	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
	 * [47:40] Maximum Efficiency Ratio
	 *
	 * The other bits can be safely set to 0 on all
	 * micro-architectures up to Haswell.
	 */
	platform_info = (ratio << 8) | (ratio << 40);

	/*
	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
	 * dependent on the maximum cores per package supported by the micro-
	 * architecture. For e.g., Westmere supports 6 cores per package and
	 * uses the low 48 bits. Sandybridge support 8 cores per package and
	 * uses up all 64 bits.
	 *
	 * However, the unused bits are reserved so we	pretend that all bits
	 * in this MSR are valid.
	 */
	for (i = 0; i < 8; i++)
		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
}

/*
 * Per-vcpu MSR state initialization: set up the shared permission
 * bitmap (once, on the boot vcpu) and seed the guest's IA32_PAT with
 * its architectural power-on value.
 */
void
vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	/*
	 * The permissions bitmap is shared between all vcpus so initialize it
	 * once when initializing the vBSP.
	 */
	if (vcpu->vcpuid == 0) {
		/*
		 * These MSRs are saved/restored around guest entry/exit
		 * (see vmx_msr_guest_enter/exit), so the guest may access
		 * them directly without VM exits.
		 */
		guest_msr_rw(vmx, MSR_LSTAR);
		guest_msr_rw(vmx, MSR_CSTAR);
		guest_msr_rw(vmx, MSR_STAR);
		guest_msr_rw(vmx, MSR_SF_MASK);
		guest_msr_rw(vmx, MSR_KGSBASE);
	}

	/*
	 * Initialize guest IA32_PAT MSR with default value after reset.
	 */
	vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
	    PAT_VALUE(2, PAT_UNCACHED)		|
	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
	    PAT_VALUE(4, PAT_WRITE_BACK)	|
	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
	    PAT_VALUE(6, PAT_UNCACHED)		|
	    PAT_VALUE(7, PAT_UNCACHEABLE);

	return;
}

/*
 * Called on the way into the guest: load the guest's syscall-related
 * MSRs into the hardware.  The host values are restored in
 * vmx_msr_guest_exit().
 */
void
vmx_msr_guest_enter(struct vmx_vcpu *vcpu)
{

	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
	update_pcb_bases(curpcb);
	wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]);
	wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]);
}

/*
 * Load the guest's TSC_AUX immediately before guest entry, skipping the
 * wrmsr when the guest value already matches the host's.
 */
void
vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
	uint32_t host_aux = cpu_auxmsr();

	if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
		wrmsr(MSR_TSC_AUX, guest_tsc_aux);
}

/*
 * Called on the way out of the guest: stash the guest's syscall-related
 * MSRs and restore the host values cached in vmx_msr_init().
 */
void
vmx_msr_guest_exit(struct vmx_vcpu *vcpu)
{

	/* Save guest MSRs */
	vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
	vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);

	/* Restore host MSRs */
	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);

	/* MSR_KGSBASE will be restored on the way back to userspace */
}

/*
 * Restore the host's TSC_AUX after guest exit, if the guest was running
 * with a different value.
 */
void
vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
	uint32_t host_aux = cpu_auxmsr();

	if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
		/*
		 * Note that it is not necessary to save the guest value
		 * here; vcpu->guest_msrs[IDX_MSR_TSC_AUX] always
		 * contains the current value since it is updated whenever
		 * the guest writes to it (which is expected to be very
		 * rare).
		 */
		wrmsr(MSR_TSC_AUX, host_aux);
}

/*
 * Emulate a guest rdmsr for the MSRs handled in the kernel.  On success
 * the value is returned in '*val'.  Returns EINVAL for MSRs that are
 * not emulated here (the caller decides how to handle those); 'retu' is
 * not modified by this function.
 */
int
vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu)
{
	int error;

	error = 0;

	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		/* Report no machine-check banks/events. */
		*val = 0;
		break;
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		*val = misc_enable;
		break;
	case MSR_PLATFORM_INFO:
		*val = platform_info;
		break;
	case MSR_TURBO_RATIO_LIMIT:
	case MSR_TURBO_RATIO_LIMIT1:
		*val = turbo_ratio_limit;
		break;
	case MSR_PAT:
		*val = vcpu->guest_msrs[IDX_MSR_PAT];
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * Emulate a guest wrmsr for the MSRs handled in the kernel.  Returns
 * EINVAL for MSRs (or MISC_ENABLE bit changes) that are not emulated
 * here; invalid values for emulated MSRs inject #GP into the guest.
 * 'retu' is not modified by this function.
 */
int
vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu)
{
	uint64_t changed;
	int error;

	error = 0;

	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		break;		/* ignore writes */
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		changed = val ^ misc_enable;
		/*
		 * If the host has disabled the NX feature then the guest
		 * also cannot use it. However, a Linux guest will try to
		 * enable the NX feature by writing to the MISC_ENABLE MSR.
		 *
		 * This can be safely ignored because the memory management
		 * code looks at CPUID.80000001H:EDX.NX to check if the
		 * functionality is actually enabled.
		 */
		changed &= ~(1UL << 34);

		/*
		 * Punt to userspace if any other bits are being modified.
		 */
		if (changed)
			error = EINVAL;

		break;
	case MSR_PAT:
		if (pat_valid(val))
			vcpu->guest_msrs[IDX_MSR_PAT] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	case MSR_TSC:
		/* Writing the TSC adjusts the guest's TSC offset. */
		error = vmx_set_tsc_offset(vcpu, val - rdtsc());
		break;
	case MSR_TSC_AUX:
		if (vmx_have_msr_tsc_aux)
			/*
			 * vmx_msr_guest_enter_tsc_aux() will apply this
			 * value when it is called immediately before guest
			 * entry.
			 */
			vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}