1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>

#include "vmx.h"
#include "vmx_msr.h"
#include "x86.h"

/*
 * A VMX capability MSR reports the "allowed 1-settings" of a control in its
 * high 32 bits: if bit (bitpos + 32) is set, the control bit may be set to 1.
 */
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{

	return ((msr_val & (1UL << (bitpos + 32))) != 0);
}

/*
 * The low 32 bits of a VMX capability MSR report the "allowed 0-settings":
 * if bit 'bitpos' is clear, the control bit may be set to 0.
 */
static bool
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{

	return ((msr_val & (1UL << bitpos)) == 0);
}

/*
 * Return the VMCS revision identifier, reported in the low 32 bits of
 * MSR_VMX_BASIC.
 */
uint32_t
vmx_revision(void)
{

	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
}

/*
 * Generate a bitmask to be used for the VMCS execution control fields.
 *
 * The caller specifies what bits should be set to one in 'ones_mask'
 * and what bits should be set to zero in 'zeros_mask'. The don't-care
 * bits are set to the default value. The default values are obtained
 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
 * VMX Capabilities".
 *
 * Returns zero on success and non-zero on error.
 */
int
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
    uint32_t zeros_mask, uint32_t *retval)
{
	int i;
	uint64_t val, trueval;
	bool true_ctls_avail, one_allowed, zero_allowed;

	/* We cannot ask the same bit to be set to both '1' and '0' */
	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
		return (EINVAL);

	/* Bit 55 of MSR_VMX_BASIC advertises the "true" capability MSRs */
	true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;

	val = rdmsr(ctl_reg);
	if (true_ctls_avail)
		trueval = rdmsr(true_ctl_reg);		/* step c */
	else
		trueval = val;				/* step a */

	for (i = 0; i < 32; i++) {
		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);

		KASSERT(one_allowed || zero_allowed,
		    ("invalid zero/one setting for bit %d of ctl 0x%0x, "
		    "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));

		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
			/* Bit is fixed to 0; caller must not require 1 */
			if (ones_mask & (1 << i))
				return (EINVAL);
			*retval &= ~(1 << i);
		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
			/* Bit is fixed to 1; caller must not require 0 */
			if (zeros_mask & (1 << i))
				return (EINVAL);
			*retval |= 1 << i;
		} else {
			/* Bit is flexible; honor caller's preference first */
			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
				*retval &= ~(1 << i);
			else if (ones_mask & (1 << i))	/* b(ii), c(ii) */
				*retval |= 1 << i;
			else if (!true_ctls_avail)
				*retval &= ~(1 << i);	/* b(iii) */
			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
				*retval &= ~(1 << i);
			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
				*retval |= 1 << i;
			else {
				panic("vmx_set_ctlreg: unable to determine "
				    "correct value of ctl bit %d for msr "
				    "0x%0x and true msr 0x%0x", i, ctl_reg,
				    true_ctl_reg);
			}
		}
	}

	return (0);
}

/*
 * Set every bit in the page-sized MSR permission bitmap. Per the bitmap
 * convention used by msr_bitmap_change_access() below, a set bit denies
 * (intercepts) the corresponding MSR access, so this defaults all MSRs
 * to intercepted.
 */
void
msr_bitmap_initialize(char *bitmap)
{

	memset(bitmap, 0xff, PAGE_SIZE);
}

/*
 * Change the read/write intercept for 'msr' in the permission bitmap.
 *
 * Bitmap layout (one bit per MSR in each region):
 *   bytes    0..1023: read bitmap for low MSRs  (0x00000000 - 0x00001FFF)
 *   bytes 1024..2047: read bitmap for high MSRs (0xC0000000 - 0xC0001FFF)
 *   bytes 2048..4095: the corresponding write bitmaps (read offset + 2048)
 *
 * Clearing a bit grants the guest direct access; setting it intercepts.
 * Returns 0 on success or EINVAL if 'msr' lies outside both ranges.
 */
int
msr_bitmap_change_access(char *bitmap, u_int msr, int access)
{
	int byte, bit;

	if (msr <= 0x00001FFF)
		byte = msr / 8;
	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
		byte = 1024 + (msr - 0xC0000000) / 8;
	else
		return (EINVAL);

	bit = msr & 0x7;

	if (access & MSR_BITMAP_ACCESS_READ)
		bitmap[byte] &= ~(1 << bit);
	else
		bitmap[byte] |= 1 << bit;

	/* The write bitmap is located 2KB after the read bitmap */
	byte += 2048;
	if (access & MSR_BITMAP_ACCESS_WRITE)
		bitmap[byte] &= ~(1 << bit);
	else
		bitmap[byte] |= 1 << bit;

	return (0);
}

/* Values computed once in vmx_msr_init() and served to all guests */
static uint64_t misc_enable;		/* emulated MSR_IA32_MISC_ENABLE */
static uint64_t platform_info;		/* emulated MSR_PLATFORM_INFO */
static uint64_t turbo_ratio_limit;	/* emulated MSR_TURBO_RATIO_LIMIT */
static uint64_t host_msrs[GUEST_MSR_NUM];	/* cached host syscall MSRs */

static bool
nehalem_cpu(void)
{
	u_int family, model;

	/*
	 * The family:model numbers belonging to the Nehalem microarchitecture
	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
	 */
	family = CPUID_TO_FAMILY(cpu_id);
	model = CPUID_TO_MODEL(cpu_id);
	if (family == 0x6) {
		switch (model) {
		case 0x1A:
		case 0x1E:
		case 0x1F:
		case 0x2E:
			return (true);
		default:
			break;
		}
	}
	return (false);
}

static bool
westmere_cpu(void)
{
	u_int family, model;

	/*
	 * The family:model numbers belonging to the Westmere microarchitecture
	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
	 */
	family = CPUID_TO_FAMILY(cpu_id);
	model = CPUID_TO_MODEL(cpu_id);
	if (family == 0x6) {
		switch (model) {
		case 0x25:
		case 0x2C:
			return (true);
		default:
			break;
		}
	}
	return (false);
}

/*
 * Validate a candidate IA32_PAT value: every one of the eight 8-bit PA
 * fields must encode a defined memory type.
 */
static bool
pat_valid(uint64_t val)
{
	int i, pa;

	/*
	 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
	 *
	 * Extract PA0 through PA7 and validate that each one encodes a
	 * valid memory type.  Encodings 2, 3 and anything >= 8 are reserved.
	 */
	for (i = 0; i < 8; i++) {
		pa = (val >> (i * 8)) & 0xff;
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}

/*
 * One-time module initialization: cache the host's syscall-related MSRs
 * and precompute the values served for the emulated MSRs
 * (MSR_IA32_MISC_ENABLE, MSR_PLATFORM_INFO, MSR_TURBO_RATIO_LIMIT).
 */
void
vmx_msr_init(void)
{
	uint64_t bus_freq, ratio;
	int i;

	/*
	 * It is safe to cache the values of the following MSRs because
	 * they don't change based on curcpu, curproc or curthread.
	 */
	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);

	/*
	 * Initialize emulated MSRs
	 */
	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
	/*
	 * Set mandatory bits
	 *  11:   branch trace disabled
	 *  12:   PEBS unavailable
	 * Clear unsupported features
	 *  16:   SpeedStep enable
	 *  18:   enable MONITOR FSM
	 */
	misc_enable |= (1 << 12) | (1 << 11);
	misc_enable &= ~((1 << 18) | (1 << 16));

	if (nehalem_cpu() || westmere_cpu())
		bus_freq = 133330000;		/* 133Mhz */
	else
		bus_freq = 100000000;		/* 100Mhz */

	/*
	 * XXXtime
	 * The ratio should really be based on the virtual TSC frequency as
	 * opposed to the host TSC.
	 */
	ratio = (tsc_freq / bus_freq) & 0xff;

	/*
	 * The register definition is based on the micro-architecture
	 * but the following bits are always the same:
	 * [15:8]  Maximum Non-Turbo Ratio
	 * [28]    Programmable Ratio Limit for Turbo Mode
	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
	 * [47:40] Maximum Efficiency Ratio
	 *
	 * The other bits can be safely set to 0 on all
	 * micro-architectures up to Haswell.
	 */
	platform_info = (ratio << 8) | (ratio << 40);

	/*
	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
	 * dependent on the maximum cores per package supported by the micro-
	 * architecture. For e.g., Westmere supports 6 cores per package and
	 * uses the low 48 bits. Sandybridge support 8 cores per package and
	 * uses up all 64 bits.
	 *
	 * However, the unused bits are reserved so we pretend that all bits
	 * in this MSR are valid.
	 */
	for (i = 0; i < 8; i++)
		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
}

/*
 * Per-vcpu MSR state initialization: set up the shared MSR permission
 * bitmap (on the first vcpu only) and seed the guest's IA32_PAT with its
 * architectural power-on default.
 */
void
vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	/*
	 * The permissions bitmap is shared between all vcpus so initialize it
	 * once when initializing the vBSP.
	 */
	if (vcpu->vcpuid == 0) {
		guest_msr_rw(vmx, MSR_LSTAR);
		guest_msr_rw(vmx, MSR_CSTAR);
		guest_msr_rw(vmx, MSR_STAR);
		guest_msr_rw(vmx, MSR_SF_MASK);
		guest_msr_rw(vmx, MSR_KGSBASE);
	}

	/*
	 * Initialize guest IA32_PAT MSR with default value after reset.
	 */
	vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
	    PAT_VALUE(2, PAT_UNCACHED)		|
	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
	    PAT_VALUE(4, PAT_WRITE_BACK)	|
	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
	    PAT_VALUE(6, PAT_UNCACHED)		|
	    PAT_VALUE(7, PAT_UNCACHEABLE);

	return;
}

/*
 * Load the guest's syscall MSRs before entering the guest.  These MSRs are
 * pass-through (see vmx_msr_guest_init) so the guest values must be in the
 * hardware registers while the guest runs.
 */
void
vmx_msr_guest_enter(struct vmx_vcpu *vcpu)
{

	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
	update_pcb_bases(curpcb);
	wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]);
	wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]);
}

/*
 * Load the guest's TSC_AUX value before guest entry, skipping the wrmsr
 * when it already matches the host's value (the common case).
 */
void
vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
	uint32_t host_aux = cpu_auxmsr();

	if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
		wrmsr(MSR_TSC_AUX, guest_tsc_aux);
}

/*
 * On guest exit, capture the guest's current syscall MSR values (they are
 * pass-through, so the guest may have modified them) and restore the host
 * values cached in vmx_msr_init().
 */
void
vmx_msr_guest_exit(struct vmx_vcpu *vcpu)
{

	/* Save guest MSRs */
	vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
	vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);

	/* Restore host MSRs */
	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);

	/* MSR_KGSBASE will be restored on the way back to userspace */
}

/*
 * Restore the host's TSC_AUX after guest exit if the guest was running
 * with a different value.
 */
void
vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
	uint32_t host_aux = cpu_auxmsr();

	if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
		/*
		 * Note that it is not necessary to save the guest value
		 * here; vcpu->guest_msrs[IDX_MSR_TSC_AUX] always
		 * contains the current value since it is updated whenever
		 * the guest writes to it (which is expected to be very
		 * rare).
		 */
		wrmsr(MSR_TSC_AUX, host_aux);
}

/*
 * Emulate a guest rdmsr for MSRs that are intercepted rather than
 * passed through.  On success the value is returned in '*val'; an
 * unrecognized MSR yields EINVAL (presumably punted to userspace by the
 * caller — 'retu' is not set here; TODO confirm against caller).
 */
int
vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu)
{
	int error;

	error = 0;

	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		/* No machine-check capabilities are advertised to the guest */
		*val = 0;
		break;
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		/* MTRR reads are emulated; a bad MSR injects #GP */
		if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		*val = misc_enable;
		break;
	case MSR_PLATFORM_INFO:
		*val = platform_info;
		break;
	case MSR_TURBO_RATIO_LIMIT:
	case MSR_TURBO_RATIO_LIMIT1:
		*val = turbo_ratio_limit;
		break;
	case MSR_PAT:
		*val = vcpu->guest_msrs[IDX_MSR_PAT];
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * Emulate a guest wrmsr for intercepted MSRs.  Invalid values inject #GP
 * into the guest; an unrecognized MSR yields EINVAL (presumably punted to
 * userspace by the caller — 'retu' is not set here; TODO confirm).
 */
int
vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu)
{
	uint64_t changed;
	int error;

	error = 0;

	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		break;		/* ignore writes */
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		changed = val ^ misc_enable;
		/*
		 * If the host has disabled the NX feature then the guest
		 * also cannot use it. However, a Linux guest will try to
		 * enable the NX feature by writing to the MISC_ENABLE MSR.
		 *
		 * This can be safely ignored because the memory management
		 * code looks at CPUID.80000001H:EDX.NX to check if the
		 * functionality is actually enabled.
		 */
		changed &= ~(1UL << 34);

		/*
		 * Punt to userspace if any other bits are being modified.
		 */
		if (changed)
			error = EINVAL;

		break;
	case MSR_PAT:
		/* Reject PAT values with reserved memory-type encodings */
		if (pat_valid(val))
			vcpu->guest_msrs[IDX_MSR_PAT] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	case MSR_TSC:
		/* Express the guest's desired TSC as an offset from the host */
		error = vmx_set_tsc_offset(vcpu, val - rdtsc());
		break;
	case MSR_TSC_AUX:
		if (vmx_have_msr_tsc_aux)
			/*
			 * vmx_msr_guest_enter_tsc_aux() will apply this
			 * value when it is called immediately before guest
			 * entry.
			 */
			vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}