1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 35 #include <machine/clock.h> 36 #include <machine/cpufunc.h> 37 #include <machine/md_var.h> 38 #include <machine/specialreg.h> 39 #include <machine/vmm.h> 40 41 #include "vmx.h" 42 #include "vmx_msr.h" 43 44 static boolean_t 45 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) 46 { 47 48 if (msr_val & (1UL << (bitpos + 32))) 49 return (TRUE); 50 else 51 return (FALSE); 52 } 53 54 static boolean_t 55 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) 56 { 57 58 if ((msr_val & (1UL << bitpos)) == 0) 59 return (TRUE); 60 else 61 return (FALSE); 62 } 63 64 uint32_t 65 vmx_revision(void) 66 { 67 68 return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); 69 } 70 71 /* 72 * Generate a bitmask to be used for the VMCS execution control fields. 73 * 74 * The caller specifies what bits should be set to one in 'ones_mask' 75 * and what bits should be set to zero in 'zeros_mask'. The don't-care 76 * bits are set to the default value. The default values are obtained 77 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining 78 * VMX Capabilities". 79 * 80 * Returns zero on success and non-zero on error. 81 */ 82 int 83 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, 84 uint32_t zeros_mask, uint32_t *retval) 85 { 86 int i; 87 uint64_t val, trueval; 88 boolean_t true_ctls_avail, one_allowed, zero_allowed; 89 90 /* We cannot ask the same bit to be set to both '1' and '0' */ 91 if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) 92 return (EINVAL); 93 94 if (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) 95 true_ctls_avail = TRUE; 96 else 97 true_ctls_avail = FALSE; 98 99 val = rdmsr(ctl_reg); 100 if (true_ctls_avail) 101 trueval = rdmsr(true_ctl_reg); /* step c */ 102 else 103 trueval = val; /* step a */ 104 105 for (i = 0; i < 32; i++) { 106 one_allowed = vmx_ctl_allows_one_setting(trueval, i); 107 zero_allowed = vmx_ctl_allows_zero_setting(trueval, i); 108 109 KASSERT(one_allowed || zero_allowed, 110 ("invalid zero/one setting for bit %d of ctl 0x%0x, " 111 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg)); 112 113 if (zero_allowed && !one_allowed) { /* b(i),c(i) */ 114 if (ones_mask & (1 << i)) 115 return (EINVAL); 116 *retval &= ~(1 << i); 117 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */ 118 if (zeros_mask & (1 << i)) 119 return (EINVAL); 120 *retval |= 1 << i; 121 } else { 122 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */ 123 *retval &= ~(1 << i); 124 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */ 125 *retval |= 1 << i; 126 else if (!true_ctls_avail) 127 *retval &= ~(1 << i); /* b(iii) */ 128 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/ 129 *retval &= ~(1 << i); 130 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */ 131 *retval |= 1 << i; 132 else { 133 panic("vmx_set_ctlreg: unable to determine " 134 "correct value of ctl bit %d for msr " 135 "0x%0x and true msr 0x%0x", i, ctl_reg, 136 true_ctl_reg); 137 } 138 } 139 } 140 141 return (0); 142 } 143 144 void 145 msr_bitmap_initialize(char *bitmap) 146 { 147 148 memset(bitmap, 0xff, PAGE_SIZE); 149 } 150 151 int 152 msr_bitmap_change_access(char *bitmap, u_int msr, int access) 153 { 154 int byte, bit; 155 156 if (msr <= 0x00001FFF) 157 byte = msr / 8; 158 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) 159 byte = 1024 + (msr - 0xC0000000) / 8; 160 else 161 return (EINVAL); 162 163 bit = msr & 0x7; 164 165 if (access & MSR_BITMAP_ACCESS_READ) 166 bitmap[byte] &= ~(1 << bit); 167 else 168 bitmap[byte] |= 1 << bit; 169 170 byte += 2048; 171 if (access & MSR_BITMAP_ACCESS_WRITE) 172 bitmap[byte] &= ~(1 << bit); 173 else 174 bitmap[byte] |= 1 << bit; 175 176 return (0); 177 } 178 179 static uint64_t misc_enable; 180 static uint64_t platform_info; 181 static uint64_t turbo_ratio_limit; 182 static uint64_t host_msrs[GUEST_MSR_NUM]; 183 184 static bool 185 nehalem_cpu(void) 186 { 187 u_int family, model; 188 189 /* 190 * The family:model numbers belonging to the Nehalem microarchitecture 191 * are documented in Section 35.5, Intel SDM dated Feb 2014. 192 */ 193 family = CPUID_TO_FAMILY(cpu_id); 194 model = CPUID_TO_MODEL(cpu_id); 195 if (family == 0x6) { 196 switch (model) { 197 case 0x1A: 198 case 0x1E: 199 case 0x1F: 200 case 0x2E: 201 return (true); 202 default: 203 break; 204 } 205 } 206 return (false); 207 } 208 209 static bool 210 westmere_cpu(void) 211 { 212 u_int family, model; 213 214 /* 215 * The family:model numbers belonging to the Westmere microarchitecture 216 * are documented in Section 35.6, Intel SDM dated Feb 2014. 217 */ 218 family = CPUID_TO_FAMILY(cpu_id); 219 model = CPUID_TO_MODEL(cpu_id); 220 if (family == 0x6) { 221 switch (model) { 222 case 0x25: 223 case 0x2C: 224 return (true); 225 default: 226 break; 227 } 228 } 229 return (false); 230 } 231 232 static bool 233 pat_valid(uint64_t val) 234 { 235 int i, pa; 236 237 /* 238 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT" 239 * 240 * Extract PA0 through PA7 and validate that each one encodes a 241 * valid memory type. 242 */ 243 for (i = 0; i < 8; i++) { 244 pa = (val >> (i * 8)) & 0xff; 245 if (pa == 2 || pa == 3 || pa >= 8) 246 return (false); 247 } 248 return (true); 249 } 250 251 void 252 vmx_msr_init(void) 253 { 254 uint64_t bus_freq, ratio; 255 int i; 256 257 /* 258 * It is safe to cache the values of the following MSRs because 259 * they don't change based on curcpu, curproc or curthread. 260 */ 261 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 262 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 263 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 264 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 265 266 /* 267 * Initialize emulated MSRs 268 */ 269 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE); 270 /* 271 * Set mandatory bits 272 * 11: branch trace disabled 273 * 12: PEBS unavailable 274 * Clear unsupported features 275 * 16: SpeedStep enable 276 * 18: enable MONITOR FSM 277 */ 278 misc_enable |= (1 << 12) | (1 << 11); 279 misc_enable &= ~((1 << 18) | (1 << 16)); 280 281 if (nehalem_cpu() || westmere_cpu()) 282 bus_freq = 133330000; /* 133Mhz */ 283 else 284 bus_freq = 100000000; /* 100Mhz */ 285 286 /* 287 * XXXtime 288 * The ratio should really be based on the virtual TSC frequency as 289 * opposed to the host TSC. 290 */ 291 ratio = (tsc_freq / bus_freq) & 0xff; 292 293 /* 294 * The register definition is based on the micro-architecture 295 * but the following bits are always the same: 296 * [15:8] Maximum Non-Turbo Ratio 297 * [28] Programmable Ratio Limit for Turbo Mode 298 * [29] Programmable TDC-TDP Limit for Turbo Mode 299 * [47:40] Maximum Efficiency Ratio 300 * 301 * The other bits can be safely set to 0 on all 302 * micro-architectures up to Haswell. 303 */ 304 platform_info = (ratio << 8) | (ratio << 40); 305 306 /* 307 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is 308 * dependent on the maximum cores per package supported by the micro- 309 * architecture. For e.g., Westmere supports 6 cores per package and 310 * uses the low 48 bits. Sandybridge support 8 cores per package and 311 * uses up all 64 bits. 312 * 313 * However, the unused bits are reserved so we pretend that all bits 314 * in this MSR are valid. 315 */ 316 for (i = 0; i < 8; i++) 317 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio; 318 } 319 320 void 321 vmx_msr_guest_init(struct vmx *vmx, int vcpuid) 322 { 323 uint64_t *guest_msrs; 324 325 guest_msrs = vmx->guest_msrs[vcpuid]; 326 327 /* 328 * The permissions bitmap is shared between all vcpus so initialize it 329 * once when initializing the vBSP. 330 */ 331 if (vcpuid == 0) { 332 guest_msr_rw(vmx, MSR_LSTAR); 333 guest_msr_rw(vmx, MSR_CSTAR); 334 guest_msr_rw(vmx, MSR_STAR); 335 guest_msr_rw(vmx, MSR_SF_MASK); 336 guest_msr_rw(vmx, MSR_KGSBASE); 337 } 338 339 /* 340 * Initialize guest IA32_PAT MSR with default value after reset. 341 */ 342 guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | 343 PAT_VALUE(1, PAT_WRITE_THROUGH) | 344 PAT_VALUE(2, PAT_UNCACHED) | 345 PAT_VALUE(3, PAT_UNCACHEABLE) | 346 PAT_VALUE(4, PAT_WRITE_BACK) | 347 PAT_VALUE(5, PAT_WRITE_THROUGH) | 348 PAT_VALUE(6, PAT_UNCACHED) | 349 PAT_VALUE(7, PAT_UNCACHEABLE); 350 351 return; 352 } 353 354 void 355 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) 356 { 357 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 358 359 /* Save host MSRs (if any) and restore guest MSRs */ 360 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); 361 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); 362 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); 363 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); 364 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); 365 } 366 367 void 368 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) 369 { 370 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 371 372 /* Save guest MSRs */ 373 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 374 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 375 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 376 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 377 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); 378 379 /* Restore host MSRs */ 380 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); 381 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); 382 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); 383 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); 384 385 /* MSR_KGSBASE will be restored on the way back to userspace */ 386 } 387 388 int 389 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) 390 { 391 const uint64_t *guest_msrs; 392 int error; 393 394 guest_msrs = vmx->guest_msrs[vcpuid]; 395 error = 0; 396 397 switch (num) { 398 case MSR_MCG_CAP: 399 case MSR_MCG_STATUS: 400 *val = 0; 401 break; 402 case MSR_MTRRcap: 403 case MSR_MTRRdefType: 404 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: 405 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: 406 case MSR_MTRR64kBase: 407 *val = 0; 408 break; 409 case MSR_IA32_MISC_ENABLE: 410 *val = misc_enable; 411 break; 412 case MSR_PLATFORM_INFO: 413 *val = platform_info; 414 break; 415 case MSR_TURBO_RATIO_LIMIT: 416 case MSR_TURBO_RATIO_LIMIT1: 417 *val = turbo_ratio_limit; 418 break; 419 case MSR_PAT: 420 *val = guest_msrs[IDX_MSR_PAT]; 421 break; 422 default: 423 error = EINVAL; 424 break; 425 } 426 return (error); 427 } 428 429 int 430 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) 431 { 432 uint64_t *guest_msrs; 433 uint64_t changed; 434 int error; 435 436 guest_msrs = vmx->guest_msrs[vcpuid]; 437 error = 0; 438 439 switch (num) { 440 case MSR_MCG_CAP: 441 case MSR_MCG_STATUS: 442 break; /* ignore writes */ 443 case MSR_MTRRcap: 444 vm_inject_gp(vmx->vm, vcpuid); 445 break; 446 case MSR_MTRRdefType: 447 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: 448 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: 449 case MSR_MTRR64kBase: 450 break; /* Ignore writes */ 451 case MSR_IA32_MISC_ENABLE: 452 changed = val ^ misc_enable; 453 /* 454 * If the host has disabled the NX feature then the guest 455 * also cannot use it. However, a Linux guest will try to 456 * enable the NX feature by writing to the MISC_ENABLE MSR. 457 * 458 * This can be safely ignored because the memory management 459 * code looks at CPUID.80000001H:EDX.NX to check if the 460 * functionality is actually enabled. 461 */ 462 changed &= ~(1UL << 34); 463 464 /* 465 * Punt to userspace if any other bits are being modified. 466 */ 467 if (changed) 468 error = EINVAL; 469 470 break; 471 case MSR_PAT: 472 if (pat_valid(val)) 473 guest_msrs[IDX_MSR_PAT] = val; 474 else 475 vm_inject_gp(vmx->vm, vcpuid); 476 break; 477 case MSR_TSC: 478 error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc()); 479 break; 480 default: 481 error = EINVAL; 482 break; 483 } 484 485 return (error); 486 } 487