1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/proc.h> 35 36 #include <machine/clock.h> 37 #include <machine/cpufunc.h> 38 #include <machine/md_var.h> 39 #include <machine/pcb.h> 40 #include <machine/specialreg.h> 41 #include <machine/vmm.h> 42 43 #include "vmx.h" 44 #include "vmx_msr.h" 45 46 static boolean_t 47 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) 48 { 49 50 if (msr_val & (1UL << (bitpos + 32))) 51 return (TRUE); 52 else 53 return (FALSE); 54 } 55 56 static boolean_t 57 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) 58 { 59 60 if ((msr_val & (1UL << bitpos)) == 0) 61 return (TRUE); 62 else 63 return (FALSE); 64 } 65 66 uint32_t 67 vmx_revision(void) 68 { 69 70 return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); 71 } 72 73 /* 74 * Generate a bitmask to be used for the VMCS execution control fields. 75 * 76 * The caller specifies what bits should be set to one in 'ones_mask' 77 * and what bits should be set to zero in 'zeros_mask'. The don't-care 78 * bits are set to the default value. The default values are obtained 79 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining 80 * VMX Capabilities". 81 * 82 * Returns zero on success and non-zero on error. 83 */ 84 int 85 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, 86 uint32_t zeros_mask, uint32_t *retval) 87 { 88 int i; 89 uint64_t val, trueval; 90 boolean_t true_ctls_avail, one_allowed, zero_allowed; 91 92 /* We cannot ask the same bit to be set to both '1' and '0' */ 93 if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) 94 return (EINVAL); 95 96 if (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) 97 true_ctls_avail = TRUE; 98 else 99 true_ctls_avail = FALSE; 100 101 val = rdmsr(ctl_reg); 102 if (true_ctls_avail) 103 trueval = rdmsr(true_ctl_reg); /* step c */ 104 else 105 trueval = val; /* step a */ 106 107 for (i = 0; i < 32; i++) { 108 one_allowed = vmx_ctl_allows_one_setting(trueval, i); 109 zero_allowed = vmx_ctl_allows_zero_setting(trueval, i); 110 111 KASSERT(one_allowed || zero_allowed, 112 ("invalid zero/one setting for bit %d of ctl 0x%0x, " 113 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg)); 114 115 if (zero_allowed && !one_allowed) { /* b(i),c(i) */ 116 if (ones_mask & (1 << i)) 117 return (EINVAL); 118 *retval &= ~(1 << i); 119 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */ 120 if (zeros_mask & (1 << i)) 121 return (EINVAL); 122 *retval |= 1 << i; 123 } else { 124 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */ 125 *retval &= ~(1 << i); 126 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */ 127 *retval |= 1 << i; 128 else if (!true_ctls_avail) 129 *retval &= ~(1 << i); /* b(iii) */ 130 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/ 131 *retval &= ~(1 << i); 132 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */ 133 *retval |= 1 << i; 134 else { 135 panic("vmx_set_ctlreg: unable to determine " 136 "correct value of ctl bit %d for msr " 137 "0x%0x and true msr 0x%0x", i, ctl_reg, 138 true_ctl_reg); 139 } 140 } 141 } 142 143 return (0); 144 } 145 146 void 147 msr_bitmap_initialize(char *bitmap) 148 { 149 150 memset(bitmap, 0xff, PAGE_SIZE); 151 } 152 153 int 154 msr_bitmap_change_access(char *bitmap, u_int msr, int access) 155 { 156 int byte, bit; 157 158 if (msr <= 0x00001FFF) 159 byte = msr / 8; 160 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) 161 byte = 1024 + (msr - 0xC0000000) / 8; 162 else 163 return (EINVAL); 164 165 bit = msr & 0x7; 166 167 if (access & MSR_BITMAP_ACCESS_READ) 168 bitmap[byte] &= ~(1 << bit); 169 else 170 bitmap[byte] |= 1 << bit; 171 172 byte += 2048; 173 if (access & MSR_BITMAP_ACCESS_WRITE) 174 bitmap[byte] &= ~(1 << bit); 175 else 176 bitmap[byte] |= 1 << bit; 177 178 return (0); 179 } 180 181 static uint64_t misc_enable; 182 static uint64_t platform_info; 183 static uint64_t turbo_ratio_limit; 184 static uint64_t host_msrs[GUEST_MSR_NUM]; 185 186 static bool 187 nehalem_cpu(void) 188 { 189 u_int family, model; 190 191 /* 192 * The family:model numbers belonging to the Nehalem microarchitecture 193 * are documented in Section 35.5, Intel SDM dated Feb 2014. 194 */ 195 family = CPUID_TO_FAMILY(cpu_id); 196 model = CPUID_TO_MODEL(cpu_id); 197 if (family == 0x6) { 198 switch (model) { 199 case 0x1A: 200 case 0x1E: 201 case 0x1F: 202 case 0x2E: 203 return (true); 204 default: 205 break; 206 } 207 } 208 return (false); 209 } 210 211 static bool 212 westmere_cpu(void) 213 { 214 u_int family, model; 215 216 /* 217 * The family:model numbers belonging to the Westmere microarchitecture 218 * are documented in Section 35.6, Intel SDM dated Feb 2014. 219 */ 220 family = CPUID_TO_FAMILY(cpu_id); 221 model = CPUID_TO_MODEL(cpu_id); 222 if (family == 0x6) { 223 switch (model) { 224 case 0x25: 225 case 0x2C: 226 return (true); 227 default: 228 break; 229 } 230 } 231 return (false); 232 } 233 234 static bool 235 pat_valid(uint64_t val) 236 { 237 int i, pa; 238 239 /* 240 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT" 241 * 242 * Extract PA0 through PA7 and validate that each one encodes a 243 * valid memory type. 244 */ 245 for (i = 0; i < 8; i++) { 246 pa = (val >> (i * 8)) & 0xff; 247 if (pa == 2 || pa == 3 || pa >= 8) 248 return (false); 249 } 250 return (true); 251 } 252 253 void 254 vmx_msr_init(void) 255 { 256 uint64_t bus_freq, ratio; 257 int i; 258 259 /* 260 * It is safe to cache the values of the following MSRs because 261 * they don't change based on curcpu, curproc or curthread. 262 */ 263 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 264 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 265 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 266 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 267 268 /* 269 * Initialize emulated MSRs 270 */ 271 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE); 272 /* 273 * Set mandatory bits 274 * 11: branch trace disabled 275 * 12: PEBS unavailable 276 * Clear unsupported features 277 * 16: SpeedStep enable 278 * 18: enable MONITOR FSM 279 */ 280 misc_enable |= (1 << 12) | (1 << 11); 281 misc_enable &= ~((1 << 18) | (1 << 16)); 282 283 if (nehalem_cpu() || westmere_cpu()) 284 bus_freq = 133330000; /* 133Mhz */ 285 else 286 bus_freq = 100000000; /* 100Mhz */ 287 288 /* 289 * XXXtime 290 * The ratio should really be based on the virtual TSC frequency as 291 * opposed to the host TSC. 292 */ 293 ratio = (tsc_freq / bus_freq) & 0xff; 294 295 /* 296 * The register definition is based on the micro-architecture 297 * but the following bits are always the same: 298 * [15:8] Maximum Non-Turbo Ratio 299 * [28] Programmable Ratio Limit for Turbo Mode 300 * [29] Programmable TDC-TDP Limit for Turbo Mode 301 * [47:40] Maximum Efficiency Ratio 302 * 303 * The other bits can be safely set to 0 on all 304 * micro-architectures up to Haswell. 305 */ 306 platform_info = (ratio << 8) | (ratio << 40); 307 308 /* 309 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is 310 * dependent on the maximum cores per package supported by the micro- 311 * architecture. For e.g., Westmere supports 6 cores per package and 312 * uses the low 48 bits. Sandybridge support 8 cores per package and 313 * uses up all 64 bits. 314 * 315 * However, the unused bits are reserved so we pretend that all bits 316 * in this MSR are valid. 317 */ 318 for (i = 0; i < 8; i++) 319 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio; 320 } 321 322 void 323 vmx_msr_guest_init(struct vmx *vmx, int vcpuid) 324 { 325 uint64_t *guest_msrs; 326 327 guest_msrs = vmx->guest_msrs[vcpuid]; 328 329 /* 330 * The permissions bitmap is shared between all vcpus so initialize it 331 * once when initializing the vBSP. 332 */ 333 if (vcpuid == 0) { 334 guest_msr_rw(vmx, MSR_LSTAR); 335 guest_msr_rw(vmx, MSR_CSTAR); 336 guest_msr_rw(vmx, MSR_STAR); 337 guest_msr_rw(vmx, MSR_SF_MASK); 338 guest_msr_rw(vmx, MSR_KGSBASE); 339 } 340 341 /* 342 * Initialize guest IA32_PAT MSR with default value after reset. 343 */ 344 guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | 345 PAT_VALUE(1, PAT_WRITE_THROUGH) | 346 PAT_VALUE(2, PAT_UNCACHED) | 347 PAT_VALUE(3, PAT_UNCACHEABLE) | 348 PAT_VALUE(4, PAT_WRITE_BACK) | 349 PAT_VALUE(5, PAT_WRITE_THROUGH) | 350 PAT_VALUE(6, PAT_UNCACHED) | 351 PAT_VALUE(7, PAT_UNCACHEABLE); 352 353 return; 354 } 355 356 void 357 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) 358 { 359 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 360 361 /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */ 362 update_pcb_bases(curpcb); 363 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); 364 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); 365 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); 366 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); 367 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); 368 } 369 370 void 371 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) 372 { 373 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 374 375 /* Save guest MSRs */ 376 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 377 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 378 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 379 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 380 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); 381 382 /* Restore host MSRs */ 383 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); 384 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); 385 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); 386 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); 387 388 /* MSR_KGSBASE will be restored on the way back to userspace */ 389 } 390 391 int 392 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) 393 { 394 const uint64_t *guest_msrs; 395 int error; 396 397 guest_msrs = vmx->guest_msrs[vcpuid]; 398 error = 0; 399 400 switch (num) { 401 case MSR_MCG_CAP: 402 case MSR_MCG_STATUS: 403 *val = 0; 404 break; 405 case MSR_MTRRcap: 406 case MSR_MTRRdefType: 407 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: 408 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: 409 case MSR_MTRR64kBase: 410 *val = 0; 411 break; 412 case MSR_IA32_MISC_ENABLE: 413 *val = misc_enable; 414 break; 415 case MSR_PLATFORM_INFO: 416 *val = platform_info; 417 break; 418 case MSR_TURBO_RATIO_LIMIT: 419 case MSR_TURBO_RATIO_LIMIT1: 420 *val = turbo_ratio_limit; 421 break; 422 case MSR_PAT: 423 *val = guest_msrs[IDX_MSR_PAT]; 424 break; 425 default: 426 error = EINVAL; 427 break; 428 } 429 return (error); 430 } 431 432 int 433 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) 434 { 435 uint64_t *guest_msrs; 436 uint64_t changed; 437 int error; 438 439 guest_msrs = vmx->guest_msrs[vcpuid]; 440 error = 0; 441 442 switch (num) { 443 case MSR_MCG_CAP: 444 case MSR_MCG_STATUS: 445 break; /* ignore writes */ 446 case MSR_MTRRcap: 447 vm_inject_gp(vmx->vm, vcpuid); 448 break; 449 case MSR_MTRRdefType: 450 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: 451 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: 452 case MSR_MTRR64kBase: 453 break; /* Ignore writes */ 454 case MSR_IA32_MISC_ENABLE: 455 changed = val ^ misc_enable; 456 /* 457 * If the host has disabled the NX feature then the guest 458 * also cannot use it. However, a Linux guest will try to 459 * enable the NX feature by writing to the MISC_ENABLE MSR. 460 * 461 * This can be safely ignored because the memory management 462 * code looks at CPUID.80000001H:EDX.NX to check if the 463 * functionality is actually enabled. 464 */ 465 changed &= ~(1UL << 34); 466 467 /* 468 * Punt to userspace if any other bits are being modified. 469 */ 470 if (changed) 471 error = EINVAL; 472 473 break; 474 case MSR_PAT: 475 if (pat_valid(val)) 476 guest_msrs[IDX_MSR_PAT] = val; 477 else 478 vm_inject_gp(vmx->vm, vcpuid); 479 break; 480 case MSR_TSC: 481 error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc()); 482 break; 483 default: 484 error = EINVAL; 485 break; 486 } 487 488 return (error); 489 } 490