1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/cpuset.h> 35 36 #include <machine/clock.h> 37 #include <machine/cpufunc.h> 38 #include <machine/md_var.h> 39 #include <machine/specialreg.h> 40 #include <machine/vmm.h> 41 42 #include "vmx.h" 43 #include "vmx_msr.h" 44 45 static boolean_t 46 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) 47 { 48 49 if (msr_val & (1UL << (bitpos + 32))) 50 return (TRUE); 51 else 52 return (FALSE); 53 } 54 55 static boolean_t 56 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) 57 { 58 59 if ((msr_val & (1UL << bitpos)) == 0) 60 return (TRUE); 61 else 62 return (FALSE); 63 } 64 65 uint32_t 66 vmx_revision(void) 67 { 68 69 return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); 70 } 71 72 /* 73 * Generate a bitmask to be used for the VMCS execution control fields. 74 * 75 * The caller specifies what bits should be set to one in 'ones_mask' 76 * and what bits should be set to zero in 'zeros_mask'. The don't-care 77 * bits are set to the default value. The default values are obtained 78 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining 79 * VMX Capabilities". 80 * 81 * Returns zero on success and non-zero on error. 82 */ 83 int 84 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, 85 uint32_t zeros_mask, uint32_t *retval) 86 { 87 int i; 88 uint64_t val, trueval; 89 boolean_t true_ctls_avail, one_allowed, zero_allowed; 90 91 /* We cannot ask the same bit to be set to both '1' and '0' */ 92 if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) 93 return (EINVAL); 94 95 if (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) 96 true_ctls_avail = TRUE; 97 else 98 true_ctls_avail = FALSE; 99 100 val = rdmsr(ctl_reg); 101 if (true_ctls_avail) 102 trueval = rdmsr(true_ctl_reg); /* step c */ 103 else 104 trueval = val; /* step a */ 105 106 for (i = 0; i < 32; i++) { 107 one_allowed = vmx_ctl_allows_one_setting(trueval, i); 108 zero_allowed = vmx_ctl_allows_zero_setting(trueval, i); 109 110 KASSERT(one_allowed || zero_allowed, 111 ("invalid zero/one setting for bit %d of ctl 0x%0x, " 112 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg)); 113 114 if (zero_allowed && !one_allowed) { /* b(i),c(i) */ 115 if (ones_mask & (1 << i)) 116 return (EINVAL); 117 *retval &= ~(1 << i); 118 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */ 119 if (zeros_mask & (1 << i)) 120 return (EINVAL); 121 *retval |= 1 << i; 122 } else { 123 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */ 124 *retval &= ~(1 << i); 125 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */ 126 *retval |= 1 << i; 127 else if (!true_ctls_avail) 128 *retval &= ~(1 << i); /* b(iii) */ 129 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/ 130 *retval &= ~(1 << i); 131 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */ 132 *retval |= 1 << i; 133 else { 134 panic("vmx_set_ctlreg: unable to determine " 135 "correct value of ctl bit %d for msr " 136 "0x%0x and true msr 0x%0x", i, ctl_reg, 137 true_ctl_reg); 138 } 139 } 140 } 141 142 return (0); 143 } 144 145 void 146 msr_bitmap_initialize(char *bitmap) 147 { 148 149 memset(bitmap, 0xff, PAGE_SIZE); 150 } 151 152 int 153 msr_bitmap_change_access(char *bitmap, u_int msr, int access) 154 { 155 int byte, bit; 156 157 if (msr <= 0x00001FFF) 158 byte = msr / 8; 159 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) 160 byte = 1024 + (msr - 0xC0000000) / 8; 161 else 162 return (EINVAL); 163 164 bit = msr & 0x7; 165 166 if (access & MSR_BITMAP_ACCESS_READ) 167 bitmap[byte] &= ~(1 << bit); 168 else 169 bitmap[byte] |= 1 << bit; 170 171 byte += 2048; 172 if (access & MSR_BITMAP_ACCESS_WRITE) 173 bitmap[byte] &= ~(1 << bit); 174 else 175 bitmap[byte] |= 1 << bit; 176 177 return (0); 178 } 179 180 static uint64_t misc_enable; 181 static uint64_t platform_info; 182 static uint64_t turbo_ratio_limit; 183 static uint64_t host_msrs[GUEST_MSR_NUM]; 184 185 static bool 186 nehalem_cpu(void) 187 { 188 u_int family, model; 189 190 /* 191 * The family:model numbers belonging to the Nehalem microarchitecture 192 * are documented in Section 35.5, Intel SDM dated Feb 2014. 193 */ 194 family = CPUID_TO_FAMILY(cpu_id); 195 model = CPUID_TO_MODEL(cpu_id); 196 if (family == 0x6) { 197 switch (model) { 198 case 0x1A: 199 case 0x1E: 200 case 0x1F: 201 case 0x2E: 202 return (true); 203 default: 204 break; 205 } 206 } 207 return (false); 208 } 209 210 static bool 211 westmere_cpu(void) 212 { 213 u_int family, model; 214 215 /* 216 * The family:model numbers belonging to the Westmere microarchitecture 217 * are documented in Section 35.6, Intel SDM dated Feb 2014. 218 */ 219 family = CPUID_TO_FAMILY(cpu_id); 220 model = CPUID_TO_MODEL(cpu_id); 221 if (family == 0x6) { 222 switch (model) { 223 case 0x25: 224 case 0x2C: 225 return (true); 226 default: 227 break; 228 } 229 } 230 return (false); 231 } 232 233 static bool 234 pat_valid(uint64_t val) 235 { 236 int i, pa; 237 238 /* 239 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT" 240 * 241 * Extract PA0 through PA7 and validate that each one encodes a 242 * valid memory type. 243 */ 244 for (i = 0; i < 8; i++) { 245 pa = (val >> (i * 8)) & 0xff; 246 if (pa == 2 || pa == 3 || pa >= 8) 247 return (false); 248 } 249 return (true); 250 } 251 252 void 253 vmx_msr_init(void) 254 { 255 uint64_t bus_freq, ratio; 256 int i; 257 258 /* 259 * It is safe to cache the values of the following MSRs because 260 * they don't change based on curcpu, curproc or curthread. 261 */ 262 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 263 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 264 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 265 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 266 267 /* 268 * Initialize emulated MSRs 269 */ 270 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE); 271 /* 272 * Set mandatory bits 273 * 11: branch trace disabled 274 * 12: PEBS unavailable 275 * Clear unsupported features 276 * 16: SpeedStep enable 277 * 18: enable MONITOR FSM 278 */ 279 misc_enable |= (1 << 12) | (1 << 11); 280 misc_enable &= ~((1 << 18) | (1 << 16)); 281 282 if (nehalem_cpu() || westmere_cpu()) 283 bus_freq = 133330000; /* 133Mhz */ 284 else 285 bus_freq = 100000000; /* 100Mhz */ 286 287 /* 288 * XXXtime 289 * The ratio should really be based on the virtual TSC frequency as 290 * opposed to the host TSC. 291 */ 292 ratio = (tsc_freq / bus_freq) & 0xff; 293 294 /* 295 * The register definition is based on the micro-architecture 296 * but the following bits are always the same: 297 * [15:8] Maximum Non-Turbo Ratio 298 * [28] Programmable Ratio Limit for Turbo Mode 299 * [29] Programmable TDC-TDP Limit for Turbo Mode 300 * [47:40] Maximum Efficiency Ratio 301 * 302 * The other bits can be safely set to 0 on all 303 * micro-architectures up to Haswell. 304 */ 305 platform_info = (ratio << 8) | (ratio << 40); 306 307 /* 308 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is 309 * dependent on the maximum cores per package supported by the micro- 310 * architecture. For e.g., Westmere supports 6 cores per package and 311 * uses the low 48 bits. Sandybridge support 8 cores per package and 312 * uses up all 64 bits. 313 * 314 * However, the unused bits are reserved so we pretend that all bits 315 * in this MSR are valid. 316 */ 317 for (i = 0; i < 8; i++) 318 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio; 319 } 320 321 void 322 vmx_msr_guest_init(struct vmx *vmx, int vcpuid) 323 { 324 uint64_t *guest_msrs; 325 326 guest_msrs = vmx->guest_msrs[vcpuid]; 327 328 /* 329 * The permissions bitmap is shared between all vcpus so initialize it 330 * once when initializing the vBSP. 331 */ 332 if (vcpuid == 0) { 333 guest_msr_rw(vmx, MSR_LSTAR); 334 guest_msr_rw(vmx, MSR_CSTAR); 335 guest_msr_rw(vmx, MSR_STAR); 336 guest_msr_rw(vmx, MSR_SF_MASK); 337 guest_msr_rw(vmx, MSR_KGSBASE); 338 } 339 340 /* 341 * Initialize guest IA32_PAT MSR with default value after reset. 342 */ 343 guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | 344 PAT_VALUE(1, PAT_WRITE_THROUGH) | 345 PAT_VALUE(2, PAT_UNCACHED) | 346 PAT_VALUE(3, PAT_UNCACHEABLE) | 347 PAT_VALUE(4, PAT_WRITE_BACK) | 348 PAT_VALUE(5, PAT_WRITE_THROUGH) | 349 PAT_VALUE(6, PAT_UNCACHED) | 350 PAT_VALUE(7, PAT_UNCACHEABLE); 351 352 return; 353 } 354 355 void 356 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) 357 { 358 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 359 360 /* Save host MSRs (if any) and restore guest MSRs */ 361 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); 362 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); 363 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); 364 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); 365 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); 366 } 367 368 void 369 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) 370 { 371 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 372 373 /* Save guest MSRs */ 374 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 375 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 376 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 377 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 378 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); 379 380 /* Restore host MSRs */ 381 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); 382 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); 383 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); 384 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); 385 386 /* MSR_KGSBASE will be restored on the way back to userspace */ 387 } 388 389 int 390 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) 391 { 392 const uint64_t *guest_msrs; 393 int error; 394 395 guest_msrs = vmx->guest_msrs[vcpuid]; 396 error = 0; 397 398 switch (num) { 399 case MSR_IA32_MISC_ENABLE: 400 *val = misc_enable; 401 break; 402 case MSR_PLATFORM_INFO: 403 *val = platform_info; 404 break; 405 case MSR_TURBO_RATIO_LIMIT: 406 case MSR_TURBO_RATIO_LIMIT1: 407 *val = turbo_ratio_limit; 408 break; 409 case MSR_PAT: 410 *val = guest_msrs[IDX_MSR_PAT]; 411 break; 412 default: 413 error = EINVAL; 414 break; 415 } 416 return (error); 417 } 418 419 int 420 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) 421 { 422 uint64_t *guest_msrs; 423 uint64_t changed; 424 int error; 425 426 guest_msrs = vmx->guest_msrs[vcpuid]; 427 error = 0; 428 429 switch (num) { 430 case MSR_IA32_MISC_ENABLE: 431 changed = val ^ misc_enable; 432 /* 433 * If the host has disabled the NX feature then the guest 434 * also cannot use it. However, a Linux guest will try to 435 * enable the NX feature by writing to the MISC_ENABLE MSR. 436 * 437 * This can be safely ignored because the memory management 438 * code looks at CPUID.80000001H:EDX.NX to check if the 439 * functionality is actually enabled. 440 */ 441 changed &= ~(1UL << 34); 442 443 /* 444 * Punt to userspace if any other bits are being modified. 445 */ 446 if (changed) 447 error = EINVAL; 448 449 break; 450 case MSR_PAT: 451 if (pat_valid(val)) 452 guest_msrs[IDX_MSR_PAT] = val; 453 else 454 vm_inject_gp(vmx->vm, vcpuid); 455 break; 456 default: 457 error = EINVAL; 458 break; 459 } 460 461 return (error); 462 } 463