1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/proc.h> 37 38 #include <machine/clock.h> 39 #include <machine/cpufunc.h> 40 #include <machine/md_var.h> 41 #include <machine/pcb.h> 42 #include <machine/specialreg.h> 43 #include <machine/vmm.h> 44 45 #include "vmx.h" 46 #include "vmx_msr.h" 47 48 static bool 49 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) 50 { 51 52 return ((msr_val & (1UL << (bitpos + 32))) != 0); 53 } 54 55 static bool 56 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) 57 { 58 59 return ((msr_val & (1UL << bitpos)) == 0); 60 } 61 62 uint32_t 63 vmx_revision(void) 64 { 65 66 return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); 67 } 68 69 /* 70 * Generate a bitmask to be used for the VMCS execution control fields. 71 * 72 * The caller specifies what bits should be set to one in 'ones_mask' 73 * and what bits should be set to zero in 'zeros_mask'. The don't-care 74 * bits are set to the default value. The default values are obtained 75 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining 76 * VMX Capabilities". 77 * 78 * Returns zero on success and non-zero on error. 79 */ 80 int 81 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, 82 uint32_t zeros_mask, uint32_t *retval) 83 { 84 int i; 85 uint64_t val, trueval; 86 bool true_ctls_avail, one_allowed, zero_allowed; 87 88 /* We cannot ask the same bit to be set to both '1' and '0' */ 89 if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) 90 return (EINVAL); 91 92 true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0; 93 94 val = rdmsr(ctl_reg); 95 if (true_ctls_avail) 96 trueval = rdmsr(true_ctl_reg); /* step c */ 97 else 98 trueval = val; /* step a */ 99 100 for (i = 0; i < 32; i++) { 101 one_allowed = vmx_ctl_allows_one_setting(trueval, i); 102 zero_allowed = vmx_ctl_allows_zero_setting(trueval, i); 103 104 KASSERT(one_allowed || zero_allowed, 105 ("invalid zero/one setting for bit %d of ctl 0x%0x, " 106 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg)); 107 108 if (zero_allowed && !one_allowed) { /* b(i),c(i) */ 109 if (ones_mask & (1 << i)) 110 return (EINVAL); 111 *retval &= ~(1 << i); 112 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */ 113 if (zeros_mask & (1 << i)) 114 return (EINVAL); 115 *retval |= 1 << i; 116 } else { 117 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */ 118 *retval &= ~(1 << i); 119 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */ 120 *retval |= 1 << i; 121 else if (!true_ctls_avail) 122 *retval &= ~(1 << i); /* b(iii) */ 123 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/ 124 *retval &= ~(1 << i); 125 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */ 126 *retval |= 1 << i; 127 else { 128 panic("vmx_set_ctlreg: unable to determine " 129 "correct value of ctl bit %d for msr " 130 "0x%0x and true msr 0x%0x", i, ctl_reg, 131 true_ctl_reg); 132 } 133 } 134 } 135 136 return (0); 137 } 138 139 void 140 msr_bitmap_initialize(char *bitmap) 141 { 142 143 memset(bitmap, 0xff, PAGE_SIZE); 144 } 145 146 int 147 msr_bitmap_change_access(char *bitmap, u_int msr, int access) 148 { 149 int byte, bit; 150 151 if (msr <= 0x00001FFF) 152 byte = msr / 8; 153 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) 154 byte = 1024 + (msr - 0xC0000000) / 8; 155 else 156 return (EINVAL); 157 158 bit = msr & 0x7; 159 160 if (access & MSR_BITMAP_ACCESS_READ) 161 bitmap[byte] &= ~(1 << bit); 162 else 163 bitmap[byte] |= 1 << bit; 164 165 byte += 2048; 166 if (access & MSR_BITMAP_ACCESS_WRITE) 167 bitmap[byte] &= ~(1 << bit); 168 else 169 bitmap[byte] |= 1 << bit; 170 171 return (0); 172 } 173 174 static uint64_t misc_enable; 175 static uint64_t platform_info; 176 static uint64_t turbo_ratio_limit; 177 static uint64_t host_msrs[GUEST_MSR_NUM]; 178 179 static bool 180 nehalem_cpu(void) 181 { 182 u_int family, model; 183 184 /* 185 * The family:model numbers belonging to the Nehalem microarchitecture 186 * are documented in Section 35.5, Intel SDM dated Feb 2014. 187 */ 188 family = CPUID_TO_FAMILY(cpu_id); 189 model = CPUID_TO_MODEL(cpu_id); 190 if (family == 0x6) { 191 switch (model) { 192 case 0x1A: 193 case 0x1E: 194 case 0x1F: 195 case 0x2E: 196 return (true); 197 default: 198 break; 199 } 200 } 201 return (false); 202 } 203 204 static bool 205 westmere_cpu(void) 206 { 207 u_int family, model; 208 209 /* 210 * The family:model numbers belonging to the Westmere microarchitecture 211 * are documented in Section 35.6, Intel SDM dated Feb 2014. 212 */ 213 family = CPUID_TO_FAMILY(cpu_id); 214 model = CPUID_TO_MODEL(cpu_id); 215 if (family == 0x6) { 216 switch (model) { 217 case 0x25: 218 case 0x2C: 219 return (true); 220 default: 221 break; 222 } 223 } 224 return (false); 225 } 226 227 static bool 228 pat_valid(uint64_t val) 229 { 230 int i, pa; 231 232 /* 233 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT" 234 * 235 * Extract PA0 through PA7 and validate that each one encodes a 236 * valid memory type. 237 */ 238 for (i = 0; i < 8; i++) { 239 pa = (val >> (i * 8)) & 0xff; 240 if (pa == 2 || pa == 3 || pa >= 8) 241 return (false); 242 } 243 return (true); 244 } 245 246 void 247 vmx_msr_init(void) 248 { 249 uint64_t bus_freq, ratio; 250 int i; 251 252 /* 253 * It is safe to cache the values of the following MSRs because 254 * they don't change based on curcpu, curproc or curthread. 255 */ 256 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 257 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 258 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 259 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 260 261 /* 262 * Initialize emulated MSRs 263 */ 264 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE); 265 /* 266 * Set mandatory bits 267 * 11: branch trace disabled 268 * 12: PEBS unavailable 269 * Clear unsupported features 270 * 16: SpeedStep enable 271 * 18: enable MONITOR FSM 272 */ 273 misc_enable |= (1 << 12) | (1 << 11); 274 misc_enable &= ~((1 << 18) | (1 << 16)); 275 276 if (nehalem_cpu() || westmere_cpu()) 277 bus_freq = 133330000; /* 133Mhz */ 278 else 279 bus_freq = 100000000; /* 100Mhz */ 280 281 /* 282 * XXXtime 283 * The ratio should really be based on the virtual TSC frequency as 284 * opposed to the host TSC. 285 */ 286 ratio = (tsc_freq / bus_freq) & 0xff; 287 288 /* 289 * The register definition is based on the micro-architecture 290 * but the following bits are always the same: 291 * [15:8] Maximum Non-Turbo Ratio 292 * [28] Programmable Ratio Limit for Turbo Mode 293 * [29] Programmable TDC-TDP Limit for Turbo Mode 294 * [47:40] Maximum Efficiency Ratio 295 * 296 * The other bits can be safely set to 0 on all 297 * micro-architectures up to Haswell. 298 */ 299 platform_info = (ratio << 8) | (ratio << 40); 300 301 /* 302 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is 303 * dependent on the maximum cores per package supported by the micro- 304 * architecture. For e.g., Westmere supports 6 cores per package and 305 * uses the low 48 bits. Sandybridge support 8 cores per package and 306 * uses up all 64 bits. 307 * 308 * However, the unused bits are reserved so we pretend that all bits 309 * in this MSR are valid. 310 */ 311 for (i = 0; i < 8; i++) 312 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio; 313 } 314 315 void 316 vmx_msr_guest_init(struct vmx *vmx, int vcpuid) 317 { 318 uint64_t *guest_msrs; 319 320 guest_msrs = vmx->guest_msrs[vcpuid]; 321 322 /* 323 * The permissions bitmap is shared between all vcpus so initialize it 324 * once when initializing the vBSP. 325 */ 326 if (vcpuid == 0) { 327 guest_msr_rw(vmx, MSR_LSTAR); 328 guest_msr_rw(vmx, MSR_CSTAR); 329 guest_msr_rw(vmx, MSR_STAR); 330 guest_msr_rw(vmx, MSR_SF_MASK); 331 guest_msr_rw(vmx, MSR_KGSBASE); 332 } 333 334 /* 335 * Initialize guest IA32_PAT MSR with default value after reset. 336 */ 337 guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | 338 PAT_VALUE(1, PAT_WRITE_THROUGH) | 339 PAT_VALUE(2, PAT_UNCACHED) | 340 PAT_VALUE(3, PAT_UNCACHEABLE) | 341 PAT_VALUE(4, PAT_WRITE_BACK) | 342 PAT_VALUE(5, PAT_WRITE_THROUGH) | 343 PAT_VALUE(6, PAT_UNCACHED) | 344 PAT_VALUE(7, PAT_UNCACHEABLE); 345 346 return; 347 } 348 349 void 350 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) 351 { 352 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 353 354 /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */ 355 update_pcb_bases(curpcb); 356 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); 357 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); 358 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); 359 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); 360 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); 361 } 362 363 void 364 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) 365 { 366 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 367 368 /* Save guest MSRs */ 369 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 370 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 371 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 372 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 373 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); 374 375 /* Restore host MSRs */ 376 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); 377 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); 378 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); 379 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); 380 381 /* MSR_KGSBASE will be restored on the way back to userspace */ 382 } 383 384 int 385 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) 386 { 387 const uint64_t *guest_msrs; 388 int error; 389 390 guest_msrs = vmx->guest_msrs[vcpuid]; 391 error = 0; 392 393 switch (num) { 394 case MSR_MCG_CAP: 395 case MSR_MCG_STATUS: 396 *val = 0; 397 break; 398 case MSR_MTRRcap: 399 case MSR_MTRRdefType: 400 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: 401 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: 402 case MSR_MTRR64kBase: 403 *val = 0; 404 break; 405 case MSR_IA32_MISC_ENABLE: 406 *val = misc_enable; 407 break; 408 case MSR_PLATFORM_INFO: 409 *val = platform_info; 410 break; 411 case MSR_TURBO_RATIO_LIMIT: 412 case MSR_TURBO_RATIO_LIMIT1: 413 *val = turbo_ratio_limit; 414 break; 415 case MSR_PAT: 416 *val = guest_msrs[IDX_MSR_PAT]; 417 break; 418 default: 419 error = EINVAL; 420 break; 421 } 422 return (error); 423 } 424 425 int 426 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) 427 { 428 uint64_t *guest_msrs; 429 uint64_t changed; 430 int error; 431 432 guest_msrs = vmx->guest_msrs[vcpuid]; 433 error = 0; 434 435 switch (num) { 436 case MSR_MCG_CAP: 437 case MSR_MCG_STATUS: 438 break; /* ignore writes */ 439 case MSR_MTRRcap: 440 vm_inject_gp(vmx->vm, vcpuid); 441 break; 442 case MSR_MTRRdefType: 443 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: 444 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: 445 case MSR_MTRR64kBase: 446 break; /* Ignore writes */ 447 case MSR_IA32_MISC_ENABLE: 448 changed = val ^ misc_enable; 449 /* 450 * If the host has disabled the NX feature then the guest 451 * also cannot use it. However, a Linux guest will try to 452 * enable the NX feature by writing to the MISC_ENABLE MSR. 453 * 454 * This can be safely ignored because the memory management 455 * code looks at CPUID.80000001H:EDX.NX to check if the 456 * functionality is actually enabled. 457 */ 458 changed &= ~(1UL << 34); 459 460 /* 461 * Punt to userspace if any other bits are being modified. 462 */ 463 if (changed) 464 error = EINVAL; 465 466 break; 467 case MSR_PAT: 468 if (pat_valid(val)) 469 guest_msrs[IDX_MSR_PAT] = val; 470 else 471 vm_inject_gp(vmx->vm, vcpuid); 472 break; 473 case MSR_TSC: 474 error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc()); 475 break; 476 default: 477 error = EINVAL; 478 break; 479 } 480 481 return (error); 482 } 483