1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/proc.h> 37 38 #include <machine/clock.h> 39 #include <machine/cpufunc.h> 40 #include <machine/md_var.h> 41 #include <machine/pcb.h> 42 #include <machine/specialreg.h> 43 #include <machine/vmm.h> 44 45 #include "vmx.h" 46 #include "vmx_msr.h" 47 48 static boolean_t 49 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) 50 { 51 52 if (msr_val & (1UL << (bitpos + 32))) 53 return (TRUE); 54 else 55 return (FALSE); 56 } 57 58 static boolean_t 59 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) 60 { 61 62 if ((msr_val & (1UL << bitpos)) == 0) 63 return (TRUE); 64 else 65 return (FALSE); 66 } 67 68 uint32_t 69 vmx_revision(void) 70 { 71 72 return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); 73 } 74 75 /* 76 * Generate a bitmask to be used for the VMCS execution control fields. 77 * 78 * The caller specifies what bits should be set to one in 'ones_mask' 79 * and what bits should be set to zero in 'zeros_mask'. The don't-care 80 * bits are set to the default value. The default values are obtained 81 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining 82 * VMX Capabilities". 83 * 84 * Returns zero on success and non-zero on error. 85 */ 86 int 87 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, 88 uint32_t zeros_mask, uint32_t *retval) 89 { 90 int i; 91 uint64_t val, trueval; 92 boolean_t true_ctls_avail, one_allowed, zero_allowed; 93 94 /* We cannot ask the same bit to be set to both '1' and '0' */ 95 if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) 96 return (EINVAL); 97 98 if (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) 99 true_ctls_avail = TRUE; 100 else 101 true_ctls_avail = FALSE; 102 103 val = rdmsr(ctl_reg); 104 if (true_ctls_avail) 105 trueval = rdmsr(true_ctl_reg); /* step c */ 106 else 107 trueval = val; /* step a */ 108 109 for (i = 0; i < 32; i++) { 110 one_allowed = vmx_ctl_allows_one_setting(trueval, i); 111 zero_allowed = vmx_ctl_allows_zero_setting(trueval, i); 112 113 KASSERT(one_allowed || zero_allowed, 114 ("invalid zero/one setting for bit %d of ctl 0x%0x, " 115 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg)); 116 117 if (zero_allowed && !one_allowed) { /* b(i),c(i) */ 118 if (ones_mask & (1 << i)) 119 return (EINVAL); 120 *retval &= ~(1 << i); 121 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */ 122 if (zeros_mask & (1 << i)) 123 return (EINVAL); 124 *retval |= 1 << i; 125 } else { 126 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */ 127 *retval &= ~(1 << i); 128 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */ 129 *retval |= 1 << i; 130 else if (!true_ctls_avail) 131 *retval &= ~(1 << i); /* b(iii) */ 132 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/ 133 *retval &= ~(1 << i); 134 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */ 135 *retval |= 1 << i; 136 else { 137 panic("vmx_set_ctlreg: unable to determine " 138 "correct value of ctl bit %d for msr " 139 "0x%0x and true msr 0x%0x", i, ctl_reg, 140 true_ctl_reg); 141 } 142 } 143 } 144 145 return (0); 146 } 147 148 void 149 msr_bitmap_initialize(char *bitmap) 150 { 151 152 memset(bitmap, 0xff, PAGE_SIZE); 153 } 154 155 int 156 msr_bitmap_change_access(char *bitmap, u_int msr, int access) 157 { 158 int byte, bit; 159 160 if (msr <= 0x00001FFF) 161 byte = msr / 8; 162 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) 163 byte = 1024 + (msr - 0xC0000000) / 8; 164 else 165 return (EINVAL); 166 167 bit = msr & 0x7; 168 169 if (access & MSR_BITMAP_ACCESS_READ) 170 bitmap[byte] &= ~(1 << bit); 171 else 172 bitmap[byte] |= 1 << bit; 173 174 byte += 2048; 175 if (access & MSR_BITMAP_ACCESS_WRITE) 176 bitmap[byte] &= ~(1 << bit); 177 else 178 bitmap[byte] |= 1 << bit; 179 180 return (0); 181 } 182 183 static uint64_t misc_enable; 184 static uint64_t platform_info; 185 static uint64_t turbo_ratio_limit; 186 static uint64_t host_msrs[GUEST_MSR_NUM]; 187 188 static bool 189 nehalem_cpu(void) 190 { 191 u_int family, model; 192 193 /* 194 * The family:model numbers belonging to the Nehalem microarchitecture 195 * are documented in Section 35.5, Intel SDM dated Feb 2014. 196 */ 197 family = CPUID_TO_FAMILY(cpu_id); 198 model = CPUID_TO_MODEL(cpu_id); 199 if (family == 0x6) { 200 switch (model) { 201 case 0x1A: 202 case 0x1E: 203 case 0x1F: 204 case 0x2E: 205 return (true); 206 default: 207 break; 208 } 209 } 210 return (false); 211 } 212 213 static bool 214 westmere_cpu(void) 215 { 216 u_int family, model; 217 218 /* 219 * The family:model numbers belonging to the Westmere microarchitecture 220 * are documented in Section 35.6, Intel SDM dated Feb 2014. 221 */ 222 family = CPUID_TO_FAMILY(cpu_id); 223 model = CPUID_TO_MODEL(cpu_id); 224 if (family == 0x6) { 225 switch (model) { 226 case 0x25: 227 case 0x2C: 228 return (true); 229 default: 230 break; 231 } 232 } 233 return (false); 234 } 235 236 static bool 237 pat_valid(uint64_t val) 238 { 239 int i, pa; 240 241 /* 242 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT" 243 * 244 * Extract PA0 through PA7 and validate that each one encodes a 245 * valid memory type. 246 */ 247 for (i = 0; i < 8; i++) { 248 pa = (val >> (i * 8)) & 0xff; 249 if (pa == 2 || pa == 3 || pa >= 8) 250 return (false); 251 } 252 return (true); 253 } 254 255 void 256 vmx_msr_init(void) 257 { 258 uint64_t bus_freq, ratio; 259 int i; 260 261 /* 262 * It is safe to cache the values of the following MSRs because 263 * they don't change based on curcpu, curproc or curthread. 264 */ 265 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 266 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 267 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 268 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 269 270 /* 271 * Initialize emulated MSRs 272 */ 273 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE); 274 /* 275 * Set mandatory bits 276 * 11: branch trace disabled 277 * 12: PEBS unavailable 278 * Clear unsupported features 279 * 16: SpeedStep enable 280 * 18: enable MONITOR FSM 281 */ 282 misc_enable |= (1 << 12) | (1 << 11); 283 misc_enable &= ~((1 << 18) | (1 << 16)); 284 285 if (nehalem_cpu() || westmere_cpu()) 286 bus_freq = 133330000; /* 133Mhz */ 287 else 288 bus_freq = 100000000; /* 100Mhz */ 289 290 /* 291 * XXXtime 292 * The ratio should really be based on the virtual TSC frequency as 293 * opposed to the host TSC. 294 */ 295 ratio = (tsc_freq / bus_freq) & 0xff; 296 297 /* 298 * The register definition is based on the micro-architecture 299 * but the following bits are always the same: 300 * [15:8] Maximum Non-Turbo Ratio 301 * [28] Programmable Ratio Limit for Turbo Mode 302 * [29] Programmable TDC-TDP Limit for Turbo Mode 303 * [47:40] Maximum Efficiency Ratio 304 * 305 * The other bits can be safely set to 0 on all 306 * micro-architectures up to Haswell. 307 */ 308 platform_info = (ratio << 8) | (ratio << 40); 309 310 /* 311 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is 312 * dependent on the maximum cores per package supported by the micro- 313 * architecture. For e.g., Westmere supports 6 cores per package and 314 * uses the low 48 bits. Sandybridge support 8 cores per package and 315 * uses up all 64 bits. 316 * 317 * However, the unused bits are reserved so we pretend that all bits 318 * in this MSR are valid. 319 */ 320 for (i = 0; i < 8; i++) 321 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio; 322 } 323 324 void 325 vmx_msr_guest_init(struct vmx *vmx, int vcpuid) 326 { 327 uint64_t *guest_msrs; 328 329 guest_msrs = vmx->guest_msrs[vcpuid]; 330 331 /* 332 * The permissions bitmap is shared between all vcpus so initialize it 333 * once when initializing the vBSP. 334 */ 335 if (vcpuid == 0) { 336 guest_msr_rw(vmx, MSR_LSTAR); 337 guest_msr_rw(vmx, MSR_CSTAR); 338 guest_msr_rw(vmx, MSR_STAR); 339 guest_msr_rw(vmx, MSR_SF_MASK); 340 guest_msr_rw(vmx, MSR_KGSBASE); 341 } 342 343 /* 344 * Initialize guest IA32_PAT MSR with default value after reset. 345 */ 346 guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | 347 PAT_VALUE(1, PAT_WRITE_THROUGH) | 348 PAT_VALUE(2, PAT_UNCACHED) | 349 PAT_VALUE(3, PAT_UNCACHEABLE) | 350 PAT_VALUE(4, PAT_WRITE_BACK) | 351 PAT_VALUE(5, PAT_WRITE_THROUGH) | 352 PAT_VALUE(6, PAT_UNCACHED) | 353 PAT_VALUE(7, PAT_UNCACHEABLE); 354 355 return; 356 } 357 358 void 359 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) 360 { 361 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 362 363 /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */ 364 update_pcb_bases(curpcb); 365 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); 366 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); 367 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); 368 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); 369 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); 370 } 371 372 void 373 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) 374 { 375 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 376 377 /* Save guest MSRs */ 378 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 379 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 380 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 381 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 382 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); 383 384 /* Restore host MSRs */ 385 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); 386 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); 387 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); 388 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); 389 390 /* MSR_KGSBASE will be restored on the way back to userspace */ 391 } 392 393 int 394 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) 395 { 396 const uint64_t *guest_msrs; 397 int error; 398 399 guest_msrs = vmx->guest_msrs[vcpuid]; 400 error = 0; 401 402 switch (num) { 403 case MSR_MCG_CAP: 404 case MSR_MCG_STATUS: 405 *val = 0; 406 break; 407 case MSR_MTRRcap: 408 case MSR_MTRRdefType: 409 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: 410 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: 411 case MSR_MTRR64kBase: 412 *val = 0; 413 break; 414 case MSR_IA32_MISC_ENABLE: 415 *val = misc_enable; 416 break; 417 case MSR_PLATFORM_INFO: 418 *val = platform_info; 419 break; 420 case MSR_TURBO_RATIO_LIMIT: 421 case MSR_TURBO_RATIO_LIMIT1: 422 *val = turbo_ratio_limit; 423 break; 424 case MSR_PAT: 425 *val = guest_msrs[IDX_MSR_PAT]; 426 break; 427 default: 428 error = EINVAL; 429 break; 430 } 431 return (error); 432 } 433 434 int 435 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) 436 { 437 uint64_t *guest_msrs; 438 uint64_t changed; 439 int error; 440 441 guest_msrs = vmx->guest_msrs[vcpuid]; 442 error = 0; 443 444 switch (num) { 445 case MSR_MCG_CAP: 446 case MSR_MCG_STATUS: 447 break; /* ignore writes */ 448 case MSR_MTRRcap: 449 vm_inject_gp(vmx->vm, vcpuid); 450 break; 451 case MSR_MTRRdefType: 452 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: 453 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: 454 case MSR_MTRR64kBase: 455 break; /* Ignore writes */ 456 case MSR_IA32_MISC_ENABLE: 457 changed = val ^ misc_enable; 458 /* 459 * If the host has disabled the NX feature then the guest 460 * also cannot use it. However, a Linux guest will try to 461 * enable the NX feature by writing to the MISC_ENABLE MSR. 462 * 463 * This can be safely ignored because the memory management 464 * code looks at CPUID.80000001H:EDX.NX to check if the 465 * functionality is actually enabled. 466 */ 467 changed &= ~(1UL << 34); 468 469 /* 470 * Punt to userspace if any other bits are being modified. 471 */ 472 if (changed) 473 error = EINVAL; 474 475 break; 476 case MSR_PAT: 477 if (pat_valid(val)) 478 guest_msrs[IDX_MSR_PAT] = val; 479 else 480 vm_inject_gp(vmx->vm, vcpuid); 481 break; 482 case MSR_TSC: 483 error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc()); 484 break; 485 default: 486 error = EINVAL; 487 break; 488 } 489 490 return (error); 491 } 492