/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>

#include "vmx.h"
#include "vmx_msr.h"
#include "x86.h"

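/*
 * Each VMX capability MSR reports two 32-bit halves: the low half gives
 * the "allowed 0-settings" (a control bit may be 0 only if its low-half
 * bit is 0) and the high half gives the "allowed 1-settings" (a control
 * bit may be 1 only if its high-half bit is 1).  The helpers below test
 * those halves for a single control bit position.
 */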
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{

	return ((msr_val & (1UL << (bitpos + 32))) != 0);
}

static bool
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{

	return ((msr_val & (1UL << bitpos)) == 0);
}

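/*
 * Bits 30:0 of MSR_VMX_BASIC contain the VMCS revision identifier that
 * must be written into the VMXON and VMCS regions; bit 31 is always 0.
 */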
uint32_t
vmx_revision(void)
{

	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
}

/*
 * Generate a bitmask to be used for the VMCS execution control fields.
 *
 * The caller specifies which bits should be set to one in 'ones_mask'
 * and which bits should be set to zero in 'zeros_mask'. The don't-care
 * bits are set to the default value. The default values are obtained
 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
 * VMX Capabilities".
 *
 * Returns zero on success and non-zero on error.
 */
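/*
 * Typical use (an illustrative sketch; see vmx.c for the masks that are
 * actually passed in):
 *
 *	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
 *	    MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_CTLS_ONE_SETTING,
 *	    PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls);
 */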
int
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
	       uint32_t zeros_mask, uint32_t *retval)
{
	int i;
	uint64_t val, trueval;
	bool true_ctls_avail, one_allowed, zero_allowed;

	/* We cannot ask the same bit to be set to both '1' and '0' */
	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
		return (EINVAL);

	true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;

	val = rdmsr(ctl_reg);
	if (true_ctls_avail)
		trueval = rdmsr(true_ctl_reg);		/* step c */
	else
		trueval = val;				/* step a */

	for (i = 0; i < 32; i++) {
		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);

		KASSERT(one_allowed || zero_allowed,
			("invalid zero/one setting for bit %d of ctl 0x%0x, "
			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));

		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
			if (ones_mask & (1 << i))
				return (EINVAL);
			*retval &= ~(1 << i);
		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
			if (zeros_mask & (1 << i))
				return (EINVAL);
			*retval |= 1 << i;
		} else {
			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
				*retval &= ~(1 << i);
			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
				*retval |= 1 << i;
			else if (!true_ctls_avail)
				*retval &= ~(1 << i);	/* b(iii) */
			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
				*retval &= ~(1 << i);
			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
				*retval |= 1 << i;
			else {
				panic("vmx_set_ctlreg: unable to determine "
				      "correct value of ctl bit %d for msr "
				      "0x%0x and true msr 0x%0x", i, ctl_reg,
				      true_ctl_reg);
			}
		}
	}

	return (0);
}

void
msr_bitmap_initialize(char *bitmap)
{

	memset(bitmap, 0xff, PAGE_SIZE);
}

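/*
 * The 4KB MSR bitmap is organized as four 1KB regions (Intel SDM,
 * "MSR-Bitmap Address"):
 *   bytes    0-1023: read bitmap for low MSRs  (0x00000000 - 0x00001FFF)
 *   bytes 1024-2047: read bitmap for high MSRs (0xC0000000 - 0xC0001FFF)
 *   bytes 2048-3071: write bitmap for low MSRs
 *   bytes 3072-4095: write bitmap for high MSRs
 * A set bit forces a VM-exit on the corresponding access; a clear bit
 * grants the guest direct access to the MSR.
 */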
int
msr_bitmap_change_access(char *bitmap, u_int msr, int access)
{
	int byte, bit;

	if (msr <= 0x00001FFF)
		byte = msr / 8;
	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
		byte = 1024 + (msr - 0xC0000000) / 8;
	else
		return (EINVAL);

	bit = msr & 0x7;

	if (access & MSR_BITMAP_ACCESS_READ)
		bitmap[byte] &= ~(1 << bit);
	else
		bitmap[byte] |= 1 << bit;

	byte += 2048;
	if (access & MSR_BITMAP_ACCESS_WRITE)
		bitmap[byte] &= ~(1 << bit);
	else
		bitmap[byte] |= 1 << bit;

	return (0);
}

static uint64_t misc_enable;
static uint64_t platform_info;
static uint64_t turbo_ratio_limit;
static uint64_t host_msrs[GUEST_MSR_NUM];

static bool
nehalem_cpu(void)
{
	u_int family, model;

	/*
	 * The family:model numbers belonging to the Nehalem microarchitecture
	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
	 */
	family = CPUID_TO_FAMILY(cpu_id);
	model = CPUID_TO_MODEL(cpu_id);
	if (family == 0x6) {
		switch (model) {
		case 0x1A:
		case 0x1E:
		case 0x1F:
		case 0x2E:
			return (true);
		default:
			break;
		}
	}
	return (false);
}

static bool
westmere_cpu(void)
{
	u_int family, model;

	/*
	 * The family:model numbers belonging to the Westmere microarchitecture
	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
	 */
	family = CPUID_TO_FAMILY(cpu_id);
	model = CPUID_TO_MODEL(cpu_id);
	if (family == 0x6) {
		switch (model) {
		case 0x25:
		case 0x2C:
			return (true);
		default:
			break;
		}
	}
	return (false);
}

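/*
 * The valid PAT memory type encodings are 0 (UC), 1 (WC), 4 (WT),
 * 5 (WP), 6 (WB) and 7 (UC-); encodings 2, 3 and 8 through 255 are
 * reserved.
 */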
static bool
pat_valid(uint64_t val)
{
	int i, pa;

	/*
	 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
	 *
	 * Extract PA0 through PA7 and validate that each one encodes a
	 * valid memory type.
	 */
	for (i = 0; i < 8; i++) {
		pa = (val >> (i * 8)) & 0xff;
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}

void
vmx_msr_init(void)
{
	uint64_t bus_freq, ratio;
	int i;

	/*
	 * It is safe to cache the values of the following MSRs because
	 * they don't change based on curcpu, curproc or curthread.
	 */
	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);

	/*
	 * Initialize emulated MSRs
	 */
	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
	/*
	 * Set mandatory bits
	 *  11:   branch trace disabled
	 *  12:   PEBS unavailable
	 * Clear unsupported features
	 *  16:   SpeedStep enable
	 *  18:   enable MONITOR FSM
	 */
	misc_enable |= (1 << 12) | (1 << 11);
	misc_enable &= ~((1 << 18) | (1 << 16));

	if (nehalem_cpu() || westmere_cpu())
		bus_freq = 133330000;		/* 133.33 MHz */
	else
		bus_freq = 100000000;		/* 100 MHz */

	/*
	 * XXXtime
	 * The ratio should really be based on the virtual TSC frequency as
	 * opposed to the host TSC.
	 */
	ratio = (tsc_freq / bus_freq) & 0xff;

	/*
	 * The register definition is based on the micro-architecture
	 * but the following bits are always the same:
	 * [15:8]  Maximum Non-Turbo Ratio
	 * [28]    Programmable Ratio Limit for Turbo Mode
	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
	 * [47:40] Maximum Efficiency Ratio
	 *
	 * The other bits can be safely set to 0 on all
	 * micro-architectures up to Haswell.
	 */
	platform_info = (ratio << 8) | (ratio << 40);
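
	/*
	 * For example, on a host with a 2.4 GHz TSC and a 100 MHz bus the
	 * ratio is 0x18, making platform_info 0x180000001800 and
	 * turbo_ratio_limit (built below) 0x1818181818181818.
	 */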

	/*
	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
	 * dependent on the maximum cores per package supported by the micro-
	 * architecture. For example, Westmere supports 6 cores per package
	 * and uses the low 48 bits. Sandy Bridge supports 8 cores per package
	 * and uses all 64 bits.
	 *
	 * However, the unused bits are reserved so we pretend that all bits
	 * in this MSR are valid.
	 */
	for (i = 0; i < 8; i++)
		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
}

void
vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	/*
	 * The permissions bitmap is shared between all vcpus so initialize it
	 * once when initializing the vBSP.
	 */
	if (vcpu->vcpuid == 0) {
		guest_msr_rw(vmx, MSR_LSTAR);
		guest_msr_rw(vmx, MSR_CSTAR);
		guest_msr_rw(vmx, MSR_STAR);
		guest_msr_rw(vmx, MSR_SF_MASK);
		guest_msr_rw(vmx, MSR_KGSBASE);
	}

	/*
	 * Initialize guest IA32_PAT MSR with the architectural default
	 * value after reset (0x0007040600070406).
	 */
	vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
	    PAT_VALUE(2, PAT_UNCACHED)		|
	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
	    PAT_VALUE(4, PAT_WRITE_BACK)	|
	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
	    PAT_VALUE(6, PAT_UNCACHED)		|
	    PAT_VALUE(7, PAT_UNCACHEABLE);
}

void
vmx_msr_guest_enter(struct vmx_vcpu *vcpu)
{

	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
	update_pcb_bases(curpcb);
	wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]);
	wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]);
}

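/*
 * Load the guest's TSC_AUX value just before VM-entry.  The WRMSR is
 * skipped when the guest value already matches the host's, since in the
 * common case neither side changes it.
 */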
void
vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
	uint32_t host_aux = cpu_auxmsr();

	if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
		wrmsr(MSR_TSC_AUX, guest_tsc_aux);
}

void
vmx_msr_guest_exit(struct vmx_vcpu *vcpu)
{

	/* Save guest MSRs */
	vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
	vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);

	/* Restore host MSRs */
	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);

	/* MSR_KGSBASE will be restored on the way back to userspace */
}

void
vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
	uint32_t host_aux = cpu_auxmsr();

	if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
		/*
		 * Note that it is not necessary to save the guest value
		 * here; vcpu->guest_msrs[IDX_MSR_TSC_AUX] always
		 * contains the current value since it is updated whenever
		 * the guest writes to it (which is expected to be very
		 * rare).
		 */
		wrmsr(MSR_TSC_AUX, host_aux);
}

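/*
 * Emulate RDMSR for the MSRs handled in the kernel.  An unhandled MSR
 * returns EINVAL, leaving it to the caller to complete the access, e.g.
 * by reflecting it to userspace.
 */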
int
vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu)
{
	int error;

	error = 0;

	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		*val = 0;
		break;
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		*val = misc_enable;
		break;
	case MSR_PLATFORM_INFO:
		*val = platform_info;
		break;
	case MSR_TURBO_RATIO_LIMIT:
	case MSR_TURBO_RATIO_LIMIT1:
		*val = turbo_ratio_limit;
		break;
	case MSR_PAT:
		*val = vcpu->guest_msrs[IDX_MSR_PAT];
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

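/*
 * Emulate WRMSR for the MSRs handled in the kernel.  Writes of invalid
 * values to a known MSR inject #GP into the guest; writes to unhandled
 * MSRs return EINVAL.
 */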
int
vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu)
{
	uint64_t changed;
	int error;

	error = 0;

	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		break;		/* ignore writes */
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		changed = val ^ misc_enable;
		/*
		 * If the host has disabled the NX feature then the guest
		 * also cannot use it. However, a Linux guest will try to
		 * enable the NX feature by writing to the MISC_ENABLE MSR.
		 *
		 * This can be safely ignored because the memory management
		 * code looks at CPUID.80000001H:EDX.NX to check if the
		 * functionality is actually enabled.
		 */
		changed &= ~(1UL << 34);

		/*
		 * Punt to userspace if any other bits are being modified.
		 */
		if (changed)
			error = EINVAL;

		break;
	case MSR_PAT:
		if (pat_valid(val))
			vcpu->guest_msrs[IDX_MSR_PAT] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	case MSR_TSC:
		error = vmx_set_tsc_offset(vcpu, val - rdtsc());
		break;
	case MSR_TSC_AUX:
		if (vmx_have_msr_tsc_aux)
			/*
			 * vmx_msr_guest_enter_tsc_aux() will apply this
			 * value when it is called immediately before guest
			 * entry.
			 */
			vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
515