xref: /freebsd/sys/amd64/vmm/intel/vmx_msr.c (revision d3d381b2b194b4d24853e92eecef55f262688d1a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/proc.h>
37 
38 #include <machine/clock.h>
39 #include <machine/cpufunc.h>
40 #include <machine/md_var.h>
41 #include <machine/pcb.h>
42 #include <machine/specialreg.h>
43 #include <machine/vmm.h>
44 
45 #include "vmx.h"
46 #include "vmx_msr.h"
47 
48 static boolean_t
49 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
50 {
51 
52 	if (msr_val & (1UL << (bitpos + 32)))
53 		return (TRUE);
54 	else
55 		return (FALSE);
56 }
57 
58 static boolean_t
59 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
60 {
61 
62 	if ((msr_val & (1UL << bitpos)) == 0)
63 		return (TRUE);
64 	else
65 		return (FALSE);
66 }
67 
68 uint32_t
69 vmx_revision(void)
70 {
71 
72 	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
73 }
74 
75 /*
76  * Generate a bitmask to be used for the VMCS execution control fields.
77  *
78  * The caller specifies what bits should be set to one in 'ones_mask'
79  * and what bits should be set to zero in 'zeros_mask'. The don't-care
80  * bits are set to the default value. The default values are obtained
81  * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
82  * VMX Capabilities".
83  *
84  * Returns zero on success and non-zero on error.
85  */
86 int
87 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
88 	       uint32_t zeros_mask, uint32_t *retval)
89 {
90 	int i;
91 	uint64_t val, trueval;
92 	boolean_t true_ctls_avail, one_allowed, zero_allowed;
93 
94 	/* We cannot ask the same bit to be set to both '1' and '0' */
95 	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
96 		return (EINVAL);
97 
98 	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
99 		true_ctls_avail = TRUE;
100 	else
101 		true_ctls_avail = FALSE;
102 
103 	val = rdmsr(ctl_reg);
104 	if (true_ctls_avail)
105 		trueval = rdmsr(true_ctl_reg);		/* step c */
106 	else
107 		trueval = val;				/* step a */
108 
109 	for (i = 0; i < 32; i++) {
110 		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
111 		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
112 
113 		KASSERT(one_allowed || zero_allowed,
114 			("invalid zero/one setting for bit %d of ctl 0x%0x, "
115 			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
116 
117 		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
118 			if (ones_mask & (1 << i))
119 				return (EINVAL);
120 			*retval &= ~(1 << i);
121 		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
122 			if (zeros_mask & (1 << i))
123 				return (EINVAL);
124 			*retval |= 1 << i;
125 		} else {
126 			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
127 				*retval &= ~(1 << i);
128 			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
129 				*retval |= 1 << i;
130 			else if (!true_ctls_avail)
131 				*retval &= ~(1 << i);	/* b(iii) */
132 			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
133 				*retval &= ~(1 << i);
134 			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
135 				*retval |= 1 << i;
136 			else {
137 				panic("vmx_set_ctlreg: unable to determine "
138 				      "correct value of ctl bit %d for msr "
139 				      "0x%0x and true msr 0x%0x", i, ctl_reg,
140 				      true_ctl_reg);
141 			}
142 		}
143 	}
144 
145 	return (0);
146 }
147 
148 void
149 msr_bitmap_initialize(char *bitmap)
150 {
151 
152 	memset(bitmap, 0xff, PAGE_SIZE);
153 }
154 
155 int
156 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
157 {
158 	int byte, bit;
159 
160 	if (msr <= 0x00001FFF)
161 		byte = msr / 8;
162 	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
163 		byte = 1024 + (msr - 0xC0000000) / 8;
164 	else
165 		return (EINVAL);
166 
167 	bit = msr & 0x7;
168 
169 	if (access & MSR_BITMAP_ACCESS_READ)
170 		bitmap[byte] &= ~(1 << bit);
171 	else
172 		bitmap[byte] |= 1 << bit;
173 
174 	byte += 2048;
175 	if (access & MSR_BITMAP_ACCESS_WRITE)
176 		bitmap[byte] &= ~(1 << bit);
177 	else
178 		bitmap[byte] |= 1 << bit;
179 
180 	return (0);
181 }
182 
/*
 * Emulated MSR values, computed once by vmx_msr_init() and returned to the
 * guest by vmx_rdmsr():
 *  misc_enable:       host MSR_IA32_MISC_ENABLE with bits 11 and 12 set and
 *                     bits 16 and 18 cleared
 *  platform_info:     TSC/bus-clock ratio placed in bits 15:8 and 47:40
 *  turbo_ratio_limit: the same ratio replicated into all eight bytes
 */
static uint64_t misc_enable;
static uint64_t platform_info;
static uint64_t turbo_ratio_limit;
/*
 * Host MSR values cached by vmx_msr_init() and written back by
 * vmx_msr_guest_exit().  Only the LSTAR, CSTAR, STAR and SF_MASK slots are
 * filled in by this file.
 */
static uint64_t host_msrs[GUEST_MSR_NUM];
187 
188 static bool
189 nehalem_cpu(void)
190 {
191 	u_int family, model;
192 
193 	/*
194 	 * The family:model numbers belonging to the Nehalem microarchitecture
195 	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
196 	 */
197 	family = CPUID_TO_FAMILY(cpu_id);
198 	model = CPUID_TO_MODEL(cpu_id);
199 	if (family == 0x6) {
200 		switch (model) {
201 		case 0x1A:
202 		case 0x1E:
203 		case 0x1F:
204 		case 0x2E:
205 			return (true);
206 		default:
207 			break;
208 		}
209 	}
210 	return (false);
211 }
212 
213 static bool
214 westmere_cpu(void)
215 {
216 	u_int family, model;
217 
218 	/*
219 	 * The family:model numbers belonging to the Westmere microarchitecture
220 	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
221 	 */
222 	family = CPUID_TO_FAMILY(cpu_id);
223 	model = CPUID_TO_MODEL(cpu_id);
224 	if (family == 0x6) {
225 		switch (model) {
226 		case 0x25:
227 		case 0x2C:
228 			return (true);
229 		default:
230 			break;
231 		}
232 	}
233 	return (false);
234 }
235 
/*
 * Validate a value destined for the IA32_PAT MSR.
 *
 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT".
 * The value is treated as eight one-byte entries (PA0 in the lowest byte
 * through PA7 in the highest).  An entry is accepted only if the whole
 * byte is one of 0, 1, 4, 5, 6 or 7; the encodings 2 and 3 and anything
 * from 8 upwards are rejected.
 *
 * Returns true when all eight entries are acceptable.
 */
static bool
pat_valid(uint64_t val)
{
	uint64_t remaining;
	int entry, slot;

	remaining = val;
	for (slot = 0; slot < 8; slot++) {
		entry = (int)(remaining & 0xff);
		switch (entry) {
		case 0:
		case 1:
		case 4:
		case 5:
		case 6:
		case 7:
			break;
		default:
			return (false);
		}
		/* Move the next PAx byte into the low position. */
		remaining >>= 8;
	}
	return (true);
}
254 
255 void
256 vmx_msr_init(void)
257 {
258 	uint64_t bus_freq, ratio;
259 	int i;
260 
261 	/*
262 	 * It is safe to cache the values of the following MSRs because
263 	 * they don't change based on curcpu, curproc or curthread.
264 	 */
265 	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
266 	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
267 	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
268 	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
269 
270 	/*
271 	 * Initialize emulated MSRs
272 	 */
273 	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
274 	/*
275 	 * Set mandatory bits
276 	 *  11:   branch trace disabled
277 	 *  12:   PEBS unavailable
278 	 * Clear unsupported features
279 	 *  16:   SpeedStep enable
280 	 *  18:   enable MONITOR FSM
281 	 */
282 	misc_enable |= (1 << 12) | (1 << 11);
283 	misc_enable &= ~((1 << 18) | (1 << 16));
284 
285 	if (nehalem_cpu() || westmere_cpu())
286 		bus_freq = 133330000;		/* 133Mhz */
287 	else
288 		bus_freq = 100000000;		/* 100Mhz */
289 
290 	/*
291 	 * XXXtime
292 	 * The ratio should really be based on the virtual TSC frequency as
293 	 * opposed to the host TSC.
294 	 */
295 	ratio = (tsc_freq / bus_freq) & 0xff;
296 
297 	/*
298 	 * The register definition is based on the micro-architecture
299 	 * but the following bits are always the same:
300 	 * [15:8]  Maximum Non-Turbo Ratio
301 	 * [28]    Programmable Ratio Limit for Turbo Mode
302 	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
303 	 * [47:40] Maximum Efficiency Ratio
304 	 *
305 	 * The other bits can be safely set to 0 on all
306 	 * micro-architectures up to Haswell.
307 	 */
308 	platform_info = (ratio << 8) | (ratio << 40);
309 
310 	/*
311 	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
312 	 * dependent on the maximum cores per package supported by the micro-
313 	 * architecture. For e.g., Westmere supports 6 cores per package and
314 	 * uses the low 48 bits. Sandybridge support 8 cores per package and
315 	 * uses up all 64 bits.
316 	 *
317 	 * However, the unused bits are reserved so we pretend that all bits
318 	 * in this MSR are valid.
319 	 */
320 	for (i = 0; i < 8; i++)
321 		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
322 }
323 
324 void
325 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
326 {
327 	uint64_t *guest_msrs;
328 
329 	guest_msrs = vmx->guest_msrs[vcpuid];
330 
331 	/*
332 	 * The permissions bitmap is shared between all vcpus so initialize it
333 	 * once when initializing the vBSP.
334 	 */
335 	if (vcpuid == 0) {
336 		guest_msr_rw(vmx, MSR_LSTAR);
337 		guest_msr_rw(vmx, MSR_CSTAR);
338 		guest_msr_rw(vmx, MSR_STAR);
339 		guest_msr_rw(vmx, MSR_SF_MASK);
340 		guest_msr_rw(vmx, MSR_KGSBASE);
341 	}
342 
343 	/*
344 	 * Initialize guest IA32_PAT MSR with default value after reset.
345 	 */
346 	guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
347 	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
348 	    PAT_VALUE(2, PAT_UNCACHED)		|
349 	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
350 	    PAT_VALUE(4, PAT_WRITE_BACK)	|
351 	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
352 	    PAT_VALUE(6, PAT_UNCACHED)		|
353 	    PAT_VALUE(7, PAT_UNCACHEABLE);
354 
355 	return;
356 }
357 
358 void
359 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
360 {
361 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
362 
363 	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
364 	update_pcb_bases(curpcb);
365 	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
366 	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
367 	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
368 	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
369 	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
370 }
371 
372 void
373 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
374 {
375 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
376 
377 	/* Save guest MSRs */
378 	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
379 	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
380 	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
381 	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
382 	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
383 
384 	/* Restore host MSRs */
385 	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
386 	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
387 	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
388 	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
389 
390 	/* MSR_KGSBASE will be restored on the way back to userspace */
391 }
392 
393 int
394 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
395 {
396 	const uint64_t *guest_msrs;
397 	int error;
398 
399 	guest_msrs = vmx->guest_msrs[vcpuid];
400 	error = 0;
401 
402 	switch (num) {
403 	case MSR_MCG_CAP:
404 	case MSR_MCG_STATUS:
405 		*val = 0;
406 		break;
407 	case MSR_MTRRcap:
408 	case MSR_MTRRdefType:
409 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
410 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
411 	case MSR_MTRR64kBase:
412 		*val = 0;
413 		break;
414 	case MSR_IA32_MISC_ENABLE:
415 		*val = misc_enable;
416 		break;
417 	case MSR_PLATFORM_INFO:
418 		*val = platform_info;
419 		break;
420 	case MSR_TURBO_RATIO_LIMIT:
421 	case MSR_TURBO_RATIO_LIMIT1:
422 		*val = turbo_ratio_limit;
423 		break;
424 	case MSR_PAT:
425 		*val = guest_msrs[IDX_MSR_PAT];
426 		break;
427 	default:
428 		error = EINVAL;
429 		break;
430 	}
431 	return (error);
432 }
433 
434 int
435 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
436 {
437 	uint64_t *guest_msrs;
438 	uint64_t changed;
439 	int error;
440 
441 	guest_msrs = vmx->guest_msrs[vcpuid];
442 	error = 0;
443 
444 	switch (num) {
445 	case MSR_MCG_CAP:
446 	case MSR_MCG_STATUS:
447 		break;		/* ignore writes */
448 	case MSR_MTRRcap:
449 		vm_inject_gp(vmx->vm, vcpuid);
450 		break;
451 	case MSR_MTRRdefType:
452 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
453 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
454 	case MSR_MTRR64kBase:
455 		break;		/* Ignore writes */
456 	case MSR_IA32_MISC_ENABLE:
457 		changed = val ^ misc_enable;
458 		/*
459 		 * If the host has disabled the NX feature then the guest
460 		 * also cannot use it. However, a Linux guest will try to
461 		 * enable the NX feature by writing to the MISC_ENABLE MSR.
462 		 *
463 		 * This can be safely ignored because the memory management
464 		 * code looks at CPUID.80000001H:EDX.NX to check if the
465 		 * functionality is actually enabled.
466 		 */
467 		changed &= ~(1UL << 34);
468 
469 		/*
470 		 * Punt to userspace if any other bits are being modified.
471 		 */
472 		if (changed)
473 			error = EINVAL;
474 
475 		break;
476 	case MSR_PAT:
477 		if (pat_valid(val))
478 			guest_msrs[IDX_MSR_PAT] = val;
479 		else
480 			vm_inject_gp(vmx->vm, vcpuid);
481 		break;
482 	case MSR_TSC:
483 		error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
484 		break;
485 	default:
486 		error = EINVAL;
487 		break;
488 	}
489 
490 	return (error);
491 }
492