xref: /freebsd/sys/amd64/vmm/intel/vmx_msr.c (revision b4af4f93c682e445bf159f0d1ec90b636296c946)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/proc.h>
37 
38 #include <machine/clock.h>
39 #include <machine/cpufunc.h>
40 #include <machine/md_var.h>
41 #include <machine/pcb.h>
42 #include <machine/specialreg.h>
43 #include <machine/vmm.h>
44 
45 #include "vmx.h"
46 #include "vmx_msr.h"
47 
/*
 * Report whether bit (bitpos + 32) of 'msr_val' is set, i.e. whether the
 * capability MSR value permits control bit 'bitpos' to be programmed to 1.
 *
 * 'bitpos' is expected to be in the range 0..31.
 */
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{
	const uint64_t allowed1_bit = 1UL << (bitpos + 32);

	if ((msr_val & allowed1_bit) == 0)
		return (false);
	return (true);
}
54 
/*
 * Report whether bit 'bitpos' of 'msr_val' is clear, i.e. whether the
 * capability MSR value permits control bit 'bitpos' to be programmed to 0.
 *
 * 'bitpos' is expected to be in the range 0..31.
 */
static bool
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{
	const uint64_t allowed0_bit = 1UL << bitpos;

	if ((msr_val & allowed0_bit) != 0)
		return (false);
	return (true);
}
61 
62 uint32_t
63 vmx_revision(void)
64 {
65 
66 	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
67 }
68 
69 /*
70  * Generate a bitmask to be used for the VMCS execution control fields.
71  *
72  * The caller specifies what bits should be set to one in 'ones_mask'
73  * and what bits should be set to zero in 'zeros_mask'. The don't-care
74  * bits are set to the default value. The default values are obtained
75  * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
76  * VMX Capabilities".
77  *
78  * Returns zero on success and non-zero on error.
79  */
80 int
81 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
82 	       uint32_t zeros_mask, uint32_t *retval)
83 {
84 	int i;
85 	uint64_t val, trueval;
86 	bool true_ctls_avail, one_allowed, zero_allowed;
87 
88 	/* We cannot ask the same bit to be set to both '1' and '0' */
89 	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
90 		return (EINVAL);
91 
92 	true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;
93 
94 	val = rdmsr(ctl_reg);
95 	if (true_ctls_avail)
96 		trueval = rdmsr(true_ctl_reg);		/* step c */
97 	else
98 		trueval = val;				/* step a */
99 
100 	for (i = 0; i < 32; i++) {
101 		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
102 		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
103 
104 		KASSERT(one_allowed || zero_allowed,
105 			("invalid zero/one setting for bit %d of ctl 0x%0x, "
106 			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
107 
108 		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
109 			if (ones_mask & (1 << i))
110 				return (EINVAL);
111 			*retval &= ~(1 << i);
112 		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
113 			if (zeros_mask & (1 << i))
114 				return (EINVAL);
115 			*retval |= 1 << i;
116 		} else {
117 			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
118 				*retval &= ~(1 << i);
119 			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
120 				*retval |= 1 << i;
121 			else if (!true_ctls_avail)
122 				*retval &= ~(1 << i);	/* b(iii) */
123 			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
124 				*retval &= ~(1 << i);
125 			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
126 				*retval |= 1 << i;
127 			else {
128 				panic("vmx_set_ctlreg: unable to determine "
129 				      "correct value of ctl bit %d for msr "
130 				      "0x%0x and true msr 0x%0x", i, ctl_reg,
131 				      true_ctl_reg);
132 			}
133 		}
134 	}
135 
136 	return (0);
137 }
138 
139 void
140 msr_bitmap_initialize(char *bitmap)
141 {
142 
143 	memset(bitmap, 0xff, PAGE_SIZE);
144 }
145 
146 int
147 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
148 {
149 	int byte, bit;
150 
151 	if (msr <= 0x00001FFF)
152 		byte = msr / 8;
153 	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
154 		byte = 1024 + (msr - 0xC0000000) / 8;
155 	else
156 		return (EINVAL);
157 
158 	bit = msr & 0x7;
159 
160 	if (access & MSR_BITMAP_ACCESS_READ)
161 		bitmap[byte] &= ~(1 << bit);
162 	else
163 		bitmap[byte] |= 1 << bit;
164 
165 	byte += 2048;
166 	if (access & MSR_BITMAP_ACCESS_WRITE)
167 		bitmap[byte] &= ~(1 << bit);
168 	else
169 		bitmap[byte] |= 1 << bit;
170 
171 	return (0);
172 }
173 
174 static uint64_t misc_enable;
175 static uint64_t platform_info;
176 static uint64_t turbo_ratio_limit;
177 static uint64_t host_msrs[GUEST_MSR_NUM];
178 
179 static bool
180 nehalem_cpu(void)
181 {
182 	u_int family, model;
183 
184 	/*
185 	 * The family:model numbers belonging to the Nehalem microarchitecture
186 	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
187 	 */
188 	family = CPUID_TO_FAMILY(cpu_id);
189 	model = CPUID_TO_MODEL(cpu_id);
190 	if (family == 0x6) {
191 		switch (model) {
192 		case 0x1A:
193 		case 0x1E:
194 		case 0x1F:
195 		case 0x2E:
196 			return (true);
197 		default:
198 			break;
199 		}
200 	}
201 	return (false);
202 }
203 
204 static bool
205 westmere_cpu(void)
206 {
207 	u_int family, model;
208 
209 	/*
210 	 * The family:model numbers belonging to the Westmere microarchitecture
211 	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
212 	 */
213 	family = CPUID_TO_FAMILY(cpu_id);
214 	model = CPUID_TO_MODEL(cpu_id);
215 	if (family == 0x6) {
216 		switch (model) {
217 		case 0x25:
218 		case 0x2C:
219 			return (true);
220 		default:
221 			break;
222 		}
223 	}
224 	return (false);
225 }
226 
/*
 * Check that 'val' is an acceptable value for the PAT MSR.
 *
 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
 *
 * The value is treated as eight one-byte entries (PA0 through PA7).  An
 * entry equal to 2 or 3, or greater than or equal to 8, makes the whole
 * value invalid.
 */
static bool
pat_valid(uint64_t val)
{
	int remaining;
	uint64_t entry;

	/* Peel off one entry per iteration, lowest byte first. */
	for (remaining = 8; remaining > 0; remaining--, val >>= 8) {
		entry = val & 0xff;
		if (entry >= 8 || entry == 2 || entry == 3)
			return (false);
	}
	return (true);
}
245 
246 void
247 vmx_msr_init(void)
248 {
249 	uint64_t bus_freq, ratio;
250 	int i;
251 
252 	/*
253 	 * It is safe to cache the values of the following MSRs because
254 	 * they don't change based on curcpu, curproc or curthread.
255 	 */
256 	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
257 	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
258 	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
259 	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
260 
261 	/*
262 	 * Initialize emulated MSRs
263 	 */
264 	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
265 	/*
266 	 * Set mandatory bits
267 	 *  11:   branch trace disabled
268 	 *  12:   PEBS unavailable
269 	 * Clear unsupported features
270 	 *  16:   SpeedStep enable
271 	 *  18:   enable MONITOR FSM
272 	 */
273 	misc_enable |= (1 << 12) | (1 << 11);
274 	misc_enable &= ~((1 << 18) | (1 << 16));
275 
276 	if (nehalem_cpu() || westmere_cpu())
277 		bus_freq = 133330000;		/* 133Mhz */
278 	else
279 		bus_freq = 100000000;		/* 100Mhz */
280 
281 	/*
282 	 * XXXtime
283 	 * The ratio should really be based on the virtual TSC frequency as
284 	 * opposed to the host TSC.
285 	 */
286 	ratio = (tsc_freq / bus_freq) & 0xff;
287 
288 	/*
289 	 * The register definition is based on the micro-architecture
290 	 * but the following bits are always the same:
291 	 * [15:8]  Maximum Non-Turbo Ratio
292 	 * [28]    Programmable Ratio Limit for Turbo Mode
293 	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
294 	 * [47:40] Maximum Efficiency Ratio
295 	 *
296 	 * The other bits can be safely set to 0 on all
297 	 * micro-architectures up to Haswell.
298 	 */
299 	platform_info = (ratio << 8) | (ratio << 40);
300 
301 	/*
302 	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
303 	 * dependent on the maximum cores per package supported by the micro-
304 	 * architecture. For e.g., Westmere supports 6 cores per package and
305 	 * uses the low 48 bits. Sandybridge support 8 cores per package and
306 	 * uses up all 64 bits.
307 	 *
308 	 * However, the unused bits are reserved so we pretend that all bits
309 	 * in this MSR are valid.
310 	 */
311 	for (i = 0; i < 8; i++)
312 		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
313 }
314 
315 void
316 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
317 {
318 	uint64_t *guest_msrs;
319 
320 	guest_msrs = vmx->guest_msrs[vcpuid];
321 
322 	/*
323 	 * The permissions bitmap is shared between all vcpus so initialize it
324 	 * once when initializing the vBSP.
325 	 */
326 	if (vcpuid == 0) {
327 		guest_msr_rw(vmx, MSR_LSTAR);
328 		guest_msr_rw(vmx, MSR_CSTAR);
329 		guest_msr_rw(vmx, MSR_STAR);
330 		guest_msr_rw(vmx, MSR_SF_MASK);
331 		guest_msr_rw(vmx, MSR_KGSBASE);
332 	}
333 
334 	/*
335 	 * Initialize guest IA32_PAT MSR with default value after reset.
336 	 */
337 	guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
338 	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
339 	    PAT_VALUE(2, PAT_UNCACHED)		|
340 	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
341 	    PAT_VALUE(4, PAT_WRITE_BACK)	|
342 	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
343 	    PAT_VALUE(6, PAT_UNCACHED)		|
344 	    PAT_VALUE(7, PAT_UNCACHEABLE);
345 
346 	return;
347 }
348 
349 void
350 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
351 {
352 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
353 
354 	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
355 	update_pcb_bases(curpcb);
356 	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
357 	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
358 	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
359 	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
360 	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
361 }
362 
363 void
364 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
365 {
366 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
367 
368 	/* Save guest MSRs */
369 	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
370 	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
371 	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
372 	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
373 	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
374 
375 	/* Restore host MSRs */
376 	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
377 	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
378 	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
379 	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
380 
381 	/* MSR_KGSBASE will be restored on the way back to userspace */
382 }
383 
384 int
385 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
386 {
387 	const uint64_t *guest_msrs;
388 	int error;
389 
390 	guest_msrs = vmx->guest_msrs[vcpuid];
391 	error = 0;
392 
393 	switch (num) {
394 	case MSR_MCG_CAP:
395 	case MSR_MCG_STATUS:
396 		*val = 0;
397 		break;
398 	case MSR_MTRRcap:
399 	case MSR_MTRRdefType:
400 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
401 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
402 	case MSR_MTRR64kBase:
403 		*val = 0;
404 		break;
405 	case MSR_IA32_MISC_ENABLE:
406 		*val = misc_enable;
407 		break;
408 	case MSR_PLATFORM_INFO:
409 		*val = platform_info;
410 		break;
411 	case MSR_TURBO_RATIO_LIMIT:
412 	case MSR_TURBO_RATIO_LIMIT1:
413 		*val = turbo_ratio_limit;
414 		break;
415 	case MSR_PAT:
416 		*val = guest_msrs[IDX_MSR_PAT];
417 		break;
418 	default:
419 		error = EINVAL;
420 		break;
421 	}
422 	return (error);
423 }
424 
425 int
426 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
427 {
428 	uint64_t *guest_msrs;
429 	uint64_t changed;
430 	int error;
431 
432 	guest_msrs = vmx->guest_msrs[vcpuid];
433 	error = 0;
434 
435 	switch (num) {
436 	case MSR_MCG_CAP:
437 	case MSR_MCG_STATUS:
438 		break;		/* ignore writes */
439 	case MSR_MTRRcap:
440 		vm_inject_gp(vmx->vm, vcpuid);
441 		break;
442 	case MSR_MTRRdefType:
443 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
444 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
445 	case MSR_MTRR64kBase:
446 		break;		/* Ignore writes */
447 	case MSR_IA32_MISC_ENABLE:
448 		changed = val ^ misc_enable;
449 		/*
450 		 * If the host has disabled the NX feature then the guest
451 		 * also cannot use it. However, a Linux guest will try to
452 		 * enable the NX feature by writing to the MISC_ENABLE MSR.
453 		 *
454 		 * This can be safely ignored because the memory management
455 		 * code looks at CPUID.80000001H:EDX.NX to check if the
456 		 * functionality is actually enabled.
457 		 */
458 		changed &= ~(1UL << 34);
459 
460 		/*
461 		 * Punt to userspace if any other bits are being modified.
462 		 */
463 		if (changed)
464 			error = EINVAL;
465 
466 		break;
467 	case MSR_PAT:
468 		if (pat_valid(val))
469 			guest_msrs[IDX_MSR_PAT] = val;
470 		else
471 			vm_inject_gp(vmx->vm, vcpuid);
472 		break;
473 	case MSR_TSC:
474 		error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
475 		break;
476 	default:
477 		error = EINVAL;
478 		break;
479 	}
480 
481 	return (error);
482 }
483