xref: /freebsd/sys/amd64/vmm/intel/vmx_msr.c (revision 640235e2c2ba32947f7c59d168437ffa1280f1e6)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 
35 #include <machine/clock.h>
36 #include <machine/cpufunc.h>
37 #include <machine/md_var.h>
38 #include <machine/specialreg.h>
39 #include <machine/vmm.h>
40 
41 #include "vmx.h"
42 #include "vmx_msr.h"
43 
44 static boolean_t
45 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
46 {
47 
48 	if (msr_val & (1UL << (bitpos + 32)))
49 		return (TRUE);
50 	else
51 		return (FALSE);
52 }
53 
54 static boolean_t
55 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
56 {
57 
58 	if ((msr_val & (1UL << bitpos)) == 0)
59 		return (TRUE);
60 	else
61 		return (FALSE);
62 }
63 
64 uint32_t
65 vmx_revision(void)
66 {
67 
68 	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
69 }
70 
71 /*
72  * Generate a bitmask to be used for the VMCS execution control fields.
73  *
74  * The caller specifies what bits should be set to one in 'ones_mask'
75  * and what bits should be set to zero in 'zeros_mask'. The don't-care
76  * bits are set to the default value. The default values are obtained
77  * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
78  * VMX Capabilities".
79  *
80  * Returns zero on success and non-zero on error.
81  */
82 int
83 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
84 	       uint32_t zeros_mask, uint32_t *retval)
85 {
86 	int i;
87 	uint64_t val, trueval;
88 	boolean_t true_ctls_avail, one_allowed, zero_allowed;
89 
90 	/* We cannot ask the same bit to be set to both '1' and '0' */
91 	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
92 		return (EINVAL);
93 
94 	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
95 		true_ctls_avail = TRUE;
96 	else
97 		true_ctls_avail = FALSE;
98 
99 	val = rdmsr(ctl_reg);
100 	if (true_ctls_avail)
101 		trueval = rdmsr(true_ctl_reg);		/* step c */
102 	else
103 		trueval = val;				/* step a */
104 
105 	for (i = 0; i < 32; i++) {
106 		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
107 		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
108 
109 		KASSERT(one_allowed || zero_allowed,
110 			("invalid zero/one setting for bit %d of ctl 0x%0x, "
111 			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
112 
113 		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
114 			if (ones_mask & (1 << i))
115 				return (EINVAL);
116 			*retval &= ~(1 << i);
117 		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
118 			if (zeros_mask & (1 << i))
119 				return (EINVAL);
120 			*retval |= 1 << i;
121 		} else {
122 			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
123 				*retval &= ~(1 << i);
124 			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
125 				*retval |= 1 << i;
126 			else if (!true_ctls_avail)
127 				*retval &= ~(1 << i);	/* b(iii) */
128 			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
129 				*retval &= ~(1 << i);
130 			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
131 				*retval |= 1 << i;
132 			else {
133 				panic("vmx_set_ctlreg: unable to determine "
134 				      "correct value of ctl bit %d for msr "
135 				      "0x%0x and true msr 0x%0x", i, ctl_reg,
136 				      true_ctl_reg);
137 			}
138 		}
139 	}
140 
141 	return (0);
142 }
143 
144 void
145 msr_bitmap_initialize(char *bitmap)
146 {
147 
148 	memset(bitmap, 0xff, PAGE_SIZE);
149 }
150 
151 int
152 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
153 {
154 	int byte, bit;
155 
156 	if (msr <= 0x00001FFF)
157 		byte = msr / 8;
158 	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
159 		byte = 1024 + (msr - 0xC0000000) / 8;
160 	else
161 		return (EINVAL);
162 
163 	bit = msr & 0x7;
164 
165 	if (access & MSR_BITMAP_ACCESS_READ)
166 		bitmap[byte] &= ~(1 << bit);
167 	else
168 		bitmap[byte] |= 1 << bit;
169 
170 	byte += 2048;
171 	if (access & MSR_BITMAP_ACCESS_WRITE)
172 		bitmap[byte] &= ~(1 << bit);
173 	else
174 		bitmap[byte] |= 1 << bit;
175 
176 	return (0);
177 }
178 
179 static uint64_t misc_enable;
180 static uint64_t platform_info;
181 static uint64_t turbo_ratio_limit;
182 static uint64_t host_msrs[GUEST_MSR_NUM];
183 
184 static bool
185 nehalem_cpu(void)
186 {
187 	u_int family, model;
188 
189 	/*
190 	 * The family:model numbers belonging to the Nehalem microarchitecture
191 	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
192 	 */
193 	family = CPUID_TO_FAMILY(cpu_id);
194 	model = CPUID_TO_MODEL(cpu_id);
195 	if (family == 0x6) {
196 		switch (model) {
197 		case 0x1A:
198 		case 0x1E:
199 		case 0x1F:
200 		case 0x2E:
201 			return (true);
202 		default:
203 			break;
204 		}
205 	}
206 	return (false);
207 }
208 
209 static bool
210 westmere_cpu(void)
211 {
212 	u_int family, model;
213 
214 	/*
215 	 * The family:model numbers belonging to the Westmere microarchitecture
216 	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
217 	 */
218 	family = CPUID_TO_FAMILY(cpu_id);
219 	model = CPUID_TO_MODEL(cpu_id);
220 	if (family == 0x6) {
221 		switch (model) {
222 		case 0x25:
223 		case 0x2C:
224 			return (true);
225 		default:
226 			break;
227 		}
228 	}
229 	return (false);
230 }
231 
/*
 * Validate a value destined for the PAT MSR.
 *
 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
 *
 * Each of the eight byte-wide fields PA0 through PA7 must hold one of the
 * encodings 0, 1, 4, 5, 6 or 7. Encodings 2 and 3, and anything with a
 * value of 8 or more, are rejected.
 *
 * Returns true if all eight fields are acceptable, false otherwise.
 */
static bool
pat_valid(uint64_t val)
{
	unsigned int field, memtype;

	/* Consume 'val' one byte at a time, lowest field (PA0) first. */
	for (field = 0; field < 8; field++, val >>= 8) {
		memtype = val & 0xff;
		switch (memtype) {
		case 0:
		case 1:
		case 4:
		case 5:
		case 6:
		case 7:
			continue;
		default:
			return (false);
		}
	}
	return (true);
}
250 
251 void
252 vmx_msr_init(void)
253 {
254 	uint64_t bus_freq, ratio;
255 	int i;
256 
257 	/*
258 	 * It is safe to cache the values of the following MSRs because
259 	 * they don't change based on curcpu, curproc or curthread.
260 	 */
261 	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
262 	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
263 	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
264 	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
265 
266 	/*
267 	 * Initialize emulated MSRs
268 	 */
269 	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
270 	/*
271 	 * Set mandatory bits
272 	 *  11:   branch trace disabled
273 	 *  12:   PEBS unavailable
274 	 * Clear unsupported features
275 	 *  16:   SpeedStep enable
276 	 *  18:   enable MONITOR FSM
277 	 */
278 	misc_enable |= (1 << 12) | (1 << 11);
279 	misc_enable &= ~((1 << 18) | (1 << 16));
280 
281 	if (nehalem_cpu() || westmere_cpu())
282 		bus_freq = 133330000;		/* 133Mhz */
283 	else
284 		bus_freq = 100000000;		/* 100Mhz */
285 
286 	/*
287 	 * XXXtime
288 	 * The ratio should really be based on the virtual TSC frequency as
289 	 * opposed to the host TSC.
290 	 */
291 	ratio = (tsc_freq / bus_freq) & 0xff;
292 
293 	/*
294 	 * The register definition is based on the micro-architecture
295 	 * but the following bits are always the same:
296 	 * [15:8]  Maximum Non-Turbo Ratio
297 	 * [28]    Programmable Ratio Limit for Turbo Mode
298 	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
299 	 * [47:40] Maximum Efficiency Ratio
300 	 *
301 	 * The other bits can be safely set to 0 on all
302 	 * micro-architectures up to Haswell.
303 	 */
304 	platform_info = (ratio << 8) | (ratio << 40);
305 
306 	/*
307 	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
308 	 * dependent on the maximum cores per package supported by the micro-
309 	 * architecture. For e.g., Westmere supports 6 cores per package and
310 	 * uses the low 48 bits. Sandybridge support 8 cores per package and
311 	 * uses up all 64 bits.
312 	 *
313 	 * However, the unused bits are reserved so we pretend that all bits
314 	 * in this MSR are valid.
315 	 */
316 	for (i = 0; i < 8; i++)
317 		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
318 }
319 
320 void
321 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
322 {
323 	uint64_t *guest_msrs;
324 
325 	guest_msrs = vmx->guest_msrs[vcpuid];
326 
327 	/*
328 	 * The permissions bitmap is shared between all vcpus so initialize it
329 	 * once when initializing the vBSP.
330 	 */
331 	if (vcpuid == 0) {
332 		guest_msr_rw(vmx, MSR_LSTAR);
333 		guest_msr_rw(vmx, MSR_CSTAR);
334 		guest_msr_rw(vmx, MSR_STAR);
335 		guest_msr_rw(vmx, MSR_SF_MASK);
336 		guest_msr_rw(vmx, MSR_KGSBASE);
337 	}
338 
339 	/*
340 	 * Initialize guest IA32_PAT MSR with default value after reset.
341 	 */
342 	guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
343 	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
344 	    PAT_VALUE(2, PAT_UNCACHED)		|
345 	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
346 	    PAT_VALUE(4, PAT_WRITE_BACK)	|
347 	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
348 	    PAT_VALUE(6, PAT_UNCACHED)		|
349 	    PAT_VALUE(7, PAT_UNCACHEABLE);
350 
351 	return;
352 }
353 
354 void
355 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
356 {
357 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
358 
359 	/* Save host MSRs (if any) and restore guest MSRs */
360 	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
361 	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
362 	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
363 	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
364 	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
365 }
366 
367 void
368 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
369 {
370 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
371 
372 	/* Save guest MSRs */
373 	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
374 	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
375 	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
376 	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
377 	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
378 
379 	/* Restore host MSRs */
380 	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
381 	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
382 	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
383 	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
384 
385 	/* MSR_KGSBASE will be restored on the way back to userspace */
386 }
387 
388 int
389 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
390 {
391 	const uint64_t *guest_msrs;
392 	int error;
393 
394 	guest_msrs = vmx->guest_msrs[vcpuid];
395 	error = 0;
396 
397 	switch (num) {
398 	case MSR_MCG_CAP:
399 	case MSR_MCG_STATUS:
400 		*val = 0;
401 		break;
402 	case MSR_MTRRcap:
403 	case MSR_MTRRdefType:
404 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
405 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
406 	case MSR_MTRR64kBase:
407 		*val = 0;
408 		break;
409 	case MSR_IA32_MISC_ENABLE:
410 		*val = misc_enable;
411 		break;
412 	case MSR_PLATFORM_INFO:
413 		*val = platform_info;
414 		break;
415 	case MSR_TURBO_RATIO_LIMIT:
416 	case MSR_TURBO_RATIO_LIMIT1:
417 		*val = turbo_ratio_limit;
418 		break;
419 	case MSR_PAT:
420 		*val = guest_msrs[IDX_MSR_PAT];
421 		break;
422 	default:
423 		error = EINVAL;
424 		break;
425 	}
426 	return (error);
427 }
428 
429 int
430 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
431 {
432 	uint64_t *guest_msrs;
433 	uint64_t changed;
434 	int error;
435 
436 	guest_msrs = vmx->guest_msrs[vcpuid];
437 	error = 0;
438 
439 	switch (num) {
440 	case MSR_MCG_CAP:
441 	case MSR_MCG_STATUS:
442 		break;		/* ignore writes */
443 	case MSR_MTRRcap:
444 		vm_inject_gp(vmx->vm, vcpuid);
445 		break;
446 	case MSR_MTRRdefType:
447 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
448 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
449 	case MSR_MTRR64kBase:
450 		break;		/* Ignore writes */
451 	case MSR_IA32_MISC_ENABLE:
452 		changed = val ^ misc_enable;
453 		/*
454 		 * If the host has disabled the NX feature then the guest
455 		 * also cannot use it. However, a Linux guest will try to
456 		 * enable the NX feature by writing to the MISC_ENABLE MSR.
457 		 *
458 		 * This can be safely ignored because the memory management
459 		 * code looks at CPUID.80000001H:EDX.NX to check if the
460 		 * functionality is actually enabled.
461 		 */
462 		changed &= ~(1UL << 34);
463 
464 		/*
465 		 * Punt to userspace if any other bits are being modified.
466 		 */
467 		if (changed)
468 			error = EINVAL;
469 
470 		break;
471 	case MSR_PAT:
472 		if (pat_valid(val))
473 			guest_msrs[IDX_MSR_PAT] = val;
474 		else
475 			vm_inject_gp(vmx->vm, vcpuid);
476 		break;
477 	case MSR_TSC:
478 		error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
479 		break;
480 	default:
481 		error = EINVAL;
482 		break;
483 	}
484 
485 	return (error);
486 }
487