xref: /freebsd/sys/amd64/vmm/intel/vmx_msr.c (revision ec0e626bafb335b30c499d06066997f54b10c092)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/cpuset.h>
35 
36 #include <machine/clock.h>
37 #include <machine/cpufunc.h>
38 #include <machine/md_var.h>
39 #include <machine/specialreg.h>
40 #include <machine/vmm.h>
41 
42 #include "vmx.h"
43 #include "vmx_msr.h"
44 
45 static boolean_t
46 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
47 {
48 
49 	if (msr_val & (1UL << (bitpos + 32)))
50 		return (TRUE);
51 	else
52 		return (FALSE);
53 }
54 
55 static boolean_t
56 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
57 {
58 
59 	if ((msr_val & (1UL << bitpos)) == 0)
60 		return (TRUE);
61 	else
62 		return (FALSE);
63 }
64 
65 uint32_t
66 vmx_revision(void)
67 {
68 
69 	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
70 }
71 
72 /*
73  * Generate a bitmask to be used for the VMCS execution control fields.
74  *
75  * The caller specifies what bits should be set to one in 'ones_mask'
76  * and what bits should be set to zero in 'zeros_mask'. The don't-care
77  * bits are set to the default value. The default values are obtained
78  * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
79  * VMX Capabilities".
80  *
81  * Returns zero on success and non-zero on error.
82  */
83 int
84 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
85 	       uint32_t zeros_mask, uint32_t *retval)
86 {
87 	int i;
88 	uint64_t val, trueval;
89 	boolean_t true_ctls_avail, one_allowed, zero_allowed;
90 
91 	/* We cannot ask the same bit to be set to both '1' and '0' */
92 	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
93 		return (EINVAL);
94 
95 	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
96 		true_ctls_avail = TRUE;
97 	else
98 		true_ctls_avail = FALSE;
99 
100 	val = rdmsr(ctl_reg);
101 	if (true_ctls_avail)
102 		trueval = rdmsr(true_ctl_reg);		/* step c */
103 	else
104 		trueval = val;				/* step a */
105 
106 	for (i = 0; i < 32; i++) {
107 		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
108 		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
109 
110 		KASSERT(one_allowed || zero_allowed,
111 			("invalid zero/one setting for bit %d of ctl 0x%0x, "
112 			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
113 
114 		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
115 			if (ones_mask & (1 << i))
116 				return (EINVAL);
117 			*retval &= ~(1 << i);
118 		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
119 			if (zeros_mask & (1 << i))
120 				return (EINVAL);
121 			*retval |= 1 << i;
122 		} else {
123 			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
124 				*retval &= ~(1 << i);
125 			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
126 				*retval |= 1 << i;
127 			else if (!true_ctls_avail)
128 				*retval &= ~(1 << i);	/* b(iii) */
129 			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
130 				*retval &= ~(1 << i);
131 			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
132 				*retval |= 1 << i;
133 			else {
134 				panic("vmx_set_ctlreg: unable to determine "
135 				      "correct value of ctl bit %d for msr "
136 				      "0x%0x and true msr 0x%0x", i, ctl_reg,
137 				      true_ctl_reg);
138 			}
139 		}
140 	}
141 
142 	return (0);
143 }
144 
145 void
146 msr_bitmap_initialize(char *bitmap)
147 {
148 
149 	memset(bitmap, 0xff, PAGE_SIZE);
150 }
151 
152 int
153 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
154 {
155 	int byte, bit;
156 
157 	if (msr <= 0x00001FFF)
158 		byte = msr / 8;
159 	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
160 		byte = 1024 + (msr - 0xC0000000) / 8;
161 	else
162 		return (EINVAL);
163 
164 	bit = msr & 0x7;
165 
166 	if (access & MSR_BITMAP_ACCESS_READ)
167 		bitmap[byte] &= ~(1 << bit);
168 	else
169 		bitmap[byte] |= 1 << bit;
170 
171 	byte += 2048;
172 	if (access & MSR_BITMAP_ACCESS_WRITE)
173 		bitmap[byte] &= ~(1 << bit);
174 	else
175 		bitmap[byte] |= 1 << bit;
176 
177 	return (0);
178 }
179 
180 static uint64_t misc_enable;
181 static uint64_t platform_info;
182 static uint64_t turbo_ratio_limit;
183 static uint64_t host_msrs[GUEST_MSR_NUM];
184 
185 static bool
186 nehalem_cpu(void)
187 {
188 	u_int family, model;
189 
190 	/*
191 	 * The family:model numbers belonging to the Nehalem microarchitecture
192 	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
193 	 */
194 	family = CPUID_TO_FAMILY(cpu_id);
195 	model = CPUID_TO_MODEL(cpu_id);
196 	if (family == 0x6) {
197 		switch (model) {
198 		case 0x1A:
199 		case 0x1E:
200 		case 0x1F:
201 		case 0x2E:
202 			return (true);
203 		default:
204 			break;
205 		}
206 	}
207 	return (false);
208 }
209 
210 static bool
211 westmere_cpu(void)
212 {
213 	u_int family, model;
214 
215 	/*
216 	 * The family:model numbers belonging to the Westmere microarchitecture
217 	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
218 	 */
219 	family = CPUID_TO_FAMILY(cpu_id);
220 	model = CPUID_TO_MODEL(cpu_id);
221 	if (family == 0x6) {
222 		switch (model) {
223 		case 0x25:
224 		case 0x2C:
225 			return (true);
226 		default:
227 			break;
228 		}
229 	}
230 	return (false);
231 }
232 
/*
 * Validate a value destined for the PAT MSR.
 *
 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
 *
 * Each of the eight bytes of 'val' (PA0 through PA7) is checked in turn;
 * the value is rejected if any byte is 2, 3 or greater than 7.
 *
 * Returns true if all eight entries are acceptable, false otherwise.
 */
static bool
pat_valid(uint64_t val)
{
	uint64_t remaining;
	int n, memtype;

	remaining = val;
	for (n = 0; n < 8; n++) {
		memtype = remaining & 0xff;
		if (memtype == 2 || memtype == 3 || memtype > 7)
			return (false);
		remaining >>= 8;
	}
	return (true);
}
251 
252 void
253 vmx_msr_init(void)
254 {
255 	uint64_t bus_freq, ratio;
256 	int i;
257 
258 	/*
259 	 * It is safe to cache the values of the following MSRs because
260 	 * they don't change based on curcpu, curproc or curthread.
261 	 */
262 	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
263 	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
264 	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
265 	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
266 
267 	/*
268 	 * Initialize emulated MSRs
269 	 */
270 	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
271 	/*
272 	 * Set mandatory bits
273 	 *  11:   branch trace disabled
274 	 *  12:   PEBS unavailable
275 	 * Clear unsupported features
276 	 *  16:   SpeedStep enable
277 	 *  18:   enable MONITOR FSM
278 	 */
279 	misc_enable |= (1 << 12) | (1 << 11);
280 	misc_enable &= ~((1 << 18) | (1 << 16));
281 
282 	if (nehalem_cpu() || westmere_cpu())
283 		bus_freq = 133330000;		/* 133Mhz */
284 	else
285 		bus_freq = 100000000;		/* 100Mhz */
286 
287 	/*
288 	 * XXXtime
289 	 * The ratio should really be based on the virtual TSC frequency as
290 	 * opposed to the host TSC.
291 	 */
292 	ratio = (tsc_freq / bus_freq) & 0xff;
293 
294 	/*
295 	 * The register definition is based on the micro-architecture
296 	 * but the following bits are always the same:
297 	 * [15:8]  Maximum Non-Turbo Ratio
298 	 * [28]    Programmable Ratio Limit for Turbo Mode
299 	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
300 	 * [47:40] Maximum Efficiency Ratio
301 	 *
302 	 * The other bits can be safely set to 0 on all
303 	 * micro-architectures up to Haswell.
304 	 */
305 	platform_info = (ratio << 8) | (ratio << 40);
306 
307 	/*
308 	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
309 	 * dependent on the maximum cores per package supported by the micro-
310 	 * architecture. For e.g., Westmere supports 6 cores per package and
311 	 * uses the low 48 bits. Sandybridge support 8 cores per package and
312 	 * uses up all 64 bits.
313 	 *
314 	 * However, the unused bits are reserved so we pretend that all bits
315 	 * in this MSR are valid.
316 	 */
317 	for (i = 0; i < 8; i++)
318 		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
319 }
320 
321 void
322 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
323 {
324 	uint64_t *guest_msrs;
325 
326 	guest_msrs = vmx->guest_msrs[vcpuid];
327 
328 	/*
329 	 * The permissions bitmap is shared between all vcpus so initialize it
330 	 * once when initializing the vBSP.
331 	 */
332 	if (vcpuid == 0) {
333 		guest_msr_rw(vmx, MSR_LSTAR);
334 		guest_msr_rw(vmx, MSR_CSTAR);
335 		guest_msr_rw(vmx, MSR_STAR);
336 		guest_msr_rw(vmx, MSR_SF_MASK);
337 		guest_msr_rw(vmx, MSR_KGSBASE);
338 	}
339 
340 	/*
341 	 * Initialize guest IA32_PAT MSR with default value after reset.
342 	 */
343 	guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
344 	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
345 	    PAT_VALUE(2, PAT_UNCACHED)		|
346 	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
347 	    PAT_VALUE(4, PAT_WRITE_BACK)	|
348 	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
349 	    PAT_VALUE(6, PAT_UNCACHED)		|
350 	    PAT_VALUE(7, PAT_UNCACHEABLE);
351 
352 	return;
353 }
354 
355 void
356 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
357 {
358 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
359 
360 	/* Save host MSRs (if any) and restore guest MSRs */
361 	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
362 	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
363 	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
364 	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
365 	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
366 }
367 
368 void
369 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
370 {
371 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
372 
373 	/* Save guest MSRs */
374 	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
375 	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
376 	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
377 	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
378 	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
379 
380 	/* Restore host MSRs */
381 	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
382 	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
383 	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
384 	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
385 
386 	/* MSR_KGSBASE will be restored on the way back to userspace */
387 }
388 
389 int
390 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
391 {
392 	const uint64_t *guest_msrs;
393 	int error;
394 
395 	guest_msrs = vmx->guest_msrs[vcpuid];
396 	error = 0;
397 
398 	switch (num) {
399 	case MSR_IA32_MISC_ENABLE:
400 		*val = misc_enable;
401 		break;
402 	case MSR_PLATFORM_INFO:
403 		*val = platform_info;
404 		break;
405 	case MSR_TURBO_RATIO_LIMIT:
406 	case MSR_TURBO_RATIO_LIMIT1:
407 		*val = turbo_ratio_limit;
408 		break;
409 	case MSR_PAT:
410 		*val = guest_msrs[IDX_MSR_PAT];
411 		break;
412 	default:
413 		error = EINVAL;
414 		break;
415 	}
416 	return (error);
417 }
418 
419 int
420 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
421 {
422 	uint64_t *guest_msrs;
423 	uint64_t changed;
424 	int error;
425 
426 	guest_msrs = vmx->guest_msrs[vcpuid];
427 	error = 0;
428 
429 	switch (num) {
430 	case MSR_IA32_MISC_ENABLE:
431 		changed = val ^ misc_enable;
432 		/*
433 		 * If the host has disabled the NX feature then the guest
434 		 * also cannot use it. However, a Linux guest will try to
435 		 * enable the NX feature by writing to the MISC_ENABLE MSR.
436 		 *
437 		 * This can be safely ignored because the memory management
438 		 * code looks at CPUID.80000001H:EDX.NX to check if the
439 		 * functionality is actually enabled.
440 		 */
441 		changed &= ~(1UL << 34);
442 
443 		/*
444 		 * Punt to userspace if any other bits are being modified.
445 		 */
446 		if (changed)
447 			error = EINVAL;
448 
449 		break;
450 	case MSR_PAT:
451 		if (pat_valid(val))
452 			guest_msrs[IDX_MSR_PAT] = val;
453 		else
454 			vm_inject_gp(vmx->vm, vcpuid);
455 		break;
456 	default:
457 		error = EINVAL;
458 		break;
459 	}
460 
461 	return (error);
462 }
463