xref: /freebsd/sys/amd64/vmm/x86.c (revision 2ff63af9b88c7413b7d71715b5532625752a248e)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/sysctl.h>

#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/segments.h>
#include <machine/specialreg.h>

#include <machine/vmm.h>

#include "vmm_host.h"
#include "vmm_ktr.h"
#include "vmm_util.h"
#include "x86.h"

SYSCTL_DECL(_hw_vmm);
static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    NULL);

#define	CPUID_VM_HIGH		0x40000000

static const char bhyve_id[12] = "bhyve bhyve ";

static uint64_t bhyve_xcpuids;
SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
    "Number of times an unknown cpuid leaf was accessed");

static int cpuid_leaf_b = 1;
SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
    &cpuid_leaf_b, 0, NULL);

/*
 * Round up to the next power of two, if necessary, and then take log2.
 * Returns -1 if argument is zero.
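 *
 * For example: log2(1) == 0, log2(2) == 1, log2(3) == 2 (3 rounds up
 * to 4), log2(16) == 4 and log2(0) == -1.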
 */
static __inline int
log2(u_int x)
{

	return (fls(x << (1 - powerof2(x))) - 1);
}

int
x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx,
    uint64_t *rcx, uint64_t *rdx)
{
	struct vm *vm = vcpu_vm(vcpu);
	int vcpu_id = vcpu_vcpuid(vcpu);
	const struct xsave_limits *limits;
	uint64_t cr4;
	int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
	    width, x2apic_id;
	unsigned int func, regs[4], logical_cpus, param;
	enum x2apic_state x2apic_state;
	uint16_t cores, maxcpus, sockets, threads;

	/*
	 * The function of CPUID is controlled through the provided value of
	 * %eax (and secondarily %ecx, for certain leaf data).
	 */
	func = (uint32_t)*rax;
	param = (uint32_t)*rcx;

	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", func, param);

	/*
	 * Requests for invalid CPUID levels should map to the highest
	 * available level instead.
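	 * For example, a request for leaf 0x17 on a host whose cpu_high
	 * is 0x16 is served as leaf 0x16, and a request for 0x4000ffff
	 * is clamped to CPUID_VM_HIGH.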
	 */
	if (cpu_exthigh != 0 && func >= 0x80000000) {
		if (func > cpu_exthigh)
			func = cpu_exthigh;
	} else if (func >= 0x40000000) {
		if (func > CPUID_VM_HIGH)
			func = CPUID_VM_HIGH;
	} else if (func > cpu_high) {
		func = cpu_high;
	}

	/*
	 * In general the approach used for CPU topology is to
	 * advertise a flat topology where each CPU is its own package,
	 * with no multi-core or SMT.
	 */
	switch (func) {
		/*
		 * Pass these through to the guest
		 */
		case CPUID_0000_0000:
		case CPUID_0000_0002:
		case CPUID_0000_0003:
		case CPUID_8000_0000:
		case CPUID_8000_0002:
		case CPUID_8000_0003:
		case CPUID_8000_0004:
		case CPUID_8000_0006:
			cpuid_count(func, param, regs);
			break;
		case CPUID_8000_0008:
			cpuid_count(func, param, regs);
			if (vmm_is_svm()) {
				/*
				 * As on Intel (0000_0007:0, EDX), mask out
				 * unsupported or unsafe AMD extended features
				 * (8000_0008 EBX).
				 */
				regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
				    AMDFEID_XSAVEERPTR);

				vm_get_topology(vm, &sockets, &cores, &threads,
				    &maxcpus);
				/*
				 * Here, width is ApicIdCoreIdSize, present on
				 * at least Family 15h and newer.  It
				 * represents the "number of bits in the
				 * initial apicid that indicate thread id
				 * within a package."
				 *
				 * Our topo_probe_amd() uses it for
				 * pkg_id_shift and other OSes may rely on it.
				 */
				width = MIN(0xF, log2(threads * cores));
				if (width < 0x4)
					width = 0;
				logical_cpus = MIN(0xFF, threads * cores - 1);
				regs[2] = (width << AMDID_COREID_SIZE_SHIFT) |
				    logical_cpus;
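				/*
				 * For example, a 2-socket, 4-core, 2-thread
				 * guest topology gives log2(2 * 4) == 3,
				 * below the 0x4 floor and so reported as a
				 * width of 0, with NC (logical_cpus) == 7.
				 */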
			}
			break;

		case CPUID_8000_0001:
			cpuid_count(func, param, regs);

			/*
			 * Hide SVM from guest.
			 */
			regs[2] &= ~AMDID2_SVM;

			/*
			 * Don't advertise extended performance counter MSRs
			 * to the guest.
			 */
			regs[2] &= ~AMDID2_PCXC;
			regs[2] &= ~AMDID2_PNXC;
			regs[2] &= ~AMDID2_PTSCEL2I;

			/*
			 * Don't advertise Instruction Based Sampling feature.
			 */
			regs[2] &= ~AMDID2_IBS;

			/* NodeID MSR not available */
			regs[2] &= ~AMDID2_NODE_ID;

			/* Don't advertise the OS visible workaround feature */
			regs[2] &= ~AMDID2_OSVW;

			/* Hide mwaitx/monitorx capability from the guest */
			regs[2] &= ~AMDID2_MWAITX;

			/* Advertise RDTSCP if it is enabled. */
			error = vm_get_capability(vcpu,
			    VM_CAP_RDTSCP, &enable_rdtscp);
			if (error == 0 && enable_rdtscp)
				regs[3] |= AMDID_RDTSCP;
			else
				regs[3] &= ~AMDID_RDTSCP;
			break;

		case CPUID_8000_0007:
			/*
			 * AMD uses this leaf to advertise the processor's
			 * power monitoring and RAS capabilities. These
			 * features are hardware-specific and exposing
			 * them to a guest doesn't make a lot of sense.
			 *
			 * Intel uses this leaf only to advertise the
			 * "Invariant TSC" feature with all other bits
			 * being reserved (set to zero).
			 */
			regs[0] = 0;
			regs[1] = 0;
			regs[2] = 0;
			regs[3] = 0;

			/*
			 * "Invariant TSC" can be advertised to the guest if:
			 * - host TSC frequency is invariant
			 * - host TSCs are synchronized across physical cpus
			 *
			 * XXX This still falls short because the vcpu
			 * can observe the TSC moving backwards as it
			 * migrates across physical cpus. But at least
			 * it should discourage the guest from using the
			 * TSC to keep track of time.
			 */
			if (tsc_is_invariant && smp_tsc)
				regs[3] |= AMDPM_TSC_INVARIANT;
			break;

		case CPUID_8000_001D:
			/* AMD Cache topology, like 0000_0004 for Intel. */
			if (!vmm_is_svm())
				goto default_leaf;

			/*
			 * Similar to Intel, generate a fictitious cache
			 * topology for the guest with L3 shared by the
			 * package, and L1 and L2 local to a core.
			 */
			vm_get_topology(vm, &sockets, &cores, &threads,
			    &maxcpus);
			switch (param) {
			case 0:
				logical_cpus = threads;
				level = 1;
				func = 1;	/* data cache */
				break;
			case 1:
				logical_cpus = threads;
				level = 2;
				func = 3;	/* unified cache */
				break;
			case 2:
				logical_cpus = threads * cores;
				level = 3;
				func = 3;	/* unified cache */
				break;
			default:
				logical_cpus = 0;
				level = 0;
				func = 0;
				break;
			}

			logical_cpus = MIN(0xfff, logical_cpus - 1);
			regs[0] = (logical_cpus << 14) | (1 << 8) |
			    (level << 5) | func;
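			/*
			 * The packing above follows the AMD cache
			 * properties EAX layout: NumSharingCache in bits
			 * 25:14, self-initializing in bit 8, CacheLevel in
			 * bits 7:5 and CacheType in bits 4:0.
			 */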
			regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
			regs[2] = 0;
			regs[3] = 0;
			break;

		case CPUID_8000_001E:
			/*
			 * AMD Family 16h+ and Hygon Family 18h additional
			 * identifiers.
			 */
			if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16)
				goto default_leaf;

			vm_get_topology(vm, &sockets, &cores, &threads,
			    &maxcpus);
			regs[0] = vcpu_id;
			threads = MIN(0xFF, threads - 1);
			regs[1] = (threads << 8) |
			    (vcpu_id >> log2(threads + 1));
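			/*
			 * On newer AMD families, EBX bits 15:8 carry
			 * ThreadsPerCore - 1 and bits 7:0 the core ID,
			 * derived here from the vcpu ID.
			 */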
			/*
			 * XXX Bhyve topology cannot yet represent >1 node per
			 * processor.
			 */
			regs[2] = 0;
			regs[3] = 0;
			break;

		case CPUID_0000_0001:
			do_cpuid(1, regs);

			error = vm_get_x2apic_state(vcpu, &x2apic_state);
			if (error) {
				panic("x86_emulate_cpuid: error %d "
				      "fetching x2apic state", error);
			}

			/*
			 * Override the APIC ID only in ebx
			 */
			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);

			/*
			 * Don't expose VMX, SpeedStep, TME or SMX capability.
			 * Advertise x2APIC capability and Hypervisor guest.
			 */
			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
			regs[2] &= ~(CPUID2_SMX);

			regs[2] |= CPUID2_HV;

			if (x2apic_state != X2APIC_DISABLED)
				regs[2] |= CPUID2_X2APIC;
			else
				regs[2] &= ~CPUID2_X2APIC;

			/*
			 * Only advertise CPUID2_XSAVE in the guest if
			 * the host is using XSAVE.
			 */
			if (!(regs[2] & CPUID2_OSXSAVE))
				regs[2] &= ~CPUID2_XSAVE;

			/*
			 * If CPUID2_XSAVE is being advertised and the
			 * guest has set CR4_XSAVE, set
			 * CPUID2_OSXSAVE.
			 */
			regs[2] &= ~CPUID2_OSXSAVE;
			if (regs[2] & CPUID2_XSAVE) {
				error = vm_get_register(vcpu,
				    VM_REG_GUEST_CR4, &cr4);
				if (error)
					panic("x86_emulate_cpuid: error %d "
					      "fetching %%cr4", error);
				if (cr4 & CR4_XSAVE)
					regs[2] |= CPUID2_OSXSAVE;
			}

			/*
			 * Hide monitor/mwait until we know how to deal with
			 * these instructions.
			 */
			regs[2] &= ~CPUID2_MON;

			/*
			 * Hide the performance and debug features.
			 */
			regs[2] &= ~CPUID2_PDCM;

			/*
			 * No TSC deadline support in the APIC yet
			 */
			regs[2] &= ~CPUID2_TSCDLT;

			/*
			 * Hide thermal monitoring
			 */
			regs[3] &= ~(CPUID_ACPI | CPUID_TM);

			/*
			 * Hide the debug store capability.
			 */
			regs[3] &= ~CPUID_DS;

			/*
			 * Advertise the Machine Check and MTRR capability.
			 *
			 * Some guest OSes (e.g. Windows) will not boot if
			 * these features are absent.
			 */
			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);

			vm_get_topology(vm, &sockets, &cores, &threads,
			    &maxcpus);
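			/*
			 * CPUID.1:EBX bits 23:16 give the maximum number of
			 * addressable logical processors in the package;
			 * the field is only meaningful when CPUID_HTT is
			 * set in EDX.
			 */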
			logical_cpus = threads * cores;
			regs[1] &= ~CPUID_HTT_CORES;
			regs[1] |= (logical_cpus & 0xff) << 16;
			regs[3] |= CPUID_HTT;
			break;

		case CPUID_0000_0004:
			cpuid_count(func, param, regs);

			if (regs[0] || regs[1] || regs[2] || regs[3]) {
				vm_get_topology(vm, &sockets, &cores, &threads,
				    &maxcpus);
				regs[0] &= 0x3ff;
				regs[0] |= (cores - 1) << 26;
				/*
				 * Cache topology:
				 * - L1 and L2 are shared only by the logical
				 *   processors in a single core.
				 * - L3 and above are shared by all logical
				 *   processors in the package.
				 */
				logical_cpus = threads;
				level = (regs[0] >> 5) & 0x7;
				if (level >= 3)
					logical_cpus *= cores;
				regs[0] |= (logical_cpus - 1) << 14;
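				/*
				 * Per Intel's deterministic cache leaf,
				 * EAX bits 31:26 hold (cores per package -
				 * 1) and bits 25:14 hold (logical
				 * processors sharing this cache - 1).
				 */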
			}
			break;

		case CPUID_0000_0007:
			regs[0] = 0;
			regs[1] = 0;
			regs[2] = 0;
			regs[3] = 0;

			/* leaf 0 */
			if (param == 0) {
				cpuid_count(func, param, regs);

				/* Only leaf 0 is supported */
				regs[0] = 0;

				/*
				 * Expose known-safe features.
				 */
				regs[1] &= CPUID_STDEXT_FSGSBASE |
				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_SMEP |
				    CPUID_STDEXT_BMI2 |
				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
				    CPUID_STDEXT_AVX512F |
				    CPUID_STDEXT_AVX512DQ |
				    CPUID_STDEXT_RDSEED |
				    CPUID_STDEXT_SMAP |
				    CPUID_STDEXT_AVX512PF |
				    CPUID_STDEXT_AVX512ER |
				    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA |
				    CPUID_STDEXT_AVX512BW |
				    CPUID_STDEXT_AVX512VL;
				regs[2] &= CPUID_STDEXT2_VAES |
				    CPUID_STDEXT2_VPCLMULQDQ;
				regs[3] &= CPUID_STDEXT3_MD_CLEAR;

				/* Advertise RDPID if it is enabled. */
				error = vm_get_capability(vcpu, VM_CAP_RDPID,
				    &enable_rdpid);
				if (error == 0 && enable_rdpid)
					regs[2] |= CPUID_STDEXT2_RDPID;

				/* Advertise INVPCID if it is enabled. */
				error = vm_get_capability(vcpu,
				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
				if (error == 0 && enable_invpcid)
					regs[1] |= CPUID_STDEXT_INVPCID;
			}
			break;

		case CPUID_0000_0006:
			regs[0] = CPUTPM1_ARAT;
			regs[1] = 0;
			regs[2] = 0;
			regs[3] = 0;
			break;

		case CPUID_0000_000A:
			/*
			 * Handle the access, but report 0 for
			 * all options
			 */
			regs[0] = 0;
			regs[1] = 0;
			regs[2] = 0;
			regs[3] = 0;
			break;

		case CPUID_0000_000B:
			/*
			 * Intel processor topology enumeration
			 */
			if (vmm_is_intel()) {
				vm_get_topology(vm, &sockets, &cores, &threads,
				    &maxcpus);
				if (param == 0) {
					logical_cpus = threads;
					width = log2(logical_cpus);
					level = CPUID_TYPE_SMT;
					x2apic_id = vcpu_id;
				}

				if (param == 1) {
					logical_cpus = threads * cores;
					width = log2(logical_cpus);
					level = CPUID_TYPE_CORE;
					x2apic_id = vcpu_id;
				}

				if (!cpuid_leaf_b || param >= 2) {
					width = 0;
					logical_cpus = 0;
					level = 0;
					x2apic_id = 0;
				}

				regs[0] = width & 0x1f;
				regs[1] = logical_cpus & 0xffff;
				regs[2] = (level << 8) | (param & 0xff);
				regs[3] = x2apic_id;
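				/*
				 * Per the extended topology leaf: EAX[4:0]
				 * is the APIC ID shift to the next level,
				 * EBX[15:0] the logical processor count at
				 * this level, ECX[15:8] the level type and
				 * EDX the x2APIC ID.
				 */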
			} else {
				regs[0] = 0;
				regs[1] = 0;
				regs[2] = 0;
				regs[3] = 0;
			}
			break;

		case CPUID_0000_000D:
			limits = vmm_get_xsave_limits();
			if (!limits->xsave_enabled) {
				regs[0] = 0;
				regs[1] = 0;
				regs[2] = 0;
				regs[3] = 0;
				break;
			}

			cpuid_count(func, param, regs);
			switch (param) {
			case 0:
				/*
				 * Only permit the guest to use bits
				 * that are active in the host in
				 * %xcr0.  Also, claim that the
				 * maximum save area size is
				 * equivalent to the host's current
				 * save area size.  Since this runs
				 * "inside" of vmrun(), it runs with
				 * the guest's xcr0, so the current
				 * save area size is correct as-is.
				 */
				regs[0] &= limits->xcr0_allowed;
				regs[2] = limits->xsave_max_size;
				regs[3] &= (limits->xcr0_allowed >> 32);
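				/*
				 * EAX and EDX report the low and high 32
				 * bits, respectively, of the XCR0 mask the
				 * guest may set.
				 */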
				break;
			case 1:
				/* Only permit XSAVEOPT. */
				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
				regs[1] = 0;
				regs[2] = 0;
				regs[3] = 0;
				break;
			default:
				/*
				 * If the leaf is for a permitted feature,
				 * pass through as-is, otherwise return
				 * all zeroes.
				 */
				if (!(limits->xcr0_allowed & (1ul << param))) {
					regs[0] = 0;
					regs[1] = 0;
					regs[2] = 0;
					regs[3] = 0;
				}
				break;
			}
			break;

		case CPUID_0000_000F:
		case CPUID_0000_0010:
			/*
			 * Do not report any Resource Director Technology
			 * capabilities.  Exposing control of cache or memory
			 * controller resource partitioning to the guest is not
			 * at all sensible.
			 *
			 * This is already hidden at a high level by masking of
			 * leaf 0x7.  Even so, a guest may look here for
			 * detailed capability information.
			 */
			regs[0] = 0;
			regs[1] = 0;
			regs[2] = 0;
			regs[3] = 0;
			break;

		case CPUID_0000_0015:
			/*
			 * Don't report CPU TSC/Crystal ratio and clock
			 * values since guests may use these to derive the
			 * local APIC frequency.
			 */
			regs[0] = 0;
			regs[1] = 0;
			regs[2] = 0;
			regs[3] = 0;
			break;

		case 0x40000000:
			regs[0] = CPUID_VM_HIGH;
			bcopy(bhyve_id, &regs[1], 4);
			bcopy(bhyve_id + 4, &regs[2], 4);
			bcopy(bhyve_id + 8, &regs[3], 4);
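			/*
			 * Per the de facto hypervisor CPUID convention,
			 * EAX holds the maximum hypervisor leaf and
			 * EBX/ECX/EDX the 12-byte vendor signature.
			 */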
			break;

		default:
default_leaf:
			/*
			 * The leaf value has already been clamped so
			 * simply pass this through, keeping count of
			 * how many unhandled leaf values have been seen.
			 */
			atomic_add_long(&bhyve_xcpuids, 1);
			cpuid_count(func, param, regs);
			break;
	}

	/*
	 * CPUID clears the upper 32-bits of the long-mode registers.
	 */
	*rax = regs[0];
	*rbx = regs[1];
	*rcx = regs[2];
	*rdx = regs[3];

	return (1);
}

bool
vm_cpuid_capability(struct vcpu *vcpu, enum vm_cpuid_capability cap)
{
	bool rv;

	KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
	    __func__, cap));

	/*
	 * Simply pass through the capabilities of the host cpu for now.
	 */
	rv = false;
	switch (cap) {
	case VCC_NO_EXECUTE:
		if (amd_feature & AMDID_NX)
			rv = true;
		break;
	case VCC_FFXSR:
		if (amd_feature & AMDID_FFXSR)
			rv = true;
		break;
	case VCC_TCE:
		if (amd_feature2 & AMDID2_TCE)
			rv = true;
		break;
	default:
		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
	}
	return (rv);
}

int
vm_rdmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t *val)
{
	switch (num) {
	case MSR_MTRRcap:
		*val = MTRR_CAP_WC | MTRR_CAP_FIXED | VMM_MTRR_VAR_MAX;
		break;
	case MSR_MTRRdefType:
		*val = mtrr->def_type;
		break;
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
		*val = mtrr->fixed4k[num - MSR_MTRR4kBase];
		break;
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
		*val = mtrr->fixed16k[num - MSR_MTRR16kBase];
		break;
	case MSR_MTRR64kBase:
		*val = mtrr->fixed64k;
		break;
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
		u_int offset = num - MSR_MTRRVarBase;
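		/*
		 * Variable-range MTRRs are (PHYSBASE, PHYSMASK) register
		 * pairs, so an even offset from MSR_MTRRVarBase selects a
		 * base register and an odd offset the matching mask.
		 */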
		if (offset % 2 == 0) {
			*val = mtrr->var[offset / 2].base;
		} else {
			*val = mtrr->var[offset / 2].mask;
		}
		break;
	}
	default:
		return (-1);
	}

	return (0);
}

int
vm_wrmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t val)
{
	switch (num) {
	case MSR_MTRRcap:
		/* MTRRCAP is read only */
		return (-1);
	case MSR_MTRRdefType:
		if (val & ~VMM_MTRR_DEF_MASK) {
			/* generate #GP on writes to reserved fields */
			return (-1);
		}
		mtrr->def_type = val;
		break;
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
		mtrr->fixed4k[num - MSR_MTRR4kBase] = val;
		break;
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
		mtrr->fixed16k[num - MSR_MTRR16kBase] = val;
		break;
	case MSR_MTRR64kBase:
		mtrr->fixed64k = val;
		break;
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
		u_int offset = num - MSR_MTRRVarBase;
		if (offset % 2 == 0) {
			if (val & ~VMM_MTRR_PHYSBASE_MASK) {
				/* generate #GP on writes to reserved fields */
				return (-1);
			}
			mtrr->var[offset / 2].base = val;
		} else {
			if (val & ~VMM_MTRR_PHYSMASK_MASK) {
				/* generate #GP on writes to reserved fields */
				return (-1);
			}
			mtrr->var[offset / 2].mask = val;
		}
		break;
	}
	default:
		return (-1);
	}

	return (0);
}