xref: /freebsd/sys/amd64/vmm/x86.c (revision 0698ce429f78f548f7eb3e54476fb312109ddd8b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/pcpu.h>
31 #include <sys/systm.h>
32 #include <sys/sysctl.h>
33 
34 #include <machine/clock.h>
35 #include <machine/cpufunc.h>
36 #include <machine/md_var.h>
37 #include <machine/segments.h>
38 #include <machine/specialreg.h>
39 #include <machine/vmm.h>
40 
41 #include <dev/vmm/vmm_ktr.h>
42 
43 #include "vmm_host.h"
44 #include "vmm_util.h"
45 #include "x86.h"
46 
47 SYSCTL_DECL(_hw_vmm);
48 static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
49     NULL);
50 
51 #define	CPUID_VM_HIGH		0x40000000
52 
53 static const char bhyve_id[12] = "bhyve bhyve ";
54 
55 static uint64_t bhyve_xcpuids;
56 SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
57     "Number of times an unknown cpuid leaf was accessed");
58 
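/*
 * If set to zero (settable as a loader tunable), CPUID leaf 0xB reports
 * all-zero levels instead of the SMT/core hierarchy synthesized from the
 * VM's configured topology.
 */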
59 static int cpuid_leaf_b = 1;
60 SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
61     &cpuid_leaf_b, 0, NULL);
62 
63 /*
64  * Compute ceil(log2(x)).  Returns -1 if x is zero.
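 * For example, log2(1) == 0, log2(4) == 2, log2(5) == 3.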
65  */
66 static __inline int
67 log2(u_int x)
68 {
69 
70 	return (x == 0 ? -1 : order_base_2(x));
71 }
72 
73 int
74 x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx,
75     uint64_t *rcx, uint64_t *rdx)
76 {
77 	struct vm *vm = vcpu_vm(vcpu);
78 	int vcpu_id = vcpu_vcpuid(vcpu);
79 	const struct xsave_limits *limits;
80 	uint64_t cr4;
81 	int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
82 	    width, x2apic_id;
83 	unsigned int func, regs[4], logical_cpus, param;
84 	enum x2apic_state x2apic_state;
85 	uint16_t cores, maxcpus, sockets, threads;
86 
87 	/*
88 	 * The function of CPUID is controlled through the provided value of
89 	 * %eax (and secondarily %ecx, for certain leaf data).
90 	 */
91 	func = (uint32_t)*rax;
92 	param = (uint32_t)*rcx;
93 
94 	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", func, param);
95 
96 	/*
97 	 * Requests for invalid CPUID levels should map to the highest
98 	 * available level instead.
99 	 */
100 	if (cpu_exthigh != 0 && func >= 0x80000000) {
101 		if (func > cpu_exthigh)
102 			func = cpu_exthigh;
103 	} else if (func >= 0x40000000) {
104 		if (func > CPUID_VM_HIGH)
105 			func = CPUID_VM_HIGH;
106 	} else if (func > cpu_high) {
107 		func = cpu_high;
108 	}
109 
110 	/*
111 	 * In general the approach used for CPU topology is to
112 	 * advertise a flat topology where all CPUs are packages with
113 	 * no multi-core or SMT.
114 	 */
115 	switch (func) {
116 		/*
117 		 * Pass these through to the guest
118 		 */
119 		case CPUID_0000_0000:
120 		case CPUID_0000_0002:
121 		case CPUID_0000_0003:
122 		case CPUID_8000_0000:
123 		case CPUID_8000_0002:
124 		case CPUID_8000_0003:
125 		case CPUID_8000_0004:
126 		case CPUID_8000_0006:
127 			cpuid_count(func, param, regs);
128 			break;
129 		case CPUID_8000_0008:
130 			cpuid_count(func, param, regs);
131 			if (vmm_is_svm()) {
132 				/*
133 				 * As on Intel (0000_0007:0, EDX), mask out
134 				 * unsupported or unsafe AMD extended features
135 				 * (8000_0008 EBX).
136 				 */
137 				regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
138 				    AMDFEID_XSAVEERPTR);
139 
140 				vm_get_topology(vm, &sockets, &cores, &threads,
141 				    &maxcpus);
142 				/*
143 				 * Here, width is ApicIdCoreIdSize, present on
144 				 * at least Family 15h and newer.  It
145 				 * represents the "number of bits in the
146 				 * initial apicid that indicate thread id
147 				 * within a package."
148 				 *
149 				 * Our topo_probe_amd() uses it for
150 				 * pkg_id_shift and other OSes may rely on it.
151 				 */
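				/*
				 * ECX[15:12] = ApicIdCoreIdSize,
				 * ECX[7:0] = NC (logical CPUs per package - 1).
				 */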
152 				width = MIN(0xF, log2(threads * cores));
153 				logical_cpus = MIN(0xFF, threads * cores - 1);
154 				regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
155 			}
156 			break;
157 
158 		case CPUID_8000_0001:
159 			cpuid_count(func, param, regs);
160 
161 			/*
162 			 * Hide SVM from guest.
163 			 */
164 			regs[2] &= ~AMDID2_SVM;
165 
166 			/*
167 			 * Don't advertise extended performance counter MSRs
168 			 * to the guest.
169 			 */
170 			regs[2] &= ~AMDID2_PCXC;
171 			regs[2] &= ~AMDID2_PNXC;
172 			regs[2] &= ~AMDID2_PTSCEL2I;
173 
174 			/*
175 			 * Don't advertise Instruction Based Sampling feature.
176 			 */
177 			regs[2] &= ~AMDID2_IBS;
178 
179 			/* NodeID MSR not available */
180 			regs[2] &= ~AMDID2_NODE_ID;
181 
182 			/* Don't advertise the OS visible workaround feature */
183 			regs[2] &= ~AMDID2_OSVW;
184 
185 			/* Hide mwaitx/monitorx capability from the guest */
186 			regs[2] &= ~AMDID2_MWAITX;
187 
188 			/* Advertise RDTSCP if it is enabled. */
189 			error = vm_get_capability(vcpu,
190 			    VM_CAP_RDTSCP, &enable_rdtscp);
191 			if (error == 0 && enable_rdtscp)
192 				regs[3] |= AMDID_RDTSCP;
193 			else
194 				regs[3] &= ~AMDID_RDTSCP;
195 			break;
196 
197 		case CPUID_8000_0007:
198 			/*
199 			 * AMD uses this leaf to advertise the processor's
200 			 * power monitoring and RAS capabilities. These
201 			 * features are hardware-specific and exposing
202 			 * them to a guest doesn't make a lot of sense.
203 			 *
204 			 * Intel uses this leaf only to advertise the
205 			 * "Invariant TSC" feature with all other bits
206 			 * being reserved (set to zero).
207 			 */
208 			regs[0] = 0;
209 			regs[1] = 0;
210 			regs[2] = 0;
211 			regs[3] = 0;
212 
213 			/*
214 			 * "Invariant TSC" can be advertised to the guest if:
215 			 * - host TSC frequency is invariant
216 			 * - host TSCs are synchronized across physical cpus
217 			 *
218 			 * XXX This still falls short because the vcpu
219 			 * can observe the TSC moving backwards as it
220 			 * migrates across physical cpus. But at least
221 			 * it should discourage the guest from using the
222 			 * TSC to keep track of time.
223 			 */
224 			if (tsc_is_invariant && smp_tsc)
225 				regs[3] |= AMDPM_TSC_INVARIANT;
226 			break;
227 
228 		case CPUID_8000_001D:
229 			/* AMD Cache topology, like 0000_0004 for Intel. */
230 			if (!vmm_is_svm())
231 				goto default_leaf;
232 
233 			/*
234 			 * Similar to Intel, generate a fictitious cache
235 			 * topology for the guest with L3 shared by the
236 			 * package, and L1 and L2 local to a core.
237 			 */
238 			vm_get_topology(vm, &sockets, &cores, &threads,
239 			    &maxcpus);
240 			switch (param) {
241 			case 0:
242 				logical_cpus = threads;
243 				level = 1;
244 				func = 1;	/* data cache */
245 				break;
246 			case 1:
247 				logical_cpus = threads;
248 				level = 2;
249 				func = 3;	/* unified cache */
250 				break;
251 			case 2:
252 				logical_cpus = threads * cores;
253 				level = 3;
254 				func = 3;	/* unified cache */
255 				break;
256 			default:
257 				logical_cpus = sockets * threads * cores;
258 				level = 0;
259 				func = 0;
260 				break;
261 			}
262 
263 			logical_cpus = MIN(0xfff, logical_cpus - 1);
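			/*
			 * EAX[25:14] = sharing logical CPUs - 1,
			 * EAX[8] = self-initializing, EAX[7:5] = cache level,
			 * EAX[4:0] = cache type; EBX[11:0] = line size - 1.
			 */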
264 			regs[0] = (logical_cpus << 14) | (1 << 8) |
265 			    (level << 5) | func;
266 			regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
267 
268 			/*
269 			 * ecx: Number of cache ways for non-fully
270 			 * associative cache, minus 1.  Reported value
271 			 * of zero means there is one way.
272 			 */
273 			regs[2] = 0;
274 
275 			regs[3] = 0;
276 			break;
277 
278 		case CPUID_8000_001E:
279 			/*
280 			 * AMD Family 16h+ and Hygon Family 18h additional
281 			 * identifiers.
282 			 */
283 			if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16)
284 				goto default_leaf;
285 
286 			vm_get_topology(vm, &sockets, &cores, &threads,
287 			    &maxcpus);
288 			regs[0] = vcpu_id;
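			/*
			 * EBX[15:8] = threads per core - 1, EBX[7:0] = core
			 * ID (the APIC ID shifted past the SMT bits).
			 */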
289 			threads = MIN(0xFF, threads - 1);
290 			regs[1] = (threads << 8) |
291 			    (vcpu_id >> log2(threads + 1));
292 			/*
293 			 * XXX Bhyve topology cannot yet represent >1 node per
294 			 * processor.
295 			 */
296 			regs[2] = 0;
297 			regs[3] = 0;
298 			break;
299 
300 		case CPUID_0000_0001:
301 			do_cpuid(1, regs);
302 
303 			error = vm_get_x2apic_state(vcpu, &x2apic_state);
304 			if (error) {
305 				panic("x86_emulate_cpuid: error %d "
306 				      "fetching x2apic state", error);
307 			}
308 
309 			/*
310 			 * Override the APIC ID only in ebx
311 			 */
312 			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
313 			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
314 
315 			/*
316 			 * Don't expose VMX, SpeedStep (EST), TM2 or SMX capability.
317 			 * Advertise x2APIC capability and Hypervisor guest.
318 			 */
319 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
320 			regs[2] &= ~(CPUID2_SMX);
321 
322 			regs[2] |= CPUID2_HV;
323 
324 			if (x2apic_state != X2APIC_DISABLED)
325 				regs[2] |= CPUID2_X2APIC;
326 			else
327 				regs[2] &= ~CPUID2_X2APIC;
328 
329 			/*
330 			 * Only advertise CPUID2_XSAVE in the guest if
331 			 * the host is using XSAVE.
332 			 */
333 			if (!(regs[2] & CPUID2_OSXSAVE))
334 				regs[2] &= ~CPUID2_XSAVE;
335 
336 			/*
337 			 * If CPUID2_XSAVE is being advertised and the
338 			 * guest has set CR4_XSAVE, set
339 			 * CPUID2_OSXSAVE.
340 			 */
341 			regs[2] &= ~CPUID2_OSXSAVE;
342 			if (regs[2] & CPUID2_XSAVE) {
343 				error = vm_get_register(vcpu,
344 				    VM_REG_GUEST_CR4, &cr4);
345 				if (error)
346 					panic("x86_emulate_cpuid: error %d "
347 					      "fetching %%cr4", error);
348 				if (cr4 & CR4_XSAVE)
349 					regs[2] |= CPUID2_OSXSAVE;
350 			}
351 
352 			/*
353 			 * Hide monitor/mwait until we know how to deal with
354 			 * these instructions.
355 			 */
356 			regs[2] &= ~CPUID2_MON;
357 
358 			/*
359 			 * Hide the performance and debug features.
360 			 */
361 			regs[2] &= ~CPUID2_PDCM;
362 
363 			/*
364 			 * No TSC deadline support in the APIC yet
365 			 */
366 			regs[2] &= ~CPUID2_TSCDLT;
367 
368 			/*
369 			 * Hide thermal monitoring
370 			 */
371 			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
372 
373 			/*
374 			 * Hide the debug store capability.
375 			 */
376 			regs[3] &= ~CPUID_DS;
377 
378 			/*
379 			 * Advertise the Machine Check and MTRR capability.
380 			 *
381 			 * Some guest OSes (e.g. Windows) will not boot if
382 			 * these features are absent.
383 			 */
384 			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
385 
386 			vm_get_topology(vm, &sockets, &cores, &threads,
387 			    &maxcpus);
388 			logical_cpus = threads * cores;
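			/*
			 * EBX[23:16] = maximum addressable logical CPUs per
			 * package; only meaningful when CPUID_HTT is set in
			 * EDX.
			 */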
389 			regs[1] &= ~CPUID_HTT_CORES;
390 			regs[1] |= (logical_cpus & 0xff) << 16;
391 			regs[3] |= CPUID_HTT;
392 			break;
393 
394 		case CPUID_0000_0004:
395 			cpuid_count(func, param, regs);
396 
397 			if (regs[0] || regs[1] || regs[2] || regs[3]) {
398 				vm_get_topology(vm, &sockets, &cores, &threads,
399 				    &maxcpus);
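				/*
				 * EAX[31:26] = cores per package - 1,
				 * EAX[25:14] = logical CPUs sharing this
				 * cache - 1; keep only the host's low cache
				 * type/level bits (9:0).
				 */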
400 				regs[0] &= 0x3ff;
401 				regs[0] |= (cores - 1) << 26;
402 				/*
403 				 * Cache topology:
404 				 * - L1 and L2 are shared only by the logical
405 				 *   processors in a single core.
406 				 * - L3 and above are shared by all logical
407 				 *   processors in the package.
408 				 */
409 				logical_cpus = threads;
410 				level = (regs[0] >> 5) & 0x7;
411 				if (level >= 3)
412 					logical_cpus *= cores;
413 				regs[0] |= (logical_cpus - 1) << 14;
414 			}
415 			break;
416 
417 		case CPUID_0000_0007:
418 			regs[0] = 0;
419 			regs[1] = 0;
420 			regs[2] = 0;
421 			regs[3] = 0;
422 
423 			/* leaf 0 */
424 			if (param == 0) {
425 				cpuid_count(func, param, regs);
426 
427 				/* Only leaf 0 is supported */
428 				regs[0] = 0;
429 
430 				/*
431 				 * Expose known-safe features.
432 				 */
433 				regs[1] &= CPUID_STDEXT_FSGSBASE |
434 				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
435 				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_SMEP |
436 				    CPUID_STDEXT_BMI2 |
437 				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
438 				    CPUID_STDEXT_AVX512F |
439 				    CPUID_STDEXT_AVX512DQ |
440 				    CPUID_STDEXT_RDSEED |
441 				    CPUID_STDEXT_SMAP |
442 				    CPUID_STDEXT_AVX512PF |
443 				    CPUID_STDEXT_AVX512ER |
444 				    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA |
445 				    CPUID_STDEXT_AVX512BW |
446 				    CPUID_STDEXT_AVX512VL;
447 				regs[2] &= CPUID_STDEXT2_VAES |
448 				    CPUID_STDEXT2_VPCLMULQDQ;
449 				regs[3] &= CPUID_STDEXT3_MD_CLEAR;
450 
451 				/* Advertise RDPID if it is enabled. */
452 				error = vm_get_capability(vcpu, VM_CAP_RDPID,
453 				    &enable_rdpid);
454 				if (error == 0 && enable_rdpid)
455 					regs[2] |= CPUID_STDEXT2_RDPID;
456 
457 				/* Advertise INVPCID if it is enabled. */
458 				error = vm_get_capability(vcpu,
459 				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
460 				if (error == 0 && enable_invpcid)
461 					regs[1] |= CPUID_STDEXT_INVPCID;
462 			}
463 			break;
464 
465 		case CPUID_0000_0006:
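			/*
			 * Thermal/power management leaf: advertise only ARAT
			 * (the APIC timer keeps running in deep C-states).
			 */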
466 			regs[0] = CPUTPM1_ARAT;
467 			regs[1] = 0;
468 			regs[2] = 0;
469 			regs[3] = 0;
470 			break;
471 
472 		case CPUID_0000_000A:
473 			/*
474 			 * Architectural performance monitoring leaf: handle
475 			 * the access, but report zero for all capabilities.
476 			 */
477 			regs[0] = 0;
478 			regs[1] = 0;
479 			regs[2] = 0;
480 			regs[3] = 0;
481 			break;
482 
483 		case CPUID_0000_000B:
484 			/*
485 			 * Intel processor topology enumeration
486 			 */
487 			if (vmm_is_intel()) {
488 				vm_get_topology(vm, &sockets, &cores, &threads,
489 				    &maxcpus);
490 				if (param == 0) {
491 					logical_cpus = threads;
492 					width = log2(logical_cpus);
493 					level = CPUID_TYPE_SMT;
494 					x2apic_id = vcpu_id;
495 				}
496 
497 				if (param == 1) {
498 					logical_cpus = threads * cores;
499 					width = log2(logical_cpus);
500 					level = CPUID_TYPE_CORE;
501 					x2apic_id = vcpu_id;
502 				}
503 
504 				if (!cpuid_leaf_b || param >= 2) {
505 					width = 0;
506 					logical_cpus = 0;
507 					level = 0;
508 					x2apic_id = 0;
509 				}
510 
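				/*
				 * EAX[4:0] = bits to shift the x2APIC ID to
				 * reach the next level, EBX[15:0] = logical
				 * CPUs at this level, ECX[15:8] = level type,
				 * ECX[7:0] = input level, EDX = x2APIC ID.
				 */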
511 				regs[0] = width & 0x1f;
512 				regs[1] = logical_cpus & 0xffff;
513 				regs[2] = (level << 8) | (param & 0xff);
514 				regs[3] = x2apic_id;
515 			} else {
516 				regs[0] = 0;
517 				regs[1] = 0;
518 				regs[2] = 0;
519 				regs[3] = 0;
520 			}
521 			break;
522 
523 		case CPUID_0000_000D:
524 			limits = vmm_get_xsave_limits();
525 			if (!limits->xsave_enabled) {
526 				regs[0] = 0;
527 				regs[1] = 0;
528 				regs[2] = 0;
529 				regs[3] = 0;
530 				break;
531 			}
532 
533 			cpuid_count(func, param, regs);
534 			switch (param) {
535 			case 0:
536 				/*
537 				 * Only permit the guest to use bits
538 				 * that are active in the host in
539 				 * %xcr0.  Also, claim that the
540 				 * maximum save area size is
541 				 * equivalent to the host's current
542 				 * save area size.  Since this runs
543 				 * "inside" of vmrun(), it runs with
544 				 * the guest's xcr0, so the current
545 				 * save area size is correct as-is.
546 				 */
547 				regs[0] &= limits->xcr0_allowed;
548 				regs[2] = limits->xsave_max_size;
549 				regs[3] &= (limits->xcr0_allowed >> 32);
550 				break;
551 			case 1:
552 				/* Only permit XSAVEOPT. */
553 				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
554 				regs[1] = 0;
555 				regs[2] = 0;
556 				regs[3] = 0;
557 				break;
558 			default:
559 				/*
560 				 * If the leaf is for a permitted feature,
561 				 * pass through as-is, otherwise return
562 				 * all zeroes.
563 				 */
564 				if (!(limits->xcr0_allowed & (1ul << param))) {
565 					regs[0] = 0;
566 					regs[1] = 0;
567 					regs[2] = 0;
568 					regs[3] = 0;
569 				}
570 				break;
571 			}
572 			break;
573 
574 		case CPUID_0000_000F:
575 		case CPUID_0000_0010:
576 			/*
577 			 * Do not report any Resource Director Technology
578 			 * capabilities.  Exposing control of cache or memory
579 			 * controller resource partitioning to the guest is not
580 			 * at all sensible.
581 			 *
582 			 * This is already hidden at a high level by masking of
583 			 * leaf 0x7.  Even still, a guest may look here for
584 			 * detailed capability information.
585 			 */
586 			regs[0] = 0;
587 			regs[1] = 0;
588 			regs[2] = 0;
589 			regs[3] = 0;
590 			break;
591 
592 		case CPUID_0000_0015:
593 			/*
594 			 * Don't report CPU TSC/Crystal ratio and clock
595 			 * values since guests may use these to derive the
596 			 * local APIC frequency.
597 			 */
598 			regs[0] = 0;
599 			regs[1] = 0;
600 			regs[2] = 0;
601 			regs[3] = 0;
602 			break;
603 
604 		case 0x40000000:
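			/*
			 * Hypervisor vendor leaf: EAX = highest hypervisor
			 * CPUID leaf, EBX:ECX:EDX = 12-byte vendor signature.
			 */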
605 			regs[0] = CPUID_VM_HIGH;
606 			bcopy(bhyve_id, &regs[1], 4);
607 			bcopy(bhyve_id + 4, &regs[2], 4);
608 			bcopy(bhyve_id + 8, &regs[3], 4);
609 			break;
610 
611 		default:
612 default_leaf:
613 			/*
614 			 * The leaf value has already been clamped so
615 			 * simply pass this through, keeping count of
616 			 * how many unhandled leaf values have been seen.
617 			 */
618 			atomic_add_long(&bhyve_xcpuids, 1);
619 			cpuid_count(func, param, regs);
620 			break;
621 	}
622 
623 	/*
624 	 * CPUID clears the upper 32-bits of the long-mode registers.
625 	 */
626 	*rax = regs[0];
627 	*rbx = regs[1];
628 	*rcx = regs[2];
629 	*rdx = regs[3];
630 
631 	return (1);
632 }
633 
634 bool
635 vm_cpuid_capability(struct vcpu *vcpu, enum vm_cpuid_capability cap)
636 {
637 	bool rv;
638 
639 	KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
640 	    __func__, cap));
641 
642 	/*
643 	 * Simply pass through the capabilities of the host CPU for now.
644 	 */
645 	rv = false;
646 	switch (cap) {
647 	case VCC_NO_EXECUTE:
648 		if (amd_feature & AMDID_NX)
649 			rv = true;
650 		break;
651 	case VCC_FFXSR:
652 		if (amd_feature & AMDID_FFXSR)
653 			rv = true;
654 		break;
655 	case VCC_TCE:
656 		if (amd_feature2 & AMDID2_TCE)
657 			rv = true;
658 		break;
659 	default:
660 		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
661 	}
662 	return (rv);
663 }
664 
665 int
666 vm_rdmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t *val)
667 {
668 	switch (num) {
669 	case MSR_MTRRcap:
670 		*val = MTRR_CAP_WC | MTRR_CAP_FIXED | VMM_MTRR_VAR_MAX;
671 		break;
672 	case MSR_MTRRdefType:
673 		*val = mtrr->def_type;
674 		break;
675 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
676 		*val = mtrr->fixed4k[num - MSR_MTRR4kBase];
677 		break;
678 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
679 		*val = mtrr->fixed16k[num - MSR_MTRR16kBase];
680 		break;
681 	case MSR_MTRR64kBase:
682 		*val = mtrr->fixed64k;
683 		break;
684 	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
685 		u_int offset = num - MSR_MTRRVarBase;
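		/* Even offsets select MTRRphysBasen, odd MTRRphysMaskn. */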
686 		if (offset % 2 == 0) {
687 			*val = mtrr->var[offset / 2].base;
688 		} else {
689 			*val = mtrr->var[offset / 2].mask;
690 		}
691 		break;
692 	}
693 	default:
694 		return (-1);
695 	}
696 
697 	return (0);
698 }
699 
700 int
701 vm_wrmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t val)
702 {
703 	switch (num) {
704 	case MSR_MTRRcap:
705 		/* MTRRCAP is read only */
706 		return (-1);
707 	case MSR_MTRRdefType:
708 		if (val & ~VMM_MTRR_DEF_MASK) {
709 			/* generate #GP on writes to reserved fields */
710 			return (-1);
711 		}
712 		mtrr->def_type = val;
713 		break;
714 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
715 		mtrr->fixed4k[num - MSR_MTRR4kBase] = val;
716 		break;
717 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
718 		mtrr->fixed16k[num - MSR_MTRR16kBase] = val;
719 		break;
720 	case MSR_MTRR64kBase:
721 		mtrr->fixed64k = val;
722 		break;
723 	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
724 		u_int offset = num - MSR_MTRRVarBase;
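		/* Even offsets select the base MSR, odd the mask MSR. */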
725 		if (offset % 2 == 0) {
726 			if (val & ~VMM_MTRR_PHYSBASE_MASK) {
727 				/* generate #GP on writes to reserved fields */
728 				return (-1);
729 			}
730 			mtrr->var[offset / 2].base = val;
731 		} else {
732 			if (val & ~VMM_MTRR_PHYSMASK_MASK) {
733 				/* generate #GP on writes to reserved fields */
734 				return (-1);
735 			}
736 			mtrr->var[offset / 2].mask = val;
737 		}
738 		break;
739 	}
740 	default:
741 		return (-1);
742 	}
743 
744 	return (0);
745 }
746