xref: /illumos-gate/usr/src/uts/intel/io/vmm/x86.c (revision ba5ca68405ba4441c86a6cfc87f4ddcb3565c81d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 /*
31  * This file and its contents are supplied under the terms of the
32  * Common Development and Distribution License ("CDDL"), version 1.0.
33  * You may only use this file in accordance with the terms of version
34  * 1.0 of the CDDL.
35  *
36  * A full copy of the text of the CDDL should have accompanied this
37  * source.  A copy of the CDDL is also available via the Internet at
38  * http://www.illumos.org/license/CDDL.
39  *
40  * Copyright 2014 Pluribus Networks Inc.
41  * Copyright 2018 Joyent, Inc.
42  * Copyright 2020 Oxide Computer Company
43  */
44 
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD$");
47 
48 #include <sys/param.h>
49 #include <sys/pcpu.h>
50 #include <sys/systm.h>
51 #include <sys/sysctl.h>
52 #include <sys/x86_archext.h>
53 
54 #include <machine/clock.h>
55 #include <machine/cpufunc.h>
56 #include <machine/md_var.h>
57 #include <machine/segments.h>
58 #include <machine/specialreg.h>
59 
60 #include <machine/vmm.h>
61 #include <sys/vmm_kernel.h>
62 
63 #include "vmm_host.h"
64 #include "vmm_util.h"
65 
66 SYSCTL_DECL(_hw_vmm);
67 
/*
 * Highest leaf advertised in the hypervisor CPUID range (0x40000000 and up).
 * Requests above it are clamped down to this leaf.
 */
#define	CPUID_VM_HIGH		0x40000000

/*
 * Hypervisor signature reported through leaf 0x40000000.  It is exactly 12
 * bytes long (deliberately without a terminating NUL) and is copied out in
 * three 4-byte pieces, one each for %ebx, %ecx and %edx.
 */
static const char bhyve_id[12] = "bhyve bhyve ";

/* Number of times an unknown cpuid leaf was accessed */
static uint64_t bhyve_xcpuids;

/*
 * When zero, the Intel topology enumeration leaf (0xB) reports all-zero data
 * for every sub-leaf.
 */
static int cpuid_leaf_b = 1;

/*
 * Force exposure of the invariant TSC capability, regardless of whether the
 * host CPU reports having it.
 */
static int vmm_force_invariant_tsc = 0;

/* CPUID leaf (function) numbers, named after their %eax input value. */
#define	CPUID_0000_0000	(0x0)
#define	CPUID_0000_0001	(0x1)
#define	CPUID_0000_0002	(0x2)
#define	CPUID_0000_0003	(0x3)
#define	CPUID_0000_0004	(0x4)
#define	CPUID_0000_0006	(0x6)
#define	CPUID_0000_0007	(0x7)
#define	CPUID_0000_000A	(0xA)
#define	CPUID_0000_000B	(0xB)
#define	CPUID_0000_000D	(0xD)
#define	CPUID_0000_000F	(0xF)
#define	CPUID_0000_0010	(0x10)
#define	CPUID_0000_0015	(0x15)
#define	CPUID_8000_0000	(0x80000000)
#define	CPUID_8000_0001	(0x80000001)
#define	CPUID_8000_0002	(0x80000002)
#define	CPUID_8000_0003	(0x80000003)
#define	CPUID_8000_0004	(0x80000004)
#define	CPUID_8000_0006	(0x80000006)
#define	CPUID_8000_0007	(0x80000007)
#define	CPUID_8000_0008	(0x80000008)
#define	CPUID_8000_001D	(0x8000001D)
#define	CPUID_8000_001E	(0x8000001E)

/*
 * CPUID instruction Fn0000_0001:
 *
 * The mask is built from an unsigned constant: shifting a signed 0xff left
 * by 24 would overflow into the sign bit and produce a negative mask.
 */
#define	CPUID_0000_0001_APICID_MASK	(0xffU << 24)
#define	CPUID_0000_0001_APICID_SHIFT	24

/*
 * CPUID instruction Fn0000_0001 ECX
 */
#define	CPUID_0000_0001_FEAT0_VMX	(1<<5)
117 
118 
119 /*
120  * Round up to the next power of two, if necessary, and then take log2.
121  * Returns -1 if argument is zero.
122  */
123 static __inline int
124 log2(uint_t x)
125 {
126 
127 	return (fls(x << (1 - powerof2(x))) - 1);
128 }
129 
130 int
131 x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx,
132     uint64_t *rcx, uint64_t *rdx)
133 {
134 	const struct xsave_limits *limits;
135 	uint64_t cr4;
136 	int error, enable_invpcid, level, width = 0, x2apic_id = 0;
137 	unsigned int func, regs[4], logical_cpus = 0, param;
138 	enum x2apic_state x2apic_state;
139 	uint16_t cores, maxcpus, sockets, threads;
140 
141 	/*
142 	 * The function of CPUID is controlled through the provided value of
143 	 * %eax (and secondarily %ecx, for certain leaf data).
144 	 */
145 	func = (uint32_t)*rax;
146 	param = (uint32_t)*rcx;
147 
148 	/*
149 	 * Requests for invalid CPUID levels should map to the highest
150 	 * available level instead.
151 	 */
152 	if (cpu_exthigh != 0 && func >= 0x80000000) {
153 		if (func > cpu_exthigh)
154 			func = cpu_exthigh;
155 	} else if (func >= 0x40000000) {
156 		if (func > CPUID_VM_HIGH)
157 			func = CPUID_VM_HIGH;
158 	} else if (func > cpu_high) {
159 		func = cpu_high;
160 	}
161 
162 	/*
163 	 * In general the approach used for CPU topology is to
164 	 * advertise a flat topology where all CPUs are packages with
165 	 * no multi-core or SMT.
166 	 */
167 	switch (func) {
168 		/*
169 		 * Pass these through to the guest
170 		 */
171 		case CPUID_0000_0000:
172 		case CPUID_0000_0002:
173 		case CPUID_0000_0003:
174 		case CPUID_8000_0000:
175 		case CPUID_8000_0002:
176 		case CPUID_8000_0003:
177 		case CPUID_8000_0004:
178 		case CPUID_8000_0006:
179 			cpuid_count(func, param, regs);
180 			break;
181 		case CPUID_8000_0008:
182 			cpuid_count(func, param, regs);
183 			if (vmm_is_svm()) {
184 				/*
185 				 * As on Intel (0000_0007:0, EDX), mask out
186 				 * unsupported or unsafe AMD extended features
187 				 * (8000_0008 EBX).
188 				 */
189 				regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
190 				    AMDFEID_XSAVEERPTR);
191 
192 				vm_get_topology(vm, &sockets, &cores, &threads,
193 				    &maxcpus);
194 				/*
195 				 * Here, width is ApicIdCoreIdSize, present on
196 				 * at least Family 15h and newer.  It
197 				 * represents the "number of bits in the
198 				 * initial apicid that indicate thread id
199 				 * within a package."
200 				 *
201 				 * Our topo_probe_amd() uses it for
202 				 * pkg_id_shift and other OSes may rely on it.
203 				 */
204 				width = MIN(0xF, log2(threads * cores));
205 				if (width < 0x4)
206 					width = 0;
207 				logical_cpus = MIN(0xFF, threads * cores - 1);
208 				regs[2] = (width << AMDID_COREID_SIZE_SHIFT) |
209 				    logical_cpus;
210 			}
211 			break;
212 
213 		case CPUID_8000_0001:
214 			cpuid_count(func, param, regs);
215 
216 			/*
217 			 * Hide SVM from guest.
218 			 */
219 			regs[2] &= ~AMDID2_SVM;
220 
221 			/*
222 			 * Don't advertise extended performance counter MSRs
223 			 * to the guest.
224 			 */
225 			regs[2] &= ~AMDID2_PCXC;
226 			regs[2] &= ~AMDID2_PNXC;
227 			regs[2] &= ~AMDID2_PTSCEL2I;
228 
229 			/*
230 			 * Don't advertise Instruction Based Sampling feature.
231 			 */
232 			regs[2] &= ~AMDID2_IBS;
233 
234 			/* NodeID MSR not available */
235 			regs[2] &= ~AMDID2_NODE_ID;
236 
237 			/* Don't advertise the OS visible workaround feature */
238 			regs[2] &= ~AMDID2_OSVW;
239 
240 			/* Hide mwaitx/monitorx capability from the guest */
241 			regs[2] &= ~AMDID2_MWAITX;
242 
243 #ifndef __FreeBSD__
244 			/*
245 			 * Detection routines for TCE and FFXSR are missing
246 			 * from our vm_cpuid_capability() detection logic
247 			 * today.  Mask them out until that is remedied.
248 			 * They do not appear to be in common usage, so their
249 			 * absence should not cause undue trouble.
250 			 */
251 			regs[2] &= ~AMDID2_TCE;
252 			regs[3] &= ~AMDID_FFXSR;
253 #endif
254 
255 			/*
256 			 * Hide rdtscp/ia32_tsc_aux until we know how
257 			 * to deal with them.
258 			 */
259 			regs[3] &= ~AMDID_RDTSCP;
260 			break;
261 
262 		case CPUID_8000_0007:
263 			cpuid_count(func, param, regs);
264 			/*
265 			 * AMD uses this leaf to advertise the processor's
266 			 * power monitoring and RAS capabilities. These
267 			 * features are hardware-specific and exposing
268 			 * them to a guest doesn't make a lot of sense.
269 			 *
270 			 * Intel uses this leaf only to advertise the
271 			 * "Invariant TSC" feature with all other bits
272 			 * being reserved (set to zero).
273 			 */
274 			regs[0] = 0;
275 			regs[1] = 0;
276 			regs[2] = 0;
277 
278 			/*
279 			 * If the host system possesses an invariant TSC, then
280 			 * it is safe to expose to the guest.
281 			 *
282 			 * If there is measured skew between host TSCs, it will
283 			 * be properly offset so guests do not observe any
284 			 * change between CPU migrations.
285 			 */
286 			regs[3] &= AMDPM_TSC_INVARIANT;
287 
288 			/*
289 			 * Since illumos avoids deep C-states on CPUs which do
290 			 * not support an invariant TSC, it may be safe (and
291 			 * desired) to unconditionally expose that capability to
292 			 * the guest.
293 			 */
294 			if (vmm_force_invariant_tsc != 0) {
295 				regs[3] |= AMDPM_TSC_INVARIANT;
296 			}
297 			break;
298 
299 		case CPUID_8000_001D:
300 			/* AMD Cache topology, like 0000_0004 for Intel. */
301 			if (!vmm_is_svm())
302 				goto default_leaf;
303 
304 			/*
305 			 * Similar to Intel, generate a ficticious cache
306 			 * topology for the guest with L3 shared by the
307 			 * package, and L1 and L2 local to a core.
308 			 */
309 			vm_get_topology(vm, &sockets, &cores, &threads,
310 			    &maxcpus);
311 			switch (param) {
312 			case 0:
313 				logical_cpus = threads;
314 				level = 1;
315 				func = 1;	/* data cache */
316 				break;
317 			case 1:
318 				logical_cpus = threads;
319 				level = 2;
320 				func = 3;	/* unified cache */
321 				break;
322 			case 2:
323 				logical_cpus = threads * cores;
324 				level = 3;
325 				func = 3;	/* unified cache */
326 				break;
327 			default:
328 				logical_cpus = 0;
329 				level = 0;
330 				func = 0;
331 				break;
332 			}
333 
334 			logical_cpus = MIN(0xfff, logical_cpus - 1);
335 			regs[0] = (logical_cpus << 14) | (1 << 8) |
336 			    (level << 5) | func;
337 			regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
338 			regs[2] = 0;
339 			regs[3] = 0;
340 			break;
341 
342 		case CPUID_8000_001E:
343 			/*
344 			 * AMD Family 16h+ and Hygon Family 18h additional
345 			 * identifiers.
346 			 */
347 			if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16)
348 				goto default_leaf;
349 
350 			vm_get_topology(vm, &sockets, &cores, &threads,
351 			    &maxcpus);
352 			regs[0] = vcpu_id;
353 			threads = MIN(0xFF, threads - 1);
354 			regs[1] = (threads << 8) |
355 			    (vcpu_id >> log2(threads + 1));
356 			/*
357 			 * XXX Bhyve topology cannot yet represent >1 node per
358 			 * processor.
359 			 */
360 			regs[2] = 0;
361 			regs[3] = 0;
362 			break;
363 
364 		case CPUID_0000_0001:
365 			do_cpuid(1, regs);
366 
367 			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
368 			if (error) {
369 				panic("x86_emulate_cpuid: error %d "
370 				    "fetching x2apic state", error);
371 			}
372 
373 			/*
374 			 * Override the APIC ID only in ebx
375 			 */
376 			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
377 			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
378 
379 			/*
380 			 * Don't expose VMX, SpeedStep, TME or SMX capability.
381 			 * Advertise x2APIC capability and Hypervisor guest.
382 			 */
383 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
384 			regs[2] &= ~(CPUID2_SMX);
385 
386 			regs[2] |= CPUID2_HV;
387 
388 			if (x2apic_state != X2APIC_DISABLED)
389 				regs[2] |= CPUID2_X2APIC;
390 			else
391 				regs[2] &= ~CPUID2_X2APIC;
392 
393 			/*
394 			 * Only advertise CPUID2_XSAVE in the guest if
395 			 * the host is using XSAVE.
396 			 */
397 			if (!(regs[2] & CPUID2_OSXSAVE))
398 				regs[2] &= ~CPUID2_XSAVE;
399 
400 			/*
401 			 * If CPUID2_XSAVE is being advertised and the
402 			 * guest has set CR4_XSAVE, set
403 			 * CPUID2_OSXSAVE.
404 			 */
405 			regs[2] &= ~CPUID2_OSXSAVE;
406 			if (regs[2] & CPUID2_XSAVE) {
407 				error = vm_get_register(vm, vcpu_id,
408 				    VM_REG_GUEST_CR4, &cr4);
409 				if (error)
410 					panic("x86_emulate_cpuid: error %d "
411 					    "fetching %%cr4", error);
412 				if (cr4 & CR4_XSAVE)
413 					regs[2] |= CPUID2_OSXSAVE;
414 			}
415 
416 			/*
417 			 * Hide monitor/mwait until we know how to deal with
418 			 * these instructions.
419 			 */
420 			regs[2] &= ~CPUID2_MON;
421 
422 			/*
423 			 * Hide the performance and debug features.
424 			 */
425 			regs[2] &= ~CPUID2_PDCM;
426 
427 			/*
428 			 * No TSC deadline support in the APIC yet
429 			 */
430 			regs[2] &= ~CPUID2_TSCDLT;
431 
432 			/*
433 			 * Hide thermal monitoring
434 			 */
435 			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
436 
437 			/*
438 			 * Hide the debug store capability.
439 			 */
440 			regs[3] &= ~CPUID_DS;
441 
442 			/*
443 			 * Advertise the Machine Check and MTRR capability.
444 			 *
445 			 * Some guest OSes (e.g. Windows) will not boot if
446 			 * these features are absent.
447 			 */
448 			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
449 
450 			vm_get_topology(vm, &sockets, &cores, &threads,
451 			    &maxcpus);
452 			logical_cpus = threads * cores;
453 			regs[1] &= ~CPUID_HTT_CORES;
454 			regs[1] |= (logical_cpus & 0xff) << 16;
455 			regs[3] |= CPUID_HTT;
456 			break;
457 
458 		case CPUID_0000_0004:
459 			cpuid_count(func, param, regs);
460 
461 			if (regs[0] || regs[1] || regs[2] || regs[3]) {
462 				vm_get_topology(vm, &sockets, &cores, &threads,
463 				    &maxcpus);
464 				regs[0] &= 0x3ff;
465 				regs[0] |= (cores - 1) << 26;
466 				/*
467 				 * Cache topology:
468 				 * - L1 and L2 are shared only by the logical
469 				 *   processors in a single core.
470 				 * - L3 and above are shared by all logical
471 				 *   processors in the package.
472 				 */
473 				logical_cpus = threads;
474 				level = (regs[0] >> 5) & 0x7;
475 				if (level >= 3)
476 					logical_cpus *= cores;
477 				regs[0] |= (logical_cpus - 1) << 14;
478 			}
479 			break;
480 
481 		case CPUID_0000_0007:
482 			regs[0] = 0;
483 			regs[1] = 0;
484 			regs[2] = 0;
485 			regs[3] = 0;
486 
487 			/* leaf 0 */
488 			if (param == 0) {
489 				cpuid_count(func, param, regs);
490 
491 				/* Only leaf 0 is supported */
492 				regs[0] = 0;
493 
494 				/*
495 				 * Expose known-safe features.
496 				 */
497 				regs[1] &= (CPUID_STDEXT_FSGSBASE |
498 				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
499 				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_SMEP |
500 				    CPUID_STDEXT_BMI2 |
501 				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
502 				    CPUID_STDEXT_AVX512F |
503 				    CPUID_STDEXT_RDSEED |
504 				    CPUID_STDEXT_SMAP |
505 				    CPUID_STDEXT_AVX512PF |
506 				    CPUID_STDEXT_AVX512ER |
507 				    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
508 				regs[2] = 0;
509 				regs[3] &= CPUID_STDEXT3_MD_CLEAR;
510 
511 				/* Advertise INVPCID if it is enabled. */
512 				error = vm_get_capability(vm, vcpu_id,
513 				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
514 				if (error == 0 && enable_invpcid)
515 					regs[1] |= CPUID_STDEXT_INVPCID;
516 			}
517 			break;
518 
519 		case CPUID_0000_0006:
520 			regs[0] = CPUTPM1_ARAT;
521 			regs[1] = 0;
522 			regs[2] = 0;
523 			regs[3] = 0;
524 			break;
525 
526 		case CPUID_0000_000A:
527 			/*
528 			 * Handle the access, but report 0 for
529 			 * all options
530 			 */
531 			regs[0] = 0;
532 			regs[1] = 0;
533 			regs[2] = 0;
534 			regs[3] = 0;
535 			break;
536 
537 		case CPUID_0000_000B:
538 			/*
539 			 * Intel processor topology enumeration
540 			 */
541 			if (vmm_is_intel()) {
542 				vm_get_topology(vm, &sockets, &cores, &threads,
543 				    &maxcpus);
544 				if (param == 0) {
545 					logical_cpus = threads;
546 					width = log2(logical_cpus);
547 					level = CPUID_TYPE_SMT;
548 					x2apic_id = vcpu_id;
549 				}
550 
551 				if (param == 1) {
552 					logical_cpus = threads * cores;
553 					width = log2(logical_cpus);
554 					level = CPUID_TYPE_CORE;
555 					x2apic_id = vcpu_id;
556 				}
557 
558 				if (!cpuid_leaf_b || param >= 2) {
559 					width = 0;
560 					logical_cpus = 0;
561 					level = 0;
562 					x2apic_id = 0;
563 				}
564 
565 				regs[0] = width & 0x1f;
566 				regs[1] = logical_cpus & 0xffff;
567 				regs[2] = (level << 8) | (param & 0xff);
568 				regs[3] = x2apic_id;
569 			} else {
570 				regs[0] = 0;
571 				regs[1] = 0;
572 				regs[2] = 0;
573 				regs[3] = 0;
574 			}
575 			break;
576 
577 		case CPUID_0000_000D:
578 			limits = vmm_get_xsave_limits();
579 			if (!limits->xsave_enabled) {
580 				regs[0] = 0;
581 				regs[1] = 0;
582 				regs[2] = 0;
583 				regs[3] = 0;
584 				break;
585 			}
586 
587 			cpuid_count(func, param, regs);
588 			switch (param) {
589 			case 0:
590 				/*
591 				 * Only permit the guest to use bits
592 				 * that are active in the host in
593 				 * %xcr0.  Also, claim that the
594 				 * maximum save area size is
595 				 * equivalent to the host's current
596 				 * save area size.  Since this runs
597 				 * "inside" of vmrun(), it runs with
598 				 * the guest's xcr0, so the current
599 				 * save area size is correct as-is.
600 				 */
601 				regs[0] &= limits->xcr0_allowed;
602 				regs[2] = limits->xsave_max_size;
603 				regs[3] &= (limits->xcr0_allowed >> 32);
604 				break;
605 			case 1:
606 				/* Only permit XSAVEOPT. */
607 				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
608 				regs[1] = 0;
609 				regs[2] = 0;
610 				regs[3] = 0;
611 				break;
612 			default:
613 				/*
614 				 * If the leaf is for a permitted feature,
615 				 * pass through as-is, otherwise return
616 				 * all zeroes.
617 				 */
618 				if (!(limits->xcr0_allowed & (1ul << param))) {
619 					regs[0] = 0;
620 					regs[1] = 0;
621 					regs[2] = 0;
622 					regs[3] = 0;
623 				}
624 				break;
625 			}
626 			break;
627 
628 		case CPUID_0000_000F:
629 		case CPUID_0000_0010:
630 			/*
631 			 * Do not report any Resource Director Technology
632 			 * capabilities.  Exposing control of cache or memory
633 			 * controller resource partitioning to the guest is not
634 			 * at all sensible.
635 			 *
636 			 * This is already hidden at a high level by masking of
637 			 * leaf 0x7.  Even still, a guest may look here for
638 			 * detailed capability information.
639 			 */
640 			regs[0] = 0;
641 			regs[1] = 0;
642 			regs[2] = 0;
643 			regs[3] = 0;
644 			break;
645 
646 		case CPUID_0000_0015:
647 			/*
648 			 * Don't report CPU TSC/Crystal ratio and clock
649 			 * values since guests may use these to derive the
650 			 * local APIC frequency..
651 			 */
652 			regs[0] = 0;
653 			regs[1] = 0;
654 			regs[2] = 0;
655 			regs[3] = 0;
656 			break;
657 
658 		case 0x40000000:
659 			regs[0] = CPUID_VM_HIGH;
660 			bcopy(bhyve_id, &regs[1], 4);
661 			bcopy(bhyve_id + 4, &regs[2], 4);
662 			bcopy(bhyve_id + 8, &regs[3], 4);
663 			break;
664 
665 		default:
666 default_leaf:
667 			/*
668 			 * The leaf value has already been clamped so
669 			 * simply pass this through, keeping count of
670 			 * how many unhandled leaf values have been seen.
671 			 */
672 			atomic_add_long(&bhyve_xcpuids, 1);
673 			cpuid_count(func, param, regs);
674 			break;
675 	}
676 
677 	/*
678 	 * CPUID clears the upper 32-bits of the long-mode registers.
679 	 */
680 	*rax = regs[0];
681 	*rbx = regs[1];
682 	*rcx = regs[2];
683 	*rdx = regs[3];
684 
685 	return (1);
686 }
687 
688 /*
689  * Return 'true' if the capability 'cap' is enabled in this virtual cpu
690  * and 'false' otherwise.
691  */
692 bool
693 vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
694 {
695 	bool rv;
696 
697 	KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
698 	    __func__, cap));
699 
700 	/*
701 	 * Simply passthrough the capabilities of the host cpu for now.
702 	 */
703 	rv = false;
704 	switch (cap) {
705 #ifdef __FreeBSD__
706 	case VCC_NO_EXECUTE:
707 		if (amd_feature & AMDID_NX)
708 			rv = true;
709 		break;
710 	case VCC_FFXSR:
711 		if (amd_feature & AMDID_FFXSR)
712 			rv = true;
713 		break;
714 	case VCC_TCE:
715 		if (amd_feature2 & AMDID2_TCE)
716 			rv = true;
717 		break;
718 #else
719 	case VCC_NO_EXECUTE:
720 		if (is_x86_feature(x86_featureset, X86FSET_NX))
721 			rv = true;
722 		break;
723 	/* XXXJOY: No kernel detection for FFXR or TCE at present, so ignore */
724 	case VCC_FFXSR:
725 	case VCC_TCE:
726 		break;
727 #endif
728 	default:
729 		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
730 	}
731 	return (rv);
732 }
733 
734 bool
735 validate_guest_xcr0(uint64_t val, uint64_t limit_mask)
736 {
737 	/* x87 feature must be enabled */
738 	if ((val & XFEATURE_ENABLED_X87) == 0) {
739 		return (false);
740 	}
741 	/* AVX cannot be enabled without SSE */
742 	if ((val & (XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)) ==
743 	    XFEATURE_ENABLED_SSE) {
744 		return (false);
745 	}
746 	/* No bits should be outside what we dictate to be allowed */
747 	if ((val & ~limit_mask) != 0) {
748 		return (false);
749 	}
750 
751 	return (true);
752 }
753