xref: /freebsd/sys/amd64/vmm/x86.c (revision f5f5f1e7d6de64f0245d913840f28306334241de)
1366f6083SPeter Grehan /*-
2c49761ddSPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3c49761ddSPedro F. Giffuni  *
4366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
5366f6083SPeter Grehan  * All rights reserved.
6366f6083SPeter Grehan  *
7366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
8366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
9366f6083SPeter Grehan  * are met:
10366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
11366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
12366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
13366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
14366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
15366f6083SPeter Grehan  *
16366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26366f6083SPeter Grehan  * SUCH DAMAGE.
27366f6083SPeter Grehan  *
28366f6083SPeter Grehan  * $FreeBSD$
29366f6083SPeter Grehan  */
30366f6083SPeter Grehan 
31366f6083SPeter Grehan #include <sys/cdefs.h>
32366f6083SPeter Grehan __FBSDID("$FreeBSD$");
33366f6083SPeter Grehan 
34a2da7af6SNeel Natu #include <sys/param.h>
35abb023fbSJohn Baldwin #include <sys/pcpu.h>
368b287612SJohn Baldwin #include <sys/systm.h>
378bd3845dSNeel Natu #include <sys/sysctl.h>
38366f6083SPeter Grehan 
391472b87fSNeel Natu #include <machine/clock.h>
40366f6083SPeter Grehan #include <machine/cpufunc.h>
418b287612SJohn Baldwin #include <machine/md_var.h>
42abb023fbSJohn Baldwin #include <machine/segments.h>
43366f6083SPeter Grehan #include <machine/specialreg.h>
44366f6083SPeter Grehan 
45a2da7af6SNeel Natu #include <machine/vmm.h>
46a2da7af6SNeel Natu 
47abb023fbSJohn Baldwin #include "vmm_host.h"
485a1f0b36SNeel Natu #include "vmm_ktr.h"
495a1f0b36SNeel Natu #include "vmm_util.h"
50366f6083SPeter Grehan #include "x86.h"
51366f6083SPeter Grehan 
528bd3845dSNeel Natu SYSCTL_DECL(_hw_vmm);
53b40598c5SPawel Biernacki static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
54b40598c5SPawel Biernacki     NULL);
558bd3845dSNeel Natu 
568b287612SJohn Baldwin #define	CPUID_VM_HIGH		0x40000000
578b287612SJohn Baldwin 
58560d5edaSPeter Grehan static const char bhyve_id[12] = "bhyve bhyve ";
59560d5edaSPeter Grehan 
60560d5edaSPeter Grehan static uint64_t bhyve_xcpuids;
615a1f0b36SNeel Natu SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
625a1f0b36SNeel Natu     "Number of times an unknown cpuid leaf was accessed");
638b287612SJohn Baldwin 
6401d822d3SRodney W. Grimes #if __FreeBSD_version < 1200060	/* Remove after 11 EOL helps MFCing */
6501d822d3SRodney W. Grimes extern u_int threads_per_core;
668bd3845dSNeel Natu SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
678bd3845dSNeel Natu     &threads_per_core, 0, NULL);
688bd3845dSNeel Natu 
6901d822d3SRodney W. Grimes extern u_int cores_per_package;
708bd3845dSNeel Natu SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
718bd3845dSNeel Natu     &cores_per_package, 0, NULL);
7201d822d3SRodney W. Grimes #endif
738bd3845dSNeel Natu 
748bd3845dSNeel Natu static int cpuid_leaf_b = 1;
758bd3845dSNeel Natu SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
768bd3845dSNeel Natu     &cpuid_leaf_b, 0, NULL);
778bd3845dSNeel Natu 
788bd3845dSNeel Natu /*
798bd3845dSNeel Natu  * Round up to the next power of two, if necessary, and then take log2.
808bd3845dSNeel Natu  * Returns -1 if argument is zero.
818bd3845dSNeel Natu  */
828bd3845dSNeel Natu static __inline int
838bd3845dSNeel Natu log2(u_int x)
848bd3845dSNeel Natu {
858bd3845dSNeel Natu 
868bd3845dSNeel Natu 	return (fls(x << (1 - powerof2(x))) - 1);
878bd3845dSNeel Natu }
888bd3845dSNeel Natu 
89366f6083SPeter Grehan int
90a2da7af6SNeel Natu x86_emulate_cpuid(struct vm *vm, int vcpu_id,
91a2da7af6SNeel Natu 		  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
92366f6083SPeter Grehan {
93abb023fbSJohn Baldwin 	const struct xsave_limits *limits;
94abb023fbSJohn Baldwin 	uint64_t cr4;
95*f5f5f1e7SPeter Grehan 	int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
96*f5f5f1e7SPeter Grehan 	    width, x2apic_id;
978bd3845dSNeel Natu 	unsigned int func, regs[4], logical_cpus;
98a2da7af6SNeel Natu 	enum x2apic_state x2apic_state;
9901d822d3SRodney W. Grimes 	uint16_t cores, maxcpus, sockets, threads;
100366f6083SPeter Grehan 
1015a1f0b36SNeel Natu 	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx);
1025a1f0b36SNeel Natu 
1038b287612SJohn Baldwin 	/*
1048b287612SJohn Baldwin 	 * Requests for invalid CPUID levels should map to the highest
1058b287612SJohn Baldwin 	 * available level instead.
1068b287612SJohn Baldwin 	 */
1078b287612SJohn Baldwin 	if (cpu_exthigh != 0 && *eax >= 0x80000000) {
1088b287612SJohn Baldwin 		if (*eax > cpu_exthigh)
1098b287612SJohn Baldwin 			*eax = cpu_exthigh;
1108b287612SJohn Baldwin 	} else if (*eax >= 0x40000000) {
1118b287612SJohn Baldwin 		if (*eax > CPUID_VM_HIGH)
1128b287612SJohn Baldwin 			*eax = CPUID_VM_HIGH;
1138b287612SJohn Baldwin 	} else if (*eax > cpu_high) {
1148b287612SJohn Baldwin 		*eax = cpu_high;
1158b287612SJohn Baldwin 	}
116366f6083SPeter Grehan 
11725448de2SNeel Natu 	func = *eax;
11825448de2SNeel Natu 
1198b287612SJohn Baldwin 	/*
1208b287612SJohn Baldwin 	 * In general the approach used for CPU topology is to
1218b287612SJohn Baldwin 	 * advertise a flat topology where all CPUs are packages with
1228b287612SJohn Baldwin 	 * no multi-core or SMT.
1238b287612SJohn Baldwin 	 */
124366f6083SPeter Grehan 	switch (func) {
125560d5edaSPeter Grehan 		/*
126560d5edaSPeter Grehan 		 * Pass these through to the guest
127560d5edaSPeter Grehan 		 */
128366f6083SPeter Grehan 		case CPUID_0000_0000:
129366f6083SPeter Grehan 		case CPUID_0000_0002:
130366f6083SPeter Grehan 		case CPUID_0000_0003:
131366f6083SPeter Grehan 		case CPUID_8000_0000:
132366f6083SPeter Grehan 		case CPUID_8000_0002:
133366f6083SPeter Grehan 		case CPUID_8000_0003:
134366f6083SPeter Grehan 		case CPUID_8000_0004:
135366f6083SPeter Grehan 		case CPUID_8000_0006:
1365a1f0b36SNeel Natu 			cpuid_count(*eax, *ecx, regs);
1375a1f0b36SNeel Natu 			break;
138366f6083SPeter Grehan 		case CPUID_8000_0008:
1398b287612SJohn Baldwin 			cpuid_count(*eax, *ecx, regs);
140caab5042SKonstantin Belousov 			if (vmm_is_svm()) {
141d0c7cde5SConrad Meyer 				/*
142d0c7cde5SConrad Meyer 				 * As on Intel (0000_0007:0, EDX), mask out
143d0c7cde5SConrad Meyer 				 * unsupported or unsafe AMD extended features
144d0c7cde5SConrad Meyer 				 * (8000_0008 EBX).
145d0c7cde5SConrad Meyer 				 */
146d0c7cde5SConrad Meyer 				regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
147d0c7cde5SConrad Meyer 				    AMDFEID_XSAVEERPTR);
148d0c7cde5SConrad Meyer 
14901d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
15001d822d3SRodney W. Grimes 				    &maxcpus);
15115b7da10SConrad Meyer 				/*
15215b7da10SConrad Meyer 				 * Here, width is ApicIdCoreIdSize, present on
15315b7da10SConrad Meyer 				 * at least Family 15h and newer.  It
15415b7da10SConrad Meyer 				 * represents the "number of bits in the
15515b7da10SConrad Meyer 				 * initial apicid that indicate thread id
15615b7da10SConrad Meyer 				 * within a package."
15715b7da10SConrad Meyer 				 *
15815b7da10SConrad Meyer 				 * Our topo_probe_amd() uses it for
15915b7da10SConrad Meyer 				 * pkg_id_shift and other OSes may rely on it.
16015b7da10SConrad Meyer 				 */
16115b7da10SConrad Meyer 				width = MIN(0xF, log2(threads * cores));
16215b7da10SConrad Meyer 				if (width < 0x4)
16315b7da10SConrad Meyer 					width = 0;
16415b7da10SConrad Meyer 				logical_cpus = MIN(0xFF, threads * cores - 1);
16515b7da10SConrad Meyer 				regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
1665a1f0b36SNeel Natu 			}
167366f6083SPeter Grehan 			break;
168366f6083SPeter Grehan 
169560d5edaSPeter Grehan 		case CPUID_8000_0001:
17006053618SNeel Natu 			cpuid_count(*eax, *ecx, regs);
17106053618SNeel Natu 
17206053618SNeel Natu 			/*
17315b7da10SConrad Meyer 			 * Hide SVM from guest.
17406053618SNeel Natu 			 */
17515b7da10SConrad Meyer 			regs[2] &= ~AMDID2_SVM;
17606053618SNeel Natu 
177560d5edaSPeter Grehan 			/*
17802904c45SNeel Natu 			 * Don't advertise extended performance counter MSRs
17902904c45SNeel Natu 			 * to the guest.
18002904c45SNeel Natu 			 */
18102904c45SNeel Natu 			regs[2] &= ~AMDID2_PCXC;
18202904c45SNeel Natu 			regs[2] &= ~AMDID2_PNXC;
18302904c45SNeel Natu 			regs[2] &= ~AMDID2_PTSCEL2I;
18402904c45SNeel Natu 
18502904c45SNeel Natu 			/*
1862688a818SNeel Natu 			 * Don't advertise Instruction Based Sampling feature.
1872688a818SNeel Natu 			 */
1882688a818SNeel Natu 			regs[2] &= ~AMDID2_IBS;
1892688a818SNeel Natu 
19065d5111aSNeel Natu 			/* NodeID MSR not available */
19165d5111aSNeel Natu 			regs[2] &= ~AMDID2_NODE_ID;
19265d5111aSNeel Natu 
193592cd7d3SNeel Natu 			/* Don't advertise the OS visible workaround feature */
194592cd7d3SNeel Natu 			regs[2] &= ~AMDID2_OSVW;
195592cd7d3SNeel Natu 
1963da44302SPeter Grehan 			/* Hide mwaitx/monitorx capability from the guest */
1973da44302SPeter Grehan 			regs[2] &= ~AMDID2_MWAITX;
1983da44302SPeter Grehan 
199*f5f5f1e7SPeter Grehan 			/* Advertise RDTSCP if it is enabled. */
200*f5f5f1e7SPeter Grehan 			error = vm_get_capability(vm, vcpu_id,
201*f5f5f1e7SPeter Grehan 			    VM_CAP_RDTSCP, &enable_rdtscp);
202*f5f5f1e7SPeter Grehan 			if (error == 0 && enable_rdtscp)
203*f5f5f1e7SPeter Grehan 				regs[3] |= AMDID_RDTSCP;
204*f5f5f1e7SPeter Grehan 			else
205560d5edaSPeter Grehan 				regs[3] &= ~AMDID_RDTSCP;
206560d5edaSPeter Grehan 			break;
207560d5edaSPeter Grehan 
2081472b87fSNeel Natu 		case CPUID_8000_0007:
2091472b87fSNeel Natu 			/*
210592cd7d3SNeel Natu 			 * AMD uses this leaf to advertise the processor's
211592cd7d3SNeel Natu 			 * power monitoring and RAS capabilities. These
212592cd7d3SNeel Natu 			 * features are hardware-specific and exposing
213592cd7d3SNeel Natu 			 * them to a guest doesn't make a lot of sense.
214592cd7d3SNeel Natu 			 *
215592cd7d3SNeel Natu 			 * Intel uses this leaf only to advertise the
216592cd7d3SNeel Natu 			 * "Invariant TSC" feature with all other bits
217592cd7d3SNeel Natu 			 * being reserved (set to zero).
218592cd7d3SNeel Natu 			 */
219592cd7d3SNeel Natu 			regs[0] = 0;
220592cd7d3SNeel Natu 			regs[1] = 0;
221592cd7d3SNeel Natu 			regs[2] = 0;
222592cd7d3SNeel Natu 			regs[3] = 0;
223592cd7d3SNeel Natu 
224592cd7d3SNeel Natu 			/*
225592cd7d3SNeel Natu 			 * "Invariant TSC" can be advertised to the guest if:
226592cd7d3SNeel Natu 			 * - host TSC frequency is invariant
227592cd7d3SNeel Natu 			 * - host TSCs are synchronized across physical cpus
2281472b87fSNeel Natu 			 *
2291472b87fSNeel Natu 			 * XXX This still falls short because the vcpu
2301472b87fSNeel Natu 			 * can observe the TSC moving backwards as it
2311472b87fSNeel Natu 			 * migrates across physical cpus. But at least
2321472b87fSNeel Natu 			 * it should discourage the guest from using the
2331472b87fSNeel Natu 			 * TSC to keep track of time.
2341472b87fSNeel Natu 			 */
235592cd7d3SNeel Natu 			if (tsc_is_invariant && smp_tsc)
236592cd7d3SNeel Natu 				regs[3] |= AMDPM_TSC_INVARIANT;
2371472b87fSNeel Natu 			break;
2381472b87fSNeel Natu 
23915b7da10SConrad Meyer 		case CPUID_8000_001D:
24015b7da10SConrad Meyer 			/* AMD Cache topology, like 0000_0004 for Intel. */
241caab5042SKonstantin Belousov 			if (!vmm_is_svm())
24215b7da10SConrad Meyer 				goto default_leaf;
24315b7da10SConrad Meyer 
24415b7da10SConrad Meyer 			/*
24515b7da10SConrad Meyer 			 * Similar to Intel, generate a ficticious cache
24615b7da10SConrad Meyer 			 * topology for the guest with L3 shared by the
24715b7da10SConrad Meyer 			 * package, and L1 and L2 local to a core.
24815b7da10SConrad Meyer 			 */
24915b7da10SConrad Meyer 			vm_get_topology(vm, &sockets, &cores, &threads,
25015b7da10SConrad Meyer 			    &maxcpus);
25115b7da10SConrad Meyer 			switch (*ecx) {
25215b7da10SConrad Meyer 			case 0:
25315b7da10SConrad Meyer 				logical_cpus = threads;
25415b7da10SConrad Meyer 				level = 1;
25515b7da10SConrad Meyer 				func = 1;	/* data cache */
25615b7da10SConrad Meyer 				break;
25715b7da10SConrad Meyer 			case 1:
25815b7da10SConrad Meyer 				logical_cpus = threads;
25915b7da10SConrad Meyer 				level = 2;
26015b7da10SConrad Meyer 				func = 3;	/* unified cache */
26115b7da10SConrad Meyer 				break;
26215b7da10SConrad Meyer 			case 2:
26315b7da10SConrad Meyer 				logical_cpus = threads * cores;
26415b7da10SConrad Meyer 				level = 3;
26515b7da10SConrad Meyer 				func = 3;	/* unified cache */
26615b7da10SConrad Meyer 				break;
26715b7da10SConrad Meyer 			default:
26815b7da10SConrad Meyer 				logical_cpus = 0;
26915b7da10SConrad Meyer 				level = 0;
27015b7da10SConrad Meyer 				func = 0;
27115b7da10SConrad Meyer 				break;
27215b7da10SConrad Meyer 			}
27315b7da10SConrad Meyer 
27415b7da10SConrad Meyer 			logical_cpus = MIN(0xfff, logical_cpus - 1);
27515b7da10SConrad Meyer 			regs[0] = (logical_cpus << 14) | (1 << 8) |
27615b7da10SConrad Meyer 			    (level << 5) | func;
27715b7da10SConrad Meyer 			regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
27815b7da10SConrad Meyer 			regs[2] = 0;
27915b7da10SConrad Meyer 			regs[3] = 0;
28015b7da10SConrad Meyer 			break;
28115b7da10SConrad Meyer 
28215b7da10SConrad Meyer 		case CPUID_8000_001E:
283caab5042SKonstantin Belousov 			/*
284caab5042SKonstantin Belousov 			 * AMD Family 16h+ and Hygon Family 18h additional
285caab5042SKonstantin Belousov 			 * identifiers.
286caab5042SKonstantin Belousov 			 */
287caab5042SKonstantin Belousov 			if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16)
28815b7da10SConrad Meyer 				goto default_leaf;
28915b7da10SConrad Meyer 
29015b7da10SConrad Meyer 			vm_get_topology(vm, &sockets, &cores, &threads,
29115b7da10SConrad Meyer 			    &maxcpus);
29215b7da10SConrad Meyer 			regs[0] = vcpu_id;
29315b7da10SConrad Meyer 			threads = MIN(0xFF, threads - 1);
29415b7da10SConrad Meyer 			regs[1] = (threads << 8) |
29515b7da10SConrad Meyer 			    (vcpu_id >> log2(threads + 1));
29615b7da10SConrad Meyer 			/*
29715b7da10SConrad Meyer 			 * XXX Bhyve topology cannot yet represent >1 node per
29815b7da10SConrad Meyer 			 * processor.
29915b7da10SConrad Meyer 			 */
30015b7da10SConrad Meyer 			regs[2] = 0;
30115b7da10SConrad Meyer 			regs[3] = 0;
30215b7da10SConrad Meyer 			break;
30315b7da10SConrad Meyer 
304366f6083SPeter Grehan 		case CPUID_0000_0001:
3058b287612SJohn Baldwin 			do_cpuid(1, regs);
3068b287612SJohn Baldwin 
307a2da7af6SNeel Natu 			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
308a2da7af6SNeel Natu 			if (error) {
309a2da7af6SNeel Natu 				panic("x86_emulate_cpuid: error %d "
310a2da7af6SNeel Natu 				      "fetching x2apic state", error);
311a2da7af6SNeel Natu 			}
312a2da7af6SNeel Natu 
313366f6083SPeter Grehan 			/*
314366f6083SPeter Grehan 			 * Override the APIC ID only in ebx
315366f6083SPeter Grehan 			 */
3168b287612SJohn Baldwin 			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
3178b287612SJohn Baldwin 			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
318366f6083SPeter Grehan 
319366f6083SPeter Grehan 			/*
32031708084SNeel Natu 			 * Don't expose VMX, SpeedStep, TME or SMX capability.
3218b287612SJohn Baldwin 			 * Advertise x2APIC capability and Hypervisor guest.
322366f6083SPeter Grehan 			 */
3238b287612SJohn Baldwin 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
32431708084SNeel Natu 			regs[2] &= ~(CPUID2_SMX);
325a2da7af6SNeel Natu 
326a2da7af6SNeel Natu 			regs[2] |= CPUID2_HV;
327a2da7af6SNeel Natu 
328a2da7af6SNeel Natu 			if (x2apic_state != X2APIC_DISABLED)
329a2da7af6SNeel Natu 				regs[2] |= CPUID2_X2APIC;
33052e5c8a2SNeel Natu 			else
33152e5c8a2SNeel Natu 				regs[2] &= ~CPUID2_X2APIC;
332366f6083SPeter Grehan 
333366f6083SPeter Grehan 			/*
334abb023fbSJohn Baldwin 			 * Only advertise CPUID2_XSAVE in the guest if
335abb023fbSJohn Baldwin 			 * the host is using XSAVE.
336298379f7SPeter Grehan 			 */
337abb023fbSJohn Baldwin 			if (!(regs[2] & CPUID2_OSXSAVE))
338abb023fbSJohn Baldwin 				regs[2] &= ~CPUID2_XSAVE;
339abb023fbSJohn Baldwin 
340abb023fbSJohn Baldwin 			/*
341abb023fbSJohn Baldwin 			 * If CPUID2_XSAVE is being advertised and the
342abb023fbSJohn Baldwin 			 * guest has set CR4_XSAVE, set
343abb023fbSJohn Baldwin 			 * CPUID2_OSXSAVE.
344abb023fbSJohn Baldwin 			 */
345abb023fbSJohn Baldwin 			regs[2] &= ~CPUID2_OSXSAVE;
346abb023fbSJohn Baldwin 			if (regs[2] & CPUID2_XSAVE) {
347abb023fbSJohn Baldwin 				error = vm_get_register(vm, vcpu_id,
348abb023fbSJohn Baldwin 				    VM_REG_GUEST_CR4, &cr4);
349abb023fbSJohn Baldwin 				if (error)
350abb023fbSJohn Baldwin 					panic("x86_emulate_cpuid: error %d "
351abb023fbSJohn Baldwin 					      "fetching %%cr4", error);
352abb023fbSJohn Baldwin 				if (cr4 & CR4_XSAVE)
353abb023fbSJohn Baldwin 					regs[2] |= CPUID2_OSXSAVE;
354abb023fbSJohn Baldwin 			}
355298379f7SPeter Grehan 
356298379f7SPeter Grehan 			/*
357ff6ec151SNeel Natu 			 * Hide monitor/mwait until we know how to deal with
358ff6ec151SNeel Natu 			 * these instructions.
359ff6ec151SNeel Natu 			 */
360ff6ec151SNeel Natu 			regs[2] &= ~CPUID2_MON;
361ff6ec151SNeel Natu 
362ff6ec151SNeel Natu                         /*
363560d5edaSPeter Grehan 			 * Hide the performance and debug features.
364560d5edaSPeter Grehan 			 */
365560d5edaSPeter Grehan 			regs[2] &= ~CPUID2_PDCM;
366560d5edaSPeter Grehan 
367517e21d3SPeter Grehan 			/*
368517e21d3SPeter Grehan 			 * No TSC deadline support in the APIC yet
369517e21d3SPeter Grehan 			 */
370517e21d3SPeter Grehan 			regs[2] &= ~CPUID2_TSCDLT;
371517e21d3SPeter Grehan 
372560d5edaSPeter Grehan 			/*
3731f3025e1SPeter Grehan 			 * Hide thermal monitoring
3741f3025e1SPeter Grehan 			 */
3751f3025e1SPeter Grehan 			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
3761f3025e1SPeter Grehan 
3771f3025e1SPeter Grehan 			/*
378560d5edaSPeter Grehan 			 * Hide the debug store capability.
379560d5edaSPeter Grehan 			 */
380560d5edaSPeter Grehan 			regs[3] &= ~CPUID_DS;
381560d5edaSPeter Grehan 
3821d29bfc1SNeel Natu 			/*
3831d29bfc1SNeel Natu 			 * Advertise the Machine Check and MTRR capability.
3841d29bfc1SNeel Natu 			 *
3851d29bfc1SNeel Natu 			 * Some guest OSes (e.g. Windows) will not boot if
3861d29bfc1SNeel Natu 			 * these features are absent.
3871d29bfc1SNeel Natu 			 */
3881d29bfc1SNeel Natu 			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
3891d29bfc1SNeel Natu 
39001d822d3SRodney W. Grimes 			vm_get_topology(vm, &sockets, &cores, &threads,
39101d822d3SRodney W. Grimes 			    &maxcpus);
39201d822d3SRodney W. Grimes 			logical_cpus = threads * cores;
3938b287612SJohn Baldwin 			regs[1] &= ~CPUID_HTT_CORES;
3948bd3845dSNeel Natu 			regs[1] |= (logical_cpus & 0xff) << 16;
3958bd3845dSNeel Natu 			regs[3] |= CPUID_HTT;
3968b287612SJohn Baldwin 			break;
3978b287612SJohn Baldwin 
3988b287612SJohn Baldwin 		case CPUID_0000_0004:
399534dc967SNeel Natu 			cpuid_count(*eax, *ecx, regs);
4008b287612SJohn Baldwin 
4018bd3845dSNeel Natu 			if (regs[0] || regs[1] || regs[2] || regs[3]) {
40201d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
40301d822d3SRodney W. Grimes 				    &maxcpus);
404534dc967SNeel Natu 				regs[0] &= 0x3ff;
40501d822d3SRodney W. Grimes 				regs[0] |= (cores - 1) << 26;
4068b287612SJohn Baldwin 				/*
4078bd3845dSNeel Natu 				 * Cache topology:
4088bd3845dSNeel Natu 				 * - L1 and L2 are shared only by the logical
4098bd3845dSNeel Natu 				 *   processors in a single core.
4108bd3845dSNeel Natu 				 * - L3 and above are shared by all logical
4118bd3845dSNeel Natu 				 *   processors in the package.
4128b287612SJohn Baldwin 				 */
41301d822d3SRodney W. Grimes 				logical_cpus = threads;
4148bd3845dSNeel Natu 				level = (regs[0] >> 5) & 0x7;
4158bd3845dSNeel Natu 				if (level >= 3)
41601d822d3SRodney W. Grimes 					logical_cpus *= cores;
4178bd3845dSNeel Natu 				regs[0] |= (logical_cpus - 1) << 14;
4188bd3845dSNeel Natu 			}
419366f6083SPeter Grehan 			break;
420366f6083SPeter Grehan 
421a0cad470SPeter Grehan 		case CPUID_0000_0007:
42249cc03daSNeel Natu 			regs[0] = 0;
42349cc03daSNeel Natu 			regs[1] = 0;
42449cc03daSNeel Natu 			regs[2] = 0;
42549cc03daSNeel Natu 			regs[3] = 0;
42649cc03daSNeel Natu 
42749cc03daSNeel Natu 			/* leaf 0 */
42849cc03daSNeel Natu 			if (*ecx == 0) {
42944a68c4eSJohn Baldwin 				cpuid_count(*eax, *ecx, regs);
43044a68c4eSJohn Baldwin 
43144a68c4eSJohn Baldwin 				/* Only leaf 0 is supported */
43244a68c4eSJohn Baldwin 				regs[0] = 0;
43344a68c4eSJohn Baldwin 
43444a68c4eSJohn Baldwin 				/*
43544a68c4eSJohn Baldwin 				 * Expose known-safe features.
43644a68c4eSJohn Baldwin 				 */
43744a68c4eSJohn Baldwin 				regs[1] &= (CPUID_STDEXT_FSGSBASE |
43844a68c4eSJohn Baldwin 				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
43944a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2 |
44044a68c4eSJohn Baldwin 				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
44144a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512F |
442fce2d624SConrad Meyer 				    CPUID_STDEXT_RDSEED |
44344a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512PF |
44444a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512ER |
44515b7da10SConrad Meyer 				    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
44644a68c4eSJohn Baldwin 				regs[2] = 0;
447e519cee3SJohn Baldwin 				regs[3] &= CPUID_STDEXT3_MD_CLEAR;
44844a68c4eSJohn Baldwin 
449*f5f5f1e7SPeter Grehan 				/* Advertise RDPID if it is enabled. */
450*f5f5f1e7SPeter Grehan 				error = vm_get_capability(vm, vcpu_id,
451*f5f5f1e7SPeter Grehan 				    VM_CAP_RDPID, &enable_rdpid);
452*f5f5f1e7SPeter Grehan 				if (error == 0 && enable_rdpid)
453*f5f5f1e7SPeter Grehan 					regs[2] |= CPUID_STDEXT2_RDPID;
454*f5f5f1e7SPeter Grehan 
45544a68c4eSJohn Baldwin 				/* Advertise INVPCID if it is enabled. */
45649cc03daSNeel Natu 				error = vm_get_capability(vm, vcpu_id,
45749cc03daSNeel Natu 				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
45849cc03daSNeel Natu 				if (error == 0 && enable_invpcid)
45949cc03daSNeel Natu 					regs[1] |= CPUID_STDEXT_INVPCID;
46049cc03daSNeel Natu 			}
46149cc03daSNeel Natu 			break;
46249cc03daSNeel Natu 
46349cc03daSNeel Natu 		case CPUID_0000_0006:
464c077e628SAlexander Motin 			regs[0] = CPUTPM1_ARAT;
465c077e628SAlexander Motin 			regs[1] = 0;
466c077e628SAlexander Motin 			regs[2] = 0;
467c077e628SAlexander Motin 			regs[3] = 0;
468c077e628SAlexander Motin 			break;
469c077e628SAlexander Motin 
470560d5edaSPeter Grehan 		case CPUID_0000_000A:
4711f3025e1SPeter Grehan 			/*
4721f3025e1SPeter Grehan 			 * Handle the access, but report 0 for
4731f3025e1SPeter Grehan 			 * all options
4741f3025e1SPeter Grehan 			 */
4751f3025e1SPeter Grehan 			regs[0] = 0;
4761f3025e1SPeter Grehan 			regs[1] = 0;
4771f3025e1SPeter Grehan 			regs[2] = 0;
4781f3025e1SPeter Grehan 			regs[3] = 0;
4791f3025e1SPeter Grehan 			break;
4801f3025e1SPeter Grehan 
481366f6083SPeter Grehan 		case CPUID_0000_000B:
482366f6083SPeter Grehan 			/*
48315b7da10SConrad Meyer 			 * Intel processor topology enumeration
484366f6083SPeter Grehan 			 */
48515b7da10SConrad Meyer 			if (vmm_is_intel()) {
48601d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
48701d822d3SRodney W. Grimes 				    &maxcpus);
4888bd3845dSNeel Natu 				if (*ecx == 0) {
48901d822d3SRodney W. Grimes 					logical_cpus = threads;
4908bd3845dSNeel Natu 					width = log2(logical_cpus);
4918bd3845dSNeel Natu 					level = CPUID_TYPE_SMT;
4928bd3845dSNeel Natu 					x2apic_id = vcpu_id;
4938bd3845dSNeel Natu 				}
4948bd3845dSNeel Natu 
4958bd3845dSNeel Natu 				if (*ecx == 1) {
49601d822d3SRodney W. Grimes 					logical_cpus = threads * cores;
4978bd3845dSNeel Natu 					width = log2(logical_cpus);
4988bd3845dSNeel Natu 					level = CPUID_TYPE_CORE;
4998bd3845dSNeel Natu 					x2apic_id = vcpu_id;
5008bd3845dSNeel Natu 				}
5018bd3845dSNeel Natu 
5028bd3845dSNeel Natu 				if (!cpuid_leaf_b || *ecx >= 2) {
5038bd3845dSNeel Natu 					width = 0;
5048bd3845dSNeel Natu 					logical_cpus = 0;
5058bd3845dSNeel Natu 					level = 0;
5068bd3845dSNeel Natu 					x2apic_id = 0;
5078bd3845dSNeel Natu 				}
5088bd3845dSNeel Natu 
5098bd3845dSNeel Natu 				regs[0] = width & 0x1f;
5108bd3845dSNeel Natu 				regs[1] = logical_cpus & 0xffff;
5118bd3845dSNeel Natu 				regs[2] = (level << 8) | (*ecx & 0xff);
5128bd3845dSNeel Natu 				regs[3] = x2apic_id;
51315b7da10SConrad Meyer 			} else {
51415b7da10SConrad Meyer 				regs[0] = 0;
51515b7da10SConrad Meyer 				regs[1] = 0;
51615b7da10SConrad Meyer 				regs[2] = 0;
51715b7da10SConrad Meyer 				regs[3] = 0;
51815b7da10SConrad Meyer 			}
519366f6083SPeter Grehan 			break;
520366f6083SPeter Grehan 
521abb023fbSJohn Baldwin 		case CPUID_0000_000D:
522abb023fbSJohn Baldwin 			limits = vmm_get_xsave_limits();
523abb023fbSJohn Baldwin 			if (!limits->xsave_enabled) {
524abb023fbSJohn Baldwin 				regs[0] = 0;
525abb023fbSJohn Baldwin 				regs[1] = 0;
526abb023fbSJohn Baldwin 				regs[2] = 0;
527abb023fbSJohn Baldwin 				regs[3] = 0;
528abb023fbSJohn Baldwin 				break;
529abb023fbSJohn Baldwin 			}
530abb023fbSJohn Baldwin 
531abb023fbSJohn Baldwin 			cpuid_count(*eax, *ecx, regs);
532abb023fbSJohn Baldwin 			switch (*ecx) {
533abb023fbSJohn Baldwin 			case 0:
534abb023fbSJohn Baldwin 				/*
535abb023fbSJohn Baldwin 				 * Only permit the guest to use bits
536abb023fbSJohn Baldwin 				 * that are active in the host in
537abb023fbSJohn Baldwin 				 * %xcr0.  Also, claim that the
538abb023fbSJohn Baldwin 				 * maximum save area size is
539abb023fbSJohn Baldwin 				 * equivalent to the host's current
540abb023fbSJohn Baldwin 				 * save area size.  Since this runs
541abb023fbSJohn Baldwin 				 * "inside" of vmrun(), it runs with
542abb023fbSJohn Baldwin 				 * the guest's xcr0, so the current
543abb023fbSJohn Baldwin 				 * save area size is correct as-is.
544abb023fbSJohn Baldwin 				 */
545abb023fbSJohn Baldwin 				regs[0] &= limits->xcr0_allowed;
546abb023fbSJohn Baldwin 				regs[2] = limits->xsave_max_size;
547abb023fbSJohn Baldwin 				regs[3] &= (limits->xcr0_allowed >> 32);
548abb023fbSJohn Baldwin 				break;
549abb023fbSJohn Baldwin 			case 1:
550abb023fbSJohn Baldwin 				/* Only permit XSAVEOPT. */
551abb023fbSJohn Baldwin 				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
552abb023fbSJohn Baldwin 				regs[1] = 0;
553abb023fbSJohn Baldwin 				regs[2] = 0;
554abb023fbSJohn Baldwin 				regs[3] = 0;
555abb023fbSJohn Baldwin 				break;
556abb023fbSJohn Baldwin 			default:
557abb023fbSJohn Baldwin 				/*
558abb023fbSJohn Baldwin 				 * If the leaf is for a permitted feature,
559abb023fbSJohn Baldwin 				 * pass through as-is, otherwise return
560abb023fbSJohn Baldwin 				 * all zeroes.
561abb023fbSJohn Baldwin 				 */
562abb023fbSJohn Baldwin 				if (!(limits->xcr0_allowed & (1ul << *ecx))) {
563abb023fbSJohn Baldwin 					regs[0] = 0;
564abb023fbSJohn Baldwin 					regs[1] = 0;
565abb023fbSJohn Baldwin 					regs[2] = 0;
566abb023fbSJohn Baldwin 					regs[3] = 0;
567abb023fbSJohn Baldwin 				}
568abb023fbSJohn Baldwin 				break;
569abb023fbSJohn Baldwin 			}
570abb023fbSJohn Baldwin 			break;
571abb023fbSJohn Baldwin 
572ec048c75SPeter Grehan 		case CPUID_0000_0015:
573ec048c75SPeter Grehan 			/*
574ec048c75SPeter Grehan 			 * Don't report CPU TSC/Crystal ratio and clock
575ec048c75SPeter Grehan 			 * values since guests may use these to derive the
576ec048c75SPeter Grehan 			 * local APIC frequency..
577ec048c75SPeter Grehan 			 */
578ec048c75SPeter Grehan 			regs[0] = 0;
579ec048c75SPeter Grehan 			regs[1] = 0;
580ec048c75SPeter Grehan 			regs[2] = 0;
581ec048c75SPeter Grehan 			regs[3] = 0;
582ec048c75SPeter Grehan 			break;
583ec048c75SPeter Grehan 
5848b287612SJohn Baldwin 		case 0x40000000:
5858b287612SJohn Baldwin 			regs[0] = CPUID_VM_HIGH;
5868b287612SJohn Baldwin 			bcopy(bhyve_id, &regs[1], 4);
587560d5edaSPeter Grehan 			bcopy(bhyve_id + 4, &regs[2], 4);
588560d5edaSPeter Grehan 			bcopy(bhyve_id + 8, &regs[3], 4);
5898b287612SJohn Baldwin 			break;
590560d5edaSPeter Grehan 
591366f6083SPeter Grehan 		default:
59215b7da10SConrad Meyer default_leaf:
593560d5edaSPeter Grehan 			/*
594560d5edaSPeter Grehan 			 * The leaf value has already been clamped so
595560d5edaSPeter Grehan 			 * simply pass this through, keeping count of
596560d5edaSPeter Grehan 			 * how many unhandled leaf values have been seen.
597560d5edaSPeter Grehan 			 */
598560d5edaSPeter Grehan 			atomic_add_long(&bhyve_xcpuids, 1);
599560d5edaSPeter Grehan 			cpuid_count(*eax, *ecx, regs);
600560d5edaSPeter Grehan 			break;
601366f6083SPeter Grehan 	}
602366f6083SPeter Grehan 
603366f6083SPeter Grehan 	*eax = regs[0];
604366f6083SPeter Grehan 	*ebx = regs[1];
605366f6083SPeter Grehan 	*ecx = regs[2];
606366f6083SPeter Grehan 	*edx = regs[3];
607560d5edaSPeter Grehan 
608366f6083SPeter Grehan 	return (1);
609366f6083SPeter Grehan }
610ea91ca92SNeel Natu 
611ea91ca92SNeel Natu bool
612ea91ca92SNeel Natu vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
613ea91ca92SNeel Natu {
614ea91ca92SNeel Natu 	bool rv;
615ea91ca92SNeel Natu 
616ea91ca92SNeel Natu 	KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
617ea91ca92SNeel Natu 	    __func__, cap));
618ea91ca92SNeel Natu 
619ea91ca92SNeel Natu 	/*
620ea91ca92SNeel Natu 	 * Simply passthrough the capabilities of the host cpu for now.
621ea91ca92SNeel Natu 	 */
622ea91ca92SNeel Natu 	rv = false;
623ea91ca92SNeel Natu 	switch (cap) {
624ea91ca92SNeel Natu 	case VCC_NO_EXECUTE:
625ea91ca92SNeel Natu 		if (amd_feature & AMDID_NX)
626ea91ca92SNeel Natu 			rv = true;
627ea91ca92SNeel Natu 		break;
628ea91ca92SNeel Natu 	case VCC_FFXSR:
629ea91ca92SNeel Natu 		if (amd_feature & AMDID_FFXSR)
630ea91ca92SNeel Natu 			rv = true;
631ea91ca92SNeel Natu 		break;
632ea91ca92SNeel Natu 	case VCC_TCE:
633ea91ca92SNeel Natu 		if (amd_feature2 & AMDID2_TCE)
634ea91ca92SNeel Natu 			rv = true;
635ea91ca92SNeel Natu 		break;
636ea91ca92SNeel Natu 	default:
637ea91ca92SNeel Natu 		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
638ea91ca92SNeel Natu 	}
639ea91ca92SNeel Natu 	return (rv);
640ea91ca92SNeel Natu }
641