xref: /freebsd/sys/amd64/vmm/x86.c (revision c49761dd572ca667babda4253e14498c7161e21a)
1366f6083SPeter Grehan /*-
2*c49761ddSPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3*c49761ddSPedro F. Giffuni  *
4366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
5366f6083SPeter Grehan  * All rights reserved.
6366f6083SPeter Grehan  *
7366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
8366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
9366f6083SPeter Grehan  * are met:
10366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
11366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
12366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
13366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
14366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
15366f6083SPeter Grehan  *
16366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26366f6083SPeter Grehan  * SUCH DAMAGE.
27366f6083SPeter Grehan  *
28366f6083SPeter Grehan  * $FreeBSD$
29366f6083SPeter Grehan  */
30366f6083SPeter Grehan 
31366f6083SPeter Grehan #include <sys/cdefs.h>
32366f6083SPeter Grehan __FBSDID("$FreeBSD$");
33366f6083SPeter Grehan 
34a2da7af6SNeel Natu #include <sys/param.h>
35abb023fbSJohn Baldwin #include <sys/pcpu.h>
368b287612SJohn Baldwin #include <sys/systm.h>
378bd3845dSNeel Natu #include <sys/sysctl.h>
38366f6083SPeter Grehan 
391472b87fSNeel Natu #include <machine/clock.h>
40366f6083SPeter Grehan #include <machine/cpufunc.h>
418b287612SJohn Baldwin #include <machine/md_var.h>
42abb023fbSJohn Baldwin #include <machine/segments.h>
43366f6083SPeter Grehan #include <machine/specialreg.h>
44366f6083SPeter Grehan 
45a2da7af6SNeel Natu #include <machine/vmm.h>
46a2da7af6SNeel Natu 
47abb023fbSJohn Baldwin #include "vmm_host.h"
485a1f0b36SNeel Natu #include "vmm_ktr.h"
495a1f0b36SNeel Natu #include "vmm_util.h"
50366f6083SPeter Grehan #include "x86.h"
51366f6083SPeter Grehan 
528bd3845dSNeel Natu SYSCTL_DECL(_hw_vmm);
538bd3845dSNeel Natu static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL);
548bd3845dSNeel Natu 
558b287612SJohn Baldwin #define	CPUID_VM_HIGH		0x40000000
568b287612SJohn Baldwin 
57560d5edaSPeter Grehan static const char bhyve_id[12] = "bhyve bhyve ";
58560d5edaSPeter Grehan 
59560d5edaSPeter Grehan static uint64_t bhyve_xcpuids;
605a1f0b36SNeel Natu SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
615a1f0b36SNeel Natu     "Number of times an unknown cpuid leaf was accessed");
628b287612SJohn Baldwin 
638bd3845dSNeel Natu /*
648bd3845dSNeel Natu  * The default CPU topology is a single thread per package.
658bd3845dSNeel Natu  */
668bd3845dSNeel Natu static u_int threads_per_core = 1;
678bd3845dSNeel Natu SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
688bd3845dSNeel Natu     &threads_per_core, 0, NULL);
698bd3845dSNeel Natu 
708bd3845dSNeel Natu static u_int cores_per_package = 1;
718bd3845dSNeel Natu SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
728bd3845dSNeel Natu     &cores_per_package, 0, NULL);
738bd3845dSNeel Natu 
748bd3845dSNeel Natu static int cpuid_leaf_b = 1;
758bd3845dSNeel Natu SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
768bd3845dSNeel Natu     &cpuid_leaf_b, 0, NULL);
778bd3845dSNeel Natu 
788bd3845dSNeel Natu /*
798bd3845dSNeel Natu  * Round up to the next power of two, if necessary, and then take log2.
808bd3845dSNeel Natu  * Returns -1 if argument is zero.
818bd3845dSNeel Natu  */
828bd3845dSNeel Natu static __inline int
838bd3845dSNeel Natu log2(u_int x)
848bd3845dSNeel Natu {
858bd3845dSNeel Natu 
868bd3845dSNeel Natu 	return (fls(x << (1 - powerof2(x))) - 1);
878bd3845dSNeel Natu }
888bd3845dSNeel Natu 
89366f6083SPeter Grehan int
90a2da7af6SNeel Natu x86_emulate_cpuid(struct vm *vm, int vcpu_id,
91a2da7af6SNeel Natu 		  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
92366f6083SPeter Grehan {
93abb023fbSJohn Baldwin 	const struct xsave_limits *limits;
94abb023fbSJohn Baldwin 	uint64_t cr4;
958bd3845dSNeel Natu 	int error, enable_invpcid, level, width, x2apic_id;
968bd3845dSNeel Natu 	unsigned int func, regs[4], logical_cpus;
97a2da7af6SNeel Natu 	enum x2apic_state x2apic_state;
98366f6083SPeter Grehan 
995a1f0b36SNeel Natu 	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx);
1005a1f0b36SNeel Natu 
1018b287612SJohn Baldwin 	/*
1028b287612SJohn Baldwin 	 * Requests for invalid CPUID levels should map to the highest
1038b287612SJohn Baldwin 	 * available level instead.
1048b287612SJohn Baldwin 	 */
1058b287612SJohn Baldwin 	if (cpu_exthigh != 0 && *eax >= 0x80000000) {
1068b287612SJohn Baldwin 		if (*eax > cpu_exthigh)
1078b287612SJohn Baldwin 			*eax = cpu_exthigh;
1088b287612SJohn Baldwin 	} else if (*eax >= 0x40000000) {
1098b287612SJohn Baldwin 		if (*eax > CPUID_VM_HIGH)
1108b287612SJohn Baldwin 			*eax = CPUID_VM_HIGH;
1118b287612SJohn Baldwin 	} else if (*eax > cpu_high) {
1128b287612SJohn Baldwin 		*eax = cpu_high;
1138b287612SJohn Baldwin 	}
114366f6083SPeter Grehan 
11525448de2SNeel Natu 	func = *eax;
11625448de2SNeel Natu 
1178b287612SJohn Baldwin 	/*
1188b287612SJohn Baldwin 	 * In general the approach used for CPU topology is to
1198b287612SJohn Baldwin 	 * advertise a flat topology where all CPUs are packages with
1208b287612SJohn Baldwin 	 * no multi-core or SMT.
1218b287612SJohn Baldwin 	 */
122366f6083SPeter Grehan 	switch (func) {
123560d5edaSPeter Grehan 		/*
124560d5edaSPeter Grehan 		 * Pass these through to the guest
125560d5edaSPeter Grehan 		 */
126366f6083SPeter Grehan 		case CPUID_0000_0000:
127366f6083SPeter Grehan 		case CPUID_0000_0002:
128366f6083SPeter Grehan 		case CPUID_0000_0003:
129366f6083SPeter Grehan 		case CPUID_8000_0000:
130366f6083SPeter Grehan 		case CPUID_8000_0002:
131366f6083SPeter Grehan 		case CPUID_8000_0003:
132366f6083SPeter Grehan 		case CPUID_8000_0004:
133366f6083SPeter Grehan 		case CPUID_8000_0006:
1345a1f0b36SNeel Natu 			cpuid_count(*eax, *ecx, regs);
1355a1f0b36SNeel Natu 			break;
136366f6083SPeter Grehan 		case CPUID_8000_0008:
1378b287612SJohn Baldwin 			cpuid_count(*eax, *ecx, regs);
1385a1f0b36SNeel Natu 			if (vmm_is_amd()) {
1395a1f0b36SNeel Natu 				/*
1405a1f0b36SNeel Natu 				 * XXX this might appear silly because AMD
1415a1f0b36SNeel Natu 				 * cpus don't have threads.
1425a1f0b36SNeel Natu 				 *
1435a1f0b36SNeel Natu 				 * However this matches the logical cpus as
1445a1f0b36SNeel Natu 				 * advertised by leaf 0x1 and will work even
1455a1f0b36SNeel Natu 				 * if the 'threads_per_core' tunable is set
1465a1f0b36SNeel Natu 				 * incorrectly on an AMD host.
1475a1f0b36SNeel Natu 				 */
1485a1f0b36SNeel Natu 				logical_cpus = threads_per_core *
1495a1f0b36SNeel Natu 				    cores_per_package;
1505a1f0b36SNeel Natu 				regs[2] = logical_cpus - 1;
1515a1f0b36SNeel Natu 			}
152366f6083SPeter Grehan 			break;
153366f6083SPeter Grehan 
154560d5edaSPeter Grehan 		case CPUID_8000_0001:
15506053618SNeel Natu 			cpuid_count(*eax, *ecx, regs);
15606053618SNeel Natu 
15706053618SNeel Natu 			/*
1585a1f0b36SNeel Natu 			 * Hide SVM and Topology Extension features from guest.
15906053618SNeel Natu 			 */
1605a1f0b36SNeel Natu 			regs[2] &= ~(AMDID2_SVM | AMDID2_TOPOLOGY);
16106053618SNeel Natu 
162560d5edaSPeter Grehan 			/*
16302904c45SNeel Natu 			 * Don't advertise extended performance counter MSRs
16402904c45SNeel Natu 			 * to the guest.
16502904c45SNeel Natu 			 */
16602904c45SNeel Natu 			regs[2] &= ~AMDID2_PCXC;
16702904c45SNeel Natu 			regs[2] &= ~AMDID2_PNXC;
16802904c45SNeel Natu 			regs[2] &= ~AMDID2_PTSCEL2I;
16902904c45SNeel Natu 
17002904c45SNeel Natu 			/*
1712688a818SNeel Natu 			 * Don't advertise Instruction Based Sampling feature.
1722688a818SNeel Natu 			 */
1732688a818SNeel Natu 			regs[2] &= ~AMDID2_IBS;
1742688a818SNeel Natu 
17565d5111aSNeel Natu 			/* NodeID MSR not available */
17665d5111aSNeel Natu 			regs[2] &= ~AMDID2_NODE_ID;
17765d5111aSNeel Natu 
178592cd7d3SNeel Natu 			/* Don't advertise the OS visible workaround feature */
179592cd7d3SNeel Natu 			regs[2] &= ~AMDID2_OSVW;
180592cd7d3SNeel Natu 
1813da44302SPeter Grehan 			/* Hide mwaitx/monitorx capability from the guest */
1823da44302SPeter Grehan 			regs[2] &= ~AMDID2_MWAITX;
1833da44302SPeter Grehan 
1842688a818SNeel Natu 			/*
185560d5edaSPeter Grehan 			 * Hide rdtscp/ia32_tsc_aux until we know how
186560d5edaSPeter Grehan 			 * to deal with them.
187560d5edaSPeter Grehan 			 */
188560d5edaSPeter Grehan 			regs[3] &= ~AMDID_RDTSCP;
189560d5edaSPeter Grehan 			break;
190560d5edaSPeter Grehan 
1911472b87fSNeel Natu 		case CPUID_8000_0007:
1921472b87fSNeel Natu 			/*
193592cd7d3SNeel Natu 			 * AMD uses this leaf to advertise the processor's
194592cd7d3SNeel Natu 			 * power monitoring and RAS capabilities. These
195592cd7d3SNeel Natu 			 * features are hardware-specific and exposing
196592cd7d3SNeel Natu 			 * them to a guest doesn't make a lot of sense.
197592cd7d3SNeel Natu 			 *
198592cd7d3SNeel Natu 			 * Intel uses this leaf only to advertise the
199592cd7d3SNeel Natu 			 * "Invariant TSC" feature with all other bits
200592cd7d3SNeel Natu 			 * being reserved (set to zero).
201592cd7d3SNeel Natu 			 */
202592cd7d3SNeel Natu 			regs[0] = 0;
203592cd7d3SNeel Natu 			regs[1] = 0;
204592cd7d3SNeel Natu 			regs[2] = 0;
205592cd7d3SNeel Natu 			regs[3] = 0;
206592cd7d3SNeel Natu 
207592cd7d3SNeel Natu 			/*
208592cd7d3SNeel Natu 			 * "Invariant TSC" can be advertised to the guest if:
209592cd7d3SNeel Natu 			 * - host TSC frequency is invariant
210592cd7d3SNeel Natu 			 * - host TSCs are synchronized across physical cpus
2111472b87fSNeel Natu 			 *
2121472b87fSNeel Natu 			 * XXX This still falls short because the vcpu
2131472b87fSNeel Natu 			 * can observe the TSC moving backwards as it
2141472b87fSNeel Natu 			 * migrates across physical cpus. But at least
2151472b87fSNeel Natu 			 * it should discourage the guest from using the
2161472b87fSNeel Natu 			 * TSC to keep track of time.
2171472b87fSNeel Natu 			 */
218592cd7d3SNeel Natu 			if (tsc_is_invariant && smp_tsc)
219592cd7d3SNeel Natu 				regs[3] |= AMDPM_TSC_INVARIANT;
2201472b87fSNeel Natu 			break;
2211472b87fSNeel Natu 
222366f6083SPeter Grehan 		case CPUID_0000_0001:
2238b287612SJohn Baldwin 			do_cpuid(1, regs);
2248b287612SJohn Baldwin 
225a2da7af6SNeel Natu 			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
226a2da7af6SNeel Natu 			if (error) {
227a2da7af6SNeel Natu 				panic("x86_emulate_cpuid: error %d "
228a2da7af6SNeel Natu 				      "fetching x2apic state", error);
229a2da7af6SNeel Natu 			}
230a2da7af6SNeel Natu 
231366f6083SPeter Grehan 			/*
232366f6083SPeter Grehan 			 * Override the APIC ID only in ebx
233366f6083SPeter Grehan 			 */
2348b287612SJohn Baldwin 			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
2358b287612SJohn Baldwin 			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
236366f6083SPeter Grehan 
237366f6083SPeter Grehan 			/*
23831708084SNeel Natu 			 * Don't expose VMX, SpeedStep, TME or SMX capability.
2398b287612SJohn Baldwin 			 * Advertise x2APIC capability and Hypervisor guest.
240366f6083SPeter Grehan 			 */
2418b287612SJohn Baldwin 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
24231708084SNeel Natu 			regs[2] &= ~(CPUID2_SMX);
243a2da7af6SNeel Natu 
244a2da7af6SNeel Natu 			regs[2] |= CPUID2_HV;
245a2da7af6SNeel Natu 
246a2da7af6SNeel Natu 			if (x2apic_state != X2APIC_DISABLED)
247a2da7af6SNeel Natu 				regs[2] |= CPUID2_X2APIC;
24852e5c8a2SNeel Natu 			else
24952e5c8a2SNeel Natu 				regs[2] &= ~CPUID2_X2APIC;
250366f6083SPeter Grehan 
251366f6083SPeter Grehan 			/*
252abb023fbSJohn Baldwin 			 * Only advertise CPUID2_XSAVE in the guest if
253abb023fbSJohn Baldwin 			 * the host is using XSAVE.
254298379f7SPeter Grehan 			 */
255abb023fbSJohn Baldwin 			if (!(regs[2] & CPUID2_OSXSAVE))
256abb023fbSJohn Baldwin 				regs[2] &= ~CPUID2_XSAVE;
257abb023fbSJohn Baldwin 
258abb023fbSJohn Baldwin 			/*
259abb023fbSJohn Baldwin 			 * If CPUID2_XSAVE is being advertised and the
260abb023fbSJohn Baldwin 			 * guest has set CR4_XSAVE, set
261abb023fbSJohn Baldwin 			 * CPUID2_OSXSAVE.
262abb023fbSJohn Baldwin 			 */
263abb023fbSJohn Baldwin 			regs[2] &= ~CPUID2_OSXSAVE;
264abb023fbSJohn Baldwin 			if (regs[2] & CPUID2_XSAVE) {
265abb023fbSJohn Baldwin 				error = vm_get_register(vm, vcpu_id,
266abb023fbSJohn Baldwin 				    VM_REG_GUEST_CR4, &cr4);
267abb023fbSJohn Baldwin 				if (error)
268abb023fbSJohn Baldwin 					panic("x86_emulate_cpuid: error %d "
269abb023fbSJohn Baldwin 					      "fetching %%cr4", error);
270abb023fbSJohn Baldwin 				if (cr4 & CR4_XSAVE)
271abb023fbSJohn Baldwin 					regs[2] |= CPUID2_OSXSAVE;
272abb023fbSJohn Baldwin 			}
273298379f7SPeter Grehan 
274298379f7SPeter Grehan 			/*
275ff6ec151SNeel Natu 			 * Hide monitor/mwait until we know how to deal with
276ff6ec151SNeel Natu 			 * these instructions.
277ff6ec151SNeel Natu 			 */
278ff6ec151SNeel Natu 			regs[2] &= ~CPUID2_MON;
279ff6ec151SNeel Natu 
280ff6ec151SNeel Natu                         /*
281560d5edaSPeter Grehan 			 * Hide the performance and debug features.
282560d5edaSPeter Grehan 			 */
283560d5edaSPeter Grehan 			regs[2] &= ~CPUID2_PDCM;
284560d5edaSPeter Grehan 
285517e21d3SPeter Grehan 			/*
286517e21d3SPeter Grehan 			 * No TSC deadline support in the APIC yet
287517e21d3SPeter Grehan 			 */
288517e21d3SPeter Grehan 			regs[2] &= ~CPUID2_TSCDLT;
289517e21d3SPeter Grehan 
290560d5edaSPeter Grehan 			/*
2911f3025e1SPeter Grehan 			 * Hide thermal monitoring
2921f3025e1SPeter Grehan 			 */
2931f3025e1SPeter Grehan 			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
2941f3025e1SPeter Grehan 
2951f3025e1SPeter Grehan 			/*
296560d5edaSPeter Grehan 			 * Hide the debug store capability.
297560d5edaSPeter Grehan 			 */
298560d5edaSPeter Grehan 			regs[3] &= ~CPUID_DS;
299560d5edaSPeter Grehan 
3001d29bfc1SNeel Natu 			/*
3011d29bfc1SNeel Natu 			 * Advertise the Machine Check and MTRR capability.
3021d29bfc1SNeel Natu 			 *
3031d29bfc1SNeel Natu 			 * Some guest OSes (e.g. Windows) will not boot if
3041d29bfc1SNeel Natu 			 * these features are absent.
3051d29bfc1SNeel Natu 			 */
3061d29bfc1SNeel Natu 			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
3071d29bfc1SNeel Natu 
3088bd3845dSNeel Natu 			logical_cpus = threads_per_core * cores_per_package;
3098b287612SJohn Baldwin 			regs[1] &= ~CPUID_HTT_CORES;
3108bd3845dSNeel Natu 			regs[1] |= (logical_cpus & 0xff) << 16;
3118bd3845dSNeel Natu 			regs[3] |= CPUID_HTT;
3128b287612SJohn Baldwin 			break;
3138b287612SJohn Baldwin 
3148b287612SJohn Baldwin 		case CPUID_0000_0004:
315534dc967SNeel Natu 			cpuid_count(*eax, *ecx, regs);
3168b287612SJohn Baldwin 
3178bd3845dSNeel Natu 			if (regs[0] || regs[1] || regs[2] || regs[3]) {
318534dc967SNeel Natu 				regs[0] &= 0x3ff;
3198bd3845dSNeel Natu 				regs[0] |= (cores_per_package - 1) << 26;
3208b287612SJohn Baldwin 				/*
3218bd3845dSNeel Natu 				 * Cache topology:
3228bd3845dSNeel Natu 				 * - L1 and L2 are shared only by the logical
3238bd3845dSNeel Natu 				 *   processors in a single core.
3248bd3845dSNeel Natu 				 * - L3 and above are shared by all logical
3258bd3845dSNeel Natu 				 *   processors in the package.
3268b287612SJohn Baldwin 				 */
3278bd3845dSNeel Natu 				logical_cpus = threads_per_core;
3288bd3845dSNeel Natu 				level = (regs[0] >> 5) & 0x7;
3298bd3845dSNeel Natu 				if (level >= 3)
3308bd3845dSNeel Natu 					logical_cpus *= cores_per_package;
3318bd3845dSNeel Natu 				regs[0] |= (logical_cpus - 1) << 14;
3328bd3845dSNeel Natu 			}
333366f6083SPeter Grehan 			break;
334366f6083SPeter Grehan 
335a0cad470SPeter Grehan 		case CPUID_0000_0007:
33649cc03daSNeel Natu 			regs[0] = 0;
33749cc03daSNeel Natu 			regs[1] = 0;
33849cc03daSNeel Natu 			regs[2] = 0;
33949cc03daSNeel Natu 			regs[3] = 0;
34049cc03daSNeel Natu 
34149cc03daSNeel Natu 			/* leaf 0 */
34249cc03daSNeel Natu 			if (*ecx == 0) {
34344a68c4eSJohn Baldwin 				cpuid_count(*eax, *ecx, regs);
34444a68c4eSJohn Baldwin 
34544a68c4eSJohn Baldwin 				/* Only leaf 0 is supported */
34644a68c4eSJohn Baldwin 				regs[0] = 0;
34744a68c4eSJohn Baldwin 
34844a68c4eSJohn Baldwin 				/*
34944a68c4eSJohn Baldwin 				 * Expose known-safe features.
35044a68c4eSJohn Baldwin 				 */
35144a68c4eSJohn Baldwin 				regs[1] &= (CPUID_STDEXT_FSGSBASE |
35244a68c4eSJohn Baldwin 				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
35344a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2 |
35444a68c4eSJohn Baldwin 				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
35544a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512F |
35644a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512PF |
35744a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512ER |
35844a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512CD);
35944a68c4eSJohn Baldwin 				regs[2] = 0;
36044a68c4eSJohn Baldwin 				regs[3] = 0;
36144a68c4eSJohn Baldwin 
36244a68c4eSJohn Baldwin 				/* Advertise INVPCID if it is enabled. */
36349cc03daSNeel Natu 				error = vm_get_capability(vm, vcpu_id,
36449cc03daSNeel Natu 				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
36549cc03daSNeel Natu 				if (error == 0 && enable_invpcid)
36649cc03daSNeel Natu 					regs[1] |= CPUID_STDEXT_INVPCID;
36749cc03daSNeel Natu 			}
36849cc03daSNeel Natu 			break;
36949cc03daSNeel Natu 
37049cc03daSNeel Natu 		case CPUID_0000_0006:
371c077e628SAlexander Motin 			regs[0] = CPUTPM1_ARAT;
372c077e628SAlexander Motin 			regs[1] = 0;
373c077e628SAlexander Motin 			regs[2] = 0;
374c077e628SAlexander Motin 			regs[3] = 0;
375c077e628SAlexander Motin 			break;
376c077e628SAlexander Motin 
377560d5edaSPeter Grehan 		case CPUID_0000_000A:
3781f3025e1SPeter Grehan 			/*
3791f3025e1SPeter Grehan 			 * Handle the access, but report 0 for
3801f3025e1SPeter Grehan 			 * all options
3811f3025e1SPeter Grehan 			 */
3821f3025e1SPeter Grehan 			regs[0] = 0;
3831f3025e1SPeter Grehan 			regs[1] = 0;
3841f3025e1SPeter Grehan 			regs[2] = 0;
3851f3025e1SPeter Grehan 			regs[3] = 0;
3861f3025e1SPeter Grehan 			break;
3871f3025e1SPeter Grehan 
388366f6083SPeter Grehan 		case CPUID_0000_000B:
389366f6083SPeter Grehan 			/*
390366f6083SPeter Grehan 			 * Processor topology enumeration
391366f6083SPeter Grehan 			 */
3928bd3845dSNeel Natu 			if (*ecx == 0) {
3938bd3845dSNeel Natu 				logical_cpus = threads_per_core;
3948bd3845dSNeel Natu 				width = log2(logical_cpus);
3958bd3845dSNeel Natu 				level = CPUID_TYPE_SMT;
3968bd3845dSNeel Natu 				x2apic_id = vcpu_id;
3978bd3845dSNeel Natu 			}
3988bd3845dSNeel Natu 
3998bd3845dSNeel Natu 			if (*ecx == 1) {
4008bd3845dSNeel Natu 				logical_cpus = threads_per_core *
4018bd3845dSNeel Natu 				    cores_per_package;
4028bd3845dSNeel Natu 				width = log2(logical_cpus);
4038bd3845dSNeel Natu 				level = CPUID_TYPE_CORE;
4048bd3845dSNeel Natu 				x2apic_id = vcpu_id;
4058bd3845dSNeel Natu 			}
4068bd3845dSNeel Natu 
4078bd3845dSNeel Natu 			if (!cpuid_leaf_b || *ecx >= 2) {
4088bd3845dSNeel Natu 				width = 0;
4098bd3845dSNeel Natu 				logical_cpus = 0;
4108bd3845dSNeel Natu 				level = 0;
4118bd3845dSNeel Natu 				x2apic_id = 0;
4128bd3845dSNeel Natu 			}
4138bd3845dSNeel Natu 
4148bd3845dSNeel Natu 			regs[0] = width & 0x1f;
4158bd3845dSNeel Natu 			regs[1] = logical_cpus & 0xffff;
4168bd3845dSNeel Natu 			regs[2] = (level << 8) | (*ecx & 0xff);
4178bd3845dSNeel Natu 			regs[3] = x2apic_id;
418366f6083SPeter Grehan 			break;
419366f6083SPeter Grehan 
420abb023fbSJohn Baldwin 		case CPUID_0000_000D:
421abb023fbSJohn Baldwin 			limits = vmm_get_xsave_limits();
422abb023fbSJohn Baldwin 			if (!limits->xsave_enabled) {
423abb023fbSJohn Baldwin 				regs[0] = 0;
424abb023fbSJohn Baldwin 				regs[1] = 0;
425abb023fbSJohn Baldwin 				regs[2] = 0;
426abb023fbSJohn Baldwin 				regs[3] = 0;
427abb023fbSJohn Baldwin 				break;
428abb023fbSJohn Baldwin 			}
429abb023fbSJohn Baldwin 
430abb023fbSJohn Baldwin 			cpuid_count(*eax, *ecx, regs);
431abb023fbSJohn Baldwin 			switch (*ecx) {
432abb023fbSJohn Baldwin 			case 0:
433abb023fbSJohn Baldwin 				/*
434abb023fbSJohn Baldwin 				 * Only permit the guest to use bits
435abb023fbSJohn Baldwin 				 * that are active in the host in
436abb023fbSJohn Baldwin 				 * %xcr0.  Also, claim that the
437abb023fbSJohn Baldwin 				 * maximum save area size is
438abb023fbSJohn Baldwin 				 * equivalent to the host's current
439abb023fbSJohn Baldwin 				 * save area size.  Since this runs
440abb023fbSJohn Baldwin 				 * "inside" of vmrun(), it runs with
441abb023fbSJohn Baldwin 				 * the guest's xcr0, so the current
442abb023fbSJohn Baldwin 				 * save area size is correct as-is.
443abb023fbSJohn Baldwin 				 */
444abb023fbSJohn Baldwin 				regs[0] &= limits->xcr0_allowed;
445abb023fbSJohn Baldwin 				regs[2] = limits->xsave_max_size;
446abb023fbSJohn Baldwin 				regs[3] &= (limits->xcr0_allowed >> 32);
447abb023fbSJohn Baldwin 				break;
448abb023fbSJohn Baldwin 			case 1:
449abb023fbSJohn Baldwin 				/* Only permit XSAVEOPT. */
450abb023fbSJohn Baldwin 				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
451abb023fbSJohn Baldwin 				regs[1] = 0;
452abb023fbSJohn Baldwin 				regs[2] = 0;
453abb023fbSJohn Baldwin 				regs[3] = 0;
454abb023fbSJohn Baldwin 				break;
455abb023fbSJohn Baldwin 			default:
456abb023fbSJohn Baldwin 				/*
457abb023fbSJohn Baldwin 				 * If the leaf is for a permitted feature,
458abb023fbSJohn Baldwin 				 * pass through as-is, otherwise return
459abb023fbSJohn Baldwin 				 * all zeroes.
460abb023fbSJohn Baldwin 				 */
461abb023fbSJohn Baldwin 				if (!(limits->xcr0_allowed & (1ul << *ecx))) {
462abb023fbSJohn Baldwin 					regs[0] = 0;
463abb023fbSJohn Baldwin 					regs[1] = 0;
464abb023fbSJohn Baldwin 					regs[2] = 0;
465abb023fbSJohn Baldwin 					regs[3] = 0;
466abb023fbSJohn Baldwin 				}
467abb023fbSJohn Baldwin 				break;
468abb023fbSJohn Baldwin 			}
469abb023fbSJohn Baldwin 			break;
470abb023fbSJohn Baldwin 
4718b287612SJohn Baldwin 		case 0x40000000:
4728b287612SJohn Baldwin 			regs[0] = CPUID_VM_HIGH;
4738b287612SJohn Baldwin 			bcopy(bhyve_id, &regs[1], 4);
474560d5edaSPeter Grehan 			bcopy(bhyve_id + 4, &regs[2], 4);
475560d5edaSPeter Grehan 			bcopy(bhyve_id + 8, &regs[3], 4);
4768b287612SJohn Baldwin 			break;
477560d5edaSPeter Grehan 
478366f6083SPeter Grehan 		default:
479560d5edaSPeter Grehan 			/*
480560d5edaSPeter Grehan 			 * The leaf value has already been clamped so
481560d5edaSPeter Grehan 			 * simply pass this through, keeping count of
482560d5edaSPeter Grehan 			 * how many unhandled leaf values have been seen.
483560d5edaSPeter Grehan 			 */
484560d5edaSPeter Grehan 			atomic_add_long(&bhyve_xcpuids, 1);
485560d5edaSPeter Grehan 			cpuid_count(*eax, *ecx, regs);
486560d5edaSPeter Grehan 			break;
487366f6083SPeter Grehan 	}
488366f6083SPeter Grehan 
489366f6083SPeter Grehan 	*eax = regs[0];
490366f6083SPeter Grehan 	*ebx = regs[1];
491366f6083SPeter Grehan 	*ecx = regs[2];
492366f6083SPeter Grehan 	*edx = regs[3];
493560d5edaSPeter Grehan 
494366f6083SPeter Grehan 	return (1);
495366f6083SPeter Grehan }
496ea91ca92SNeel Natu 
497ea91ca92SNeel Natu bool
498ea91ca92SNeel Natu vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
499ea91ca92SNeel Natu {
500ea91ca92SNeel Natu 	bool rv;
501ea91ca92SNeel Natu 
502ea91ca92SNeel Natu 	KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
503ea91ca92SNeel Natu 	    __func__, cap));
504ea91ca92SNeel Natu 
505ea91ca92SNeel Natu 	/*
506ea91ca92SNeel Natu 	 * Simply passthrough the capabilities of the host cpu for now.
507ea91ca92SNeel Natu 	 */
508ea91ca92SNeel Natu 	rv = false;
509ea91ca92SNeel Natu 	switch (cap) {
510ea91ca92SNeel Natu 	case VCC_NO_EXECUTE:
511ea91ca92SNeel Natu 		if (amd_feature & AMDID_NX)
512ea91ca92SNeel Natu 			rv = true;
513ea91ca92SNeel Natu 		break;
514ea91ca92SNeel Natu 	case VCC_FFXSR:
515ea91ca92SNeel Natu 		if (amd_feature & AMDID_FFXSR)
516ea91ca92SNeel Natu 			rv = true;
517ea91ca92SNeel Natu 		break;
518ea91ca92SNeel Natu 	case VCC_TCE:
519ea91ca92SNeel Natu 		if (amd_feature2 & AMDID2_TCE)
520ea91ca92SNeel Natu 			rv = true;
521ea91ca92SNeel Natu 		break;
522ea91ca92SNeel Natu 	default:
523ea91ca92SNeel Natu 		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
524ea91ca92SNeel Natu 	}
525ea91ca92SNeel Natu 	return (rv);
526ea91ca92SNeel Natu }
527