/* xref: /freebsd/sys/amd64/vmm/x86.c (revision 15b7da10ac76a08b8be5e9329a23e240a4865ec8) */
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/sysctl.h>

#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/segments.h>
#include <machine/specialreg.h>

#include <machine/vmm.h>

#include "vmm_host.h"
#include "vmm_ktr.h"
#include "vmm_util.h"
#include "x86.h"

528bd3845dSNeel Natu SYSCTL_DECL(_hw_vmm);
538bd3845dSNeel Natu static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL);
548bd3845dSNeel Natu 
558b287612SJohn Baldwin #define	CPUID_VM_HIGH		0x40000000
568b287612SJohn Baldwin 
57560d5edaSPeter Grehan static const char bhyve_id[12] = "bhyve bhyve ";
58560d5edaSPeter Grehan 
59560d5edaSPeter Grehan static uint64_t bhyve_xcpuids;
605a1f0b36SNeel Natu SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
615a1f0b36SNeel Natu     "Number of times an unknown cpuid leaf was accessed");
628b287612SJohn Baldwin 
6301d822d3SRodney W. Grimes #if __FreeBSD_version < 1200060	/* Remove after 11 EOL helps MFCing */
6401d822d3SRodney W. Grimes extern u_int threads_per_core;
658bd3845dSNeel Natu SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
668bd3845dSNeel Natu     &threads_per_core, 0, NULL);
678bd3845dSNeel Natu 
6801d822d3SRodney W. Grimes extern u_int cores_per_package;
698bd3845dSNeel Natu SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
708bd3845dSNeel Natu     &cores_per_package, 0, NULL);
7101d822d3SRodney W. Grimes #endif
728bd3845dSNeel Natu 
738bd3845dSNeel Natu static int cpuid_leaf_b = 1;
748bd3845dSNeel Natu SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
758bd3845dSNeel Natu     &cpuid_leaf_b, 0, NULL);
768bd3845dSNeel Natu 
778bd3845dSNeel Natu /*
788bd3845dSNeel Natu  * Round up to the next power of two, if necessary, and then take log2.
798bd3845dSNeel Natu  * Returns -1 if argument is zero.
808bd3845dSNeel Natu  */
818bd3845dSNeel Natu static __inline int
828bd3845dSNeel Natu log2(u_int x)
838bd3845dSNeel Natu {
848bd3845dSNeel Natu 
858bd3845dSNeel Natu 	return (fls(x << (1 - powerof2(x))) - 1);
868bd3845dSNeel Natu }
878bd3845dSNeel Natu 
88366f6083SPeter Grehan int
89a2da7af6SNeel Natu x86_emulate_cpuid(struct vm *vm, int vcpu_id,
90a2da7af6SNeel Natu 		  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
91366f6083SPeter Grehan {
92abb023fbSJohn Baldwin 	const struct xsave_limits *limits;
93abb023fbSJohn Baldwin 	uint64_t cr4;
948bd3845dSNeel Natu 	int error, enable_invpcid, level, width, x2apic_id;
958bd3845dSNeel Natu 	unsigned int func, regs[4], logical_cpus;
96a2da7af6SNeel Natu 	enum x2apic_state x2apic_state;
9701d822d3SRodney W. Grimes 	uint16_t cores, maxcpus, sockets, threads;
98366f6083SPeter Grehan 
995a1f0b36SNeel Natu 	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx);
1005a1f0b36SNeel Natu 
1018b287612SJohn Baldwin 	/*
1028b287612SJohn Baldwin 	 * Requests for invalid CPUID levels should map to the highest
1038b287612SJohn Baldwin 	 * available level instead.
1048b287612SJohn Baldwin 	 */
1058b287612SJohn Baldwin 	if (cpu_exthigh != 0 && *eax >= 0x80000000) {
1068b287612SJohn Baldwin 		if (*eax > cpu_exthigh)
1078b287612SJohn Baldwin 			*eax = cpu_exthigh;
1088b287612SJohn Baldwin 	} else if (*eax >= 0x40000000) {
1098b287612SJohn Baldwin 		if (*eax > CPUID_VM_HIGH)
1108b287612SJohn Baldwin 			*eax = CPUID_VM_HIGH;
1118b287612SJohn Baldwin 	} else if (*eax > cpu_high) {
1128b287612SJohn Baldwin 		*eax = cpu_high;
1138b287612SJohn Baldwin 	}
114366f6083SPeter Grehan 
11525448de2SNeel Natu 	func = *eax;
11625448de2SNeel Natu 
1178b287612SJohn Baldwin 	/*
1188b287612SJohn Baldwin 	 * In general the approach used for CPU topology is to
1198b287612SJohn Baldwin 	 * advertise a flat topology where all CPUs are packages with
1208b287612SJohn Baldwin 	 * no multi-core or SMT.
1218b287612SJohn Baldwin 	 */
122366f6083SPeter Grehan 	switch (func) {
123560d5edaSPeter Grehan 		/*
124560d5edaSPeter Grehan 		 * Pass these through to the guest
125560d5edaSPeter Grehan 		 */
126366f6083SPeter Grehan 		case CPUID_0000_0000:
127366f6083SPeter Grehan 		case CPUID_0000_0002:
128366f6083SPeter Grehan 		case CPUID_0000_0003:
129366f6083SPeter Grehan 		case CPUID_8000_0000:
130366f6083SPeter Grehan 		case CPUID_8000_0002:
131366f6083SPeter Grehan 		case CPUID_8000_0003:
132366f6083SPeter Grehan 		case CPUID_8000_0004:
133366f6083SPeter Grehan 		case CPUID_8000_0006:
1345a1f0b36SNeel Natu 			cpuid_count(*eax, *ecx, regs);
1355a1f0b36SNeel Natu 			break;
136366f6083SPeter Grehan 		case CPUID_8000_0008:
1378b287612SJohn Baldwin 			cpuid_count(*eax, *ecx, regs);
1385a1f0b36SNeel Natu 			if (vmm_is_amd()) {
13901d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
14001d822d3SRodney W. Grimes 				    &maxcpus);
141*15b7da10SConrad Meyer 				/*
142*15b7da10SConrad Meyer 				 * Here, width is ApicIdCoreIdSize, present on
143*15b7da10SConrad Meyer 				 * at least Family 15h and newer.  It
144*15b7da10SConrad Meyer 				 * represents the "number of bits in the
145*15b7da10SConrad Meyer 				 * initial apicid that indicate thread id
146*15b7da10SConrad Meyer 				 * within a package."
147*15b7da10SConrad Meyer 				 *
148*15b7da10SConrad Meyer 				 * Our topo_probe_amd() uses it for
149*15b7da10SConrad Meyer 				 * pkg_id_shift and other OSes may rely on it.
150*15b7da10SConrad Meyer 				 */
151*15b7da10SConrad Meyer 				width = MIN(0xF, log2(threads * cores));
152*15b7da10SConrad Meyer 				if (width < 0x4)
153*15b7da10SConrad Meyer 					width = 0;
154*15b7da10SConrad Meyer 				logical_cpus = MIN(0xFF, threads * cores - 1);
155*15b7da10SConrad Meyer 				regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
1565a1f0b36SNeel Natu 			}
157366f6083SPeter Grehan 			break;
158366f6083SPeter Grehan 
159560d5edaSPeter Grehan 		case CPUID_8000_0001:
16006053618SNeel Natu 			cpuid_count(*eax, *ecx, regs);
16106053618SNeel Natu 
16206053618SNeel Natu 			/*
163*15b7da10SConrad Meyer 			 * Hide SVM from guest.
16406053618SNeel Natu 			 */
165*15b7da10SConrad Meyer 			regs[2] &= ~AMDID2_SVM;
16606053618SNeel Natu 
167560d5edaSPeter Grehan 			/*
16802904c45SNeel Natu 			 * Don't advertise extended performance counter MSRs
16902904c45SNeel Natu 			 * to the guest.
17002904c45SNeel Natu 			 */
17102904c45SNeel Natu 			regs[2] &= ~AMDID2_PCXC;
17202904c45SNeel Natu 			regs[2] &= ~AMDID2_PNXC;
17302904c45SNeel Natu 			regs[2] &= ~AMDID2_PTSCEL2I;
17402904c45SNeel Natu 
17502904c45SNeel Natu 			/*
1762688a818SNeel Natu 			 * Don't advertise Instruction Based Sampling feature.
1772688a818SNeel Natu 			 */
1782688a818SNeel Natu 			regs[2] &= ~AMDID2_IBS;
1792688a818SNeel Natu 
18065d5111aSNeel Natu 			/* NodeID MSR not available */
18165d5111aSNeel Natu 			regs[2] &= ~AMDID2_NODE_ID;
18265d5111aSNeel Natu 
183592cd7d3SNeel Natu 			/* Don't advertise the OS visible workaround feature */
184592cd7d3SNeel Natu 			regs[2] &= ~AMDID2_OSVW;
185592cd7d3SNeel Natu 
1863da44302SPeter Grehan 			/* Hide mwaitx/monitorx capability from the guest */
1873da44302SPeter Grehan 			regs[2] &= ~AMDID2_MWAITX;
1883da44302SPeter Grehan 
1892688a818SNeel Natu 			/*
190560d5edaSPeter Grehan 			 * Hide rdtscp/ia32_tsc_aux until we know how
191560d5edaSPeter Grehan 			 * to deal with them.
192560d5edaSPeter Grehan 			 */
193560d5edaSPeter Grehan 			regs[3] &= ~AMDID_RDTSCP;
194560d5edaSPeter Grehan 			break;
195560d5edaSPeter Grehan 
1961472b87fSNeel Natu 		case CPUID_8000_0007:
1971472b87fSNeel Natu 			/*
198592cd7d3SNeel Natu 			 * AMD uses this leaf to advertise the processor's
199592cd7d3SNeel Natu 			 * power monitoring and RAS capabilities. These
200592cd7d3SNeel Natu 			 * features are hardware-specific and exposing
201592cd7d3SNeel Natu 			 * them to a guest doesn't make a lot of sense.
202592cd7d3SNeel Natu 			 *
203592cd7d3SNeel Natu 			 * Intel uses this leaf only to advertise the
204592cd7d3SNeel Natu 			 * "Invariant TSC" feature with all other bits
205592cd7d3SNeel Natu 			 * being reserved (set to zero).
206592cd7d3SNeel Natu 			 */
207592cd7d3SNeel Natu 			regs[0] = 0;
208592cd7d3SNeel Natu 			regs[1] = 0;
209592cd7d3SNeel Natu 			regs[2] = 0;
210592cd7d3SNeel Natu 			regs[3] = 0;
211592cd7d3SNeel Natu 
212592cd7d3SNeel Natu 			/*
213592cd7d3SNeel Natu 			 * "Invariant TSC" can be advertised to the guest if:
214592cd7d3SNeel Natu 			 * - host TSC frequency is invariant
215592cd7d3SNeel Natu 			 * - host TSCs are synchronized across physical cpus
2161472b87fSNeel Natu 			 *
2171472b87fSNeel Natu 			 * XXX This still falls short because the vcpu
2181472b87fSNeel Natu 			 * can observe the TSC moving backwards as it
2191472b87fSNeel Natu 			 * migrates across physical cpus. But at least
2201472b87fSNeel Natu 			 * it should discourage the guest from using the
2211472b87fSNeel Natu 			 * TSC to keep track of time.
2221472b87fSNeel Natu 			 */
223592cd7d3SNeel Natu 			if (tsc_is_invariant && smp_tsc)
224592cd7d3SNeel Natu 				regs[3] |= AMDPM_TSC_INVARIANT;
2251472b87fSNeel Natu 			break;
2261472b87fSNeel Natu 
227*15b7da10SConrad Meyer 		case CPUID_8000_001D:
228*15b7da10SConrad Meyer 			/* AMD Cache topology, like 0000_0004 for Intel. */
229*15b7da10SConrad Meyer 			if (!vmm_is_amd())
230*15b7da10SConrad Meyer 				goto default_leaf;
231*15b7da10SConrad Meyer 
232*15b7da10SConrad Meyer 			/*
233*15b7da10SConrad Meyer 			 * Similar to Intel, generate a ficticious cache
234*15b7da10SConrad Meyer 			 * topology for the guest with L3 shared by the
235*15b7da10SConrad Meyer 			 * package, and L1 and L2 local to a core.
236*15b7da10SConrad Meyer 			 */
237*15b7da10SConrad Meyer 			vm_get_topology(vm, &sockets, &cores, &threads,
238*15b7da10SConrad Meyer 			    &maxcpus);
239*15b7da10SConrad Meyer 			switch (*ecx) {
240*15b7da10SConrad Meyer 			case 0:
241*15b7da10SConrad Meyer 				logical_cpus = threads;
242*15b7da10SConrad Meyer 				level = 1;
243*15b7da10SConrad Meyer 				func = 1;	/* data cache */
244*15b7da10SConrad Meyer 				break;
245*15b7da10SConrad Meyer 			case 1:
246*15b7da10SConrad Meyer 				logical_cpus = threads;
247*15b7da10SConrad Meyer 				level = 2;
248*15b7da10SConrad Meyer 				func = 3;	/* unified cache */
249*15b7da10SConrad Meyer 				break;
250*15b7da10SConrad Meyer 			case 2:
251*15b7da10SConrad Meyer 				logical_cpus = threads * cores;
252*15b7da10SConrad Meyer 				level = 3;
253*15b7da10SConrad Meyer 				func = 3;	/* unified cache */
254*15b7da10SConrad Meyer 				break;
255*15b7da10SConrad Meyer 			default:
256*15b7da10SConrad Meyer 				logical_cpus = 0;
257*15b7da10SConrad Meyer 				level = 0;
258*15b7da10SConrad Meyer 				func = 0;
259*15b7da10SConrad Meyer 				break;
260*15b7da10SConrad Meyer 			}
261*15b7da10SConrad Meyer 
262*15b7da10SConrad Meyer 			logical_cpus = MIN(0xfff, logical_cpus - 1);
263*15b7da10SConrad Meyer 			regs[0] = (logical_cpus << 14) | (1 << 8) |
264*15b7da10SConrad Meyer 			    (level << 5) | func;
265*15b7da10SConrad Meyer 			regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
266*15b7da10SConrad Meyer 			regs[2] = 0;
267*15b7da10SConrad Meyer 			regs[3] = 0;
268*15b7da10SConrad Meyer 			break;
269*15b7da10SConrad Meyer 
270*15b7da10SConrad Meyer 		case CPUID_8000_001E:
271*15b7da10SConrad Meyer 			/* AMD Family 16h+ additional identifiers */
272*15b7da10SConrad Meyer 			if (!vmm_is_amd() || CPUID_TO_FAMILY(cpu_id) < 0x16)
273*15b7da10SConrad Meyer 				goto default_leaf;
274*15b7da10SConrad Meyer 
275*15b7da10SConrad Meyer 			vm_get_topology(vm, &sockets, &cores, &threads,
276*15b7da10SConrad Meyer 			    &maxcpus);
277*15b7da10SConrad Meyer 			regs[0] = vcpu_id;
278*15b7da10SConrad Meyer 			threads = MIN(0xFF, threads - 1);
279*15b7da10SConrad Meyer 			regs[1] = (threads << 8) |
280*15b7da10SConrad Meyer 			    (vcpu_id >> log2(threads + 1));
281*15b7da10SConrad Meyer 			/*
282*15b7da10SConrad Meyer 			 * XXX Bhyve topology cannot yet represent >1 node per
283*15b7da10SConrad Meyer 			 * processor.
284*15b7da10SConrad Meyer 			 */
285*15b7da10SConrad Meyer 			regs[2] = 0;
286*15b7da10SConrad Meyer 			regs[3] = 0;
287*15b7da10SConrad Meyer 			break;
288*15b7da10SConrad Meyer 
289366f6083SPeter Grehan 		case CPUID_0000_0001:
2908b287612SJohn Baldwin 			do_cpuid(1, regs);
2918b287612SJohn Baldwin 
292a2da7af6SNeel Natu 			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
293a2da7af6SNeel Natu 			if (error) {
294a2da7af6SNeel Natu 				panic("x86_emulate_cpuid: error %d "
295a2da7af6SNeel Natu 				      "fetching x2apic state", error);
296a2da7af6SNeel Natu 			}
297a2da7af6SNeel Natu 
298366f6083SPeter Grehan 			/*
299366f6083SPeter Grehan 			 * Override the APIC ID only in ebx
300366f6083SPeter Grehan 			 */
3018b287612SJohn Baldwin 			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
3028b287612SJohn Baldwin 			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
303366f6083SPeter Grehan 
304366f6083SPeter Grehan 			/*
30531708084SNeel Natu 			 * Don't expose VMX, SpeedStep, TME or SMX capability.
3068b287612SJohn Baldwin 			 * Advertise x2APIC capability and Hypervisor guest.
307366f6083SPeter Grehan 			 */
3088b287612SJohn Baldwin 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
30931708084SNeel Natu 			regs[2] &= ~(CPUID2_SMX);
310a2da7af6SNeel Natu 
311a2da7af6SNeel Natu 			regs[2] |= CPUID2_HV;
312a2da7af6SNeel Natu 
313a2da7af6SNeel Natu 			if (x2apic_state != X2APIC_DISABLED)
314a2da7af6SNeel Natu 				regs[2] |= CPUID2_X2APIC;
31552e5c8a2SNeel Natu 			else
31652e5c8a2SNeel Natu 				regs[2] &= ~CPUID2_X2APIC;
317366f6083SPeter Grehan 
318366f6083SPeter Grehan 			/*
319abb023fbSJohn Baldwin 			 * Only advertise CPUID2_XSAVE in the guest if
320abb023fbSJohn Baldwin 			 * the host is using XSAVE.
321298379f7SPeter Grehan 			 */
322abb023fbSJohn Baldwin 			if (!(regs[2] & CPUID2_OSXSAVE))
323abb023fbSJohn Baldwin 				regs[2] &= ~CPUID2_XSAVE;
324abb023fbSJohn Baldwin 
325abb023fbSJohn Baldwin 			/*
326abb023fbSJohn Baldwin 			 * If CPUID2_XSAVE is being advertised and the
327abb023fbSJohn Baldwin 			 * guest has set CR4_XSAVE, set
328abb023fbSJohn Baldwin 			 * CPUID2_OSXSAVE.
329abb023fbSJohn Baldwin 			 */
330abb023fbSJohn Baldwin 			regs[2] &= ~CPUID2_OSXSAVE;
331abb023fbSJohn Baldwin 			if (regs[2] & CPUID2_XSAVE) {
332abb023fbSJohn Baldwin 				error = vm_get_register(vm, vcpu_id,
333abb023fbSJohn Baldwin 				    VM_REG_GUEST_CR4, &cr4);
334abb023fbSJohn Baldwin 				if (error)
335abb023fbSJohn Baldwin 					panic("x86_emulate_cpuid: error %d "
336abb023fbSJohn Baldwin 					      "fetching %%cr4", error);
337abb023fbSJohn Baldwin 				if (cr4 & CR4_XSAVE)
338abb023fbSJohn Baldwin 					regs[2] |= CPUID2_OSXSAVE;
339abb023fbSJohn Baldwin 			}
340298379f7SPeter Grehan 
341298379f7SPeter Grehan 			/*
342ff6ec151SNeel Natu 			 * Hide monitor/mwait until we know how to deal with
343ff6ec151SNeel Natu 			 * these instructions.
344ff6ec151SNeel Natu 			 */
345ff6ec151SNeel Natu 			regs[2] &= ~CPUID2_MON;
346ff6ec151SNeel Natu 
347ff6ec151SNeel Natu                         /*
348560d5edaSPeter Grehan 			 * Hide the performance and debug features.
349560d5edaSPeter Grehan 			 */
350560d5edaSPeter Grehan 			regs[2] &= ~CPUID2_PDCM;
351560d5edaSPeter Grehan 
352517e21d3SPeter Grehan 			/*
353517e21d3SPeter Grehan 			 * No TSC deadline support in the APIC yet
354517e21d3SPeter Grehan 			 */
355517e21d3SPeter Grehan 			regs[2] &= ~CPUID2_TSCDLT;
356517e21d3SPeter Grehan 
357560d5edaSPeter Grehan 			/*
3581f3025e1SPeter Grehan 			 * Hide thermal monitoring
3591f3025e1SPeter Grehan 			 */
3601f3025e1SPeter Grehan 			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
3611f3025e1SPeter Grehan 
3621f3025e1SPeter Grehan 			/*
363560d5edaSPeter Grehan 			 * Hide the debug store capability.
364560d5edaSPeter Grehan 			 */
365560d5edaSPeter Grehan 			regs[3] &= ~CPUID_DS;
366560d5edaSPeter Grehan 
3671d29bfc1SNeel Natu 			/*
3681d29bfc1SNeel Natu 			 * Advertise the Machine Check and MTRR capability.
3691d29bfc1SNeel Natu 			 *
3701d29bfc1SNeel Natu 			 * Some guest OSes (e.g. Windows) will not boot if
3711d29bfc1SNeel Natu 			 * these features are absent.
3721d29bfc1SNeel Natu 			 */
3731d29bfc1SNeel Natu 			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
3741d29bfc1SNeel Natu 
37501d822d3SRodney W. Grimes 			vm_get_topology(vm, &sockets, &cores, &threads,
37601d822d3SRodney W. Grimes 			    &maxcpus);
37701d822d3SRodney W. Grimes 			logical_cpus = threads * cores;
3788b287612SJohn Baldwin 			regs[1] &= ~CPUID_HTT_CORES;
3798bd3845dSNeel Natu 			regs[1] |= (logical_cpus & 0xff) << 16;
3808bd3845dSNeel Natu 			regs[3] |= CPUID_HTT;
3818b287612SJohn Baldwin 			break;
3828b287612SJohn Baldwin 
3838b287612SJohn Baldwin 		case CPUID_0000_0004:
384534dc967SNeel Natu 			cpuid_count(*eax, *ecx, regs);
3858b287612SJohn Baldwin 
3868bd3845dSNeel Natu 			if (regs[0] || regs[1] || regs[2] || regs[3]) {
38701d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
38801d822d3SRodney W. Grimes 				    &maxcpus);
389534dc967SNeel Natu 				regs[0] &= 0x3ff;
39001d822d3SRodney W. Grimes 				regs[0] |= (cores - 1) << 26;
3918b287612SJohn Baldwin 				/*
3928bd3845dSNeel Natu 				 * Cache topology:
3938bd3845dSNeel Natu 				 * - L1 and L2 are shared only by the logical
3948bd3845dSNeel Natu 				 *   processors in a single core.
3958bd3845dSNeel Natu 				 * - L3 and above are shared by all logical
3968bd3845dSNeel Natu 				 *   processors in the package.
3978b287612SJohn Baldwin 				 */
39801d822d3SRodney W. Grimes 				logical_cpus = threads;
3998bd3845dSNeel Natu 				level = (regs[0] >> 5) & 0x7;
4008bd3845dSNeel Natu 				if (level >= 3)
40101d822d3SRodney W. Grimes 					logical_cpus *= cores;
4028bd3845dSNeel Natu 				regs[0] |= (logical_cpus - 1) << 14;
4038bd3845dSNeel Natu 			}
404366f6083SPeter Grehan 			break;
405366f6083SPeter Grehan 
406a0cad470SPeter Grehan 		case CPUID_0000_0007:
40749cc03daSNeel Natu 			regs[0] = 0;
40849cc03daSNeel Natu 			regs[1] = 0;
40949cc03daSNeel Natu 			regs[2] = 0;
41049cc03daSNeel Natu 			regs[3] = 0;
41149cc03daSNeel Natu 
41249cc03daSNeel Natu 			/* leaf 0 */
41349cc03daSNeel Natu 			if (*ecx == 0) {
41444a68c4eSJohn Baldwin 				cpuid_count(*eax, *ecx, regs);
41544a68c4eSJohn Baldwin 
41644a68c4eSJohn Baldwin 				/* Only leaf 0 is supported */
41744a68c4eSJohn Baldwin 				regs[0] = 0;
41844a68c4eSJohn Baldwin 
41944a68c4eSJohn Baldwin 				/*
42044a68c4eSJohn Baldwin 				 * Expose known-safe features.
42144a68c4eSJohn Baldwin 				 */
42244a68c4eSJohn Baldwin 				regs[1] &= (CPUID_STDEXT_FSGSBASE |
42344a68c4eSJohn Baldwin 				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
42444a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2 |
42544a68c4eSJohn Baldwin 				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
42644a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512F |
42744a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512PF |
42844a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512ER |
429*15b7da10SConrad Meyer 				    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
43044a68c4eSJohn Baldwin 				regs[2] = 0;
43144a68c4eSJohn Baldwin 				regs[3] = 0;
43244a68c4eSJohn Baldwin 
43344a68c4eSJohn Baldwin 				/* Advertise INVPCID if it is enabled. */
43449cc03daSNeel Natu 				error = vm_get_capability(vm, vcpu_id,
43549cc03daSNeel Natu 				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
43649cc03daSNeel Natu 				if (error == 0 && enable_invpcid)
43749cc03daSNeel Natu 					regs[1] |= CPUID_STDEXT_INVPCID;
43849cc03daSNeel Natu 			}
43949cc03daSNeel Natu 			break;
44049cc03daSNeel Natu 
44149cc03daSNeel Natu 		case CPUID_0000_0006:
442c077e628SAlexander Motin 			regs[0] = CPUTPM1_ARAT;
443c077e628SAlexander Motin 			regs[1] = 0;
444c077e628SAlexander Motin 			regs[2] = 0;
445c077e628SAlexander Motin 			regs[3] = 0;
446c077e628SAlexander Motin 			break;
447c077e628SAlexander Motin 
448560d5edaSPeter Grehan 		case CPUID_0000_000A:
4491f3025e1SPeter Grehan 			/*
4501f3025e1SPeter Grehan 			 * Handle the access, but report 0 for
4511f3025e1SPeter Grehan 			 * all options
4521f3025e1SPeter Grehan 			 */
4531f3025e1SPeter Grehan 			regs[0] = 0;
4541f3025e1SPeter Grehan 			regs[1] = 0;
4551f3025e1SPeter Grehan 			regs[2] = 0;
4561f3025e1SPeter Grehan 			regs[3] = 0;
4571f3025e1SPeter Grehan 			break;
4581f3025e1SPeter Grehan 
459366f6083SPeter Grehan 		case CPUID_0000_000B:
460366f6083SPeter Grehan 			/*
461*15b7da10SConrad Meyer 			 * Intel processor topology enumeration
462366f6083SPeter Grehan 			 */
463*15b7da10SConrad Meyer 			if (vmm_is_intel()) {
46401d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
46501d822d3SRodney W. Grimes 				    &maxcpus);
4668bd3845dSNeel Natu 				if (*ecx == 0) {
46701d822d3SRodney W. Grimes 					logical_cpus = threads;
4688bd3845dSNeel Natu 					width = log2(logical_cpus);
4698bd3845dSNeel Natu 					level = CPUID_TYPE_SMT;
4708bd3845dSNeel Natu 					x2apic_id = vcpu_id;
4718bd3845dSNeel Natu 				}
4728bd3845dSNeel Natu 
4738bd3845dSNeel Natu 				if (*ecx == 1) {
47401d822d3SRodney W. Grimes 					logical_cpus = threads * cores;
4758bd3845dSNeel Natu 					width = log2(logical_cpus);
4768bd3845dSNeel Natu 					level = CPUID_TYPE_CORE;
4778bd3845dSNeel Natu 					x2apic_id = vcpu_id;
4788bd3845dSNeel Natu 				}
4798bd3845dSNeel Natu 
4808bd3845dSNeel Natu 				if (!cpuid_leaf_b || *ecx >= 2) {
4818bd3845dSNeel Natu 					width = 0;
4828bd3845dSNeel Natu 					logical_cpus = 0;
4838bd3845dSNeel Natu 					level = 0;
4848bd3845dSNeel Natu 					x2apic_id = 0;
4858bd3845dSNeel Natu 				}
4868bd3845dSNeel Natu 
4878bd3845dSNeel Natu 				regs[0] = width & 0x1f;
4888bd3845dSNeel Natu 				regs[1] = logical_cpus & 0xffff;
4898bd3845dSNeel Natu 				regs[2] = (level << 8) | (*ecx & 0xff);
4908bd3845dSNeel Natu 				regs[3] = x2apic_id;
491*15b7da10SConrad Meyer 			} else {
492*15b7da10SConrad Meyer 				regs[0] = 0;
493*15b7da10SConrad Meyer 				regs[1] = 0;
494*15b7da10SConrad Meyer 				regs[2] = 0;
495*15b7da10SConrad Meyer 				regs[3] = 0;
496*15b7da10SConrad Meyer 			}
497366f6083SPeter Grehan 			break;
498366f6083SPeter Grehan 
499abb023fbSJohn Baldwin 		case CPUID_0000_000D:
500abb023fbSJohn Baldwin 			limits = vmm_get_xsave_limits();
501abb023fbSJohn Baldwin 			if (!limits->xsave_enabled) {
502abb023fbSJohn Baldwin 				regs[0] = 0;
503abb023fbSJohn Baldwin 				regs[1] = 0;
504abb023fbSJohn Baldwin 				regs[2] = 0;
505abb023fbSJohn Baldwin 				regs[3] = 0;
506abb023fbSJohn Baldwin 				break;
507abb023fbSJohn Baldwin 			}
508abb023fbSJohn Baldwin 
509abb023fbSJohn Baldwin 			cpuid_count(*eax, *ecx, regs);
510abb023fbSJohn Baldwin 			switch (*ecx) {
511abb023fbSJohn Baldwin 			case 0:
512abb023fbSJohn Baldwin 				/*
513abb023fbSJohn Baldwin 				 * Only permit the guest to use bits
514abb023fbSJohn Baldwin 				 * that are active in the host in
515abb023fbSJohn Baldwin 				 * %xcr0.  Also, claim that the
516abb023fbSJohn Baldwin 				 * maximum save area size is
517abb023fbSJohn Baldwin 				 * equivalent to the host's current
518abb023fbSJohn Baldwin 				 * save area size.  Since this runs
519abb023fbSJohn Baldwin 				 * "inside" of vmrun(), it runs with
520abb023fbSJohn Baldwin 				 * the guest's xcr0, so the current
521abb023fbSJohn Baldwin 				 * save area size is correct as-is.
522abb023fbSJohn Baldwin 				 */
523abb023fbSJohn Baldwin 				regs[0] &= limits->xcr0_allowed;
524abb023fbSJohn Baldwin 				regs[2] = limits->xsave_max_size;
525abb023fbSJohn Baldwin 				regs[3] &= (limits->xcr0_allowed >> 32);
526abb023fbSJohn Baldwin 				break;
527abb023fbSJohn Baldwin 			case 1:
528abb023fbSJohn Baldwin 				/* Only permit XSAVEOPT. */
529abb023fbSJohn Baldwin 				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
530abb023fbSJohn Baldwin 				regs[1] = 0;
531abb023fbSJohn Baldwin 				regs[2] = 0;
532abb023fbSJohn Baldwin 				regs[3] = 0;
533abb023fbSJohn Baldwin 				break;
534abb023fbSJohn Baldwin 			default:
535abb023fbSJohn Baldwin 				/*
536abb023fbSJohn Baldwin 				 * If the leaf is for a permitted feature,
537abb023fbSJohn Baldwin 				 * pass through as-is, otherwise return
538abb023fbSJohn Baldwin 				 * all zeroes.
539abb023fbSJohn Baldwin 				 */
540abb023fbSJohn Baldwin 				if (!(limits->xcr0_allowed & (1ul << *ecx))) {
541abb023fbSJohn Baldwin 					regs[0] = 0;
542abb023fbSJohn Baldwin 					regs[1] = 0;
543abb023fbSJohn Baldwin 					regs[2] = 0;
544abb023fbSJohn Baldwin 					regs[3] = 0;
545abb023fbSJohn Baldwin 				}
546abb023fbSJohn Baldwin 				break;
547abb023fbSJohn Baldwin 			}
548abb023fbSJohn Baldwin 			break;
549abb023fbSJohn Baldwin 
5508b287612SJohn Baldwin 		case 0x40000000:
5518b287612SJohn Baldwin 			regs[0] = CPUID_VM_HIGH;
5528b287612SJohn Baldwin 			bcopy(bhyve_id, &regs[1], 4);
553560d5edaSPeter Grehan 			bcopy(bhyve_id + 4, &regs[2], 4);
554560d5edaSPeter Grehan 			bcopy(bhyve_id + 8, &regs[3], 4);
5558b287612SJohn Baldwin 			break;
556560d5edaSPeter Grehan 
557366f6083SPeter Grehan 		default:
558*15b7da10SConrad Meyer default_leaf:
559560d5edaSPeter Grehan 			/*
560560d5edaSPeter Grehan 			 * The leaf value has already been clamped so
561560d5edaSPeter Grehan 			 * simply pass this through, keeping count of
562560d5edaSPeter Grehan 			 * how many unhandled leaf values have been seen.
563560d5edaSPeter Grehan 			 */
564560d5edaSPeter Grehan 			atomic_add_long(&bhyve_xcpuids, 1);
565560d5edaSPeter Grehan 			cpuid_count(*eax, *ecx, regs);
566560d5edaSPeter Grehan 			break;
567366f6083SPeter Grehan 	}
568366f6083SPeter Grehan 
569366f6083SPeter Grehan 	*eax = regs[0];
570366f6083SPeter Grehan 	*ebx = regs[1];
571366f6083SPeter Grehan 	*ecx = regs[2];
572366f6083SPeter Grehan 	*edx = regs[3];
573560d5edaSPeter Grehan 
574366f6083SPeter Grehan 	return (1);
575366f6083SPeter Grehan }
576ea91ca92SNeel Natu 
577ea91ca92SNeel Natu bool
578ea91ca92SNeel Natu vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
579ea91ca92SNeel Natu {
580ea91ca92SNeel Natu 	bool rv;
581ea91ca92SNeel Natu 
582ea91ca92SNeel Natu 	KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
583ea91ca92SNeel Natu 	    __func__, cap));
584ea91ca92SNeel Natu 
585ea91ca92SNeel Natu 	/*
586ea91ca92SNeel Natu 	 * Simply passthrough the capabilities of the host cpu for now.
587ea91ca92SNeel Natu 	 */
588ea91ca92SNeel Natu 	rv = false;
589ea91ca92SNeel Natu 	switch (cap) {
590ea91ca92SNeel Natu 	case VCC_NO_EXECUTE:
591ea91ca92SNeel Natu 		if (amd_feature & AMDID_NX)
592ea91ca92SNeel Natu 			rv = true;
593ea91ca92SNeel Natu 		break;
594ea91ca92SNeel Natu 	case VCC_FFXSR:
595ea91ca92SNeel Natu 		if (amd_feature & AMDID_FFXSR)
596ea91ca92SNeel Natu 			rv = true;
597ea91ca92SNeel Natu 		break;
598ea91ca92SNeel Natu 	case VCC_TCE:
599ea91ca92SNeel Natu 		if (amd_feature2 & AMDID2_TCE)
600ea91ca92SNeel Natu 			rv = true;
601ea91ca92SNeel Natu 		break;
602ea91ca92SNeel Natu 	default:
603ea91ca92SNeel Natu 		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
604ea91ca92SNeel Natu 	}
605ea91ca92SNeel Natu 	return (rv);
606ea91ca92SNeel Natu }
607