xref: /freebsd/sys/amd64/vmm/x86.c (revision a3f2a9c57eb78f68bc6bba7b0f8f0f35bea3c93b)
1366f6083SPeter Grehan /*-
2c49761ddSPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3c49761ddSPedro F. Giffuni  *
4366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
5366f6083SPeter Grehan  * All rights reserved.
6366f6083SPeter Grehan  *
7366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
8366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
9366f6083SPeter Grehan  * are met:
10366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
11366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
12366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
13366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
14366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
15366f6083SPeter Grehan  *
16366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26366f6083SPeter Grehan  * SUCH DAMAGE.
27366f6083SPeter Grehan  *
28366f6083SPeter Grehan  * $FreeBSD$
29366f6083SPeter Grehan  */
30366f6083SPeter Grehan 
31366f6083SPeter Grehan #include <sys/cdefs.h>
32366f6083SPeter Grehan __FBSDID("$FreeBSD$");
33366f6083SPeter Grehan 
34a2da7af6SNeel Natu #include <sys/param.h>
35abb023fbSJohn Baldwin #include <sys/pcpu.h>
368b287612SJohn Baldwin #include <sys/systm.h>
378bd3845dSNeel Natu #include <sys/sysctl.h>
38366f6083SPeter Grehan 
391472b87fSNeel Natu #include <machine/clock.h>
40366f6083SPeter Grehan #include <machine/cpufunc.h>
418b287612SJohn Baldwin #include <machine/md_var.h>
42abb023fbSJohn Baldwin #include <machine/segments.h>
43366f6083SPeter Grehan #include <machine/specialreg.h>
44366f6083SPeter Grehan 
45a2da7af6SNeel Natu #include <machine/vmm.h>
46a2da7af6SNeel Natu 
47abb023fbSJohn Baldwin #include "vmm_host.h"
485a1f0b36SNeel Natu #include "vmm_ktr.h"
495a1f0b36SNeel Natu #include "vmm_util.h"
50366f6083SPeter Grehan #include "x86.h"
51366f6083SPeter Grehan 
528bd3845dSNeel Natu SYSCTL_DECL(_hw_vmm);
53b40598c5SPawel Biernacki static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
54b40598c5SPawel Biernacki     NULL);
558bd3845dSNeel Natu 
568b287612SJohn Baldwin #define	CPUID_VM_HIGH		0x40000000
578b287612SJohn Baldwin 
58560d5edaSPeter Grehan static const char bhyve_id[12] = "bhyve bhyve ";
59560d5edaSPeter Grehan 
60560d5edaSPeter Grehan static uint64_t bhyve_xcpuids;
615a1f0b36SNeel Natu SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
625a1f0b36SNeel Natu     "Number of times an unknown cpuid leaf was accessed");
638b287612SJohn Baldwin 
6401d822d3SRodney W. Grimes #if __FreeBSD_version < 1200060	/* Remove after 11 EOL helps MFCing */
6501d822d3SRodney W. Grimes extern u_int threads_per_core;
668bd3845dSNeel Natu SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
678bd3845dSNeel Natu     &threads_per_core, 0, NULL);
688bd3845dSNeel Natu 
6901d822d3SRodney W. Grimes extern u_int cores_per_package;
708bd3845dSNeel Natu SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
718bd3845dSNeel Natu     &cores_per_package, 0, NULL);
7201d822d3SRodney W. Grimes #endif
738bd3845dSNeel Natu 
748bd3845dSNeel Natu static int cpuid_leaf_b = 1;
758bd3845dSNeel Natu SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
768bd3845dSNeel Natu     &cpuid_leaf_b, 0, NULL);
778bd3845dSNeel Natu 
788bd3845dSNeel Natu /*
798bd3845dSNeel Natu  * Round up to the next power of two, if necessary, and then take log2.
808bd3845dSNeel Natu  * Returns -1 if argument is zero.
818bd3845dSNeel Natu  */
828bd3845dSNeel Natu static __inline int
838bd3845dSNeel Natu log2(u_int x)
848bd3845dSNeel Natu {
858bd3845dSNeel Natu 
868bd3845dSNeel Natu 	return (fls(x << (1 - powerof2(x))) - 1);
878bd3845dSNeel Natu }
888bd3845dSNeel Natu 
89366f6083SPeter Grehan int
90*a3f2a9c5SJohn Baldwin x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx,
91*a3f2a9c5SJohn Baldwin     uint64_t *rcx, uint64_t *rdx)
92366f6083SPeter Grehan {
93abb023fbSJohn Baldwin 	const struct xsave_limits *limits;
94abb023fbSJohn Baldwin 	uint64_t cr4;
95f5f5f1e7SPeter Grehan 	int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
96f5f5f1e7SPeter Grehan 	    width, x2apic_id;
97*a3f2a9c5SJohn Baldwin 	unsigned int func, regs[4], logical_cpus, param;
98a2da7af6SNeel Natu 	enum x2apic_state x2apic_state;
9901d822d3SRodney W. Grimes 	uint16_t cores, maxcpus, sockets, threads;
100366f6083SPeter Grehan 
101*a3f2a9c5SJohn Baldwin 	/*
102*a3f2a9c5SJohn Baldwin 	 * The function of CPUID is controlled through the provided value of
103*a3f2a9c5SJohn Baldwin 	 * %eax (and secondarily %ecx, for certain leaf data).
104*a3f2a9c5SJohn Baldwin 	 */
105*a3f2a9c5SJohn Baldwin 	func = (uint32_t)*rax;
106*a3f2a9c5SJohn Baldwin 	param = (uint32_t)*rcx;
107*a3f2a9c5SJohn Baldwin 
108*a3f2a9c5SJohn Baldwin 	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", func, param);
1095a1f0b36SNeel Natu 
1108b287612SJohn Baldwin 	/*
1118b287612SJohn Baldwin 	 * Requests for invalid CPUID levels should map to the highest
1128b287612SJohn Baldwin 	 * available level instead.
1138b287612SJohn Baldwin 	 */
114*a3f2a9c5SJohn Baldwin 	if (cpu_exthigh != 0 && func >= 0x80000000) {
115*a3f2a9c5SJohn Baldwin 		if (func > cpu_exthigh)
116*a3f2a9c5SJohn Baldwin 			func = cpu_exthigh;
117*a3f2a9c5SJohn Baldwin 	} else if (func >= 0x40000000) {
118*a3f2a9c5SJohn Baldwin 		if (func > CPUID_VM_HIGH)
119*a3f2a9c5SJohn Baldwin 			func = CPUID_VM_HIGH;
120*a3f2a9c5SJohn Baldwin 	} else if (func > cpu_high) {
121*a3f2a9c5SJohn Baldwin 		func = cpu_high;
1228b287612SJohn Baldwin 	}
123366f6083SPeter Grehan 
1248b287612SJohn Baldwin 	/*
1258b287612SJohn Baldwin 	 * In general the approach used for CPU topology is to
1268b287612SJohn Baldwin 	 * advertise a flat topology where all CPUs are packages with
1278b287612SJohn Baldwin 	 * no multi-core or SMT.
1288b287612SJohn Baldwin 	 */
129366f6083SPeter Grehan 	switch (func) {
130560d5edaSPeter Grehan 		/*
131560d5edaSPeter Grehan 		 * Pass these through to the guest
132560d5edaSPeter Grehan 		 */
133366f6083SPeter Grehan 		case CPUID_0000_0000:
134366f6083SPeter Grehan 		case CPUID_0000_0002:
135366f6083SPeter Grehan 		case CPUID_0000_0003:
136366f6083SPeter Grehan 		case CPUID_8000_0000:
137366f6083SPeter Grehan 		case CPUID_8000_0002:
138366f6083SPeter Grehan 		case CPUID_8000_0003:
139366f6083SPeter Grehan 		case CPUID_8000_0004:
140366f6083SPeter Grehan 		case CPUID_8000_0006:
141*a3f2a9c5SJohn Baldwin 			cpuid_count(func, param, regs);
1425a1f0b36SNeel Natu 			break;
143366f6083SPeter Grehan 		case CPUID_8000_0008:
144*a3f2a9c5SJohn Baldwin 			cpuid_count(func, param, regs);
145caab5042SKonstantin Belousov 			if (vmm_is_svm()) {
146d0c7cde5SConrad Meyer 				/*
147d0c7cde5SConrad Meyer 				 * As on Intel (0000_0007:0, EDX), mask out
148d0c7cde5SConrad Meyer 				 * unsupported or unsafe AMD extended features
149d0c7cde5SConrad Meyer 				 * (8000_0008 EBX).
150d0c7cde5SConrad Meyer 				 */
151d0c7cde5SConrad Meyer 				regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
152d0c7cde5SConrad Meyer 				    AMDFEID_XSAVEERPTR);
153d0c7cde5SConrad Meyer 
15401d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
15501d822d3SRodney W. Grimes 				    &maxcpus);
15615b7da10SConrad Meyer 				/*
15715b7da10SConrad Meyer 				 * Here, width is ApicIdCoreIdSize, present on
15815b7da10SConrad Meyer 				 * at least Family 15h and newer.  It
15915b7da10SConrad Meyer 				 * represents the "number of bits in the
16015b7da10SConrad Meyer 				 * initial apicid that indicate thread id
16115b7da10SConrad Meyer 				 * within a package."
16215b7da10SConrad Meyer 				 *
16315b7da10SConrad Meyer 				 * Our topo_probe_amd() uses it for
16415b7da10SConrad Meyer 				 * pkg_id_shift and other OSes may rely on it.
16515b7da10SConrad Meyer 				 */
16615b7da10SConrad Meyer 				width = MIN(0xF, log2(threads * cores));
16715b7da10SConrad Meyer 				if (width < 0x4)
16815b7da10SConrad Meyer 					width = 0;
16915b7da10SConrad Meyer 				logical_cpus = MIN(0xFF, threads * cores - 1);
17015b7da10SConrad Meyer 				regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
1715a1f0b36SNeel Natu 			}
172366f6083SPeter Grehan 			break;
173366f6083SPeter Grehan 
174560d5edaSPeter Grehan 		case CPUID_8000_0001:
175*a3f2a9c5SJohn Baldwin 			cpuid_count(func, param, regs);
17606053618SNeel Natu 
17706053618SNeel Natu 			/*
17815b7da10SConrad Meyer 			 * Hide SVM from guest.
17906053618SNeel Natu 			 */
18015b7da10SConrad Meyer 			regs[2] &= ~AMDID2_SVM;
18106053618SNeel Natu 
182560d5edaSPeter Grehan 			/*
18302904c45SNeel Natu 			 * Don't advertise extended performance counter MSRs
18402904c45SNeel Natu 			 * to the guest.
18502904c45SNeel Natu 			 */
18602904c45SNeel Natu 			regs[2] &= ~AMDID2_PCXC;
18702904c45SNeel Natu 			regs[2] &= ~AMDID2_PNXC;
18802904c45SNeel Natu 			regs[2] &= ~AMDID2_PTSCEL2I;
18902904c45SNeel Natu 
19002904c45SNeel Natu 			/*
1912688a818SNeel Natu 			 * Don't advertise Instruction Based Sampling feature.
1922688a818SNeel Natu 			 */
1932688a818SNeel Natu 			regs[2] &= ~AMDID2_IBS;
1942688a818SNeel Natu 
19565d5111aSNeel Natu 			/* NodeID MSR not available */
19665d5111aSNeel Natu 			regs[2] &= ~AMDID2_NODE_ID;
19765d5111aSNeel Natu 
198592cd7d3SNeel Natu 			/* Don't advertise the OS visible workaround feature */
199592cd7d3SNeel Natu 			regs[2] &= ~AMDID2_OSVW;
200592cd7d3SNeel Natu 
2013da44302SPeter Grehan 			/* Hide mwaitx/monitorx capability from the guest */
2023da44302SPeter Grehan 			regs[2] &= ~AMDID2_MWAITX;
2033da44302SPeter Grehan 
204f5f5f1e7SPeter Grehan 			/* Advertise RDTSCP if it is enabled. */
205f5f5f1e7SPeter Grehan 			error = vm_get_capability(vm, vcpu_id,
206f5f5f1e7SPeter Grehan 			    VM_CAP_RDTSCP, &enable_rdtscp);
207f5f5f1e7SPeter Grehan 			if (error == 0 && enable_rdtscp)
208f5f5f1e7SPeter Grehan 				regs[3] |= AMDID_RDTSCP;
209f5f5f1e7SPeter Grehan 			else
210560d5edaSPeter Grehan 				regs[3] &= ~AMDID_RDTSCP;
211560d5edaSPeter Grehan 			break;
212560d5edaSPeter Grehan 
2131472b87fSNeel Natu 		case CPUID_8000_0007:
2141472b87fSNeel Natu 			/*
215592cd7d3SNeel Natu 			 * AMD uses this leaf to advertise the processor's
216592cd7d3SNeel Natu 			 * power monitoring and RAS capabilities. These
217592cd7d3SNeel Natu 			 * features are hardware-specific and exposing
218592cd7d3SNeel Natu 			 * them to a guest doesn't make a lot of sense.
219592cd7d3SNeel Natu 			 *
220592cd7d3SNeel Natu 			 * Intel uses this leaf only to advertise the
221592cd7d3SNeel Natu 			 * "Invariant TSC" feature with all other bits
222592cd7d3SNeel Natu 			 * being reserved (set to zero).
223592cd7d3SNeel Natu 			 */
224592cd7d3SNeel Natu 			regs[0] = 0;
225592cd7d3SNeel Natu 			regs[1] = 0;
226592cd7d3SNeel Natu 			regs[2] = 0;
227592cd7d3SNeel Natu 			regs[3] = 0;
228592cd7d3SNeel Natu 
229592cd7d3SNeel Natu 			/*
230592cd7d3SNeel Natu 			 * "Invariant TSC" can be advertised to the guest if:
231592cd7d3SNeel Natu 			 * - host TSC frequency is invariant
232592cd7d3SNeel Natu 			 * - host TSCs are synchronized across physical cpus
2331472b87fSNeel Natu 			 *
2341472b87fSNeel Natu 			 * XXX This still falls short because the vcpu
2351472b87fSNeel Natu 			 * can observe the TSC moving backwards as it
2361472b87fSNeel Natu 			 * migrates across physical cpus. But at least
2371472b87fSNeel Natu 			 * it should discourage the guest from using the
2381472b87fSNeel Natu 			 * TSC to keep track of time.
2391472b87fSNeel Natu 			 */
240592cd7d3SNeel Natu 			if (tsc_is_invariant && smp_tsc)
241592cd7d3SNeel Natu 				regs[3] |= AMDPM_TSC_INVARIANT;
2421472b87fSNeel Natu 			break;
2431472b87fSNeel Natu 
24415b7da10SConrad Meyer 		case CPUID_8000_001D:
24515b7da10SConrad Meyer 			/* AMD Cache topology, like 0000_0004 for Intel. */
246caab5042SKonstantin Belousov 			if (!vmm_is_svm())
24715b7da10SConrad Meyer 				goto default_leaf;
24815b7da10SConrad Meyer 
24915b7da10SConrad Meyer 			/*
25015b7da10SConrad Meyer 			 * Similar to Intel, generate a ficticious cache
25115b7da10SConrad Meyer 			 * topology for the guest with L3 shared by the
25215b7da10SConrad Meyer 			 * package, and L1 and L2 local to a core.
25315b7da10SConrad Meyer 			 */
25415b7da10SConrad Meyer 			vm_get_topology(vm, &sockets, &cores, &threads,
25515b7da10SConrad Meyer 			    &maxcpus);
256*a3f2a9c5SJohn Baldwin 			switch (param) {
25715b7da10SConrad Meyer 			case 0:
25815b7da10SConrad Meyer 				logical_cpus = threads;
25915b7da10SConrad Meyer 				level = 1;
26015b7da10SConrad Meyer 				func = 1;	/* data cache */
26115b7da10SConrad Meyer 				break;
26215b7da10SConrad Meyer 			case 1:
26315b7da10SConrad Meyer 				logical_cpus = threads;
26415b7da10SConrad Meyer 				level = 2;
26515b7da10SConrad Meyer 				func = 3;	/* unified cache */
26615b7da10SConrad Meyer 				break;
26715b7da10SConrad Meyer 			case 2:
26815b7da10SConrad Meyer 				logical_cpus = threads * cores;
26915b7da10SConrad Meyer 				level = 3;
27015b7da10SConrad Meyer 				func = 3;	/* unified cache */
27115b7da10SConrad Meyer 				break;
27215b7da10SConrad Meyer 			default:
27315b7da10SConrad Meyer 				logical_cpus = 0;
27415b7da10SConrad Meyer 				level = 0;
27515b7da10SConrad Meyer 				func = 0;
27615b7da10SConrad Meyer 				break;
27715b7da10SConrad Meyer 			}
27815b7da10SConrad Meyer 
27915b7da10SConrad Meyer 			logical_cpus = MIN(0xfff, logical_cpus - 1);
28015b7da10SConrad Meyer 			regs[0] = (logical_cpus << 14) | (1 << 8) |
28115b7da10SConrad Meyer 			    (level << 5) | func;
28215b7da10SConrad Meyer 			regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
28315b7da10SConrad Meyer 			regs[2] = 0;
28415b7da10SConrad Meyer 			regs[3] = 0;
28515b7da10SConrad Meyer 			break;
28615b7da10SConrad Meyer 
28715b7da10SConrad Meyer 		case CPUID_8000_001E:
288caab5042SKonstantin Belousov 			/*
289caab5042SKonstantin Belousov 			 * AMD Family 16h+ and Hygon Family 18h additional
290caab5042SKonstantin Belousov 			 * identifiers.
291caab5042SKonstantin Belousov 			 */
292caab5042SKonstantin Belousov 			if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16)
29315b7da10SConrad Meyer 				goto default_leaf;
29415b7da10SConrad Meyer 
29515b7da10SConrad Meyer 			vm_get_topology(vm, &sockets, &cores, &threads,
29615b7da10SConrad Meyer 			    &maxcpus);
29715b7da10SConrad Meyer 			regs[0] = vcpu_id;
29815b7da10SConrad Meyer 			threads = MIN(0xFF, threads - 1);
29915b7da10SConrad Meyer 			regs[1] = (threads << 8) |
30015b7da10SConrad Meyer 			    (vcpu_id >> log2(threads + 1));
30115b7da10SConrad Meyer 			/*
30215b7da10SConrad Meyer 			 * XXX Bhyve topology cannot yet represent >1 node per
30315b7da10SConrad Meyer 			 * processor.
30415b7da10SConrad Meyer 			 */
30515b7da10SConrad Meyer 			regs[2] = 0;
30615b7da10SConrad Meyer 			regs[3] = 0;
30715b7da10SConrad Meyer 			break;
30815b7da10SConrad Meyer 
309366f6083SPeter Grehan 		case CPUID_0000_0001:
3108b287612SJohn Baldwin 			do_cpuid(1, regs);
3118b287612SJohn Baldwin 
312a2da7af6SNeel Natu 			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
313a2da7af6SNeel Natu 			if (error) {
314a2da7af6SNeel Natu 				panic("x86_emulate_cpuid: error %d "
315a2da7af6SNeel Natu 				      "fetching x2apic state", error);
316a2da7af6SNeel Natu 			}
317a2da7af6SNeel Natu 
318366f6083SPeter Grehan 			/*
319366f6083SPeter Grehan 			 * Override the APIC ID only in ebx
320366f6083SPeter Grehan 			 */
3218b287612SJohn Baldwin 			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
3228b287612SJohn Baldwin 			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
323366f6083SPeter Grehan 
324366f6083SPeter Grehan 			/*
32531708084SNeel Natu 			 * Don't expose VMX, SpeedStep, TME or SMX capability.
3268b287612SJohn Baldwin 			 * Advertise x2APIC capability and Hypervisor guest.
327366f6083SPeter Grehan 			 */
3288b287612SJohn Baldwin 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
32931708084SNeel Natu 			regs[2] &= ~(CPUID2_SMX);
330a2da7af6SNeel Natu 
331a2da7af6SNeel Natu 			regs[2] |= CPUID2_HV;
332a2da7af6SNeel Natu 
333a2da7af6SNeel Natu 			if (x2apic_state != X2APIC_DISABLED)
334a2da7af6SNeel Natu 				regs[2] |= CPUID2_X2APIC;
33552e5c8a2SNeel Natu 			else
33652e5c8a2SNeel Natu 				regs[2] &= ~CPUID2_X2APIC;
337366f6083SPeter Grehan 
338366f6083SPeter Grehan 			/*
339abb023fbSJohn Baldwin 			 * Only advertise CPUID2_XSAVE in the guest if
340abb023fbSJohn Baldwin 			 * the host is using XSAVE.
341298379f7SPeter Grehan 			 */
342abb023fbSJohn Baldwin 			if (!(regs[2] & CPUID2_OSXSAVE))
343abb023fbSJohn Baldwin 				regs[2] &= ~CPUID2_XSAVE;
344abb023fbSJohn Baldwin 
345abb023fbSJohn Baldwin 			/*
346abb023fbSJohn Baldwin 			 * If CPUID2_XSAVE is being advertised and the
347abb023fbSJohn Baldwin 			 * guest has set CR4_XSAVE, set
348abb023fbSJohn Baldwin 			 * CPUID2_OSXSAVE.
349abb023fbSJohn Baldwin 			 */
350abb023fbSJohn Baldwin 			regs[2] &= ~CPUID2_OSXSAVE;
351abb023fbSJohn Baldwin 			if (regs[2] & CPUID2_XSAVE) {
352abb023fbSJohn Baldwin 				error = vm_get_register(vm, vcpu_id,
353abb023fbSJohn Baldwin 				    VM_REG_GUEST_CR4, &cr4);
354abb023fbSJohn Baldwin 				if (error)
355abb023fbSJohn Baldwin 					panic("x86_emulate_cpuid: error %d "
356abb023fbSJohn Baldwin 					      "fetching %%cr4", error);
357abb023fbSJohn Baldwin 				if (cr4 & CR4_XSAVE)
358abb023fbSJohn Baldwin 					regs[2] |= CPUID2_OSXSAVE;
359abb023fbSJohn Baldwin 			}
360298379f7SPeter Grehan 
361298379f7SPeter Grehan 			/*
362ff6ec151SNeel Natu 			 * Hide monitor/mwait until we know how to deal with
363ff6ec151SNeel Natu 			 * these instructions.
364ff6ec151SNeel Natu 			 */
365ff6ec151SNeel Natu 			regs[2] &= ~CPUID2_MON;
366ff6ec151SNeel Natu 
367ff6ec151SNeel Natu                         /*
368560d5edaSPeter Grehan 			 * Hide the performance and debug features.
369560d5edaSPeter Grehan 			 */
370560d5edaSPeter Grehan 			regs[2] &= ~CPUID2_PDCM;
371560d5edaSPeter Grehan 
372517e21d3SPeter Grehan 			/*
373517e21d3SPeter Grehan 			 * No TSC deadline support in the APIC yet
374517e21d3SPeter Grehan 			 */
375517e21d3SPeter Grehan 			regs[2] &= ~CPUID2_TSCDLT;
376517e21d3SPeter Grehan 
377560d5edaSPeter Grehan 			/*
3781f3025e1SPeter Grehan 			 * Hide thermal monitoring
3791f3025e1SPeter Grehan 			 */
3801f3025e1SPeter Grehan 			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
3811f3025e1SPeter Grehan 
3821f3025e1SPeter Grehan 			/*
383560d5edaSPeter Grehan 			 * Hide the debug store capability.
384560d5edaSPeter Grehan 			 */
385560d5edaSPeter Grehan 			regs[3] &= ~CPUID_DS;
386560d5edaSPeter Grehan 
3871d29bfc1SNeel Natu 			/*
3881d29bfc1SNeel Natu 			 * Advertise the Machine Check and MTRR capability.
3891d29bfc1SNeel Natu 			 *
3901d29bfc1SNeel Natu 			 * Some guest OSes (e.g. Windows) will not boot if
3911d29bfc1SNeel Natu 			 * these features are absent.
3921d29bfc1SNeel Natu 			 */
3931d29bfc1SNeel Natu 			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
3941d29bfc1SNeel Natu 
39501d822d3SRodney W. Grimes 			vm_get_topology(vm, &sockets, &cores, &threads,
39601d822d3SRodney W. Grimes 			    &maxcpus);
39701d822d3SRodney W. Grimes 			logical_cpus = threads * cores;
3988b287612SJohn Baldwin 			regs[1] &= ~CPUID_HTT_CORES;
3998bd3845dSNeel Natu 			regs[1] |= (logical_cpus & 0xff) << 16;
4008bd3845dSNeel Natu 			regs[3] |= CPUID_HTT;
4018b287612SJohn Baldwin 			break;
4028b287612SJohn Baldwin 
4038b287612SJohn Baldwin 		case CPUID_0000_0004:
404*a3f2a9c5SJohn Baldwin 			cpuid_count(func, param, regs);
4058b287612SJohn Baldwin 
4068bd3845dSNeel Natu 			if (regs[0] || regs[1] || regs[2] || regs[3]) {
40701d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
40801d822d3SRodney W. Grimes 				    &maxcpus);
409534dc967SNeel Natu 				regs[0] &= 0x3ff;
41001d822d3SRodney W. Grimes 				regs[0] |= (cores - 1) << 26;
4118b287612SJohn Baldwin 				/*
4128bd3845dSNeel Natu 				 * Cache topology:
4138bd3845dSNeel Natu 				 * - L1 and L2 are shared only by the logical
4148bd3845dSNeel Natu 				 *   processors in a single core.
4158bd3845dSNeel Natu 				 * - L3 and above are shared by all logical
4168bd3845dSNeel Natu 				 *   processors in the package.
4178b287612SJohn Baldwin 				 */
41801d822d3SRodney W. Grimes 				logical_cpus = threads;
4198bd3845dSNeel Natu 				level = (regs[0] >> 5) & 0x7;
4208bd3845dSNeel Natu 				if (level >= 3)
42101d822d3SRodney W. Grimes 					logical_cpus *= cores;
4228bd3845dSNeel Natu 				regs[0] |= (logical_cpus - 1) << 14;
4238bd3845dSNeel Natu 			}
424366f6083SPeter Grehan 			break;
425366f6083SPeter Grehan 
426a0cad470SPeter Grehan 		case CPUID_0000_0007:
42749cc03daSNeel Natu 			regs[0] = 0;
42849cc03daSNeel Natu 			regs[1] = 0;
42949cc03daSNeel Natu 			regs[2] = 0;
43049cc03daSNeel Natu 			regs[3] = 0;
43149cc03daSNeel Natu 
43249cc03daSNeel Natu 			/* leaf 0 */
433*a3f2a9c5SJohn Baldwin 			if (param == 0) {
434*a3f2a9c5SJohn Baldwin 				cpuid_count(func, param, regs);
43544a68c4eSJohn Baldwin 
43644a68c4eSJohn Baldwin 				/* Only leaf 0 is supported */
43744a68c4eSJohn Baldwin 				regs[0] = 0;
43844a68c4eSJohn Baldwin 
43944a68c4eSJohn Baldwin 				/*
44044a68c4eSJohn Baldwin 				 * Expose known-safe features.
44144a68c4eSJohn Baldwin 				 */
44244a68c4eSJohn Baldwin 				regs[1] &= (CPUID_STDEXT_FSGSBASE |
44344a68c4eSJohn Baldwin 				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
44444a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2 |
44544a68c4eSJohn Baldwin 				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
44644a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512F |
447fce2d624SConrad Meyer 				    CPUID_STDEXT_RDSEED |
44844a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512PF |
44944a68c4eSJohn Baldwin 				    CPUID_STDEXT_AVX512ER |
45015b7da10SConrad Meyer 				    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
45144a68c4eSJohn Baldwin 				regs[2] = 0;
452e519cee3SJohn Baldwin 				regs[3] &= CPUID_STDEXT3_MD_CLEAR;
45344a68c4eSJohn Baldwin 
454f5f5f1e7SPeter Grehan 				/* Advertise RDPID if it is enabled. */
455f5f5f1e7SPeter Grehan 				error = vm_get_capability(vm, vcpu_id,
456f5f5f1e7SPeter Grehan 				    VM_CAP_RDPID, &enable_rdpid);
457f5f5f1e7SPeter Grehan 				if (error == 0 && enable_rdpid)
458f5f5f1e7SPeter Grehan 					regs[2] |= CPUID_STDEXT2_RDPID;
459f5f5f1e7SPeter Grehan 
46044a68c4eSJohn Baldwin 				/* Advertise INVPCID if it is enabled. */
46149cc03daSNeel Natu 				error = vm_get_capability(vm, vcpu_id,
46249cc03daSNeel Natu 				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
46349cc03daSNeel Natu 				if (error == 0 && enable_invpcid)
46449cc03daSNeel Natu 					regs[1] |= CPUID_STDEXT_INVPCID;
46549cc03daSNeel Natu 			}
46649cc03daSNeel Natu 			break;
46749cc03daSNeel Natu 
46849cc03daSNeel Natu 		case CPUID_0000_0006:
469c077e628SAlexander Motin 			regs[0] = CPUTPM1_ARAT;
470c077e628SAlexander Motin 			regs[1] = 0;
471c077e628SAlexander Motin 			regs[2] = 0;
472c077e628SAlexander Motin 			regs[3] = 0;
473c077e628SAlexander Motin 			break;
474c077e628SAlexander Motin 
475560d5edaSPeter Grehan 		case CPUID_0000_000A:
4761f3025e1SPeter Grehan 			/*
4771f3025e1SPeter Grehan 			 * Handle the access, but report 0 for
4781f3025e1SPeter Grehan 			 * all options
4791f3025e1SPeter Grehan 			 */
4801f3025e1SPeter Grehan 			regs[0] = 0;
4811f3025e1SPeter Grehan 			regs[1] = 0;
4821f3025e1SPeter Grehan 			regs[2] = 0;
4831f3025e1SPeter Grehan 			regs[3] = 0;
4841f3025e1SPeter Grehan 			break;
4851f3025e1SPeter Grehan 
486366f6083SPeter Grehan 		case CPUID_0000_000B:
487366f6083SPeter Grehan 			/*
48815b7da10SConrad Meyer 			 * Intel processor topology enumeration
489366f6083SPeter Grehan 			 */
49015b7da10SConrad Meyer 			if (vmm_is_intel()) {
49101d822d3SRodney W. Grimes 				vm_get_topology(vm, &sockets, &cores, &threads,
49201d822d3SRodney W. Grimes 				    &maxcpus);
493*a3f2a9c5SJohn Baldwin 				if (param == 0) {
49401d822d3SRodney W. Grimes 					logical_cpus = threads;
4958bd3845dSNeel Natu 					width = log2(logical_cpus);
4968bd3845dSNeel Natu 					level = CPUID_TYPE_SMT;
4978bd3845dSNeel Natu 					x2apic_id = vcpu_id;
4988bd3845dSNeel Natu 				}
4998bd3845dSNeel Natu 
500*a3f2a9c5SJohn Baldwin 				if (param == 1) {
50101d822d3SRodney W. Grimes 					logical_cpus = threads * cores;
5028bd3845dSNeel Natu 					width = log2(logical_cpus);
5038bd3845dSNeel Natu 					level = CPUID_TYPE_CORE;
5048bd3845dSNeel Natu 					x2apic_id = vcpu_id;
5058bd3845dSNeel Natu 				}
5068bd3845dSNeel Natu 
507*a3f2a9c5SJohn Baldwin 				if (!cpuid_leaf_b || param >= 2) {
5088bd3845dSNeel Natu 					width = 0;
5098bd3845dSNeel Natu 					logical_cpus = 0;
5108bd3845dSNeel Natu 					level = 0;
5118bd3845dSNeel Natu 					x2apic_id = 0;
5128bd3845dSNeel Natu 				}
5138bd3845dSNeel Natu 
5148bd3845dSNeel Natu 				regs[0] = width & 0x1f;
5158bd3845dSNeel Natu 				regs[1] = logical_cpus & 0xffff;
516*a3f2a9c5SJohn Baldwin 				regs[2] = (level << 8) | (param & 0xff);
5178bd3845dSNeel Natu 				regs[3] = x2apic_id;
51815b7da10SConrad Meyer 			} else {
51915b7da10SConrad Meyer 				regs[0] = 0;
52015b7da10SConrad Meyer 				regs[1] = 0;
52115b7da10SConrad Meyer 				regs[2] = 0;
52215b7da10SConrad Meyer 				regs[3] = 0;
52315b7da10SConrad Meyer 			}
524366f6083SPeter Grehan 			break;
525366f6083SPeter Grehan 
526abb023fbSJohn Baldwin 		case CPUID_0000_000D:
527abb023fbSJohn Baldwin 			limits = vmm_get_xsave_limits();
528abb023fbSJohn Baldwin 			if (!limits->xsave_enabled) {
529abb023fbSJohn Baldwin 				regs[0] = 0;
530abb023fbSJohn Baldwin 				regs[1] = 0;
531abb023fbSJohn Baldwin 				regs[2] = 0;
532abb023fbSJohn Baldwin 				regs[3] = 0;
533abb023fbSJohn Baldwin 				break;
534abb023fbSJohn Baldwin 			}
535abb023fbSJohn Baldwin 
536*a3f2a9c5SJohn Baldwin 			cpuid_count(func, param, regs);
537*a3f2a9c5SJohn Baldwin 			switch (param) {
538abb023fbSJohn Baldwin 			case 0:
539abb023fbSJohn Baldwin 				/*
540abb023fbSJohn Baldwin 				 * Only permit the guest to use bits
541abb023fbSJohn Baldwin 				 * that are active in the host in
542abb023fbSJohn Baldwin 				 * %xcr0.  Also, claim that the
543abb023fbSJohn Baldwin 				 * maximum save area size is
544abb023fbSJohn Baldwin 				 * equivalent to the host's current
545abb023fbSJohn Baldwin 				 * save area size.  Since this runs
546abb023fbSJohn Baldwin 				 * "inside" of vmrun(), it runs with
547abb023fbSJohn Baldwin 				 * the guest's xcr0, so the current
548abb023fbSJohn Baldwin 				 * save area size is correct as-is.
549abb023fbSJohn Baldwin 				 */
550abb023fbSJohn Baldwin 				regs[0] &= limits->xcr0_allowed;
551abb023fbSJohn Baldwin 				regs[2] = limits->xsave_max_size;
552abb023fbSJohn Baldwin 				regs[3] &= (limits->xcr0_allowed >> 32);
553abb023fbSJohn Baldwin 				break;
554abb023fbSJohn Baldwin 			case 1:
555abb023fbSJohn Baldwin 				/* Only permit XSAVEOPT. */
556abb023fbSJohn Baldwin 				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
557abb023fbSJohn Baldwin 				regs[1] = 0;
558abb023fbSJohn Baldwin 				regs[2] = 0;
559abb023fbSJohn Baldwin 				regs[3] = 0;
560abb023fbSJohn Baldwin 				break;
561abb023fbSJohn Baldwin 			default:
562abb023fbSJohn Baldwin 				/*
563abb023fbSJohn Baldwin 				 * If the leaf is for a permitted feature,
564abb023fbSJohn Baldwin 				 * pass through as-is, otherwise return
565abb023fbSJohn Baldwin 				 * all zeroes.
566abb023fbSJohn Baldwin 				 */
567*a3f2a9c5SJohn Baldwin 				if (!(limits->xcr0_allowed & (1ul << param))) {
568abb023fbSJohn Baldwin 					regs[0] = 0;
569abb023fbSJohn Baldwin 					regs[1] = 0;
570abb023fbSJohn Baldwin 					regs[2] = 0;
571abb023fbSJohn Baldwin 					regs[3] = 0;
572abb023fbSJohn Baldwin 				}
573abb023fbSJohn Baldwin 				break;
574abb023fbSJohn Baldwin 			}
575abb023fbSJohn Baldwin 			break;
576abb023fbSJohn Baldwin 
577ec048c75SPeter Grehan 		case CPUID_0000_0015:
578ec048c75SPeter Grehan 			/*
579ec048c75SPeter Grehan 			 * Don't report CPU TSC/Crystal ratio and clock
580ec048c75SPeter Grehan 			 * values since guests may use these to derive the
581ec048c75SPeter Grehan 			 * local APIC frequency..
582ec048c75SPeter Grehan 			 */
583ec048c75SPeter Grehan 			regs[0] = 0;
584ec048c75SPeter Grehan 			regs[1] = 0;
585ec048c75SPeter Grehan 			regs[2] = 0;
586ec048c75SPeter Grehan 			regs[3] = 0;
587ec048c75SPeter Grehan 			break;
588ec048c75SPeter Grehan 
5898b287612SJohn Baldwin 		case 0x40000000:
5908b287612SJohn Baldwin 			regs[0] = CPUID_VM_HIGH;
5918b287612SJohn Baldwin 			bcopy(bhyve_id, &regs[1], 4);
592560d5edaSPeter Grehan 			bcopy(bhyve_id + 4, &regs[2], 4);
593560d5edaSPeter Grehan 			bcopy(bhyve_id + 8, &regs[3], 4);
5948b287612SJohn Baldwin 			break;
595560d5edaSPeter Grehan 
596366f6083SPeter Grehan 		default:
59715b7da10SConrad Meyer default_leaf:
598560d5edaSPeter Grehan 			/*
599560d5edaSPeter Grehan 			 * The leaf value has already been clamped so
600560d5edaSPeter Grehan 			 * simply pass this through, keeping count of
601560d5edaSPeter Grehan 			 * how many unhandled leaf values have been seen.
602560d5edaSPeter Grehan 			 */
603560d5edaSPeter Grehan 			atomic_add_long(&bhyve_xcpuids, 1);
604*a3f2a9c5SJohn Baldwin 			cpuid_count(func, param, regs);
605560d5edaSPeter Grehan 			break;
606366f6083SPeter Grehan 	}
607366f6083SPeter Grehan 
608*a3f2a9c5SJohn Baldwin 	/*
609*a3f2a9c5SJohn Baldwin 	 * CPUID clears the upper 32-bits of the long-mode registers.
610*a3f2a9c5SJohn Baldwin 	 */
611*a3f2a9c5SJohn Baldwin 	*rax = regs[0];
612*a3f2a9c5SJohn Baldwin 	*rbx = regs[1];
613*a3f2a9c5SJohn Baldwin 	*rcx = regs[2];
614*a3f2a9c5SJohn Baldwin 	*rdx = regs[3];
615560d5edaSPeter Grehan 
616366f6083SPeter Grehan 	return (1);
617366f6083SPeter Grehan }
618ea91ca92SNeel Natu 
619ea91ca92SNeel Natu bool
620ea91ca92SNeel Natu vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
621ea91ca92SNeel Natu {
622ea91ca92SNeel Natu 	bool rv;
623ea91ca92SNeel Natu 
624ea91ca92SNeel Natu 	KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
625ea91ca92SNeel Natu 	    __func__, cap));
626ea91ca92SNeel Natu 
627ea91ca92SNeel Natu 	/*
628ea91ca92SNeel Natu 	 * Simply passthrough the capabilities of the host cpu for now.
629ea91ca92SNeel Natu 	 */
630ea91ca92SNeel Natu 	rv = false;
631ea91ca92SNeel Natu 	switch (cap) {
632ea91ca92SNeel Natu 	case VCC_NO_EXECUTE:
633ea91ca92SNeel Natu 		if (amd_feature & AMDID_NX)
634ea91ca92SNeel Natu 			rv = true;
635ea91ca92SNeel Natu 		break;
636ea91ca92SNeel Natu 	case VCC_FFXSR:
637ea91ca92SNeel Natu 		if (amd_feature & AMDID_FFXSR)
638ea91ca92SNeel Natu 			rv = true;
639ea91ca92SNeel Natu 		break;
640ea91ca92SNeel Natu 	case VCC_TCE:
641ea91ca92SNeel Natu 		if (amd_feature2 & AMDID2_TCE)
642ea91ca92SNeel Natu 			rv = true;
643ea91ca92SNeel Natu 		break;
644ea91ca92SNeel Natu 	default:
645ea91ca92SNeel Natu 		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
646ea91ca92SNeel Natu 	}
647ea91ca92SNeel Natu 	return (rv);
648ea91ca92SNeel Natu }
649