/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/sysctl.h>

#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>

#include <dev/vmm/vmm_ktr.h>

#include "vmm_host.h"
#include "vmm_util.h"
#include "x86.h"

478bd3845dSNeel Natu SYSCTL_DECL(_hw_vmm);
48b40598c5SPawel Biernacki static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
49b40598c5SPawel Biernacki NULL);
508bd3845dSNeel Natu
518b287612SJohn Baldwin #define CPUID_VM_HIGH 0x40000000
528b287612SJohn Baldwin
53560d5edaSPeter Grehan static const char bhyve_id[12] = "bhyve bhyve ";
54560d5edaSPeter Grehan
55560d5edaSPeter Grehan static uint64_t bhyve_xcpuids;
565a1f0b36SNeel Natu SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
575a1f0b36SNeel Natu "Number of times an unknown cpuid leaf was accessed");
588b287612SJohn Baldwin
598bd3845dSNeel Natu static int cpuid_leaf_b = 1;
608bd3845dSNeel Natu SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
618bd3845dSNeel Natu &cpuid_leaf_b, 0, NULL);
628bd3845dSNeel Natu
638bd3845dSNeel Natu /*
649ff14629SDoug Moore * Compute ceil(log2(x)). Returns -1 if x is zero.
658bd3845dSNeel Natu */
668bd3845dSNeel Natu static __inline int
log2(u_int x)678bd3845dSNeel Natu log2(u_int x)
688bd3845dSNeel Natu {
698bd3845dSNeel Natu
705dbf8861SDoug Moore return (x == 0 ? -1 : order_base_2(x));
718bd3845dSNeel Natu }
728bd3845dSNeel Natu
73366f6083SPeter Grehan int
x86_emulate_cpuid(struct vcpu * vcpu,uint64_t * rax,uint64_t * rbx,uint64_t * rcx,uint64_t * rdx)7480cb5d84SJohn Baldwin x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx,
75a3f2a9c5SJohn Baldwin uint64_t *rcx, uint64_t *rdx)
76366f6083SPeter Grehan {
7780cb5d84SJohn Baldwin struct vm *vm = vcpu_vm(vcpu);
7880cb5d84SJohn Baldwin int vcpu_id = vcpu_vcpuid(vcpu);
79abb023fbSJohn Baldwin const struct xsave_limits *limits;
80abb023fbSJohn Baldwin uint64_t cr4;
81f5f5f1e7SPeter Grehan int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
82f5f5f1e7SPeter Grehan width, x2apic_id;
83a3f2a9c5SJohn Baldwin unsigned int func, regs[4], logical_cpus, param;
84a2da7af6SNeel Natu enum x2apic_state x2apic_state;
8501d822d3SRodney W. Grimes uint16_t cores, maxcpus, sockets, threads;
86366f6083SPeter Grehan
87a3f2a9c5SJohn Baldwin /*
88a3f2a9c5SJohn Baldwin * The function of CPUID is controlled through the provided value of
89a3f2a9c5SJohn Baldwin * %eax (and secondarily %ecx, for certain leaf data).
90a3f2a9c5SJohn Baldwin */
91a3f2a9c5SJohn Baldwin func = (uint32_t)*rax;
92a3f2a9c5SJohn Baldwin param = (uint32_t)*rcx;
93a3f2a9c5SJohn Baldwin
94a3f2a9c5SJohn Baldwin VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", func, param);
955a1f0b36SNeel Natu
968b287612SJohn Baldwin /*
978b287612SJohn Baldwin * Requests for invalid CPUID levels should map to the highest
988b287612SJohn Baldwin * available level instead.
998b287612SJohn Baldwin */
100a3f2a9c5SJohn Baldwin if (cpu_exthigh != 0 && func >= 0x80000000) {
101a3f2a9c5SJohn Baldwin if (func > cpu_exthigh)
102a3f2a9c5SJohn Baldwin func = cpu_exthigh;
103a3f2a9c5SJohn Baldwin } else if (func >= 0x40000000) {
104a3f2a9c5SJohn Baldwin if (func > CPUID_VM_HIGH)
105a3f2a9c5SJohn Baldwin func = CPUID_VM_HIGH;
106a3f2a9c5SJohn Baldwin } else if (func > cpu_high) {
107a3f2a9c5SJohn Baldwin func = cpu_high;
1088b287612SJohn Baldwin }
109366f6083SPeter Grehan
1108b287612SJohn Baldwin /*
1118b287612SJohn Baldwin * In general the approach used for CPU topology is to
1128b287612SJohn Baldwin * advertise a flat topology where all CPUs are packages with
1138b287612SJohn Baldwin * no multi-core or SMT.
1148b287612SJohn Baldwin */
115366f6083SPeter Grehan switch (func) {
116560d5edaSPeter Grehan /*
117560d5edaSPeter Grehan * Pass these through to the guest
118560d5edaSPeter Grehan */
119366f6083SPeter Grehan case CPUID_0000_0000:
120366f6083SPeter Grehan case CPUID_0000_0002:
121366f6083SPeter Grehan case CPUID_0000_0003:
122366f6083SPeter Grehan case CPUID_8000_0000:
123366f6083SPeter Grehan case CPUID_8000_0002:
124366f6083SPeter Grehan case CPUID_8000_0003:
125366f6083SPeter Grehan case CPUID_8000_0004:
126366f6083SPeter Grehan case CPUID_8000_0006:
127a3f2a9c5SJohn Baldwin cpuid_count(func, param, regs);
1285a1f0b36SNeel Natu break;
129366f6083SPeter Grehan case CPUID_8000_0008:
130a3f2a9c5SJohn Baldwin cpuid_count(func, param, regs);
131caab5042SKonstantin Belousov if (vmm_is_svm()) {
132d0c7cde5SConrad Meyer /*
133d0c7cde5SConrad Meyer * As on Intel (0000_0007:0, EDX), mask out
134d0c7cde5SConrad Meyer * unsupported or unsafe AMD extended features
135d0c7cde5SConrad Meyer * (8000_0008 EBX).
136d0c7cde5SConrad Meyer */
137d0c7cde5SConrad Meyer regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
138d0c7cde5SConrad Meyer AMDFEID_XSAVEERPTR);
139d0c7cde5SConrad Meyer
14001d822d3SRodney W. Grimes vm_get_topology(vm, &sockets, &cores, &threads,
14101d822d3SRodney W. Grimes &maxcpus);
14215b7da10SConrad Meyer /*
14315b7da10SConrad Meyer * Here, width is ApicIdCoreIdSize, present on
14415b7da10SConrad Meyer * at least Family 15h and newer. It
14515b7da10SConrad Meyer * represents the "number of bits in the
14615b7da10SConrad Meyer * initial apicid that indicate thread id
14715b7da10SConrad Meyer * within a package."
14815b7da10SConrad Meyer *
14915b7da10SConrad Meyer * Our topo_probe_amd() uses it for
15015b7da10SConrad Meyer * pkg_id_shift and other OSes may rely on it.
15115b7da10SConrad Meyer */
15215b7da10SConrad Meyer width = MIN(0xF, log2(threads * cores));
15315b7da10SConrad Meyer logical_cpus = MIN(0xFF, threads * cores - 1);
15415b7da10SConrad Meyer regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
1555a1f0b36SNeel Natu }
156366f6083SPeter Grehan break;
157366f6083SPeter Grehan
158560d5edaSPeter Grehan case CPUID_8000_0001:
159a3f2a9c5SJohn Baldwin cpuid_count(func, param, regs);
16006053618SNeel Natu
16106053618SNeel Natu /*
16215b7da10SConrad Meyer * Hide SVM from guest.
16306053618SNeel Natu */
16415b7da10SConrad Meyer regs[2] &= ~AMDID2_SVM;
16506053618SNeel Natu
166560d5edaSPeter Grehan /*
16702904c45SNeel Natu * Don't advertise extended performance counter MSRs
16802904c45SNeel Natu * to the guest.
16902904c45SNeel Natu */
17002904c45SNeel Natu regs[2] &= ~AMDID2_PCXC;
17102904c45SNeel Natu regs[2] &= ~AMDID2_PNXC;
17202904c45SNeel Natu regs[2] &= ~AMDID2_PTSCEL2I;
17302904c45SNeel Natu
17402904c45SNeel Natu /*
1752688a818SNeel Natu * Don't advertise Instruction Based Sampling feature.
1762688a818SNeel Natu */
1772688a818SNeel Natu regs[2] &= ~AMDID2_IBS;
1782688a818SNeel Natu
17965d5111aSNeel Natu /* NodeID MSR not available */
18065d5111aSNeel Natu regs[2] &= ~AMDID2_NODE_ID;
18165d5111aSNeel Natu
182592cd7d3SNeel Natu /* Don't advertise the OS visible workaround feature */
183592cd7d3SNeel Natu regs[2] &= ~AMDID2_OSVW;
184592cd7d3SNeel Natu
1853da44302SPeter Grehan /* Hide mwaitx/monitorx capability from the guest */
1863da44302SPeter Grehan regs[2] &= ~AMDID2_MWAITX;
1873da44302SPeter Grehan
188f5f5f1e7SPeter Grehan /* Advertise RDTSCP if it is enabled. */
1893f0f4b15SJohn Baldwin error = vm_get_capability(vcpu,
190f5f5f1e7SPeter Grehan VM_CAP_RDTSCP, &enable_rdtscp);
191f5f5f1e7SPeter Grehan if (error == 0 && enable_rdtscp)
192f5f5f1e7SPeter Grehan regs[3] |= AMDID_RDTSCP;
193f5f5f1e7SPeter Grehan else
194560d5edaSPeter Grehan regs[3] &= ~AMDID_RDTSCP;
195560d5edaSPeter Grehan break;
196560d5edaSPeter Grehan
1971472b87fSNeel Natu case CPUID_8000_0007:
1981472b87fSNeel Natu /*
199592cd7d3SNeel Natu * AMD uses this leaf to advertise the processor's
200592cd7d3SNeel Natu * power monitoring and RAS capabilities. These
201592cd7d3SNeel Natu * features are hardware-specific and exposing
202592cd7d3SNeel Natu * them to a guest doesn't make a lot of sense.
203592cd7d3SNeel Natu *
204592cd7d3SNeel Natu * Intel uses this leaf only to advertise the
205592cd7d3SNeel Natu * "Invariant TSC" feature with all other bits
206592cd7d3SNeel Natu * being reserved (set to zero).
207592cd7d3SNeel Natu */
208592cd7d3SNeel Natu regs[0] = 0;
209592cd7d3SNeel Natu regs[1] = 0;
210592cd7d3SNeel Natu regs[2] = 0;
211592cd7d3SNeel Natu regs[3] = 0;
212592cd7d3SNeel Natu
213592cd7d3SNeel Natu /*
214592cd7d3SNeel Natu * "Invariant TSC" can be advertised to the guest if:
215592cd7d3SNeel Natu * - host TSC frequency is invariant
216592cd7d3SNeel Natu * - host TSCs are synchronized across physical cpus
2171472b87fSNeel Natu *
2181472b87fSNeel Natu * XXX This still falls short because the vcpu
2191472b87fSNeel Natu * can observe the TSC moving backwards as it
2201472b87fSNeel Natu * migrates across physical cpus. But at least
2211472b87fSNeel Natu * it should discourage the guest from using the
2221472b87fSNeel Natu * TSC to keep track of time.
2231472b87fSNeel Natu */
224592cd7d3SNeel Natu if (tsc_is_invariant && smp_tsc)
225592cd7d3SNeel Natu regs[3] |= AMDPM_TSC_INVARIANT;
2261472b87fSNeel Natu break;
2271472b87fSNeel Natu
22815b7da10SConrad Meyer case CPUID_8000_001D:
22915b7da10SConrad Meyer /* AMD Cache topology, like 0000_0004 for Intel. */
230caab5042SKonstantin Belousov if (!vmm_is_svm())
23115b7da10SConrad Meyer goto default_leaf;
23215b7da10SConrad Meyer
23315b7da10SConrad Meyer /*
2348d66b134SElyes Haouas * Similar to Intel, generate a fictitious cache
23515b7da10SConrad Meyer * topology for the guest with L3 shared by the
23615b7da10SConrad Meyer * package, and L1 and L2 local to a core.
23715b7da10SConrad Meyer */
23815b7da10SConrad Meyer vm_get_topology(vm, &sockets, &cores, &threads,
23915b7da10SConrad Meyer &maxcpus);
240a3f2a9c5SJohn Baldwin switch (param) {
24115b7da10SConrad Meyer case 0:
24215b7da10SConrad Meyer logical_cpus = threads;
24315b7da10SConrad Meyer level = 1;
24415b7da10SConrad Meyer func = 1; /* data cache */
24515b7da10SConrad Meyer break;
24615b7da10SConrad Meyer case 1:
24715b7da10SConrad Meyer logical_cpus = threads;
24815b7da10SConrad Meyer level = 2;
24915b7da10SConrad Meyer func = 3; /* unified cache */
25015b7da10SConrad Meyer break;
25115b7da10SConrad Meyer case 2:
25215b7da10SConrad Meyer logical_cpus = threads * cores;
25315b7da10SConrad Meyer level = 3;
25415b7da10SConrad Meyer func = 3; /* unified cache */
25515b7da10SConrad Meyer break;
25615b7da10SConrad Meyer default:
257*0698ce42SKonstantin Belousov logical_cpus = sockets * threads * cores;
25815b7da10SConrad Meyer level = 0;
25915b7da10SConrad Meyer func = 0;
26015b7da10SConrad Meyer break;
26115b7da10SConrad Meyer }
26215b7da10SConrad Meyer
26315b7da10SConrad Meyer logical_cpus = MIN(0xfff, logical_cpus - 1);
26415b7da10SConrad Meyer regs[0] = (logical_cpus << 14) | (1 << 8) |
26515b7da10SConrad Meyer (level << 5) | func;
26615b7da10SConrad Meyer regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
267*0698ce42SKonstantin Belousov
268*0698ce42SKonstantin Belousov /*
269*0698ce42SKonstantin Belousov * ecx: Number of cache ways for non-fully
270*0698ce42SKonstantin Belousov * associative cache, minus 1. Reported value
271*0698ce42SKonstantin Belousov * of zero means there is one way.
272*0698ce42SKonstantin Belousov */
27315b7da10SConrad Meyer regs[2] = 0;
274*0698ce42SKonstantin Belousov
27515b7da10SConrad Meyer regs[3] = 0;
27615b7da10SConrad Meyer break;
27715b7da10SConrad Meyer
27815b7da10SConrad Meyer case CPUID_8000_001E:
279caab5042SKonstantin Belousov /*
280caab5042SKonstantin Belousov * AMD Family 16h+ and Hygon Family 18h additional
281caab5042SKonstantin Belousov * identifiers.
282caab5042SKonstantin Belousov */
283caab5042SKonstantin Belousov if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16)
28415b7da10SConrad Meyer goto default_leaf;
28515b7da10SConrad Meyer
28615b7da10SConrad Meyer vm_get_topology(vm, &sockets, &cores, &threads,
28715b7da10SConrad Meyer &maxcpus);
28815b7da10SConrad Meyer regs[0] = vcpu_id;
28915b7da10SConrad Meyer threads = MIN(0xFF, threads - 1);
29015b7da10SConrad Meyer regs[1] = (threads << 8) |
29115b7da10SConrad Meyer (vcpu_id >> log2(threads + 1));
29215b7da10SConrad Meyer /*
29315b7da10SConrad Meyer * XXX Bhyve topology cannot yet represent >1 node per
29415b7da10SConrad Meyer * processor.
29515b7da10SConrad Meyer */
29615b7da10SConrad Meyer regs[2] = 0;
29715b7da10SConrad Meyer regs[3] = 0;
29815b7da10SConrad Meyer break;
29915b7da10SConrad Meyer
300366f6083SPeter Grehan case CPUID_0000_0001:
3018b287612SJohn Baldwin do_cpuid(1, regs);
3028b287612SJohn Baldwin
3033f0f4b15SJohn Baldwin error = vm_get_x2apic_state(vcpu, &x2apic_state);
304a2da7af6SNeel Natu if (error) {
305a2da7af6SNeel Natu panic("x86_emulate_cpuid: error %d "
306a2da7af6SNeel Natu "fetching x2apic state", error);
307a2da7af6SNeel Natu }
308a2da7af6SNeel Natu
309366f6083SPeter Grehan /*
310366f6083SPeter Grehan * Override the APIC ID only in ebx
311366f6083SPeter Grehan */
3128b287612SJohn Baldwin regs[1] &= ~(CPUID_LOCAL_APIC_ID);
3138b287612SJohn Baldwin regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
314366f6083SPeter Grehan
315366f6083SPeter Grehan /*
31631708084SNeel Natu * Don't expose VMX, SpeedStep, TME or SMX capability.
3178b287612SJohn Baldwin * Advertise x2APIC capability and Hypervisor guest.
318366f6083SPeter Grehan */
3198b287612SJohn Baldwin regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
32031708084SNeel Natu regs[2] &= ~(CPUID2_SMX);
321a2da7af6SNeel Natu
322a2da7af6SNeel Natu regs[2] |= CPUID2_HV;
323a2da7af6SNeel Natu
324a2da7af6SNeel Natu if (x2apic_state != X2APIC_DISABLED)
325a2da7af6SNeel Natu regs[2] |= CPUID2_X2APIC;
32652e5c8a2SNeel Natu else
32752e5c8a2SNeel Natu regs[2] &= ~CPUID2_X2APIC;
328366f6083SPeter Grehan
329366f6083SPeter Grehan /*
330abb023fbSJohn Baldwin * Only advertise CPUID2_XSAVE in the guest if
331abb023fbSJohn Baldwin * the host is using XSAVE.
332298379f7SPeter Grehan */
333abb023fbSJohn Baldwin if (!(regs[2] & CPUID2_OSXSAVE))
334abb023fbSJohn Baldwin regs[2] &= ~CPUID2_XSAVE;
335abb023fbSJohn Baldwin
336abb023fbSJohn Baldwin /*
337abb023fbSJohn Baldwin * If CPUID2_XSAVE is being advertised and the
338abb023fbSJohn Baldwin * guest has set CR4_XSAVE, set
339abb023fbSJohn Baldwin * CPUID2_OSXSAVE.
340abb023fbSJohn Baldwin */
341abb023fbSJohn Baldwin regs[2] &= ~CPUID2_OSXSAVE;
342abb023fbSJohn Baldwin if (regs[2] & CPUID2_XSAVE) {
34380cb5d84SJohn Baldwin error = vm_get_register(vcpu,
344abb023fbSJohn Baldwin VM_REG_GUEST_CR4, &cr4);
345abb023fbSJohn Baldwin if (error)
346abb023fbSJohn Baldwin panic("x86_emulate_cpuid: error %d "
347abb023fbSJohn Baldwin "fetching %%cr4", error);
348abb023fbSJohn Baldwin if (cr4 & CR4_XSAVE)
349abb023fbSJohn Baldwin regs[2] |= CPUID2_OSXSAVE;
350abb023fbSJohn Baldwin }
351298379f7SPeter Grehan
352298379f7SPeter Grehan /*
353ff6ec151SNeel Natu * Hide monitor/mwait until we know how to deal with
354ff6ec151SNeel Natu * these instructions.
355ff6ec151SNeel Natu */
356ff6ec151SNeel Natu regs[2] &= ~CPUID2_MON;
357ff6ec151SNeel Natu
358ff6ec151SNeel Natu /*
359560d5edaSPeter Grehan * Hide the performance and debug features.
360560d5edaSPeter Grehan */
361560d5edaSPeter Grehan regs[2] &= ~CPUID2_PDCM;
362560d5edaSPeter Grehan
363517e21d3SPeter Grehan /*
364517e21d3SPeter Grehan * No TSC deadline support in the APIC yet
365517e21d3SPeter Grehan */
366517e21d3SPeter Grehan regs[2] &= ~CPUID2_TSCDLT;
367517e21d3SPeter Grehan
368560d5edaSPeter Grehan /*
3691f3025e1SPeter Grehan * Hide thermal monitoring
3701f3025e1SPeter Grehan */
3711f3025e1SPeter Grehan regs[3] &= ~(CPUID_ACPI | CPUID_TM);
3721f3025e1SPeter Grehan
3731f3025e1SPeter Grehan /*
374560d5edaSPeter Grehan * Hide the debug store capability.
375560d5edaSPeter Grehan */
376560d5edaSPeter Grehan regs[3] &= ~CPUID_DS;
377560d5edaSPeter Grehan
3781d29bfc1SNeel Natu /*
3791d29bfc1SNeel Natu * Advertise the Machine Check and MTRR capability.
3801d29bfc1SNeel Natu *
3811d29bfc1SNeel Natu * Some guest OSes (e.g. Windows) will not boot if
3821d29bfc1SNeel Natu * these features are absent.
3831d29bfc1SNeel Natu */
3841d29bfc1SNeel Natu regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
3851d29bfc1SNeel Natu
38601d822d3SRodney W. Grimes vm_get_topology(vm, &sockets, &cores, &threads,
38701d822d3SRodney W. Grimes &maxcpus);
38801d822d3SRodney W. Grimes logical_cpus = threads * cores;
3898b287612SJohn Baldwin regs[1] &= ~CPUID_HTT_CORES;
3908bd3845dSNeel Natu regs[1] |= (logical_cpus & 0xff) << 16;
3918bd3845dSNeel Natu regs[3] |= CPUID_HTT;
3928b287612SJohn Baldwin break;
3938b287612SJohn Baldwin
3948b287612SJohn Baldwin case CPUID_0000_0004:
395a3f2a9c5SJohn Baldwin cpuid_count(func, param, regs);
3968b287612SJohn Baldwin
3978bd3845dSNeel Natu if (regs[0] || regs[1] || regs[2] || regs[3]) {
39801d822d3SRodney W. Grimes vm_get_topology(vm, &sockets, &cores, &threads,
39901d822d3SRodney W. Grimes &maxcpus);
400534dc967SNeel Natu regs[0] &= 0x3ff;
40101d822d3SRodney W. Grimes regs[0] |= (cores - 1) << 26;
4028b287612SJohn Baldwin /*
4038bd3845dSNeel Natu * Cache topology:
4048bd3845dSNeel Natu * - L1 and L2 are shared only by the logical
4058bd3845dSNeel Natu * processors in a single core.
4068bd3845dSNeel Natu * - L3 and above are shared by all logical
4078bd3845dSNeel Natu * processors in the package.
4088b287612SJohn Baldwin */
40901d822d3SRodney W. Grimes logical_cpus = threads;
4108bd3845dSNeel Natu level = (regs[0] >> 5) & 0x7;
4118bd3845dSNeel Natu if (level >= 3)
41201d822d3SRodney W. Grimes logical_cpus *= cores;
4138bd3845dSNeel Natu regs[0] |= (logical_cpus - 1) << 14;
4148bd3845dSNeel Natu }
415366f6083SPeter Grehan break;
416366f6083SPeter Grehan
417a0cad470SPeter Grehan case CPUID_0000_0007:
41849cc03daSNeel Natu regs[0] = 0;
41949cc03daSNeel Natu regs[1] = 0;
42049cc03daSNeel Natu regs[2] = 0;
42149cc03daSNeel Natu regs[3] = 0;
42249cc03daSNeel Natu
42349cc03daSNeel Natu /* leaf 0 */
424a3f2a9c5SJohn Baldwin if (param == 0) {
425a3f2a9c5SJohn Baldwin cpuid_count(func, param, regs);
42644a68c4eSJohn Baldwin
42744a68c4eSJohn Baldwin /* Only leaf 0 is supported */
42844a68c4eSJohn Baldwin regs[0] = 0;
42944a68c4eSJohn Baldwin
43044a68c4eSJohn Baldwin /*
43144a68c4eSJohn Baldwin * Expose known-safe features.
43244a68c4eSJohn Baldwin */
43347cf1b37SMark Johnston regs[1] &= CPUID_STDEXT_FSGSBASE |
43444a68c4eSJohn Baldwin CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
4354c599db7SMark Johnston CPUID_STDEXT_AVX2 | CPUID_STDEXT_SMEP |
4364c599db7SMark Johnston CPUID_STDEXT_BMI2 |
43744a68c4eSJohn Baldwin CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
43844a68c4eSJohn Baldwin CPUID_STDEXT_AVX512F |
43947cf1b37SMark Johnston CPUID_STDEXT_AVX512DQ |
440fce2d624SConrad Meyer CPUID_STDEXT_RDSEED |
4414c599db7SMark Johnston CPUID_STDEXT_SMAP |
44244a68c4eSJohn Baldwin CPUID_STDEXT_AVX512PF |
44344a68c4eSJohn Baldwin CPUID_STDEXT_AVX512ER |
44447cf1b37SMark Johnston CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA |
44547cf1b37SMark Johnston CPUID_STDEXT_AVX512BW |
44647cf1b37SMark Johnston CPUID_STDEXT_AVX512VL;
44747cf1b37SMark Johnston regs[2] &= CPUID_STDEXT2_VAES |
44847cf1b37SMark Johnston CPUID_STDEXT2_VPCLMULQDQ;
449e519cee3SJohn Baldwin regs[3] &= CPUID_STDEXT3_MD_CLEAR;
45044a68c4eSJohn Baldwin
451f5f5f1e7SPeter Grehan /* Advertise RDPID if it is enabled. */
4523f0f4b15SJohn Baldwin error = vm_get_capability(vcpu, VM_CAP_RDPID,
4533f0f4b15SJohn Baldwin &enable_rdpid);
454f5f5f1e7SPeter Grehan if (error == 0 && enable_rdpid)
455f5f5f1e7SPeter Grehan regs[2] |= CPUID_STDEXT2_RDPID;
456f5f5f1e7SPeter Grehan
45744a68c4eSJohn Baldwin /* Advertise INVPCID if it is enabled. */
4583f0f4b15SJohn Baldwin error = vm_get_capability(vcpu,
45949cc03daSNeel Natu VM_CAP_ENABLE_INVPCID, &enable_invpcid);
46049cc03daSNeel Natu if (error == 0 && enable_invpcid)
46149cc03daSNeel Natu regs[1] |= CPUID_STDEXT_INVPCID;
46249cc03daSNeel Natu }
46349cc03daSNeel Natu break;
46449cc03daSNeel Natu
46549cc03daSNeel Natu case CPUID_0000_0006:
466c077e628SAlexander Motin regs[0] = CPUTPM1_ARAT;
467c077e628SAlexander Motin regs[1] = 0;
468c077e628SAlexander Motin regs[2] = 0;
469c077e628SAlexander Motin regs[3] = 0;
470c077e628SAlexander Motin break;
471c077e628SAlexander Motin
472560d5edaSPeter Grehan case CPUID_0000_000A:
4731f3025e1SPeter Grehan /*
4741f3025e1SPeter Grehan * Handle the access, but report 0 for
4751f3025e1SPeter Grehan * all options
4761f3025e1SPeter Grehan */
4771f3025e1SPeter Grehan regs[0] = 0;
4781f3025e1SPeter Grehan regs[1] = 0;
4791f3025e1SPeter Grehan regs[2] = 0;
4801f3025e1SPeter Grehan regs[3] = 0;
4811f3025e1SPeter Grehan break;
4821f3025e1SPeter Grehan
483366f6083SPeter Grehan case CPUID_0000_000B:
484366f6083SPeter Grehan /*
48515b7da10SConrad Meyer * Intel processor topology enumeration
486366f6083SPeter Grehan */
48715b7da10SConrad Meyer if (vmm_is_intel()) {
48801d822d3SRodney W. Grimes vm_get_topology(vm, &sockets, &cores, &threads,
48901d822d3SRodney W. Grimes &maxcpus);
490a3f2a9c5SJohn Baldwin if (param == 0) {
49101d822d3SRodney W. Grimes logical_cpus = threads;
4928bd3845dSNeel Natu width = log2(logical_cpus);
4938bd3845dSNeel Natu level = CPUID_TYPE_SMT;
4948bd3845dSNeel Natu x2apic_id = vcpu_id;
4958bd3845dSNeel Natu }
4968bd3845dSNeel Natu
497a3f2a9c5SJohn Baldwin if (param == 1) {
49801d822d3SRodney W. Grimes logical_cpus = threads * cores;
4998bd3845dSNeel Natu width = log2(logical_cpus);
5008bd3845dSNeel Natu level = CPUID_TYPE_CORE;
5018bd3845dSNeel Natu x2apic_id = vcpu_id;
5028bd3845dSNeel Natu }
5038bd3845dSNeel Natu
504a3f2a9c5SJohn Baldwin if (!cpuid_leaf_b || param >= 2) {
5058bd3845dSNeel Natu width = 0;
5068bd3845dSNeel Natu logical_cpus = 0;
5078bd3845dSNeel Natu level = 0;
5088bd3845dSNeel Natu x2apic_id = 0;
5098bd3845dSNeel Natu }
5108bd3845dSNeel Natu
5118bd3845dSNeel Natu regs[0] = width & 0x1f;
5128bd3845dSNeel Natu regs[1] = logical_cpus & 0xffff;
513a3f2a9c5SJohn Baldwin regs[2] = (level << 8) | (param & 0xff);
5148bd3845dSNeel Natu regs[3] = x2apic_id;
51515b7da10SConrad Meyer } else {
51615b7da10SConrad Meyer regs[0] = 0;
51715b7da10SConrad Meyer regs[1] = 0;
51815b7da10SConrad Meyer regs[2] = 0;
51915b7da10SConrad Meyer regs[3] = 0;
52015b7da10SConrad Meyer }
521366f6083SPeter Grehan break;
522366f6083SPeter Grehan
523abb023fbSJohn Baldwin case CPUID_0000_000D:
524abb023fbSJohn Baldwin limits = vmm_get_xsave_limits();
525abb023fbSJohn Baldwin if (!limits->xsave_enabled) {
526abb023fbSJohn Baldwin regs[0] = 0;
527abb023fbSJohn Baldwin regs[1] = 0;
528abb023fbSJohn Baldwin regs[2] = 0;
529abb023fbSJohn Baldwin regs[3] = 0;
530abb023fbSJohn Baldwin break;
531abb023fbSJohn Baldwin }
532abb023fbSJohn Baldwin
533a3f2a9c5SJohn Baldwin cpuid_count(func, param, regs);
534a3f2a9c5SJohn Baldwin switch (param) {
535abb023fbSJohn Baldwin case 0:
536abb023fbSJohn Baldwin /*
537abb023fbSJohn Baldwin * Only permit the guest to use bits
538abb023fbSJohn Baldwin * that are active in the host in
539abb023fbSJohn Baldwin * %xcr0. Also, claim that the
540abb023fbSJohn Baldwin * maximum save area size is
541abb023fbSJohn Baldwin * equivalent to the host's current
542abb023fbSJohn Baldwin * save area size. Since this runs
543abb023fbSJohn Baldwin * "inside" of vmrun(), it runs with
544abb023fbSJohn Baldwin * the guest's xcr0, so the current
545abb023fbSJohn Baldwin * save area size is correct as-is.
546abb023fbSJohn Baldwin */
547abb023fbSJohn Baldwin regs[0] &= limits->xcr0_allowed;
548abb023fbSJohn Baldwin regs[2] = limits->xsave_max_size;
549abb023fbSJohn Baldwin regs[3] &= (limits->xcr0_allowed >> 32);
550abb023fbSJohn Baldwin break;
551abb023fbSJohn Baldwin case 1:
552abb023fbSJohn Baldwin /* Only permit XSAVEOPT. */
553abb023fbSJohn Baldwin regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
554abb023fbSJohn Baldwin regs[1] = 0;
555abb023fbSJohn Baldwin regs[2] = 0;
556abb023fbSJohn Baldwin regs[3] = 0;
557abb023fbSJohn Baldwin break;
558abb023fbSJohn Baldwin default:
559abb023fbSJohn Baldwin /*
560abb023fbSJohn Baldwin * If the leaf is for a permitted feature,
561abb023fbSJohn Baldwin * pass through as-is, otherwise return
562abb023fbSJohn Baldwin * all zeroes.
563abb023fbSJohn Baldwin */
564a3f2a9c5SJohn Baldwin if (!(limits->xcr0_allowed & (1ul << param))) {
565abb023fbSJohn Baldwin regs[0] = 0;
566abb023fbSJohn Baldwin regs[1] = 0;
567abb023fbSJohn Baldwin regs[2] = 0;
568abb023fbSJohn Baldwin regs[3] = 0;
569abb023fbSJohn Baldwin }
570abb023fbSJohn Baldwin break;
571abb023fbSJohn Baldwin }
572abb023fbSJohn Baldwin break;
573abb023fbSJohn Baldwin
5745afcca13SVitaliy Gusev case CPUID_0000_000F:
5755afcca13SVitaliy Gusev case CPUID_0000_0010:
5765afcca13SVitaliy Gusev /*
5775afcca13SVitaliy Gusev * Do not report any Resource Director Technology
5785afcca13SVitaliy Gusev * capabilities. Exposing control of cache or memory
5795afcca13SVitaliy Gusev * controller resource partitioning to the guest is not
5805afcca13SVitaliy Gusev * at all sensible.
5815afcca13SVitaliy Gusev *
5825afcca13SVitaliy Gusev * This is already hidden at a high level by masking of
5835afcca13SVitaliy Gusev * leaf 0x7. Even still, a guest may look here for
5845afcca13SVitaliy Gusev * detailed capability information.
5855afcca13SVitaliy Gusev */
5865afcca13SVitaliy Gusev regs[0] = 0;
5875afcca13SVitaliy Gusev regs[1] = 0;
5885afcca13SVitaliy Gusev regs[2] = 0;
5895afcca13SVitaliy Gusev regs[3] = 0;
5905afcca13SVitaliy Gusev break;
5915afcca13SVitaliy Gusev
592ec048c75SPeter Grehan case CPUID_0000_0015:
593ec048c75SPeter Grehan /*
594ec048c75SPeter Grehan * Don't report CPU TSC/Crystal ratio and clock
595ec048c75SPeter Grehan * values since guests may use these to derive the
596ec048c75SPeter Grehan * local APIC frequency..
597ec048c75SPeter Grehan */
598ec048c75SPeter Grehan regs[0] = 0;
599ec048c75SPeter Grehan regs[1] = 0;
600ec048c75SPeter Grehan regs[2] = 0;
601ec048c75SPeter Grehan regs[3] = 0;
602ec048c75SPeter Grehan break;
603ec048c75SPeter Grehan
6048b287612SJohn Baldwin case 0x40000000:
6058b287612SJohn Baldwin regs[0] = CPUID_VM_HIGH;
6068b287612SJohn Baldwin bcopy(bhyve_id, ®s[1], 4);
607560d5edaSPeter Grehan bcopy(bhyve_id + 4, ®s[2], 4);
608560d5edaSPeter Grehan bcopy(bhyve_id + 8, ®s[3], 4);
6098b287612SJohn Baldwin break;
610560d5edaSPeter Grehan
611366f6083SPeter Grehan default:
61215b7da10SConrad Meyer default_leaf:
613560d5edaSPeter Grehan /*
614560d5edaSPeter Grehan * The leaf value has already been clamped so
615560d5edaSPeter Grehan * simply pass this through, keeping count of
616560d5edaSPeter Grehan * how many unhandled leaf values have been seen.
617560d5edaSPeter Grehan */
618560d5edaSPeter Grehan atomic_add_long(&bhyve_xcpuids, 1);
619a3f2a9c5SJohn Baldwin cpuid_count(func, param, regs);
620560d5edaSPeter Grehan break;
621366f6083SPeter Grehan }
622366f6083SPeter Grehan
623a3f2a9c5SJohn Baldwin /*
624a3f2a9c5SJohn Baldwin * CPUID clears the upper 32-bits of the long-mode registers.
625a3f2a9c5SJohn Baldwin */
626a3f2a9c5SJohn Baldwin *rax = regs[0];
627a3f2a9c5SJohn Baldwin *rbx = regs[1];
628a3f2a9c5SJohn Baldwin *rcx = regs[2];
629a3f2a9c5SJohn Baldwin *rdx = regs[3];
630560d5edaSPeter Grehan
631366f6083SPeter Grehan return (1);
632366f6083SPeter Grehan }
633ea91ca92SNeel Natu
634ea91ca92SNeel Natu bool
vm_cpuid_capability(struct vcpu * vcpu,enum vm_cpuid_capability cap)63580cb5d84SJohn Baldwin vm_cpuid_capability(struct vcpu *vcpu, enum vm_cpuid_capability cap)
636ea91ca92SNeel Natu {
637ea91ca92SNeel Natu bool rv;
638ea91ca92SNeel Natu
639ea91ca92SNeel Natu KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
640ea91ca92SNeel Natu __func__, cap));
641ea91ca92SNeel Natu
642ea91ca92SNeel Natu /*
643ea91ca92SNeel Natu * Simply passthrough the capabilities of the host cpu for now.
644ea91ca92SNeel Natu */
645ea91ca92SNeel Natu rv = false;
646ea91ca92SNeel Natu switch (cap) {
647ea91ca92SNeel Natu case VCC_NO_EXECUTE:
648ea91ca92SNeel Natu if (amd_feature & AMDID_NX)
649ea91ca92SNeel Natu rv = true;
650ea91ca92SNeel Natu break;
651ea91ca92SNeel Natu case VCC_FFXSR:
652ea91ca92SNeel Natu if (amd_feature & AMDID_FFXSR)
653ea91ca92SNeel Natu rv = true;
654ea91ca92SNeel Natu break;
655ea91ca92SNeel Natu case VCC_TCE:
656ea91ca92SNeel Natu if (amd_feature2 & AMDID2_TCE)
657ea91ca92SNeel Natu rv = true;
658ea91ca92SNeel Natu break;
659ea91ca92SNeel Natu default:
660ea91ca92SNeel Natu panic("%s: unknown vm_cpu_capability %d", __func__, cap);
661ea91ca92SNeel Natu }
662ea91ca92SNeel Natu return (rv);
663ea91ca92SNeel Natu }
6646171e026SCorvin Köhne
6656171e026SCorvin Köhne int
vm_rdmtrr(struct vm_mtrr * mtrr,u_int num,uint64_t * val)6666171e026SCorvin Köhne vm_rdmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t *val)
6676171e026SCorvin Köhne {
6686171e026SCorvin Köhne switch (num) {
6696171e026SCorvin Köhne case MSR_MTRRcap:
6706171e026SCorvin Köhne *val = MTRR_CAP_WC | MTRR_CAP_FIXED | VMM_MTRR_VAR_MAX;
6716171e026SCorvin Köhne break;
6726171e026SCorvin Köhne case MSR_MTRRdefType:
6736171e026SCorvin Köhne *val = mtrr->def_type;
6746171e026SCorvin Köhne break;
6756171e026SCorvin Köhne case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
6766171e026SCorvin Köhne *val = mtrr->fixed4k[num - MSR_MTRR4kBase];
6776171e026SCorvin Köhne break;
6786171e026SCorvin Köhne case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
6796171e026SCorvin Köhne *val = mtrr->fixed16k[num - MSR_MTRR16kBase];
6806171e026SCorvin Köhne break;
6816171e026SCorvin Köhne case MSR_MTRR64kBase:
6826171e026SCorvin Köhne *val = mtrr->fixed64k;
6836171e026SCorvin Köhne break;
6846171e026SCorvin Köhne case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
6856171e026SCorvin Köhne u_int offset = num - MSR_MTRRVarBase;
6866171e026SCorvin Köhne if (offset % 2 == 0) {
6876171e026SCorvin Köhne *val = mtrr->var[offset / 2].base;
6886171e026SCorvin Köhne } else {
6896171e026SCorvin Köhne *val = mtrr->var[offset / 2].mask;
6906171e026SCorvin Köhne }
6916171e026SCorvin Köhne break;
6926171e026SCorvin Köhne }
6936171e026SCorvin Köhne default:
6946171e026SCorvin Köhne return (-1);
6956171e026SCorvin Köhne }
6966171e026SCorvin Köhne
6976171e026SCorvin Köhne return (0);
6986171e026SCorvin Köhne }
6996171e026SCorvin Köhne
7006171e026SCorvin Köhne int
vm_wrmtrr(struct vm_mtrr * mtrr,u_int num,uint64_t val)7016171e026SCorvin Köhne vm_wrmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t val)
7026171e026SCorvin Köhne {
7036171e026SCorvin Köhne switch (num) {
7046171e026SCorvin Köhne case MSR_MTRRcap:
7056171e026SCorvin Köhne /* MTRRCAP is read only */
7066171e026SCorvin Köhne return (-1);
7076171e026SCorvin Köhne case MSR_MTRRdefType:
7086171e026SCorvin Köhne if (val & ~VMM_MTRR_DEF_MASK) {
7096171e026SCorvin Köhne /* generate #GP on writes to reserved fields */
7106171e026SCorvin Köhne return (-1);
7116171e026SCorvin Köhne }
7126171e026SCorvin Köhne mtrr->def_type = val;
7136171e026SCorvin Köhne break;
7146171e026SCorvin Köhne case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
7156171e026SCorvin Köhne mtrr->fixed4k[num - MSR_MTRR4kBase] = val;
7166171e026SCorvin Köhne break;
7176171e026SCorvin Köhne case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
7186171e026SCorvin Köhne mtrr->fixed16k[num - MSR_MTRR16kBase] = val;
7196171e026SCorvin Köhne break;
7206171e026SCorvin Köhne case MSR_MTRR64kBase:
7216171e026SCorvin Köhne mtrr->fixed64k = val;
7226171e026SCorvin Köhne break;
7236171e026SCorvin Köhne case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
7246171e026SCorvin Köhne u_int offset = num - MSR_MTRRVarBase;
7256171e026SCorvin Köhne if (offset % 2 == 0) {
7266171e026SCorvin Köhne if (val & ~VMM_MTRR_PHYSBASE_MASK) {
7276171e026SCorvin Köhne /* generate #GP on writes to reserved fields */
7286171e026SCorvin Köhne return (-1);
7296171e026SCorvin Köhne }
7306171e026SCorvin Köhne mtrr->var[offset / 2].base = val;
7316171e026SCorvin Köhne } else {
7326171e026SCorvin Köhne if (val & ~VMM_MTRR_PHYSMASK_MASK) {
7336171e026SCorvin Köhne /* generate #GP on writes to reserved fields */
7346171e026SCorvin Köhne return (-1);
7356171e026SCorvin Köhne }
7366171e026SCorvin Köhne mtrr->var[offset / 2].mask = val;
7376171e026SCorvin Köhne }
7386171e026SCorvin Köhne break;
7396171e026SCorvin Köhne }
7406171e026SCorvin Köhne default:
7416171e026SCorvin Köhne return (-1);
7426171e026SCorvin Köhne }
7436171e026SCorvin Köhne
7446171e026SCorvin Köhne return (0);
7456171e026SCorvin Köhne }
746