/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2025 Oxide Computer Company
 */

#include <sys/kernel.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/x86_archext.h>

#include <sys/vmm_kernel.h>
#include "svm.h"
#include "svm_softc.h"
#include "svm_pmu.h"

/*
 * Allow guests to use perf counter resources.
 */
int svm_pmu_enabled = 1;

/*
 * Force guest exits (i.e. preclude disabling intercepts) for access to perf
 * counter resources via RDPMC and RDMSR/WRMSR.
 */
int svm_pmu_force_exit = 0;

void
svm_pmu_init(struct svm_softc *svm_sc)
{
	if (!is_x86_feature(x86_featureset, X86FSET_AMD_PCEC) ||
	    svm_pmu_enabled == 0) {
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	switch (uarchrev_uarch(cpuid_getuarchrev(CPU))) {
	case X86_UARCH_AMD_LEGACY:
		svm_sc->pmu_flavor = SPF_PRE_ZEN;
		break;
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		svm_sc->pmu_flavor = SPF_ZEN1;
		break;
	case X86_UARCH_AMD_ZEN2:
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
	case X86_UARCH_AMD_ZEN5:
		svm_sc->pmu_flavor = SPF_ZEN2;
		break;
	default:
		/* Exclude unrecognized uarch from perf counter access */
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	/* Turn on base and extended CPCs for all vCPUs */
	const uint_t maxcpu = vm_get_maxcpus(svm_sc->vm);
	for (uint_t i = 0; i < maxcpu; i++) {
		struct svm_pmu_vcpu *pmu_vcpu = svm_get_pmu(svm_sc, i);

		pmu_vcpu->spv_hma_state.hscs_flags = HCF_EN_BASE | HCF_EN_EXTD;
	}
}
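
/*
 * Has counter access been enabled for this vCPU PMU (see svm_pmu_init())?
 */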
static bool
svm_pmu_is_active(const struct svm_pmu_vcpu *pmu)
{
	return (pmu->spv_hma_state.hscs_flags != HCF_DISABLED);
}
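
/*
 * Does this MSR address one of the perf event-select (EVTSEL) registers, in
 * either the legacy K7 or extended F15H range?
 */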
static bool
svm_pmu_is_evt_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return (true);
	default:
		return (false);
	}
}
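
/*
 * Does this MSR address one of the perf counter (CTR) registers, in either the
 * legacy K7 or extended F15H range?
 */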
static bool
svm_pmu_is_ctr_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return (true);
	default:
		return (false);
	}
}
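
/*
 * Translate an evtsel/counter MSR into its counter index.  The legacy K7
 * registers are laid out contiguously, while the extended F15H evtsel/counter
 * registers are interleaved in pairs, hence the divide-by-2 for those ranges.
 */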
static uint_t
svm_pmu_msr_to_idx(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
		return (msr - MSR_AMD_K7_PERF_EVTSEL0);
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
		return (msr - MSR_AMD_K7_PERF_CTR0);
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return ((msr - MSR_AMD_F15H_PERF_EVTSEL0) / 2);
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return ((msr - MSR_AMD_F15H_PERF_CTR0) / 2);
	default:
		panic("unexpected perf. counter MSR: %X", msr);
	}
}
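
/*
 * Is this one of the evtsel/counter MSRs handled by the virtualized PMU?
 */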
bool
svm_pmu_owned_msr(uint32_t msr)
{
	return (svm_pmu_is_evt_msr(msr) || svm_pmu_is_ctr_msr(msr));
}

/*
 * Is guest access to a given evtsel allowed for the "flavor" of the PMU?
 *
 * Initial access is fairly limited, providing access to only the evtsels
 * expected to be used by Linux `perf stat`.
 */
static bool
svm_pmu_evtsel_allowed(uint64_t evtsel, svm_pmu_flavor_t flavor)
{
	const uint64_t evt = evtsel & AMD_PERF_EVTSEL_EVT_MASK;
	const uint16_t umask = evtsel & AMD_PERF_EVTSEL_UNIT_MASK;

	/*
	 * Some of the perf counters have stayed fairly consistent in their
	 * identifiers throughout the AMD product line.
	 */
	switch (evt) {
	case 0x76:	/* CPU cycles */
	case 0xc0:	/* Retired instructions */
	case 0xc2:	/* Branch instructions */
	case 0xc3:	/* Branch misses */
		return (true);
	default:
		break;
	}

	if (flavor == SPF_PRE_ZEN) {
		switch (evt) {
		case 0x7d:	/* Cache hits */
		case 0x7e:	/* Cache misses */
			return (true);
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN1) {
		switch (evt) {
		case 0x60:	/* L2 accesses (group 1) */
		case 0x64:	/* Core to L2 access status */
			return (true);
		case 0x87:	/* IC fetch stall */
			switch (umask) {
			case 0x0100:	/* backend */
			case 0x0200:	/* frontend */
				return (true);
			default:
				return (false);
			}
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN2) {
		switch (evt) {
		case 0x60:	/* L2 accesses (group 1) */
		case 0x64:	/* Core to L2 access status */
		case 0xa9:	/* u-op queue empty (frontend stall) */
			return (true);
		default:
			return (false);
		}
	}

	return (false);
}
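
/*
 * Handle a guest RDMSR of a PMU-owned MSR: evtsel reads are satisfied from the
 * per-vCPU shadow copy, while counter reads come from the HMA-maintained
 * counter state.  Access is reported as unhandled if the vCPU PMU is inactive.
 */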
vm_msr_result_t
svm_pmu_rdmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t *valp)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_evtsel_shadow[idx];
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_hma_state.hscs_regs[idx].hc_ctr;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}
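
/*
 * Handle a guest WRMSR of a PMU-owned MSR.  Evtsel writes are always recorded
 * in the shadow copy, but only values passing svm_pmu_evtsel_allowed() are
 * loaded into the active HMA state; disallowed selections zero the active
 * evtsel, effectively disabling that counter.
 */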
vm_msr_result_t
svm_pmu_wrmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t val)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);
	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		/*
		 * Keep the unmodified evtsel shadowed, should the guest choose
		 * to read it out later.
		 *
		 * XXX: Should we balk at reserved bits being set?
		 */
		pmu->spv_evtsel_shadow[idx] = val;

		if (!svm_pmu_evtsel_allowed(val, flavor)) {
			/*
			 * Disable any counters which have been configured with
			 * an event selector which we do not allow access to.
			 */
			val = 0;
		}
		pmu->spv_hma_state.hscs_regs[idx].hc_evtsel = val;
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		pmu->spv_hma_state.hscs_regs[idx].hc_ctr = val;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}
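
/*
 * Handle a guest RDPMC, where %ecx holds the counter index.  Returns false if
 * the vCPU PMU is inactive or the index is out of range for the emulated
 * counters.
 */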
bool
svm_pmu_rdpmc(struct svm_softc *svm_sc, int vcpu, uint32_t ecx, uint64_t *valp)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (false);
	}
	if (ecx >= SVM_PMU_MAX_COUNTERS) {
		return (false);
	}

	*valp = pmu->spv_hma_state.hscs_regs[ecx].hc_ctr;
	return (true);
}

/*
 * Attempt to load guest PMU state, if the guest vCPU happens to be actively
 * using any counters. Host state will be saved if such loading occurs.
 *
 * The results of any state loading may require adjustment of guest intercepts
 * and thus demand a call to svm_apply_dirty() prior to VM entry.
 */
void
svm_pmu_enter(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_res_t entry = hma_svm_cpc_enter(&pmu->spv_hma_state);

	/*
	 * Until per-vCPU MSR bitmaps are available, ignore the ability to
	 * expose direct guest access to the counter MSRs.
	 */
	entry &= ~HSCR_ACCESS_CTR_MSR;

	if (entry != pmu->spv_last_entry) {
		/* Update intercepts to match what is allowed per HMA. */
		if (entry & HSCR_ACCESS_RDPMC && svm_pmu_force_exit == 0) {
			svm_disable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		} else {
			svm_enable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		}
	}
	pmu->spv_last_entry = entry;
}

/*
 * If guest PMU state is active, save it, and restore the host state.
 */
void
svm_pmu_exit(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_exit(&pmu->spv_hma_state);
}
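
/*
 * vmm-data read handler for the VDC_PMU_AMD (version 1) payload: export the
 * per-vCPU evtsel shadows and counter values, or an all-zeroes payload if the
 * vCPU PMU is inactive.
 */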
static int
svm_pmu_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	struct vdi_pmu_amd_v1 *out = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		bzero(out, sizeof (*out));
		return (0);
	}

	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		out->vpa_evtsel[i] = pmu->spv_evtsel_shadow[i];
		out->vpa_ctr[i] = pmu->spv_hma_state.hscs_regs[i].hc_ctr;
	}
	return (0);
}
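
/*
 * vmm-data write handler for the VDC_PMU_AMD (version 1) payload: import
 * per-vCPU evtsel and counter values, subjecting each evtsel to the same
 * checks applied to a guest WRMSR.
 */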
static int
svm_pmu_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	const struct vdi_pmu_amd_v1 *src = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		/*
		 * Skip importing state for an inactive PMU.
		 *
		 * It might be appropriate to return an error here, but it's
		 * not clear what would be most appropriate (or what userspace
		 * would do in such a case).
		 */
		return (0);
	}

	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;
	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		const uint64_t evtsel = src->vpa_evtsel[i];

		/*
		 * The shadow evtsel is kept as-is, but the "active" value
		 * undergoes the same verification as a guest WRMSR.
		 */
		pmu->spv_evtsel_shadow[i] = evtsel;
		if (svm_pmu_evtsel_allowed(evtsel, flavor)) {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = evtsel;
		} else {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = 0;
		}
		pmu->spv_hma_state.hscs_regs[i].hc_ctr = src->vpa_ctr[i];
	}
	return (0);
}

static const vmm_data_version_entry_t pmu_amd_v1 = {
	.vdve_class = VDC_PMU_AMD,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_pmu_amd_v1),
	.vdve_vcpu_readf = svm_pmu_data_read,
	.vdve_vcpu_writef = svm_pmu_data_write,
};
VMM_DATA_VERSION(pmu_amd_v1);