/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2025 Oxide Computer Company
 */

#include <sys/kernel.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/x86_archext.h>

#include <sys/vmm_kernel.h>
#include "svm.h"
#include "svm_softc.h"
#include "svm_pmu.h"

/*
 * Allow guests to use perf counter resources.
 */
int svm_pmu_enabled = 1;

/*
 * Force guest exits (preclude disabling intercepts) for access to perf counter
 * resources via RDPMC and RDMSR/WRMSR.
 */
int svm_pmu_force_exit = 0;

void
svm_pmu_init(struct svm_softc *svm_sc)
{
	if (!is_x86_feature(x86_featureset, X86FSET_AMD_PCEC) ||
	    svm_pmu_enabled == 0) {
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

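	/*
	 * Map the host micro-architecture to a PMU flavor, which governs the
	 * set of event selectors the guest will be allowed to program.
	 */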
	switch (uarchrev_uarch(cpuid_getuarchrev(CPU))) {
	case X86_UARCH_AMD_LEGACY:
		svm_sc->pmu_flavor = SPF_PRE_ZEN;
		break;
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		svm_sc->pmu_flavor = SPF_ZEN1;
		break;
	case X86_UARCH_AMD_ZEN2:
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
	case X86_UARCH_AMD_ZEN5:
		svm_sc->pmu_flavor = SPF_ZEN2;
		break;
	default:
		/* Exclude unrecognized uarch from perf counter access */
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	/* Turn on base and extended CPCs for all vCPUs */
	const uint_t maxcpu = vm_get_maxcpus(svm_sc->vm);
	for (uint_t i = 0; i < maxcpu; i++) {
		struct svm_pmu_vcpu *pmu_vcpu = svm_get_pmu(svm_sc, i);

		pmu_vcpu->spv_hma_state.hscs_flags = HCF_EN_BASE | HCF_EN_EXTD;
	}
}

static bool
svm_pmu_is_active(const struct svm_pmu_vcpu *pmu)
{
	return (pmu->spv_hma_state.hscs_flags != HCF_DISABLED);
}

static bool
svm_pmu_is_evt_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return (true);
	default:
		return (false);
	}
}

static bool
svm_pmu_is_ctr_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return (true);
	default:
		return (false);
	}
}

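/*
 * Translate a perf-counter MSR number into a counter index.  The legacy K7
 * evtsel and counter MSRs occupy separate contiguous ranges, while the F15H
 * ("extended") MSRs interleave evtsel/counter pairs, hence the division by 2.
 */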
static uint_t
svm_pmu_msr_to_idx(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
		return (msr - MSR_AMD_K7_PERF_EVTSEL0);
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
		return (msr - MSR_AMD_K7_PERF_CTR0);
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return ((msr - MSR_AMD_F15H_PERF_EVTSEL0) / 2);
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return ((msr - MSR_AMD_F15H_PERF_CTR0) / 2);
	default:
		panic("unexpected perf. counter MSR: %X", msr);
	}
}

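/*
 * Is the given MSR one whose emulation is owned by the PMU code (i.e. an
 * event selector or counter MSR serviced by svm_pmu_rdmsr()/svm_pmu_wrmsr())?
 */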
bool
svm_pmu_owned_msr(uint32_t msr)
{
	return (svm_pmu_is_evt_msr(msr) || svm_pmu_is_ctr_msr(msr));
}

/*
 * Is guest access to a given evtsel allowed for the "flavor" of the PMU?
 *
 * Initial access is fairly limited, providing access to only the evtsels
 * expected to be used by Linux `perf stat`.
 */
static bool
svm_pmu_evtsel_allowed(uint64_t evtsel, svm_pmu_flavor_t flavor)
{
	const uint64_t evt = evtsel & AMD_PERF_EVTSEL_EVT_MASK;
	const uint16_t umask = evtsel & AMD_PERF_EVTSEL_UNIT_MASK;

	/*
	 * Some of the perf counters have stayed fairly consistent in their
	 * identifiers throughout the AMD product line.
	 */
	switch (evt) {
	case 0x76:	/* CPU cycles */
	case 0xc0:	/* Retired instructions */
	case 0xc2:	/* Branch instructions */
	case 0xc3:	/* Branch misses */
		return (true);
	default:
		break;
	}

	if (flavor == SPF_PRE_ZEN) {
		switch (evt) {
		case 0x7d:	/* Cache hits */
		case 0x7e:	/* Cache misses */
			return (true);
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN1) {
		switch (evt) {
		case 0x60:	/* L2 accesses (group 1) */
		case 0x64:	/* Core to L2 access status */
			return (true);
		case 0x87:	/* IC fetch stall */
			switch (umask) {
			case 0x0100:	/* backend */
			case 0x0200:	/* frontend */
				return (true);
			default:
				return (false);
			}
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN2) {
		switch (evt) {
		case 0x60:	/* L2 accesses (group 1) */
		case 0x64:	/* Core to L2 access status */
		case 0xa9:	/* u-op queue empty (frontend stall) */
			return (true);
		default:
			return (false);
		}
	}

	return (false);
}

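/*
 * Handle a guest RDMSR of a PMU-owned MSR.  Event selector reads are served
 * from the shadow copy, while counter values come from the HMA-maintained
 * state.
 */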
vm_msr_result_t
svm_pmu_rdmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t *valp)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_evtsel_shadow[idx];
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_hma_state.hscs_regs[idx].hc_ctr;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

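/*
 * Handle a guest WRMSR of a PMU-owned MSR.  Event selector writes are filtered
 * through svm_pmu_evtsel_allowed() before being applied to the HMA state.
 */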
vm_msr_result_t
svm_pmu_wrmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t val)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);
	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		/*
		 * Keep the unmodified evtsel shadowed, should the guest choose
		 * to read it out later.
		 *
		 * XXX: Should we balk at reserved bits being set?
		 */
		pmu->spv_evtsel_shadow[idx] = val;

		if (!svm_pmu_evtsel_allowed(val, flavor)) {
			/*
			 * Disable any counters which have been configured with
			 * an event selector which we do not allow access to.
			 */
			val = 0;
		}
		pmu->spv_hma_state.hscs_regs[idx].hc_evtsel = val;
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		pmu->spv_hma_state.hscs_regs[idx].hc_ctr = val;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

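/*
 * Handle a guest RDPMC.  Returns false if the PMU is inactive or the counter
 * index is out of range; the caller is expected to treat such an access as
 * invalid.
 */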
bool
svm_pmu_rdpmc(struct svm_softc *svm_sc, int vcpu, uint32_t ecx, uint64_t *valp)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (false);
	}
	if (ecx >= SVM_PMU_MAX_COUNTERS) {
		return (false);
	}

	*valp = pmu->spv_hma_state.hscs_regs[ecx].hc_ctr;
	return (true);
}

/*
 * Attempt to load guest PMU state, if the guest vCPU happens to be actively
 * using any counters.  Host state will be saved if such loading occurs.
 *
 * The results of any state loading may require adjustment of guest intercepts
 * and thus demand a call to svm_apply_dirty() prior to VM entry.
 */
void
svm_pmu_enter(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_res_t entry = hma_svm_cpc_enter(&pmu->spv_hma_state);

	/*
	 * Until per-vCPU MSR bitmaps are available, ignore the ability to
	 * expose direct guest access to counter MSRs.
	 */
	entry &= ~HSCR_ACCESS_CTR_MSR;

	if (entry != pmu->spv_last_entry) {
		/* Update intercepts to match what is allowed per HMA. */
		if ((entry & HSCR_ACCESS_RDPMC) != 0 &&
		    svm_pmu_force_exit == 0) {
			svm_disable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		} else {
			svm_enable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		}
	}
	pmu->spv_last_entry = entry;
}


/*
 * If guest PMU state is active, save it, and restore the host state.
 */
void
svm_pmu_exit(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_exit(&pmu->spv_hma_state);
}

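/*
 * vmm-data read handler: export the per-vCPU PMU state (shadow event selectors
 * and current counter values) as a vdi_pmu_amd_v1 payload.
 */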
static int
svm_pmu_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	struct vdi_pmu_amd_v1 *out = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		bzero(out, sizeof (*out));
		return (0);
	}

	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		out->vpa_evtsel[i] = pmu->spv_evtsel_shadow[i];
		out->vpa_ctr[i] = pmu->spv_hma_state.hscs_regs[i].hc_ctr;
	}
	return (0);
}

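/*
 * vmm-data write handler: import per-vCPU PMU state, subjecting the incoming
 * event selectors to the same filtering as guest WRMSR emulation.
 */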
static int
svm_pmu_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	const struct vdi_pmu_amd_v1 *src = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		/*
		 * Skip importing state for an inactive PMU.
		 *
		 * It might be appropriate to return an error here, but it is
		 * not clear which error would best fit (or what userspace
		 * could do about it in any case).
		 */
		return (0);
	}

	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;
	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		const uint64_t evtsel = src->vpa_evtsel[i];

		/*
		 * The shadow evtsel is kept as-is, but the "active" value
		 * undergoes the same verification as a guest WRMSR.
		 */
		pmu->spv_evtsel_shadow[i] = evtsel;
		if (svm_pmu_evtsel_allowed(evtsel, flavor)) {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = evtsel;
		} else {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = 0;
		}
		pmu->spv_hma_state.hscs_regs[i].hc_ctr = src->vpa_ctr[i];
	}
	return (0);
}

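/*
 * Register the v1 AMD PMU payload with the vmm-data framework so the handlers
 * above can service save/restore of this state.
 */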
static const vmm_data_version_entry_t pmu_amd_v1 = {
	.vdve_class = VDC_PMU_AMD,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_pmu_amd_v1),
	.vdve_vcpu_readf = svm_pmu_data_read,
	.vdve_vcpu_writef = svm_pmu_data_write,
};
VMM_DATA_VERSION(pmu_amd_v1);