/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2025 Oxide Computer Company
 */

#include <sys/kernel.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/x86_archext.h>

#include <sys/vmm_kernel.h>
#include "svm.h"
#include "svm_softc.h"
#include "svm_pmu.h"

/*
 * Allow guests to use perf counter resources.
 */
int svm_pmu_enabled = 1;

/*
 * Force guest exits (i.e. preclude disabling intercepts) for access to perf
 * counter resources via RDPMC and RDMSR/WRMSR.
 */
int svm_pmu_force_exit = 0;
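
/*
 * Both tunables above are plain globals.  Like other vmm module tunables,
 * they can presumably be set at boot via /etc/system (the module name "vmm"
 * is an assumption here), e.g.:
 *
 *	set vmm:svm_pmu_enabled = 0
 *	set vmm:svm_pmu_force_exit = 1
 */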

void
svm_pmu_init(struct svm_softc *svm_sc)
{
	if (!is_x86_feature(x86_featureset, X86FSET_AMD_PCEC) ||
	    svm_pmu_enabled == 0) {
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	switch (uarchrev_uarch(cpuid_getuarchrev(CPU))) {
	case X86_UARCH_AMD_LEGACY:
		svm_sc->pmu_flavor = SPF_PRE_ZEN;
		break;
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		svm_sc->pmu_flavor = SPF_ZEN1;
		break;
	case X86_UARCH_AMD_ZEN2:
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
	case X86_UARCH_AMD_ZEN5:
		svm_sc->pmu_flavor = SPF_ZEN2;
		break;
	default:
		/* Exclude unrecognized uarch from perf counter access */
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	/* Turn on base and extended CPCs for all vCPUs */
	const uint_t maxcpu = vm_get_maxcpus(svm_sc->vm);
	for (uint_t i = 0; i < maxcpu; i++) {
		struct svm_pmu_vcpu *pmu_vcpu = svm_get_pmu(svm_sc, i);

		pmu_vcpu->spv_hma_state.hscs_flags = HCF_EN_BASE | HCF_EN_EXTD;
	}
}

static bool
svm_pmu_is_active(const struct svm_pmu_vcpu *pmu)
{
	return (pmu->spv_hma_state.hscs_flags != HCF_DISABLED);
}

static bool
svm_pmu_is_evt_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return (true);
	default:
		return (false);
	}
}

static bool
svm_pmu_is_ctr_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return (true);
	default:
		return (false);
	}
}

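/*
 * Map a perf counter MSR to its counter index.  The legacy K7 evtsel and
 * counter MSRs are each numbered contiguously, while the extended F15H
 * registers are laid out with evtsel and counter MSRs interleaved (hence the
 * divide by two); that layout is inferred from the MSR definitions used
 * above.
 */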
static uint_t
svm_pmu_msr_to_idx(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
		return (msr - MSR_AMD_K7_PERF_EVTSEL0);
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
		return (msr - MSR_AMD_K7_PERF_CTR0);
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return ((msr - MSR_AMD_F15H_PERF_EVTSEL0) / 2);
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return ((msr - MSR_AMD_F15H_PERF_CTR0) / 2);
	default:
		panic("unexpected perf. counter MSR: %X", msr);
	}
}

bool
svm_pmu_owned_msr(uint32_t msr)
{
	return (svm_pmu_is_evt_msr(msr) || svm_pmu_is_ctr_msr(msr));
}

/*
 * Is guest access to a given evtsel allowed for the "flavor" of the PMU?
 *
 * Initial access is fairly limited, providing access to only the evtsels
 * expected to be used by Linux `perf stat`.
 */
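/*
 * Only the event select and unit mask fields are considered here; the
 * remaining evtsel bits (enable, user/kernel selection, etc.) are passed
 * through to the HMA counter state unmodified by the WRMSR handler below.
 * For example, a guest counting CPU cycles would typically program an evtsel
 * along the lines of (EN | OS | USR | 0x76), and only the 0x76 event portion
 * matters to this check.
 */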
static bool
svm_pmu_evtsel_allowed(uint64_t evtsel, svm_pmu_flavor_t flavor)
{
	const uint64_t evt = evtsel & AMD_PERF_EVTSEL_EVT_MASK;
	const uint16_t umask = evtsel & AMD_PERF_EVTSEL_UNIT_MASK;

	/*
	 * Some of the perf counters have stayed fairly consistent in their
	 * identifiers throughout the AMD product line.
	 */
	switch (evt) {
	case 0x76:	/* CPU cycles */
	case 0xc0:	/* Retired instructions */
	case 0xc2:	/* Branch instructions */
	case 0xc3:	/* Branch misses */
		return (true);
	default:
		break;
	}

	if (flavor == SPF_PRE_ZEN) {
		switch (evt) {
		case 0x7d: /* Cache hits */
		case 0x7e: /* Cache misses */
			return (true);
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN1) {
		switch (evt) {
		case 0x60: /* L2 accesses (group 1) */
		case 0x64: /* Core to L2 access status */
			return (true);
		case 0x87: /* IC fetch stall */
			switch (umask) {
			case 0x0100: /* backend */
			case 0x0200: /* frontend */
				return (true);
			default:
				return (false);
			}
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN2) {
		switch (evt) {
		case 0x60: /* L2 accesses (group 1) */
		case 0x64: /* Core to L2 access status */
		case 0xa9: /* u-op queue empty (frontend stall) */
			return (true);
		default:
			return (false);
		}
	}

	return (false);
}

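/*
 * Handle a guest RDMSR of a PMU-owned MSR: evtsels are read back from the
 * per-vCPU shadow copy, while counter values come from the HMA-maintained
 * counter state.
 */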
vm_msr_result_t
svm_pmu_rdmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t *valp)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_evtsel_shadow[idx];
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_hma_state.hscs_regs[idx].hc_ctr;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

vm_msr_result_t
svm_pmu_wrmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t val)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);
	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		/*
		 * Keep the unmodified evtsel shadowed, should the guest choose
		 * to read it out later.
		 *
		 * XXX: Should we balk at reserved bits being set?
		 */
		pmu->spv_evtsel_shadow[idx] = val;

		if (!svm_pmu_evtsel_allowed(val, flavor)) {
			/*
			 * Disable any counters which have been configured with
			 * an event selector which we do not allow access to.
			 */
			val = 0;
		}
		pmu->spv_hma_state.hscs_regs[idx].hc_evtsel = val;
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		pmu->spv_hma_state.hscs_regs[idx].hc_ctr = val;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

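/*
 * Handle a guest RDPMC: %ecx selects the counter, and the value is read from
 * the HMA-maintained counter state.  Returning false indicates the access was
 * not handled here (inactive PMU or out-of-range index), leaving the caller
 * to decide how to respond (presumably by injecting #GP into the guest).
 */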
bool
svm_pmu_rdpmc(struct svm_softc *svm_sc, int vcpu, uint32_t ecx, uint64_t *valp)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (false);
	}
	if (ecx >= SVM_PMU_MAX_COUNTERS) {
		return (false);
	}

	*valp = pmu->spv_hma_state.hscs_regs[ecx].hc_ctr;
	return (true);
}

/*
 * Attempt to load guest PMU state, if the guest vCPU happens to be actively
 * using any counters.  Host state will be saved if such loading occurs.
 *
 * The results of any state loading may require adjustment of guest intercepts
 * and thus demands a call to svm_apply_dirty() prior to VM entry.
 */
void
svm_pmu_enter(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_res_t entry = hma_svm_cpc_enter(&pmu->spv_hma_state);

	/*
	 * Until per-vCPU MSR bitmaps are available, ignore the ability to
	 * expose direct guest access to the counter MSRs.
	 */
	entry &= ~HSCR_ACCESS_CTR_MSR;

	if (entry != pmu->spv_last_entry) {
		/* Update intercepts to match what is allowed per HMA. */
		if (entry & HSCR_ACCESS_RDPMC && svm_pmu_force_exit == 0) {
			svm_disable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		} else {
			svm_enable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		}
	}
	pmu->spv_last_entry = entry;
}

/*
 * If guest PMU state is active, save it, and restore the host state.
 */
void
svm_pmu_exit(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_exit(&pmu->spv_hma_state);
}

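/*
 * vmm-data read handler for VDC_PMU_AMD: export the shadowed evtsels and
 * current counter values into the version-1 payload, presumably for use by
 * the save/restore (live migration) machinery.
 */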
static int
svm_pmu_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	struct vdi_pmu_amd_v1 *out = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		bzero(out, sizeof (*out));
		return (0);
	}

	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		out->vpa_evtsel[i] = pmu->spv_evtsel_shadow[i];
		out->vpa_ctr[i] = pmu->spv_hma_state.hscs_regs[i].hc_ctr;
	}
	return (0);
}

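/*
 * vmm-data write handler for VDC_PMU_AMD: import evtsels and counter values,
 * subjecting each evtsel to the same filtering a guest WRMSR would receive.
 */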
static int
svm_pmu_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	const struct vdi_pmu_amd_v1 *src = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		/*
		 * Skip importing state for an inactive PMU.
		 *
		 * It might be appropriate to return an error here, but it's not
		 * clear what would be most appropriate (or what userspace would
		 * do in such a case).
		 */
		return (0);
	}

	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;
	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		const uint64_t evtsel = src->vpa_evtsel[i];

		/*
		 * The shadow evtsel is kept as-is, but the "active" value
		 * undergoes the same verification as a guest WRMSR.
		 */
		pmu->spv_evtsel_shadow[i] = evtsel;
		if (svm_pmu_evtsel_allowed(evtsel, flavor)) {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = evtsel;
		} else {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = 0;
		}
		pmu->spv_hma_state.hscs_regs[i].hc_ctr = src->vpa_ctr[i];
	}
	return (0);
}

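/*
 * Register the version-1 PMU payload with the vmm-data framework so the
 * per-vCPU read/write handlers above can be reached through the vmm-data
 * interface from userspace.
 */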
static const vmm_data_version_entry_t pmu_amd_v1 = {
	.vdve_class = VDC_PMU_AMD,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_pmu_amd_v1),
	.vdve_vcpu_readf = svm_pmu_data_read,
	.vdve_vcpu_writef = svm_pmu_data_write,
};
VMM_DATA_VERSION(pmu_amd_v1);