xref: /illumos-gate/usr/src/uts/intel/io/vmm/amd/svm_pmu.c (revision 09ea9c53cd9ac02c506f68475d98e8f07b457ffc)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2025 Oxide Computer Company
 */

#include <sys/kernel.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/x86_archext.h>

#include <sys/vmm_kernel.h>
#include "svm.h"
#include "svm_softc.h"
#include "svm_pmu.h"

/*
 * Allow guests to use perf counter resources.
 */
int svm_pmu_enabled = 1;

/*
 * Force guest exits (i.e. preclude disabling intercepts) for access to perf
 * counter resources via RDPMC and RDMSR/WRMSR.
 */
int svm_pmu_force_exit = 0;

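/*
 * Set up PMU state for a VM: classify the host CPU into a PMU "flavor"
 * (used later to filter guest event selectors) and, when support is
 * present, enable the base and extended counter sets for every vCPU.
 */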
void
svm_pmu_init(struct svm_softc *svm_sc)
{
	if (!is_x86_feature(x86_featureset, X86FSET_AMD_PCEC) ||
	    svm_pmu_enabled == 0) {
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	switch (uarchrev_uarch(cpuid_getuarchrev(CPU))) {
	case X86_UARCH_AMD_LEGACY:
		svm_sc->pmu_flavor = SPF_PRE_ZEN;
		break;
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		svm_sc->pmu_flavor = SPF_ZEN1;
		break;
	case X86_UARCH_AMD_ZEN2:
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
	case X86_UARCH_AMD_ZEN5:
		svm_sc->pmu_flavor = SPF_ZEN2;
		break;
	default:
		/* Exclude unrecognized uarch from perf counter access */
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	/* Turn on base and extended CPCs for all vCPUs */
	const uint_t maxcpu = vm_get_maxcpus(svm_sc->vm);
	for (uint_t i = 0; i < maxcpu; i++) {
		struct svm_pmu_vcpu *pmu_vcpu = svm_get_pmu(svm_sc, i);

		pmu_vcpu->spv_hma_state.hscs_flags = HCF_EN_BASE | HCF_EN_EXTD;
	}
}

static bool
svm_pmu_is_active(const struct svm_pmu_vcpu *pmu)
{
	return (pmu->spv_hma_state.hscs_flags != HCF_DISABLED);
}

static bool
svm_pmu_is_evt_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return (true);
	default:
		return (false);
	}
}

static bool
svm_pmu_is_ctr_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return (true);
	default:
		return (false);
	}
}

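/*
 * Map a perf counter MSR to its counter index.  The legacy (K7) MSRs are laid
 * out as four consecutive evtsels followed by four consecutive counters,
 * while the extended (F15H) MSRs are interleaved as evtsel/ctr pairs, hence
 * the divide-by-two below.
 */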
static uint_t
svm_pmu_msr_to_idx(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
		return (msr - MSR_AMD_K7_PERF_EVTSEL0);
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
		return (msr - MSR_AMD_K7_PERF_CTR0);
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return ((msr - MSR_AMD_F15H_PERF_EVTSEL0) / 2);
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return ((msr - MSR_AMD_F15H_PERF_CTR0) / 2);
	default:
		panic("unexpected perf. counter MSR: %X", msr);
	}
}

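/*
 * Does the given MSR belong to the virtualized PMU?  This allows guest
 * RDMSR/WRMSR of the counter and evtsel registers to be routed to the
 * handlers below.
 */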
bool
svm_pmu_owned_msr(uint32_t msr)
{
	return (svm_pmu_is_evt_msr(msr) || svm_pmu_is_ctr_msr(msr));
}

/*
 * Is guest access to a given evtsel allowed for the "flavor" of the PMU?
 *
 * Initial access is fairly limited, providing access to only the evtsels
 * expected to be used by Linux `perf stat`.
 */
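/*
 * For reference (per the AMD APM): the evtsel fields consulted here are the
 * event select (bits 7:0, with an extension in bits 35:32) and the unit mask
 * (bits 15:8), as masked by AMD_PERF_EVTSEL_EVT_MASK and
 * AMD_PERF_EVTSEL_UNIT_MASK.
 */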
static bool
svm_pmu_evtsel_allowed(uint64_t evtsel, svm_pmu_flavor_t flavor)
{
	const uint64_t evt = evtsel & AMD_PERF_EVTSEL_EVT_MASK;
	const uint16_t umask = evtsel & AMD_PERF_EVTSEL_UNIT_MASK;

	/*
	 * Some of the perf counters have stayed fairly consistent in their
	 * identifiers throughout the AMD product line.
	 */
	switch (evt) {
	case 0x76:	/* CPU cycles */
	case 0xc0:	/* Retired instructions */
	case 0xc2:	/* Branch instructions */
	case 0xc3:	/* Branch misses */
		return (true);
	default:
		break;
	}

	if (flavor == SPF_PRE_ZEN) {
		switch (evt) {
		case 0x7d: /* Cache hits */
		case 0x7e: /* Cache misses */
			return (true);
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN1) {
		switch (evt) {
		case 0x60: /* L2 accesses (group 1) */
		case 0x64: /* Core to L2 access status */
			return (true);
		case 0x87: /* IC fetch stall */
			switch (umask) {
			case 0x0100: /* backend */
			case 0x0200: /* frontend */
				return (true);
			default:
				return (false);
			}
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN2) {
		switch (evt) {
		case 0x60: /* L2 accesses (group 1) */
		case 0x64: /* Core to L2 access status */
		case 0xa9: /* u-op queue empty (frontend stall) */
			return (true);
		default:
			return (false);
		}
	}

	return (false);
}

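/*
 * Handle guest RDMSR of a PMU-owned MSR.  Event selector reads are satisfied
 * from the per-vCPU shadow copy, while counter reads come from the
 * HMA-maintained counter state.
 */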
vm_msr_result_t
svm_pmu_rdmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t *valp)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_evtsel_shadow[idx];
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_hma_state.hscs_regs[idx].hc_ctr;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

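/*
 * Handle guest WRMSR to a PMU-owned MSR.  Event selector writes are filtered
 * through svm_pmu_evtsel_allowed() before being applied; a disallowed
 * selector leaves the counter disabled while the written value remains
 * visible to the guest via the shadow copy.
 */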
vm_msr_result_t
svm_pmu_wrmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t val)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);
	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		/*
		 * Keep the unmodified evtsel shadowed, should the guest choose
		 * to read it out later.
		 *
		 * XXX: Should we balk at reserved bits being set?
		 */
		pmu->spv_evtsel_shadow[idx] = val;

		if (!svm_pmu_evtsel_allowed(val, flavor)) {
			/*
			 * Disable any counters which have been configured with
			 * an event selector which we do not allow access to.
			 */
			val = 0;
		}
		pmu->spv_hma_state.hscs_regs[idx].hc_evtsel = val;
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		pmu->spv_hma_state.hscs_regs[idx].hc_ctr = val;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

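/*
 * Handle an intercepted RDPMC, where %ecx holds the counter index requested
 * by the guest.  Returns false if the access cannot be satisfied (inactive
 * PMU or out-of-range index), leaving disposition to the caller.
 */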
bool
svm_pmu_rdpmc(struct svm_softc *svm_sc, int vcpu, uint32_t ecx, uint64_t *valp)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (false);
	}
	if (ecx >= SVM_PMU_MAX_COUNTERS) {
		return (false);
	}

	*valp = pmu->spv_hma_state.hscs_regs[ecx].hc_ctr;
	return (true);
}

/*
 * Attempt to load guest PMU state, if the guest vCPU happens to be actively
 * using any counters.  Host state will be saved if such loading occurs.
 *
 * The results of any state loading may require adjustment of guest intercepts
 * and thus demand a call to svm_apply_dirty() prior to VM entry.
 */
void
svm_pmu_enter(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_res_t entry = hma_svm_cpc_enter(&pmu->spv_hma_state);

	/*
	 * Until per-vCPU MSR bitmaps are available, ignore the ability to
	 * expose direct guest access to counter MSRs.
	 */
	entry &= ~HSCR_ACCESS_CTR_MSR;

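	/*
	 * Only reprogram the RDPMC intercept when the permitted access
	 * differs from what was applied on the previous entry.
	 */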
	if (entry != pmu->spv_last_entry) {
		/* Update intercepts to match what is allowed per HMA. */
		if (entry & HSCR_ACCESS_RDPMC && svm_pmu_force_exit == 0) {
			svm_disable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		} else {
			svm_enable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		}
	}
	pmu->spv_last_entry = entry;
}

/*
 * If guest PMU state is active, save it, and restore the host state.
 */
void
svm_pmu_exit(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_exit(&pmu->spv_hma_state);
}

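/*
 * vmm-data accessors for class VDC_PMU_AMD, version 1: these export and
 * import per-vCPU PMU state (evtsel shadows and counter values), e.g. when
 * guest state is saved and restored.
 */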
static int
svm_pmu_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	struct vdi_pmu_amd_v1 *out = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		bzero(out, sizeof (*out));
		return (0);
	}

	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		out->vpa_evtsel[i] = pmu->spv_evtsel_shadow[i];
		out->vpa_ctr[i] = pmu->spv_hma_state.hscs_regs[i].hc_ctr;
	}
	return (0);
}

static int
svm_pmu_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	const struct vdi_pmu_amd_v1 *src = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		/*
		 * Skip importing state for an inactive PMU.
		 *
		 * It might be appropriate to return an error here, but it's not
		 * clear what would be most appropriate (or what userspace would
		 * do in such a case).
		 */
		return (0);
	}

	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;
	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		const uint64_t evtsel = src->vpa_evtsel[i];

		/*
		 * The shadow evtsel is kept as-is, but the "active" value
		 * undergoes the same verification as a guest WRMSR.
		 */
		pmu->spv_evtsel_shadow[i] = evtsel;
		if (svm_pmu_evtsel_allowed(evtsel, flavor)) {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = evtsel;
		} else {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = 0;
		}
		pmu->spv_hma_state.hscs_regs[i].hc_ctr = src->vpa_ctr[i];
	}
	return (0);
}

static const vmm_data_version_entry_t pmu_amd_v1 = {
	.vdve_class = VDC_PMU_AMD,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_pmu_amd_v1),
	.vdve_vcpu_readf = svm_pmu_data_read,
	.vdve_vcpu_writef = svm_pmu_data_write,
};
VMM_DATA_VERSION(pmu_amd_v1);