/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2025 Oxide Computer Company
 */

#include <sys/kernel.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/x86_archext.h>

#include <sys/vmm_kernel.h>
#include "svm.h"
#include "svm_softc.h"
#include "svm_pmu.h"

/*
 * Allow guests to use perf counter resources.
 */
int svm_pmu_enabled = 1;

/*
 * Force guest exits on access to perf counter resources via RDPMC and
 * RDMSR/WRMSR (i.e. preclude disabling those intercepts).
 */
int svm_pmu_force_exit = 0;

void
svm_pmu_init(struct svm_softc *svm_sc)
{
	if (!is_x86_feature(x86_featureset, X86FSET_AMD_PCEC) ||
	    svm_pmu_enabled == 0) {
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	switch (uarchrev_uarch(cpuid_getuarchrev(CPU))) {
	case X86_UARCH_AMD_LEGACY:
		svm_sc->pmu_flavor = SPF_PRE_ZEN;
		break;
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		svm_sc->pmu_flavor = SPF_ZEN1;
		break;
	case X86_UARCH_AMD_ZEN2:
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
	case X86_UARCH_AMD_ZEN5:
		svm_sc->pmu_flavor = SPF_ZEN2;
		break;
	default:
		/* Exclude unrecognized uarch from perf counter access */
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	/* Turn on base and extended CPCs for all vCPUs */
	const uint_t maxcpu = vm_get_maxcpus(svm_sc->vm);
	for (uint_t i = 0; i < maxcpu; i++) {
		struct svm_pmu_vcpu *pmu_vcpu = svm_get_pmu(svm_sc, i);

		pmu_vcpu->spv_hma_state.hscs_flags = HCF_EN_BASE | HCF_EN_EXTD;
	}
}

static bool
svm_pmu_is_active(const struct svm_pmu_vcpu *pmu)
{
	return (pmu->spv_hma_state.hscs_flags != HCF_DISABLED);
}

static bool
svm_pmu_is_evt_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return (true);
	default:
		return (false);
	}
}

static bool
svm_pmu_is_ctr_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return (true);
	default:
		return (false);
	}
}
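
/*
 * Translate a counter-related MSR into a counter index.
 *
 * For illustration (relying only on the MSR definitions from x86_archext.h
 * plus AMD's published MSR map): the legacy K7 MSRs are laid out as two
 * contiguous blocks (PERF_EVTSEL0..3 followed by PERF_CTR0..3), so the index
 * is a simple offset from the first MSR of each block.  The extended F15H
 * MSRs instead interleave selector and counter in pairs (PERF_EVTSEL0,
 * PERF_CTR0, PERF_EVTSEL1, PERF_CTR1, ...), which is why those offsets are
 * divided by 2 below.
 */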
static uint_t
svm_pmu_msr_to_idx(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
		return (msr - MSR_AMD_K7_PERF_EVTSEL0);
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
		return (msr - MSR_AMD_K7_PERF_CTR0);
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return ((msr - MSR_AMD_F15H_PERF_EVTSEL0) / 2);
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return ((msr - MSR_AMD_F15H_PERF_CTR0) / 2);
	default:
		panic("unexpected perf. counter MSR: %X", msr);
	}
}

bool
svm_pmu_owned_msr(uint32_t msr)
{
	return (svm_pmu_is_evt_msr(msr) || svm_pmu_is_ctr_msr(msr));
}

/*
 * Is guest access to a given evtsel allowed for the "flavor" of the PMU?
 *
 * Initial access is fairly limited, providing access to only the evtsels
 * expected to be used by Linux `perf stat`.
 */
static bool
svm_pmu_evtsel_allowed(uint64_t evtsel, svm_pmu_flavor_t flavor)
{
	const uint64_t evt = evtsel & AMD_PERF_EVTSEL_EVT_MASK;
	const uint16_t umask = evtsel & AMD_PERF_EVTSEL_UNIT_MASK;

	/*
	 * Some of the perf counters have stayed fairly consistent in their
	 * identifiers throughout the AMD product line.
	 */
	switch (evt) {
	case 0x76:	/* CPU cycles */
	case 0xc0:	/* Retired instructions */
	case 0xc2:	/* Branch instructions */
	case 0xc3:	/* Branch misses */
		return (true);
	default:
		break;
	}

	if (flavor == SPF_PRE_ZEN) {
		switch (evt) {
		case 0x7d:	/* Cache hits */
		case 0x7e:	/* Cache misses */
			return (true);
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN1) {
		switch (evt) {
		case 0x60:	/* L2 accesses (group 1) */
		case 0x64:	/* Core to L2 access status */
			return (true);
		case 0x87:	/* IC fetch stall */
			switch (umask) {
			case 0x0100:	/* backend */
			case 0x0200:	/* frontend */
				return (true);
			default:
				return (false);
			}
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN2) {
		switch (evt) {
		case 0x60:	/* L2 accesses (group 1) */
		case 0x64:	/* Core to L2 access status */
		case 0xa9:	/* u-op queue empty (frontend stall) */
			return (true);
		default:
			return (false);
		}
	}

	return (false);
}
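
/*
 * As a concrete (illustrative, not normative) example of what passes the
 * filter above: when counting CPU cycles, a Linux guest running `perf stat`
 * typically programs an evtsel along the lines of 0x430076 -- event code
 * 0x76 in bits 7:0, a unit mask of 0 in bits 15:8, the USR (bit 16) and OS
 * (bit 17) qualifiers, and the EN bit (bit 22); the guest may set other
 * control bits (such as overflow-interrupt enable) as well.
 * AMD_PERF_EVTSEL_EVT_MASK extracts the event code (0x76, allowed for every
 * flavor) and AMD_PERF_EVTSEL_UNIT_MASK the unit mask, while the remaining
 * control bits are passed through untouched once the event is permitted.
 */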
vm_msr_result_t
svm_pmu_rdmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t *valp)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_evtsel_shadow[idx];
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_hma_state.hscs_regs[idx].hc_ctr;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

vm_msr_result_t
svm_pmu_wrmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t val)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);
	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		/*
		 * Keep the unmodified evtsel shadowed, should the guest choose
		 * to read it out later.
		 *
		 * XXX: Should we balk at reserved bits being set?
		 */
		pmu->spv_evtsel_shadow[idx] = val;

		if (!svm_pmu_evtsel_allowed(val, flavor)) {
			/*
			 * Disable any counters which have been configured with
			 * an event selector which we do not allow access to.
			 */
			val = 0;
		}
		pmu->spv_hma_state.hscs_regs[idx].hc_evtsel = val;
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		pmu->spv_hma_state.hscs_regs[idx].hc_ctr = val;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

bool
svm_pmu_rdpmc(struct svm_softc *svm_sc, int vcpu, uint32_t ecx, uint64_t *valp)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (false);
	}
	if (ecx >= SVM_PMU_MAX_COUNTERS) {
		return (false);
	}

	*valp = pmu->spv_hma_state.hscs_regs[ecx].hc_ctr;
	return (true);
}

/*
 * Attempt to load guest PMU state, if the guest vCPU happens to be actively
 * using any counters.  Host state will be saved if such loading occurs.
 *
 * The results of any state loading may require adjustment of guest intercepts
 * and thus demand a call to svm_apply_dirty() prior to VM entry.
 */
void
svm_pmu_enter(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_res_t entry = hma_svm_cpc_enter(&pmu->spv_hma_state);

	/*
	 * Until per-vCPU MSR bitmaps are available, ignore the ability to
	 * expose direct guest access to counter MSRs.
	 */
	entry &= ~HSCR_ACCESS_CTR_MSR;

	if (entry != pmu->spv_last_entry) {
		/* Update intercepts to match what is allowed per HMA. */
		if (entry & HSCR_ACCESS_RDPMC && svm_pmu_force_exit == 0) {
			svm_disable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		} else {
			svm_enable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		}
	}
	pmu->spv_last_entry = entry;
}
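
/*
 * For reference, a sketch of the per-VM-entry flow these routines assume
 * (the actual call sites are expected to live in the SVM VM-run path in
 * svm.c, which is not shown here):
 *
 *	svm_pmu_enter(svm_sc, vcpu);	load guest CPC state if in use
 *	svm_apply_dirty(...);		flush intercept changes to the VMCB
 *	(VMRUN, guest execution, #VMEXIT)
 *	svm_pmu_exit(svm_sc, vcpu);	save guest state, restore host state
 */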
/*
 * If guest PMU state is active, save it, and restore the host state.
 */
void
svm_pmu_exit(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_exit(&pmu->spv_hma_state);
}

static int
svm_pmu_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	struct vdi_pmu_amd_v1 *out = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		bzero(out, sizeof (*out));
		return (0);
	}

	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		out->vpa_evtsel[i] = pmu->spv_evtsel_shadow[i];
		out->vpa_ctr[i] = pmu->spv_hma_state.hscs_regs[i].hc_ctr;
	}
	return (0);
}

static int
svm_pmu_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	const struct vdi_pmu_amd_v1 *src = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		/*
		 * Skip importing state for an inactive PMU.
		 *
		 * It might be appropriate to return an error here, but it is
		 * not clear which error would be best (or what userspace would
		 * do in such a case).
		 */
		return (0);
	}

	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;
	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		const uint64_t evtsel = src->vpa_evtsel[i];

		/*
		 * The shadow evtsel is kept as-is, but the "active" value
		 * undergoes the same verification as a guest WRMSR.
		 */
		pmu->spv_evtsel_shadow[i] = evtsel;
		if (svm_pmu_evtsel_allowed(evtsel, flavor)) {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = evtsel;
		} else {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = 0;
		}
		pmu->spv_hma_state.hscs_regs[i].hc_ctr = src->vpa_ctr[i];
	}
	return (0);
}

static const vmm_data_version_entry_t pmu_amd_v1 = {
	.vdve_class = VDC_PMU_AMD,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_pmu_amd_v1),
	.vdve_vcpu_readf = svm_pmu_data_read,
	.vdve_vcpu_writef = svm_pmu_data_write,
};
VMM_DATA_VERSION(pmu_amd_v1);
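
/*
 * A note on the data interface registered above: judging from the accessors,
 * the vdi_pmu_amd_v1 payload (defined with the other vmm data-transfer
 * structures, not in this file) carries one shadowed event selector and one
 * counter value per possible counter, SVM_PMU_MAX_COUNTERS of each.  A
 * consumer saving or restoring a VM is expected to read or write that payload
 * per-vCPU through the generic vmm data read/write interface for class
 * VDC_PMU_AMD, version 1; on write, event selectors are re-checked by
 * svm_pmu_evtsel_allowed(), so a restored guest is held to the same evtsel
 * policy as a running one.
 */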