/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2025 Oxide Computer Company
 */

#include <sys/kernel.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/x86_archext.h>

#include <sys/vmm_kernel.h>
#include "svm.h"
#include "svm_softc.h"
#include "svm_pmu.h"

/*
 * Allow guests to use perf counter resources.
 */
int svm_pmu_enabled = 1;

/*
 * Force guest exits (preclude disabling intercepts) for access to perf counter
 * resources via RDPMC and RDMSR/WRMSR.
 */
int svm_pmu_force_exit = 0;

void
svm_pmu_init(struct svm_softc *svm_sc)
{
	if (!is_x86_feature(x86_featureset, X86FSET_AMD_PCEC) ||
	    svm_pmu_enabled == 0) {
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	switch (uarchrev_uarch(cpuid_getuarchrev(CPU))) {
	case X86_UARCH_AMD_LEGACY:
		svm_sc->pmu_flavor = SPF_PRE_ZEN;
		break;
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		svm_sc->pmu_flavor = SPF_ZEN1;
		break;
	case X86_UARCH_AMD_ZEN2:
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
	case X86_UARCH_AMD_ZEN5:
		svm_sc->pmu_flavor = SPF_ZEN2;
		break;
	default:
		/* Exclude unrecognized uarch from perf counter access */
		svm_sc->pmu_flavor = SPF_NONE;
		return;
	}

	/* Turn on base and extended CPCs for all vCPUs */
	const uint_t maxcpu = vm_get_maxcpus(svm_sc->vm);
	for (uint_t i = 0; i < maxcpu; i++) {
		struct svm_pmu_vcpu *pmu_vcpu = svm_get_pmu(svm_sc, i);

		pmu_vcpu->spv_hma_state.hscs_flags = HCF_EN_BASE | HCF_EN_EXTD;
	}
}

static bool
svm_pmu_is_active(const struct svm_pmu_vcpu *pmu)
{
	return (pmu->spv_hma_state.hscs_flags != HCF_DISABLED);
}

static bool
svm_pmu_is_evt_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return (true);
	default:
		return (false);
	}
}

static bool
svm_pmu_is_ctr_msr(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return (true);
	default:
		return (false);
	}
}

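/*
 * Map a perf counter MSR to its counter index.  The legacy K7 EVTSEL and CTR
 * MSRs are each numbered contiguously, so the offset from the base MSR is the
 * index itself; the extended F15H MSRs interleave EVTSEL/CTR pairs, so the
 * offset is halved to recover the index.
 */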
static uint_t
svm_pmu_msr_to_idx(uint32_t msr)
{
	switch (msr) {
	case MSR_AMD_K7_PERF_EVTSEL0:
	case MSR_AMD_K7_PERF_EVTSEL1:
	case MSR_AMD_K7_PERF_EVTSEL2:
	case MSR_AMD_K7_PERF_EVTSEL3:
		return (msr - MSR_AMD_K7_PERF_EVTSEL0);
	case MSR_AMD_K7_PERF_CTR0:
	case MSR_AMD_K7_PERF_CTR1:
	case MSR_AMD_K7_PERF_CTR2:
	case MSR_AMD_K7_PERF_CTR3:
		return (msr - MSR_AMD_K7_PERF_CTR0);
	case MSR_AMD_F15H_PERF_EVTSEL0:
	case MSR_AMD_F15H_PERF_EVTSEL1:
	case MSR_AMD_F15H_PERF_EVTSEL2:
	case MSR_AMD_F15H_PERF_EVTSEL3:
	case MSR_AMD_F15H_PERF_EVTSEL4:
	case MSR_AMD_F15H_PERF_EVTSEL5:
		return ((msr - MSR_AMD_F15H_PERF_EVTSEL0) / 2);
	case MSR_AMD_F15H_PERF_CTR0:
	case MSR_AMD_F15H_PERF_CTR1:
	case MSR_AMD_F15H_PERF_CTR2:
	case MSR_AMD_F15H_PERF_CTR3:
	case MSR_AMD_F15H_PERF_CTR4:
	case MSR_AMD_F15H_PERF_CTR5:
		return ((msr - MSR_AMD_F15H_PERF_CTR0) / 2);
	default:
		panic("unexpected perf. counter MSR: %X", msr);
	}
}

bool
svm_pmu_owned_msr(uint32_t msr)
{
	return (svm_pmu_is_evt_msr(msr) || svm_pmu_is_ctr_msr(msr));
}

/*
 * Is guest access to a given evtsel allowed for the "flavor" of the PMU?
 *
 * Initial access is fairly limited, providing access to only the evtsels
 * expected to be used by Linux `perf stat`.
 */
static bool
svm_pmu_evtsel_allowed(uint64_t evtsel, svm_pmu_flavor_t flavor)
{
	const uint64_t evt = evtsel & AMD_PERF_EVTSEL_EVT_MASK;
	const uint16_t umask = evtsel & AMD_PERF_EVTSEL_UNIT_MASK;

	/*
	 * Some of the perf counters have stayed fairly consistent in their
	 * identifiers throughout the AMD product line.
	 */
	switch (evt) {
	case 0x76:	/* CPU cycles */
	case 0xc0:	/* Retired instructions */
	case 0xc2:	/* Branch instructions */
	case 0xc3:	/* Branch misses */
		return (true);
	default:
		break;
	}

	if (flavor == SPF_PRE_ZEN) {
		switch (evt) {
		case 0x7d:	/* Cache hits */
		case 0x7e:	/* Cache misses */
			return (true);
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN1) {
		switch (evt) {
		case 0x60:	/* L2 accesses (group 1) */
		case 0x64:	/* Core to L2 access status */
			return (true);
		case 0x87:	/* IC fetch stall */
			switch (umask) {
			case 0x0100:	/* backend */
			case 0x0200:	/* frontend */
				return (true);
			default:
				return (false);
			}
		default:
			return (false);
		}
	} else if (flavor == SPF_ZEN2) {
		switch (evt) {
		case 0x60:	/* L2 accesses (group 1) */
		case 0x64:	/* Core to L2 access status */
		case 0xa9:	/* u-op queue empty (frontend stall) */
			return (true);
		default:
			return (false);
		}
	}

	return (false);
}

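/*
 * Emulate guest RDMSR of a PMU-owned MSR.  Event selectors are read back from
 * the per-vCPU shadow copy, while counter values come from the HMA-maintained
 * counter state.
 */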
vm_msr_result_t
svm_pmu_rdmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t *valp)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_evtsel_shadow[idx];
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		*valp = pmu->spv_hma_state.hscs_regs[idx].hc_ctr;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

vm_msr_result_t
svm_pmu_wrmsr(struct svm_softc *svm_sc, int vcpu, uint32_t msr, uint64_t val)
{
	ASSERT(svm_pmu_owned_msr(msr));

	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);
	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;

	if (!svm_pmu_is_active(pmu)) {
		return (VMR_UNHANLDED);
	}

	if (svm_pmu_is_evt_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		/*
		 * Keep the unmodified evtsel shadowed, should the guest choose
		 * to read it out later.
		 *
		 * XXX: Should we balk at reserved bits being set?
		 */
		pmu->spv_evtsel_shadow[idx] = val;

		if (!svm_pmu_evtsel_allowed(val, flavor)) {
			/*
			 * Disable any counters which have been configured with
			 * an event selector which we do not allow access to.
			 */
			val = 0;
		}
		pmu->spv_hma_state.hscs_regs[idx].hc_evtsel = val;
	} else if (svm_pmu_is_ctr_msr(msr)) {
		const uint_t idx = svm_pmu_msr_to_idx(msr);

		pmu->spv_hma_state.hscs_regs[idx].hc_ctr = val;
	} else {
		/* UNREACHABLE */
		return (VMR_UNHANLDED);
	}

	return (VMR_OK);
}

bool
svm_pmu_rdpmc(struct svm_softc *svm_sc, int vcpu, uint32_t ecx, uint64_t *valp)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return (false);
	}
	if (ecx >= SVM_PMU_MAX_COUNTERS) {
		return (false);
	}

	*valp = pmu->spv_hma_state.hscs_regs[ecx].hc_ctr;
	return (true);
}

/*
 * Attempt to load guest PMU state, if the guest vCPU happens to be actively
 * using any counters.  Host state will be saved if such loading occurs.
 *
 * The results of any state loading may require adjustment of guest intercepts
 * and thus demands a call to svm_apply_dirty() prior to VM entry.
 */
void
svm_pmu_enter(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_res_t entry = hma_svm_cpc_enter(&pmu->spv_hma_state);

	/*
	 * Until per-vCPU MSR bitmaps are available, ignore the ability to
	 * expose direct guest access to counter MSRs.
	 */
	entry &= ~HSCR_ACCESS_CTR_MSR;

	if (entry != pmu->spv_last_entry) {
		/* Update intercepts to match what is allowed per HMA. */
		if (entry & HSCR_ACCESS_RDPMC && svm_pmu_force_exit == 0) {
			svm_disable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		} else {
			svm_enable_intercept(svm_sc, vcpu, VMCB_CTRL1_INTCPT,
			    VMCB_INTCPT_RDPMC);
		}
	}
	pmu->spv_last_entry = entry;
}

/*
 * If guest PMU state is active, save it, and restore the host state.
 */
void
svm_pmu_exit(struct svm_softc *svm_sc, int vcpu)
{
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpu);

	if (!svm_pmu_is_active(pmu)) {
		return;
	}

	hma_svm_cpc_exit(&pmu->spv_hma_state);
}

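/*
 * vmm-data handlers for the VDC_PMU_AMD (v1) payload, used to export and
 * import per-vCPU PMU state (evtsel shadows and counter values), such as for
 * instance save/restore.
 */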
static int
svm_pmu_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	struct vdi_pmu_amd_v1 *out = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		bzero(out, sizeof (*out));
		return (0);
	}

	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		out->vpa_evtsel[i] = pmu->spv_evtsel_shadow[i];
		out->vpa_ctr[i] = pmu->spv_hma_state.hscs_regs[i].hc_ctr;
	}
	return (0);
}

static int
svm_pmu_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_PMU_AMD);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_pmu_amd_v1));

	struct svm_softc *svm_sc = vm_get_cookie(vm);
	struct svm_pmu_vcpu *pmu = svm_get_pmu(svm_sc, vcpuid);
	const struct vdi_pmu_amd_v1 *src = req->vdr_data;

	if (!svm_pmu_is_active(pmu)) {
		/*
		 * Skip importing state for an inactive PMU.
		 *
		 * It might be appropriate to return an error here, but it's
		 * not clear what would be most appropriate (or what userspace
		 * would do in such a case).
		 */
		return (0);
	}

	const svm_pmu_flavor_t flavor = svm_sc->pmu_flavor;
	for (uint_t i = 0; i < SVM_PMU_MAX_COUNTERS; i++) {
		const uint64_t evtsel = src->vpa_evtsel[i];

		/*
		 * The shadow evtsel is kept as-is, but the "active" value
		 * undergoes the same verification as a guest WRMSR.
		 */
		pmu->spv_evtsel_shadow[i] = evtsel;
		if (svm_pmu_evtsel_allowed(evtsel, flavor)) {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = evtsel;
		} else {
			pmu->spv_hma_state.hscs_regs[i].hc_evtsel = 0;
		}
		pmu->spv_hma_state.hscs_regs[i].hc_ctr = src->vpa_ctr[i];
	}
	return (0);
}

static const vmm_data_version_entry_t pmu_amd_v1 = {
	.vdve_class = VDC_PMU_AMD,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_pmu_amd_v1),
	.vdve_vcpu_readf = svm_pmu_data_read,
	.vdve_vcpu_writef = svm_pmu_data_write,
};
VMM_DATA_VERSION(pmu_amd_v1);