// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023, Tencent, Inc.
 */
#include <x86intrin.h>

#include "pmu.h"
#include "processor.h"

/* Number of iterations of the loop for the guest measurement payload. */
#define NUM_LOOPS			10

/* Each iteration of the loop retires one branch instruction. */
#define NUM_BRANCH_INSNS_RETIRED	(NUM_LOOPS)

/*
 * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
 * 1 MOV, 1 LOOP.
 */
#define NUM_INSNS_PER_LOOP		4

/*
 * Number of "extra" instructions that will be counted, i.e. the number of
 * instructions that are needed to set up the loop and then disable the
 * counter. 2 MOV, 2 XOR, 1 WRMSR.
 */
#define NUM_EXTRA_INSNS			5

/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED		(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
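
/*
 * I.e. with the defaults above, the measured section retires
 * 10 * 4 + 5 = 45 instructions and NUM_LOOPS (10) branches, one taken
 * branch per LOOP iteration.
 */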

/* Track which architectural events are supported by hardware. */
static uint32_t hardware_pmu_arch_events;

static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;

#define X86_PMU_FEATURE_NULL						\
({									\
	struct kvm_x86_pmu_feature feature = {};			\
									\
	feature;							\
})

static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
{
	return !(*(u64 *)&event);
}

struct kvm_intel_pmu_event {
	struct kvm_x86_pmu_feature gp_event;
	struct kvm_x86_pmu_feature fixed_event;
};

/*
 * Wrap the array to appease the compiler, as the macros used to construct each
 * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
 * compiler often thinks the feature definitions aren't compile-time constants.
 */
static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
{
	const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
		[INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
		/*
		 * Note, the fixed counter for reference cycles is NOT the same as the
		 * general purpose architectural event. The fixed counter explicitly
		 * counts at the same frequency as the TSC, whereas the GP event counts
		 * at a fixed, but uarch specific, frequency. Bundle them here for
		 * simplicity.
		 */
		[INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
		[INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
		[INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
	};

	kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);

	return __intel_event_to_feature[idx];
}

static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
						  void *guest_code,
						  uint8_t pmu_version,
						  uint64_t perf_capabilities)
{
	struct kvm_vm *vm;

	vm = vm_create_with_one_vcpu(vcpu, guest_code);
	sync_global_to_guest(vm, kvm_pmu_version);
	sync_global_to_guest(vm, hardware_pmu_arch_events);

	/*
	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
	 */
	if (kvm_has_perf_caps)
		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
	return vm;
}
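
/*
 * Run the vCPU until the guest signals completion (UCALL_DONE), forwarding
 * guest printf output and reporting any guest assertion failure.
 */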
static void run_vcpu(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	do {
		vcpu_run(vcpu);
		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_PRINTF:
			pr_info("%s", uc.buffer);
			break;
		case UCALL_DONE:
			break;
		default:
			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
		}
	} while (uc.cmd != UCALL_DONE);
}

static uint8_t guest_get_pmu_version(void)
{
	/*
	 * Return the effective PMU version, i.e. the minimum between what KVM
	 * supports and what is enumerated to the guest. The host deliberately
	 * advertises a PMU version to the guest beyond what is actually
	 * supported by KVM to verify KVM doesn't freak out and do something
	 * bizarre with an architecturally valid, but unsupported, version.
	 */
	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}

/*
 * If an architectural event is supported and guaranteed to generate at least
 * one "hit", assert that its count is non-zero. If an event isn't supported or
 * the test can't guarantee the associated action will occur, then all bets are
 * off regarding the count, i.e. no checks can be done.
 *
 * Sanity check that in all cases, the event doesn't count when it's disabled,
 * and that KVM correctly emulates the write of an arbitrary value.
 */
static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
	uint64_t count;

	count = _rdpmc(pmc);
	if (!(hardware_pmu_arch_events & BIT(idx)))
		goto sanity_checks;

	switch (idx) {
	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
		/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
		if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
			GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
		else
			GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
		break;
	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
		/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
		if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
			GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
		else
			GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
		break;
	case INTEL_ARCH_LLC_REFERENCES_INDEX:
	case INTEL_ARCH_LLC_MISSES_INDEX:
		if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
		    !this_cpu_has(X86_FEATURE_CLFLUSH))
			break;
		fallthrough;
	case INTEL_ARCH_CPU_CYCLES_INDEX:
	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
	case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
	case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
		GUEST_ASSERT_NE(count, 0);
		break;
	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
	case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
		__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
			       "Expected top-down slots >= %u, got count = %lu",
			       NUM_INSNS_RETIRED, count);
		break;
	default:
		break;
	}

sanity_checks:
	__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
	GUEST_ASSERT_EQ(_rdpmc(pmc), count);

	wrmsr(pmc_msr, 0xdead);
	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}

/*
 * Enable and disable the PMC in a monolithic asm blob to ensure that the
 * compiler can't insert _any_ code into the measured sequence. Note, ECX
 * doesn't need to be clobbered as the input value, @_msr, is restored
 * before the end of the sequence.
 *
 * If CLFLUSH{,OPT} is supported, flush the cacheline containing the memory
 * operand that is loaded by the MOV on each loop iteration to force LLC
 * references and misses, i.e. to allow testing that those events actually
 * count.
 *
 * If forced emulation is enabled (and specified), force emulation on a subset
 * of the measured code to verify that KVM correctly emulates instructions and
 * branches retired events in conjunction with hardware also counting said
 * events.
 */
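
/*
 * Register usage in the blob below: the first WRMSR (ECX = @_msr,
 * EDX:EAX = @_value) enables the PMC, ECX is then repurposed as the loop
 * counter, and the MOV in the loop body reads the same memory operand that
 * CLFLUSH{,OPT} flushes (when supported) to generate LLC references/misses.
 * EDI holds a second copy of @_msr so that ECX can be restored and EDX:EAX
 * zeroed to disable the PMC with the final WRMSR.
 */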
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
do {										\
	__asm__ __volatile__("wrmsr\n\t"					\
			     " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
			     "1:\n\t"						\
			     clflush "\n\t"					\
			     "mfence\n\t"					\
			     "mov %[m], %%eax\n\t"				\
			     FEP "loop 1b\n\t"					\
			     FEP "mov %%edi, %%ecx\n\t"				\
			     FEP "xor %%eax, %%eax\n\t"				\
			     FEP "xor %%edx, %%edx\n\t"				\
			     "wrmsr\n\t"					\
			     :: "a"((uint32_t)_value), "d"(_value >> 32),	\
				"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version)	\
	);									\
} while (0)

#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)		\
do {										\
	wrmsr(_pmc_msr, 0);							\
										\
	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP);	\
	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP);	\
	else									\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
										\
	guest_assert_event_count(_idx, _pmc, _pmc_msr);				\
} while (0)

static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
	GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");

	if (is_forced_emulation_enabled)
		GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}

static void guest_test_arch_event(uint8_t idx)
{
	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint32_t pmu_version = guest_get_pmu_version();
	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
	bool guest_has_perf_global_ctrl = pmu_version >= 2;
	struct kvm_x86_pmu_feature gp_event, fixed_event;
	uint32_t base_pmc_msr;
	unsigned int i;

	/* The host side shouldn't invoke this without a guest PMU. */
	GUEST_ASSERT(pmu_version);

	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_pmc_msr = MSR_IA32_PMC0;
	else
		base_pmc_msr = MSR_IA32_PERFCTR0;

	gp_event = intel_event_to_feature(idx).gp_event;
	GUEST_ASSERT_EQ(idx, gp_event.f.bit);

	GUEST_ASSERT(nr_gp_counters);

	for (i = 0; i < nr_gp_counters; i++) {
		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
				    ARCH_PERFMON_EVENTSEL_ENABLE |
				    intel_pmu_arch_events[idx];

		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
		if (guest_has_perf_global_ctrl)
			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));

		__guest_test_arch_event(idx, i, base_pmc_msr + i,
					MSR_P6_EVNTSEL0 + i, eventsel);
	}

	if (!guest_has_perf_global_ctrl)
		return;

	fixed_event = intel_event_to_feature(idx).fixed_event;
	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
		return;

	i = fixed_event.f.bit;

	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));

	__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
				MSR_CORE_PERF_FIXED_CTR0 + i,
				MSR_CORE_PERF_GLOBAL_CTRL,
				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
}

static void guest_test_arch_events(void)
{
	uint8_t i;

	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
		guest_test_arch_event(i);

	GUEST_DONE();
}
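
/*
 * Create a VM with the given PMU version and PERF_CAPABILITIES, override the
 * length of the arch events bit vector and the mask of unavailable events via
 * CPUID, and run the guest's arch event tests.
 */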
static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t length, uint32_t unavailable_mask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* Testing arch events requires a vPMU (there are no negative tests). */
	if (!pmu_version)
		return;

	unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
				    X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
				length);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
				unavailable_mask);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

/*
 * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
 * that aren't defined counter MSRs *probably* don't exist, but there's no
 * guarantee that currently undefined MSR indices won't be used for something
 * other than PMCs in the future.
 */
#define MAX_NR_GP_COUNTERS	8
#define MAX_NR_FIXED_COUNTERS	3

#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)		\
	__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,		\
		       "Expected %s on " #insn "(0x%x), got %s",		\
		       expect_gp ? "#GP" : "no fault", msr, ex_str(vector))

#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)			\
	__GUEST_ASSERT(val == expected,						\
		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
		       msr, expected, val);

static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
			     uint64_t expected_val)
{
	uint8_t vector;
	uint64_t val;

	vector = rdpmc_safe(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);

	if (!is_forced_emulation_enabled)
		return;

	vector = rdpmc_safe_fep(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
}
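
/*
 * Verify RDMSR/WRMSR/RDPMC behavior for all possible counters of the given
 * type: counters that are enumerated (contiguously or via @or_mask) must be
 * readable and writable, while accesses to unsupported counters must #GP,
 * except for the legacy MSR_P6_PERFCTR[0|1] aliases, whose writes KVM drops.
 */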
static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
				 uint8_t nr_counters, uint32_t or_mask)
{
	const bool pmu_has_fast_mode = !guest_get_pmu_version();
	uint8_t i;

	for (i = 0; i < nr_possible_counters; i++) {
		/*
		 * TODO: Test a value that validates full-width writes and the
		 * width of the counters.
		 */
		const uint64_t test_val = 0xffff;
		const uint32_t msr = base_msr + i;

		/*
		 * Fixed counters are supported if the counter is less than the
		 * number of enumerated contiguous counters *or* the counter is
		 * explicitly enumerated in the supported counters mask.
		 */
		const bool expect_success = i < nr_counters || (or_mask & BIT(i));

		/*
		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
		 * unsupported, i.e. doesn't #GP and reads back '0'.
		 */
		const uint64_t expected_val = expect_success ? test_val : 0;
		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
				       msr != MSR_P6_PERFCTR1;
		uint32_t rdpmc_idx;
		uint8_t vector;
		uint64_t val;

		vector = wrmsr_safe(msr, test_val);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);

		vector = rdmsr_safe(msr, &val);
		GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);

		/* On #GP, the result of RDMSR is undefined. */
		if (!expect_gp)
			GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);

		/*
		 * Redo the read tests with RDPMC, which has different indexing
		 * semantics and additional capabilities.
		 */
		rdpmc_idx = i;
		if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
			rdpmc_idx |= INTEL_RDPMC_FIXED;

		guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);

		/*
		 * KVM doesn't support non-architectural PMUs, i.e. it should be
		 * impossible to have fast mode RDPMC. Verify that attempting
		 * to use fast RDPMC always #GPs.
		 */
		GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
		rdpmc_idx |= INTEL_RDPMC_FAST;
		guest_test_rdpmc(rdpmc_idx, false, -1ull);

		vector = wrmsr_safe(msr, 0);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
	}
}

static void guest_test_gp_counters(void)
{
	uint8_t pmu_version = guest_get_pmu_version();
	uint8_t nr_gp_counters = 0;
	uint32_t base_msr;

	if (pmu_version)
		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);

	/*
	 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
	 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
	 * of GP counters. If there are no GP counters, require KVM to leave
	 * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
	 * follow the spirit of the architecture and only globally enable GP
	 * counters, of which there are none.
	 */
	if (pmu_version > 1) {
		uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);

		if (nr_gp_counters)
			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
		else
			GUEST_ASSERT_EQ(global_ctrl, 0);
	}

	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_msr = MSR_IA32_PMC0;
	else
		base_msr = MSR_IA32_PERFCTR0;

	guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
	GUEST_DONE();
}

static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t nr_gp_counters)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
				nr_gp_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}
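
/*
 * Verify read/write access to the fixed counters, and that supported fixed
 * counters actually count when enabled via FIXED_CTR_CTRL and
 * PERF_GLOBAL_CTRL, whereas enabling unsupported fixed counters must #GP.
 */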
static void guest_test_fixed_counters(void)
{
	uint64_t supported_bitmask = 0;
	uint8_t nr_fixed_counters = 0;
	uint8_t i;

	/* Fixed counters require Architectural vPMU Version 2+. */
	if (guest_get_pmu_version() >= 2)
		nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);

	/*
	 * The supported bitmask for fixed counters was introduced in PMU
	 * version 5.
	 */
	if (guest_get_pmu_version() >= 5)
		supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);

	guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
			     nr_fixed_counters, supported_bitmask);

	for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
		uint8_t vector;
		uint64_t val;

		if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
			vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
					    FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in FIXED_CTR_CTRL", i);

			vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
					    FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
			continue;
		}

		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);

		GUEST_ASSERT_NE(val, 0);
	}
	GUEST_DONE();
}

static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
				uint8_t nr_fixed_counters,
				uint32_t supported_bitmask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
				supported_bitmask);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
				nr_fixed_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}
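
/*
 * Sweep the matrix of PMU versions, PERF_CAPABILITIES, arch event lengths and
 * unavailable-event masks, and GP/fixed counter counts that KVM supports.
 */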
static void test_intel_counters(void)
{
	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	unsigned int i;
	uint8_t v, j;
	uint32_t k;

	const uint64_t perf_caps[] = {
		0,
		PMU_CAP_FW_WRITES,
	};

	/*
	 * To keep the total runtime reasonable, test only a handful of select,
	 * semi-arbitrary values for the mask of unavailable PMU events. Test
	 * 0 (all events available) and all ones (no events available) as well
	 * as alternating bit sequences, e.g. to detect if KVM is checking the
	 * wrong bit(s).
	 */
	const uint32_t unavailable_masks[] = {
		0x0,
		0xffffffffu,
		0xaaaaaaaau,
		0x55555555u,
		0xf0f0f0f0u,
		0x0f0f0f0fu,
		0xa0a0a0a0u,
		0x0a0a0a0au,
		0x50505050u,
		0x05050505u,
	};

	/*
	 * Test up to PMU v5, which is the current maximum version defined by
	 * Intel, i.e. is the last version that is guaranteed to be backwards
	 * compatible with KVM's existing behavior.
	 */
	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);

	/*
	 * Detect the existence of events that aren't supported by selftests.
	 * This will (obviously) fail any time hardware adds support for a new
	 * event, but it's worth paying that price to keep the test fresh.
	 */
	TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
		    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
		    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

	/*
	 * Iterate over known arch events irrespective of KVM/hardware support
	 * to verify that KVM doesn't reject programming of events just because
	 * the *architectural* encoding is unsupported. Track which events are
	 * supported in hardware; the guest side will validate supported events
	 * count correctly, even if *enumeration* of the event is unsupported
	 * by KVM and/or isn't exposed to the guest.
	 */
	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
		if (this_pmu_has(intel_event_to_feature(i).gp_event))
			hardware_pmu_arch_events |= BIT(i);
	}

	for (v = 0; v <= max_pmu_version; v++) {
		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
			if (!kvm_has_perf_caps && perf_caps[i])
				continue;

			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);

			/*
			 * Test each unavailable-events mask above, including
			 * '0' and all ones (all events available and
			 * unavailable, respectively), for lengths up to the
			 * number of events + 1 (to verify KVM doesn't do
			 * weird things if the guest length is greater than
			 * the host length).
			 */
			for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
				for (k = 0; k < ARRAY_SIZE(unavailable_masks); k++)
					test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
			}

			pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_gp_counters; j++)
				test_gp_counters(v, perf_caps[i], j);

			pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_fixed_counters; j++) {
				for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
					test_fixed_counters(v, perf_caps[i], j, k);
			}
		}
	}
}

int main(int argc, char *argv[])
{
	TEST_REQUIRE(kvm_is_pmu_enabled());

	TEST_REQUIRE(host_cpu_is_intel);
	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);

	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);

	test_intel_counters();

	return 0;
}