// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023, Tencent, Inc.
 */
#include <x86intrin.h>

#include "pmu.h"
#include "processor.h"

/* Number of iterations of the loop for the guest measurement payload. */
#define NUM_LOOPS			10

/* Each iteration of the loop retires one branch instruction. */
#define NUM_BRANCH_INSNS_RETIRED	(NUM_LOOPS)

/*
 * Number of instructions in each loop.  1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
 * 1 MOV, 1 LOOP.
 */
#define NUM_INSNS_PER_LOOP		4

/*
 * Number of "extra" instructions that will be counted, i.e. the number of
 * instructions that are needed to set up the loop and then disable the
 * counter.  2 MOV, 2 XOR, 1 WRMSR.
 */
#define NUM_EXTRA_INSNS			5

/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED		(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)

/* Track which architectural events are supported by hardware. */
static uint32_t hardware_pmu_arch_events;

static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;

#define X86_PMU_FEATURE_NULL						\
({									\
	struct kvm_x86_pmu_feature feature = {};			\
									\
	feature;							\
})

static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
{
	return !(*(u64 *)&event);
}

struct kvm_intel_pmu_event {
	struct kvm_x86_pmu_feature gp_event;
	struct kvm_x86_pmu_feature fixed_event;
};

/*
 * Wrap the array to appease the compiler, as the macros used to construct each
 * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
 * compiler often thinks the feature definitions aren't compile-time constants.
 */
static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
{
	const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
		[INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
		/*
		 * Note, the fixed counter for reference cycles is NOT the same as the
		 * general purpose architectural event.  The fixed counter explicitly
		 * counts at the same frequency as the TSC, whereas the GP event counts
		 * at a fixed, but uarch specific, frequency.  Bundle them here for
		 * simplicity.
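		 *
		 * Bundling is benign for the assertions below: for reference
		 * cycles, guest_assert_event_count() only checks for a non-zero
		 * count, so the frequency difference between the two encodings
		 * doesn't affect the checks.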
		 */
		[INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
		[INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
		[INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
	};

	kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);

	return __intel_event_to_feature[idx];
}

static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
						  void *guest_code,
						  uint8_t pmu_version,
						  uint64_t perf_capabilities)
{
	struct kvm_vm *vm;

	vm = vm_create_with_one_vcpu(vcpu, guest_code);
	sync_global_to_guest(vm, kvm_pmu_version);
	sync_global_to_guest(vm, hardware_pmu_arch_events);

	/*
	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
	 */
	if (kvm_has_perf_caps)
		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
	return vm;
}

static void run_vcpu(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	do {
		vcpu_run(vcpu);
		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_PRINTF:
			pr_info("%s", uc.buffer);
			break;
		case UCALL_DONE:
			break;
		default:
			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
		}
	} while (uc.cmd != UCALL_DONE);
}

static uint8_t guest_get_pmu_version(void)
{
	/*
	 * Return the effective PMU version, i.e. the minimum between what KVM
	 * supports and what is enumerated to the guest.  The host deliberately
	 * advertises a PMU version to the guest beyond what is actually
	 * supported by KVM to verify KVM doesn't freak out and do something
	 * bizarre with an architecturally valid, but unsupported, version.
	 */
	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}

/*
 * If an architectural event is supported and guaranteed to generate at least
 * one "hit", assert that its count is non-zero.  If an event isn't supported
 * or the test can't guarantee the associated action will occur, then all bets
 * are off regarding the count, i.e. no checks can be done.
 *
 * Sanity check that in all cases, the event doesn't count when it's disabled,
 * and that KVM correctly emulates the write of an arbitrary value.
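 *
 * For reference, with NUM_LOOPS = 10, NUM_INSNS_PER_LOOP = 4 and
 * NUM_EXTRA_INSNS = 5, the measured blob is expected to retire exactly
 * 45 instructions and 10 branches.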
 */
static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
	uint64_t count;

	count = _rdpmc(pmc);
	if (!(hardware_pmu_arch_events & BIT(idx)))
		goto sanity_checks;

	switch (idx) {
	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
		GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
		break;
	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
		GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
		break;
	case INTEL_ARCH_LLC_REFERENCES_INDEX:
	case INTEL_ARCH_LLC_MISSES_INDEX:
		if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
		    !this_cpu_has(X86_FEATURE_CLFLUSH))
			break;
		fallthrough;
	case INTEL_ARCH_CPU_CYCLES_INDEX:
	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
	case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
	case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
		GUEST_ASSERT_NE(count, 0);
		break;
	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
	case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
		__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
			       "Expected top-down slots >= %u, got count = %lu",
			       NUM_INSNS_RETIRED, count);
		break;
	default:
		break;
	}

sanity_checks:
	__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
	GUEST_ASSERT_EQ(_rdpmc(pmc), count);

	wrmsr(pmc_msr, 0xdead);
	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}

/*
 * Enable and disable the PMC in a monolithic asm blob to ensure that the
 * compiler can't insert _any_ code into the measured sequence.  Note, ECX
 * doesn't need to be clobbered as the input value, @pmc_msr, is restored
 * before the end of the sequence.
 *
 * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
 * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
 * misses, i.e. to allow testing that those events actually count.
 *
 * If forced emulation is enabled (and specified), force emulation on a subset
 * of the measured code to verify that KVM correctly emulates instructions and
 * branches retired events in conjunction with hardware also counting said
 * events.
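 *
 * The measured sequence is: WRMSR (enable), MOV to load ECX, NUM_LOOPS
 * iterations of {CLFLUSH/CLFLUSHOPT/NOP, MFENCE, MOV, LOOP}, then MOV/XOR/XOR
 * to reload ECX and clear EAX/EDX, and a final WRMSR (disable).  This is where
 * NUM_INSNS_PER_LOOP and NUM_EXTRA_INSNS come from.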
 */
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
do {										\
	__asm__ __volatile__("wrmsr\n\t"					\
			     " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
			     "1:\n\t"						\
			     clflush "\n\t"					\
			     "mfence\n\t"					\
			     "mov %[m], %%eax\n\t"				\
			     FEP "loop 1b\n\t"					\
			     FEP "mov %%edi, %%ecx\n\t"				\
			     FEP "xor %%eax, %%eax\n\t"				\
			     FEP "xor %%edx, %%edx\n\t"				\
			     "wrmsr\n\t"					\
			     :: "a"((uint32_t)_value), "d"(_value >> 32),	\
				"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version)	\
	);									\
} while (0)

#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)		\
do {										\
	wrmsr(_pmc_msr, 0);							\
										\
	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP);	\
	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP);	\
	else									\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
										\
	guest_assert_event_count(_idx, _pmc, _pmc_msr);				\
} while (0)

static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
	GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");

	if (is_forced_emulation_enabled)
		GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}

static void guest_test_arch_event(uint8_t idx)
{
	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint32_t pmu_version = guest_get_pmu_version();
	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
	bool guest_has_perf_global_ctrl = pmu_version >= 2;
	struct kvm_x86_pmu_feature gp_event, fixed_event;
	uint32_t base_pmc_msr;
	unsigned int i;

	/* The host side shouldn't invoke this without a guest PMU. */
	GUEST_ASSERT(pmu_version);

	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_pmc_msr = MSR_IA32_PMC0;
	else
		base_pmc_msr = MSR_IA32_PERFCTR0;

	gp_event = intel_event_to_feature(idx).gp_event;
	GUEST_ASSERT_EQ(idx, gp_event.f.bit);

	GUEST_ASSERT(nr_gp_counters);

	for (i = 0; i < nr_gp_counters; i++) {
		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
				    ARCH_PERFMON_EVENTSEL_ENABLE |
				    intel_pmu_arch_events[idx];

		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
		if (guest_has_perf_global_ctrl)
			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));

		__guest_test_arch_event(idx, i, base_pmc_msr + i,
					MSR_P6_EVNTSEL0 + i, eventsel);
	}

	if (!guest_has_perf_global_ctrl)
		return;

	fixed_event = intel_event_to_feature(idx).fixed_event;
	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
		return;

	i = fixed_event.f.bit;

	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));

	__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
				MSR_CORE_PERF_FIXED_CTR0 + i,
				MSR_CORE_PERF_GLOBAL_CTRL,
				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
}

static void guest_test_arch_events(void)
{
	uint8_t i;

	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
		guest_test_arch_event(i);

	GUEST_DONE();
}

static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t length, uint32_t unavailable_mask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/*
	 * Testing arch events requires a vPMU (there are no negative tests).
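	 *
	 * Note, the guest still verifies that hardware-supported events count
	 * correctly even when they are hidden via @unavailable_mask or
	 * truncated by @length; the guest-side assertions key off of
	 * hardware_pmu_arch_events, not the guest's CPUID enumeration.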
	 */
	if (!pmu_version)
		return;

	unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
				    X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
				length);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
				unavailable_mask);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

/*
 * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
 * that aren't defined counter MSRs *probably* don't exist, but there's no
 * guarantee that currently undefined MSR indices won't be used for something
 * other than PMCs in the future.
 */
#define MAX_NR_GP_COUNTERS	8
#define MAX_NR_FIXED_COUNTERS	3

#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)		\
	__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,		\
		       "Expected %s on " #insn "(0x%x), got vector %u",		\
		       expect_gp ? "#GP" : "no fault", msr, vector)

#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)			\
	__GUEST_ASSERT(val == expected,						\
		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
		       msr, expected, val);

static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
			     uint64_t expected_val)
{
	uint8_t vector;
	uint64_t val;

	vector = rdpmc_safe(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);

	if (!is_forced_emulation_enabled)
		return;

	vector = rdpmc_safe_fep(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
}

static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
				 uint8_t nr_counters, uint32_t or_mask)
{
	const bool pmu_has_fast_mode = !guest_get_pmu_version();
	uint8_t i;

	for (i = 0; i < nr_possible_counters; i++) {
		/*
		 * TODO: Test a value that validates full-width writes and the
		 * width of the counters.
		 */
		const uint64_t test_val = 0xffff;
		const uint32_t msr = base_msr + i;

		/*
		 * Fixed counters are supported if the counter is less than the
		 * number of enumerated contiguous counters *or* the counter is
		 * explicitly enumerated in the supported counters mask.
		 */
		const bool expect_success = i < nr_counters || (or_mask & BIT(i));

		/*
		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
		 * unsupported, i.e. doesn't #GP and reads back '0'.
		 */
		const uint64_t expected_val = expect_success ? test_val : 0;
		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
				       msr != MSR_P6_PERFCTR1;
		uint32_t rdpmc_idx;
		uint8_t vector;
		uint64_t val;

		vector = wrmsr_safe(msr, test_val);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);

		vector = rdmsr_safe(msr, &val);
		GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);

		/* On #GP, the result of RDMSR is undefined. */
		if (!expect_gp)
			GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);

		/*
		 * Redo the read tests with RDPMC, which has different indexing
		 * semantics and additional capabilities.
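		 *
		 * Fixed counters are addressed by setting INTEL_RDPMC_FIXED in
		 * the RDPMC index, and INTEL_RDPMC_FAST requests the "fast"
		 * read mode, which KVM is expected to reject (see below).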
		 */
		rdpmc_idx = i;
		if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
			rdpmc_idx |= INTEL_RDPMC_FIXED;

		guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);

		/*
		 * KVM doesn't support non-architectural PMUs, i.e. it should be
		 * impossible to have fast mode RDPMC.  Verify that attempting
		 * to use fast RDPMC always #GPs.
		 */
		GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
		rdpmc_idx |= INTEL_RDPMC_FAST;
		guest_test_rdpmc(rdpmc_idx, false, -1ull);

		vector = wrmsr_safe(msr, 0);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
	}
}

static void guest_test_gp_counters(void)
{
	uint8_t pmu_version = guest_get_pmu_version();
	uint8_t nr_gp_counters = 0;
	uint32_t base_msr;

	if (pmu_version)
		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);

	/*
	 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
	 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
	 * of GP counters.  If there are no GP counters, require KVM to leave
	 * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
	 * follow the spirit of the architecture and only globally enable GP
	 * counters, of which there are none.
	 */
	if (pmu_version > 1) {
		uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);

		if (nr_gp_counters)
			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
		else
			GUEST_ASSERT_EQ(global_ctrl, 0);
	}

	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_msr = MSR_IA32_PMC0;
	else
		base_msr = MSR_IA32_PERFCTR0;

	guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
	GUEST_DONE();
}

static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t nr_gp_counters)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
				nr_gp_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

static void guest_test_fixed_counters(void)
{
	uint64_t supported_bitmask = 0;
	uint8_t nr_fixed_counters = 0;
	uint8_t i;

	/* Fixed counters require Architectural vPMU Version 2+. */
	if (guest_get_pmu_version() >= 2)
		nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);

	/*
	 * The supported bitmask for fixed counters was introduced in PMU
	 * version 5.
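	 *
	 * The bitmask enumerates fixed counters that are supported even if
	 * they lie beyond the contiguous count of fixed counters, which is
	 * why it is OR'd into the "supported" check in guest_rd_wr_counters().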
	 */
	if (guest_get_pmu_version() >= 5)
		supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);

	guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
			     nr_fixed_counters, supported_bitmask);

	for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
		uint8_t vector;
		uint64_t val;

		if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
			vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
					    FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in FIXED_CTR_CTRL", i);

			vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
					    FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
			continue;
		}

		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);

		GUEST_ASSERT_NE(val, 0);
	}
	GUEST_DONE();
}

static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
				uint8_t nr_fixed_counters,
				uint32_t supported_bitmask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
				supported_bitmask);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
				nr_fixed_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

static void test_intel_counters(void)
{
	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	unsigned int i;
	uint8_t v, j;
	uint32_t k;

	const uint64_t perf_caps[] = {
		0,
		PMU_CAP_FW_WRITES,
	};

	/*
	 * To keep the total runtime reasonable, test only a handful of select,
	 * semi-arbitrary values for the mask of unavailable PMU events.  Test
	 * 0 (all events available) and all ones (no events available) as well
	 * as alternating bit sequences, e.g. to detect if KVM is checking the
	 * wrong bit(s).
	 */
	const uint32_t unavailable_masks[] = {
		0x0,
		0xffffffffu,
		0xaaaaaaaau,
		0x55555555u,
		0xf0f0f0f0u,
		0x0f0f0f0fu,
		0xa0a0a0a0u,
		0x0a0a0a0au,
		0x50505050u,
		0x05050505u,
	};

	/*
	 * Test up to PMU v5, which is the current maximum version defined by
	 * Intel, i.e. is the last version that is guaranteed to be backwards
	 * compatible with KVM's existing behavior.
	 */
	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);

	/*
	 * Detect the existence of events that aren't supported by selftests.
	 * This will (obviously) fail any time hardware adds support for a new
	 * event, but it's worth paying that price to keep the test fresh.
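	 *
	 * Concretely, compare the bit vector length enumerated by hardware
	 * (X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) against the number of
	 * events this test knows about, NR_INTEL_ARCH_EVENTS.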
	 */
	TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
		    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
		    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

	/*
	 * Iterate over known arch events irrespective of KVM/hardware support
	 * to verify that KVM doesn't reject programming of events just because
	 * the *architectural* encoding is unsupported.  Track which events are
	 * supported in hardware; the guest side will validate supported events
	 * count correctly, even if *enumeration* of the event is unsupported
	 * by KVM and/or isn't exposed to the guest.
	 */
	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
		if (this_pmu_has(intel_event_to_feature(i).gp_event))
			hardware_pmu_arch_events |= BIT(i);
	}

	for (v = 0; v <= max_pmu_version; v++) {
		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
			if (!kvm_has_perf_caps && perf_caps[i])
				continue;

			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);

			/*
			 * Test each mask for all PMU versions and lengths up
			 * to the number of events + 1 (to verify KVM doesn't
			 * do weird things if the guest length is greater than
			 * the host length), including a mask of '0' and all
			 * ones, i.e. all events available and unavailable.
			 */
			for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
				for (k = 0; k < ARRAY_SIZE(unavailable_masks); k++)
					test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
			}

			pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_gp_counters; j++)
				test_gp_counters(v, perf_caps[i], j);

			pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_fixed_counters; j++) {
				for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
					test_fixed_counters(v, perf_caps[i], j, k);
			}
		}
	}
}

int main(int argc, char *argv[])
{
	TEST_REQUIRE(kvm_is_pmu_enabled());

	TEST_REQUIRE(host_cpu_is_intel);
	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);

	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);

	test_intel_counters();

	return 0;
}