1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2023, Tencent, Inc. 4 */ 5 #include <x86intrin.h> 6 7 #include "pmu.h" 8 #include "processor.h" 9 10 /* Number of iterations of the loop for the guest measurement payload. */ 11 #define NUM_LOOPS 10 12 13 /* Each iteration of the loop retires one branch instruction. */ 14 #define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) 15 16 /* 17 * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP, 18 * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP. 19 */ 20 #define NUM_INSNS_PER_LOOP 6 21 22 /* 23 * Number of "extra" instructions that will be counted, i.e. the number of 24 * instructions that are needed to set up the loop and then disable the 25 * counter. 2 MOV, 2 XOR, 1 WRMSR. 26 */ 27 #define NUM_EXTRA_INSNS 5 28 29 /* Total number of instructions retired within the measured section. */ 30 #define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) 31 32 /* Track which architectural events are supported by hardware. */ 33 static uint32_t hardware_pmu_arch_events; 34 35 static uint8_t kvm_pmu_version; 36 static bool kvm_has_perf_caps; 37 38 #define X86_PMU_FEATURE_NULL \ 39 ({ \ 40 struct kvm_x86_pmu_feature feature = {}; \ 41 \ 42 feature; \ 43 }) 44 45 static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) 46 { 47 return !(*(u64 *)&event); 48 } 49 50 struct kvm_intel_pmu_event { 51 struct kvm_x86_pmu_feature gp_event; 52 struct kvm_x86_pmu_feature fixed_event; 53 }; 54 55 /* 56 * Wrap the array to appease the compiler, as the macros used to construct each 57 * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the 58 * compiler often thinks the feature definitions aren't compile-time constants. 59 */ 60 static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) 61 { 62 const struct kvm_intel_pmu_event __intel_event_to_feature[] = { 63 [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, 64 [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, 65 /* 66 * Note, the fixed counter for reference cycles is NOT the same as the 67 * general purpose architectural event. The fixed counter explicitly 68 * counts at the same frequency as the TSC, whereas the GP event counts 69 * at a fixed, but uarch specific, frequency. Bundle them here for 70 * simplicity. 71 */ 72 [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, 73 [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, 74 [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, 75 [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, 76 [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, 77 [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, 78 [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL }, 79 [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL }, 80 [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL }, 81 [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL }, 82 [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL }, 83 }; 84 85 kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS); 86 87 return __intel_event_to_feature[idx]; 88 } 89 90 static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, 91 void *guest_code, 92 uint8_t pmu_version, 93 uint64_t perf_capabilities) 94 { 95 struct kvm_vm *vm; 96 97 vm = vm_create_with_one_vcpu(vcpu, guest_code); 98 sync_global_to_guest(vm, kvm_pmu_version); 99 sync_global_to_guest(vm, hardware_pmu_arch_events); 100 101 /* 102 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling 103 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU. 104 */ 105 if (kvm_has_perf_caps) 106 vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities); 107 108 vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version); 109 return vm; 110 } 111 112 static void run_vcpu(struct kvm_vcpu *vcpu) 113 { 114 struct ucall uc; 115 116 do { 117 vcpu_run(vcpu); 118 switch (get_ucall(vcpu, &uc)) { 119 case UCALL_SYNC: 120 break; 121 case UCALL_ABORT: 122 REPORT_GUEST_ASSERT(uc); 123 break; 124 case UCALL_PRINTF: 125 pr_info("%s", uc.buffer); 126 break; 127 case UCALL_DONE: 128 break; 129 default: 130 TEST_FAIL("Unexpected ucall: %lu", uc.cmd); 131 } 132 } while (uc.cmd != UCALL_DONE); 133 } 134 135 static uint8_t guest_get_pmu_version(void) 136 { 137 /* 138 * Return the effective PMU version, i.e. the minimum between what KVM 139 * supports and what is enumerated to the guest. The host deliberately 140 * advertises a PMU version to the guest beyond what is actually 141 * supported by KVM to verify KVM doesn't freak out and do something 142 * bizarre with an architecturally valid, but unsupported, version. 143 */ 144 return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION)); 145 } 146 147 /* 148 * If an architectural event is supported and guaranteed to generate at least 149 * one "hit, assert that its count is non-zero. If an event isn't supported or 150 * the test can't guarantee the associated action will occur, then all bets are 151 * off regarding the count, i.e. no checks can be done. 152 * 153 * Sanity check that in all cases, the event doesn't count when it's disabled, 154 * and that KVM correctly emulates the write of an arbitrary value. 155 */ 156 static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr) 157 { 158 uint64_t count; 159 160 count = _rdpmc(pmc); 161 if (!(hardware_pmu_arch_events & BIT(idx))) 162 goto sanity_checks; 163 164 switch (idx) { 165 case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX: 166 /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ 167 if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT)) 168 GUEST_ASSERT(count >= NUM_INSNS_RETIRED); 169 else 170 GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); 171 break; 172 case INTEL_ARCH_BRANCHES_RETIRED_INDEX: 173 /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ 174 if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT)) 175 GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED); 176 else 177 GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); 178 break; 179 case INTEL_ARCH_LLC_REFERENCES_INDEX: 180 case INTEL_ARCH_LLC_MISSES_INDEX: 181 if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) && 182 !this_cpu_has(X86_FEATURE_CLFLUSH)) 183 break; 184 fallthrough; 185 case INTEL_ARCH_CPU_CYCLES_INDEX: 186 case INTEL_ARCH_REFERENCE_CYCLES_INDEX: 187 case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX: 188 case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX: 189 GUEST_ASSERT_NE(count, 0); 190 break; 191 case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: 192 case INTEL_ARCH_TOPDOWN_RETIRING_INDEX: 193 __GUEST_ASSERT(count >= NUM_INSNS_RETIRED, 194 "Expected top-down slots >= %u, got count = %lu", 195 NUM_INSNS_RETIRED, count); 196 break; 197 default: 198 break; 199 } 200 201 sanity_checks: 202 __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); 203 GUEST_ASSERT_EQ(_rdpmc(pmc), count); 204 205 wrmsr(pmc_msr, 0xdead); 206 GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead); 207 } 208 209 /* 210 * Enable and disable the PMC in a monolithic asm blob to ensure that the 211 * compiler can't insert _any_ code into the measured sequence. Note, ECX 212 * doesn't need to be clobbered as the input value, @pmc_msr, is restored 213 * before the end of the sequence. 214 * 215 * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the 216 * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and 217 * misses, i.e. to allow testing that those events actually count. 218 * 219 * If forced emulation is enabled (and specified), force emulation on a subset 220 * of the measured code to verify that KVM correctly emulates instructions and 221 * branches retired events in conjunction with hardware also counting said 222 * events. 223 */ 224 #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \ 225 do { \ 226 __asm__ __volatile__("wrmsr\n\t" \ 227 " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ 228 "1:\n\t" \ 229 FEP "enter $0, $0\n\t" \ 230 clflush "\n\t" \ 231 "mfence\n\t" \ 232 "mov %[m], %%eax\n\t" \ 233 FEP "leave\n\t" \ 234 FEP "loop 1b\n\t" \ 235 FEP "mov %%edi, %%ecx\n\t" \ 236 FEP "xor %%eax, %%eax\n\t" \ 237 FEP "xor %%edx, %%edx\n\t" \ 238 "wrmsr\n\t" \ 239 :: "a"((uint32_t)_value), "d"(_value >> 32), \ 240 "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \ 241 ); \ 242 } while (0) 243 244 #define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ 245 do { \ 246 wrmsr(_pmc_msr, 0); \ 247 \ 248 if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ 249 GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \ 250 else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ 251 GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \ 252 else \ 253 GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ 254 \ 255 guest_assert_event_count(_idx, _pmc, _pmc_msr); \ 256 } while (0) 257 258 static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr, 259 uint32_t ctrl_msr, uint64_t ctrl_msr_value) 260 { 261 GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); 262 263 if (is_forced_emulation_enabled) 264 GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); 265 } 266 267 static void guest_test_arch_event(uint8_t idx) 268 { 269 uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); 270 uint32_t pmu_version = guest_get_pmu_version(); 271 /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ 272 bool guest_has_perf_global_ctrl = pmu_version >= 2; 273 struct kvm_x86_pmu_feature gp_event, fixed_event; 274 uint32_t base_pmc_msr; 275 unsigned int i; 276 277 /* The host side shouldn't invoke this without a guest PMU. */ 278 GUEST_ASSERT(pmu_version); 279 280 if (this_cpu_has(X86_FEATURE_PDCM) && 281 rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) 282 base_pmc_msr = MSR_IA32_PMC0; 283 else 284 base_pmc_msr = MSR_IA32_PERFCTR0; 285 286 gp_event = intel_event_to_feature(idx).gp_event; 287 GUEST_ASSERT_EQ(idx, gp_event.f.bit); 288 289 GUEST_ASSERT(nr_gp_counters); 290 291 for (i = 0; i < nr_gp_counters; i++) { 292 uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS | 293 ARCH_PERFMON_EVENTSEL_ENABLE | 294 intel_pmu_arch_events[idx]; 295 296 wrmsr(MSR_P6_EVNTSEL0 + i, 0); 297 if (guest_has_perf_global_ctrl) 298 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i)); 299 300 __guest_test_arch_event(idx, i, base_pmc_msr + i, 301 MSR_P6_EVNTSEL0 + i, eventsel); 302 } 303 304 if (!guest_has_perf_global_ctrl) 305 return; 306 307 fixed_event = intel_event_to_feature(idx).fixed_event; 308 if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event)) 309 return; 310 311 i = fixed_event.f.bit; 312 313 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); 314 315 __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED, 316 MSR_CORE_PERF_FIXED_CTR0 + i, 317 MSR_CORE_PERF_GLOBAL_CTRL, 318 FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); 319 } 320 321 static void guest_test_arch_events(void) 322 { 323 uint8_t i; 324 325 for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) 326 guest_test_arch_event(i); 327 328 GUEST_DONE(); 329 } 330 331 static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, 332 uint8_t length, uint32_t unavailable_mask) 333 { 334 struct kvm_vcpu *vcpu; 335 struct kvm_vm *vm; 336 337 /* Testing arch events requires a vPMU (there are no negative tests). */ 338 if (!pmu_version) 339 return; 340 341 unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit, 342 X86_PROPERTY_PMU_EVENTS_MASK.lo_bit); 343 344 vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events, 345 pmu_version, perf_capabilities); 346 347 vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH, 348 length); 349 vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK, 350 unavailable_mask); 351 352 run_vcpu(vcpu); 353 354 kvm_vm_free(vm); 355 } 356 357 /* 358 * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs 359 * that aren't defined counter MSRs *probably* don't exist, but there's no 360 * guarantee that currently undefined MSR indices won't be used for something 361 * other than PMCs in the future. 362 */ 363 #define MAX_NR_GP_COUNTERS 8 364 #define MAX_NR_FIXED_COUNTERS 3 365 366 #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \ 367 __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ 368 "Expected %s on " #insn "(0x%x), got %s", \ 369 expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \ 370 371 #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ 372 __GUEST_ASSERT(val == expected, \ 373 "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ 374 msr, expected, val); 375 376 static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, 377 uint64_t expected_val) 378 { 379 uint8_t vector; 380 uint64_t val; 381 382 vector = rdpmc_safe(rdpmc_idx, &val); 383 GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); 384 if (expect_success) 385 GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); 386 387 if (!is_forced_emulation_enabled) 388 return; 389 390 vector = rdpmc_safe_fep(rdpmc_idx, &val); 391 GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); 392 if (expect_success) 393 GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); 394 } 395 396 static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters, 397 uint8_t nr_counters, uint32_t or_mask) 398 { 399 const bool pmu_has_fast_mode = !guest_get_pmu_version(); 400 uint8_t i; 401 402 for (i = 0; i < nr_possible_counters; i++) { 403 /* 404 * TODO: Test a value that validates full-width writes and the 405 * width of the counters. 406 */ 407 const uint64_t test_val = 0xffff; 408 const uint32_t msr = base_msr + i; 409 410 /* 411 * Fixed counters are supported if the counter is less than the 412 * number of enumerated contiguous counters *or* the counter is 413 * explicitly enumerated in the supported counters mask. 414 */ 415 const bool expect_success = i < nr_counters || (or_mask & BIT(i)); 416 417 /* 418 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are 419 * unsupported, i.e. doesn't #GP and reads back '0'. 420 */ 421 const uint64_t expected_val = expect_success ? test_val : 0; 422 const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && 423 msr != MSR_P6_PERFCTR1; 424 uint32_t rdpmc_idx; 425 uint8_t vector; 426 uint64_t val; 427 428 vector = wrmsr_safe(msr, test_val); 429 GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); 430 431 vector = rdmsr_safe(msr, &val); 432 GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector); 433 434 /* On #GP, the result of RDMSR is undefined. */ 435 if (!expect_gp) 436 GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val); 437 438 /* 439 * Redo the read tests with RDPMC, which has different indexing 440 * semantics and additional capabilities. 441 */ 442 rdpmc_idx = i; 443 if (base_msr == MSR_CORE_PERF_FIXED_CTR0) 444 rdpmc_idx |= INTEL_RDPMC_FIXED; 445 446 guest_test_rdpmc(rdpmc_idx, expect_success, expected_val); 447 448 /* 449 * KVM doesn't support non-architectural PMUs, i.e. it should 450 * impossible to have fast mode RDPMC. Verify that attempting 451 * to use fast RDPMC always #GPs. 452 */ 453 GUEST_ASSERT(!expect_success || !pmu_has_fast_mode); 454 rdpmc_idx |= INTEL_RDPMC_FAST; 455 guest_test_rdpmc(rdpmc_idx, false, -1ull); 456 457 vector = wrmsr_safe(msr, 0); 458 GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); 459 } 460 } 461 462 static void guest_test_gp_counters(void) 463 { 464 uint8_t pmu_version = guest_get_pmu_version(); 465 uint8_t nr_gp_counters = 0; 466 uint32_t base_msr; 467 468 if (pmu_version) 469 nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); 470 471 /* 472 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is 473 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number 474 * of GP counters. If there are no GP counters, require KVM to leave 475 * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but 476 * follow the spirit of the architecture and only globally enable GP 477 * counters, of which there are none. 478 */ 479 if (pmu_version > 1) { 480 uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); 481 482 if (nr_gp_counters) 483 GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0)); 484 else 485 GUEST_ASSERT_EQ(global_ctrl, 0); 486 } 487 488 if (this_cpu_has(X86_FEATURE_PDCM) && 489 rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) 490 base_msr = MSR_IA32_PMC0; 491 else 492 base_msr = MSR_IA32_PERFCTR0; 493 494 guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0); 495 GUEST_DONE(); 496 } 497 498 static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities, 499 uint8_t nr_gp_counters) 500 { 501 struct kvm_vcpu *vcpu; 502 struct kvm_vm *vm; 503 504 vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters, 505 pmu_version, perf_capabilities); 506 507 vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS, 508 nr_gp_counters); 509 510 run_vcpu(vcpu); 511 512 kvm_vm_free(vm); 513 } 514 515 static void guest_test_fixed_counters(void) 516 { 517 uint64_t supported_bitmask = 0; 518 uint8_t nr_fixed_counters = 0; 519 uint8_t i; 520 521 /* Fixed counters require Architectural vPMU Version 2+. */ 522 if (guest_get_pmu_version() >= 2) 523 nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); 524 525 /* 526 * The supported bitmask for fixed counters was introduced in PMU 527 * version 5. 528 */ 529 if (guest_get_pmu_version() >= 5) 530 supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK); 531 532 guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS, 533 nr_fixed_counters, supported_bitmask); 534 535 for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) { 536 uint8_t vector; 537 uint64_t val; 538 539 if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) { 540 vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL, 541 FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); 542 __GUEST_ASSERT(vector == GP_VECTOR, 543 "Expected #GP for counter %u in FIXED_CTR_CTRL", i); 544 545 vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL, 546 FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); 547 __GUEST_ASSERT(vector == GP_VECTOR, 548 "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i); 549 continue; 550 } 551 552 wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0); 553 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); 554 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); 555 __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); 556 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); 557 val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i); 558 559 GUEST_ASSERT_NE(val, 0); 560 } 561 GUEST_DONE(); 562 } 563 564 static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, 565 uint8_t nr_fixed_counters, 566 uint32_t supported_bitmask) 567 { 568 struct kvm_vcpu *vcpu; 569 struct kvm_vm *vm; 570 571 vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters, 572 pmu_version, perf_capabilities); 573 574 vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK, 575 supported_bitmask); 576 vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS, 577 nr_fixed_counters); 578 579 run_vcpu(vcpu); 580 581 kvm_vm_free(vm); 582 } 583 584 static void test_intel_counters(void) 585 { 586 uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); 587 uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); 588 uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); 589 unsigned int i; 590 uint8_t v, j; 591 uint32_t k; 592 593 const uint64_t perf_caps[] = { 594 0, 595 PMU_CAP_FW_WRITES, 596 }; 597 598 /* 599 * To keep the total runtime reasonable, test only a handful of select, 600 * semi-arbitrary values for the mask of unavailable PMU events. Test 601 * 0 (all events available) and all ones (no events available) as well 602 * as alternating bit sequencues, e.g. to detect if KVM is checking the 603 * wrong bit(s). 604 */ 605 const uint32_t unavailable_masks[] = { 606 0x0, 607 0xffffffffu, 608 0xaaaaaaaau, 609 0x55555555u, 610 0xf0f0f0f0u, 611 0x0f0f0f0fu, 612 0xa0a0a0a0u, 613 0x0a0a0a0au, 614 0x50505050u, 615 0x05050505u, 616 }; 617 618 /* 619 * Test up to PMU v5, which is the current maximum version defined by 620 * Intel, i.e. is the last version that is guaranteed to be backwards 621 * compatible with KVM's existing behavior. 622 */ 623 uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5); 624 625 /* 626 * Detect the existence of events that aren't supported by selftests. 627 * This will (obviously) fail any time hardware adds support for a new 628 * event, but it's worth paying that price to keep the test fresh. 629 */ 630 TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS, 631 "New architectural event(s) detected; please update this test (length = %u, mask = %x)", 632 this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH), 633 this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); 634 635 /* 636 * Iterate over known arch events irrespective of KVM/hardware support 637 * to verify that KVM doesn't reject programming of events just because 638 * the *architectural* encoding is unsupported. Track which events are 639 * supported in hardware; the guest side will validate supported events 640 * count correctly, even if *enumeration* of the event is unsupported 641 * by KVM and/or isn't exposed to the guest. 642 */ 643 for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) { 644 if (this_pmu_has(intel_event_to_feature(i).gp_event)) 645 hardware_pmu_arch_events |= BIT(i); 646 } 647 648 for (v = 0; v <= max_pmu_version; v++) { 649 for (i = 0; i < ARRAY_SIZE(perf_caps); i++) { 650 if (!kvm_has_perf_caps && perf_caps[i]) 651 continue; 652 653 pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n", 654 v, perf_caps[i]); 655 656 /* 657 * Test single bits for all PMU version and lengths up 658 * the number of events +1 (to verify KVM doesn't do 659 * weird things if the guest length is greater than the 660 * host length). Explicitly test a mask of '0' and all 661 * ones i.e. all events being available and unavailable. 662 */ 663 for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) { 664 for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++) 665 test_arch_events(v, perf_caps[i], j, unavailable_masks[k]); 666 } 667 668 pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n", 669 v, perf_caps[i]); 670 for (j = 0; j <= nr_gp_counters; j++) 671 test_gp_counters(v, perf_caps[i], j); 672 673 pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n", 674 v, perf_caps[i]); 675 for (j = 0; j <= nr_fixed_counters; j++) { 676 for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++) 677 test_fixed_counters(v, perf_caps[i], j, k); 678 } 679 } 680 } 681 } 682 683 int main(int argc, char *argv[]) 684 { 685 TEST_REQUIRE(kvm_is_pmu_enabled()); 686 687 TEST_REQUIRE(host_cpu_is_intel); 688 TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); 689 TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); 690 691 kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); 692 kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM); 693 694 test_intel_counters(); 695 696 return 0; 697 } 698