// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023, Tencent, Inc.
 */
#include <x86intrin.h>

#include "pmu.h"
#include "processor.h"

/* Number of iterations of the loop for the guest measurement payload. */
#define NUM_LOOPS 10

/* Each iteration of the loop retires one branch instruction. */
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)

/*
 * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
 * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
 */
#define NUM_INSNS_PER_LOOP 6

/*
 * Number of "extra" instructions that will be counted, i.e. the number of
 * instructions that are needed to set up the loop and then disable the
 * counter. 2 MOV, 2 XOR, 1 WRMSR.
 */
#define NUM_EXTRA_INSNS 5

/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
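/* With the values above, NUM_INSNS_RETIRED works out to 10 * 6 + 5 = 65. */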

/* Track which architectural events are supported by hardware. */
static uint32_t hardware_pmu_arch_events;

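/*
 * KVM's advertised PMU version and whether KVM supports PERF_CAPABILITIES
 * (i.e. PDCM); both are filled in by main() before any VM is created.
 */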
static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;

#define X86_PMU_FEATURE_NULL						\
({									\
	struct kvm_x86_pmu_feature feature = {};			\
									\
	feature;							\
})

static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
{
	return !(*(u64 *)&event);
}

struct kvm_intel_pmu_event {
	struct kvm_x86_pmu_feature gp_event;
	struct kvm_x86_pmu_feature fixed_event;
};

/*
 * Wrap the array to appease the compiler, as the macros used to construct each
 * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
 * compiler often thinks the feature definitions aren't compile-time constants.
 */
static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
{
	const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
		[INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
		/*
		 * Note, the fixed counter for reference cycles is NOT the same as the
		 * general purpose architectural event. The fixed counter explicitly
		 * counts at the same frequency as the TSC, whereas the GP event counts
		 * at a fixed, but uarch specific, frequency. Bundle them here for
		 * simplicity.
		 */
		[INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
		[INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
		[INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
	};

	kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);

	return __intel_event_to_feature[idx];
}

static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
						  void *guest_code,
						  uint8_t pmu_version,
						  uint64_t perf_capabilities)
{
	struct kvm_vm *vm;

	vm = vm_create_with_one_vcpu(vcpu, guest_code);
	sync_global_to_guest(vm, kvm_pmu_version);
	sync_global_to_guest(vm, hardware_pmu_arch_events);

	/*
	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
	 */
	if (kvm_has_perf_caps)
		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
	return vm;
}

static void run_vcpu(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	do {
		vcpu_run(vcpu);
		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_PRINTF:
			pr_info("%s", uc.buffer);
			break;
		case UCALL_DONE:
			break;
		default:
			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
		}
	} while (uc.cmd != UCALL_DONE);
}

static uint8_t guest_get_pmu_version(void)
{
	/*
	 * Return the effective PMU version, i.e. the minimum between what KVM
	 * supports and what is enumerated to the guest. The host deliberately
	 * advertises a PMU version to the guest beyond what is actually
	 * supported by KVM to verify KVM doesn't freak out and do something
	 * bizarre with an architecturally valid, but unsupported, version.
	 */
	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}

/*
 * If an architectural event is supported and guaranteed to generate at least
 * one "hit", assert that its count is non-zero. If an event isn't supported or
 * the test can't guarantee the associated action will occur, then all bets are
 * off regarding the count, i.e. no checks can be done.
 *
 * Sanity check that in all cases, the event doesn't count when it's disabled,
 * and that KVM correctly emulates the write of an arbitrary value.
 */
static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
	uint64_t count;

	count = _rdpmc(pmc);
	if (!(hardware_pmu_arch_events & BIT(idx)))
		goto sanity_checks;

	switch (idx) {
	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
		/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
		if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
			GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
		else
			GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
		break;
	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
		/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
		if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
			GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
		else
			GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
		break;
	case INTEL_ARCH_LLC_REFERENCES_INDEX:
	case INTEL_ARCH_LLC_MISSES_INDEX:
		if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
		    !this_cpu_has(X86_FEATURE_CLFLUSH))
			break;
		fallthrough;
	case INTEL_ARCH_CPU_CYCLES_INDEX:
	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
	case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
	case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
		GUEST_ASSERT_NE(count, 0);
		break;
	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
	case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
		__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
			       "Expected top-down slots >= %u, got count = %lu",
			       NUM_INSNS_RETIRED, count);
		break;
	default:
		break;
	}

sanity_checks:
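	/*
	 * The counter was disabled before reaching this point, so executing
	 * more branches must not change the count, and a write of an
	 * arbitrary value must be read back verbatim.
	 */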
	__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
	GUEST_ASSERT_EQ(_rdpmc(pmc), count);

	wrmsr(pmc_msr, 0xdead);
	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}

/*
 * Enable and disable the PMC in a monolithic asm blob to ensure that the
 * compiler can't insert _any_ code into the measured sequence. Note, ECX
 * doesn't need to be clobbered as the input value, @pmc_msr, is restored
 * before the end of the sequence.
 *
 * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
 * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
 * misses, i.e. to allow testing that those events actually count.
 *
 * If forced emulation is enabled (and specified), force emulation on a subset
 * of the measured code to verify that KVM correctly emulates instructions and
 * branches retired events in conjunction with hardware also counting said
 * events.
 */
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)	\
do {	\
	__asm__ __volatile__("wrmsr\n\t"	\
			     " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
			     "1:\n\t"	\
			     FEP "enter $0, $0\n\t"	\
			     clflush "\n\t"	\
			     "mfence\n\t"	\
			     "mov %[m], %%eax\n\t"	\
			     FEP "leave\n\t"	\
			     FEP "loop 1b\n\t"	\
			     FEP "mov %%edi, %%ecx\n\t"	\
			     FEP "xor %%eax, %%eax\n\t"	\
			     FEP "xor %%edx, %%edx\n\t"	\
			     "wrmsr\n\t"	\
			     :: "a"((uint32_t)_value), "d"(_value >> 32),	\
				"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version)	\
	);	\
} while (0)

#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)	\
do {	\
	wrmsr(_pmc_msr, 0);	\
	\
	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))	\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP);	\
	else if (this_cpu_has(X86_FEATURE_CLFLUSH))	\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP);	\
	else	\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);	\
	\
	guest_assert_event_count(_idx, _pmc, _pmc_msr);	\
} while (0)

static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
	GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");

	if (is_forced_emulation_enabled)
		GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}

static void guest_test_arch_event(uint8_t idx)
{
	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint32_t pmu_version = guest_get_pmu_version();
	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
	bool guest_has_perf_global_ctrl = pmu_version >= 2;
	struct kvm_x86_pmu_feature gp_event, fixed_event;
	uint32_t base_pmc_msr;
	unsigned int i;

	/* The host side shouldn't invoke this without a guest PMU. */
	GUEST_ASSERT(pmu_version);

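	/*
	 * With full-width writes (PMU_CAP_FW_WRITES in PERF_CAPABILITIES), the
	 * GP counters are accessed through MSR_IA32_PMC0; otherwise use the
	 * legacy MSR_IA32_PERFCTR0 aliases.
	 */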
	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_pmc_msr = MSR_IA32_PMC0;
	else
		base_pmc_msr = MSR_IA32_PERFCTR0;

	gp_event = intel_event_to_feature(idx).gp_event;
	GUEST_ASSERT_EQ(idx, gp_event.f.bit);

	GUEST_ASSERT(nr_gp_counters);

	for (i = 0; i < nr_gp_counters; i++) {
		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
				    ARCH_PERFMON_EVENTSEL_ENABLE |
				    intel_pmu_arch_events[idx];

		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
		if (guest_has_perf_global_ctrl)
			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));

		__guest_test_arch_event(idx, i, base_pmc_msr + i,
					MSR_P6_EVNTSEL0 + i, eventsel);
	}

	if (!guest_has_perf_global_ctrl)
		return;

	fixed_event = intel_event_to_feature(idx).fixed_event;
	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
		return;

	i = fixed_event.f.bit;

	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));

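	/*
	 * RDPMC selects fixed counters via a dedicated type bit in ECX, hence
	 * ORing the fixed counter index with INTEL_RDPMC_FIXED.
	 */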
	__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
				MSR_CORE_PERF_FIXED_CTR0 + i,
				MSR_CORE_PERF_GLOBAL_CTRL,
				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
}

static void guest_test_arch_events(void)
{
	uint8_t i;

	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
		guest_test_arch_event(i);

	GUEST_DONE();
}

static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t length, uint32_t unavailable_mask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* Testing arch events requires a vPMU (there are no negative tests). */
	if (!pmu_version)
		return;

	unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
				    X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
				length);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
				unavailable_mask);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

/*
 * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
 * that aren't defined counter MSRs *probably* don't exist, but there's no
 * guarantee that currently undefined MSR indices won't be used for something
 * other than PMCs in the future.
 */
#define MAX_NR_GP_COUNTERS	8
#define MAX_NR_FIXED_COUNTERS	3

#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)	\
	__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,	\
		       "Expected %s on " #insn "(0x%x), got %s",	\
		       expect_gp ? "#GP" : "no fault", msr, ex_str(vector))	\

#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)	\
	__GUEST_ASSERT(val == expected,	\
		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
		       msr, expected, val);

static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
			     uint64_t expected_val)
{
	uint8_t vector;
	uint64_t val;

	vector = rdpmc_safe(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);

	if (!is_forced_emulation_enabled)
		return;

	vector = rdpmc_safe_fep(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
}

static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
				 uint8_t nr_counters, uint32_t or_mask)
{
	const bool pmu_has_fast_mode = !guest_get_pmu_version();
	uint8_t i;

	for (i = 0; i < nr_possible_counters; i++) {
		/*
		 * TODO: Test a value that validates full-width writes and the
		 * width of the counters.
		 */
		const uint64_t test_val = 0xffff;
		const uint32_t msr = base_msr + i;

		/*
		 * Fixed counters are supported if the counter is less than the
		 * number of enumerated contiguous counters *or* the counter is
		 * explicitly enumerated in the supported counters mask.
		 */
		const bool expect_success = i < nr_counters || (or_mask & BIT(i));

		/*
		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
		 * unsupported, i.e. doesn't #GP and reads back '0'.
		 */
		const uint64_t expected_val = expect_success ? test_val : 0;
		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
				       msr != MSR_P6_PERFCTR1;
		uint32_t rdpmc_idx;
		uint8_t vector;
		uint64_t val;

		vector = wrmsr_safe(msr, test_val);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);

		vector = rdmsr_safe(msr, &val);
		GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);

		/* On #GP, the result of RDMSR is undefined. */
		if (!expect_gp)
			GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);

		/*
		 * Redo the read tests with RDPMC, which has different indexing
		 * semantics and additional capabilities.
		 */
		rdpmc_idx = i;
		if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
			rdpmc_idx |= INTEL_RDPMC_FIXED;

		guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);

		/*
		 * KVM doesn't support non-architectural PMUs, i.e. it should
		 * be impossible to have fast mode RDPMC. Verify that
		 * attempting to use fast RDPMC always #GPs.
		 */
		GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
		rdpmc_idx |= INTEL_RDPMC_FAST;
		guest_test_rdpmc(rdpmc_idx, false, -1ull);

		vector = wrmsr_safe(msr, 0);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
	}
}

static void guest_test_gp_counters(void)
{
	uint8_t pmu_version = guest_get_pmu_version();
	uint8_t nr_gp_counters = 0;
	uint32_t base_msr;

	if (pmu_version)
		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);

	/*
	 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
	 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
	 * of GP counters. If there are no GP counters, require KVM to leave
	 * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
	 * follow the spirit of the architecture and only globally enable GP
	 * counters, of which there are none.
	 */
	if (pmu_version > 1) {
		uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);

		if (nr_gp_counters)
			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
		else
			GUEST_ASSERT_EQ(global_ctrl, 0);
	}

	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_msr = MSR_IA32_PMC0;
	else
		base_msr = MSR_IA32_PERFCTR0;

	guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
	GUEST_DONE();
}

static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t nr_gp_counters)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
				nr_gp_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

static void guest_test_fixed_counters(void)
{
	uint64_t supported_bitmask = 0;
	uint8_t nr_fixed_counters = 0;
	uint8_t i;

	/* Fixed counters require Architectural vPMU Version 2+. */
	if (guest_get_pmu_version() >= 2)
		nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);

	/*
	 * The supported bitmask for fixed counters was introduced in PMU
	 * version 5.
	 */
	if (guest_get_pmu_version() >= 5)
		supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);

	guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
			     nr_fixed_counters, supported_bitmask);

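	/*
	 * Verify that programming unsupported fixed counters #GPs, and that
	 * supported fixed counters actually count when enabled via
	 * FIXED_CTR_CTRL and PERF_GLOBAL_CTRL.
	 */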
	for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
		uint8_t vector;
		uint64_t val;

		if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
			vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
					    FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in FIXED_CTR_CTRL", i);

			vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
					    FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
			continue;
		}

		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);

		GUEST_ASSERT_NE(val, 0);
	}
	GUEST_DONE();
}

static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
				uint8_t nr_fixed_counters,
				uint32_t supported_bitmask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
				supported_bitmask);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
				nr_fixed_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

static void test_intel_counters(void)
{
	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	unsigned int i;
	uint8_t v, j;
	uint32_t k;

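	/* Run every test with and without full-width writes (if supported). */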
	const uint64_t perf_caps[] = {
		0,
		PMU_CAP_FW_WRITES,
	};

	/*
	 * To keep the total runtime reasonable, test only a handful of select,
	 * semi-arbitrary values for the mask of unavailable PMU events. Test
	 * 0 (all events available) and all ones (no events available), as well
	 * as alternating bit sequences, e.g. to detect if KVM is checking the
	 * wrong bit(s).
	 */
	const uint32_t unavailable_masks[] = {
		0x0,
		0xffffffffu,
		0xaaaaaaaau,
		0x55555555u,
		0xf0f0f0f0u,
		0x0f0f0f0fu,
		0xa0a0a0a0u,
		0x0a0a0a0au,
		0x50505050u,
		0x05050505u,
	};

	/*
	 * Test up to PMU v5, which is the current maximum version defined by
	 * Intel, i.e. the last version that is guaranteed to be backwards
	 * compatible with KVM's existing behavior.
	 */
	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);

	/*
	 * Detect the existence of events that aren't supported by selftests.
	 * This will (obviously) fail any time hardware adds support for a new
	 * event, but it's worth paying that price to keep the test fresh.
	 */
	TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
		    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
		    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

	/*
	 * Iterate over known arch events irrespective of KVM/hardware support
	 * to verify that KVM doesn't reject programming of events just because
	 * the *architectural* encoding is unsupported. Track which events are
	 * supported in hardware; the guest side will validate supported events
	 * count correctly, even if *enumeration* of the event is unsupported
	 * by KVM and/or isn't exposed to the guest.
	 */
	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
		if (this_pmu_has(intel_event_to_feature(i).gp_event))
			hardware_pmu_arch_events |= BIT(i);
	}

	for (v = 0; v <= max_pmu_version; v++) {
		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
			if (!kvm_has_perf_caps && perf_caps[i])
				continue;

			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);

			/*
			 * Test single bits for all PMU versions and lengths up
			 * to the number of events + 1 (to verify KVM doesn't do
			 * weird things if the guest length is greater than the
			 * host length). Explicitly test a mask of '0' and all
			 * ones, i.e. all events being available and unavailable.
			 */
			for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
				for (k = 0; k < ARRAY_SIZE(unavailable_masks); k++)
					test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
			}

			pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_gp_counters; j++)
				test_gp_counters(v, perf_caps[i], j);

			pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_fixed_counters; j++) {
				for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
					test_fixed_counters(v, perf_caps[i], j, k);
			}
		}
	}
}

int main(int argc, char *argv[])
{
	TEST_REQUIRE(kvm_is_pmu_enabled());

	TEST_REQUIRE(host_cpu_is_intel);
	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);

	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);

	test_intel_counters();

	return 0;
}
