xref: /linux/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c (revision c94cd9508b1335b949fd13ebd269313c65492df0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023, Tencent, Inc.
4  */
5 #include <x86intrin.h>
6 
7 #include "pmu.h"
8 #include "processor.h"
9 
/*
 * Number of iterations of the loop for the guest measurement payload.  This
 * must remain a plain integer literal macro: GUEST_MEASURE_EVENT() pastes it
 * into its asm template via __stringify().
 */
#define NUM_LOOPS			10

/* Each iteration of the loop retires one branch instruction. */
#define NUM_BRANCH_INSNS_RETIRED	(NUM_LOOPS)

/*
 * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
 * 1 LOOP.
 */
#define NUM_INSNS_PER_LOOP		3

/*
 * Number of "extra" instructions that will be counted, i.e. the number of
 * instructions that are needed to set up the loop and then disable the
 * counter.  2 MOV, 2 XOR, 1 WRMSR.
 */
#define NUM_EXTRA_INSNS			5

/*
 * Total number of instructions retired within the measured section, i.e. the
 * exact count expected from the instructions retired event.
 */
#define NUM_INSNS_RETIRED		(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
31 
32 
/* PMU version reported by KVM; set once in main() and synced into each guest. */
static uint8_t kvm_pmu_version;
/* True if KVM reports X86_FEATURE_PDCM; gates writes to PERF_CAPABILITIES. */
static bool kvm_has_perf_caps;
35 
36 static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
37 						  void *guest_code,
38 						  uint8_t pmu_version,
39 						  uint64_t perf_capabilities)
40 {
41 	struct kvm_vm *vm;
42 
43 	vm = vm_create_with_one_vcpu(vcpu, guest_code);
44 	sync_global_to_guest(vm, kvm_pmu_version);
45 
46 	/*
47 	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
48 	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
49 	 */
50 	if (kvm_has_perf_caps)
51 		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
52 
53 	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
54 	return vm;
55 }
56 
57 static void run_vcpu(struct kvm_vcpu *vcpu)
58 {
59 	struct ucall uc;
60 
61 	do {
62 		vcpu_run(vcpu);
63 		switch (get_ucall(vcpu, &uc)) {
64 		case UCALL_SYNC:
65 			break;
66 		case UCALL_ABORT:
67 			REPORT_GUEST_ASSERT(uc);
68 			break;
69 		case UCALL_PRINTF:
70 			pr_info("%s", uc.buffer);
71 			break;
72 		case UCALL_DONE:
73 			break;
74 		default:
75 			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
76 		}
77 	} while (uc.cmd != UCALL_DONE);
78 }
79 
80 static uint8_t guest_get_pmu_version(void)
81 {
82 	/*
83 	 * Return the effective PMU version, i.e. the minimum between what KVM
84 	 * supports and what is enumerated to the guest.  The host deliberately
85 	 * advertises a PMU version to the guest beyond what is actually
86 	 * supported by KVM to verify KVM doesn't freak out and do something
87 	 * bizarre with an architecturally valid, but unsupported, version.
88 	 */
89 	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
90 }
91 
92 /*
93  * If an architectural event is supported and guaranteed to generate at least
94  * one "hit, assert that its count is non-zero.  If an event isn't supported or
95  * the test can't guarantee the associated action will occur, then all bets are
96  * off regarding the count, i.e. no checks can be done.
97  *
98  * Sanity check that in all cases, the event doesn't count when it's disabled,
99  * and that KVM correctly emulates the write of an arbitrary value.
100  */
101 static void guest_assert_event_count(uint8_t idx,
102 				     struct kvm_x86_pmu_feature event,
103 				     uint32_t pmc, uint32_t pmc_msr)
104 {
105 	uint64_t count;
106 
107 	count = _rdpmc(pmc);
108 	if (!this_pmu_has(event))
109 		goto sanity_checks;
110 
111 	switch (idx) {
112 	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
113 		GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
114 		break;
115 	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
116 		GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
117 		break;
118 	case INTEL_ARCH_LLC_REFERENCES_INDEX:
119 	case INTEL_ARCH_LLC_MISSES_INDEX:
120 		if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
121 		    !this_cpu_has(X86_FEATURE_CLFLUSH))
122 			break;
123 		fallthrough;
124 	case INTEL_ARCH_CPU_CYCLES_INDEX:
125 	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
126 		GUEST_ASSERT_NE(count, 0);
127 		break;
128 	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
129 		GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
130 		break;
131 	default:
132 		break;
133 	}
134 
135 sanity_checks:
136 	__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
137 	GUEST_ASSERT_EQ(_rdpmc(pmc), count);
138 
139 	wrmsr(pmc_msr, 0xdead);
140 	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
141 }
142 
/*
 * Enable and disable the PMC in a monolithic asm blob to ensure that the
 * compiler can't insert _any_ code into the measured sequence.  The first
 * WRMSR writes @_value to @_msr, the final WRMSR writes '0' to @_msr.
 *
 * Note, ECX doesn't need to be clobbered as the input value, @_msr, is
 * restored (from EDI) before the end of the sequence.  EAX and EDX, on the
 * other hand, are zeroed by the sequence and NOT restored, and so must be
 * read-write operands; the compiler is allowed to assume that input-only
 * operands are unchanged by the asm.  The temporaries holding the two halves
 * of @_value live outside the asm and so don't affect the measured sequence.
 *
 * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
 * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
 * misses, i.e. to allow testing that those events actually count.
 *
 * If forced emulation is enabled (and specified), force emulation on a subset
 * of the measured code to verify that KVM correctly emulates instructions and
 * branches retired events in conjunction with hardware also counting said
 * events.
 */
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
do {										\
	uint32_t _lo = (uint32_t)(_value);					\
	uint32_t _hi = (uint32_t)((uint64_t)(_value) >> 32);			\
										\
	__asm__ __volatile__("wrmsr\n\t"					\
			     " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
			     "1:\n\t"						\
			     clflush "\n\t"					\
			     "mfence\n\t"					\
			     FEP "loop 1b\n\t"					\
			     FEP "mov %%edi, %%ecx\n\t"				\
			     FEP "xor %%eax, %%eax\n\t"				\
			     FEP "xor %%edx, %%edx\n\t"				\
			     "wrmsr\n\t"					\
			     : "+a"(_lo), "+d"(_hi)				\
			     : "c"(_msr), "D"(_msr)				\
			     : "cc"						\
	);									\
} while (0)
174 
/*
 * Zero the counter backed by @_pmc_msr, run the measured payload with the
 * counter enabled via @_ctrl_msr/@_value, then verify the resulting count.
 * The payload variant is chosen by the flush instruction the CPU supports:
 * CLFLUSHOPT, else CLFLUSH, else a NOP placeholder so that the number of
 * instructions in the loop stays the same.
 *
 * Every access goes through the macro's own parameters; the counter MSR in
 * particular must be @_pmc_msr and not a same-named variable that happens to
 * be in scope at the call site.
 */
#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)	\
do {										\
	wrmsr(_pmc_msr, 0);							\
										\
	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);	\
	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);	\
	else									\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
										\
	guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);			\
} while (0)
188 
189 static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
190 				    uint32_t pmc, uint32_t pmc_msr,
191 				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
192 {
193 	GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
194 
195 	if (is_forced_emulation_enabled)
196 		GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
197 }
198 
/*
 * Evaluates to an empty-initialized kvm_x86_pmu_feature, used as the "no
 * fixed counter for this event" placeholder.
 */
#define X86_PMU_FEATURE_NULL						\
({									\
	struct kvm_x86_pmu_feature null_feature = {};			\
									\
	null_feature;							\
})
205 
206 static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
207 {
208 	return !(*(u64 *)&event);
209 }
210 
211 static void guest_test_arch_event(uint8_t idx)
212 {
213 	const struct {
214 		struct kvm_x86_pmu_feature gp_event;
215 		struct kvm_x86_pmu_feature fixed_event;
216 	} intel_event_to_feature[] = {
217 		[INTEL_ARCH_CPU_CYCLES_INDEX]		 = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
218 		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]	 = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
219 		/*
220 		 * Note, the fixed counter for reference cycles is NOT the same
221 		 * as the general purpose architectural event.  The fixed counter
222 		 * explicitly counts at the same frequency as the TSC, whereas
223 		 * the GP event counts at a fixed, but uarch specific, frequency.
224 		 * Bundle them here for simplicity.
225 		 */
226 		[INTEL_ARCH_REFERENCE_CYCLES_INDEX]	 = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
227 		[INTEL_ARCH_LLC_REFERENCES_INDEX]	 = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
228 		[INTEL_ARCH_LLC_MISSES_INDEX]		 = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
229 		[INTEL_ARCH_BRANCHES_RETIRED_INDEX]	 = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
230 		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
231 		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
232 	};
233 
234 	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
235 	uint32_t pmu_version = guest_get_pmu_version();
236 	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
237 	bool guest_has_perf_global_ctrl = pmu_version >= 2;
238 	struct kvm_x86_pmu_feature gp_event, fixed_event;
239 	uint32_t base_pmc_msr;
240 	unsigned int i;
241 
242 	/* The host side shouldn't invoke this without a guest PMU. */
243 	GUEST_ASSERT(pmu_version);
244 
245 	if (this_cpu_has(X86_FEATURE_PDCM) &&
246 	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
247 		base_pmc_msr = MSR_IA32_PMC0;
248 	else
249 		base_pmc_msr = MSR_IA32_PERFCTR0;
250 
251 	gp_event = intel_event_to_feature[idx].gp_event;
252 	GUEST_ASSERT_EQ(idx, gp_event.f.bit);
253 
254 	GUEST_ASSERT(nr_gp_counters);
255 
256 	for (i = 0; i < nr_gp_counters; i++) {
257 		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
258 				    ARCH_PERFMON_EVENTSEL_ENABLE |
259 				    intel_pmu_arch_events[idx];
260 
261 		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
262 		if (guest_has_perf_global_ctrl)
263 			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
264 
265 		__guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
266 					MSR_P6_EVNTSEL0 + i, eventsel);
267 	}
268 
269 	if (!guest_has_perf_global_ctrl)
270 		return;
271 
272 	fixed_event = intel_event_to_feature[idx].fixed_event;
273 	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
274 		return;
275 
276 	i = fixed_event.f.bit;
277 
278 	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
279 
280 	__guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
281 				MSR_CORE_PERF_FIXED_CTR0 + i,
282 				MSR_CORE_PERF_GLOBAL_CTRL,
283 				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
284 }
285 
286 static void guest_test_arch_events(void)
287 {
288 	uint8_t i;
289 
290 	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
291 		guest_test_arch_event(i);
292 
293 	GUEST_DONE();
294 }
295 
296 static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
297 			     uint8_t length, uint8_t unavailable_mask)
298 {
299 	struct kvm_vcpu *vcpu;
300 	struct kvm_vm *vm;
301 
302 	/* Testing arch events requires a vPMU (there are no negative tests). */
303 	if (!pmu_version)
304 		return;
305 
306 	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
307 					 pmu_version, perf_capabilities);
308 
309 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
310 				length);
311 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
312 				unavailable_mask);
313 
314 	run_vcpu(vcpu);
315 
316 	kvm_vm_free(vm);
317 }
318 
/*
 * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
 * that aren't defined counter MSRs *probably* don't exist, but there's no
 * guarantee that currently undefined MSR indices won't be used for something
 * other than PMCs in the future.
 *
 * These are the number of counter MSRs that the guest probes per counter
 * type, regardless of how many counters are enumerated to the guest.
 */
#define MAX_NR_GP_COUNTERS	8
#define MAX_NR_FIXED_COUNTERS	3
327 
/*
 * Assert that accessing counter @msr with @insn either faulted with #GP
 * (@expect_gp is true) or didn't fault at all (@expect_gp is false), where
 * @vector is the exception vector returned by the "safe" accessor.
 */
#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)		\
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,			\
	       "Expected %s on " #insn "(0x%x), got vector %u",			\
	       expect_gp ? "#GP" : "no fault", msr, vector)
332 
/*
 * Assert that @val, the value obtained by reading counter @msr with @insn,
 * matches @expected.  Only the macro's own parameters are referenced, i.e. the
 * macro doesn't depend on the names of the caller's variables, and there is
 * no trailing semicolon so that the macro behaves like a single statement.
 */
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)			\
	__GUEST_ASSERT(val == expected,						\
		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
		       msr, expected, val)
337 
338 static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
339 			     uint64_t expected_val)
340 {
341 	uint8_t vector;
342 	uint64_t val;
343 
344 	vector = rdpmc_safe(rdpmc_idx, &val);
345 	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
346 	if (expect_success)
347 		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
348 
349 	if (!is_forced_emulation_enabled)
350 		return;
351 
352 	vector = rdpmc_safe_fep(rdpmc_idx, &val);
353 	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
354 	if (expect_success)
355 		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
356 }
357 
358 static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
359 				 uint8_t nr_counters, uint32_t or_mask)
360 {
361 	const bool pmu_has_fast_mode = !guest_get_pmu_version();
362 	uint8_t i;
363 
364 	for (i = 0; i < nr_possible_counters; i++) {
365 		/*
366 		 * TODO: Test a value that validates full-width writes and the
367 		 * width of the counters.
368 		 */
369 		const uint64_t test_val = 0xffff;
370 		const uint32_t msr = base_msr + i;
371 
372 		/*
373 		 * Fixed counters are supported if the counter is less than the
374 		 * number of enumerated contiguous counters *or* the counter is
375 		 * explicitly enumerated in the supported counters mask.
376 		 */
377 		const bool expect_success = i < nr_counters || (or_mask & BIT(i));
378 
379 		/*
380 		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
381 		 * unsupported, i.e. doesn't #GP and reads back '0'.
382 		 */
383 		const uint64_t expected_val = expect_success ? test_val : 0;
384 		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
385 				       msr != MSR_P6_PERFCTR1;
386 		uint32_t rdpmc_idx;
387 		uint8_t vector;
388 		uint64_t val;
389 
390 		vector = wrmsr_safe(msr, test_val);
391 		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
392 
393 		vector = rdmsr_safe(msr, &val);
394 		GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
395 
396 		/* On #GP, the result of RDMSR is undefined. */
397 		if (!expect_gp)
398 			GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
399 
400 		/*
401 		 * Redo the read tests with RDPMC, which has different indexing
402 		 * semantics and additional capabilities.
403 		 */
404 		rdpmc_idx = i;
405 		if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
406 			rdpmc_idx |= INTEL_RDPMC_FIXED;
407 
408 		guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
409 
410 		/*
411 		 * KVM doesn't support non-architectural PMUs, i.e. it should
412 		 * impossible to have fast mode RDPMC.  Verify that attempting
413 		 * to use fast RDPMC always #GPs.
414 		 */
415 		GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
416 		rdpmc_idx |= INTEL_RDPMC_FAST;
417 		guest_test_rdpmc(rdpmc_idx, false, -1ull);
418 
419 		vector = wrmsr_safe(msr, 0);
420 		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
421 	}
422 }
423 
424 static void guest_test_gp_counters(void)
425 {
426 	uint8_t pmu_version = guest_get_pmu_version();
427 	uint8_t nr_gp_counters = 0;
428 	uint32_t base_msr;
429 
430 	if (pmu_version)
431 		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
432 
433 	/*
434 	 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
435 	 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
436 	 * of GP counters.  If there are no GP counters, require KVM to leave
437 	 * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
438 	 * follow the spirit of the architecture and only globally enable GP
439 	 * counters, of which there are none.
440 	 */
441 	if (pmu_version > 1) {
442 		uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
443 
444 		if (nr_gp_counters)
445 			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
446 		else
447 			GUEST_ASSERT_EQ(global_ctrl, 0);
448 	}
449 
450 	if (this_cpu_has(X86_FEATURE_PDCM) &&
451 	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
452 		base_msr = MSR_IA32_PMC0;
453 	else
454 		base_msr = MSR_IA32_PERFCTR0;
455 
456 	guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
457 	GUEST_DONE();
458 }
459 
460 static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
461 			     uint8_t nr_gp_counters)
462 {
463 	struct kvm_vcpu *vcpu;
464 	struct kvm_vm *vm;
465 
466 	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
467 					 pmu_version, perf_capabilities);
468 
469 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
470 				nr_gp_counters);
471 
472 	run_vcpu(vcpu);
473 
474 	kvm_vm_free(vm);
475 }
476 
477 static void guest_test_fixed_counters(void)
478 {
479 	uint64_t supported_bitmask = 0;
480 	uint8_t nr_fixed_counters = 0;
481 	uint8_t i;
482 
483 	/* Fixed counters require Architectural vPMU Version 2+. */
484 	if (guest_get_pmu_version() >= 2)
485 		nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
486 
487 	/*
488 	 * The supported bitmask for fixed counters was introduced in PMU
489 	 * version 5.
490 	 */
491 	if (guest_get_pmu_version() >= 5)
492 		supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
493 
494 	guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
495 			     nr_fixed_counters, supported_bitmask);
496 
497 	for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
498 		uint8_t vector;
499 		uint64_t val;
500 
501 		if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
502 			vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
503 					    FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
504 			__GUEST_ASSERT(vector == GP_VECTOR,
505 				       "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
506 
507 			vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
508 					    FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
509 			__GUEST_ASSERT(vector == GP_VECTOR,
510 				       "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
511 			continue;
512 		}
513 
514 		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
515 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
516 		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
517 		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
518 		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
519 		val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
520 
521 		GUEST_ASSERT_NE(val, 0);
522 	}
523 	GUEST_DONE();
524 }
525 
526 static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
527 				uint8_t nr_fixed_counters,
528 				uint32_t supported_bitmask)
529 {
530 	struct kvm_vcpu *vcpu;
531 	struct kvm_vm *vm;
532 
533 	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
534 					 pmu_version, perf_capabilities);
535 
536 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
537 				supported_bitmask);
538 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
539 				nr_fixed_counters);
540 
541 	run_vcpu(vcpu);
542 
543 	kvm_vm_free(vm);
544 }
545 
546 static void test_intel_counters(void)
547 {
548 	uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
549 	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
550 	uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
551 	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
552 	unsigned int i;
553 	uint8_t v, j;
554 	uint32_t k;
555 
556 	const uint64_t perf_caps[] = {
557 		0,
558 		PMU_CAP_FW_WRITES,
559 	};
560 
561 	/*
562 	 * Test up to PMU v5, which is the current maximum version defined by
563 	 * Intel, i.e. is the last version that is guaranteed to be backwards
564 	 * compatible with KVM's existing behavior.
565 	 */
566 	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
567 
568 	/*
569 	 * Detect the existence of events that aren't supported by selftests.
570 	 * This will (obviously) fail any time the kernel adds support for a
571 	 * new event, but it's worth paying that price to keep the test fresh.
572 	 */
573 	TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
574 		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
575 		    nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
576 
577 	/*
578 	 * Force iterating over known arch events regardless of whether or not
579 	 * KVM/hardware supports a given event.
580 	 */
581 	nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
582 
583 	for (v = 0; v <= max_pmu_version; v++) {
584 		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
585 			if (!kvm_has_perf_caps && perf_caps[i])
586 				continue;
587 
588 			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
589 				v, perf_caps[i]);
590 			/*
591 			 * To keep the total runtime reasonable, test every
592 			 * possible non-zero, non-reserved bitmap combination
593 			 * only with the native PMU version and the full bit
594 			 * vector length.
595 			 */
596 			if (v == pmu_version) {
597 				for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
598 					test_arch_events(v, perf_caps[i], nr_arch_events, k);
599 			}
600 			/*
601 			 * Test single bits for all PMU version and lengths up
602 			 * the number of events +1 (to verify KVM doesn't do
603 			 * weird things if the guest length is greater than the
604 			 * host length).  Explicitly test a mask of '0' and all
605 			 * ones i.e. all events being available and unavailable.
606 			 */
607 			for (j = 0; j <= nr_arch_events + 1; j++) {
608 				test_arch_events(v, perf_caps[i], j, 0);
609 				test_arch_events(v, perf_caps[i], j, 0xff);
610 
611 				for (k = 0; k < nr_arch_events; k++)
612 					test_arch_events(v, perf_caps[i], j, BIT(k));
613 			}
614 
615 			pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
616 				v, perf_caps[i]);
617 			for (j = 0; j <= nr_gp_counters; j++)
618 				test_gp_counters(v, perf_caps[i], j);
619 
620 			pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
621 				v, perf_caps[i]);
622 			for (j = 0; j <= nr_fixed_counters; j++) {
623 				for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
624 					test_fixed_counters(v, perf_caps[i], j, k);
625 			}
626 		}
627 	}
628 }
629 
630 int main(int argc, char *argv[])
631 {
632 	TEST_REQUIRE(kvm_is_pmu_enabled());
633 
634 	TEST_REQUIRE(host_cpu_is_intel);
635 	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
636 	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
637 
638 	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
639 	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
640 
641 	test_intel_counters();
642 
643 	return 0;
644 }
645