xref: /linux/tools/testing/selftests/kvm/x86/pmu_counters_test.c (revision 6ec982b5a2c7c9f0f956fd955416ac11f52bf50a)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023, Tencent, Inc.
 */
#include <x86intrin.h>

#include "pmu.h"
#include "processor.h"

/* Number of iterations of the loop for the guest measurement payload. */
#define NUM_LOOPS			10

/* Each iteration of the loop retires one branch instruction. */
#define NUM_BRANCH_INSNS_RETIRED	(NUM_LOOPS)

/*
 * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
 * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
 */
#define NUM_INSNS_PER_LOOP		6

/*
 * Number of "extra" instructions that will be counted, i.e. the number of
 * instructions that are needed to set up the loop and then disable the
 * counter.  2 MOV, 2 XOR, 1 WRMSR.
 */
#define NUM_EXTRA_INSNS			5

/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED		(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)

/* Track which architectural events are supported by hardware. */
static u32 hardware_pmu_arch_events;

static u8 kvm_pmu_version;
static bool kvm_has_perf_caps;

#define X86_PMU_FEATURE_NULL						\
({									\
	struct kvm_x86_pmu_feature feature = {};			\
									\
	feature;							\
})

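/*
 * A feature created via X86_PMU_FEATURE_NULL is all zeros; type-pun the
 * struct to a raw u64 so that checking for the "null" feature is a single
 * comparison.
 */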
static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
{
	return !(*(u64 *)&event);
}

struct kvm_intel_pmu_event {
	struct kvm_x86_pmu_feature gp_event;
	struct kvm_x86_pmu_feature fixed_event;
};

/*
 * Wrap the array to appease the compiler, as the macros used to construct each
 * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
 * compiler often thinks the feature definitions aren't compile-time constants.
 */
static struct kvm_intel_pmu_event intel_event_to_feature(u8 idx)
{
	const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
		[INTEL_ARCH_CPU_CYCLES_INDEX]		 = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]	 = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
		/*
		 * Note, the fixed counter for reference cycles is NOT the same as the
		 * general purpose architectural event.  The fixed counter explicitly
		 * counts at the same frequency as the TSC, whereas the GP event counts
		 * at a fixed, but uarch specific, frequency.  Bundle them here for
		 * simplicity.
		 */
		[INTEL_ARCH_REFERENCE_CYCLES_INDEX]	 = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
		[INTEL_ARCH_LLC_REFERENCES_INDEX]	 = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LLC_MISSES_INDEX]		 = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_RETIRED_INDEX]	 = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
		[INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_RETIRING_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LBR_INSERTS_INDEX]		 = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
	};

	kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);

	return __intel_event_to_feature[idx];
}

static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
						  void *guest_code,
						  u8 pmu_version,
						  u64 perf_capabilities)
{
	struct kvm_vm *vm;

	vm = vm_create_with_one_vcpu(vcpu, guest_code);
	sync_global_to_guest(vm, kvm_pmu_version);
	sync_global_to_guest(vm, hardware_pmu_arch_events);

	/*
	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
	 */
	if (kvm_has_perf_caps)
		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
	return vm;
}

static void run_vcpu(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	do {
		vcpu_run(vcpu);
		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_PRINTF:
			pr_info("%s", uc.buffer);
			break;
		case UCALL_DONE:
			break;
		default:
			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
		}
	} while (uc.cmd != UCALL_DONE);
}

static u8 guest_get_pmu_version(void)
{
	/*
	 * Return the effective PMU version, i.e. the minimum between what KVM
	 * supports and what is enumerated to the guest.  The host deliberately
	 * advertises a PMU version to the guest beyond what is actually
	 * supported by KVM to verify KVM doesn't freak out and do something
	 * bizarre with an architecturally valid, but unsupported, version.
	 */
	return min_t(u8, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}

/*
 * If an architectural event is supported and guaranteed to generate at least
 * one "hit", assert that its count is non-zero.  If an event isn't supported
 * or the test can't guarantee the associated action will occur, then all bets
 * are off regarding the count, i.e. no checks can be done.
 *
 * Sanity check that in all cases, the event doesn't count when it's disabled,
 * and that KVM correctly emulates the write of an arbitrary value.
 */
static void guest_assert_event_count(u8 idx, u32 pmc, u32 pmc_msr)
{
	u64 count;

	count = _rdpmc(pmc);
	if (!(hardware_pmu_arch_events & BIT(idx)))
		goto sanity_checks;

	switch (idx) {
	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
		/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
		if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
			GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
		else
			GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
		break;
	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
		/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
		if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
			GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
		else
			GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
		break;
	case INTEL_ARCH_LLC_REFERENCES_INDEX:
	case INTEL_ARCH_LLC_MISSES_INDEX:
		if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
		    !this_cpu_has(X86_FEATURE_CLFLUSH))
			break;
		fallthrough;
	case INTEL_ARCH_CPU_CYCLES_INDEX:
	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
	case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
	case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
		GUEST_ASSERT_NE(count, 0);
		break;
	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
	case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
		__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
			       "Expected top-down slots >= %u, got count = %lu",
			       NUM_INSNS_RETIRED, count);
		break;
	default:
		break;
	}

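	/*
	 * The final WRMSR of the measured sequence disabled the event, so
	 * re-running the loop must not change the count.  Then verify KVM
	 * emulates a write of an arbitrary value to the PMC, i.e. that the
	 * value can be read back.
	 */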
sanity_checks:
	__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
	GUEST_ASSERT_EQ(_rdpmc(pmc), count);

	wrmsr(pmc_msr, 0xdead);
	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}

/*
 * Enable and disable the PMC in a monolithic asm blob to ensure that the
 * compiler can't insert _any_ code into the measured sequence.  Note, ECX
 * doesn't need to be clobbered as the input value, @_msr, is restored
 * before the end of the sequence.
 *
 * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
 * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
 * misses, i.e. to allow testing that those events actually count.
 *
 * If forced emulation is enabled (and specified), force emulation on a subset
 * of the measured code to verify that KVM correctly emulates instructions and
 * branches retired events in conjunction with hardware also counting said
 * events.
 */
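/*
 * Note, the measured sequence below must retire exactly NUM_INSNS_PER_LOOP
 * instructions per iteration, plus NUM_EXTRA_INSNS for setup and teardown;
 * update those definitions if the asm blob changes.
 */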
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
do {										\
	__asm__ __volatile__("wrmsr\n\t"					\
			     "mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
			     "1:\n\t"						\
			     FEP "enter $0, $0\n\t"				\
			     clflush "\n\t"					\
			     "mfence\n\t"					\
			     "mov %[m], %%eax\n\t"				\
			     FEP "leave\n\t"					\
			     FEP "loop 1b\n\t"					\
			     FEP "mov %%edi, %%ecx\n\t"				\
			     FEP "xor %%eax, %%eax\n\t"				\
			     FEP "xor %%edx, %%edx\n\t"				\
			     "wrmsr\n\t"					\
			     :: "a"((u32)_value), "d"(_value >> 32),		\
				"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version)	\
	);									\
} while (0)

#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)		\
do {										\
	wrmsr(_pmc_msr, 0);							\
										\
	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP);	\
	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush  %[m]", FEP);	\
	else									\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
										\
	guest_assert_event_count(_idx, _pmc, _pmc_msr);				\
} while (0)

static void __guest_test_arch_event(u8 idx, u32 pmc, u32 pmc_msr,
				    u32 ctrl_msr, u64 ctrl_msr_value)
{
	GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");

	if (is_forced_emulation_enabled)
		GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}

static void guest_test_arch_event(u8 idx)
{
	u32 nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	u32 pmu_version = guest_get_pmu_version();
	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
	bool guest_has_perf_global_ctrl = pmu_version >= 2;
	struct kvm_x86_pmu_feature gp_event, fixed_event;
	u32 base_pmc_msr;
	unsigned int i;

	/* The host side shouldn't invoke this without a guest PMU. */
	GUEST_ASSERT(pmu_version);

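	/*
	 * Use the full-width counter MSRs (MSR_IA32_PMC0) iff full-width
	 * writes are enumerated in PERF_CAPABILITIES, otherwise use the
	 * legacy counter MSRs (MSR_IA32_PERFCTR0).
	 */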
	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_pmc_msr = MSR_IA32_PMC0;
	else
		base_pmc_msr = MSR_IA32_PERFCTR0;

	gp_event = intel_event_to_feature(idx).gp_event;
	GUEST_ASSERT_EQ(idx, gp_event.f.bit);

	GUEST_ASSERT(nr_gp_counters);

	for (i = 0; i < nr_gp_counters; i++) {
		u64 eventsel = ARCH_PERFMON_EVENTSEL_OS |
				    ARCH_PERFMON_EVENTSEL_ENABLE |
				    intel_pmu_arch_events[idx];

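		/*
		 * Clear the event selector to purge any stale configuration;
		 * the event itself is programmed and enabled when the measured
		 * blob writes @eventsel to the selector MSR.  For v2+, enable
		 * only counter @i in PERF_GLOBAL_CTRL.
		 */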
		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
		if (guest_has_perf_global_ctrl)
			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));

		__guest_test_arch_event(idx, i, base_pmc_msr + i,
					MSR_P6_EVNTSEL0 + i, eventsel);
	}

	if (!guest_has_perf_global_ctrl)
		return;

	fixed_event = intel_event_to_feature(idx).fixed_event;
	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
		return;

	i = fixed_event.f.bit;

	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));

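	/*
	 * Fixed counters are read via RDPMC with INTEL_RDPMC_FIXED (bit 30)
	 * set in the index; the low bits select which fixed counter to read.
	 */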
	__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
				MSR_CORE_PERF_FIXED_CTR0 + i,
				MSR_CORE_PERF_GLOBAL_CTRL,
				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
}

static void guest_test_arch_events(void)
{
	u8 i;

	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
		guest_test_arch_event(i);

	GUEST_DONE();
}

static void test_arch_events(u8 pmu_version, u64 perf_capabilities,
			     u8 length, u32 unavailable_mask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* Testing arch events requires a vPMU (there are no negative tests). */
	if (!pmu_version)
		return;

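	/*
	 * Clamp the mask of unavailable events to the width of the underlying
	 * CPUID field, i.e. drop bits that can't be enumerated to the guest.
	 */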
	unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
				    X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
				length);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
				unavailable_mask);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

/*
 * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
 * that aren't defined counter MSRs *probably* don't exist, but there's no
 * guarantee that currently undefined MSR indices won't be used for something
 * other than PMCs in the future.
 */
#define MAX_NR_GP_COUNTERS	8
#define MAX_NR_FIXED_COUNTERS	3

#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)		\
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,			\
	       "Expected %s on " #insn "(0x%x), got %s",			\
	       expect_gp ? "#GP" : "no fault", msr, ex_str(vector))

#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)			\
	__GUEST_ASSERT(val == expected,						\
		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
		       msr, expected, val)

static void guest_test_rdpmc(u32 rdpmc_idx, bool expect_success,
			     u64 expected_val)
{
	u8 vector;
	u64 val;

	vector = rdpmc_safe(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);

	if (!is_forced_emulation_enabled)
		return;

	vector = rdpmc_safe_fep(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
}

static void guest_rd_wr_counters(u32 base_msr, u8 nr_possible_counters,
				 u8 nr_counters, u32 or_mask)
{
	const bool pmu_has_fast_mode = !guest_get_pmu_version();
	u8 i;

	for (i = 0; i < nr_possible_counters; i++) {
		/*
		 * TODO: Test a value that validates full-width writes and the
		 * width of the counters.
		 */
		const u64 test_val = 0xffff;
		const u32 msr = base_msr + i;

		/*
		 * Fixed counters are supported if the counter is less than the
		 * number of enumerated contiguous counters *or* the counter is
		 * explicitly enumerated in the supported counters mask.
		 */
		const bool expect_success = i < nr_counters || (or_mask & BIT(i));

		/*
		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
		 * unsupported, i.e. doesn't #GP and reads back '0'.
		 */
		const u64 expected_val = expect_success ? test_val : 0;
		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
				       msr != MSR_P6_PERFCTR1;
		u32 rdpmc_idx;
		u8 vector;
		u64 val;

		vector = wrmsr_safe(msr, test_val);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);

		vector = rdmsr_safe(msr, &val);
		GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);

		/* On #GP, the result of RDMSR is undefined. */
		if (!expect_gp)
			GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);

		/*
		 * Redo the read tests with RDPMC, which has different indexing
		 * semantics and additional capabilities.
		 */
		rdpmc_idx = i;
		if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
			rdpmc_idx |= INTEL_RDPMC_FIXED;

		guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);

		/*
		 * KVM doesn't support non-architectural PMUs, i.e. it should
		 * be impossible to have fast mode RDPMC.  Verify that
		 * attempting to use fast RDPMC always #GPs.
		 */
		GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
		rdpmc_idx |= INTEL_RDPMC_FAST;
		guest_test_rdpmc(rdpmc_idx, false, -1ull);

		vector = wrmsr_safe(msr, 0);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
	}
}

static void guest_test_gp_counters(void)
{
	u8 pmu_version = guest_get_pmu_version();
	u8 nr_gp_counters = 0;
	u32 base_msr;

	if (pmu_version)
		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);

	/*
	 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
	 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
	 * of GP counters.  If there are no GP counters, require KVM to leave
	 * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
	 * follow the spirit of the architecture and only globally enable GP
	 * counters, of which there are none.
	 */
	if (pmu_version > 1) {
		u64 global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);

		if (nr_gp_counters)
			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
		else
			GUEST_ASSERT_EQ(global_ctrl, 0);
	}

	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_msr = MSR_IA32_PMC0;
	else
		base_msr = MSR_IA32_PERFCTR0;

	guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
	GUEST_DONE();
}

static void test_gp_counters(u8 pmu_version, u64 perf_capabilities,
			     u8 nr_gp_counters)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
				nr_gp_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

static void guest_test_fixed_counters(void)
{
	u64 supported_bitmask = 0;
	u8 nr_fixed_counters = 0;
	u8 i;

	/* Fixed counters require Architectural vPMU Version 2+. */
	if (guest_get_pmu_version() >= 2)
		nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);

	/*
	 * The supported bitmask for fixed counters was introduced in PMU
	 * version 5.
	 */
	if (guest_get_pmu_version() >= 5)
		supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);

	guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
			     nr_fixed_counters, supported_bitmask);

	for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
		u8 vector;
		u64 val;

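		/*
		 * Attempting to enable an unsupported fixed counter, via
		 * either FIXED_CTR_CTRL or PERF_GLOBAL_CTRL, must #GP.
		 */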
		if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
			vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
					    FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in FIXED_CTR_CTRL", i);

			vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
					    FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
			continue;
		}

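		/*
		 * For supported counters, reset the counter, enable it in both
		 * FIXED_CTR_CTRL and PERF_GLOBAL_CTRL, run the loop payload,
		 * and verify the counter actually counted.
		 */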
		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);

		GUEST_ASSERT_NE(val, 0);
	}
	GUEST_DONE();
}

static void test_fixed_counters(u8 pmu_version, u64 perf_capabilities,
				u8 nr_fixed_counters, u32 supported_bitmask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
				supported_bitmask);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
				nr_fixed_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

static void test_intel_counters(void)
{
	u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	u8 nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	u8 pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	unsigned int i;
	u8 v, j;
	u32 k;

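	/*
	 * Test the vPMU both with and without full-width counter writes
	 * enabled in guest PERF_CAPABILITIES.
	 */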
	const u64 perf_caps[] = {
		0,
		PMU_CAP_FW_WRITES,
	};

	/*
	 * To keep the total runtime reasonable, test only a handful of select,
	 * semi-arbitrary values for the mask of unavailable PMU events.  Test
	 * 0 (all events available) and all ones (no events available) as well
	 * as alternating bit sequences, e.g. to detect if KVM is checking the
	 * wrong bit(s).
	 */
	const u32 unavailable_masks[] = {
		0x0,
		0xffffffffu,
		0xaaaaaaaau,
		0x55555555u,
		0xf0f0f0f0u,
		0x0f0f0f0fu,
		0xa0a0a0a0u,
		0x0a0a0a0au,
		0x50505050u,
		0x05050505u,
	};

	/*
	 * Test up to PMU v5, which is the current maximum version defined by
	 * Intel, i.e. is the last version that is guaranteed to be backwards
	 * compatible with KVM's existing behavior.
	 */
	u8 max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);

	/*
	 * Detect the existence of events that aren't supported by selftests.
	 * This will (obviously) fail any time hardware adds support for a new
	 * event, but it's worth paying that price to keep the test fresh.
	 */
	TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
		    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
		    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

	/*
	 * Iterate over known arch events irrespective of KVM/hardware support
	 * to verify that KVM doesn't reject programming of events just because
	 * the *architectural* encoding is unsupported.  Track which events are
	 * supported in hardware; the guest side will validate supported events
	 * count correctly, even if *enumeration* of the event is unsupported
	 * by KVM and/or isn't exposed to the guest.
	 */
	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
		if (this_pmu_has(intel_event_to_feature(i).gp_event))
			hardware_pmu_arch_events |= BIT(i);
	}

	for (v = 0; v <= max_pmu_version; v++) {
		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
			if (!kvm_has_perf_caps && perf_caps[i])
				continue;

			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);

			/*
			 * Test all PMU versions and lengths up to the number
			 * of events + 1 (to verify KVM doesn't do weird things
			 * if the guest length is greater than the host
			 * length).  Explicitly test a mask of '0' and all
			 * ones, i.e. all events being available and
			 * unavailable.
			 */
			for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
				for (k = 0; k < ARRAY_SIZE(unavailable_masks); k++)
					test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
			}

			pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_gp_counters; j++)
				test_gp_counters(v, perf_caps[i], j);

			pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_fixed_counters; j++) {
				for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
					test_fixed_counters(v, perf_caps[i], j, k);
			}
		}
	}
}

int main(int argc, char *argv[])
{
	TEST_REQUIRE(kvm_is_pmu_enabled());

	TEST_REQUIRE(host_cpu_is_intel);
	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);

	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
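	/* PDCM enumerates the existence of IA32_PERF_CAPABILITIES. */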
	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);

	test_intel_counters();

	return 0;
}