xref: /linux/arch/arm64/kvm/pmu-emul.c (revision 132db93572821ec2fdf81e354cc40f558faf7e4f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Linaro Ltd.
4  * Author: Shannon Zhao <shannon.zhao@linaro.org>
5  */
6 
7 #include <linux/cpu.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/perf_event.h>
11 #include <linux/perf/arm_pmu.h>
12 #include <linux/uaccess.h>
13 #include <asm/kvm_emulate.h>
14 #include <kvm/arm_pmu.h>
15 #include <kvm/arm_vgic.h>
16 
17 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
18 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
19 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
20 
/*
 * Flag OR-ed into perf_event_attr.config1 by kvm_pmu_create_perf_event()
 * when the perf event backs a chained pair of guest counters.
 * NOTE(review): presumably interpreted by the host PMU driver as a request
 * for a 64bit event - confirm against the arm_pmu driver.
 */
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
22 
23 /**
24  * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
25  * @vcpu: The vcpu pointer
26  * @select_idx: The counter index
27  */
28 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
29 {
30 	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
31 		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
32 }
33 
34 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
35 {
36 	struct kvm_pmu *pmu;
37 	struct kvm_vcpu_arch *vcpu_arch;
38 
39 	pmc -= pmc->idx;
40 	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
41 	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
42 	return container_of(vcpu_arch, struct kvm_vcpu, arch);
43 }
44 
45 /**
46  * kvm_pmu_pmc_is_chained - determine if the pmc is chained
47  * @pmc: The PMU counter pointer
48  */
49 static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
50 {
51 	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
52 
53 	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
54 }
55 
56 /**
57  * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
58  * @select_idx: The counter index
59  */
60 static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
61 {
62 	return select_idx & 0x1;
63 }
64 
65 /**
66  * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
67  * @pmc: The PMU counter pointer
68  *
69  * When a pair of PMCs are chained together we use the low counter (canonical)
70  * to hold the underlying perf event.
71  */
72 static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
73 {
74 	if (kvm_pmu_pmc_is_chained(pmc) &&
75 	    kvm_pmu_idx_is_high_counter(pmc->idx))
76 		return pmc - 1;
77 
78 	return pmc;
79 }
80 static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
81 {
82 	if (kvm_pmu_idx_is_high_counter(pmc->idx))
83 		return pmc - 1;
84 	else
85 		return pmc + 1;
86 }
87 
88 /**
89  * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
90  * @vcpu: The vcpu pointer
91  * @select_idx: The counter index
92  */
93 static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
94 {
95 	u64 eventsel, reg;
96 
97 	select_idx |= 0x1;
98 
99 	if (select_idx == ARMV8_PMU_CYCLE_IDX)
100 		return false;
101 
102 	reg = PMEVTYPER0_EL0 + select_idx;
103 	eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;
104 
105 	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
106 }
107 
108 /**
109  * kvm_pmu_get_pair_counter_value - get PMU counter value
110  * @vcpu: The vcpu pointer
111  * @pmc: The PMU counter pointer
112  */
113 static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
114 					  struct kvm_pmc *pmc)
115 {
116 	u64 counter, counter_high, reg, enabled, running;
117 
118 	if (kvm_pmu_pmc_is_chained(pmc)) {
119 		pmc = kvm_pmu_get_canonical_pmc(pmc);
120 		reg = PMEVCNTR0_EL0 + pmc->idx;
121 
122 		counter = __vcpu_sys_reg(vcpu, reg);
123 		counter_high = __vcpu_sys_reg(vcpu, reg + 1);
124 
125 		counter = lower_32_bits(counter) | (counter_high << 32);
126 	} else {
127 		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
128 		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
129 		counter = __vcpu_sys_reg(vcpu, reg);
130 	}
131 
132 	/*
133 	 * The real counter value is equal to the value of counter register plus
134 	 * the value perf event counts.
135 	 */
136 	if (pmc->perf_event)
137 		counter += perf_event_read_value(pmc->perf_event, &enabled,
138 						 &running);
139 
140 	return counter;
141 }
142 
143 /**
144  * kvm_pmu_get_counter_value - get PMU counter value
145  * @vcpu: The vcpu pointer
146  * @select_idx: The counter index
147  */
148 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
149 {
150 	u64 counter;
151 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
152 	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
153 
154 	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
155 
156 	if (kvm_pmu_pmc_is_chained(pmc) &&
157 	    kvm_pmu_idx_is_high_counter(select_idx))
158 		counter = upper_32_bits(counter);
159 	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
160 		counter = lower_32_bits(counter);
161 
162 	return counter;
163 }
164 
165 /**
166  * kvm_pmu_set_counter_value - set PMU counter value
167  * @vcpu: The vcpu pointer
168  * @select_idx: The counter index
169  * @val: The counter value
170  */
171 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
172 {
173 	u64 reg;
174 
175 	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
176 	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
177 	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
178 
179 	/* Recreate the perf event to reflect the updated sample_period */
180 	kvm_pmu_create_perf_event(vcpu, select_idx);
181 }
182 
183 /**
184  * kvm_pmu_release_perf_event - remove the perf event
185  * @pmc: The PMU counter pointer
186  */
187 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
188 {
189 	pmc = kvm_pmu_get_canonical_pmc(pmc);
190 	if (pmc->perf_event) {
191 		perf_event_disable(pmc->perf_event);
192 		perf_event_release_kernel(pmc->perf_event);
193 		pmc->perf_event = NULL;
194 	}
195 }
196 
197 /**
198  * kvm_pmu_stop_counter - stop PMU counter
199  * @pmc: The PMU counter pointer
200  *
201  * If this counter has been configured to monitor some event, release it here.
202  */
203 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
204 {
205 	u64 counter, reg, val;
206 
207 	pmc = kvm_pmu_get_canonical_pmc(pmc);
208 	if (!pmc->perf_event)
209 		return;
210 
211 	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
212 
213 	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
214 		reg = PMCCNTR_EL0;
215 		val = counter;
216 	} else {
217 		reg = PMEVCNTR0_EL0 + pmc->idx;
218 		val = lower_32_bits(counter);
219 	}
220 
221 	__vcpu_sys_reg(vcpu, reg) = val;
222 
223 	if (kvm_pmu_pmc_is_chained(pmc))
224 		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
225 
226 	kvm_pmu_release_perf_event(pmc);
227 }
228 
229 /**
230  * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
231  * @vcpu: The vcpu pointer
232  *
233  */
234 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
235 {
236 	int i;
237 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
238 
239 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
240 		pmu->pmc[i].idx = i;
241 }
242 
243 /**
244  * kvm_pmu_vcpu_reset - reset pmu state for cpu
245  * @vcpu: The vcpu pointer
246  *
247  */
248 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
249 {
250 	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
251 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
252 	int i;
253 
254 	for_each_set_bit(i, &mask, 32)
255 		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
256 
257 	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
258 }
259 
260 /**
261  * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
262  * @vcpu: The vcpu pointer
263  *
264  */
265 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
266 {
267 	int i;
268 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
269 
270 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
271 		kvm_pmu_release_perf_event(&pmu->pmc[i]);
272 }
273 
274 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
275 {
276 	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
277 
278 	val &= ARMV8_PMU_PMCR_N_MASK;
279 	if (val == 0)
280 		return BIT(ARMV8_PMU_CYCLE_IDX);
281 	else
282 		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
283 }
284 
285 /**
286  * kvm_pmu_enable_counter_mask - enable selected PMU counters
287  * @vcpu: The vcpu pointer
288  * @val: the value guest writes to PMCNTENSET register
289  *
290  * Call perf_event_enable to start counting the perf event
291  */
292 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
293 {
294 	int i;
295 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
296 	struct kvm_pmc *pmc;
297 
298 	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
299 		return;
300 
301 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
302 		if (!(val & BIT(i)))
303 			continue;
304 
305 		pmc = &pmu->pmc[i];
306 
307 		/* A change in the enable state may affect the chain state */
308 		kvm_pmu_update_pmc_chained(vcpu, i);
309 		kvm_pmu_create_perf_event(vcpu, i);
310 
311 		/* At this point, pmc must be the canonical */
312 		if (pmc->perf_event) {
313 			perf_event_enable(pmc->perf_event);
314 			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
315 				kvm_debug("fail to enable perf event\n");
316 		}
317 	}
318 }
319 
320 /**
321  * kvm_pmu_disable_counter_mask - disable selected PMU counters
322  * @vcpu: The vcpu pointer
323  * @val: the value guest writes to PMCNTENCLR register
324  *
325  * Call perf_event_disable to stop counting the perf event
326  */
327 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
328 {
329 	int i;
330 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
331 	struct kvm_pmc *pmc;
332 
333 	if (!val)
334 		return;
335 
336 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
337 		if (!(val & BIT(i)))
338 			continue;
339 
340 		pmc = &pmu->pmc[i];
341 
342 		/* A change in the enable state may affect the chain state */
343 		kvm_pmu_update_pmc_chained(vcpu, i);
344 		kvm_pmu_create_perf_event(vcpu, i);
345 
346 		/* At this point, pmc must be the canonical */
347 		if (pmc->perf_event)
348 			perf_event_disable(pmc->perf_event);
349 	}
350 }
351 
352 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
353 {
354 	u64 reg = 0;
355 
356 	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
357 		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
358 		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
359 		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
360 		reg &= kvm_pmu_valid_counter_mask(vcpu);
361 	}
362 
363 	return reg;
364 }
365 
366 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
367 {
368 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
369 	bool overflow;
370 
371 	if (!kvm_arm_pmu_v3_ready(vcpu))
372 		return;
373 
374 	overflow = !!kvm_pmu_overflow_status(vcpu);
375 	if (pmu->irq_level == overflow)
376 		return;
377 
378 	pmu->irq_level = overflow;
379 
380 	if (likely(irqchip_in_kernel(vcpu->kvm))) {
381 		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
382 					      pmu->irq_num, overflow, pmu);
383 		WARN_ON(ret);
384 	}
385 }
386 
387 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
388 {
389 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
390 	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
391 	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
392 
393 	if (likely(irqchip_in_kernel(vcpu->kvm)))
394 		return false;
395 
396 	return pmu->irq_level != run_level;
397 }
398 
399 /*
400  * Reflect the PMU overflow interrupt output level into the kvm_run structure
401  */
402 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
403 {
404 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
405 
406 	/* Populate the timer bitmap for user space */
407 	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
408 	if (vcpu->arch.pmu.irq_level)
409 		regs->device_irq_level |= KVM_ARM_DEV_PMU;
410 }
411 
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluate the PMU overflow state, so that an overflow which happened
 * while we were running in the host results in an interrupt being injected.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
423 
/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluate the PMU overflow state, so that an overflow which happened
 * while we were running in the guest results in an interrupt being injected.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
435 
436 /**
437  * When the perf event overflows, set the overflow status and inform the vcpu.
438  */
439 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
440 				  struct perf_sample_data *data,
441 				  struct pt_regs *regs)
442 {
443 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
444 	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
445 	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
446 	int idx = pmc->idx;
447 	u64 period;
448 
449 	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
450 
451 	/*
452 	 * Reset the sample period to the architectural limit,
453 	 * i.e. the point where the counter overflows.
454 	 */
455 	period = -(local64_read(&perf_event->count));
456 
457 	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
458 		period &= GENMASK(31, 0);
459 
460 	local64_set(&perf_event->hw.period_left, 0);
461 	perf_event->attr.sample_period = period;
462 	perf_event->hw.sample_period = period;
463 
464 	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
465 
466 	if (kvm_pmu_overflow_status(vcpu)) {
467 		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
468 		kvm_vcpu_kick(vcpu);
469 	}
470 
471 	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
472 }
473 
474 /**
475  * kvm_pmu_software_increment - do software increment
476  * @vcpu: The vcpu pointer
477  * @val: the value guest writes to PMSWINC register
478  */
479 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
480 {
481 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
482 	int i;
483 
484 	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
485 		return;
486 
487 	/* Weed out disabled counters */
488 	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
489 
490 	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
491 		u64 type, reg;
492 
493 		if (!(val & BIT(i)))
494 			continue;
495 
496 		/* PMSWINC only applies to ... SW_INC! */
497 		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
498 		type &= ARMV8_PMU_EVTYPE_EVENT;
499 		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
500 			continue;
501 
502 		/* increment this even SW_INC counter */
503 		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
504 		reg = lower_32_bits(reg);
505 		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
506 
507 		if (reg) /* no overflow on the low part */
508 			continue;
509 
510 		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
511 			/* increment the high counter */
512 			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
513 			reg = lower_32_bits(reg);
514 			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
515 			if (!reg) /* mark overflow on the high counter */
516 				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
517 		} else {
518 			/* mark overflow on low counter */
519 			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
520 		}
521 	}
522 }
523 
524 /**
525  * kvm_pmu_handle_pmcr - handle PMCR register
526  * @vcpu: The vcpu pointer
527  * @val: the value guest writes to PMCR register
528  */
529 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
530 {
531 	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
532 	int i;
533 
534 	if (val & ARMV8_PMU_PMCR_E) {
535 		kvm_pmu_enable_counter_mask(vcpu,
536 		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
537 	} else {
538 		kvm_pmu_disable_counter_mask(vcpu, mask);
539 	}
540 
541 	if (val & ARMV8_PMU_PMCR_C)
542 		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
543 
544 	if (val & ARMV8_PMU_PMCR_P) {
545 		for_each_set_bit(i, &mask, 32)
546 			kvm_pmu_set_counter_value(vcpu, i, 0);
547 	}
548 }
549 
550 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
551 {
552 	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
553 	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
554 }
555 
556 /**
557  * kvm_pmu_create_perf_event - create a perf event for a counter
558  * @vcpu: The vcpu pointer
559  * @select_idx: The number of selected counter
560  */
561 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
562 {
563 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
564 	struct kvm_pmc *pmc;
565 	struct perf_event *event;
566 	struct perf_event_attr attr;
567 	u64 eventsel, counter, reg, data;
568 
569 	/*
570 	 * For chained counters the event type and filtering attributes are
571 	 * obtained from the low/even counter. We also use this counter to
572 	 * determine if the event is enabled/disabled.
573 	 */
574 	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
575 
576 	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
577 	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
578 	data = __vcpu_sys_reg(vcpu, reg);
579 
580 	kvm_pmu_stop_counter(vcpu, pmc);
581 	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
582 
583 	/* Software increment event does't need to be backed by a perf event */
584 	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
585 	    pmc->idx != ARMV8_PMU_CYCLE_IDX)
586 		return;
587 
588 	memset(&attr, 0, sizeof(struct perf_event_attr));
589 	attr.type = PERF_TYPE_RAW;
590 	attr.size = sizeof(attr);
591 	attr.pinned = 1;
592 	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
593 	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
594 	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
595 	attr.exclude_hv = 1; /* Don't count EL2 events */
596 	attr.exclude_host = 1; /* Don't count host events */
597 	attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
598 		ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
599 
600 	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
601 
602 	if (kvm_pmu_pmc_is_chained(pmc)) {
603 		/**
604 		 * The initial sample period (overflow count) of an event. For
605 		 * chained counters we only support overflow interrupts on the
606 		 * high counter.
607 		 */
608 		attr.sample_period = (-counter) & GENMASK(63, 0);
609 		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
610 
611 		event = perf_event_create_kernel_counter(&attr, -1, current,
612 							 kvm_pmu_perf_overflow,
613 							 pmc + 1);
614 	} else {
615 		/* The initial sample period (overflow count) of an event. */
616 		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
617 			attr.sample_period = (-counter) & GENMASK(63, 0);
618 		else
619 			attr.sample_period = (-counter) & GENMASK(31, 0);
620 
621 		event = perf_event_create_kernel_counter(&attr, -1, current,
622 						 kvm_pmu_perf_overflow, pmc);
623 	}
624 
625 	if (IS_ERR(event)) {
626 		pr_err_once("kvm: pmu event creation failed %ld\n",
627 			    PTR_ERR(event));
628 		return;
629 	}
630 
631 	pmc->perf_event = event;
632 }
633 
634 /**
635  * kvm_pmu_update_pmc_chained - update chained bitmap
636  * @vcpu: The vcpu pointer
637  * @select_idx: The number of selected counter
638  *
639  * Update the chained bitmap based on the event type written in the
640  * typer register and the enable state of the odd register.
641  */
642 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
643 {
644 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
645 	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
646 	bool new_state, old_state;
647 
648 	old_state = kvm_pmu_pmc_is_chained(pmc);
649 	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
650 		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
651 
652 	if (old_state == new_state)
653 		return;
654 
655 	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
656 	kvm_pmu_stop_counter(vcpu, canonical_pmc);
657 	if (new_state) {
658 		/*
659 		 * During promotion from !chained to chained we must ensure
660 		 * the adjacent counter is stopped and its event destroyed
661 		 */
662 		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
663 		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
664 		return;
665 	}
666 	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
667 }
668 
669 /**
670  * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
671  * @vcpu: The vcpu pointer
672  * @data: The data guest writes to PMXEVTYPER_EL0
673  * @select_idx: The number of selected counter
674  *
675  * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
676  * event with given hardware event number. Here we call perf_event API to
677  * emulate this action and create a kernel perf event for it.
678  */
679 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
680 				    u64 select_idx)
681 {
682 	u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK;
683 
684 	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
685 	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
686 
687 	__vcpu_sys_reg(vcpu, reg) = event_type;
688 
689 	kvm_pmu_update_pmc_chained(vcpu, select_idx);
690 	kvm_pmu_create_perf_event(vcpu, select_idx);
691 }
692 
693 bool kvm_arm_support_pmu_v3(void)
694 {
695 	/*
696 	 * Check if HW_PERF_EVENTS are supported by checking the number of
697 	 * hardware performance counters. This could ensure the presence of
698 	 * a physical PMU and CONFIG_PERF_EVENT is selected.
699 	 */
700 	return (perf_num_counters() > 0);
701 }
702 
703 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
704 {
705 	if (!vcpu->arch.pmu.created)
706 		return 0;
707 
708 	/*
709 	 * A valid interrupt configuration for the PMU is either to have a
710 	 * properly configured interrupt number and using an in-kernel
711 	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
712 	 */
713 	if (irqchip_in_kernel(vcpu->kvm)) {
714 		int irq = vcpu->arch.pmu.irq_num;
715 		if (!kvm_arm_pmu_irq_initialized(vcpu))
716 			return -EINVAL;
717 
718 		/*
719 		 * If we are using an in-kernel vgic, at this point we know
720 		 * the vgic will be initialized, so we can check the PMU irq
721 		 * number against the dimensions of the vgic and make sure
722 		 * it's valid.
723 		 */
724 		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
725 			return -EINVAL;
726 	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
727 		   return -EINVAL;
728 	}
729 
730 	kvm_pmu_vcpu_reset(vcpu);
731 	vcpu->arch.pmu.ready = true;
732 
733 	return 0;
734 }
735 
736 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
737 {
738 	if (!kvm_arm_support_pmu_v3())
739 		return -ENODEV;
740 
741 	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
742 		return -ENXIO;
743 
744 	if (vcpu->arch.pmu.created)
745 		return -EBUSY;
746 
747 	if (irqchip_in_kernel(vcpu->kvm)) {
748 		int ret;
749 
750 		/*
751 		 * If using the PMU with an in-kernel virtual GIC
752 		 * implementation, we require the GIC to be already
753 		 * initialized when initializing the PMU.
754 		 */
755 		if (!vgic_initialized(vcpu->kvm))
756 			return -ENODEV;
757 
758 		if (!kvm_arm_pmu_irq_initialized(vcpu))
759 			return -ENXIO;
760 
761 		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
762 					 &vcpu->arch.pmu);
763 		if (ret)
764 			return ret;
765 	}
766 
767 	vcpu->arch.pmu.created = true;
768 	return 0;
769 }
770 
771 /*
772  * For one VM the interrupt type must be same for each vcpu.
773  * As a PPI, the interrupt number is the same for all vcpus,
774  * while as an SPI it must be a separate number per vcpu.
775  */
776 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
777 {
778 	int i;
779 	struct kvm_vcpu *vcpu;
780 
781 	kvm_for_each_vcpu(i, vcpu, kvm) {
782 		if (!kvm_arm_pmu_irq_initialized(vcpu))
783 			continue;
784 
785 		if (irq_is_ppi(irq)) {
786 			if (vcpu->arch.pmu.irq_num != irq)
787 				return false;
788 		} else {
789 			if (vcpu->arch.pmu.irq_num == irq)
790 				return false;
791 		}
792 	}
793 
794 	return true;
795 }
796 
797 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
798 {
799 	switch (attr->attr) {
800 	case KVM_ARM_VCPU_PMU_V3_IRQ: {
801 		int __user *uaddr = (int __user *)(long)attr->addr;
802 		int irq;
803 
804 		if (!irqchip_in_kernel(vcpu->kvm))
805 			return -EINVAL;
806 
807 		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
808 			return -ENODEV;
809 
810 		if (get_user(irq, uaddr))
811 			return -EFAULT;
812 
813 		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
814 		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
815 			return -EINVAL;
816 
817 		if (!pmu_irq_is_valid(vcpu->kvm, irq))
818 			return -EINVAL;
819 
820 		if (kvm_arm_pmu_irq_initialized(vcpu))
821 			return -EBUSY;
822 
823 		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
824 		vcpu->arch.pmu.irq_num = irq;
825 		return 0;
826 	}
827 	case KVM_ARM_VCPU_PMU_V3_INIT:
828 		return kvm_arm_pmu_v3_init(vcpu);
829 	}
830 
831 	return -ENXIO;
832 }
833 
834 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
835 {
836 	switch (attr->attr) {
837 	case KVM_ARM_VCPU_PMU_V3_IRQ: {
838 		int __user *uaddr = (int __user *)(long)attr->addr;
839 		int irq;
840 
841 		if (!irqchip_in_kernel(vcpu->kvm))
842 			return -EINVAL;
843 
844 		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
845 			return -ENODEV;
846 
847 		if (!kvm_arm_pmu_irq_initialized(vcpu))
848 			return -ENXIO;
849 
850 		irq = vcpu->arch.pmu.irq_num;
851 		return put_user(irq, uaddr);
852 	}
853 	}
854 
855 	return -ENXIO;
856 }
857 
858 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
859 {
860 	switch (attr->attr) {
861 	case KVM_ARM_VCPU_PMU_V3_IRQ:
862 	case KVM_ARM_VCPU_PMU_V3_INIT:
863 		if (kvm_arm_support_pmu_v3() &&
864 		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
865 			return 0;
866 	}
867 
868 	return -ENXIO;
869 }
870