xref: /linux/arch/riscv/kvm/vcpu_pmu.c (revision 11e8c7e9471cf8e6ae6ec7324a3174191cd965e3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2023 Rivos Inc
4  *
5  * Authors:
6  *     Atish Patra <atishp@rivosinc.com>
7  */
8 
9 #define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
10 #include <linux/errno.h>
11 #include <linux/err.h>
12 #include <linux/kvm_host.h>
13 #include <linux/nospec.h>
14 #include <linux/perf/riscv_pmu.h>
15 #include <asm/csr.h>
16 #include <asm/kvm_vcpu_sbi.h>
17 #include <asm/kvm_vcpu_pmu.h>
18 #include <asm/sbi.h>
19 #include <linux/bitops.h>
20 
/* Total number of virtual counters (hardware plus firmware) tracked in @pmu. */
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
/* Event type field of an SBI event index: mask it out, then shift it down by 16. */
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
/* Event code field of an SBI event index. */
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)
24 
25 static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
26 	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
27 	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
28 	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
29 	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
30 	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
31 	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
32 	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
33 	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
34 	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
35 	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
36 };
37 
kvm_pmu_get_sample_period(struct kvm_pmc * pmc)38 static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
39 {
40 	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
41 	u64 sample_period;
42 
43 	if (!pmc->counter_val)
44 		sample_period = counter_val_mask;
45 	else
46 		sample_period = (-pmc->counter_val) & counter_val_mask;
47 
48 	return sample_period;
49 }
50 
kvm_pmu_get_perf_event_type(unsigned long eidx)51 static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
52 {
53 	enum sbi_pmu_event_type etype = get_event_type(eidx);
54 	u32 type = PERF_TYPE_MAX;
55 
56 	switch (etype) {
57 	case SBI_PMU_EVENT_TYPE_HW:
58 		type = PERF_TYPE_HARDWARE;
59 		break;
60 	case SBI_PMU_EVENT_TYPE_CACHE:
61 		type = PERF_TYPE_HW_CACHE;
62 		break;
63 	case SBI_PMU_EVENT_TYPE_RAW:
64 	case SBI_PMU_EVENT_TYPE_RAW_V2:
65 	case SBI_PMU_EVENT_TYPE_FW:
66 		type = PERF_TYPE_RAW;
67 		break;
68 	default:
69 		break;
70 	}
71 
72 	return type;
73 }
74 
kvm_pmu_is_fw_event(unsigned long eidx)75 static bool kvm_pmu_is_fw_event(unsigned long eidx)
76 {
77 	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
78 }
79 
kvm_pmu_release_perf_event(struct kvm_pmc * pmc)80 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
81 {
82 	if (pmc->perf_event) {
83 		perf_event_disable(pmc->perf_event);
84 		perf_event_release_kernel(pmc->perf_event);
85 		pmc->perf_event = NULL;
86 	}
87 }
88 
kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)89 static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
90 {
91 	return hw_event_perf_map[array_index_nospec(sbi_event_code,
92 						    SBI_PMU_HW_GENERAL_MAX)];
93 }
94 
kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)95 static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
96 {
97 	u64 config = U64_MAX;
98 	unsigned int cache_type, cache_op, cache_result;
99 
100 	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
101 	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
102 		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
103 	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
104 		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
105 	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;
106 
107 	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
108 	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
109 	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
110 		return config;
111 
112 	config = cache_type | (cache_op << 8) | (cache_result << 16);
113 
114 	return config;
115 }
116 
kvm_pmu_get_perf_event_config(unsigned long eidx,uint64_t evt_data)117 static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
118 {
119 	enum sbi_pmu_event_type etype = get_event_type(eidx);
120 	u32 ecode = get_event_code(eidx);
121 	u64 config = U64_MAX;
122 
123 	switch (etype) {
124 	case SBI_PMU_EVENT_TYPE_HW:
125 		if (ecode < SBI_PMU_HW_GENERAL_MAX)
126 			config = kvm_pmu_get_perf_event_hw_config(ecode);
127 		break;
128 	case SBI_PMU_EVENT_TYPE_CACHE:
129 		config = kvm_pmu_get_perf_event_cache_config(ecode);
130 		break;
131 	case SBI_PMU_EVENT_TYPE_RAW:
132 		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
133 		break;
134 	case SBI_PMU_EVENT_TYPE_RAW_V2:
135 		config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK;
136 		break;
137 	case SBI_PMU_EVENT_TYPE_FW:
138 		if (ecode < SBI_PMU_FW_MAX)
139 			config = (1ULL << 63) | ecode;
140 		break;
141 	default:
142 		break;
143 	}
144 
145 	return config;
146 }
147 
kvm_pmu_get_fixed_pmc_index(unsigned long eidx)148 static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
149 {
150 	u32 etype = kvm_pmu_get_perf_event_type(eidx);
151 	u32 ecode = get_event_code(eidx);
152 
153 	if (etype != SBI_PMU_EVENT_TYPE_HW)
154 		return -EINVAL;
155 
156 	if (ecode == SBI_PMU_HW_CPU_CYCLES)
157 		return 0;
158 	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
159 		return 2;
160 	else
161 		return -EINVAL;
162 }
163 
kvm_pmu_get_programmable_pmc_index(struct kvm_pmu * kvpmu,unsigned long eidx,unsigned long cbase,unsigned long cmask)164 static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
165 					      unsigned long cbase, unsigned long cmask)
166 {
167 	int ctr_idx = -1;
168 	int i, pmc_idx;
169 	int min, max;
170 
171 	if (kvm_pmu_is_fw_event(eidx)) {
172 		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
173 		min = kvpmu->num_hw_ctrs;
174 		max = min + kvpmu->num_fw_ctrs;
175 	} else {
176 		/* First 3 counters are reserved for fixed counters */
177 		min = 3;
178 		max = kvpmu->num_hw_ctrs;
179 	}
180 
181 	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
182 		pmc_idx = i + cbase;
183 		if ((pmc_idx >= min && pmc_idx < max) &&
184 		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
185 			ctr_idx = pmc_idx;
186 			break;
187 		}
188 	}
189 
190 	return ctr_idx;
191 }
192 
/*
 * Resolve the counter index to use for @eidx: a fixed counter when the event
 * has one, otherwise the first suitable programmable counter selected by
 * @cbase/@cmask. Returns a negative value when no counter is available.
 */
static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	/* Fixed counters need to have a fixed mapping as they have a different width */
	int fixed_idx = kvm_pmu_get_fixed_pmc_index(eidx);

	if (fixed_idx < 0)
		return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);

	return fixed_idx;
}
205 
pmu_fw_ctr_read_hi(struct kvm_vcpu * vcpu,unsigned long cidx,unsigned long * out_val)206 static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
207 			      unsigned long *out_val)
208 {
209 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
210 	struct kvm_pmc *pmc;
211 	int fevent_code;
212 
213 	if (!IS_ENABLED(CONFIG_32BIT)) {
214 		pr_warn("%s: should be invoked for only RV32\n", __func__);
215 		return -EINVAL;
216 	}
217 
218 	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
219 		pr_warn("Invalid counter id [%ld]during read\n", cidx);
220 		return -EINVAL;
221 	}
222 
223 	cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
224 	pmc = &kvpmu->pmc[cidx];
225 
226 	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
227 		return -EINVAL;
228 
229 	fevent_code = get_event_code(pmc->event_idx);
230 	pmc->counter_val = kvpmu->fw_event[fevent_code].value;
231 
232 	*out_val = pmc->counter_val >> 32;
233 
234 	return 0;
235 }
236 
pmu_ctr_read(struct kvm_vcpu * vcpu,unsigned long cidx,unsigned long * out_val)237 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
238 			unsigned long *out_val)
239 {
240 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
241 	struct kvm_pmc *pmc;
242 	u64 enabled, running;
243 	int fevent_code;
244 
245 	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
246 		pr_warn("Invalid counter id [%ld] during read\n", cidx);
247 		return -EINVAL;
248 	}
249 
250 	cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
251 	pmc = &kvpmu->pmc[cidx];
252 
253 	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
254 		fevent_code = get_event_code(pmc->event_idx);
255 		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
256 	} else if (pmc->perf_event) {
257 		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
258 	} else {
259 		return -EINVAL;
260 	}
261 	*out_val = pmc->counter_val;
262 
263 	return 0;
264 }
265 
kvm_pmu_validate_counter_mask(struct kvm_pmu * kvpmu,unsigned long ctr_base,unsigned long ctr_mask)266 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
267 					 unsigned long ctr_mask)
268 {
269 	/* Make sure the we have a valid counter mask requested from the caller */
270 	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
271 		return -EINVAL;
272 
273 	return 0;
274 }
275 
/*
 * perf overflow callback installed for guest counters (it is passed to
 * perf_event_create_kernel_counter() together with the owning kvm_pmc).
 *
 * It stops the event, reprograms its sample period from the current count,
 * records the overflow for the counter in pmc_overflown, raises IRQ_PMU_OVF
 * for the vcpu and restarts the event. @data and @regs are not used.
 */
static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this needs to be deferred via a workqueue.
	 * That would introduce skew in the counter value because the actual
	 * physical counter would start after returning from this function.
	 * It would be stopped again once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter would start automatically when this function returns.
	 * Thus, the host may continue to interrupt and inject it to the guest
	 * even without the guest configuring the next event. Depending on the hardware
	 * the host may have some sluggishness only if privilege mode filtering is not
	 * available. In an ideal world, where qemu is not the only capable hardware,
	 * this can be removed.
	 * FYI: ARM64 does it this way while x86 doesn't do anything as such.
	 * TODO: Should we keep it for RISC-V ?
	 */
	period = -(local64_read(&perf_event->count));

	/* Apply the recomputed period to both the attribute and the hw state. */
	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	/* Remember which counter overflowed before notifying the guest. */
	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
316 
kvm_pmu_create_perf_event(struct kvm_pmc * pmc,struct perf_event_attr * attr,unsigned long flags,unsigned long eidx,unsigned long evtdata)317 static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
318 				      unsigned long flags, unsigned long eidx,
319 				      unsigned long evtdata)
320 {
321 	struct perf_event *event;
322 
323 	kvm_pmu_release_perf_event(pmc);
324 	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
325 	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
326 		//TODO: Do we really want to clear the value in hardware counter
327 		pmc->counter_val = 0;
328 	}
329 
330 	/*
331 	 * Set the default sample_period for now. The guest specified value
332 	 * will be updated in the start call.
333 	 */
334 	attr->sample_period = kvm_pmu_get_sample_period(pmc);
335 
336 	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
337 	if (IS_ERR(event)) {
338 		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
339 		return PTR_ERR(event);
340 	}
341 
342 	pmc->perf_event = event;
343 	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
344 		perf_event_enable(pmc->perf_event);
345 
346 	return 0;
347 }
348 
kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu * vcpu,unsigned long fid)349 int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
350 {
351 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
352 	struct kvm_fw_event *fevent;
353 
354 	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
355 		return -EINVAL;
356 
357 	fevent = &kvpmu->fw_event[fid];
358 	if (fevent->started)
359 		fevent->value++;
360 
361 	return 0;
362 }
363 
kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu * vcpu,unsigned int csr_num,unsigned long * val,unsigned long new_val,unsigned long wr_mask)364 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
365 				unsigned long *val, unsigned long new_val,
366 				unsigned long wr_mask)
367 {
368 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
369 	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;
370 
371 	if (!kvpmu || !kvpmu->init_done) {
372 		/*
373 		 * In absence of sscofpmf in the platform, the guest OS may use
374 		 * the legacy PMU driver to read cycle/instret. In that case,
375 		 * just return 0 to avoid any illegal trap. However, any other
376 		 * hpmcounter access should result in illegal trap as they must
377 		 * be access through SBI PMU only.
378 		 */
379 		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
380 			*val = 0;
381 			return ret;
382 		} else {
383 			return KVM_INSN_ILLEGAL_TRAP;
384 		}
385 	}
386 
387 	/* The counter CSR are read only. Thus, any write should result in illegal traps */
388 	if (wr_mask)
389 		return KVM_INSN_ILLEGAL_TRAP;
390 
391 	cidx = csr_num - CSR_CYCLE;
392 
393 	if (pmu_ctr_read(vcpu, cidx, val) < 0)
394 		return KVM_INSN_ILLEGAL_TRAP;
395 
396 	return ret;
397 }
398 
kvm_pmu_clear_snapshot_area(struct kvm_vcpu * vcpu)399 static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
400 {
401 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
402 
403 	kfree(kvpmu->sdata);
404 	kvpmu->sdata = NULL;
405 	kvpmu->snapshot_addr = INVALID_GPA;
406 }
407 
kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu * vcpu,unsigned long saddr_low,unsigned long saddr_high,unsigned long flags,struct kvm_vcpu_sbi_return * retdata)408 int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
409 				      unsigned long saddr_high, unsigned long flags,
410 				      struct kvm_vcpu_sbi_return *retdata)
411 {
412 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
413 	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
414 	int sbiret = 0;
415 	gpa_t saddr;
416 
417 	if (!kvpmu || flags) {
418 		sbiret = SBI_ERR_INVALID_PARAM;
419 		goto out;
420 	}
421 
422 	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
423 		kvm_pmu_clear_snapshot_area(vcpu);
424 		return 0;
425 	}
426 
427 	saddr = saddr_low;
428 
429 	if (saddr_high != 0) {
430 		if (IS_ENABLED(CONFIG_32BIT))
431 			saddr |= ((gpa_t)saddr_high << 32);
432 		else
433 			sbiret = SBI_ERR_INVALID_ADDRESS;
434 		goto out;
435 	}
436 
437 	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
438 	if (!kvpmu->sdata)
439 		return -ENOMEM;
440 
441 	/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
442 	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
443 		kfree(kvpmu->sdata);
444 		sbiret = SBI_ERR_INVALID_ADDRESS;
445 		goto out;
446 	}
447 
448 	kvpmu->snapshot_addr = saddr;
449 
450 out:
451 	retdata->err_val = sbiret;
452 
453 	return 0;
454 }
455 
kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu * vcpu,unsigned long saddr_low,unsigned long saddr_high,unsigned long num_events,unsigned long flags,struct kvm_vcpu_sbi_return * retdata)456 int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
457 				  unsigned long saddr_high, unsigned long num_events,
458 				  unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
459 {
460 	struct riscv_pmu_event_info *einfo = NULL;
461 	int shmem_size = num_events * sizeof(*einfo);
462 	gpa_t shmem;
463 	u32 eidx, etype;
464 	u64 econfig;
465 	int ret;
466 
467 	if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) {
468 		ret = SBI_ERR_INVALID_PARAM;
469 		goto out;
470 	}
471 
472 	shmem = saddr_low;
473 	if (saddr_high != 0) {
474 		if (IS_ENABLED(CONFIG_32BIT)) {
475 			shmem |= ((gpa_t)saddr_high << 32);
476 		} else {
477 			ret = SBI_ERR_INVALID_ADDRESS;
478 			goto out;
479 		}
480 	}
481 
482 	einfo = kzalloc(shmem_size, GFP_KERNEL);
483 	if (!einfo)
484 		return -ENOMEM;
485 
486 	ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
487 	if (ret) {
488 		ret = SBI_ERR_FAILURE;
489 		goto free_mem;
490 	}
491 
492 	for (int i = 0; i < num_events; i++) {
493 		eidx = einfo[i].event_idx;
494 		etype = kvm_pmu_get_perf_event_type(eidx);
495 		econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
496 		ret = riscv_pmu_get_event_info(etype, econfig, NULL);
497 		einfo[i].output = (ret > 0) ? 1 : 0;
498 	}
499 
500 	ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
501 	if (ret)
502 		ret = SBI_ERR_INVALID_ADDRESS;
503 
504 free_mem:
505 	kfree(einfo);
506 out:
507 	retdata->err_val = ret;
508 
509 	return 0;
510 }
511 
kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu * vcpu,struct kvm_vcpu_sbi_return * retdata)512 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
513 				struct kvm_vcpu_sbi_return *retdata)
514 {
515 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
516 
517 	retdata->out_val = kvm_pmu_num_counters(kvpmu);
518 
519 	return 0;
520 }
521 
kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu * vcpu,unsigned long cidx,struct kvm_vcpu_sbi_return * retdata)522 int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
523 				struct kvm_vcpu_sbi_return *retdata)
524 {
525 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
526 
527 	if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) {
528 		retdata->err_val = SBI_ERR_INVALID_PARAM;
529 		return 0;
530 	}
531 
532 	cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
533 	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;
534 
535 	return 0;
536 }
537 
/*
 * Start the counters selected by @ctr_base/@ctr_mask.
 *
 * @flags: SBI_PMU_START_FLAG_SET_INIT_VALUE seeds every selected counter
 *	with @ival; otherwise SBI_PMU_START_FLAG_INIT_SNAPSHOT seeds them from
 *	the snapshot shared memory, which must have been registered.
 * @retdata: receives the SBI error code in err_val.
 *
 * Selected counters that are not in use are skipped. Starting a counter also
 * clears its recorded overflow status. Always returns 0.
 */
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/*
	 * Start the counters that have been configured and requested by the guest.
	 * ctr_mask is a single unsigned long, so only BITS_PER_LONG bits may be
	 * scanned; a larger bound would read past the variable whenever it
	 * exceeds the width of a long.
	 */
	for_each_set_bit(i, &ctr_mask, BITS_PER_LONG) {
		pmc_index = array_index_nospec(i + ctr_base,
					       RISCV_KVM_MAX_COUNTERS);
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			/* Program the period derived from the seeded value before enabling. */
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}
614 
kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu * vcpu,unsigned long ctr_base,unsigned long ctr_mask,unsigned long flags,struct kvm_vcpu_sbi_return * retdata)615 int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
616 				unsigned long ctr_mask, unsigned long flags,
617 				struct kvm_vcpu_sbi_return *retdata)
618 {
619 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
620 	int i, pmc_index, sbiret = 0;
621 	u64 enabled, running;
622 	struct kvm_pmc *pmc;
623 	int fevent_code;
624 	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
625 	bool shmem_needs_update = false;
626 
627 	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
628 		sbiret = SBI_ERR_INVALID_PARAM;
629 		goto out;
630 	}
631 
632 	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
633 		sbiret = SBI_ERR_NO_SHMEM;
634 		goto out;
635 	}
636 
637 	/* Stop the counters that have been configured and requested by the guest */
638 	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
639 		pmc_index = array_index_nospec(i + ctr_base,
640 					       RISCV_KVM_MAX_COUNTERS);
641 		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
642 			continue;
643 		pmc = &kvpmu->pmc[pmc_index];
644 		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
645 			fevent_code = get_event_code(pmc->event_idx);
646 			if (fevent_code >= SBI_PMU_FW_MAX) {
647 				sbiret = SBI_ERR_INVALID_PARAM;
648 				goto out;
649 			}
650 
651 			if (!kvpmu->fw_event[fevent_code].started)
652 				sbiret = SBI_ERR_ALREADY_STOPPED;
653 
654 			kvpmu->fw_event[fevent_code].started = false;
655 		} else if (pmc->perf_event) {
656 			if (pmc->started) {
657 				/* Stop counting the counter */
658 				perf_event_disable(pmc->perf_event);
659 				pmc->started = false;
660 			} else {
661 				sbiret = SBI_ERR_ALREADY_STOPPED;
662 			}
663 
664 			if (flags & SBI_PMU_STOP_FLAG_RESET)
665 				/* Release the counter if this is a reset request */
666 				kvm_pmu_release_perf_event(pmc);
667 		} else {
668 			sbiret = SBI_ERR_INVALID_PARAM;
669 		}
670 
671 		if (snap_flag_set && !sbiret) {
672 			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
673 				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
674 			else if (pmc->perf_event)
675 				pmc->counter_val += perf_event_read_value(pmc->perf_event,
676 									  &enabled, &running);
677 			/*
678 			 * The counter and overflow indicies in the snapshot region are w.r.to
679 			 * cbase. Modify the set bit in the counter mask instead of the pmc_index
680 			 * which indicates the absolute counter index.
681 			 */
682 			if (test_bit(pmc_index, kvpmu->pmc_overflown))
683 				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
684 			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
685 			shmem_needs_update = true;
686 		}
687 
688 		if (flags & SBI_PMU_STOP_FLAG_RESET) {
689 			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
690 			clear_bit(pmc_index, kvpmu->pmc_in_use);
691 			clear_bit(pmc_index, kvpmu->pmc_overflown);
692 			if (snap_flag_set) {
693 				/*
694 				 * Only clear the given counter as the caller is responsible to
695 				 * validate both the overflow mask and configured counters.
696 				 */
697 				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
698 				shmem_needs_update = true;
699 			}
700 		}
701 	}
702 
703 	if (shmem_needs_update)
704 		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
705 					     sizeof(struct riscv_pmu_snapshot_data));
706 
707 out:
708 	retdata->err_val = sbiret;
709 
710 	return 0;
711 }
712 
/*
 * Find (or confirm) a counter for the SBI event @eidx among the counters
 * selected by @ctr_base/@ctr_mask and configure it.
 *
 * @flags: SBI_PMU_CFG_FLAG_* bits; SKIP_MATCH, AUTO_START, SET_UINH and
 *	SET_SINH are evaluated here, the full set is passed on to
 *	kvm_pmu_create_perf_event().
 * @evtdata: event data passed on to kvm_pmu_create_perf_event() for
 *	non-firmware events.
 * @retdata: receives the SBI error code in err_val and, on success, the
 *	chosen counter index in out_val.
 *
 * Always returns 0.
 */
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	/* The event is created pinned and disabled; it is enabled on start. */
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		.disabled = true,
		/*
		 * It should never reach here if the platform doesn't support the sscofpmf
		 * extension as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	/* Firmware event codes index fw_event[], so they must be in range. */
	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * SKIP_MATCH flag indicates the caller is aware of the assigned counter
	 * for this event. Just do a sanity check that it is already marked used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else  {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	ctr_idx = array_index_nospec(ctr_idx, RISCV_KVM_MAX_COUNTERS);
	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		/* Firmware events have no perf event; only the started flag is tracked. */
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	/* Only mark the counter as used once the configuration succeeded. */
	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
out:
	retdata->err_val = sbiret;

	return 0;
}
794 
kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu * vcpu,unsigned long cidx,struct kvm_vcpu_sbi_return * retdata)795 int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
796 				      struct kvm_vcpu_sbi_return *retdata)
797 {
798 	int ret;
799 
800 	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
801 	if (ret == -EINVAL)
802 		retdata->err_val = SBI_ERR_INVALID_PARAM;
803 
804 	return 0;
805 }
806 
kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu * vcpu,unsigned long cidx,struct kvm_vcpu_sbi_return * retdata)807 int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
808 				struct kvm_vcpu_sbi_return *retdata)
809 {
810 	int ret;
811 
812 	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
813 	if (ret == -EINVAL)
814 		retdata->err_val = SBI_ERR_INVALID_PARAM;
815 
816 	return 0;
817 }
818 
kvm_riscv_vcpu_pmu_init(struct kvm_vcpu * vcpu)819 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
820 {
821 	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
822 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
823 	struct kvm_pmc *pmc;
824 
825 	/*
826 	 * PMU functionality should be only available to guests if privilege mode
827 	 * filtering is available in the host. Otherwise, guest will always count
828 	 * events while the execution is in hypervisor mode.
829 	 */
830 	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
831 		return;
832 
833 	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
834 	if (ret < 0 || !hpm_width || !num_hw_ctrs)
835 		return;
836 
837 	/*
838 	 * Increase the number of hardware counters to offset the time counter.
839 	 */
840 	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
841 	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
842 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
843 	kvpmu->snapshot_addr = INVALID_GPA;
844 
845 	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
846 		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
847 		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
848 	}
849 
850 	/*
851 	 * There is no correlation between the logical hardware counter and virtual counters.
852 	 * However, we need to encode a hpmcounter CSR in the counter info field so that
853 	 * KVM can trap n emulate the read. This works well in the migration use case as
854 	 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
855 	 */
856 	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
857 		/* TIME CSR shouldn't be read from perf interface */
858 		if (i == 1)
859 			continue;
860 		pmc = &kvpmu->pmc[i];
861 		pmc->idx = i;
862 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
863 		pmc->vcpu = vcpu;
864 		if (i < kvpmu->num_hw_ctrs) {
865 			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
866 			if (i < 3)
867 				/* CY, IR counters */
868 				pmc->cinfo.width = 63;
869 			else
870 				pmc->cinfo.width = hpm_width;
871 			/*
872 			 * The CSR number doesn't have any relation with the logical
873 			 * hardware counters. The CSR numbers are encoded sequentially
874 			 * to avoid maintaining a map between the virtual counter
875 			 * and CSR number.
876 			 */
877 			pmc->cinfo.csr = CSR_CYCLE + i;
878 		} else {
879 			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
880 			pmc->cinfo.width = 63;
881 		}
882 	}
883 
884 	kvpmu->init_done = true;
885 }
886 
kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu * vcpu)887 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
888 {
889 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
890 	struct kvm_pmc *pmc;
891 	int i;
892 
893 	if (!kvpmu)
894 		return;
895 
896 	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
897 		pmc = &kvpmu->pmc[i];
898 		pmc->counter_val = 0;
899 		kvm_pmu_release_perf_event(pmc);
900 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
901 	}
902 	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
903 	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
904 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
905 	kvm_pmu_clear_snapshot_area(vcpu);
906 }
907 
/* Reset the PMU state of @vcpu; currently identical to a full deinit. */
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}
912