// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *     Atish Patra <atishp@rivosinc.com>
 */

#define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/nospec.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_isa.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};

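/* Derive the perf sample period from the current virtual counter value and its width */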
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
		sample_period = counter_val_mask;
	else
		sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
}

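/* Map an SBI PMU event index to the corresponding perf event type */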
static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		type = PERF_TYPE_HARDWARE;
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		type = PERF_TYPE_HW_CACHE;
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_RAW_V2:
	case SBI_PMU_EVENT_TYPE_FW:
		type = PERF_TYPE_RAW;
		break;
	default:
		break;
	}

	return type;
}

static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}

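/* Disable and free the perf event backing a virtual counter, if any */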
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	return hw_event_perf_map[array_index_nospec(sbi_event_code,
						    SBI_PMU_HW_GENERAL_MAX)];
}

static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return config;

	config = cache_type | (cache_op << 8) | (cache_result << 16);

	return config;
}

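/* Translate an SBI event index (and raw event data) into a perf config value */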
static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (ecode < SBI_PMU_HW_GENERAL_MAX)
			config = kvm_pmu_get_perf_event_hw_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		config = kvm_pmu_get_perf_event_cache_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_RAW_V2:
		config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_FW:
		if (ecode < SBI_PMU_FW_MAX)
			config = (1ULL << 63) | ecode;
		break;
	default:
		break;
	}

	return config;
}

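/* Cycle and instret events always map to the fixed counters (indices 0 and 2) */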
static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
		return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
		return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
		return 2;
	else
		return -EINVAL;
}

static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
					      unsigned long cbase, unsigned long cmask)
{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
		min = kvpmu->num_hw_ctrs;
		max = min + kvpmu->num_fw_ctrs;
	} else {
		/* First 3 counters are reserved for fixed counters */
		min = 3;
		max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
		pmc_idx = i + cbase;
		if ((pmc_idx >= min && pmc_idx < max) &&
		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
			ctr_idx = pmc_idx;
			break;
		}
	}

	return ctr_idx;
}

static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	int ret;

	/* Fixed counters need to have a fixed mapping as they have a different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
		return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}

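/* Return the upper 32 bits of a firmware counter (RV32 only) */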
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
			      unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int fevent_code;

	if (!IS_ENABLED(CONFIG_32BIT)) {
		pr_warn("%s: should only be invoked for RV32\n", __func__);
		return -EINVAL;
	}

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
		return -EINVAL;

	if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
		return -EINVAL;

	fevent_code = get_event_code(pmc->event_idx);
	if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
	    "Invalid firmware event code: %d\n", fevent_code))
		return -EINVAL;

	pmc->counter_val = kvpmu->fw_event[fevent_code].value;

	*out_val = pmc->counter_val >> 32;

	return 0;
}

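/* Read the current value of a virtual counter, from firmware event state or from perf */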
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
		if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
			return -EINVAL;

		fevent_code = get_event_code(pmc->event_idx);
		if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
		    "Invalid firmware event code: %d\n", fevent_code))
			return -EINVAL;

		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	} else {
		return -EINVAL;
	}
	*out_val = pmc->counter_val;

	return 0;
}

static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
					 unsigned long ctr_mask)
{
	unsigned long num_ctrs = kvm_pmu_num_counters(kvpmu);

	/* Make sure we have a valid counter mask requested from the caller */
	if (!ctr_mask || ctr_base >= num_ctrs || (ctr_base + __fls(ctr_mask) >= num_ctrs))
		return -EINVAL;

	return 0;
}

static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this would need to be deferred via a workqueue, which
	 * would introduce skew in the counter value because the actual
	 * physical counter would restart after returning from this function
	 * and would only be stopped again once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter would start automatically when this function returns.
	 * Thus, the host may continue to interrupt and inject it to the guest
	 * even without the guest configuring the next event. Depending on the hardware
	 * the host may have some sluggishness only if privilege mode filtering is not
	 * available. In an ideal world, where qemu is not the only capable hardware,
	 * this can be removed.
	 * FYI: arm64 does it this way while x86 doesn't do anything as such.
	 * TODO: Should we keep it for RISC-V ?
	 */
	period = -(local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

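/* Create (or recreate) the kernel perf event that backs a hardware virtual counter */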
static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
				      unsigned long flags, unsigned long eidx,
				      unsigned long evtdata)
{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
		//TODO: Do we really want to clear the value in the hardware counter?
		pmc->counter_val = 0;
	}

	/*
	 * Set the default sample_period for now. The guest specified value
	 * will be updated in the start call.
	 */
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
		perf_event_enable(pmc->perf_event);

	return 0;
}

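/* Increment a firmware event counter if the guest has started it */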
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
		return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
		fevent->value++;

	return 0;
}

int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu || !kvpmu->init_done) {
		/*
		 * In the absence of sscofpmf in the platform, the guest OS may use
		 * the legacy PMU driver to read cycle/instret. In that case,
		 * just return 0 to avoid any illegal trap. However, any other
		 * hpmcounter access should result in an illegal trap as they must
		 * be accessed through the SBI PMU only.
		 */
		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
			*val = 0;
			return ret;
		} else {
			return KVM_INSN_ILLEGAL_TRAP;
		}
	}

	/* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
	if (wr_mask)
		return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
		return KVM_INSN_ILLEGAL_TRAP;

	return ret;
}

static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	kfree(kvpmu->sdata);
	kvpmu->sdata = NULL;
	kvpmu->snapshot_addr = INVALID_GPA;
}

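/* Set up (or disable) the SBI PMU snapshot shared memory area provided by the guest */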
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
				      unsigned long saddr_high, unsigned long flags,
				      struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
	int sbiret = 0;
	gpa_t saddr;

	if (!kvpmu || flags) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
		kvm_pmu_clear_snapshot_area(vcpu);
		return 0;
	}

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT)) {
			saddr |= ((gpa_t)saddr_high << 32);
		} else {
			sbiret = SBI_ERR_INVALID_ADDRESS;
			goto out;
		}
	}

	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
	if (!kvpmu->sdata)
		return -ENOMEM;

	/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
		kfree(kvpmu->sdata);
		kvpmu->sdata = NULL;
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->snapshot_addr = saddr;

out:
	retdata->err_val = sbiret;

	return 0;
}

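/* Report, via guest shared memory, which of the queried events can be monitored */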
int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
				  unsigned long saddr_high, unsigned long num_events,
				  unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
{
	struct riscv_pmu_event_info *einfo = NULL;
	int shmem_size = num_events * sizeof(*einfo);
	gpa_t shmem;
	u32 eidx, etype;
	u64 econfig;
	int ret;

	if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) {
		ret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	shmem = saddr_low;
	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT)) {
			shmem |= ((gpa_t)saddr_high << 32);
		} else {
			ret = SBI_ERR_INVALID_ADDRESS;
			goto out;
		}
	}

	einfo = kzalloc(shmem_size, GFP_KERNEL);
	if (!einfo)
		return -ENOMEM;

	ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
	if (ret) {
		ret = SBI_ERR_FAILURE;
		goto free_mem;
	}

	for (int i = 0; i < num_events; i++) {
		eidx = einfo[i].event_idx;
		etype = kvm_pmu_get_perf_event_type(eidx);
		econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
		ret = riscv_pmu_get_event_info(etype, econfig, NULL);
		einfo[i].output = (ret > 0) ? 1 : 0;
	}

	ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
	if (ret)
		ret = SBI_ERR_INVALID_ADDRESS;

free_mem:
	kfree(einfo);
out:
	retdata->err_val = ret;

	return 0;
}

int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) {
		retdata->err_val = SBI_ERR_INVALID_PARAM;
		return 0;
	}

	cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
}

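/* SBI counter start: seed initial (or snapshot) values and enable the requested counters */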
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = array_index_nospec(i + ctr_base,
					       RISCV_KVM_MAX_COUNTERS);
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}

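/* SBI counter stop: disable the requested counters and optionally snapshot or reset them */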
int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				unsigned long ctr_mask, unsigned long flags,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
	bool shmem_needs_update = false;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
		sbiret = SBI_ERR_NO_SHMEM;
		goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = array_index_nospec(i + ctr_base,
					       RISCV_KVM_MAX_COUNTERS);
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			if (!kvpmu->fw_event[fevent_code].started)
				sbiret = SBI_ERR_ALREADY_STOPPED;

			kvpmu->fw_event[fevent_code].started = false;
		} else if (pmc->perf_event) {
			if (pmc->started) {
				/* Stop counting the counter */
				perf_event_disable(pmc->perf_event);
				pmc->started = false;
			} else {
				sbiret = SBI_ERR_ALREADY_STOPPED;
			}

			if (flags & SBI_PMU_STOP_FLAG_RESET)
				/* Release the counter if this is a reset request */
				kvm_pmu_release_perf_event(pmc);
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}

		if (snap_flag_set && !sbiret) {
			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
			else if (pmc->perf_event)
				pmc->counter_val += perf_event_read_value(pmc->perf_event,
									  &enabled, &running);
			/*
			 * The counter and overflow indices in the snapshot region are relative
			 * to cbase. Modify the set bit in the counter mask instead of the
			 * pmc_index, which indicates the absolute counter index.
			 */
			if (test_bit(pmc_index, kvpmu->pmc_overflown))
				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
			shmem_needs_update = true;
		}

		if (flags & SBI_PMU_STOP_FLAG_RESET) {
			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
			clear_bit(pmc_index, kvpmu->pmc_in_use);
			clear_bit(pmc_index, kvpmu->pmc_overflown);
			if (snap_flag_set) {
				/*
				 * Only clear the given counter as the caller is responsible to
				 * validate both the overflow mask and configured counters.
				 */
				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
				shmem_needs_update = true;
			}
		}
	}

	if (shmem_needs_update)
		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					     sizeof(struct riscv_pmu_snapshot_data));

out:
	retdata->err_val = sbiret;

	return 0;
}

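/* SBI counter configure/match: find a suitable counter for the event and program it */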
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		.disabled = true,
		/*
		 * It should never reach here if the platform doesn't support the sscofpmf
		 * extension as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * The SKIP_MATCH flag indicates the caller is aware of the assigned counter
	 * for this event. Just do a sanity check that it is already marked as used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	ctr_idx = array_index_nospec(ctr_idx, RISCV_KVM_MAX_COUNTERS);
	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
				      struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

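/* Initialize the per-VCPU virtual PMU state, including counter info for each counter */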
void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	 * PMU functionality should only be available to guests if privilege mode
	 * filtering is available in the host. Otherwise, the guest will always count
	 * events while the execution is in hypervisor mode.
	 */
	if (kvm_riscv_isa_check_host(SSCOFPMF))
		return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 || !hpm_width || !num_hw_ctrs)
		return;

	/*
	 * Increase the number of hardware counters by one to account for the time counter.
	 */
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvpmu->snapshot_addr = INVALID_GPA;

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	 * There is no correlation between the logical hardware counters and virtual counters.
	 * However, we need to encode a hpmcounter CSR in the counter info field so that
	 * KVM can trap and emulate the read. This works well in the migration use case as
	 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
	 */
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
		/* TIME CSR shouldn't be read from the perf interface */
		if (i == 1)
			continue;
		pmc = &kvpmu->pmc[i];
		pmc->idx = i;
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
		pmc->vcpu = vcpu;
		if (i < kvpmu->num_hw_ctrs) {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
			if (i < 3)
				/* CY, IR counters */
				pmc->cinfo.width = 63;
			else
				pmc->cinfo.width = hpm_width;
			/*
			 * The CSR number doesn't have any relation with the logical
			 * hardware counters. The CSR numbers are encoded sequentially
			 * to avoid maintaining a map between the virtual counter
			 * and CSR number.
			 */
			pmc->cinfo.csr = CSR_CYCLE + i;
		} else {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
			pmc->cinfo.width = 63;
		}
	}

	kvpmu->init_done = true;
}

void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
		return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
		pmc = &kvpmu->pmc[i];
		pmc->counter_val = 0;
		kvm_pmu_release_perf_event(pmc);
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}
	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvm_pmu_clear_snapshot_area(vcpu);
}

void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}