1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2023 Rivos Inc
4 *
5 * Authors:
6 * Atish Patra <atishp@rivosinc.com>
7 */
8
9 #define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
10 #include <linux/bitops.h>
11 #include <linux/errno.h>
12 #include <linux/err.h>
13 #include <linux/kvm_host.h>
14 #include <linux/nospec.h>
15 #include <linux/perf/riscv_pmu.h>
16 #include <asm/csr.h>
17 #include <asm/kvm_isa.h>
18 #include <asm/kvm_vcpu_sbi.h>
19 #include <asm/kvm_vcpu_pmu.h>
20 #include <asm/sbi.h>
21
/* Total number of virtual counters (hardware plus firmware) tracked by @kvpmu. */
#define kvm_pmu_num_counters(kvpmu) ((kvpmu)->num_hw_ctrs + (kvpmu)->num_fw_ctrs)
/* Event type field of an SBI event index: the type-mask bits shifted down by 16. */
#define get_event_type(eidx) (((eidx) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
/* Event code field of an SBI event index. */
#define get_event_code(eidx) ((eidx) & SBI_PMU_EVENT_IDX_CODE_MASK)
25
26 static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
27 [SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
28 [SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
29 [SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
30 [SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
31 [SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
32 [SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
33 [SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
34 [SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
35 [SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
36 [SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
37 };
38
kvm_pmu_get_sample_period(struct kvm_pmc * pmc)39 static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
40 {
41 u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
42 u64 sample_period;
43
44 if (!pmc->counter_val)
45 sample_period = counter_val_mask;
46 else
47 sample_period = (-pmc->counter_val) & counter_val_mask;
48
49 return sample_period;
50 }
51
kvm_pmu_get_perf_event_type(unsigned long eidx)52 static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
53 {
54 enum sbi_pmu_event_type etype = get_event_type(eidx);
55 u32 type = PERF_TYPE_MAX;
56
57 switch (etype) {
58 case SBI_PMU_EVENT_TYPE_HW:
59 type = PERF_TYPE_HARDWARE;
60 break;
61 case SBI_PMU_EVENT_TYPE_CACHE:
62 type = PERF_TYPE_HW_CACHE;
63 break;
64 case SBI_PMU_EVENT_TYPE_RAW:
65 case SBI_PMU_EVENT_TYPE_RAW_V2:
66 case SBI_PMU_EVENT_TYPE_FW:
67 type = PERF_TYPE_RAW;
68 break;
69 default:
70 break;
71 }
72
73 return type;
74 }
75
kvm_pmu_is_fw_event(unsigned long eidx)76 static bool kvm_pmu_is_fw_event(unsigned long eidx)
77 {
78 return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
79 }
80
kvm_pmu_release_perf_event(struct kvm_pmc * pmc)81 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
82 {
83 if (pmc->perf_event) {
84 perf_event_disable(pmc->perf_event);
85 perf_event_release_kernel(pmc->perf_event);
86 pmc->perf_event = NULL;
87 }
88 }
89
kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)90 static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
91 {
92 return hw_event_perf_map[array_index_nospec(sbi_event_code,
93 SBI_PMU_HW_GENERAL_MAX)];
94 }
95
kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)96 static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
97 {
98 u64 config = U64_MAX;
99 unsigned int cache_type, cache_op, cache_result;
100
101 /* All the cache event masks lie within 0xFF. No separate masking is necessary */
102 cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
103 SBI_PMU_EVENT_CACHE_ID_SHIFT;
104 cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
105 SBI_PMU_EVENT_CACHE_OP_SHIFT;
106 cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;
107
108 if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
109 cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
110 cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
111 return config;
112
113 config = cache_type | (cache_op << 8) | (cache_result << 16);
114
115 return config;
116 }
117
kvm_pmu_get_perf_event_config(unsigned long eidx,uint64_t evt_data)118 static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
119 {
120 enum sbi_pmu_event_type etype = get_event_type(eidx);
121 u32 ecode = get_event_code(eidx);
122 u64 config = U64_MAX;
123
124 switch (etype) {
125 case SBI_PMU_EVENT_TYPE_HW:
126 if (ecode < SBI_PMU_HW_GENERAL_MAX)
127 config = kvm_pmu_get_perf_event_hw_config(ecode);
128 break;
129 case SBI_PMU_EVENT_TYPE_CACHE:
130 config = kvm_pmu_get_perf_event_cache_config(ecode);
131 break;
132 case SBI_PMU_EVENT_TYPE_RAW:
133 config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
134 break;
135 case SBI_PMU_EVENT_TYPE_RAW_V2:
136 config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK;
137 break;
138 case SBI_PMU_EVENT_TYPE_FW:
139 if (ecode < SBI_PMU_FW_MAX)
140 config = (1ULL << 63) | ecode;
141 break;
142 default:
143 break;
144 }
145
146 return config;
147 }
148
kvm_pmu_get_fixed_pmc_index(unsigned long eidx)149 static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
150 {
151 u32 etype = kvm_pmu_get_perf_event_type(eidx);
152 u32 ecode = get_event_code(eidx);
153
154 if (etype != SBI_PMU_EVENT_TYPE_HW)
155 return -EINVAL;
156
157 if (ecode == SBI_PMU_HW_CPU_CYCLES)
158 return 0;
159 else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
160 return 2;
161 else
162 return -EINVAL;
163 }
164
kvm_pmu_get_programmable_pmc_index(struct kvm_pmu * kvpmu,unsigned long eidx,unsigned long cbase,unsigned long cmask)165 static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
166 unsigned long cbase, unsigned long cmask)
167 {
168 int ctr_idx = -1;
169 int i, pmc_idx;
170 int min, max;
171
172 if (kvm_pmu_is_fw_event(eidx)) {
173 /* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
174 min = kvpmu->num_hw_ctrs;
175 max = min + kvpmu->num_fw_ctrs;
176 } else {
177 /* First 3 counters are reserved for fixed counters */
178 min = 3;
179 max = kvpmu->num_hw_ctrs;
180 }
181
182 for_each_set_bit(i, &cmask, BITS_PER_LONG) {
183 pmc_idx = i + cbase;
184 if ((pmc_idx >= min && pmc_idx < max) &&
185 !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
186 ctr_idx = pmc_idx;
187 break;
188 }
189 }
190
191 return ctr_idx;
192 }
193
/*
 * Choose a counter for @eidx: a fixed counter when the event has one,
 * otherwise a free programmable counter from @cbase/@cmask. Returns the
 * counter index or a negative value when nothing is available.
 */
static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	/* Fixed counters need a fixed mapping because their width differs */
	int fixed_idx = kvm_pmu_get_fixed_pmc_index(eidx);

	if (fixed_idx < 0)
		return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);

	return fixed_idx;
}
206
pmu_fw_ctr_read_hi(struct kvm_vcpu * vcpu,unsigned long cidx,unsigned long * out_val)207 static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
208 unsigned long *out_val)
209 {
210 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
211 struct kvm_pmc *pmc;
212 int fevent_code;
213
214 if (!IS_ENABLED(CONFIG_32BIT)) {
215 pr_warn("%s: should be invoked for only RV32\n", __func__);
216 return -EINVAL;
217 }
218
219 if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
220 pr_warn("Invalid counter id [%ld]during read\n", cidx);
221 return -EINVAL;
222 }
223
224 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
225 pmc = &kvpmu->pmc[cidx];
226
227 if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
228 return -EINVAL;
229
230 if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
231 return -EINVAL;
232
233 fevent_code = get_event_code(pmc->event_idx);
234 if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
235 "Invalid firmware event code: %d\n", fevent_code))
236 return -EINVAL;
237
238 pmc->counter_val = kvpmu->fw_event[fevent_code].value;
239
240 *out_val = pmc->counter_val >> 32;
241
242 return 0;
243 }
244
pmu_ctr_read(struct kvm_vcpu * vcpu,unsigned long cidx,unsigned long * out_val)245 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
246 unsigned long *out_val)
247 {
248 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
249 struct kvm_pmc *pmc;
250 u64 enabled, running;
251 int fevent_code;
252
253 if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
254 pr_warn("Invalid counter id [%ld] during read\n", cidx);
255 return -EINVAL;
256 }
257
258 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
259 pmc = &kvpmu->pmc[cidx];
260
261 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
262 if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
263 return -EINVAL;
264
265 fevent_code = get_event_code(pmc->event_idx);
266 if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
267 "Invalid firmware event code: %d\n", fevent_code))
268 return -EINVAL;
269
270 pmc->counter_val = kvpmu->fw_event[fevent_code].value;
271 } else if (pmc->perf_event) {
272 pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
273 } else {
274 return -EINVAL;
275 }
276 *out_val = pmc->counter_val;
277
278 return 0;
279 }
280
kvm_pmu_validate_counter_mask(struct kvm_pmu * kvpmu,unsigned long ctr_base,unsigned long ctr_mask)281 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
282 unsigned long ctr_mask)
283 {
284 unsigned long num_ctrs = kvm_pmu_num_counters(kvpmu);
285
286 /* Make sure we have a valid counter mask requested from the caller */
287 if (!ctr_mask || ctr_base >= num_ctrs || (ctr_base + __fls(ctr_mask) >= num_ctrs))
288 return -EINVAL;
289
290 return 0;
291 }
292
kvm_riscv_pmu_overflow(struct perf_event * perf_event,struct perf_sample_data * data,struct pt_regs * regs)293 static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
294 struct perf_sample_data *data,
295 struct pt_regs *regs)
296 {
297 struct kvm_pmc *pmc = perf_event->overflow_handler_context;
298 struct kvm_vcpu *vcpu = pmc->vcpu;
299 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
300 struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
301 u64 period;
302
303 /*
304 * Stop the event counting by directly accessing the perf_event.
305 * Otherwise, this needs to deferred via a workqueue.
306 * That will introduce skew in the counter value because the actual
307 * physical counter would start after returning from this function.
308 * It will be stopped again once the workqueue is scheduled
309 */
310 rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
311
312 /*
313 * The hw counter would start automatically when this function returns.
314 * Thus, the host may continue to interrupt and inject it to the guest
315 * even without the guest configuring the next event. Depending on the hardware
316 * the host may have some sluggishness only if privilege mode filtering is not
317 * available. In an ideal world, where qemu is not the only capable hardware,
318 * this can be removed.
319 * FYI: ARM64 does this way while x86 doesn't do anything as such.
320 * TODO: Should we keep it for RISC-V ?
321 */
322 period = -(local64_read(&perf_event->count));
323
324 local64_set(&perf_event->hw.period_left, 0);
325 perf_event->attr.sample_period = period;
326 perf_event->hw.sample_period = period;
327
328 set_bit(pmc->idx, kvpmu->pmc_overflown);
329 kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
330
331 rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
332 }
333
kvm_pmu_create_perf_event(struct kvm_pmc * pmc,struct perf_event_attr * attr,unsigned long flags,unsigned long eidx,unsigned long evtdata)334 static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
335 unsigned long flags, unsigned long eidx,
336 unsigned long evtdata)
337 {
338 struct perf_event *event;
339
340 kvm_pmu_release_perf_event(pmc);
341 attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
342 if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
343 //TODO: Do we really want to clear the value in hardware counter
344 pmc->counter_val = 0;
345 }
346
347 /*
348 * Set the default sample_period for now. The guest specified value
349 * will be updated in the start call.
350 */
351 attr->sample_period = kvm_pmu_get_sample_period(pmc);
352
353 event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
354 if (IS_ERR(event)) {
355 pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
356 return PTR_ERR(event);
357 }
358
359 pmc->perf_event = event;
360 if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
361 perf_event_enable(pmc->perf_event);
362
363 return 0;
364 }
365
kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu * vcpu,unsigned long fid)366 int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
367 {
368 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
369 struct kvm_fw_event *fevent;
370
371 if (!kvpmu || fid >= SBI_PMU_FW_MAX)
372 return -EINVAL;
373
374 fevent = &kvpmu->fw_event[fid];
375 if (fevent->started)
376 fevent->value++;
377
378 return 0;
379 }
380
kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu * vcpu,unsigned int csr_num,unsigned long * val,unsigned long new_val,unsigned long wr_mask)381 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
382 unsigned long *val, unsigned long new_val,
383 unsigned long wr_mask)
384 {
385 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
386 int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;
387
388 if (!kvpmu || !kvpmu->init_done) {
389 /*
390 * In absence of sscofpmf in the platform, the guest OS may use
391 * the legacy PMU driver to read cycle/instret. In that case,
392 * just return 0 to avoid any illegal trap. However, any other
393 * hpmcounter access should result in illegal trap as they must
394 * be access through SBI PMU only.
395 */
396 if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
397 *val = 0;
398 return ret;
399 } else {
400 return KVM_INSN_ILLEGAL_TRAP;
401 }
402 }
403
404 /* The counter CSR are read only. Thus, any write should result in illegal traps */
405 if (wr_mask)
406 return KVM_INSN_ILLEGAL_TRAP;
407
408 cidx = csr_num - CSR_CYCLE;
409
410 if (pmu_ctr_read(vcpu, cidx, val) < 0)
411 return KVM_INSN_ILLEGAL_TRAP;
412
413 return ret;
414 }
415
kvm_pmu_clear_snapshot_area(struct kvm_vcpu * vcpu)416 static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
417 {
418 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
419
420 kfree(kvpmu->sdata);
421 kvpmu->sdata = NULL;
422 kvpmu->snapshot_addr = INVALID_GPA;
423 }
424
kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu * vcpu,unsigned long saddr_low,unsigned long saddr_high,unsigned long flags,struct kvm_vcpu_sbi_return * retdata)425 int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
426 unsigned long saddr_high, unsigned long flags,
427 struct kvm_vcpu_sbi_return *retdata)
428 {
429 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
430 int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
431 int sbiret = 0;
432 gpa_t saddr;
433
434 if (!kvpmu || flags) {
435 sbiret = SBI_ERR_INVALID_PARAM;
436 goto out;
437 }
438
439 if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
440 kvm_pmu_clear_snapshot_area(vcpu);
441 return 0;
442 }
443
444 saddr = saddr_low;
445
446 if (saddr_high != 0) {
447 if (IS_ENABLED(CONFIG_32BIT)) {
448 saddr |= ((gpa_t)saddr_high << 32);
449 } else {
450 sbiret = SBI_ERR_INVALID_ADDRESS;
451 goto out;
452 }
453 }
454
455 kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
456 if (!kvpmu->sdata) {
457 sbiret = SBI_ERR_FAILURE;
458 goto out;
459 }
460
461 /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
462 if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
463 kfree(kvpmu->sdata);
464 kvpmu->sdata = NULL;
465 sbiret = SBI_ERR_INVALID_ADDRESS;
466 goto out;
467 }
468
469 kvpmu->snapshot_addr = saddr;
470
471 out:
472 retdata->err_val = sbiret;
473
474 return 0;
475 }
476
kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu * vcpu,unsigned long saddr_low,unsigned long saddr_high,unsigned long num_events,unsigned long flags,struct kvm_vcpu_sbi_return * retdata)477 int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
478 unsigned long saddr_high, unsigned long num_events,
479 unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
480 {
481 struct riscv_pmu_event_info *einfo = NULL;
482 int shmem_size = num_events * sizeof(*einfo);
483 gpa_t shmem;
484 u32 eidx, etype;
485 u64 econfig;
486 int ret;
487
488 if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) {
489 ret = SBI_ERR_INVALID_PARAM;
490 goto out;
491 }
492
493 shmem = saddr_low;
494 if (saddr_high != 0) {
495 if (IS_ENABLED(CONFIG_32BIT)) {
496 shmem |= ((gpa_t)saddr_high << 32);
497 } else {
498 ret = SBI_ERR_INVALID_ADDRESS;
499 goto out;
500 }
501 }
502
503 einfo = kzalloc(shmem_size, GFP_KERNEL);
504 if (!einfo) {
505 ret = SBI_ERR_FAILURE;
506 goto out;
507 }
508
509 ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
510 if (ret) {
511 ret = SBI_ERR_FAILURE;
512 goto free_mem;
513 }
514
515 for (int i = 0; i < num_events; i++) {
516 eidx = einfo[i].event_idx;
517 etype = kvm_pmu_get_perf_event_type(eidx);
518 econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
519 ret = riscv_pmu_get_event_info(etype, econfig, NULL);
520 einfo[i].output = (ret > 0) ? 1 : 0;
521 }
522
523 ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
524 if (ret)
525 ret = SBI_ERR_INVALID_ADDRESS;
526
527 free_mem:
528 kfree(einfo);
529 out:
530 retdata->err_val = ret;
531
532 return 0;
533 }
534
kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu * vcpu,struct kvm_vcpu_sbi_return * retdata)535 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
536 struct kvm_vcpu_sbi_return *retdata)
537 {
538 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
539
540 retdata->out_val = kvm_pmu_num_counters(kvpmu);
541
542 return 0;
543 }
544
kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu * vcpu,unsigned long cidx,struct kvm_vcpu_sbi_return * retdata)545 int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
546 struct kvm_vcpu_sbi_return *retdata)
547 {
548 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
549
550 if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) {
551 retdata->err_val = SBI_ERR_INVALID_PARAM;
552 return 0;
553 }
554
555 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
556 retdata->out_val = kvpmu->pmc[cidx].cinfo.value;
557
558 return 0;
559 }
560
/*
 * Handle the SBI call that starts the counters selected by
 * @ctr_base/@ctr_mask.
 *
 * @flags: SET_INIT_VALUE loads @ival into every selected counter;
 *         otherwise INIT_SNAPSHOT loads each counter from the snapshot
 *         shared memory, which must have been configured.
 * @retdata: the SBI error code is stored in err_val.
 *
 * Counters that are not in use are skipped. Firmware counters are started
 * by marking their firmware event started; other counters get their
 * sample period programmed and their perf event enabled. Always returns 0.
 *
 * @ctr_mask is a single unsigned long, so exactly BITS_PER_LONG bits are
 * scanned (as kvm_pmu_get_programmable_pmc_index() does). A larger bound
 * would make the bitmap walk read past the variable whenever that bound
 * exceeds the word size.
 */
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		/* Refresh the host copy of the snapshot area from guest memory */
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, BITS_PER_LONG) {
		pmc_index = array_index_nospec(i + ctr_base,
					       RISCV_KVM_MAX_COUNTERS);
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			/* In use but neither a firmware counter nor backed by a perf event */
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}
637
kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu * vcpu,unsigned long ctr_base,unsigned long ctr_mask,unsigned long flags,struct kvm_vcpu_sbi_return * retdata)638 int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
639 unsigned long ctr_mask, unsigned long flags,
640 struct kvm_vcpu_sbi_return *retdata)
641 {
642 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
643 int i, pmc_index, sbiret = 0;
644 u64 enabled, running;
645 struct kvm_pmc *pmc;
646 int fevent_code;
647 bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
648 bool shmem_needs_update = false;
649
650 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
651 sbiret = SBI_ERR_INVALID_PARAM;
652 goto out;
653 }
654
655 if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
656 sbiret = SBI_ERR_NO_SHMEM;
657 goto out;
658 }
659
660 /* Stop the counters that have been configured and requested by the guest */
661 for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
662 pmc_index = array_index_nospec(i + ctr_base,
663 RISCV_KVM_MAX_COUNTERS);
664 if (!test_bit(pmc_index, kvpmu->pmc_in_use))
665 continue;
666 pmc = &kvpmu->pmc[pmc_index];
667 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
668 fevent_code = get_event_code(pmc->event_idx);
669 if (fevent_code >= SBI_PMU_FW_MAX) {
670 sbiret = SBI_ERR_INVALID_PARAM;
671 goto out;
672 }
673
674 if (!kvpmu->fw_event[fevent_code].started)
675 sbiret = SBI_ERR_ALREADY_STOPPED;
676
677 kvpmu->fw_event[fevent_code].started = false;
678 } else if (pmc->perf_event) {
679 if (pmc->started) {
680 /* Stop counting the counter */
681 perf_event_disable(pmc->perf_event);
682 pmc->started = false;
683 } else {
684 sbiret = SBI_ERR_ALREADY_STOPPED;
685 }
686
687 if (flags & SBI_PMU_STOP_FLAG_RESET)
688 /* Release the counter if this is a reset request */
689 kvm_pmu_release_perf_event(pmc);
690 } else {
691 sbiret = SBI_ERR_INVALID_PARAM;
692 }
693
694 if (snap_flag_set && !sbiret) {
695 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
696 pmc->counter_val = kvpmu->fw_event[fevent_code].value;
697 else if (pmc->perf_event)
698 pmc->counter_val += perf_event_read_value(pmc->perf_event,
699 &enabled, &running);
700 /*
701 * The counter and overflow indices in the snapshot region are w.r.to
702 * cbase. Modify the set bit in the counter mask instead of the pmc_index
703 * which indicates the absolute counter index.
704 */
705 if (test_bit(pmc_index, kvpmu->pmc_overflown))
706 kvpmu->sdata->ctr_overflow_mask |= BIT(i);
707 kvpmu->sdata->ctr_values[i] = pmc->counter_val;
708 shmem_needs_update = true;
709 }
710
711 if (flags & SBI_PMU_STOP_FLAG_RESET) {
712 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
713 clear_bit(pmc_index, kvpmu->pmc_in_use);
714 clear_bit(pmc_index, kvpmu->pmc_overflown);
715 if (snap_flag_set) {
716 /*
717 * Only clear the given counter as the caller is responsible to
718 * validate both the overflow mask and configured counters.
719 */
720 kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
721 shmem_needs_update = true;
722 }
723 }
724 }
725
726 if (shmem_needs_update)
727 kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
728 sizeof(struct riscv_pmu_snapshot_data));
729
730 out:
731 retdata->err_val = sbiret;
732
733 return 0;
734 }
735
/*
 * Handle the SBI call that finds and configures a counter for event
 * @eidx among the counters selected by @ctr_base/@ctr_mask.
 *
 * With SKIP_MATCH the lowest selected counter is used and must already
 * be marked in use; otherwise a fixed or free programmable counter is
 * chosen. Firmware events need no perf event; for all other events a
 * perf event is created on the chosen counter. On success the counter is
 * marked in use and its index is returned in @retdata->out_val. Always
 * returns 0; the SBI error code goes to @retdata->err_val.
 */
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	u32 perf_type = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct perf_event_attr attr = {
		.type = perf_type,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		.disabled = true,
		/*
		 * This point should never be reached on a platform without the
		 * sscofpmf extension, since mode filtering does not work
		 * without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};
	struct kvm_pmc *pmc;
	unsigned long fw_code;
	bool fw_event;
	int ctr_idx, sbiret = 0;
	long rc;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	fw_code = get_event_code(eidx);
	fw_event = kvm_pmu_is_fw_event(eidx);
	if (fw_event && fw_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	if (!(flags & SBI_PMU_CFG_FLAG_SKIP_MATCH)) {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	} else {
		/*
		 * SKIP_MATCH means the caller already knows which counter is
		 * assigned to this event. Only sanity check that it is
		 * marked as used.
		 */
		ctr_idx = ctr_base + __ffs(ctr_mask);
		if (!test_bit(ctr_idx, kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}

	ctr_idx = array_index_nospec(ctr_idx, RISCV_KVM_MAX_COUNTERS);
	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (!fw_event) {
		rc = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (rc) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	} else if (flags & SBI_PMU_CFG_FLAG_AUTO_START) {
		kvpmu->fw_event[fw_code].started = true;
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
out:
	retdata->err_val = sbiret;

	return 0;
}
817
kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu * vcpu,unsigned long cidx,struct kvm_vcpu_sbi_return * retdata)818 int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
819 struct kvm_vcpu_sbi_return *retdata)
820 {
821 int ret;
822
823 ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
824 if (ret == -EINVAL)
825 retdata->err_val = SBI_ERR_INVALID_PARAM;
826
827 return 0;
828 }
829
kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu * vcpu,unsigned long cidx,struct kvm_vcpu_sbi_return * retdata)830 int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
831 struct kvm_vcpu_sbi_return *retdata)
832 {
833 int ret;
834
835 ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
836 if (ret == -EINVAL)
837 retdata->err_val = SBI_ERR_INVALID_PARAM;
838
839 return 0;
840 }
841
kvm_riscv_vcpu_pmu_init(struct kvm_vcpu * vcpu)842 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
843 {
844 int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
845 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
846 struct kvm_pmc *pmc;
847
848 /*
849 * PMU functionality should be only available to guests if privilege mode
850 * filtering is available in the host. Otherwise, guest will always count
851 * events while the execution is in hypervisor mode.
852 */
853 if (kvm_riscv_isa_check_host(SSCOFPMF))
854 return;
855
856 ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
857 if (ret < 0 || !hpm_width || !num_hw_ctrs)
858 return;
859
860 /*
861 * Increase the number of hardware counters to offset the time counter.
862 */
863 kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
864 kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
865 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
866 kvpmu->snapshot_addr = INVALID_GPA;
867
868 if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
869 pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
870 kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
871 }
872
873 /*
874 * There is no correlation between the logical hardware counter and virtual counters.
875 * However, we need to encode a hpmcounter CSR in the counter info field so that
876 * KVM can trap n emulate the read. This works well in the migration use case as
877 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
878 */
879 for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
880 /* TIME CSR shouldn't be read from perf interface */
881 if (i == 1)
882 continue;
883 pmc = &kvpmu->pmc[i];
884 pmc->idx = i;
885 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
886 pmc->vcpu = vcpu;
887 if (i < kvpmu->num_hw_ctrs) {
888 pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
889 if (i < 3)
890 /* CY, IR counters */
891 pmc->cinfo.width = 63;
892 else
893 pmc->cinfo.width = hpm_width;
894 /*
895 * The CSR number doesn't have any relation with the logical
896 * hardware counters. The CSR numbers are encoded sequentially
897 * to avoid maintaining a map between the virtual counter
898 * and CSR number.
899 */
900 pmc->cinfo.csr = CSR_CYCLE + i;
901 } else {
902 pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
903 pmc->cinfo.width = 63;
904 }
905 }
906
907 kvpmu->init_done = true;
908 }
909
kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu * vcpu)910 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
911 {
912 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
913 struct kvm_pmc *pmc;
914 int i;
915
916 if (!kvpmu)
917 return;
918
919 for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
920 pmc = &kvpmu->pmc[i];
921 pmc->counter_val = 0;
922 kvm_pmu_release_perf_event(pmc);
923 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
924 }
925 bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
926 bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
927 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
928 kvm_pmu_clear_snapshot_area(vcpu);
929 }
930
/* Reset the vPMU of @vcpu; a reset is the same as a full deinit. */
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}
935