// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *     Atish Patra <atishp@rivosinc.com>
 */

#define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
        [SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
        [SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
        [SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
        [SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
        [SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
        [SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
        [SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
        [SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
        [SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
        [SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};

static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
        u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
        u64 sample_period;

        if (!pmc->counter_val)
                sample_period = counter_val_mask;
        else
                sample_period = (-pmc->counter_val) & counter_val_mask;

        return sample_period;
}

static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
        enum sbi_pmu_event_type etype = get_event_type(eidx);
        u32 type = PERF_TYPE_MAX;

        switch (etype) {
        case SBI_PMU_EVENT_TYPE_HW:
                type = PERF_TYPE_HARDWARE;
                break;
        case SBI_PMU_EVENT_TYPE_CACHE:
                type = PERF_TYPE_HW_CACHE;
                break;
        case SBI_PMU_EVENT_TYPE_RAW:
        case SBI_PMU_EVENT_TYPE_FW:
                type = PERF_TYPE_RAW;
                break;
        default:
                break;
        }

        return type;
}

static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
        return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}

static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
        if (pmc->perf_event) {
                perf_event_disable(pmc->perf_event);
                perf_event_release_kernel(pmc->perf_event);
                pmc->perf_event = NULL;
        }
}

static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
        return hw_event_perf_map[sbi_event_code];
}

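/*
 * Translate an SBI cache event code into the perf hw_cache config encoding,
 * i.e. (cache id) | (op id << 8) | (result id << 16). For example, an L1D
 * read miss becomes PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) == 0x10000.
 */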
static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
        u64 config = U64_MAX;
        unsigned int cache_type, cache_op, cache_result;

        /* All the cache event masks lie within 0xFF. No separate masking is necessary */
        cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
                      SBI_PMU_EVENT_CACHE_ID_SHIFT;
        cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
                    SBI_PMU_EVENT_CACHE_OP_SHIFT;
        cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

        if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
            cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
            cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return config;

        config = cache_type | (cache_op << 8) | (cache_result << 16);

        return config;
}

static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
        enum sbi_pmu_event_type etype = get_event_type(eidx);
        u32 ecode = get_event_code(eidx);
        u64 config = U64_MAX;

        switch (etype) {
        case SBI_PMU_EVENT_TYPE_HW:
                if (ecode < SBI_PMU_HW_GENERAL_MAX)
                        config = kvm_pmu_get_perf_event_hw_config(ecode);
                break;
        case SBI_PMU_EVENT_TYPE_CACHE:
                config = kvm_pmu_get_perf_event_cache_config(ecode);
                break;
        case SBI_PMU_EVENT_TYPE_RAW:
                config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
                break;
        case SBI_PMU_EVENT_TYPE_FW:
                if (ecode < SBI_PMU_FW_MAX)
                        config = (1ULL << 63) | ecode;
                break;
        default:
                break;
        }

        return config;
}

static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
        u32 etype = kvm_pmu_get_perf_event_type(eidx);
        u32 ecode = get_event_code(eidx);

        if (etype != SBI_PMU_EVENT_TYPE_HW)
                return -EINVAL;

        if (ecode == SBI_PMU_HW_CPU_CYCLES)
                return 0;
        else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
                return 2;
        else
                return -EINVAL;
}

static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
                                              unsigned long cbase, unsigned long cmask)
{
        int ctr_idx = -1;
        int i, pmc_idx;
        int min, max;

        if (kvm_pmu_is_fw_event(eidx)) {
                /* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
                min = kvpmu->num_hw_ctrs;
                max = min + kvpmu->num_fw_ctrs;
        } else {
                /* First 3 counters are reserved for fixed counters */
                min = 3;
                max = kvpmu->num_hw_ctrs;
        }

        for_each_set_bit(i, &cmask, BITS_PER_LONG) {
                pmc_idx = i + cbase;
                if ((pmc_idx >= min && pmc_idx < max) &&
                    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
                        ctr_idx = pmc_idx;
                        break;
                }
        }

        return ctr_idx;
}

static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
                             unsigned long cbase, unsigned long cmask)
{
        int ret;

        /* Fixed counters need to have a fixed mapping as they have different widths */
        ret = kvm_pmu_get_fixed_pmc_index(eidx);
        if (ret >= 0)
                return ret;

        return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}

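/*
 * Return the upper 32 bits of a firmware counter. This is only meaningful on
 * RV32, where the SBI PMU counter read is split into a "read" call for the
 * low word and a "read_hi" call for the high word.
 */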
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
                              unsigned long *out_val)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        int fevent_code;

        if (!IS_ENABLED(CONFIG_32BIT)) {
                pr_warn("%s: should be invoked only on RV32\n", __func__);
                return -EINVAL;
        }

        if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
                pr_warn("Invalid counter id [%ld] during read\n", cidx);
                return -EINVAL;
        }

        pmc = &kvpmu->pmc[cidx];

        if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
                return -EINVAL;

        fevent_code = get_event_code(pmc->event_idx);
        pmc->counter_val = kvpmu->fw_event[fevent_code].value;

        *out_val = pmc->counter_val >> 32;

        return 0;
}

static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
                        unsigned long *out_val)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u64 enabled, running;
        int fevent_code;

        if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
                pr_warn("Invalid counter id [%ld] during read\n", cidx);
                return -EINVAL;
        }

        pmc = &kvpmu->pmc[cidx];

        if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
                fevent_code = get_event_code(pmc->event_idx);
                pmc->counter_val = kvpmu->fw_event[fevent_code].value;
        } else if (pmc->perf_event) {
                pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
        } else {
                return -EINVAL;
        }
        *out_val = pmc->counter_val;

        return 0;
}

static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
                                         unsigned long ctr_mask)
{
        /* Make sure we have a valid counter mask requested from the caller */
        if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
                return -EINVAL;

        return 0;
}

static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
                                   struct perf_sample_data *data,
                                   struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_vcpu *vcpu = pmc->vcpu;
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
        u64 period;

        /*
         * Stop the event counting by directly accessing the perf_event.
         * Otherwise, this needs to be deferred via a workqueue.
         * That will introduce skew in the counter value because the actual
         * physical counter would start after returning from this function.
         * It will be stopped again once the workqueue is scheduled.
         */
        rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

        /*
         * The hw counter would start automatically when this function returns.
         * Thus, the host may continue to interrupt and inject it to the guest
         * even without the guest configuring the next event. Depending on the hardware,
         * the host may have some sluggishness only if privilege mode filtering is not
         * available. In an ideal world, where qemu is not the only capable hardware,
         * this can be removed.
         * FYI: ARM64 does it this way while x86 doesn't do anything as such.
         * TODO: Should we keep it for RISC-V ?
         */
        period = -(local64_read(&perf_event->count));

        local64_set(&perf_event->hw.period_left, 0);
        perf_event->attr.sample_period = period;
        perf_event->hw.sample_period = period;

        set_bit(pmc->idx, kvpmu->pmc_overflown);
        kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

        rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
                                      unsigned long flags, unsigned long eidx,
                                      unsigned long evtdata)
{
        struct perf_event *event;

        kvm_pmu_release_perf_event(pmc);
        attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
        if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
                //TODO: Do we really want to clear the value in the hardware counter?
                pmc->counter_val = 0;
        }

        /*
         * Set the default sample_period for now. The guest-specified value
         * will be updated in the start call.
         */
        attr->sample_period = kvm_pmu_get_sample_period(pmc);

        event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
        if (IS_ERR(event)) {
                pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
                return PTR_ERR(event);
        }

        pmc->perf_event = event;
        if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
                perf_event_enable(pmc->perf_event);

        return 0;
}

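/*
 * Bump a firmware event counter. Callers pass one of the SBI_PMU_FW_* event
 * ids; the count only advances once the guest has started the corresponding
 * counter.
 */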
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_fw_event *fevent;

        if (!kvpmu || fid >= SBI_PMU_FW_MAX)
                return -EINVAL;

        fevent = &kvpmu->fw_event[fid];
        if (fevent->started)
                fevent->value++;

        return 0;
}

int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
                                unsigned long *val, unsigned long new_val,
                                unsigned long wr_mask)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

        if (!kvpmu || !kvpmu->init_done) {
                /*
                 * In the absence of sscofpmf in the platform, the guest OS may use
                 * the legacy PMU driver to read cycle/instret. In that case,
                 * just return 0 to avoid any illegal trap. However, any other
                 * hpmcounter access should result in an illegal trap as they must
                 * be accessed through the SBI PMU interface only.
                 */
                if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
                        *val = 0;
                        return ret;
                } else {
                        return KVM_INSN_ILLEGAL_TRAP;
                }
        }

        /* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
        if (wr_mask)
                return KVM_INSN_ILLEGAL_TRAP;

        cidx = csr_num - CSR_CYCLE;

        if (pmu_ctr_read(vcpu, cidx, val) < 0)
                return KVM_INSN_ILLEGAL_TRAP;

        return ret;
}

static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);

        if (kvpmu->sdata) {
                if (kvpmu->snapshot_addr != INVALID_GPA) {
                        memset(kvpmu->sdata, 0, snapshot_area_size);
                        kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr,
                                             kvpmu->sdata, snapshot_area_size);
                } else {
                        pr_warn("snapshot address invalid\n");
                }
                kfree(kvpmu->sdata);
                kvpmu->sdata = NULL;
        }
        kvpmu->snapshot_addr = INVALID_GPA;
}

int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
                                          unsigned long saddr_high, unsigned long flags,
                                          struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
        int sbiret = 0;
        gpa_t saddr;
        unsigned long hva;
        bool writable;

        if (!kvpmu || flags) {
                sbiret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
                kvm_pmu_clear_snapshot_area(vcpu);
                return 0;
        }

        saddr = saddr_low;

        if (saddr_high != 0) {
                if (IS_ENABLED(CONFIG_32BIT))
                        saddr |= ((gpa_t)saddr_high << 32);
                else
                        sbiret = SBI_ERR_INVALID_ADDRESS;
                goto out;
        }

        hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
        if (kvm_is_error_hva(hva) || !writable) {
                sbiret = SBI_ERR_INVALID_ADDRESS;
                goto out;
        }

        kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
        if (!kvpmu->sdata)
                return -ENOMEM;

        if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
                kfree(kvpmu->sdata);
                sbiret = SBI_ERR_FAILURE;
                goto out;
        }

        kvpmu->snapshot_addr = saddr;

out:
        retdata->err_val = sbiret;

        return 0;
}

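/*
 * The kvm_riscv_vcpu_pmu_* handlers below back the SBI PMU extension calls.
 * They return 0 to the SBI dispatcher; errors destined for the guest are
 * reported through retdata->err_val as SBI error codes.
 */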
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
                                struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

        retdata->out_val = kvm_pmu_num_counters(kvpmu);

        return 0;
}

int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
                                struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

        if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
                retdata->err_val = SBI_ERR_INVALID_PARAM;
                return 0;
        }

        retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

        return 0;
}

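/*
 * Start the counters selected by ctr_base/ctr_mask. With
 * SBI_PMU_START_FLAG_SET_INIT_VALUE the counters are (re)loaded with ival;
 * with SBI_PMU_START_FLAG_INIT_SNAPSHOT the initial values are taken from
 * the snapshot shared memory instead.
 */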
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
                                 unsigned long ctr_mask, unsigned long flags, u64 ival,
                                 struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int i, pmc_index, sbiret = 0;
        struct kvm_pmc *pmc;
        int fevent_code;
        bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

        if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
                sbiret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        if (snap_flag_set) {
                if (kvpmu->snapshot_addr == INVALID_GPA) {
                        sbiret = SBI_ERR_NO_SHMEM;
                        goto out;
                }
                if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
                                        sizeof(struct riscv_pmu_snapshot_data))) {
                        pr_warn("Unable to read snapshot shared memory while starting counters\n");
                        sbiret = SBI_ERR_FAILURE;
                        goto out;
                }
        }
        /* Start the counters that have been configured and requested by the guest */
        for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
                pmc_index = i + ctr_base;
                if (!test_bit(pmc_index, kvpmu->pmc_in_use))
                        continue;
                /* The guest started the counter again. Reset the overflow status */
                clear_bit(pmc_index, kvpmu->pmc_overflown);
                pmc = &kvpmu->pmc[pmc_index];
                if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
                        pmc->counter_val = ival;
                } else if (snap_flag_set) {
                        /* The counter indices in the snapshot are relative to the counter base */
                        pmc->counter_val = kvpmu->sdata->ctr_values[i];
                }

                if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
                        fevent_code = get_event_code(pmc->event_idx);
                        if (fevent_code >= SBI_PMU_FW_MAX) {
                                sbiret = SBI_ERR_INVALID_PARAM;
                                goto out;
                        }

                        /* Check if the counter was already started for some reason */
                        if (kvpmu->fw_event[fevent_code].started) {
                                sbiret = SBI_ERR_ALREADY_STARTED;
                                continue;
                        }

                        kvpmu->fw_event[fevent_code].started = true;
                        kvpmu->fw_event[fevent_code].value = pmc->counter_val;
                } else if (pmc->perf_event) {
                        if (unlikely(pmc->started)) {
                                sbiret = SBI_ERR_ALREADY_STARTED;
                                continue;
                        }
                        perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
                        perf_event_enable(pmc->perf_event);
                        pmc->started = true;
                } else {
                        sbiret = SBI_ERR_INVALID_PARAM;
                }
        }

out:
        retdata->err_val = sbiret;

        return 0;
}

int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
                                unsigned long ctr_mask, unsigned long flags,
                                struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int i, pmc_index, sbiret = 0;
        u64 enabled, running;
        struct kvm_pmc *pmc;
        int fevent_code;
        bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
        bool shmem_needs_update = false;

        if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
                sbiret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
                sbiret = SBI_ERR_NO_SHMEM;
                goto out;
        }

        /* Stop the counters that have been configured and requested by the guest */
        for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
                pmc_index = i + ctr_base;
                if (!test_bit(pmc_index, kvpmu->pmc_in_use))
                        continue;
                pmc = &kvpmu->pmc[pmc_index];
                if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
                        fevent_code = get_event_code(pmc->event_idx);
                        if (fevent_code >= SBI_PMU_FW_MAX) {
                                sbiret = SBI_ERR_INVALID_PARAM;
                                goto out;
                        }

                        if (!kvpmu->fw_event[fevent_code].started)
                                sbiret = SBI_ERR_ALREADY_STOPPED;

                        kvpmu->fw_event[fevent_code].started = false;
                } else if (pmc->perf_event) {
                        if (pmc->started) {
                                /* Stop counting the counter */
                                perf_event_disable(pmc->perf_event);
                                pmc->started = false;
                        } else {
                                sbiret = SBI_ERR_ALREADY_STOPPED;
                        }

                        if (flags & SBI_PMU_STOP_FLAG_RESET)
                                /* Release the counter if this is a reset request */
                                kvm_pmu_release_perf_event(pmc);
                } else {
                        sbiret = SBI_ERR_INVALID_PARAM;
                }

                if (snap_flag_set && !sbiret) {
                        if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
                                pmc->counter_val = kvpmu->fw_event[fevent_code].value;
                        else if (pmc->perf_event)
                                pmc->counter_val += perf_event_read_value(pmc->perf_event,
                                                                          &enabled, &running);
                        /*
                         * The counter and overflow indices in the snapshot region are
                         * relative to cbase. Modify the set bit in the counter mask
                         * instead of the pmc_index, which indicates the absolute
                         * counter index.
                         */
                        if (test_bit(pmc_index, kvpmu->pmc_overflown))
                                kvpmu->sdata->ctr_overflow_mask |= BIT(i);
                        kvpmu->sdata->ctr_values[i] = pmc->counter_val;
                        shmem_needs_update = true;
                }

                if (flags & SBI_PMU_STOP_FLAG_RESET) {
                        pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
                        clear_bit(pmc_index, kvpmu->pmc_in_use);
                        clear_bit(pmc_index, kvpmu->pmc_overflown);
                        if (snap_flag_set) {
                                /*
                                 * Only clear the given counter as the caller is responsible
                                 * for validating both the overflow mask and configured counters.
                                 */
                                kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
                                shmem_needs_update = true;
                        }
                }
        }

        if (shmem_needs_update)
                kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
                                     sizeof(struct riscv_pmu_snapshot_data));

out:
        retdata->err_val = sbiret;

        return 0;
}

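/*
 * Match the requested event to a counter: either reuse the counter named by
 * the caller (SBI_PMU_CFG_FLAG_SKIP_MATCH) or pick a free one, then create
 * the backing perf event (or mark the firmware event). With
 * SBI_PMU_CFG_FLAG_AUTO_START the counter starts counting immediately.
 * The matched counter index is returned to the guest.
 */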
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
                                     unsigned long ctr_mask, unsigned long flags,
                                     unsigned long eidx, u64 evtdata,
                                     struct kvm_vcpu_sbi_return *retdata)
{
        int ctr_idx, sbiret = 0;
        long ret;
        bool is_fevent;
        unsigned long event_code;
        u32 etype = kvm_pmu_get_perf_event_type(eidx);
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc = NULL;
        struct perf_event_attr attr = {
                .type = etype,
                .size = sizeof(struct perf_event_attr),
                .pinned = true,
                /*
                 * We should never reach here if the platform doesn't support the
                 * sscofpmf extension, as mode filtering won't work without it.
                 */
                .exclude_host = true,
                .exclude_hv = true,
                .exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
                .exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
                .config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
        };

        if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
                sbiret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        event_code = get_event_code(eidx);
        is_fevent = kvm_pmu_is_fw_event(eidx);
        if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
                sbiret = SBI_ERR_NOT_SUPPORTED;
                goto out;
        }

        /*
         * The SKIP_MATCH flag indicates the caller is aware of the assigned counter
         * for this event. Just do a sanity check if it is already marked as used.
         */
        if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
                if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
                        sbiret = SBI_ERR_FAILURE;
                        goto out;
                }
                ctr_idx = ctr_base + __ffs(ctr_mask);
        } else {
                ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
                if (ctr_idx < 0) {
                        sbiret = SBI_ERR_NOT_SUPPORTED;
                        goto out;
                }
        }

        pmc = &kvpmu->pmc[ctr_idx];
        pmc->idx = ctr_idx;

        if (is_fevent) {
                if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
                        kvpmu->fw_event[event_code].started = true;
        } else {
                ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
                if (ret) {
                        sbiret = SBI_ERR_NOT_SUPPORTED;
                        goto out;
                }
        }

        set_bit(ctr_idx, kvpmu->pmc_in_use);
        pmc->event_idx = eidx;
        retdata->out_val = ctr_idx;
out:
        retdata->err_val = sbiret;

        return 0;
}

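/*
 * SBI counter read handlers. kvm_riscv_vcpu_pmu_fw_ctr_read() returns the
 * counter value truncated to the register width; the _hi variant returns the
 * upper 32 bits and is only used on RV32. Invalid counters are reported to
 * the guest as SBI_ERR_INVALID_PARAM.
 */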
int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
                                      struct kvm_vcpu_sbi_return *retdata)
{
        int ret;

        ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
        if (ret == -EINVAL)
                retdata->err_val = SBI_ERR_INVALID_PARAM;

        return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
                                   struct kvm_vcpu_sbi_return *retdata)
{
        int ret;

        ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
        if (ret == -EINVAL)
                retdata->err_val = SBI_ERR_INVALID_PARAM;

        return 0;
}

void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
        int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;

        /*
         * PMU functionality should only be available to guests if privilege mode
         * filtering is available in the host. Otherwise, the guest will always count
         * events while the execution is in hypervisor mode.
         */
        if (!riscv_isa_extension_available(NULL, SSCOFPMF))
                return;

        ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
        if (ret < 0 || !hpm_width || !num_hw_ctrs)
                return;

        /*
         * Increase the number of hardware counters to offset the time counter.
         */
        kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
        kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
        memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
        kvpmu->snapshot_addr = INVALID_GPA;

        if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
                pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
                kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
        }

        /*
         * There is no correlation between the logical hardware counters and virtual
         * counters. However, we need to encode an hpmcounter CSR in the counter info
         * field so that KVM can trap and emulate the read. This works well in the
         * migration use case as KVM doesn't care if the actual hpmcounter is
         * available in the hardware or not.
         */
        for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
                /* TIME CSR shouldn't be read from perf interface */
                if (i == 1)
                        continue;
                pmc = &kvpmu->pmc[i];
                pmc->idx = i;
                pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
                pmc->vcpu = vcpu;
                if (i < kvpmu->num_hw_ctrs) {
                        pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
                        if (i < 3)
                                /* CY, IR counters */
                                pmc->cinfo.width = 63;
                        else
                                pmc->cinfo.width = hpm_width;
                        /*
                         * The CSR number doesn't have any relation with the logical
                         * hardware counters. The CSR numbers are encoded sequentially
                         * to avoid maintaining a map between the virtual counter
                         * and CSR number.
                         */
                        pmc->cinfo.csr = CSR_CYCLE + i;
                } else {
                        pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
                        pmc->cinfo.width = 63;
                }
        }

        kvpmu->init_done = true;
}

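/*
 * Tear down all vCPU PMU state: release any perf events, clear the in-use
 * and overflow bitmaps, reset the firmware event counts and free the
 * snapshot area. Also used by kvm_riscv_vcpu_pmu_reset().
 */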
The CSR numbers are encoded sequentially 822 * to avoid maintaining a map between the virtual counter 823 * and CSR number. 824 */ 825 pmc->cinfo.csr = CSR_CYCLE + i; 826 } else { 827 pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW; 828 pmc->cinfo.width = 63; 829 } 830 } 831 832 kvpmu->init_done = true; 833 } 834 835 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) 836 { 837 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 838 struct kvm_pmc *pmc; 839 int i; 840 841 if (!kvpmu) 842 return; 843 844 for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { 845 pmc = &kvpmu->pmc[i]; 846 pmc->counter_val = 0; 847 kvm_pmu_release_perf_event(pmc); 848 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; 849 } 850 bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); 851 bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); 852 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); 853 kvm_pmu_clear_snapshot_area(vcpu); 854 } 855 856 void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) 857 { 858 kvm_riscv_vcpu_pmu_deinit(vcpu); 859 } 860