// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *     Atish Patra <atishp@rivosinc.com>
 */

#define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

/* Map SBI PMU general hardware event IDs to perf hardware event IDs */
static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};

/*
 * Derive the perf sample period from the current guest counter value so
 * that the perf event overflows when the guest-visible counter wraps.
 */
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
		sample_period = counter_val_mask;
	else
		sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
}

static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		type = PERF_TYPE_HARDWARE;
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		type = PERF_TYPE_HW_CACHE;
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_RAW_V2:
	case SBI_PMU_EVENT_TYPE_FW:
		type = PERF_TYPE_RAW;
		break;
	default:
		break;
	}

	return type;
}

static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}

static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	return hw_event_perf_map[sbi_event_code];
}

static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/*
	 * All the cache event masks lie within 0xFF.
	 * No separate masking is necessary.
	 */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return config;

	config = cache_type | (cache_op << 8) | (cache_result << 16);

	return config;
}

static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (ecode < SBI_PMU_HW_GENERAL_MAX)
			config = kvm_pmu_get_perf_event_hw_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		config = kvm_pmu_get_perf_event_cache_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_RAW_V2:
		config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_FW:
		if (ecode < SBI_PMU_FW_MAX)
			config = (1ULL << 63) | ecode;
		break;
	default:
		break;
	}

	return config;
}

static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
		return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
		return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
		return 2;
	else
		return -EINVAL;
}

static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
					      unsigned long cbase, unsigned long cmask)
{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
		min = kvpmu->num_hw_ctrs;
		max = min + kvpmu->num_fw_ctrs;
	} else {
		/* First 3 counters are reserved for fixed counters */
		min = 3;
		max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
		pmc_idx = i + cbase;
		if ((pmc_idx >= min && pmc_idx < max) &&
		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
			ctr_idx = pmc_idx;
			break;
		}
	}

	return ctr_idx;
}

static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	int ret;

	/* Fixed counters need to have a fixed mapping as they have a different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
		return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}

static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
			      unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int fevent_code;

	if (!IS_ENABLED(CONFIG_32BIT)) {
		pr_warn("%s: should be invoked for only RV32\n", __func__);
		return -EINVAL;
	}

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
		return -EINVAL;

	fevent_code = get_event_code(pmc->event_idx);
	pmc->counter_val = kvpmu->fw_event[fevent_code].value;

	*out_val = pmc->counter_val >> 32;

	return 0;
}

static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
		fevent_code = get_event_code(pmc->event_idx);
		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	} else {
		return -EINVAL;
	}
	*out_val = pmc->counter_val;

	return 0;
}

static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
					 unsigned long ctr_mask)
{
	/* Make sure we have a valid counter mask requested by the caller */
	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
		return -EINVAL;

	return 0;
}

static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this would need to be deferred via a workqueue, which
	 * introduces skew in the counter value because the actual physical
	 * counter would restart after returning from this function and only
	 * be stopped again once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter would start automatically when this function returns.
	 * Thus, the host may continue to get interrupts and inject them into the
	 * guest even without the guest configuring the next event. Depending on
	 * the hardware, the host may see some sluggishness, but only if privilege
	 * mode filtering is not available. In an ideal world, where qemu is not
	 * the only capable hardware, this can be removed.
	 * FYI: ARM64 does it this way while x86 doesn't do anything as such.
	 * TODO: Should we keep it for RISC-V ?
	 */
	period = -(local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
				      unsigned long flags, unsigned long eidx,
				      unsigned long evtdata)
{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
		//TODO: Do we really want to clear the value in the hardware counter?
		pmc->counter_val = 0;
	}

	/*
	 * Set the default sample_period for now. The guest specified value
	 * will be updated in the start call.
	 */
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
		perf_event_enable(pmc->perf_event);

	return 0;
}

int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
		return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
		fevent->value++;

	return 0;
}

int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu || !kvpmu->init_done) {
		/*
		 * In the absence of sscofpmf in the platform, the guest OS may use
		 * the legacy PMU driver to read cycle/instret. In that case,
		 * just return 0 to avoid any illegal trap. However, any other
		 * hpmcounter access should result in an illegal trap as those must
		 * be accessed through the SBI PMU only.
		 */
		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
			*val = 0;
			return ret;
		} else {
			return KVM_INSN_ILLEGAL_TRAP;
		}
	}

	/* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
	if (wr_mask)
		return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
		return KVM_INSN_ILLEGAL_TRAP;

	return ret;
}

static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	kfree(kvpmu->sdata);
	kvpmu->sdata = NULL;
	kvpmu->snapshot_addr = INVALID_GPA;
}

int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
					  unsigned long saddr_high, unsigned long flags,
					  struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
	int sbiret = 0;
	gpa_t saddr;

	if (!kvpmu || flags) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
		kvm_pmu_clear_snapshot_area(vcpu);
		return 0;
	}

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT)) {
			saddr |= ((gpa_t)saddr_high << 32);
		} else {
			sbiret = SBI_ERR_INVALID_ADDRESS;
			goto out;
		}
	}

	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
	if (!kvpmu->sdata)
		return -ENOMEM;

	/* No need to check for a writable slot explicitly as kvm_vcpu_write_guest does it internally */
	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
		kfree(kvpmu->sdata);
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->snapshot_addr = saddr;

out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
				  unsigned long saddr_high, unsigned long num_events,
				  unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
{
	struct riscv_pmu_event_info *einfo = NULL;
	int shmem_size = num_events * sizeof(*einfo);
	gpa_t shmem;
	u32 eidx, etype;
	u64 econfig;
	int ret;

	if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) {
		ret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	shmem = saddr_low;
	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT)) {
			shmem |= ((gpa_t)saddr_high << 32);
		} else {
			ret = SBI_ERR_INVALID_ADDRESS;
			goto out;
		}
	}

	einfo = kzalloc(shmem_size, GFP_KERNEL);
	if (!einfo)
		return -ENOMEM;

	ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
	if (ret) {
		ret = SBI_ERR_FAILURE;
		goto free_mem;
	}

	for (int i = 0; i < num_events; i++) {
		eidx = einfo[i].event_idx;
		etype = kvm_pmu_get_perf_event_type(eidx);
		econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
		ret = riscv_pmu_get_event_info(etype, econfig, NULL);
		einfo[i].output = (ret > 0) ? 1 : 0;
	}

	ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
	if (ret) {
		ret = SBI_ERR_INVALID_ADDRESS;
		goto free_mem;
	}

	ret = 0;
free_mem:
	kfree(einfo);
out:
	retdata->err_val = ret;

	return 0;
}

int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) {
		retdata->err_val = SBI_ERR_INVALID_PARAM;
		return 0;
	}

	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				unsigned long ctr_mask, unsigned long flags,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
	bool shmem_needs_update = false;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
		sbiret = SBI_ERR_NO_SHMEM;
		goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			if (!kvpmu->fw_event[fevent_code].started)
				sbiret = SBI_ERR_ALREADY_STOPPED;

			kvpmu->fw_event[fevent_code].started = false;
		} else if (pmc->perf_event) {
			if (pmc->started) {
				/* Stop counting the counter */
				perf_event_disable(pmc->perf_event);
				pmc->started = false;
			} else {
				sbiret = SBI_ERR_ALREADY_STOPPED;
			}

			if (flags & SBI_PMU_STOP_FLAG_RESET)
				/* Release the counter if this is a reset request */
				kvm_pmu_release_perf_event(pmc);
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}

		if (snap_flag_set && !sbiret) {
			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
			else if (pmc->perf_event)
				pmc->counter_val += perf_event_read_value(pmc->perf_event,
									  &enabled, &running);
			/*
			 * The counter and overflow indices in the snapshot region are relative
			 * to cbase. Use the set bit in the counter mask instead of pmc_index,
			 * which indicates the absolute counter index.
			 */
			if (test_bit(pmc_index, kvpmu->pmc_overflown))
				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
			shmem_needs_update = true;
		}

		if (flags & SBI_PMU_STOP_FLAG_RESET) {
			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
			clear_bit(pmc_index, kvpmu->pmc_in_use);
			clear_bit(pmc_index, kvpmu->pmc_overflown);
			if (snap_flag_set) {
				/*
				 * Only clear the given counter as the caller is responsible for
				 * validating both the overflow mask and configured counters.
				 */
				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
				shmem_needs_update = true;
			}
		}
	}

	if (shmem_needs_update)
		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
				     sizeof(struct riscv_pmu_snapshot_data));

out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		.disabled = true,
		/*
		 * We should never get here if the platform doesn't support the sscofpmf
		 * extension, as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * The SKIP_MATCH flag indicates that the caller is aware of the counter
	 * assigned for this event. Just do a sanity check that it is already
	 * marked as used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
				      struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
				   struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	 * PMU functionality should only be available to guests if privilege mode
	 * filtering is available in the host. Otherwise, the guest will always count
	 * events while execution is in hypervisor mode.
	 */
	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
		return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 || !hpm_width || !num_hw_ctrs)
		return;

	/*
	 * Increase the number of hardware counters to offset the time counter.
	 */
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvpmu->snapshot_addr = INVALID_GPA;

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	 * There is no correlation between the logical hardware counters and the virtual
	 * counters. However, we need to encode a hpmcounter CSR in the counter info field
	 * so that KVM can trap and emulate the read. This works well in the migration use
	 * case as KVM doesn't care if the actual hpmcounter is available in the hardware
	 * or not.
	 */
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
		/* TIME CSR shouldn't be read from the perf interface */
		if (i == 1)
			continue;
		pmc = &kvpmu->pmc[i];
		pmc->idx = i;
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
		pmc->vcpu = vcpu;
		if (i < kvpmu->num_hw_ctrs) {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
			if (i < 3)
				/* CY, IR counters */
				pmc->cinfo.width = 63;
			else
				pmc->cinfo.width = hpm_width;
			/*
			 * The CSR number doesn't have any relation with the logical
			 * hardware counters. The CSR numbers are encoded sequentially
			 * to avoid maintaining a map between the virtual counter
			 * and CSR number.
			 */
			pmc->cinfo.csr = CSR_CYCLE + i;
		} else {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
			pmc->cinfo.width = 63;
		}
	}

	kvpmu->init_done = true;
}

void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
		return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
		pmc = &kvpmu->pmc[i];
		pmc->counter_val = 0;
		kvm_pmu_release_perf_event(pmc);
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}
	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvm_pmu_clear_snapshot_area(vcpu);
}

void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}