1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2023 Rivos Inc 4 * 5 * Authors: 6 * Atish Patra <atishp@rivosinc.com> 7 */ 8 9 #define pr_fmt(fmt) "riscv-kvm-pmu: " fmt 10 #include <linux/bitops.h> 11 #include <linux/errno.h> 12 #include <linux/err.h> 13 #include <linux/kvm_host.h> 14 #include <linux/nospec.h> 15 #include <linux/perf/riscv_pmu.h> 16 #include <asm/csr.h> 17 #include <asm/kvm_isa.h> 18 #include <asm/kvm_vcpu_sbi.h> 19 #include <asm/kvm_vcpu_pmu.h> 20 #include <asm/sbi.h> 21 22 #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs) 23 #define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16) 24 #define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK) 25 26 static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = { 27 [SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES, 28 [SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS, 29 [SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES, 30 [SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES, 31 [SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, 32 [SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES, 33 [SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES, 34 [SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, 35 [SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND, 36 [SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES, 37 }; 38 39 static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc) 40 { 41 u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0); 42 u64 sample_period; 43 44 if (!pmc->counter_val) 45 sample_period = counter_val_mask; 46 else 47 sample_period = (-pmc->counter_val) & counter_val_mask; 48 49 return sample_period; 50 } 51 52 static u32 kvm_pmu_get_perf_event_type(unsigned long eidx) 53 { 54 enum sbi_pmu_event_type etype = get_event_type(eidx); 55 u32 type = PERF_TYPE_MAX; 56 57 switch (etype) { 58 case SBI_PMU_EVENT_TYPE_HW: 59 type = PERF_TYPE_HARDWARE; 60 break; 61 case SBI_PMU_EVENT_TYPE_CACHE: 62 type = PERF_TYPE_HW_CACHE; 63 break; 64 case SBI_PMU_EVENT_TYPE_RAW: 65 case SBI_PMU_EVENT_TYPE_RAW_V2: 66 case SBI_PMU_EVENT_TYPE_FW: 67 type = PERF_TYPE_RAW; 68 break; 69 default: 70 break; 71 } 72 73 return type; 74 } 75 76 static bool kvm_pmu_is_fw_event(unsigned long eidx) 77 { 78 return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW; 79 } 80 81 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) 82 { 83 if (pmc->perf_event) { 84 perf_event_disable(pmc->perf_event); 85 perf_event_release_kernel(pmc->perf_event); 86 pmc->perf_event = NULL; 87 } 88 } 89 90 static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code) 91 { 92 return hw_event_perf_map[array_index_nospec(sbi_event_code, 93 SBI_PMU_HW_GENERAL_MAX)]; 94 } 95 96 static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code) 97 { 98 u64 config = U64_MAX; 99 unsigned int cache_type, cache_op, cache_result; 100 101 /* All the cache event masks lie within 0xFF. No separate masking is necessary */ 102 cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >> 103 SBI_PMU_EVENT_CACHE_ID_SHIFT; 104 cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >> 105 SBI_PMU_EVENT_CACHE_OP_SHIFT; 106 cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK; 107 108 if (cache_type >= PERF_COUNT_HW_CACHE_MAX || 109 cache_op >= PERF_COUNT_HW_CACHE_OP_MAX || 110 cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 111 return config; 112 113 config = cache_type | (cache_op << 8) | (cache_result << 16); 114 115 return config; 116 } 117 118 static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data) 119 { 120 enum sbi_pmu_event_type etype = get_event_type(eidx); 121 u32 ecode = get_event_code(eidx); 122 u64 config = U64_MAX; 123 124 switch (etype) { 125 case SBI_PMU_EVENT_TYPE_HW: 126 if (ecode < SBI_PMU_HW_GENERAL_MAX) 127 config = kvm_pmu_get_perf_event_hw_config(ecode); 128 break; 129 case SBI_PMU_EVENT_TYPE_CACHE: 130 config = kvm_pmu_get_perf_event_cache_config(ecode); 131 break; 132 case SBI_PMU_EVENT_TYPE_RAW: 133 config = evt_data & RISCV_PMU_RAW_EVENT_MASK; 134 break; 135 case SBI_PMU_EVENT_TYPE_RAW_V2: 136 config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK; 137 break; 138 case SBI_PMU_EVENT_TYPE_FW: 139 if (ecode < SBI_PMU_FW_MAX) 140 config = (1ULL << 63) | ecode; 141 break; 142 default: 143 break; 144 } 145 146 return config; 147 } 148 149 static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx) 150 { 151 u32 etype = kvm_pmu_get_perf_event_type(eidx); 152 u32 ecode = get_event_code(eidx); 153 154 if (etype != SBI_PMU_EVENT_TYPE_HW) 155 return -EINVAL; 156 157 if (ecode == SBI_PMU_HW_CPU_CYCLES) 158 return 0; 159 else if (ecode == SBI_PMU_HW_INSTRUCTIONS) 160 return 2; 161 else 162 return -EINVAL; 163 } 164 165 static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx, 166 unsigned long cbase, unsigned long cmask) 167 { 168 int ctr_idx = -1; 169 int i, pmc_idx; 170 int min, max; 171 172 if (kvm_pmu_is_fw_event(eidx)) { 173 /* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */ 174 min = kvpmu->num_hw_ctrs; 175 max = min + kvpmu->num_fw_ctrs; 176 } else { 177 /* First 3 counters are reserved for fixed counters */ 178 min = 3; 179 max = kvpmu->num_hw_ctrs; 180 } 181 182 for_each_set_bit(i, &cmask, BITS_PER_LONG) { 183 pmc_idx = i + cbase; 184 if ((pmc_idx >= min && pmc_idx < max) && 185 !test_bit(pmc_idx, kvpmu->pmc_in_use)) { 186 ctr_idx = pmc_idx; 187 break; 188 } 189 } 190 191 return ctr_idx; 192 } 193 194 static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx, 195 unsigned long cbase, unsigned long cmask) 196 { 197 int ret; 198 199 /* Fixed counters need to be have fixed mapping as they have different width */ 200 ret = kvm_pmu_get_fixed_pmc_index(eidx); 201 if (ret >= 0) 202 return ret; 203 204 return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask); 205 } 206 207 static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, 208 unsigned long *out_val) 209 { 210 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 211 struct kvm_pmc *pmc; 212 int fevent_code; 213 214 if (!IS_ENABLED(CONFIG_32BIT)) { 215 pr_warn("%s: should be invoked for only RV32\n", __func__); 216 return -EINVAL; 217 } 218 219 if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { 220 pr_warn("Invalid counter id [%ld]during read\n", cidx); 221 return -EINVAL; 222 } 223 224 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); 225 pmc = &kvpmu->pmc[cidx]; 226 227 if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) 228 return -EINVAL; 229 230 if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID) 231 return -EINVAL; 232 233 fevent_code = get_event_code(pmc->event_idx); 234 if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX, 235 "Invalid firmware event code: %d\n", fevent_code)) 236 return -EINVAL; 237 238 pmc->counter_val = kvpmu->fw_event[fevent_code].value; 239 240 *out_val = pmc->counter_val >> 32; 241 242 return 0; 243 } 244 245 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, 246 unsigned long *out_val) 247 { 248 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 249 struct kvm_pmc *pmc; 250 u64 enabled, running; 251 int fevent_code; 252 253 if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { 254 pr_warn("Invalid counter id [%ld] during read\n", cidx); 255 return -EINVAL; 256 } 257 258 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); 259 pmc = &kvpmu->pmc[cidx]; 260 261 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { 262 if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID) 263 return -EINVAL; 264 265 fevent_code = get_event_code(pmc->event_idx); 266 if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX, 267 "Invalid firmware event code: %d\n", fevent_code)) 268 return -EINVAL; 269 270 pmc->counter_val = kvpmu->fw_event[fevent_code].value; 271 } else if (pmc->perf_event) { 272 pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running); 273 } else { 274 return -EINVAL; 275 } 276 *out_val = pmc->counter_val; 277 278 return 0; 279 } 280 281 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base, 282 unsigned long ctr_mask) 283 { 284 unsigned long num_ctrs = kvm_pmu_num_counters(kvpmu); 285 286 /* Make sure we have a valid counter mask requested from the caller */ 287 if (!ctr_mask || ctr_base >= num_ctrs || (ctr_base + __fls(ctr_mask) >= num_ctrs)) 288 return -EINVAL; 289 290 return 0; 291 } 292 293 static void kvm_riscv_pmu_overflow(struct perf_event *perf_event, 294 struct perf_sample_data *data, 295 struct pt_regs *regs) 296 { 297 struct kvm_pmc *pmc = perf_event->overflow_handler_context; 298 struct kvm_vcpu *vcpu = pmc->vcpu; 299 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 300 struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu); 301 u64 period; 302 303 /* 304 * Stop the event counting by directly accessing the perf_event. 305 * Otherwise, this needs to deferred via a workqueue. 306 * That will introduce skew in the counter value because the actual 307 * physical counter would start after returning from this function. 308 * It will be stopped again once the workqueue is scheduled 309 */ 310 rpmu->pmu.stop(perf_event, PERF_EF_UPDATE); 311 312 /* 313 * The hw counter would start automatically when this function returns. 314 * Thus, the host may continue to interrupt and inject it to the guest 315 * even without the guest configuring the next event. Depending on the hardware 316 * the host may have some sluggishness only if privilege mode filtering is not 317 * available. In an ideal world, where qemu is not the only capable hardware, 318 * this can be removed. 319 * FYI: ARM64 does this way while x86 doesn't do anything as such. 320 * TODO: Should we keep it for RISC-V ? 321 */ 322 period = -(local64_read(&perf_event->count)); 323 324 local64_set(&perf_event->hw.period_left, 0); 325 perf_event->attr.sample_period = period; 326 perf_event->hw.sample_period = period; 327 328 set_bit(pmc->idx, kvpmu->pmc_overflown); 329 kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF); 330 331 rpmu->pmu.start(perf_event, PERF_EF_RELOAD); 332 } 333 334 static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, 335 unsigned long flags, unsigned long eidx, 336 unsigned long evtdata) 337 { 338 struct perf_event *event; 339 340 kvm_pmu_release_perf_event(pmc); 341 attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata); 342 if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) { 343 //TODO: Do we really want to clear the value in hardware counter 344 pmc->counter_val = 0; 345 } 346 347 /* 348 * Set the default sample_period for now. The guest specified value 349 * will be updated in the start call. 350 */ 351 attr->sample_period = kvm_pmu_get_sample_period(pmc); 352 353 event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); 354 if (IS_ERR(event)) { 355 pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); 356 return PTR_ERR(event); 357 } 358 359 pmc->perf_event = event; 360 if (flags & SBI_PMU_CFG_FLAG_AUTO_START) 361 perf_event_enable(pmc->perf_event); 362 363 return 0; 364 } 365 366 int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid) 367 { 368 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 369 struct kvm_fw_event *fevent; 370 371 if (!kvpmu || fid >= SBI_PMU_FW_MAX) 372 return -EINVAL; 373 374 fevent = &kvpmu->fw_event[fid]; 375 if (fevent->started) 376 fevent->value++; 377 378 return 0; 379 } 380 381 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, 382 unsigned long *val, unsigned long new_val, 383 unsigned long wr_mask) 384 { 385 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 386 int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC; 387 388 if (!kvpmu || !kvpmu->init_done) { 389 /* 390 * In absence of sscofpmf in the platform, the guest OS may use 391 * the legacy PMU driver to read cycle/instret. In that case, 392 * just return 0 to avoid any illegal trap. However, any other 393 * hpmcounter access should result in illegal trap as they must 394 * be access through SBI PMU only. 395 */ 396 if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) { 397 *val = 0; 398 return ret; 399 } else { 400 return KVM_INSN_ILLEGAL_TRAP; 401 } 402 } 403 404 /* The counter CSR are read only. Thus, any write should result in illegal traps */ 405 if (wr_mask) 406 return KVM_INSN_ILLEGAL_TRAP; 407 408 cidx = csr_num - CSR_CYCLE; 409 410 if (pmu_ctr_read(vcpu, cidx, val) < 0) 411 return KVM_INSN_ILLEGAL_TRAP; 412 413 return ret; 414 } 415 416 static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu) 417 { 418 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 419 420 kfree(kvpmu->sdata); 421 kvpmu->sdata = NULL; 422 kvpmu->snapshot_addr = INVALID_GPA; 423 } 424 425 int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, 426 unsigned long saddr_high, unsigned long flags, 427 struct kvm_vcpu_sbi_return *retdata) 428 { 429 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 430 int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); 431 int sbiret = 0; 432 gpa_t saddr; 433 434 if (!kvpmu || flags) { 435 sbiret = SBI_ERR_INVALID_PARAM; 436 goto out; 437 } 438 439 if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) { 440 kvm_pmu_clear_snapshot_area(vcpu); 441 return 0; 442 } 443 444 saddr = saddr_low; 445 446 if (saddr_high != 0) { 447 if (IS_ENABLED(CONFIG_32BIT)) { 448 saddr |= ((gpa_t)saddr_high << 32); 449 } else { 450 sbiret = SBI_ERR_INVALID_ADDRESS; 451 goto out; 452 } 453 } 454 455 kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); 456 if (!kvpmu->sdata) { 457 sbiret = SBI_ERR_FAILURE; 458 goto out; 459 } 460 461 /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ 462 if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { 463 kfree(kvpmu->sdata); 464 kvpmu->sdata = NULL; 465 sbiret = SBI_ERR_INVALID_ADDRESS; 466 goto out; 467 } 468 469 kvpmu->snapshot_addr = saddr; 470 471 out: 472 retdata->err_val = sbiret; 473 474 return 0; 475 } 476 477 int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, 478 unsigned long saddr_high, unsigned long num_events, 479 unsigned long flags, struct kvm_vcpu_sbi_return *retdata) 480 { 481 struct riscv_pmu_event_info *einfo = NULL; 482 int shmem_size = num_events * sizeof(*einfo); 483 gpa_t shmem; 484 u32 eidx, etype; 485 u64 econfig; 486 int ret; 487 488 if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) { 489 ret = SBI_ERR_INVALID_PARAM; 490 goto out; 491 } 492 493 shmem = saddr_low; 494 if (saddr_high != 0) { 495 if (IS_ENABLED(CONFIG_32BIT)) { 496 shmem |= ((gpa_t)saddr_high << 32); 497 } else { 498 ret = SBI_ERR_INVALID_ADDRESS; 499 goto out; 500 } 501 } 502 503 einfo = kzalloc(shmem_size, GFP_KERNEL); 504 if (!einfo) { 505 ret = SBI_ERR_FAILURE; 506 goto out; 507 } 508 509 ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); 510 if (ret) { 511 ret = SBI_ERR_FAILURE; 512 goto free_mem; 513 } 514 515 for (int i = 0; i < num_events; i++) { 516 eidx = einfo[i].event_idx; 517 etype = kvm_pmu_get_perf_event_type(eidx); 518 econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data); 519 ret = riscv_pmu_get_event_info(etype, econfig, NULL); 520 einfo[i].output = (ret > 0) ? 1 : 0; 521 } 522 523 ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size); 524 if (ret) 525 ret = SBI_ERR_INVALID_ADDRESS; 526 527 free_mem: 528 kfree(einfo); 529 out: 530 retdata->err_val = ret; 531 532 return 0; 533 } 534 535 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, 536 struct kvm_vcpu_sbi_return *retdata) 537 { 538 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 539 540 retdata->out_val = kvm_pmu_num_counters(kvpmu); 541 542 return 0; 543 } 544 545 int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx, 546 struct kvm_vcpu_sbi_return *retdata) 547 { 548 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 549 550 if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) { 551 retdata->err_val = SBI_ERR_INVALID_PARAM; 552 return 0; 553 } 554 555 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); 556 retdata->out_val = kvpmu->pmc[cidx].cinfo.value; 557 558 return 0; 559 } 560 561 int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, 562 unsigned long ctr_mask, unsigned long flags, u64 ival, 563 struct kvm_vcpu_sbi_return *retdata) 564 { 565 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 566 int i, pmc_index, sbiret = 0; 567 struct kvm_pmc *pmc; 568 int fevent_code; 569 bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT; 570 571 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { 572 sbiret = SBI_ERR_INVALID_PARAM; 573 goto out; 574 } 575 576 if (snap_flag_set) { 577 if (kvpmu->snapshot_addr == INVALID_GPA) { 578 sbiret = SBI_ERR_NO_SHMEM; 579 goto out; 580 } 581 if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, 582 sizeof(struct riscv_pmu_snapshot_data))) { 583 pr_warn("Unable to read snapshot shared memory while starting counters\n"); 584 sbiret = SBI_ERR_FAILURE; 585 goto out; 586 } 587 } 588 /* Start the counters that have been configured and requested by the guest */ 589 for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { 590 pmc_index = array_index_nospec(i + ctr_base, 591 RISCV_KVM_MAX_COUNTERS); 592 if (!test_bit(pmc_index, kvpmu->pmc_in_use)) 593 continue; 594 /* The guest started the counter again. Reset the overflow status */ 595 clear_bit(pmc_index, kvpmu->pmc_overflown); 596 pmc = &kvpmu->pmc[pmc_index]; 597 if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) { 598 pmc->counter_val = ival; 599 } else if (snap_flag_set) { 600 /* The counter index in the snapshot are relative to the counter base */ 601 pmc->counter_val = kvpmu->sdata->ctr_values[i]; 602 } 603 604 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { 605 fevent_code = get_event_code(pmc->event_idx); 606 if (fevent_code >= SBI_PMU_FW_MAX) { 607 sbiret = SBI_ERR_INVALID_PARAM; 608 goto out; 609 } 610 611 /* Check if the counter was already started for some reason */ 612 if (kvpmu->fw_event[fevent_code].started) { 613 sbiret = SBI_ERR_ALREADY_STARTED; 614 continue; 615 } 616 617 kvpmu->fw_event[fevent_code].started = true; 618 kvpmu->fw_event[fevent_code].value = pmc->counter_val; 619 } else if (pmc->perf_event) { 620 if (unlikely(pmc->started)) { 621 sbiret = SBI_ERR_ALREADY_STARTED; 622 continue; 623 } 624 perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc)); 625 perf_event_enable(pmc->perf_event); 626 pmc->started = true; 627 } else { 628 sbiret = SBI_ERR_INVALID_PARAM; 629 } 630 } 631 632 out: 633 retdata->err_val = sbiret; 634 635 return 0; 636 } 637 638 int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, 639 unsigned long ctr_mask, unsigned long flags, 640 struct kvm_vcpu_sbi_return *retdata) 641 { 642 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 643 int i, pmc_index, sbiret = 0; 644 u64 enabled, running; 645 struct kvm_pmc *pmc; 646 int fevent_code; 647 bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; 648 bool shmem_needs_update = false; 649 650 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { 651 sbiret = SBI_ERR_INVALID_PARAM; 652 goto out; 653 } 654 655 if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { 656 sbiret = SBI_ERR_NO_SHMEM; 657 goto out; 658 } 659 660 /* Stop the counters that have been configured and requested by the guest */ 661 for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { 662 pmc_index = array_index_nospec(i + ctr_base, 663 RISCV_KVM_MAX_COUNTERS); 664 if (!test_bit(pmc_index, kvpmu->pmc_in_use)) 665 continue; 666 pmc = &kvpmu->pmc[pmc_index]; 667 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { 668 fevent_code = get_event_code(pmc->event_idx); 669 if (fevent_code >= SBI_PMU_FW_MAX) { 670 sbiret = SBI_ERR_INVALID_PARAM; 671 goto out; 672 } 673 674 if (!kvpmu->fw_event[fevent_code].started) 675 sbiret = SBI_ERR_ALREADY_STOPPED; 676 677 kvpmu->fw_event[fevent_code].started = false; 678 } else if (pmc->perf_event) { 679 if (pmc->started) { 680 /* Stop counting the counter */ 681 perf_event_disable(pmc->perf_event); 682 pmc->started = false; 683 } else { 684 sbiret = SBI_ERR_ALREADY_STOPPED; 685 } 686 687 if (flags & SBI_PMU_STOP_FLAG_RESET) 688 /* Release the counter if this is a reset request */ 689 kvm_pmu_release_perf_event(pmc); 690 } else { 691 sbiret = SBI_ERR_INVALID_PARAM; 692 } 693 694 if (snap_flag_set && !sbiret) { 695 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) 696 pmc->counter_val = kvpmu->fw_event[fevent_code].value; 697 else if (pmc->perf_event) 698 pmc->counter_val += perf_event_read_value(pmc->perf_event, 699 &enabled, &running); 700 /* 701 * The counter and overflow indices in the snapshot region are w.r.to 702 * cbase. Modify the set bit in the counter mask instead of the pmc_index 703 * which indicates the absolute counter index. 704 */ 705 if (test_bit(pmc_index, kvpmu->pmc_overflown)) 706 kvpmu->sdata->ctr_overflow_mask |= BIT(i); 707 kvpmu->sdata->ctr_values[i] = pmc->counter_val; 708 shmem_needs_update = true; 709 } 710 711 if (flags & SBI_PMU_STOP_FLAG_RESET) { 712 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; 713 clear_bit(pmc_index, kvpmu->pmc_in_use); 714 clear_bit(pmc_index, kvpmu->pmc_overflown); 715 if (snap_flag_set) { 716 /* 717 * Only clear the given counter as the caller is responsible to 718 * validate both the overflow mask and configured counters. 719 */ 720 kvpmu->sdata->ctr_overflow_mask &= ~BIT(i); 721 shmem_needs_update = true; 722 } 723 } 724 } 725 726 if (shmem_needs_update) 727 kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, 728 sizeof(struct riscv_pmu_snapshot_data)); 729 730 out: 731 retdata->err_val = sbiret; 732 733 return 0; 734 } 735 736 int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base, 737 unsigned long ctr_mask, unsigned long flags, 738 unsigned long eidx, u64 evtdata, 739 struct kvm_vcpu_sbi_return *retdata) 740 { 741 int ctr_idx, sbiret = 0; 742 long ret; 743 bool is_fevent; 744 unsigned long event_code; 745 u32 etype = kvm_pmu_get_perf_event_type(eidx); 746 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 747 struct kvm_pmc *pmc = NULL; 748 struct perf_event_attr attr = { 749 .type = etype, 750 .size = sizeof(struct perf_event_attr), 751 .pinned = true, 752 .disabled = true, 753 /* 754 * It should never reach here if the platform doesn't support the sscofpmf 755 * extension as mode filtering won't work without it. 756 */ 757 .exclude_host = true, 758 .exclude_hv = true, 759 .exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH), 760 .exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH), 761 .config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS, 762 }; 763 764 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { 765 sbiret = SBI_ERR_INVALID_PARAM; 766 goto out; 767 } 768 769 event_code = get_event_code(eidx); 770 is_fevent = kvm_pmu_is_fw_event(eidx); 771 if (is_fevent && event_code >= SBI_PMU_FW_MAX) { 772 sbiret = SBI_ERR_NOT_SUPPORTED; 773 goto out; 774 } 775 776 /* 777 * SKIP_MATCH flag indicates the caller is aware of the assigned counter 778 * for this event. Just do a sanity check if it already marked used. 779 */ 780 if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) { 781 if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) { 782 sbiret = SBI_ERR_FAILURE; 783 goto out; 784 } 785 ctr_idx = ctr_base + __ffs(ctr_mask); 786 } else { 787 ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask); 788 if (ctr_idx < 0) { 789 sbiret = SBI_ERR_NOT_SUPPORTED; 790 goto out; 791 } 792 } 793 794 ctr_idx = array_index_nospec(ctr_idx, RISCV_KVM_MAX_COUNTERS); 795 pmc = &kvpmu->pmc[ctr_idx]; 796 pmc->idx = ctr_idx; 797 798 if (is_fevent) { 799 if (flags & SBI_PMU_CFG_FLAG_AUTO_START) 800 kvpmu->fw_event[event_code].started = true; 801 } else { 802 ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata); 803 if (ret) { 804 sbiret = SBI_ERR_NOT_SUPPORTED; 805 goto out; 806 } 807 } 808 809 set_bit(ctr_idx, kvpmu->pmc_in_use); 810 pmc->event_idx = eidx; 811 retdata->out_val = ctr_idx; 812 out: 813 retdata->err_val = sbiret; 814 815 return 0; 816 } 817 818 int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, 819 struct kvm_vcpu_sbi_return *retdata) 820 { 821 int ret; 822 823 ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val); 824 if (ret == -EINVAL) 825 retdata->err_val = SBI_ERR_INVALID_PARAM; 826 827 return 0; 828 } 829 830 int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, 831 struct kvm_vcpu_sbi_return *retdata) 832 { 833 int ret; 834 835 ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val); 836 if (ret == -EINVAL) 837 retdata->err_val = SBI_ERR_INVALID_PARAM; 838 839 return 0; 840 } 841 842 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) 843 { 844 int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0; 845 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 846 struct kvm_pmc *pmc; 847 848 /* 849 * PMU functionality should be only available to guests if privilege mode 850 * filtering is available in the host. Otherwise, guest will always count 851 * events while the execution is in hypervisor mode. 852 */ 853 if (kvm_riscv_isa_check_host(SSCOFPMF)) 854 return; 855 856 ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs); 857 if (ret < 0 || !hpm_width || !num_hw_ctrs) 858 return; 859 860 /* 861 * Increase the number of hardware counters to offset the time counter. 862 */ 863 kvpmu->num_hw_ctrs = num_hw_ctrs + 1; 864 kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX; 865 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); 866 kvpmu->snapshot_addr = INVALID_GPA; 867 868 if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) { 869 pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA"); 870 kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS; 871 } 872 873 /* 874 * There is no correlation between the logical hardware counter and virtual counters. 875 * However, we need to encode a hpmcounter CSR in the counter info field so that 876 * KVM can trap n emulate the read. This works well in the migration use case as 877 * KVM doesn't care if the actual hpmcounter is available in the hardware or not. 878 */ 879 for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) { 880 /* TIME CSR shouldn't be read from perf interface */ 881 if (i == 1) 882 continue; 883 pmc = &kvpmu->pmc[i]; 884 pmc->idx = i; 885 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; 886 pmc->vcpu = vcpu; 887 if (i < kvpmu->num_hw_ctrs) { 888 pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW; 889 if (i < 3) 890 /* CY, IR counters */ 891 pmc->cinfo.width = 63; 892 else 893 pmc->cinfo.width = hpm_width; 894 /* 895 * The CSR number doesn't have any relation with the logical 896 * hardware counters. The CSR numbers are encoded sequentially 897 * to avoid maintaining a map between the virtual counter 898 * and CSR number. 899 */ 900 pmc->cinfo.csr = CSR_CYCLE + i; 901 } else { 902 pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW; 903 pmc->cinfo.width = 63; 904 } 905 } 906 907 kvpmu->init_done = true; 908 } 909 910 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) 911 { 912 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 913 struct kvm_pmc *pmc; 914 int i; 915 916 if (!kvpmu) 917 return; 918 919 for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { 920 pmc = &kvpmu->pmc[i]; 921 pmc->counter_val = 0; 922 kvm_pmu_release_perf_event(pmc); 923 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; 924 } 925 bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); 926 bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); 927 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); 928 kvm_pmu_clear_snapshot_area(vcpu); 929 } 930 931 void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) 932 { 933 kvm_riscv_vcpu_pmu_deinit(vcpu); 934 } 935