1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2023 Rivos Inc 4 * 5 * Authors: 6 * Atish Patra <atishp@rivosinc.com> 7 */ 8 9 #define pr_fmt(fmt) "riscv-kvm-pmu: " fmt 10 #include <linux/bitops.h> 11 #include <linux/errno.h> 12 #include <linux/err.h> 13 #include <linux/kvm_host.h> 14 #include <linux/nospec.h> 15 #include <linux/perf/riscv_pmu.h> 16 #include <asm/csr.h> 17 #include <asm/kvm_isa.h> 18 #include <asm/kvm_vcpu_sbi.h> 19 #include <asm/kvm_vcpu_pmu.h> 20 #include <asm/sbi.h> 21 22 #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs) 23 #define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16) 24 #define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK) 25 26 static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = { 27 [SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES, 28 [SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS, 29 [SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES, 30 [SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES, 31 [SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, 32 [SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES, 33 [SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES, 34 [SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, 35 [SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND, 36 [SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES, 37 }; 38 39 static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc) 40 { 41 u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0); 42 u64 sample_period; 43 44 if (!pmc->counter_val) 45 sample_period = counter_val_mask; 46 else 47 sample_period = (-pmc->counter_val) & counter_val_mask; 48 49 return sample_period; 50 } 51 52 static u32 kvm_pmu_get_perf_event_type(unsigned long eidx) 53 { 54 enum sbi_pmu_event_type etype = get_event_type(eidx); 55 u32 type = PERF_TYPE_MAX; 56 57 switch (etype) { 58 case SBI_PMU_EVENT_TYPE_HW: 59 type = PERF_TYPE_HARDWARE; 60 break; 61 case SBI_PMU_EVENT_TYPE_CACHE: 62 type = PERF_TYPE_HW_CACHE; 63 break; 64 case SBI_PMU_EVENT_TYPE_RAW: 65 case SBI_PMU_EVENT_TYPE_RAW_V2: 66 case SBI_PMU_EVENT_TYPE_FW: 67 type = PERF_TYPE_RAW; 68 break; 69 default: 70 break; 71 } 72 73 return type; 74 } 75 76 static bool kvm_pmu_is_fw_event(unsigned long eidx) 77 { 78 return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW; 79 } 80 81 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) 82 { 83 if (pmc->perf_event) { 84 perf_event_disable(pmc->perf_event); 85 perf_event_release_kernel(pmc->perf_event); 86 pmc->perf_event = NULL; 87 } 88 } 89 90 static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code) 91 { 92 return hw_event_perf_map[array_index_nospec(sbi_event_code, 93 SBI_PMU_HW_GENERAL_MAX)]; 94 } 95 96 static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code) 97 { 98 u64 config = U64_MAX; 99 unsigned int cache_type, cache_op, cache_result; 100 101 /* All the cache event masks lie within 0xFF. No separate masking is necessary */ 102 cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >> 103 SBI_PMU_EVENT_CACHE_ID_SHIFT; 104 cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >> 105 SBI_PMU_EVENT_CACHE_OP_SHIFT; 106 cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK; 107 108 if (cache_type >= PERF_COUNT_HW_CACHE_MAX || 109 cache_op >= PERF_COUNT_HW_CACHE_OP_MAX || 110 cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 111 return config; 112 113 config = cache_type | (cache_op << 8) | (cache_result << 16); 114 115 return config; 116 } 117 118 static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data) 119 { 120 enum sbi_pmu_event_type etype = get_event_type(eidx); 121 u32 ecode = get_event_code(eidx); 122 u64 config = U64_MAX; 123 124 switch (etype) { 125 case SBI_PMU_EVENT_TYPE_HW: 126 if (ecode < SBI_PMU_HW_GENERAL_MAX) 127 config = kvm_pmu_get_perf_event_hw_config(ecode); 128 break; 129 case SBI_PMU_EVENT_TYPE_CACHE: 130 config = kvm_pmu_get_perf_event_cache_config(ecode); 131 break; 132 case SBI_PMU_EVENT_TYPE_RAW: 133 config = evt_data & RISCV_PMU_RAW_EVENT_MASK; 134 break; 135 case SBI_PMU_EVENT_TYPE_RAW_V2: 136 config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK; 137 break; 138 case SBI_PMU_EVENT_TYPE_FW: 139 if (ecode < SBI_PMU_FW_MAX) 140 config = (1ULL << 63) | ecode; 141 break; 142 default: 143 break; 144 } 145 146 return config; 147 } 148 149 static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx) 150 { 151 u32 etype = kvm_pmu_get_perf_event_type(eidx); 152 u32 ecode = get_event_code(eidx); 153 154 if (etype != SBI_PMU_EVENT_TYPE_HW) 155 return -EINVAL; 156 157 if (ecode == SBI_PMU_HW_CPU_CYCLES) 158 return 0; 159 else if (ecode == SBI_PMU_HW_INSTRUCTIONS) 160 return 2; 161 else 162 return -EINVAL; 163 } 164 165 static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx, 166 unsigned long cbase, unsigned long cmask) 167 { 168 int ctr_idx = -1; 169 int i, pmc_idx; 170 int min, max; 171 172 if (kvm_pmu_is_fw_event(eidx)) { 173 /* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */ 174 min = kvpmu->num_hw_ctrs; 175 max = min + kvpmu->num_fw_ctrs; 176 } else { 177 /* First 3 counters are reserved for fixed counters */ 178 min = 3; 179 max = kvpmu->num_hw_ctrs; 180 } 181 182 for_each_set_bit(i, &cmask, BITS_PER_LONG) { 183 pmc_idx = i + cbase; 184 if ((pmc_idx >= min && pmc_idx < max) && 185 !test_bit(pmc_idx, kvpmu->pmc_in_use)) { 186 ctr_idx = pmc_idx; 187 break; 188 } 189 } 190 191 return ctr_idx; 192 } 193 194 static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx, 195 unsigned long cbase, unsigned long cmask) 196 { 197 int ret; 198 199 /* Fixed counters need to be have fixed mapping as they have different width */ 200 ret = kvm_pmu_get_fixed_pmc_index(eidx); 201 if (ret >= 0) 202 return ret; 203 204 return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask); 205 } 206 207 static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, 208 unsigned long *out_val) 209 { 210 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 211 struct kvm_pmc *pmc; 212 int fevent_code; 213 214 if (!IS_ENABLED(CONFIG_32BIT)) { 215 pr_warn("%s: should be invoked for only RV32\n", __func__); 216 return -EINVAL; 217 } 218 219 if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { 220 pr_warn("Invalid counter id [%ld]during read\n", cidx); 221 return -EINVAL; 222 } 223 224 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); 225 pmc = &kvpmu->pmc[cidx]; 226 227 if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) 228 return -EINVAL; 229 230 if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID) 231 return -EINVAL; 232 233 fevent_code = get_event_code(pmc->event_idx); 234 if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX, 235 "Invalid firmware event code: %d\n", fevent_code)) 236 return -EINVAL; 237 238 pmc->counter_val = kvpmu->fw_event[fevent_code].value; 239 240 *out_val = pmc->counter_val >> 32; 241 242 return 0; 243 } 244 245 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, 246 unsigned long *out_val) 247 { 248 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 249 struct kvm_pmc *pmc; 250 u64 enabled, running; 251 int fevent_code; 252 253 if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { 254 pr_warn("Invalid counter id [%ld] during read\n", cidx); 255 return -EINVAL; 256 } 257 258 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); 259 pmc = &kvpmu->pmc[cidx]; 260 261 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { 262 if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID) 263 return -EINVAL; 264 265 fevent_code = get_event_code(pmc->event_idx); 266 if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX, 267 "Invalid firmware event code: %d\n", fevent_code)) 268 return -EINVAL; 269 270 pmc->counter_val = kvpmu->fw_event[fevent_code].value; 271 } else if (pmc->perf_event) { 272 pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running); 273 } else { 274 return -EINVAL; 275 } 276 *out_val = pmc->counter_val; 277 278 return 0; 279 } 280 281 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base, 282 unsigned long ctr_mask) 283 { 284 unsigned long num_ctrs = kvm_pmu_num_counters(kvpmu); 285 286 /* Make sure we have a valid counter mask requested from the caller */ 287 if (!ctr_mask || ctr_base >= num_ctrs || (ctr_base + __fls(ctr_mask) >= num_ctrs)) 288 return -EINVAL; 289 290 return 0; 291 } 292 293 static void kvm_riscv_pmu_overflow(struct perf_event *perf_event, 294 struct perf_sample_data *data, 295 struct pt_regs *regs) 296 { 297 struct kvm_pmc *pmc = perf_event->overflow_handler_context; 298 struct kvm_vcpu *vcpu = pmc->vcpu; 299 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 300 struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu); 301 u64 period; 302 303 /* 304 * Stop the event counting by directly accessing the perf_event. 305 * Otherwise, this needs to deferred via a workqueue. 306 * That will introduce skew in the counter value because the actual 307 * physical counter would start after returning from this function. 308 * It will be stopped again once the workqueue is scheduled 309 */ 310 rpmu->pmu.stop(perf_event, PERF_EF_UPDATE); 311 312 /* 313 * The hw counter would start automatically when this function returns. 314 * Thus, the host may continue to interrupt and inject it to the guest 315 * even without the guest configuring the next event. Depending on the hardware 316 * the host may have some sluggishness only if privilege mode filtering is not 317 * available. In an ideal world, where qemu is not the only capable hardware, 318 * this can be removed. 319 * FYI: ARM64 does this way while x86 doesn't do anything as such. 320 * TODO: Should we keep it for RISC-V ? 321 */ 322 period = -(local64_read(&perf_event->count)); 323 324 local64_set(&perf_event->hw.period_left, 0); 325 perf_event->attr.sample_period = period; 326 perf_event->hw.sample_period = period; 327 328 set_bit(pmc->idx, kvpmu->pmc_overflown); 329 kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF); 330 331 rpmu->pmu.start(perf_event, PERF_EF_RELOAD); 332 } 333 334 static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, 335 unsigned long flags, unsigned long eidx, 336 unsigned long evtdata) 337 { 338 struct perf_event *event; 339 340 kvm_pmu_release_perf_event(pmc); 341 attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata); 342 if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) { 343 //TODO: Do we really want to clear the value in hardware counter 344 pmc->counter_val = 0; 345 } 346 347 /* 348 * Set the default sample_period for now. The guest specified value 349 * will be updated in the start call. 350 */ 351 attr->sample_period = kvm_pmu_get_sample_period(pmc); 352 353 event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); 354 if (IS_ERR(event)) { 355 pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); 356 return PTR_ERR(event); 357 } 358 359 pmc->perf_event = event; 360 if (flags & SBI_PMU_CFG_FLAG_AUTO_START) 361 perf_event_enable(pmc->perf_event); 362 363 return 0; 364 } 365 366 int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid) 367 { 368 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 369 struct kvm_fw_event *fevent; 370 371 if (!kvpmu || fid >= SBI_PMU_FW_MAX) 372 return -EINVAL; 373 374 fevent = &kvpmu->fw_event[fid]; 375 if (fevent->started) 376 fevent->value++; 377 378 return 0; 379 } 380 381 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, 382 unsigned long *val, unsigned long new_val, 383 unsigned long wr_mask) 384 { 385 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 386 int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC; 387 388 if (!kvpmu || !kvpmu->init_done) { 389 /* 390 * In absence of sscofpmf in the platform, the guest OS may use 391 * the legacy PMU driver to read cycle/instret. In that case, 392 * just return 0 to avoid any illegal trap. However, any other 393 * hpmcounter access should result in illegal trap as they must 394 * be access through SBI PMU only. 395 */ 396 if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) { 397 *val = 0; 398 return ret; 399 } else { 400 return KVM_INSN_ILLEGAL_TRAP; 401 } 402 } 403 404 /* The counter CSR are read only. Thus, any write should result in illegal traps */ 405 if (wr_mask) 406 return KVM_INSN_ILLEGAL_TRAP; 407 408 cidx = csr_num - CSR_CYCLE; 409 410 if (pmu_ctr_read(vcpu, cidx, val) < 0) 411 return KVM_INSN_ILLEGAL_TRAP; 412 413 return ret; 414 } 415 416 static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu) 417 { 418 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 419 420 kfree(kvpmu->sdata); 421 kvpmu->sdata = NULL; 422 kvpmu->snapshot_addr = INVALID_GPA; 423 } 424 425 int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, 426 unsigned long saddr_high, unsigned long flags, 427 struct kvm_vcpu_sbi_return *retdata) 428 { 429 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 430 int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); 431 int sbiret = 0; 432 gpa_t saddr; 433 434 if (!kvpmu || flags) { 435 sbiret = SBI_ERR_INVALID_PARAM; 436 goto out; 437 } 438 439 if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) { 440 kvm_pmu_clear_snapshot_area(vcpu); 441 return 0; 442 } 443 444 saddr = saddr_low; 445 446 if (saddr_high != 0) { 447 if (IS_ENABLED(CONFIG_32BIT)) { 448 saddr |= ((gpa_t)saddr_high << 32); 449 } else { 450 sbiret = SBI_ERR_INVALID_ADDRESS; 451 goto out; 452 } 453 } 454 455 kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); 456 if (!kvpmu->sdata) 457 return -ENOMEM; 458 459 /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ 460 if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { 461 kfree(kvpmu->sdata); 462 kvpmu->sdata = NULL; 463 sbiret = SBI_ERR_INVALID_ADDRESS; 464 goto out; 465 } 466 467 kvpmu->snapshot_addr = saddr; 468 469 out: 470 retdata->err_val = sbiret; 471 472 return 0; 473 } 474 475 int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, 476 unsigned long saddr_high, unsigned long num_events, 477 unsigned long flags, struct kvm_vcpu_sbi_return *retdata) 478 { 479 struct riscv_pmu_event_info *einfo = NULL; 480 int shmem_size = num_events * sizeof(*einfo); 481 gpa_t shmem; 482 u32 eidx, etype; 483 u64 econfig; 484 int ret; 485 486 if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) { 487 ret = SBI_ERR_INVALID_PARAM; 488 goto out; 489 } 490 491 shmem = saddr_low; 492 if (saddr_high != 0) { 493 if (IS_ENABLED(CONFIG_32BIT)) { 494 shmem |= ((gpa_t)saddr_high << 32); 495 } else { 496 ret = SBI_ERR_INVALID_ADDRESS; 497 goto out; 498 } 499 } 500 501 einfo = kzalloc(shmem_size, GFP_KERNEL); 502 if (!einfo) 503 return -ENOMEM; 504 505 ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); 506 if (ret) { 507 ret = SBI_ERR_FAILURE; 508 goto free_mem; 509 } 510 511 for (int i = 0; i < num_events; i++) { 512 eidx = einfo[i].event_idx; 513 etype = kvm_pmu_get_perf_event_type(eidx); 514 econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data); 515 ret = riscv_pmu_get_event_info(etype, econfig, NULL); 516 einfo[i].output = (ret > 0) ? 1 : 0; 517 } 518 519 ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size); 520 if (ret) 521 ret = SBI_ERR_INVALID_ADDRESS; 522 523 free_mem: 524 kfree(einfo); 525 out: 526 retdata->err_val = ret; 527 528 return 0; 529 } 530 531 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, 532 struct kvm_vcpu_sbi_return *retdata) 533 { 534 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 535 536 retdata->out_val = kvm_pmu_num_counters(kvpmu); 537 538 return 0; 539 } 540 541 int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx, 542 struct kvm_vcpu_sbi_return *retdata) 543 { 544 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 545 546 if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) { 547 retdata->err_val = SBI_ERR_INVALID_PARAM; 548 return 0; 549 } 550 551 cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); 552 retdata->out_val = kvpmu->pmc[cidx].cinfo.value; 553 554 return 0; 555 } 556 557 int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, 558 unsigned long ctr_mask, unsigned long flags, u64 ival, 559 struct kvm_vcpu_sbi_return *retdata) 560 { 561 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 562 int i, pmc_index, sbiret = 0; 563 struct kvm_pmc *pmc; 564 int fevent_code; 565 bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT; 566 567 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { 568 sbiret = SBI_ERR_INVALID_PARAM; 569 goto out; 570 } 571 572 if (snap_flag_set) { 573 if (kvpmu->snapshot_addr == INVALID_GPA) { 574 sbiret = SBI_ERR_NO_SHMEM; 575 goto out; 576 } 577 if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, 578 sizeof(struct riscv_pmu_snapshot_data))) { 579 pr_warn("Unable to read snapshot shared memory while starting counters\n"); 580 sbiret = SBI_ERR_FAILURE; 581 goto out; 582 } 583 } 584 /* Start the counters that have been configured and requested by the guest */ 585 for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { 586 pmc_index = array_index_nospec(i + ctr_base, 587 RISCV_KVM_MAX_COUNTERS); 588 if (!test_bit(pmc_index, kvpmu->pmc_in_use)) 589 continue; 590 /* The guest started the counter again. Reset the overflow status */ 591 clear_bit(pmc_index, kvpmu->pmc_overflown); 592 pmc = &kvpmu->pmc[pmc_index]; 593 if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) { 594 pmc->counter_val = ival; 595 } else if (snap_flag_set) { 596 /* The counter index in the snapshot are relative to the counter base */ 597 pmc->counter_val = kvpmu->sdata->ctr_values[i]; 598 } 599 600 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { 601 fevent_code = get_event_code(pmc->event_idx); 602 if (fevent_code >= SBI_PMU_FW_MAX) { 603 sbiret = SBI_ERR_INVALID_PARAM; 604 goto out; 605 } 606 607 /* Check if the counter was already started for some reason */ 608 if (kvpmu->fw_event[fevent_code].started) { 609 sbiret = SBI_ERR_ALREADY_STARTED; 610 continue; 611 } 612 613 kvpmu->fw_event[fevent_code].started = true; 614 kvpmu->fw_event[fevent_code].value = pmc->counter_val; 615 } else if (pmc->perf_event) { 616 if (unlikely(pmc->started)) { 617 sbiret = SBI_ERR_ALREADY_STARTED; 618 continue; 619 } 620 perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc)); 621 perf_event_enable(pmc->perf_event); 622 pmc->started = true; 623 } else { 624 sbiret = SBI_ERR_INVALID_PARAM; 625 } 626 } 627 628 out: 629 retdata->err_val = sbiret; 630 631 return 0; 632 } 633 634 int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, 635 unsigned long ctr_mask, unsigned long flags, 636 struct kvm_vcpu_sbi_return *retdata) 637 { 638 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 639 int i, pmc_index, sbiret = 0; 640 u64 enabled, running; 641 struct kvm_pmc *pmc; 642 int fevent_code; 643 bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; 644 bool shmem_needs_update = false; 645 646 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { 647 sbiret = SBI_ERR_INVALID_PARAM; 648 goto out; 649 } 650 651 if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { 652 sbiret = SBI_ERR_NO_SHMEM; 653 goto out; 654 } 655 656 /* Stop the counters that have been configured and requested by the guest */ 657 for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { 658 pmc_index = array_index_nospec(i + ctr_base, 659 RISCV_KVM_MAX_COUNTERS); 660 if (!test_bit(pmc_index, kvpmu->pmc_in_use)) 661 continue; 662 pmc = &kvpmu->pmc[pmc_index]; 663 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { 664 fevent_code = get_event_code(pmc->event_idx); 665 if (fevent_code >= SBI_PMU_FW_MAX) { 666 sbiret = SBI_ERR_INVALID_PARAM; 667 goto out; 668 } 669 670 if (!kvpmu->fw_event[fevent_code].started) 671 sbiret = SBI_ERR_ALREADY_STOPPED; 672 673 kvpmu->fw_event[fevent_code].started = false; 674 } else if (pmc->perf_event) { 675 if (pmc->started) { 676 /* Stop counting the counter */ 677 perf_event_disable(pmc->perf_event); 678 pmc->started = false; 679 } else { 680 sbiret = SBI_ERR_ALREADY_STOPPED; 681 } 682 683 if (flags & SBI_PMU_STOP_FLAG_RESET) 684 /* Release the counter if this is a reset request */ 685 kvm_pmu_release_perf_event(pmc); 686 } else { 687 sbiret = SBI_ERR_INVALID_PARAM; 688 } 689 690 if (snap_flag_set && !sbiret) { 691 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) 692 pmc->counter_val = kvpmu->fw_event[fevent_code].value; 693 else if (pmc->perf_event) 694 pmc->counter_val += perf_event_read_value(pmc->perf_event, 695 &enabled, &running); 696 /* 697 * The counter and overflow indices in the snapshot region are w.r.to 698 * cbase. Modify the set bit in the counter mask instead of the pmc_index 699 * which indicates the absolute counter index. 700 */ 701 if (test_bit(pmc_index, kvpmu->pmc_overflown)) 702 kvpmu->sdata->ctr_overflow_mask |= BIT(i); 703 kvpmu->sdata->ctr_values[i] = pmc->counter_val; 704 shmem_needs_update = true; 705 } 706 707 if (flags & SBI_PMU_STOP_FLAG_RESET) { 708 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; 709 clear_bit(pmc_index, kvpmu->pmc_in_use); 710 clear_bit(pmc_index, kvpmu->pmc_overflown); 711 if (snap_flag_set) { 712 /* 713 * Only clear the given counter as the caller is responsible to 714 * validate both the overflow mask and configured counters. 715 */ 716 kvpmu->sdata->ctr_overflow_mask &= ~BIT(i); 717 shmem_needs_update = true; 718 } 719 } 720 } 721 722 if (shmem_needs_update) 723 kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, 724 sizeof(struct riscv_pmu_snapshot_data)); 725 726 out: 727 retdata->err_val = sbiret; 728 729 return 0; 730 } 731 732 int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base, 733 unsigned long ctr_mask, unsigned long flags, 734 unsigned long eidx, u64 evtdata, 735 struct kvm_vcpu_sbi_return *retdata) 736 { 737 int ctr_idx, sbiret = 0; 738 long ret; 739 bool is_fevent; 740 unsigned long event_code; 741 u32 etype = kvm_pmu_get_perf_event_type(eidx); 742 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 743 struct kvm_pmc *pmc = NULL; 744 struct perf_event_attr attr = { 745 .type = etype, 746 .size = sizeof(struct perf_event_attr), 747 .pinned = true, 748 .disabled = true, 749 /* 750 * It should never reach here if the platform doesn't support the sscofpmf 751 * extension as mode filtering won't work without it. 752 */ 753 .exclude_host = true, 754 .exclude_hv = true, 755 .exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH), 756 .exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH), 757 .config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS, 758 }; 759 760 if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { 761 sbiret = SBI_ERR_INVALID_PARAM; 762 goto out; 763 } 764 765 event_code = get_event_code(eidx); 766 is_fevent = kvm_pmu_is_fw_event(eidx); 767 if (is_fevent && event_code >= SBI_PMU_FW_MAX) { 768 sbiret = SBI_ERR_NOT_SUPPORTED; 769 goto out; 770 } 771 772 /* 773 * SKIP_MATCH flag indicates the caller is aware of the assigned counter 774 * for this event. Just do a sanity check if it already marked used. 775 */ 776 if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) { 777 if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) { 778 sbiret = SBI_ERR_FAILURE; 779 goto out; 780 } 781 ctr_idx = ctr_base + __ffs(ctr_mask); 782 } else { 783 ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask); 784 if (ctr_idx < 0) { 785 sbiret = SBI_ERR_NOT_SUPPORTED; 786 goto out; 787 } 788 } 789 790 ctr_idx = array_index_nospec(ctr_idx, RISCV_KVM_MAX_COUNTERS); 791 pmc = &kvpmu->pmc[ctr_idx]; 792 pmc->idx = ctr_idx; 793 794 if (is_fevent) { 795 if (flags & SBI_PMU_CFG_FLAG_AUTO_START) 796 kvpmu->fw_event[event_code].started = true; 797 } else { 798 ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata); 799 if (ret) { 800 sbiret = SBI_ERR_NOT_SUPPORTED; 801 goto out; 802 } 803 } 804 805 set_bit(ctr_idx, kvpmu->pmc_in_use); 806 pmc->event_idx = eidx; 807 retdata->out_val = ctr_idx; 808 out: 809 retdata->err_val = sbiret; 810 811 return 0; 812 } 813 814 int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, 815 struct kvm_vcpu_sbi_return *retdata) 816 { 817 int ret; 818 819 ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val); 820 if (ret == -EINVAL) 821 retdata->err_val = SBI_ERR_INVALID_PARAM; 822 823 return 0; 824 } 825 826 int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, 827 struct kvm_vcpu_sbi_return *retdata) 828 { 829 int ret; 830 831 ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val); 832 if (ret == -EINVAL) 833 retdata->err_val = SBI_ERR_INVALID_PARAM; 834 835 return 0; 836 } 837 838 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) 839 { 840 int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0; 841 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 842 struct kvm_pmc *pmc; 843 844 /* 845 * PMU functionality should be only available to guests if privilege mode 846 * filtering is available in the host. Otherwise, guest will always count 847 * events while the execution is in hypervisor mode. 848 */ 849 if (kvm_riscv_isa_check_host(SSCOFPMF)) 850 return; 851 852 ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs); 853 if (ret < 0 || !hpm_width || !num_hw_ctrs) 854 return; 855 856 /* 857 * Increase the number of hardware counters to offset the time counter. 858 */ 859 kvpmu->num_hw_ctrs = num_hw_ctrs + 1; 860 kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX; 861 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); 862 kvpmu->snapshot_addr = INVALID_GPA; 863 864 if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) { 865 pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA"); 866 kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS; 867 } 868 869 /* 870 * There is no correlation between the logical hardware counter and virtual counters. 871 * However, we need to encode a hpmcounter CSR in the counter info field so that 872 * KVM can trap n emulate the read. This works well in the migration use case as 873 * KVM doesn't care if the actual hpmcounter is available in the hardware or not. 874 */ 875 for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) { 876 /* TIME CSR shouldn't be read from perf interface */ 877 if (i == 1) 878 continue; 879 pmc = &kvpmu->pmc[i]; 880 pmc->idx = i; 881 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; 882 pmc->vcpu = vcpu; 883 if (i < kvpmu->num_hw_ctrs) { 884 pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW; 885 if (i < 3) 886 /* CY, IR counters */ 887 pmc->cinfo.width = 63; 888 else 889 pmc->cinfo.width = hpm_width; 890 /* 891 * The CSR number doesn't have any relation with the logical 892 * hardware counters. The CSR numbers are encoded sequentially 893 * to avoid maintaining a map between the virtual counter 894 * and CSR number. 895 */ 896 pmc->cinfo.csr = CSR_CYCLE + i; 897 } else { 898 pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW; 899 pmc->cinfo.width = 63; 900 } 901 } 902 903 kvpmu->init_done = true; 904 } 905 906 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) 907 { 908 struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); 909 struct kvm_pmc *pmc; 910 int i; 911 912 if (!kvpmu) 913 return; 914 915 for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { 916 pmc = &kvpmu->pmc[i]; 917 pmc->counter_val = 0; 918 kvm_pmu_release_perf_event(pmc); 919 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; 920 } 921 bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); 922 bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); 923 memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); 924 kvm_pmu_clear_snapshot_area(vcpu); 925 } 926 927 void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) 928 { 929 kvm_riscv_vcpu_pmu_deinit(vcpu); 930 } 931