// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *     Atish Patra <atishp@rivosinc.com>
 */

#define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/nospec.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/bitops.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
        [SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
        [SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
        [SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
        [SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
        [SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
        [SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
        [SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
        [SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
        [SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
        [SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};

static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
        u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
        u64 sample_period;

        if (!pmc->counter_val)
                sample_period = counter_val_mask;
        else
                sample_period = (-pmc->counter_val) & counter_val_mask;

        return sample_period;
}

static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
        enum sbi_pmu_event_type etype = get_event_type(eidx);
        u32 type = PERF_TYPE_MAX;

        switch (etype) {
        case SBI_PMU_EVENT_TYPE_HW:
                type = PERF_TYPE_HARDWARE;
                break;
        case SBI_PMU_EVENT_TYPE_CACHE:
                type = PERF_TYPE_HW_CACHE;
                break;
        case SBI_PMU_EVENT_TYPE_RAW:
        case SBI_PMU_EVENT_TYPE_RAW_V2:
        case SBI_PMU_EVENT_TYPE_FW:
                type = PERF_TYPE_RAW;
                break;
        default:
                break;
        }

        return type;
}

static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
        return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}

static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
        if (pmc->perf_event) {
                perf_event_disable(pmc->perf_event);
                perf_event_release_kernel(pmc->perf_event);
                pmc->perf_event = NULL;
        }
}

static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
        return hw_event_perf_map[array_index_nospec(sbi_event_code,
                                                    SBI_PMU_HW_GENERAL_MAX)];
}
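
/*
 * kvm_pmu_get_perf_event_cache_config() below converts an SBI cache event
 * code into the perf layout, (cache_id) | (op_id << 8) | (result_id << 16).
 * The individual ID spaces match because the SBI PMU spec reuses the perf
 * enumerations. E.g. an L1D read miss (cache_id 0, op 0, result 1)
 * becomes perf config 0x10000.
 */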

static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
        u64 config = U64_MAX;
        unsigned int cache_type, cache_op, cache_result;

        /* All the cache event masks lie within 0xFF. No separate masking is necessary. */
        cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
                      SBI_PMU_EVENT_CACHE_ID_SHIFT;
        cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
                    SBI_PMU_EVENT_CACHE_OP_SHIFT;
        cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

        if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
            cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
            cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return config;

        config = cache_type | (cache_op << 8) | (cache_result << 16);

        return config;
}

static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
        enum sbi_pmu_event_type etype = get_event_type(eidx);
        u32 ecode = get_event_code(eidx);
        u64 config = U64_MAX;

        switch (etype) {
        case SBI_PMU_EVENT_TYPE_HW:
                if (ecode < SBI_PMU_HW_GENERAL_MAX)
                        config = kvm_pmu_get_perf_event_hw_config(ecode);
                break;
        case SBI_PMU_EVENT_TYPE_CACHE:
                config = kvm_pmu_get_perf_event_cache_config(ecode);
                break;
        case SBI_PMU_EVENT_TYPE_RAW:
                config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
                break;
        case SBI_PMU_EVENT_TYPE_RAW_V2:
                config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK;
                break;
        case SBI_PMU_EVENT_TYPE_FW:
                if (ecode < SBI_PMU_FW_MAX)
                        config = (1ULL << 63) | ecode;
                break;
        default:
                break;
        }

        return config;
}

static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
        u32 etype = kvm_pmu_get_perf_event_type(eidx);
        u32 ecode = get_event_code(eidx);

        /* etype has already been translated into the perf type space */
        if (etype != PERF_TYPE_HARDWARE)
                return -EINVAL;

        if (ecode == SBI_PMU_HW_CPU_CYCLES)
                return 0;
        else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
                return 2;
        else
                return -EINVAL;
}

static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
                                              unsigned long cbase, unsigned long cmask)
{
        int ctr_idx = -1;
        int i, pmc_idx;
        int min, max;

        if (kvm_pmu_is_fw_event(eidx)) {
                /* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
                min = kvpmu->num_hw_ctrs;
                max = min + kvpmu->num_fw_ctrs;
        } else {
                /* The first 3 counters are reserved for the fixed counters */
                min = 3;
                max = kvpmu->num_hw_ctrs;
        }

        for_each_set_bit(i, &cmask, BITS_PER_LONG) {
                pmc_idx = i + cbase;
                if ((pmc_idx >= min && pmc_idx < max) &&
                    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
                        ctr_idx = pmc_idx;
                        break;
                }
        }

        return ctr_idx;
}

static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
                             unsigned long cbase, unsigned long cmask)
{
        int ret;

        /* Fixed counters need to have a fixed mapping as they have a different width */
        ret = kvm_pmu_get_fixed_pmc_index(eidx);
        if (ret >= 0)
                return ret;

        return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}
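
/*
 * Resulting virtual counter layout (a sketch; the exact counts are
 * platform dependent):
 *
 *   0                       cycle (fixed)
 *   1                       time (never backed by a perf event; see the
 *                           cidx == 1 checks below)
 *   2                       instret (fixed)
 *   3 .. num_hw_ctrs - 1    programmable hpmcounters
 *   num_hw_ctrs and above   firmware counters, mapped 1:1
 */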

static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
                              unsigned long *out_val)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        int fevent_code;

        if (!IS_ENABLED(CONFIG_32BIT)) {
                pr_warn("%s: should only be invoked on RV32\n", __func__);
                return -EINVAL;
        }

        if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
                pr_warn("Invalid counter id [%ld] during read\n", cidx);
                return -EINVAL;
        }

        cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
        pmc = &kvpmu->pmc[cidx];

        if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
                return -EINVAL;

        fevent_code = get_event_code(pmc->event_idx);
        pmc->counter_val = kvpmu->fw_event[fevent_code].value;

        *out_val = pmc->counter_val >> 32;

        return 0;
}

static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
                        unsigned long *out_val)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u64 enabled, running;
        int fevent_code;

        if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
                pr_warn("Invalid counter id [%ld] during read\n", cidx);
                return -EINVAL;
        }

        cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
        pmc = &kvpmu->pmc[cidx];

        if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
                fevent_code = get_event_code(pmc->event_idx);
                pmc->counter_val = kvpmu->fw_event[fevent_code].value;
        } else if (pmc->perf_event) {
                pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
        } else {
                return -EINVAL;
        }
        *out_val = pmc->counter_val;

        return 0;
}

static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
                                         unsigned long ctr_mask)
{
        /* Make sure the caller requested a valid counter mask */
        if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
                return -EINVAL;

        return 0;
}
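
/*
 * For example, ctr_base = 3 with ctr_mask = 0b101 selects counters 3 and 5.
 * Since __fls(0b101) = 2, the check above only passes when 3 + 2 = 5 is
 * still below kvm_pmu_num_counters().
 */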

static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
                                   struct perf_sample_data *data,
                                   struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_vcpu *vcpu = pmc->vcpu;
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
        u64 period;

        /*
         * Stop the event counting by directly accessing the perf_event.
         * Otherwise, this would need to be deferred via a workqueue.
         * That would introduce skew in the counter value because the actual
         * physical counter would start after returning from this function.
         * It would be stopped again once the workqueue is scheduled.
         */
        rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

        /*
         * The hw counter would start automatically when this function returns.
         * Thus, the host may continue to interrupt and inject it into the guest
         * even without the guest configuring the next event. Depending on the
         * hardware, the host may see some sluggishness only if privilege mode
         * filtering is not available. In an ideal world, where QEMU is not the
         * only capable hardware, this can be removed.
         * FYI: ARM64 does it this way while x86 doesn't do anything like this.
         * TODO: Should we keep it for RISC-V?
         */
        period = -(local64_read(&perf_event->count));

        local64_set(&perf_event->hw.period_left, 0);
        perf_event->attr.sample_period = period;
        perf_event->hw.sample_period = period;

        set_bit(pmc->idx, kvpmu->pmc_overflown);
        kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

        rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
                                      unsigned long flags, unsigned long eidx,
                                      unsigned long evtdata)
{
        struct perf_event *event;

        kvm_pmu_release_perf_event(pmc);
        attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
        if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
                /* TODO: Do we really want to clear the value in the hardware counter? */
                pmc->counter_val = 0;
        }

        /*
         * Set the default sample_period for now. The guest specified value
         * will be updated in the start call.
         */
        attr->sample_period = kvm_pmu_get_sample_period(pmc);

        event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
        if (IS_ERR(event)) {
                pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
                return PTR_ERR(event);
        }

        pmc->perf_event = event;
        if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
                perf_event_enable(pmc->perf_event);

        return 0;
}

int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_fw_event *fevent;

        if (!kvpmu || fid >= SBI_PMU_FW_MAX)
                return -EINVAL;

        fevent = &kvpmu->fw_event[fid];
        if (fevent->started)
                fevent->value++;

        return 0;
}
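
/*
 * The helper above is intended for KVM's in-kernel SBI emulation paths.
 * A sketch of a typical call site (assuming the firmware event IDs from
 * <asm/sbi.h>), e.g. while emulating the SBI set_timer call:
 *
 *      kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_SET_TIMER);
 *
 * The counter only advances if the guest has configured and started a
 * firmware counter for that event.
 */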

int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
                                unsigned long *val, unsigned long new_val,
                                unsigned long wr_mask)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

        if (!kvpmu || !kvpmu->init_done) {
                /*
                 * In the absence of sscofpmf in the platform, the guest OS may use
                 * the legacy PMU driver to read cycle/instret. In that case,
                 * just return 0 to avoid any illegal trap. However, any other
                 * hpmcounter access should result in an illegal trap as they must
                 * be accessed through the SBI PMU interface only.
                 */
                if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
                        *val = 0;
                        return ret;
                } else {
                        return KVM_INSN_ILLEGAL_TRAP;
                }
        }

        /* The counter CSRs are read-only. Thus, any write should result in an illegal trap. */
        if (wr_mask)
                return KVM_INSN_ILLEGAL_TRAP;

        cidx = csr_num - CSR_CYCLE;

        if (pmu_ctr_read(vcpu, cidx, val) < 0)
                return KVM_INSN_ILLEGAL_TRAP;

        return ret;
}

static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

        kfree(kvpmu->sdata);
        kvpmu->sdata = NULL;
        kvpmu->snapshot_addr = INVALID_GPA;
}

int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
                                          unsigned long saddr_high, unsigned long flags,
                                          struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
        int sbiret = 0;
        gpa_t saddr;

        if (!kvpmu || flags) {
                sbiret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
                kvm_pmu_clear_snapshot_area(vcpu);
                return 0;
        }

        saddr = saddr_low;

        if (saddr_high != 0) {
                if (IS_ENABLED(CONFIG_32BIT)) {
                        saddr |= ((gpa_t)saddr_high << 32);
                } else {
                        sbiret = SBI_ERR_INVALID_ADDRESS;
                        goto out;
                }
        }

        kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
        if (!kvpmu->sdata)
                return -ENOMEM;

        /* No need to check for a writable slot explicitly as kvm_vcpu_write_guest does it internally */
        if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
                kfree(kvpmu->sdata);
                kvpmu->sdata = NULL;
                sbiret = SBI_ERR_INVALID_ADDRESS;
                goto out;
        }

        kvpmu->snapshot_addr = saddr;

out:
        retdata->err_val = sbiret;

        return 0;
}
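
/*
 * Once the snapshot area is registered, the start/stop handlers below
 * exchange counter state with the guest through it: per the SBI PMU
 * snapshot layout (struct riscv_pmu_snapshot_data), an overflow bitmap
 * in ctr_overflow_mask plus the saved values in ctr_values[], both
 * indexed relative to the counter base of the call.
 */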

int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
                                  unsigned long saddr_high, unsigned long num_events,
                                  unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
{
        struct riscv_pmu_event_info *einfo = NULL;
        int shmem_size = num_events * sizeof(*einfo);
        gpa_t shmem;
        u32 eidx, etype;
        u64 econfig;
        int ret;

        if (flags != 0 || (saddr_low & (SZ_16 - 1)) || num_events == 0) {
                ret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        shmem = saddr_low;
        if (saddr_high != 0) {
                if (IS_ENABLED(CONFIG_32BIT)) {
                        shmem |= ((gpa_t)saddr_high << 32);
                } else {
                        ret = SBI_ERR_INVALID_ADDRESS;
                        goto out;
                }
        }

        einfo = kzalloc(shmem_size, GFP_KERNEL);
        if (!einfo)
                return -ENOMEM;

        ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
        if (ret) {
                ret = SBI_ERR_FAILURE;
                goto free_mem;
        }

        for (int i = 0; i < num_events; i++) {
                eidx = einfo[i].event_idx;
                etype = kvm_pmu_get_perf_event_type(eidx);
                econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
                ret = riscv_pmu_get_event_info(etype, econfig, NULL);
                einfo[i].output = (ret > 0) ? 1 : 0;
        }

        ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
        if (ret)
                ret = SBI_ERR_INVALID_ADDRESS;

free_mem:
        kfree(einfo);
out:
        retdata->err_val = ret;

        return 0;
}

int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
                                struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

        retdata->out_val = kvm_pmu_num_counters(kvpmu);

        return 0;
}

int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
                                struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

        if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) {
                retdata->err_val = SBI_ERR_INVALID_PARAM;
                return 0;
        }

        cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS);
        retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

        return 0;
}
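
/*
 * cinfo.value above packs the SBI counter_info layout: the CSR number in
 * the low 12 bits, the counter width (number of bits minus one) in the
 * next 6 bits, and the counter type in the MSB. The fields are filled in
 * by kvm_riscv_vcpu_pmu_init() below.
 */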

int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
                                 unsigned long ctr_mask, unsigned long flags, u64 ival,
                                 struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int i, pmc_index, sbiret = 0;
        struct kvm_pmc *pmc;
        int fevent_code;
        bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

        if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
                sbiret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        if (snap_flag_set) {
                if (kvpmu->snapshot_addr == INVALID_GPA) {
                        sbiret = SBI_ERR_NO_SHMEM;
                        goto out;
                }
                if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
                                        sizeof(struct riscv_pmu_snapshot_data))) {
                        pr_warn("Unable to read snapshot shared memory while starting counters\n");
                        sbiret = SBI_ERR_FAILURE;
                        goto out;
                }
        }
        /* Start the counters that have been configured and requested by the guest */
        for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
                pmc_index = array_index_nospec(i + ctr_base, RISCV_KVM_MAX_COUNTERS);
                if (!test_bit(pmc_index, kvpmu->pmc_in_use))
                        continue;
                /* The guest started the counter again. Reset the overflow status. */
                clear_bit(pmc_index, kvpmu->pmc_overflown);
                pmc = &kvpmu->pmc[pmc_index];
                if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
                        pmc->counter_val = ival;
                } else if (snap_flag_set) {
                        /* The counter indices in the snapshot are relative to the counter base */
                        pmc->counter_val = kvpmu->sdata->ctr_values[i];
                }

                if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
                        fevent_code = get_event_code(pmc->event_idx);
                        if (fevent_code >= SBI_PMU_FW_MAX) {
                                sbiret = SBI_ERR_INVALID_PARAM;
                                goto out;
                        }

                        /* Check if the counter was already started for some reason */
                        if (kvpmu->fw_event[fevent_code].started) {
                                sbiret = SBI_ERR_ALREADY_STARTED;
                                continue;
                        }

                        kvpmu->fw_event[fevent_code].started = true;
                        kvpmu->fw_event[fevent_code].value = pmc->counter_val;
                } else if (pmc->perf_event) {
                        if (unlikely(pmc->started)) {
                                sbiret = SBI_ERR_ALREADY_STARTED;
                                continue;
                        }
                        perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
                        perf_event_enable(pmc->perf_event);
                        pmc->started = true;
                } else {
                        sbiret = SBI_ERR_INVALID_PARAM;
                }
        }

out:
        retdata->err_val = sbiret;

        return 0;
}
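
/*
 * Worked example for the relative snapshot indexing used by the start/stop
 * handlers: with ctr_base = 3 and ctr_mask = 0b101, bit 0 of
 * sdata->ctr_overflow_mask and sdata->ctr_values[0] describe counter 3,
 * while bit 2 and ctr_values[2] describe counter 5. By contrast,
 * pmc_in_use/pmc_overflown are indexed by the absolute counter number.
 */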

int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
                                unsigned long ctr_mask, unsigned long flags,
                                struct kvm_vcpu_sbi_return *retdata)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        int i, pmc_index, sbiret = 0;
        u64 enabled, running;
        struct kvm_pmc *pmc;
        int fevent_code;
        bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
        bool shmem_needs_update = false;

        if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
                sbiret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
                sbiret = SBI_ERR_NO_SHMEM;
                goto out;
        }

        /* Stop the counters that have been configured and requested by the guest */
        for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
                pmc_index = array_index_nospec(i + ctr_base, RISCV_KVM_MAX_COUNTERS);
                if (!test_bit(pmc_index, kvpmu->pmc_in_use))
                        continue;
                pmc = &kvpmu->pmc[pmc_index];
                if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
                        fevent_code = get_event_code(pmc->event_idx);
                        if (fevent_code >= SBI_PMU_FW_MAX) {
                                sbiret = SBI_ERR_INVALID_PARAM;
                                goto out;
                        }

                        if (!kvpmu->fw_event[fevent_code].started)
                                sbiret = SBI_ERR_ALREADY_STOPPED;

                        kvpmu->fw_event[fevent_code].started = false;
                } else if (pmc->perf_event) {
                        if (pmc->started) {
                                /* Stop counting the counter */
                                perf_event_disable(pmc->perf_event);
                                pmc->started = false;
                        } else {
                                sbiret = SBI_ERR_ALREADY_STOPPED;
                        }

                        if (flags & SBI_PMU_STOP_FLAG_RESET)
                                /* Release the counter if this is a reset request */
                                kvm_pmu_release_perf_event(pmc);
                } else {
                        sbiret = SBI_ERR_INVALID_PARAM;
                }

                if (snap_flag_set && !sbiret) {
                        if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
                                pmc->counter_val = kvpmu->fw_event[fevent_code].value;
                        else if (pmc->perf_event)
                                pmc->counter_val += perf_event_read_value(pmc->perf_event,
                                                                          &enabled, &running);
                        /*
                         * The counter and overflow indices in the snapshot region are
                         * relative to cbase. Modify the set bit in the counter mask
                         * instead of pmc_index, which is the absolute counter index.
                         */
                        if (test_bit(pmc_index, kvpmu->pmc_overflown))
                                kvpmu->sdata->ctr_overflow_mask |= BIT(i);
                        kvpmu->sdata->ctr_values[i] = pmc->counter_val;
                        shmem_needs_update = true;
                }

                if (flags & SBI_PMU_STOP_FLAG_RESET) {
                        pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
                        clear_bit(pmc_index, kvpmu->pmc_in_use);
                        clear_bit(pmc_index, kvpmu->pmc_overflown);
                        if (snap_flag_set) {
                                /*
                                 * Only clear the given counter as the caller is responsible
                                 * for validating both the overflow mask and the configured
                                 * counters.
                                 */
                                kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
                                shmem_needs_update = true;
                        }
                }
        }

        if (shmem_needs_update)
                kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
                                     sizeof(struct riscv_pmu_snapshot_data));

out:
        retdata->err_val = sbiret;

        return 0;
}
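
/*
 * A typical guest-side sequence, sketched in SBI terms (not literal host
 * code):
 *
 *      COUNTER_CFG_MATCH(cbase, cmask, flags, event_idx, event_data)
 *              -> returns the matched counter index
 *      COUNTER_START(idx, 0x1, SBI_PMU_START_FLAG_SET_INIT_VALUE, ival)
 *      ...workload...
 *      COUNTER_STOP(idx, 0x1, SBI_PMU_STOP_FLAG_RESET)
 *
 * The handler below implements the matching step.
 */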

int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
                                     unsigned long ctr_mask, unsigned long flags,
                                     unsigned long eidx, u64 evtdata,
                                     struct kvm_vcpu_sbi_return *retdata)
{
        int ctr_idx, sbiret = 0;
        long ret;
        bool is_fevent;
        unsigned long event_code;
        u32 etype = kvm_pmu_get_perf_event_type(eidx);
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc = NULL;
        struct perf_event_attr attr = {
                .type = etype,
                .size = sizeof(struct perf_event_attr),
                .pinned = true,
                .disabled = true,
                /*
                 * It should never reach here if the platform doesn't support the
                 * sscofpmf extension, as mode filtering won't work without it.
                 */
                .exclude_host = true,
                .exclude_hv = true,
                .exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
                .exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
                .config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
        };

        if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
                sbiret = SBI_ERR_INVALID_PARAM;
                goto out;
        }

        event_code = get_event_code(eidx);
        is_fevent = kvm_pmu_is_fw_event(eidx);
        if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
                sbiret = SBI_ERR_NOT_SUPPORTED;
                goto out;
        }

        /*
         * The SKIP_MATCH flag indicates the caller already knows the counter
         * assigned for this event. Just do a sanity check that it is marked
         * as used.
         */
        if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
                if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
                        sbiret = SBI_ERR_FAILURE;
                        goto out;
                }
                ctr_idx = ctr_base + __ffs(ctr_mask);
        } else {
                ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
                if (ctr_idx < 0) {
                        sbiret = SBI_ERR_NOT_SUPPORTED;
                        goto out;
                }
        }

        ctr_idx = array_index_nospec(ctr_idx, RISCV_KVM_MAX_COUNTERS);
        pmc = &kvpmu->pmc[ctr_idx];
        pmc->idx = ctr_idx;

        if (is_fevent) {
                if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
                        kvpmu->fw_event[event_code].started = true;
        } else {
                ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
                if (ret) {
                        sbiret = SBI_ERR_NOT_SUPPORTED;
                        goto out;
                }
        }

        set_bit(ctr_idx, kvpmu->pmc_in_use);
        pmc->event_idx = eidx;
        retdata->out_val = ctr_idx;
out:
        retdata->err_val = sbiret;

        return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
                                      struct kvm_vcpu_sbi_return *retdata)
{
        int ret;

        ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
        if (ret == -EINVAL)
                retdata->err_val = SBI_ERR_INVALID_PARAM;

        return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
                                   struct kvm_vcpu_sbi_return *retdata)
{
        int ret;

        ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
        if (ret == -EINVAL)
                retdata->err_val = SBI_ERR_INVALID_PARAM;

        return 0;
}
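
/*
 * Example of the counter info the guest ends up seeing after the init
 * below (a sketch for a platform reporting hpm_width = 47):
 *
 *      counter 0: type HW, CSR 0xC00 (cycle),       width 63
 *      counter 2: type HW, CSR 0xC02 (instret),     width 63
 *      counter 3: type HW, CSR 0xC03 (hpmcounter3), width 47
 *      counter >= num_hw_ctrs: type FW,             width 63
 */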

void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
        int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;

        /*
         * PMU functionality should only be made available to guests if privilege
         * mode filtering is available in the host. Otherwise, the guest will
         * always count events, including those that occur while execution is in
         * hypervisor mode.
         */
        if (!riscv_isa_extension_available(NULL, SSCOFPMF))
                return;

        ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
        if (ret < 0 || !hpm_width || !num_hw_ctrs)
                return;

        /*
         * Increase the number of hardware counters by one to account for
         * the time counter (index 1).
         */
        kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
        kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
        memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
        kvpmu->snapshot_addr = INVALID_GPA;

        if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
                pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA\n");
                kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
        }

        /*
         * There is no correlation between the logical hardware counters and the
         * virtual counters. However, we need to encode an hpmcounter CSR in the
         * counter info field so that KVM can trap and emulate the read. This works
         * well in the migration use case as KVM doesn't care whether the actual
         * hpmcounter is available in the hardware or not.
         */
        for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
                /* The TIME CSR shouldn't be read from the perf interface */
                if (i == 1)
                        continue;
                pmc = &kvpmu->pmc[i];
                pmc->idx = i;
                pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
                pmc->vcpu = vcpu;
                if (i < kvpmu->num_hw_ctrs) {
                        pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
                        if (i < 3)
                                /* CY, IR counters */
                                pmc->cinfo.width = 63;
                        else
                                pmc->cinfo.width = hpm_width;
                        /*
                         * The CSR number doesn't have any relation to the logical
                         * hardware counters. The CSR numbers are encoded sequentially
                         * to avoid maintaining a map between the virtual counters
                         * and CSR numbers.
                         */
                        pmc->cinfo.csr = CSR_CYCLE + i;
                } else {
                        pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
                        pmc->cinfo.width = 63;
                }
        }

        kvpmu->init_done = true;
}

void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        int i;

        if (!kvpmu)
                return;

        for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
                pmc = &kvpmu->pmc[i];
                pmc->counter_val = 0;
                kvm_pmu_release_perf_event(pmc);
                pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
        }
        bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
        bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
        memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
        kvm_pmu_clear_snapshot_area(vcpu);
}

void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
        kvm_riscv_vcpu_pmu_deinit(vcpu);
}