// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This code is based on ARM perf event code which is in turn based on
 * sparc64 and x86 code.
 */

#define pr_fmt(fmt) "riscv-pmu-sbi: " fmt

#include <linux/mod_devicetable.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/platform_device.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of_irq.h>
#include <linux/of.h>
#include <linux/cpu_pm.h>
#include <linux/sched/clock.h>
#include <linux/soc/andes/irq.h>
#include <linux/workqueue.h>

#include <asm/errata_list.h>
#include <asm/sbi.h>
#include <asm/cpufeature.h>

#define ALT_SBI_PMU_OVERFLOW(__ovl)					\
asm volatile(ALTERNATIVE_2(						\
	"csrr %0, " __stringify(CSR_SCOUNTOVF),				\
	"csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF),		\
		THEAD_VENDOR_ID, ERRATA_THEAD_PMU,			\
		CONFIG_ERRATA_THEAD_PMU,				\
	"csrr %0, " __stringify(ANDES_CSR_SCOUNTEROF),			\
		0, RISCV_ISA_EXT_XANDESPMU,				\
		CONFIG_ANDES_CUSTOM_PMU)				\
	: "=r" (__ovl) :						\
	: "memory")

#define ALT_SBI_PMU_OVF_CLEAR_PENDING(__irq_mask)			\
asm volatile(ALTERNATIVE(						\
	"csrc " __stringify(CSR_IP) ", %0\n\t",				\
	"csrc " __stringify(ANDES_CSR_SLIP) ", %0\n\t",			\
		0, RISCV_ISA_EXT_XANDESPMU,				\
		CONFIG_ANDES_CUSTOM_PMU)				\
	: : "r"(__irq_mask)						\
	: "memory")

#define SYSCTL_NO_USER_ACCESS	0
#define SYSCTL_USER_ACCESS	1
#define SYSCTL_LEGACY		2

#define PERF_EVENT_FLAG_NO_USER_ACCESS	BIT(SYSCTL_NO_USER_ACCESS)
#define PERF_EVENT_FLAG_USER_ACCESS	BIT(SYSCTL_USER_ACCESS)
#define PERF_EVENT_FLAG_LEGACY		BIT(SYSCTL_LEGACY)

PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");

static bool sbi_v2_available;
static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available);
#define sbi_pmu_snapshot_available() \
	static_branch_unlikely(&sbi_pmu_snapshot_available)

static struct attribute *riscv_arch_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_firmware.attr,
	NULL,
};

static struct attribute_group riscv_pmu_format_group = {
	.name = "format",
	.attrs = riscv_arch_formats_attr,
};

static const struct attribute_group *riscv_pmu_attr_groups[] = {
	&riscv_pmu_format_group,
	NULL,
};

/* Allow user mode access by default */
static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;

/*
 * RISC-V doesn't have heterogeneous harts yet. This needs to become part of
 * per_cpu in case of harts with different PMU counters.
 */
static union sbi_pmu_ctr_info *pmu_ctr_list;
static bool riscv_pmu_use_irq;
static unsigned int riscv_pmu_irq_num;
static unsigned int riscv_pmu_irq_mask;
static unsigned int riscv_pmu_irq;

/* Cache the available counters in a bitmask */
static unsigned long cmask;

struct sbi_pmu_event_data {
	union {
		union {
			struct hw_gen_event {
				uint32_t event_code:16;
				uint32_t event_type:4;
				uint32_t reserved:12;
			} hw_gen_event;
			struct hw_cache_event {
				uint32_t result_id:1;
				uint32_t op_id:2;
				uint32_t cache_id:13;
				uint32_t event_type:4;
				uint32_t reserved:12;
			} hw_cache_event;
		};
		uint32_t event_idx;
	};
};

static struct sbi_pmu_event_data pmu_hw_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= {.hw_gen_event = {
							SBI_PMU_HW_CPU_CYCLES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_INSTRUCTIONS]		= {.hw_gen_event = {
							SBI_PMU_HW_INSTRUCTIONS,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_CACHE_REFERENCES]	= {.hw_gen_event = {
							SBI_PMU_HW_CACHE_REFERENCES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_CACHE_MISSES]		= {.hw_gen_event = {
							SBI_PMU_HW_CACHE_MISSES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= {.hw_gen_event = {
							SBI_PMU_HW_BRANCH_INSTRUCTIONS,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BRANCH_MISSES]		= {.hw_gen_event = {
							SBI_PMU_HW_BRANCH_MISSES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_BUS_CYCLES]		= {.hw_gen_event = {
							SBI_PMU_HW_BUS_CYCLES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= {.hw_gen_event = {
							SBI_PMU_HW_STALLED_CYCLES_FRONTEND,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= {.hw_gen_event = {
							SBI_PMU_HW_STALLED_CYCLES_BACKEND,
							SBI_PMU_EVENT_TYPE_HW, 0}},
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= {.hw_gen_event = {
							SBI_PMU_HW_REF_CPU_CYCLES,
							SBI_PMU_EVENT_TYPE_HW, 0}},
};

#define C(x) PERF_COUNT_HW_CACHE_##x
static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
					C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
					C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
		},
	},
};

static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata)
{
	struct sbiret ret;

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
			0, cmask, 0, edata->event_idx, 0, 0);
	if (!ret.error) {
		sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
			  ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
	} else if (ret.error == SBI_ERR_NOT_SUPPORTED) {
		/* This event cannot be monitored by any counter */
		edata->event_idx = -EINVAL;
	}
}

static void pmu_sbi_check_std_events(struct work_struct *work)
{
	for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++)
		pmu_sbi_check_event(&pmu_hw_event_map[i]);

	for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++)
		for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++)
			for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++)
				pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]);
}

static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events);

static int pmu_sbi_ctr_get_width(int idx)
{
	return pmu_ctr_list[idx].width;
}

static bool pmu_sbi_ctr_is_fw(int cidx)
{
	union sbi_pmu_ctr_info *info;

	info = &pmu_ctr_list[cidx];
	if (!info)
		return false;

	return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
}

/*
 * Returns the counter width of a programmable counter and the number of
 * hardware counters. As we don't support heterogeneous CPUs yet, it is okay
 * to just return the counter width of the first programmable counter.
 */
int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
{
	int i;
	union sbi_pmu_ctr_info *info;
	u32 hpm_width = 0, hpm_count = 0;

	if (!cmask)
		return -EINVAL;

	for_each_set_bit(i, &cmask, RISCV_MAX_COUNTERS) {
		info = &pmu_ctr_list[i];
		if (!info)
			continue;
		if (!hpm_width && info->csr != CSR_CYCLE && info->csr != CSR_INSTRET)
			hpm_width = info->width;
		if (info->type == SBI_PMU_CTR_TYPE_HW)
			hpm_count++;
	}

	*hw_ctr_width = hpm_width;
	*num_hw_ctr = hpm_count;

	return 0;
}
EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);

static uint8_t pmu_sbi_csr_index(struct perf_event *event)
{
	return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
}

static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
{
	unsigned long cflags = 0;
	bool guest_events = false;

	if (event->attr.config1 & RISCV_PMU_CONFIG1_GUEST_EVENTS)
		guest_events = true;
	if (event->attr.exclude_kernel)
		cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VSINH : SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_user)
		cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VUINH : SBI_PMU_CFG_FLAG_SET_UINH;
	if (guest_events && event->attr.exclude_hv)
		cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_host)
		cflags |= SBI_PMU_CFG_FLAG_SET_UINH | SBI_PMU_CFG_FLAG_SET_SINH;
	if (event->attr.exclude_guest)
		cflags |= SBI_PMU_CFG_FLAG_SET_VSINH | SBI_PMU_CFG_FLAG_SET_VUINH;

	return cflags;
}
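
/*
 * Illustrative mapping (descriptive note, the perf command lines are an
 * example of typical usage and not part of this driver): opening an event as
 * "cycles:u" sets attr.exclude_kernel, which becomes SBI_PMU_CFG_FLAG_SET_SINH
 * above so the selected counter does not increment while the hart runs in
 * S-mode; "cycles:k" sets attr.exclude_user and becomes
 * SBI_PMU_CFG_FLAG_SET_UINH. The VSINH/VUINH variants are only used when the
 * caller marked the event as a guest event via RISCV_PMU_CONFIG1_GUEST_EVENTS.
 */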

static int pmu_sbi_ctr_get_idx(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct sbiret ret;
	int idx;
	uint64_t cbase = 0, cmask = rvpmu->cmask;
	unsigned long cflags = 0;

	cflags = pmu_sbi_get_filter_flags(event);

	/*
	 * In legacy mode, we have to force the fixed counters for those events
	 * but not in the user access mode as we want to use the other counters
	 * that support sampling/filtering.
	 */
	if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
			cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
			cmask = 1;
		} else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
			cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
			cmask = BIT(CSR_INSTRET - CSR_CYCLE);
		}
	}

	/* retrieve the available counter index */
#if defined(CONFIG_32BIT)
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
			cmask, cflags, hwc->event_base, hwc->config,
			hwc->config >> 32);
#else
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
			cmask, cflags, hwc->event_base, hwc->config, 0);
#endif
	if (ret.error) {
		pr_debug("Not able to find a counter for event %lx config %llx\n",
			 hwc->event_base, hwc->config);
		return sbi_err_map_linux_errno(ret.error);
	}

	idx = ret.value;
	if (!test_bit(idx, &rvpmu->cmask) || !pmu_ctr_list[idx].value)
		return -ENOENT;

	/* Additional sanity check for the counter id */
	if (pmu_sbi_ctr_is_fw(idx)) {
		if (!test_and_set_bit(idx, cpuc->used_fw_ctrs))
			return idx;
	} else {
		if (!test_and_set_bit(idx, cpuc->used_hw_ctrs))
			return idx;
	}

	return -ENOENT;
}

static void pmu_sbi_ctr_clear_idx(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	int idx = hwc->idx;

	if (pmu_sbi_ctr_is_fw(idx))
		clear_bit(idx, cpuc->used_fw_ctrs);
	else
		clear_bit(idx, cpuc->used_hw_ctrs);
}

static int pmu_event_find_cache(u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx;

	return ret;
}

static bool pmu_sbi_is_fw_event(struct perf_event *event)
{
	u32 type = event->attr.type;
	u64 config = event->attr.config;

	if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1))
		return true;
	else
		return false;
}

static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
{
	u32 type = event->attr.type;
	u64 config = event->attr.config;
	int bSoftware;
	u64 raw_config_val;
	int ret;

	/*
	 * Ensure we are finished checking standard hardware events for
	 * validity before allowing userspace to configure any events.
	 */
	flush_work(&check_std_events_work);

	switch (type) {
	case PERF_TYPE_HARDWARE:
		if (config >= PERF_COUNT_HW_MAX)
			return -EINVAL;
		ret = pmu_hw_event_map[event->attr.config].event_idx;
		break;
	case PERF_TYPE_HW_CACHE:
		ret = pmu_event_find_cache(config);
		break;
	case PERF_TYPE_RAW:
		/*
		 * As per the SBI specification, the upper 16 bits must be unused
		 * for a raw event. Use the MSB (bit 63) to distinguish between a
		 * hardware raw event and firmware events.
		 */
		bSoftware = config >> 63;
		raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
		if (bSoftware) {
			ret = (raw_config_val & 0xFFFF) |
				(SBI_PMU_EVENT_TYPE_FW << 16);
		} else {
			ret = RISCV_PMU_RAW_EVENT_IDX;
			*econfig = raw_config_val;
		}
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
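
/*
 * Illustrative usage of the raw event encoding (the exact perf command lines
 * are an example of typical usage, not part of this driver): with the "event"
 * (config:0-47) and "firmware" (config:63) format attributes defined above,
 * and the PMU registered as "cpu", a hardware raw event can be requested as
 *
 *	perf stat -e cpu/event=0x1234/ ...
 *
 * while firmware=1 sets bit 63 of config, so the low 16 bits are interpreted
 * as an SBI firmware event index instead:
 *
 *	perf stat -e cpu/event=0x5,firmware=1/ ...
 */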

static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);

		if (!cpu_hw_evt->snapshot_addr)
			continue;

		free_page((unsigned long)cpu_hw_evt->snapshot_addr);
		cpu_hw_evt->snapshot_addr = NULL;
		cpu_hw_evt->snapshot_addr_phys = 0;
	}
}

static int pmu_sbi_snapshot_alloc(struct riscv_pmu *pmu)
{
	int cpu;
	struct page *snapshot_page;

	for_each_possible_cpu(cpu) {
		struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);

		snapshot_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
		if (!snapshot_page) {
			pmu_sbi_snapshot_free(pmu);
			return -ENOMEM;
		}
		cpu_hw_evt->snapshot_addr = page_to_virt(snapshot_page);
		cpu_hw_evt->snapshot_addr_phys = page_to_phys(snapshot_page);
	}

	return 0;
}

static int pmu_sbi_snapshot_disable(void)
{
	struct sbiret ret;

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, SBI_SHMEM_DISABLE,
			SBI_SHMEM_DISABLE, 0, 0, 0, 0);
	if (ret.error) {
		pr_warn("failed to disable snapshot shared memory\n");
		return sbi_err_map_linux_errno(ret.error);
	}

	return 0;
}

static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
{
	struct cpu_hw_events *cpu_hw_evt;
	struct sbiret ret = {0};

	cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
	if (!cpu_hw_evt->snapshot_addr_phys)
		return -EINVAL;

	if (cpu_hw_evt->snapshot_set_done)
		return 0;

	if (IS_ENABLED(CONFIG_32BIT))
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
				cpu_hw_evt->snapshot_addr_phys,
				(u64)(cpu_hw_evt->snapshot_addr_phys) >> 32, 0, 0, 0, 0);
	else
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
				cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0);

	/* Free up the snapshot area memory and fall back to SBI PMU calls without snapshot */
	if (ret.error) {
		if (ret.error != SBI_ERR_NOT_SUPPORTED)
			pr_warn("pmu snapshot setup failed with error %ld\n", ret.error);
		return sbi_err_map_linux_errno(ret.error);
	}

	memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
	cpu_hw_evt->snapshot_set_done = true;

	return 0;
}
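
/*
 * Summary of the snapshot flow as used in this driver (descriptive note):
 * pmu_sbi_snapshot_setup() registers one zeroed page per CPU with the SBI
 * implementation via SBI_EXT_PMU_SNAPSHOT_SET_SHMEM. Once enabled, counter
 * stop calls issued with SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT make the firmware
 * publish the counter values (ctr_values[]) and the overflow bitmap
 * (ctr_overflow_mask) in that page, so pmu_sbi_ctr_read() and the overflow
 * handler can read them from shared memory instead of issuing further ecalls
 * or CSR reads.
 */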

static u64 pmu_sbi_ctr_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	struct sbiret ret;
	u64 val = 0;
	struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
	union sbi_pmu_ctr_info info = pmu_ctr_list[idx];

	/* Read the value from the shared memory directly only if the counter is stopped */
	if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
		val = sdata->ctr_values[idx];
		return val;
	}

	if (pmu_sbi_is_fw_event(event)) {
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
				hwc->idx, 0, 0, 0, 0, 0);
		if (ret.error)
			return 0;

		val = ret.value;
		if (IS_ENABLED(CONFIG_32BIT) && sbi_v2_available && info.width >= 32) {
			ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ_HI,
					hwc->idx, 0, 0, 0, 0, 0);
			if (!ret.error)
				val |= ((u64)ret.value << 32);
			else
				WARN_ONCE(1, "Unable to read upper 32 bits of firmware counter error: %ld\n",
					  ret.error);
		}
	} else {
		val = riscv_pmu_ctr_read_csr(info.csr);
		if (IS_ENABLED(CONFIG_32BIT))
			val |= ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32;
	}

	return val;
}

static void pmu_sbi_set_scounteren(void *arg)
{
	struct perf_event *event = (struct perf_event *)arg;

	if (event->hw.idx != -1)
		csr_write(CSR_SCOUNTEREN,
			  csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event)));
}

static void pmu_sbi_reset_scounteren(void *arg)
{
	struct perf_event *event = (struct perf_event *)arg;

	if (event->hw.idx != -1)
		csr_write(CSR_SCOUNTEREN,
			  csr_read(CSR_SCOUNTEREN) & ~BIT(pmu_sbi_csr_index(event)));
}

static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
{
	struct sbiret ret;
	struct hw_perf_event *hwc = &event->hw;
	unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;

	/* There is no benefit in setting the SNAPSHOT flag for a single counter */
#if defined(CONFIG_32BIT)
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
			1, flag, ival, ival >> 32, 0);
#else
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
			1, flag, ival, 0, 0);
#endif
	if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
		pr_err("Starting counter idx %d failed with error %d\n",
		       hwc->idx, sbi_err_map_linux_errno(ret.error));

	if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
	    (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
		pmu_sbi_set_scounteren((void *)event);
}

static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
	struct sbiret ret;
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;

	if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
	    (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
		pmu_sbi_reset_scounteren((void *)event);

	if (sbi_pmu_snapshot_available())
		flag |= SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
	if (!ret.error && sbi_pmu_snapshot_available()) {
		/*
		 * The counter snapshot is based on the index base specified by hwc->idx.
		 * The actual counter value is updated in shared memory at index 0 when the
		 * counter mask is 0x01. To ensure accurate counter values, it's necessary
		 * to transfer the counter value to shared memory. However, if hwc->idx is
		 * zero, the counter value is already correctly updated in shared memory,
		 * requiring no further adjustment.
		 */
		if (hwc->idx > 0) {
			sdata->ctr_values[hwc->idx] = sdata->ctr_values[0];
			sdata->ctr_values[0] = 0;
		}
	} else if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
		   flag != SBI_PMU_STOP_FLAG_RESET) {
		pr_err("Stopping counter idx %d failed with error %d\n",
		       hwc->idx, sbi_err_map_linux_errno(ret.error));
	}
}
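
/*
 * Worked example for the snapshot fixup above (informational): stopping only
 * counter 3 issues COUNTER_STOP with counter_idx_base = 3 and a counter mask
 * of 0x01, so the firmware reports that counter's value in ctr_values[0].
 * The code above moves it to ctr_values[3] and clears slot 0 so that later
 * reads by logical index see the right value.
 */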

static int pmu_sbi_find_num_ctrs(void)
{
	struct sbiret ret;

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
	if (!ret.error)
		return ret.value;
	else
		return sbi_err_map_linux_errno(ret.error);
}

static int pmu_sbi_get_ctrinfo(int nctr, unsigned long *mask)
{
	struct sbiret ret;
	int i, num_hw_ctr = 0, num_fw_ctr = 0;
	union sbi_pmu_ctr_info cinfo;

	pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL);
	if (!pmu_ctr_list)
		return -ENOMEM;

	for (i = 0; i < nctr; i++) {
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
		if (ret.error)
			/* The logical counter ids are not expected to be contiguous */
			continue;

		*mask |= BIT(i);

		cinfo.value = ret.value;
		if (cinfo.type == SBI_PMU_CTR_TYPE_FW)
			num_fw_ctr++;
		else
			num_hw_ctr++;
		pmu_ctr_list[i].value = cinfo.value;
	}

	pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr);

	return 0;
}

static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
{
	/*
	 * No need to check the error because we are disabling all the counters,
	 * which may include counters that are not enabled yet.
	 */
	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
		  0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
}

static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
{
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
	unsigned long flag = 0;
	int i, idx;
	struct sbiret ret;
	u64 temp_ctr_overflow_mask = 0;

	if (sbi_pmu_snapshot_available())
		flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;

	/* Reset the shadow copy to avoid saving/restoring any value from a previous overflow */
	memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);

	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
		/* No need to check the error here as we can't do anything about the error */
		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
				cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
		if (!ret.error && sbi_pmu_snapshot_available()) {
			/* Save the counter values to avoid clobbering */
			for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
				cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
							sdata->ctr_values[idx];
			/* Save the overflow mask to avoid clobbering */
			temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG);
		}
	}

	/* Restore the counter values to the shared memory for used hw counters */
	if (sbi_pmu_snapshot_available()) {
		for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
			sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
		if (temp_ctr_overflow_mask)
			sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
	}
}

/*
 * This function starts all the used counters in a two-step approach.
 * Any counter that did not overflow can be started in a single step,
 * while the overflowed counters need to be started with an updated
 * initialization value.
 */
static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
					      u64 ctr_ovf_mask)
{
	int idx = 0, i;
	struct perf_event *event;
	unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
	unsigned long ctr_start_mask = 0;
	uint64_t max_period;
	struct hw_perf_event *hwc;
	u64 init_val = 0;

	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
		ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask;
		/* Start all the counters that did not overflow in a single shot */
		sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask,
			  0, 0, 0, 0);
	}

	/* Reinitialize and start all the counters that overflowed */
	while (ctr_ovf_mask) {
		if (ctr_ovf_mask & 0x01) {
			event = cpu_hw_evt->events[idx];
			hwc = &event->hw;
			max_period = riscv_pmu_ctr_get_width_mask(event);
			init_val = local64_read(&hwc->prev_count) & max_period;
#if defined(CONFIG_32BIT)
			sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
				  flag, init_val, init_val >> 32, 0);
#else
			sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
				  flag, init_val, 0, 0);
#endif
			perf_event_update_userpage(event);
		}
		ctr_ovf_mask = ctr_ovf_mask >> 1;
		idx++;
	}
}

static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
						   u64 ctr_ovf_mask)
{
	int i, idx = 0;
	struct perf_event *event;
	unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
	u64 max_period, init_val = 0;
	struct hw_perf_event *hwc;
	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;

	for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
		if (ctr_ovf_mask & BIT(idx)) {
			event = cpu_hw_evt->events[idx];
			hwc = &event->hw;
			max_period = riscv_pmu_ctr_get_width_mask(event);
			init_val = local64_read(&hwc->prev_count) & max_period;
			cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
		}
		/*
		 * We do not need to update the non-overflowed counters; the
		 * previous value should already be there.
		 */
	}

	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
		/* Restore the counter values to relative indices for used hw counters */
		for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
			sdata->ctr_values[idx] =
					cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
		/* Start all the counters in a single shot */
		sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG,
			  cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
	}
}

static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
					u64 ctr_ovf_mask)
{
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

	if (sbi_pmu_snapshot_available())
		pmu_sbi_start_ovf_ctrs_snapshot(cpu_hw_evt, ctr_ovf_mask);
	else
		pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask);
}

static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
	struct perf_sample_data data;
	struct pt_regs *regs;
	struct hw_perf_event *hw_evt;
	union sbi_pmu_ctr_info *info;
	int lidx, hidx, fidx;
	struct riscv_pmu *pmu;
	struct perf_event *event;
	u64 overflow;
	u64 overflowed_ctrs = 0;
	struct cpu_hw_events *cpu_hw_evt = dev;
	u64 start_clock = sched_clock();
	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;

	if (WARN_ON_ONCE(!cpu_hw_evt))
		return IRQ_NONE;

	/* Firmware counters don't support overflow yet */
	fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
	if (fidx == RISCV_MAX_COUNTERS) {
		csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
		return IRQ_NONE;
	}

	event = cpu_hw_evt->events[fidx];
	if (!event) {
		ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
		return IRQ_NONE;
	}

	pmu = to_riscv_pmu(event->pmu);
	pmu_sbi_stop_hw_ctrs(pmu);

	/* The overflow status register should only be read after the counters are stopped */
	if (sbi_pmu_snapshot_available())
		overflow = sdata->ctr_overflow_mask;
	else
		ALT_SBI_PMU_OVERFLOW(overflow);

	/*
	 * The overflow interrupt pending bit should only be cleared after stopping
	 * all the counters to avoid any race condition.
	 */
	ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);

	/* No overflow bit is set */
	if (!overflow)
		return IRQ_NONE;

	regs = get_irq_regs();

	for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
		struct perf_event *event = cpu_hw_evt->events[lidx];

		/* Skip if the event is invalid or the user did not request sampling */
		if (!event || !is_sampling_event(event))
			continue;

		info = &pmu_ctr_list[lidx];
		/* Do a sanity check */
		if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
			continue;

		if (sbi_pmu_snapshot_available())
			/* The SBI implementation already updated the logical indices */
			hidx = lidx;
		else
			/* compute hardware counter index */
			hidx = info->csr - CSR_CYCLE;

		/* check if the corresponding bit is set in scountovf or in the overflow mask in shmem */
		if (!(overflow & BIT(hidx)))
			continue;

		/*
		 * Keep track of overflowed counters so that they can be started
		 * with an updated initial value.
		 */
		overflowed_ctrs |= BIT(lidx);
		hw_evt = &event->hw;
		/* Update the event states here so that we know the state while reading */
		hw_evt->state |= PERF_HES_STOPPED;
		riscv_pmu_event_update(event);
		hw_evt->state |= PERF_HES_UPTODATE;
		perf_sample_data_init(&data, 0, hw_evt->last_period);
		if (riscv_pmu_event_set_period(event)) {
			/*
			 * Unlike other ISAs, RISC-V doesn't have to disable interrupts
			 * to avoid throttling here. As per the specification, the
			 * interrupt remains disabled until the OF bit is set.
			 * Interrupts are enabled again only during the start.
			 * TODO: We will need to stop the guest counters once
			 * virtualization support is added.
			 */
			perf_event_overflow(event, &data, regs);
		}
		/* Reset the state as we are going to start the counter after the loop */
		hw_evt->state = 0;
	}

	pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
	perf_sample_event_took(sched_clock() - start_clock);

	return IRQ_HANDLED;
}

static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

	/*
	 * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
	 * legacy option but that will be removed in the future.
	 */
	if (sysctl_perf_user_access == SYSCTL_LEGACY)
		csr_write(CSR_SCOUNTEREN, 0x7);
	else
		csr_write(CSR_SCOUNTEREN, 0x2);

	/* Stop all the counters so that they can be enabled from perf */
	pmu_sbi_stop_all(pmu);

	if (riscv_pmu_use_irq) {
		cpu_hw_evt->irq = riscv_pmu_irq;
		ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
		enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
	}

	if (sbi_pmu_snapshot_available())
		return pmu_sbi_snapshot_setup(pmu, cpu);

	return 0;
}

static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
{
	if (riscv_pmu_use_irq)
		disable_percpu_irq(riscv_pmu_irq);

	/* Disable all counter access for user mode now */
	csr_write(CSR_SCOUNTEREN, 0x0);

	if (sbi_pmu_snapshot_available())
		return pmu_sbi_snapshot_disable();

	return 0;
}

static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
{
	int ret;
	struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
	struct irq_domain *domain = NULL;

	if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
		riscv_pmu_irq_num = RV_IRQ_PMU;
		riscv_pmu_use_irq = true;
	} else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) &&
		   riscv_cached_mvendorid(0) == THEAD_VENDOR_ID &&
		   riscv_cached_marchid(0) == 0 &&
		   riscv_cached_mimpid(0) == 0) {
		riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU;
		riscv_pmu_use_irq = true;
	} else if (riscv_isa_extension_available(NULL, XANDESPMU) &&
		   IS_ENABLED(CONFIG_ANDES_CUSTOM_PMU)) {
		riscv_pmu_irq_num = ANDES_SLI_CAUSE_BASE + ANDES_RV_IRQ_PMOVI;
		riscv_pmu_use_irq = true;
	}

	riscv_pmu_irq_mask = BIT(riscv_pmu_irq_num % BITS_PER_LONG);

	if (!riscv_pmu_use_irq)
		return -EOPNOTSUPP;

	domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
					  DOMAIN_BUS_ANY);
	if (!domain) {
		pr_err("Failed to find INTC IRQ root domain\n");
		return -ENODEV;
	}

	riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num);
	if (!riscv_pmu_irq) {
		pr_err("Failed to map PMU interrupt for node\n");
		return -ENODEV;
	}

	ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
	if (ret) {
		pr_err("registering percpu irq failed [%d]\n", ret);
		return ret;
	}

	return 0;
}

#ifdef CONFIG_CPU_PM
static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
			       void *v)
{
	struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS);
	struct perf_event *event;
	int idx;

	if (!enabled)
		return NOTIFY_OK;

	for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) {
		event = cpuc->events[idx];
		if (!event)
			continue;

		switch (cmd) {
		case CPU_PM_ENTER:
			/*
			 * Stop and update the counter
			 */
			riscv_pmu_stop(event, PERF_EF_UPDATE);
			break;
		case CPU_PM_EXIT:
		case CPU_PM_ENTER_FAILED:
			/*
			 * Restore and enable the counter.
			 */
			riscv_pmu_start(event, PERF_EF_RELOAD);
			break;
		default:
			break;
		}
	}

	return NOTIFY_OK;
}

static int riscv_pm_pmu_register(struct riscv_pmu *pmu)
{
	pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify;
	return cpu_pm_register_notifier(&pmu->riscv_pm_nb);
}

static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
{
	cpu_pm_unregister_notifier(&pmu->riscv_pm_nb);
}
#else
static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; }
static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
#endif

static void riscv_pmu_destroy(struct riscv_pmu *pmu)
{
	if (sbi_v2_available) {
		if (sbi_pmu_snapshot_available()) {
			pmu_sbi_snapshot_disable();
			pmu_sbi_snapshot_free(pmu);
		}
	}
	riscv_pm_pmu_unregister(pmu);
	cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}

static void pmu_sbi_event_init(struct perf_event *event)
{
	/*
	 * The permissions are set at event_init so that we do not depend
	 * on the sysctl value that can change.
	 */
	if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
		event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
	else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
		event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
	else
		event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
}

static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
	if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
		return;

	if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
		    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
			return;
		}
	}

	/*
	 * The user mmapped the event to directly access it: this is where we
	 * determine, based on sysctl_perf_user_access, whether we grant
	 * userspace direct access to this event. That means that within the
	 * same task, some events may be directly accessible and some others
	 * may not, if the user changes the value of sysctl_perf_user_access
	 * in the meantime.
	 */

	event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;

	/*
	 * We must enable userspace access *before* advertising in the user page
	 * that it is possible to do so to avoid any race.
	 * And we must notify all cpus here because threads that currently run
	 * on other cpus will try to directly access the counter too without
	 * calling pmu_sbi_ctr_start.
	 */
	if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
		on_each_cpu_mask(mm_cpumask(mm),
				 pmu_sbi_set_scounteren, (void *)event, 1);
}

static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
	if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
		return;

	if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
		    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
			return;
		}
	}

	/*
	 * Here we can directly remove user access since the user does not have
	 * access to the user page anymore, so we avoid the racy window where the
	 * user could have read cap_user_rdpmc as true right before we disable
	 * it.
	 */
	event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;

	if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
		on_each_cpu_mask(mm_cpumask(mm),
				 pmu_sbi_reset_scounteren, (void *)event, 1);
}
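
/*
 * Informal note on the userspace side (a description of typical usage, not
 * code in this driver): a task that has mmapped the event reads the counter
 * directly in user mode, using the index/offset advertised in the perf mmap
 * page (cap_user_rdpmc) to pick the matching cycle/instret/hpmcounter CSR.
 * That is why SCOUNTEREN has to be toggled on every CPU in the mm's cpumask
 * in the mapped/unmapped callbacks above.
 */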

static void riscv_pmu_update_counter_access(void *info)
{
	if (sysctl_perf_user_access == SYSCTL_LEGACY)
		csr_write(CSR_SCOUNTEREN, 0x7);
	else
		csr_write(CSR_SCOUNTEREN, 0x2);
}

static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
					      int write, void *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int prev = sysctl_perf_user_access;
	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	/*
	 * Test against the previous value since we clear SCOUNTEREN when
	 * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
	 * not do that if that was already the case.
	 */
	if (ret || !write || prev == sysctl_perf_user_access)
		return ret;

	on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);

	return 0;
}

static struct ctl_table sbi_pmu_sysctl_table[] = {
	{
		.procname	= "perf_user_access",
		.data		= &sysctl_perf_user_access,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= riscv_pmu_proc_user_access_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
};
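
/*
 * Illustrative usage of the knob above (the shell commands are an example,
 * not part of this driver): the table is registered under "kernel" in the
 * probe function, so the mode can be changed at runtime with e.g.
 *
 *	# echo 0 > /proc/sys/kernel/perf_user_access   (no user access)
 *	# echo 1 > /proc/sys/kernel/perf_user_access   (per-event user access, the default)
 *	# echo 2 > /proc/sys/kernel/perf_user_access   (legacy: CYCLE/TIME/INSTRET always user-readable)
 *
 * The per-event flag is latched in pmu_sbi_event_init(), while SCOUNTEREN is
 * updated immediately on all CPUs by the handler above.
 */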

static int pmu_sbi_device_probe(struct platform_device *pdev)
{
	struct riscv_pmu *pmu = NULL;
	int ret = -ENODEV;
	int num_counters;

	pr_info("SBI PMU extension is available\n");
	pmu = riscv_pmu_alloc();
	if (!pmu)
		return -ENOMEM;

	num_counters = pmu_sbi_find_num_ctrs();
	if (num_counters < 0) {
		pr_err("SBI PMU extension doesn't provide any counters\n");
		goto out_free;
	}

	/* It is possible for SBI to report more counters than the maximum we support */
	if (num_counters > RISCV_MAX_COUNTERS) {
		num_counters = RISCV_MAX_COUNTERS;
		pr_info("SBI returned more than maximum number of counters. Limiting the number of counters to %d\n", num_counters);
	}

	/* cache all the information about counters now */
	if (pmu_sbi_get_ctrinfo(num_counters, &cmask))
		goto out_free;

	ret = pmu_sbi_setup_irqs(pmu, pdev);
	if (ret < 0) {
		pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
	}

	pmu->pmu.attr_groups = riscv_pmu_attr_groups;
	pmu->pmu.parent = &pdev->dev;
	pmu->cmask = cmask;
	pmu->ctr_start = pmu_sbi_ctr_start;
	pmu->ctr_stop = pmu_sbi_ctr_stop;
	pmu->event_map = pmu_sbi_event_map;
	pmu->ctr_get_idx = pmu_sbi_ctr_get_idx;
	pmu->ctr_get_width = pmu_sbi_ctr_get_width;
	pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
	pmu->ctr_read = pmu_sbi_ctr_read;
	pmu->event_init = pmu_sbi_event_init;
	pmu->event_mapped = pmu_sbi_event_mapped;
	pmu->event_unmapped = pmu_sbi_event_unmapped;
	pmu->csr_index = pmu_sbi_csr_index;

	ret = riscv_pm_pmu_register(pmu);
	if (ret)
		goto out_unregister;

	ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
	if (ret)
		goto out_unregister;

	/* The SBI PMU snapshot is only available in SBI v2.0 */
	if (sbi_v2_available) {
		ret = pmu_sbi_snapshot_alloc(pmu);
		if (ret)
			goto out_unregister;

		ret = pmu_sbi_snapshot_setup(pmu, smp_processor_id());
		if (ret) {
			/* Snapshot is an optional feature. Continue if not available */
			pmu_sbi_snapshot_free(pmu);
		} else {
			pr_info("SBI PMU snapshot detected\n");
			/*
			 * We enable it once here for the boot CPU. If snapshot shmem setup
			 * fails during the CPU hotplug process, it will fail to start the
			 * CPU as we cannot handle heterogeneous PMUs with different
			 * snapshot capability.
			 */
			static_branch_enable(&sbi_pmu_snapshot_available);
		}
	}

	register_sysctl("kernel", sbi_pmu_sysctl_table);

	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
	if (ret)
		goto out_unregister;

	/* Asynchronously check which standard events are available */
	schedule_work(&check_std_events_work);

	return 0;

out_unregister:
	riscv_pmu_destroy(pmu);

out_free:
	kfree(pmu);
	return ret;
}

static struct platform_driver pmu_sbi_driver = {
	.probe		= pmu_sbi_device_probe,
	.driver		= {
		.name	= RISCV_PMU_SBI_PDEV_NAME,
	},
};

static int __init pmu_sbi_devinit(void)
{
	int ret;
	struct platform_device *pdev;

	if (sbi_spec_version < sbi_mk_version(0, 3) ||
	    !sbi_probe_extension(SBI_EXT_PMU)) {
		return 0;
	}

	if (sbi_spec_version >= sbi_mk_version(2, 0))
		sbi_v2_available = true;

	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
				      "perf/riscv/pmu:starting",
				      pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
	if (ret) {
		pr_err("CPU hotplug notifier could not be registered: %d\n",
		       ret);
		return ret;
	}

	ret = platform_driver_register(&pmu_sbi_driver);
	if (ret)
		return ret;

	pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0);
	if (IS_ERR(pdev)) {
		platform_driver_unregister(&pmu_sbi_driver);
		return PTR_ERR(pdev);
	}

	/* Notify the legacy implementation that the SBI PMU is available */
	riscv_pmu_legacy_skip_init();

	return ret;
}
device_initcall(pmu_sbi_devinit)