// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This implementation is based on old RISC-V perf and ARM perf event code
 * which are in turn based on sparc64 and x86 code.
 */

#include <linux/cpumask.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/sched_clock.h>

#include <asm/sbi.h>

static bool riscv_perf_user_access(struct perf_event *event)
{
	return ((event->attr.type == PERF_TYPE_HARDWARE) ||
		(event->attr.type == PERF_TYPE_HW_CACHE) ||
		(event->attr.type == PERF_TYPE_RAW)) &&
		!!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) &&
		(event->hw.idx != -1);
}

void arch_perf_update_userpage(struct perf_event *event,
			       struct perf_event_mmap_page *userpg, u64 now)
{
	struct clock_read_data *rd;
	unsigned int seq;
	u64 ns;

	userpg->cap_user_time = 0;
	userpg->cap_user_time_zero = 0;
	userpg->cap_user_time_short = 0;
	userpg->cap_user_rdpmc = riscv_perf_user_access(event);

	/*
	 * The counters are 64-bit but the priv spec doesn't mandate that all
	 * the bits be implemented, so the counter width can vary from one
	 * CPU vendor to another.
	 */
	if (userpg->cap_user_rdpmc)
		userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;

	do {
		rd = sched_clock_read_begin(&seq);

		userpg->time_mult = rd->mult;
		userpg->time_shift = rd->shift;
		userpg->time_zero = rd->epoch_ns;
		userpg->time_cycles = rd->epoch_cyc;
		userpg->time_mask = rd->sched_clock_mask;

		/*
		 * Subtract the cycle base, such that software that
		 * doesn't know about cap_user_time_short still 'works'
		 * assuming no wraps.
		 */
		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
		userpg->time_zero -= ns;

	} while (sched_clock_read_retry(seq));

	userpg->time_offset = userpg->time_zero - now;

	/*
	 * time_shift is not expected to be greater than 31 due to
	 * the original published conversion algorithm shifting a
	 * 32-bit value (now specifies a 64-bit value) - refer
	 * perf_event_mmap_page documentation in perf_event.h.
	 */
	if (userpg->time_shift == 32) {
		userpg->time_shift = 31;
		userpg->time_mult >>= 1;
	}

	/*
	 * Internal timekeeping for enabled/running/stopped times
	 * is always computed with the sched_clock.
	 */
	userpg->cap_user_time = 1;
	userpg->cap_user_time_zero = 1;
	userpg->cap_user_time_short = 1;
}
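
/*
 * csr_read() needs a compile-time constant CSR number, so csr_read_num()
 * below builds a switch statement covering CSR_CYCLE..CSR_HPMCOUNTER31 and
 * CSR_CYCLEH..CSR_HPMCOUNTER31H via the nested switchcase_csr_read_*()
 * macros and dispatches on the runtime csr_num argument.
 */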
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val)		{\
	case __csr_num:					\
		__val = csr_read(__csr_num);		\
		break; }
#define switchcase_csr_read_2(__csr_num, __val)		{\
	switchcase_csr_read(__csr_num + 0, __val)	\
	switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val)		{\
	switchcase_csr_read_2(__csr_num + 0, __val)	\
	switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val)		{\
	switchcase_csr_read_4(__csr_num + 0, __val)	\
	switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val)	{\
	switchcase_csr_read_8(__csr_num + 0, __val)	\
	switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val)	{\
	switchcase_csr_read_16(__csr_num + 0, __val)	\
	switchcase_csr_read_16(__csr_num + 16, __val)}

	unsigned long ret = 0;

	switch (csr_num) {
	switchcase_csr_read_32(CSR_CYCLE, ret)
	switchcase_csr_read_32(CSR_CYCLEH, ret)
	default:
		break;
	}

	return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}

/*
 * Read the CSR of the corresponding counter.
 */
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
{
	if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
	    (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
		pr_err("Invalid performance counter csr %lx\n", csr);
		return -EINVAL;
	}

	return csr_read_num(csr);
}

u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
{
	int cwidth;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->idx == -1)
		/* Handle init case where idx is not initialized yet */
		cwidth = rvpmu->ctr_get_width(0);
	else
		cwidth = rvpmu->ctr_get_width(hwc->idx);

	return GENMASK_ULL(cwidth, 0);
}

u64 riscv_pmu_event_update(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;
	unsigned long cmask;
	u64 oldval, delta;

	if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
		return 0;

	cmask = riscv_pmu_ctr_get_width_mask(event);

	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		new_raw_count = rvpmu->ctr_read(event);
		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					 new_raw_count);
	} while (oldval != prev_raw_count);

	delta = (new_raw_count - prev_raw_count) & cmask;
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return delta;
}

void riscv_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (!(hwc->state & PERF_HES_STOPPED)) {
		if (rvpmu->ctr_stop) {
			rvpmu->ctr_stop(event, 0);
			hwc->state |= PERF_HES_STOPPED;
		}
		riscv_pmu_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}
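
/*
 * Compute the start value for the next sample period and stash it in
 * hwc->prev_count; the caller programs it into the counter. Returns 1
 * if the previous period has elapsed.
 */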
int riscv_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * Limit the maximum period to prevent the counter value
	 * from overtaking the one we are about to program. In
	 * effect we are reducing max_period to account for
	 * interrupt latency (and we are being very conservative).
	 */
	if (left > (max_period >> 1))
		left = (max_period >> 1);

	local64_set(&hwc->prev_count, (u64)-left);

	perf_event_update_userpage(event);

	return overflow;
}

void riscv_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
	u64 init_val;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;
	riscv_pmu_event_set_period(event);
	init_val = local64_read(&hwc->prev_count) & max_period;
	rvpmu->ctr_start(event, init_val);
	perf_event_update_userpage(event);
}
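
/*
 * Bind the event to a free hardware counter on this CPU and, if
 * PERF_EF_START is set, start counting immediately.
 */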
static int riscv_pmu_add(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = rvpmu->ctr_get_idx(event);
	if (idx < 0)
		return idx;

	hwc->idx = idx;
	cpuc->events[idx] = event;
	cpuc->n_events++;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		riscv_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}

static void riscv_pmu_del(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;

	riscv_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[hwc->idx] = NULL;
	/* The firmware needs to reset the counter mapping */
	if (rvpmu->ctr_stop)
		rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
	cpuc->n_events--;
	if (rvpmu->ctr_clear_idx)
		rvpmu->ctr_clear_idx(event);
	perf_event_update_userpage(event);
	hwc->idx = -1;
}

static void riscv_pmu_read(struct perf_event *event)
{
	riscv_pmu_event_update(event);
}

static int riscv_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	int mapped_event;
	u64 event_config = 0;
	uint64_t cmask;

	/* driver does not support branch stack sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	hwc->flags = 0;
	mapped_event = rvpmu->event_map(event, &event_config);
	if (mapped_event < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapped_event;
	}

	/*
	 * idx is set to -1 because the index of a general event should not be
	 * decided until binding to some counter in pmu->add().
	 * config will contain the counter CSR information and idx will
	 * contain the counter index.
	 */
	hwc->config = event_config;
	hwc->idx = -1;
	hwc->event_base = mapped_event;

	if (rvpmu->event_init)
		rvpmu->event_init(event);

	if (!is_sampling_event(event)) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		cmask = riscv_pmu_ctr_get_width_mask(event);
		hwc->sample_period = cmask >> 1;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}
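
/*
 * Index published in the mmap'ed event page for userspace counter reads:
 * 0 means direct reads are not allowed, otherwise the counter CSR index
 * plus one (userspace subtracts one before reading the counter).
 */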
static int riscv_pmu_event_idx(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
		return 0;

	if (rvpmu->csr_index)
		return rvpmu->csr_index(event) + 1;

	return 0;
}

static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_mapped) {
		rvpmu->event_mapped(event, mm);
		perf_event_update_userpage(event);
	}
}

static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_unmapped) {
		rvpmu->event_unmapped(event, mm);
		perf_event_update_userpage(event);
	}
}

struct riscv_pmu *riscv_pmu_alloc(void)
{
	struct riscv_pmu *pmu;
	int cpuid, i;
	struct cpu_hw_events *cpuc;

	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
	if (!pmu)
		goto out;

	pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
	if (!pmu->hw_events) {
		pr_info("failed to allocate per-cpu PMU data.\n");
		goto out_free_pmu;
	}

	for_each_possible_cpu(cpuid) {
		cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
		cpuc->n_events = 0;
		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
			cpuc->events[i] = NULL;
		cpuc->snapshot_addr = NULL;
	}
	pmu->pmu = (struct pmu) {
		.event_init	= riscv_pmu_event_init,
		.event_mapped	= riscv_pmu_event_mapped,
		.event_unmapped	= riscv_pmu_event_unmapped,
		.event_idx	= riscv_pmu_event_idx,
		.add		= riscv_pmu_add,
		.del		= riscv_pmu_del,
		.start		= riscv_pmu_start,
		.stop		= riscv_pmu_stop,
		.read		= riscv_pmu_read,
	};

	return pmu;

out_free_pmu:
	kfree(pmu);
out:
	return NULL;
}
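
/*
 * Illustrative sketch only (not part of this file): a platform PMU driver
 * would typically allocate the common structure with riscv_pmu_alloc(),
 * fill in the ctr_* and event_map callbacks it implements, and register
 * pmu->pmu with perf_pmu_register(). The my_* callback names below are
 * placeholders, not real functions.
 *
 *	static int __init my_pmu_probe(void)
 *	{
 *		struct riscv_pmu *pmu = riscv_pmu_alloc();
 *
 *		if (!pmu)
 *			return -ENOMEM;
 *
 *		pmu->ctr_get_idx = my_ctr_get_idx;
 *		pmu->ctr_get_width = my_ctr_get_width;
 *		pmu->ctr_start = my_ctr_start;
 *		pmu->ctr_stop = my_ctr_stop;
 *		pmu->ctr_read = my_ctr_read;
 *		pmu->event_map = my_event_map;
 *
 *		return perf_pmu_register(&pmu->pmu, "my_pmu", -1);
 *	}
 */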