1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * RISC-V performance counter support. 4 * 5 * Copyright (C) 2021 Western Digital Corporation or its affiliates. 6 * 7 * This implementation is based on old RISC-V perf and ARM perf event code 8 * which are in turn based on sparc64 and x86 code. 9 */ 10 11 #include <linux/cpumask.h> 12 #include <linux/irq.h> 13 #include <linux/irqdesc.h> 14 #include <linux/perf/riscv_pmu.h> 15 #include <linux/printk.h> 16 #include <linux/smp.h> 17 #include <linux/sched_clock.h> 18 19 #include <asm/sbi.h> 20 21 static bool riscv_perf_user_access(struct perf_event *event) 22 { 23 return ((event->attr.type == PERF_TYPE_HARDWARE) || 24 (event->attr.type == PERF_TYPE_HW_CACHE) || 25 (event->attr.type == PERF_TYPE_RAW)) && 26 !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) && 27 (event->hw.idx != -1); 28 } 29 30 void arch_perf_update_userpage(struct perf_event *event, 31 struct perf_event_mmap_page *userpg, u64 now) 32 { 33 struct clock_read_data *rd; 34 unsigned int seq; 35 u64 ns; 36 37 userpg->cap_user_time = 0; 38 userpg->cap_user_time_zero = 0; 39 userpg->cap_user_time_short = 0; 40 userpg->cap_user_rdpmc = riscv_perf_user_access(event); 41 42 #ifdef CONFIG_RISCV_PMU 43 /* 44 * The counters are 64-bit but the priv spec doesn't mandate all the 45 * bits to be implemented: that's why, counter width can vary based on 46 * the cpu vendor. 47 */ 48 if (userpg->cap_user_rdpmc) 49 userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1; 50 #endif 51 52 do { 53 rd = sched_clock_read_begin(&seq); 54 55 userpg->time_mult = rd->mult; 56 userpg->time_shift = rd->shift; 57 userpg->time_zero = rd->epoch_ns; 58 userpg->time_cycles = rd->epoch_cyc; 59 userpg->time_mask = rd->sched_clock_mask; 60 61 /* 62 * Subtract the cycle base, such that software that 63 * doesn't know about cap_user_time_short still 'works' 64 * assuming no wraps. 65 */ 66 ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); 67 userpg->time_zero -= ns; 68 69 } while (sched_clock_read_retry(seq)); 70 71 userpg->time_offset = userpg->time_zero - now; 72 73 /* 74 * time_shift is not expected to be greater than 31 due to 75 * the original published conversion algorithm shifting a 76 * 32-bit value (now specifies a 64-bit value) - refer 77 * perf_event_mmap_page documentation in perf_event.h. 78 */ 79 if (userpg->time_shift == 32) { 80 userpg->time_shift = 31; 81 userpg->time_mult >>= 1; 82 } 83 84 /* 85 * Internal timekeeping for enabled/running/stopped times 86 * is always computed with the sched_clock. 87 */ 88 userpg->cap_user_time = 1; 89 userpg->cap_user_time_zero = 1; 90 userpg->cap_user_time_short = 1; 91 } 92 93 static unsigned long csr_read_num(int csr_num) 94 { 95 #define switchcase_csr_read(__csr_num, __val) {\ 96 case __csr_num: \ 97 __val = csr_read(__csr_num); \ 98 break; } 99 #define switchcase_csr_read_2(__csr_num, __val) {\ 100 switchcase_csr_read(__csr_num + 0, __val) \ 101 switchcase_csr_read(__csr_num + 1, __val)} 102 #define switchcase_csr_read_4(__csr_num, __val) {\ 103 switchcase_csr_read_2(__csr_num + 0, __val) \ 104 switchcase_csr_read_2(__csr_num + 2, __val)} 105 #define switchcase_csr_read_8(__csr_num, __val) {\ 106 switchcase_csr_read_4(__csr_num + 0, __val) \ 107 switchcase_csr_read_4(__csr_num + 4, __val)} 108 #define switchcase_csr_read_16(__csr_num, __val) {\ 109 switchcase_csr_read_8(__csr_num + 0, __val) \ 110 switchcase_csr_read_8(__csr_num + 8, __val)} 111 #define switchcase_csr_read_32(__csr_num, __val) {\ 112 switchcase_csr_read_16(__csr_num + 0, __val) \ 113 switchcase_csr_read_16(__csr_num + 16, __val)} 114 115 unsigned long ret = 0; 116 117 switch (csr_num) { 118 switchcase_csr_read_32(CSR_CYCLE, ret) 119 switchcase_csr_read_32(CSR_CYCLEH, ret) 120 default : 121 break; 122 } 123 124 return ret; 125 #undef switchcase_csr_read_32 126 #undef switchcase_csr_read_16 127 #undef switchcase_csr_read_8 128 #undef switchcase_csr_read_4 129 #undef switchcase_csr_read_2 130 #undef switchcase_csr_read 131 } 132 133 /* 134 * Read the CSR of a corresponding counter. 135 */ 136 unsigned long riscv_pmu_ctr_read_csr(unsigned long csr) 137 { 138 if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H || 139 (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) { 140 pr_err("Invalid performance counter csr %lx\n", csr); 141 return -EINVAL; 142 } 143 144 return csr_read_num(csr); 145 } 146 147 u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event) 148 { 149 int cwidth; 150 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 151 struct hw_perf_event *hwc = &event->hw; 152 153 if (hwc->idx == -1) 154 /* Handle init case where idx is not initialized yet */ 155 cwidth = rvpmu->ctr_get_width(0); 156 else 157 cwidth = rvpmu->ctr_get_width(hwc->idx); 158 159 return GENMASK_ULL(cwidth, 0); 160 } 161 162 u64 riscv_pmu_event_update(struct perf_event *event) 163 { 164 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 165 struct hw_perf_event *hwc = &event->hw; 166 u64 prev_raw_count, new_raw_count; 167 unsigned long cmask; 168 u64 oldval, delta; 169 170 if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE)) 171 return 0; 172 173 cmask = riscv_pmu_ctr_get_width_mask(event); 174 175 do { 176 prev_raw_count = local64_read(&hwc->prev_count); 177 new_raw_count = rvpmu->ctr_read(event); 178 oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, 179 new_raw_count); 180 } while (oldval != prev_raw_count); 181 182 delta = (new_raw_count - prev_raw_count) & cmask; 183 local64_add(delta, &event->count); 184 local64_sub(delta, &hwc->period_left); 185 186 return delta; 187 } 188 189 void riscv_pmu_stop(struct perf_event *event, int flags) 190 { 191 struct hw_perf_event *hwc = &event->hw; 192 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 193 194 if (!(hwc->state & PERF_HES_STOPPED)) { 195 if (rvpmu->ctr_stop) { 196 rvpmu->ctr_stop(event, 0); 197 hwc->state |= PERF_HES_STOPPED; 198 } 199 riscv_pmu_event_update(event); 200 hwc->state |= PERF_HES_UPTODATE; 201 } 202 } 203 204 int riscv_pmu_event_set_period(struct perf_event *event) 205 { 206 struct hw_perf_event *hwc = &event->hw; 207 s64 left = local64_read(&hwc->period_left); 208 s64 period = hwc->sample_period; 209 int overflow = 0; 210 uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); 211 212 if (unlikely(left <= -period)) { 213 left = period; 214 local64_set(&hwc->period_left, left); 215 hwc->last_period = period; 216 overflow = 1; 217 } 218 219 if (unlikely(left <= 0)) { 220 left += period; 221 local64_set(&hwc->period_left, left); 222 hwc->last_period = period; 223 overflow = 1; 224 } 225 226 /* 227 * Limit the maximum period to prevent the counter value 228 * from overtaking the one we are about to program. In 229 * effect we are reducing max_period to account for 230 * interrupt latency (and we are being very conservative). 231 */ 232 if (left > (max_period >> 1)) 233 left = (max_period >> 1); 234 235 local64_set(&hwc->prev_count, (u64)-left); 236 237 perf_event_update_userpage(event); 238 239 return overflow; 240 } 241 242 void riscv_pmu_start(struct perf_event *event, int flags) 243 { 244 struct hw_perf_event *hwc = &event->hw; 245 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 246 uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); 247 u64 init_val; 248 249 if (flags & PERF_EF_RELOAD) 250 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 251 252 hwc->state = 0; 253 riscv_pmu_event_set_period(event); 254 init_val = local64_read(&hwc->prev_count) & max_period; 255 rvpmu->ctr_start(event, init_val); 256 perf_event_update_userpage(event); 257 } 258 259 static int riscv_pmu_add(struct perf_event *event, int flags) 260 { 261 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 262 struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); 263 struct hw_perf_event *hwc = &event->hw; 264 int idx; 265 266 idx = rvpmu->ctr_get_idx(event); 267 if (idx < 0) 268 return idx; 269 270 hwc->idx = idx; 271 cpuc->events[idx] = event; 272 cpuc->n_events++; 273 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 274 if (flags & PERF_EF_START) 275 riscv_pmu_start(event, PERF_EF_RELOAD); 276 277 /* Propagate our changes to the userspace mapping. */ 278 perf_event_update_userpage(event); 279 280 return 0; 281 } 282 283 static void riscv_pmu_del(struct perf_event *event, int flags) 284 { 285 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 286 struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); 287 struct hw_perf_event *hwc = &event->hw; 288 289 riscv_pmu_stop(event, PERF_EF_UPDATE); 290 cpuc->events[hwc->idx] = NULL; 291 /* The firmware need to reset the counter mapping */ 292 if (rvpmu->ctr_stop) 293 rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET); 294 cpuc->n_events--; 295 if (rvpmu->ctr_clear_idx) 296 rvpmu->ctr_clear_idx(event); 297 perf_event_update_userpage(event); 298 hwc->idx = -1; 299 } 300 301 static void riscv_pmu_read(struct perf_event *event) 302 { 303 riscv_pmu_event_update(event); 304 } 305 306 static int riscv_pmu_event_init(struct perf_event *event) 307 { 308 struct hw_perf_event *hwc = &event->hw; 309 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 310 int mapped_event; 311 u64 event_config = 0; 312 uint64_t cmask; 313 314 /* driver does not support branch stack sampling */ 315 if (has_branch_stack(event)) 316 return -EOPNOTSUPP; 317 318 hwc->flags = 0; 319 mapped_event = rvpmu->event_map(event, &event_config); 320 if (mapped_event < 0) { 321 pr_debug("event %x:%llx not supported\n", event->attr.type, 322 event->attr.config); 323 return mapped_event; 324 } 325 326 /* 327 * idx is set to -1 because the index of a general event should not be 328 * decided until binding to some counter in pmu->add(). 329 * config will contain the information about counter CSR 330 * the idx will contain the counter index 331 */ 332 hwc->config = event_config; 333 hwc->idx = -1; 334 hwc->event_base = mapped_event; 335 336 if (rvpmu->event_init) 337 rvpmu->event_init(event); 338 339 if (!is_sampling_event(event)) { 340 /* 341 * For non-sampling runs, limit the sample_period to half 342 * of the counter width. That way, the new counter value 343 * is far less likely to overtake the previous one unless 344 * you have some serious IRQ latency issues. 345 */ 346 cmask = riscv_pmu_ctr_get_width_mask(event); 347 hwc->sample_period = cmask >> 1; 348 hwc->last_period = hwc->sample_period; 349 local64_set(&hwc->period_left, hwc->sample_period); 350 } 351 352 return 0; 353 } 354 355 static int riscv_pmu_event_idx(struct perf_event *event) 356 { 357 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 358 359 if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) 360 return 0; 361 362 if (rvpmu->csr_index) 363 return rvpmu->csr_index(event) + 1; 364 365 return 0; 366 } 367 368 static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) 369 { 370 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 371 372 if (rvpmu->event_mapped) { 373 rvpmu->event_mapped(event, mm); 374 perf_event_update_userpage(event); 375 } 376 } 377 378 static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) 379 { 380 struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); 381 382 if (rvpmu->event_unmapped) { 383 rvpmu->event_unmapped(event, mm); 384 perf_event_update_userpage(event); 385 } 386 } 387 388 struct riscv_pmu *riscv_pmu_alloc(void) 389 { 390 struct riscv_pmu *pmu; 391 int cpuid, i; 392 struct cpu_hw_events *cpuc; 393 394 pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); 395 if (!pmu) 396 goto out; 397 398 pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL); 399 if (!pmu->hw_events) { 400 pr_info("failed to allocate per-cpu PMU data.\n"); 401 goto out_free_pmu; 402 } 403 404 for_each_possible_cpu(cpuid) { 405 cpuc = per_cpu_ptr(pmu->hw_events, cpuid); 406 cpuc->n_events = 0; 407 for (i = 0; i < RISCV_MAX_COUNTERS; i++) 408 cpuc->events[i] = NULL; 409 cpuc->snapshot_addr = NULL; 410 } 411 pmu->pmu = (struct pmu) { 412 .event_init = riscv_pmu_event_init, 413 .event_mapped = riscv_pmu_event_mapped, 414 .event_unmapped = riscv_pmu_event_unmapped, 415 .event_idx = riscv_pmu_event_idx, 416 .add = riscv_pmu_add, 417 .del = riscv_pmu_del, 418 .start = riscv_pmu_start, 419 .stop = riscv_pmu_stop, 420 .read = riscv_pmu_read, 421 }; 422 423 return pmu; 424 425 out_free_pmu: 426 kfree(pmu); 427 out: 428 return NULL; 429 } 430