// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This implementation is based on old RISC-V perf and ARM perf event code
 * which are in turn based on sparc64 and x86 code.
 */

#include <linux/cpumask.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/sched_clock.h>

#include <asm/sbi.h>

static bool riscv_perf_user_access(struct perf_event *event)
{
	return ((event->attr.type == PERF_TYPE_HARDWARE) ||
		(event->attr.type == PERF_TYPE_HW_CACHE) ||
		(event->attr.type == PERF_TYPE_RAW)) &&
		!!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) &&
		(event->hw.idx != -1);
}

void arch_perf_update_userpage(struct perf_event *event,
			       struct perf_event_mmap_page *userpg, u64 now)
{
	struct clock_read_data *rd;
	unsigned int seq;
	u64 ns;

	userpg->cap_user_time = 0;
	userpg->cap_user_time_zero = 0;
	userpg->cap_user_time_short = 0;
	userpg->cap_user_rdpmc = riscv_perf_user_access(event);

#ifdef CONFIG_RISCV_PMU
	/*
	 * The counters are 64-bit, but the priv spec doesn't mandate that all
	 * bits be implemented, which is why the counter width can vary across
	 * CPU vendors.
	 */
	if (userpg->cap_user_rdpmc)
		userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;
#endif

	do {
		rd = sched_clock_read_begin(&seq);

		userpg->time_mult = rd->mult;
		userpg->time_shift = rd->shift;
		userpg->time_zero = rd->epoch_ns;
		userpg->time_cycles = rd->epoch_cyc;
		userpg->time_mask = rd->sched_clock_mask;

		/*
		 * Subtract the cycle base, such that software that
		 * doesn't know about cap_user_time_short still 'works'
		 * assuming no wraps.
		 */
		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
		userpg->time_zero -= ns;

	} while (sched_clock_read_retry(seq));

	userpg->time_offset = userpg->time_zero - now;

	/*
	 * time_shift is not expected to be greater than 31 due to
	 * the original published conversion algorithm shifting a
	 * 32-bit value (now specifies a 64-bit value) - see the
	 * perf_event_mmap_page documentation in perf_event.h.
	 */
	if (userpg->time_shift == 32) {
		userpg->time_shift = 31;
		userpg->time_mult >>= 1;
	}

	/*
	 * Internal timekeeping for enabled/running/stopped times
	 * is always computed with the sched_clock.
	 */
	userpg->cap_user_time = 1;
	userpg->cap_user_time_zero = 1;
	userpg->cap_user_time_short = 1;
}
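/*
 * Illustrative sketch (not built here) of how a userspace reader is expected
 * to combine the fields exported above, following the perf_event_mmap_page
 * documentation in perf_event.h; the seqlock read loop on userpg->lock is
 * omitted for brevity:
 *
 *	cyc = read_cycle_counter();
 *	if (userpg->cap_user_time_short)
 *		cyc = userpg->time_cycles +
 *		      ((cyc - userpg->time_cycles) & userpg->time_mask);
 *	quot = cyc >> userpg->time_shift;
 *	rem  = cyc & (((u64)1 << userpg->time_shift) - 1);
 *	ns   = userpg->time_zero + quot * userpg->time_mult +
 *	       ((rem * userpg->time_mult) >> userpg->time_shift);
 */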
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val)		{\
	case __csr_num:					\
		__val = csr_read(__csr_num);		\
		break; }
#define switchcase_csr_read_2(__csr_num, __val)		{\
	switchcase_csr_read(__csr_num + 0, __val)	\
	switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val)		{\
	switchcase_csr_read_2(__csr_num + 0, __val)	\
	switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val)		{\
	switchcase_csr_read_4(__csr_num + 0, __val)	\
	switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val)	{\
	switchcase_csr_read_8(__csr_num + 0, __val)	\
	switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val)	{\
	switchcase_csr_read_16(__csr_num + 0, __val)	\
	switchcase_csr_read_16(__csr_num + 16, __val)}

	unsigned long ret = 0;

	switch (csr_num) {
	switchcase_csr_read_32(CSR_CYCLE, ret)
	switchcase_csr_read_32(CSR_CYCLEH, ret)
	default:
		break;
	}

	return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}

/*
 * Read the CSR of a corresponding counter.
 */
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
{
	if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
	    (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
		pr_err("Invalid performance counter csr %lx\n", csr);
		return -EINVAL;
	}

	return csr_read_num(csr);
}

u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
{
	int cwidth;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (!rvpmu->ctr_get_width)
		/*
		 * If the PMU driver doesn't report a counter width, fall back
		 * to the maximum width allowed by the specification.
		 */
		cwidth = 63;
	else {
		if (hwc->idx == -1)
			/* Handle init case where idx is not initialized yet */
			cwidth = rvpmu->ctr_get_width(0);
		else
			cwidth = rvpmu->ctr_get_width(hwc->idx);
	}

	return GENMASK_ULL(cwidth, 0);
}
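/*
 * Illustrative example: a driver whose ctr_get_width() returns 47 describes a
 * 48-bit counter, so the mask above is GENMASK_ULL(47, 0) ==
 * 0x0000ffffffffffff; the fallback of 63 yields an all-ones 64-bit mask.
 */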
u64 riscv_pmu_event_update(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;
	unsigned long cmask;
	u64 oldval, delta;

	if (!rvpmu->ctr_read)
		return 0;

	cmask = riscv_pmu_ctr_get_width_mask(event);

	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		new_raw_count = rvpmu->ctr_read(event);
		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					 new_raw_count);
	} while (oldval != prev_raw_count);

	delta = (new_raw_count - prev_raw_count) & cmask;
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return delta;
}

void riscv_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);

	if (!(hwc->state & PERF_HES_STOPPED)) {
		if (rvpmu->ctr_stop) {
			rvpmu->ctr_stop(event, 0);
			hwc->state |= PERF_HES_STOPPED;
		}
		riscv_pmu_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

int riscv_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * Limit the maximum period to prevent the counter value
	 * from overtaking the one we are about to program. In
	 * effect we are reducing max_period to account for
	 * interrupt latency (and we are being very conservative).
	 */
	if (left > (max_period >> 1))
		left = (max_period >> 1);

	local64_set(&hwc->prev_count, (u64)-left);

	perf_event_update_userpage(event);

	return overflow;
}
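/*
 * Illustrative note on the period programming above: prev_count is seeded
 * with (u64)-left so the counter overflows after 'left' more increments, and
 * riscv_pmu_start() programs the hardware with init_val = prev_count &
 * max_period. For example, with a 64-bit counter and left == 1000, the value
 * programmed is 0xfffffffffffffc18, so the overflow fires after 1000 events.
 */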
void riscv_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
	u64 init_val;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;
	riscv_pmu_event_set_period(event);
	init_val = local64_read(&hwc->prev_count) & max_period;
	rvpmu->ctr_start(event, init_val);
	perf_event_update_userpage(event);
}

static int riscv_pmu_add(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = rvpmu->ctr_get_idx(event);
	if (idx < 0)
		return idx;

	hwc->idx = idx;
	cpuc->events[idx] = event;
	cpuc->n_events++;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		riscv_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}

static void riscv_pmu_del(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;

	riscv_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[hwc->idx] = NULL;
	/* The firmware needs to reset the counter mapping */
	if (rvpmu->ctr_stop)
		rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
	cpuc->n_events--;
	if (rvpmu->ctr_clear_idx)
		rvpmu->ctr_clear_idx(event);
	perf_event_update_userpage(event);
	hwc->idx = -1;
}

static void riscv_pmu_read(struct perf_event *event)
{
	riscv_pmu_event_update(event);
}

static int riscv_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	int mapped_event;
	u64 event_config = 0;
	uint64_t cmask;

	hwc->flags = 0;
	mapped_event = rvpmu->event_map(event, &event_config);
	if (mapped_event < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapped_event;
	}

	/*
	 * idx is set to -1 because the index of a general event should not be
	 * decided until it is bound to a counter in pmu->add().
	 * config holds the counter CSR information returned by the driver;
	 * idx will hold the counter index once one is assigned.
	 */
	hwc->config = event_config;
	hwc->idx = -1;
	hwc->event_base = mapped_event;

	if (rvpmu->event_init)
		rvpmu->event_init(event);

	if (!is_sampling_event(event)) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		cmask = riscv_pmu_ctr_get_width_mask(event);
		hwc->sample_period = cmask >> 1;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}
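/*
 * Userspace counter index for direct reads via the mmap'd page: the perf ABI
 * treats userpg->index == 0 as "no direct read possible" and otherwise reads
 * hardware counter (index - 1), hence the +1 on the CSR index below.
 */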
static int riscv_pmu_event_idx(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
		return 0;

	if (rvpmu->csr_index)
		return rvpmu->csr_index(event) + 1;

	return 0;
}

static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_mapped) {
		rvpmu->event_mapped(event, mm);
		perf_event_update_userpage(event);
	}
}

static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_unmapped) {
		rvpmu->event_unmapped(event, mm);
		perf_event_update_userpage(event);
	}
}

struct riscv_pmu *riscv_pmu_alloc(void)
{
	struct riscv_pmu *pmu;
	int cpuid, i;
	struct cpu_hw_events *cpuc;

	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
	if (!pmu)
		goto out;

	pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
	if (!pmu->hw_events) {
		pr_info("failed to allocate per-cpu PMU data.\n");
		goto out_free_pmu;
	}

	for_each_possible_cpu(cpuid) {
		cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
		cpuc->n_events = 0;
		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
			cpuc->events[i] = NULL;
	}
	pmu->pmu = (struct pmu) {
		.event_init	= riscv_pmu_event_init,
		.event_mapped	= riscv_pmu_event_mapped,
		.event_unmapped	= riscv_pmu_event_unmapped,
		.event_idx	= riscv_pmu_event_idx,
		.add		= riscv_pmu_add,
		.del		= riscv_pmu_del,
		.start		= riscv_pmu_start,
		.stop		= riscv_pmu_stop,
		.read		= riscv_pmu_read,
	};

	return pmu;

out_free_pmu:
	kfree(pmu);
out:
	return NULL;
}
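/*
 * Expected usage by a backend driver (illustrative sketch only, with
 * hypothetical my_* callbacks): allocate the common state, fill in the
 * hardware callbacks used throughout this file, then register with the
 * perf core, e.g.
 *
 *	struct riscv_pmu *pmu = riscv_pmu_alloc();
 *
 *	if (!pmu)
 *		return -ENOMEM;
 *	pmu->ctr_start = my_ctr_start;
 *	pmu->ctr_stop = my_ctr_stop;
 *	pmu->ctr_read = my_ctr_read;
 *	pmu->ctr_get_idx = my_ctr_get_idx;
 *	pmu->ctr_get_width = my_ctr_get_width;
 *	pmu->event_map = my_event_map;
 *	ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
 */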