// SPDX-License-Identifier: GPL-2.0
/*
 * Linux performance counter support for LoongArch.
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 *
 * Derived from MIPS:
 * Copyright (C) 2010 MIPS Technologies, Inc.
 * Copyright (C) 2011 Cavium Networks, Inc.
 * Author: Deng-Cheng Zhu
 */

#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/sched/task_stack.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static unsigned long
user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
{
	unsigned long err;
	unsigned long __user *user_frame_tail;
	struct stack_frame buftail;

	user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame));

	/* Also check accessibility of one struct frame_tail beyond */
	if (!access_ok(user_frame_tail, sizeof(buftail)))
		return 0;

	pagefault_disable();
	err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail));
	pagefault_enable();

	if (err || (unsigned long)user_frame_tail >= buftail.fp)
		return 0;

	perf_callchain_store(entry, buftail.ra);

	return buftail.fp;
}

void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
			 struct pt_regs *regs)
{
	unsigned long fp;

	if (perf_guest_state()) {
		/* We don't support guest OS callchains yet */
		return;
	}

	perf_callchain_store(entry, regs->csr_era);

	fp = regs->regs[22];

	while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf))
		fp = user_backtrace(entry, fp);
}

void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
			   struct pt_regs *regs)
{
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, current, regs);
	     !unwind_done(&state); unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr || perf_callchain_store(entry, addr))
			return;
	}
}

#define LOONGARCH_MAX_HWEVENTS 32

struct cpu_hw_events {
	/* Array of events on this cpu. */
	struct perf_event *events[LOONGARCH_MAX_HWEVENTS];

	/*
	 * Set the bit (indexed by the counter number) when the counter
	 * is used for an event.
	 */
	unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)];

	/*
	 * Software copy of the control register for each performance counter.
	 */
	unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.saved_ctrl = {0},
};

/* The description of LoongArch performance events. */
struct loongarch_perf_event {
	unsigned int event_id;
};

static struct loongarch_perf_event raw_event;
static DEFINE_MUTEX(raw_event_mutex);

#define C(x) PERF_COUNT_HW_CACHE_##x
#define HW_OP_UNSUPPORTED	0xffffffff
#define CACHE_OP_UNSUPPORTED	0xffffffff

#define PERF_MAP_ALL_UNSUPPORTED				\
	[0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED}

#define PERF_CACHE_MAP_ALL_UNSUPPORTED				\
[0 ... C(MAX) - 1] = {						\
	[0 ... C(OP_MAX) - 1] = {				\
		[0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED},	\
	},							\
}

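/*
 * Per-model PMU description: counter geometry (width, overflow bit) plus the
 * accessor callbacks and event maps, filled in by init_hw_perf_events().
 */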
struct loongarch_pmu {
	u64		max_period;
	u64		valid_count;
	u64		overflow;
	const char	*name;
	unsigned int	num_counters;
	u64		(*read_counter)(unsigned int idx);
	void		(*write_counter)(unsigned int idx, u64 val);
	const struct loongarch_perf_event *(*map_raw_event)(u64 config);
	const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
	const struct loongarch_perf_event (*cache_event_map)
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
};

static struct loongarch_pmu loongarch_pmu;

#define M_PERFCTL_EVENT(event)	(event & CSR_PERFCTRL_EVENT)

#define M_PERFCTL_COUNT_EVENT_WHENEVER	(CSR_PERFCTRL_PLV0 |	\
					 CSR_PERFCTRL_PLV1 |	\
					 CSR_PERFCTRL_PLV2 |	\
					 CSR_PERFCTRL_PLV3 |	\
					 CSR_PERFCTRL_IE)

#define M_PERFCTL_CONFIG_MASK		0x1f0000

static void pause_local_counters(void);
static void resume_local_counters(void);

static u64 loongarch_pmu_read_counter(unsigned int idx)
{
	u64 val = -1;

	switch (idx) {
	case 0:
		val = read_csr_perfcntr0();
		break;
	case 1:
		val = read_csr_perfcntr1();
		break;
	case 2:
		val = read_csr_perfcntr2();
		break;
	case 3:
		val = read_csr_perfcntr3();
		break;
	default:
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
		return 0;
	}

	return val;
}

static void loongarch_pmu_write_counter(unsigned int idx, u64 val)
{
	switch (idx) {
	case 0:
		write_csr_perfcntr0(val);
		return;
	case 1:
		write_csr_perfcntr1(val);
		return;
	case 2:
		write_csr_perfcntr2(val);
		return;
	case 3:
		write_csr_perfcntr3(val);
		return;
	default:
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
		return;
	}
}

static unsigned int loongarch_pmu_read_control(unsigned int idx)
{
	unsigned int val = -1;

	switch (idx) {
	case 0:
		val = read_csr_perfctrl0();
		break;
	case 1:
		val = read_csr_perfctrl1();
		break;
	case 2:
		val = read_csr_perfctrl2();
		break;
	case 3:
		val = read_csr_perfctrl3();
		break;
	default:
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
		return 0;
	}

	return val;
}

static void loongarch_pmu_write_control(unsigned int idx, unsigned int val)
{
	switch (idx) {
	case 0:
		write_csr_perfctrl0(val);
		return;
	case 1:
		write_csr_perfctrl1(val);
		return;
	case 2:
		write_csr_perfctrl2(val);
		return;
	case 3:
		write_csr_perfctrl3(val);
		return;
	default:
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
		return;
	}
}

static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
	int i;

	for (i = 0; i < loongarch_pmu.num_counters; i++) {
		if (!test_and_set_bit(i, cpuc->used_mask))
			return i;
	}

	return -EAGAIN;
}

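/*
 * loongarch_pmu_enable_event() only prepares the software copy in
 * saved_ctrl[]; the value reaches the hardware control register when
 * perf core re-enables the PMU and resume_local_counters() runs.
 */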
static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx)
{
	unsigned int cpu;
	struct perf_event *event = container_of(evt, struct perf_event, hw);
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

	/* Make sure interrupt enabled. */
	cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) |
		(evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE;

	cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();

	/*
	 * We do not actually let the counter run. Leave it until start().
	 */
	pr_debug("Enabling perf counter for CPU%d\n", cpu);
}

static void loongarch_pmu_disable_event(int idx)
{
	unsigned long flags;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

	local_irq_save(flags);
	cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) &
		~M_PERFCTL_COUNT_EVENT_WHENEVER;
	loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
	local_irq_restore(flags);
}

static int loongarch_pmu_event_set_period(struct perf_event *event,
					  struct hw_perf_event *hwc,
					  int idx)
{
	int ret = 0;
	u64 left = local64_read(&hwc->period_left);
	u64 period = hwc->sample_period;

	if (unlikely((left + period) & (1ULL << 63))) {
		/* left underflowed by more than period. */
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	} else if (unlikely((left + period) <= period)) {
		/* left underflowed by less than period. */
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > loongarch_pmu.max_period) {
		left = loongarch_pmu.max_period;
		local64_set(&hwc->period_left, left);
	}

	local64_set(&hwc->prev_count, loongarch_pmu.overflow - left);

	loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left);

	perf_event_update_userpage(event);

	return ret;
}

static void loongarch_pmu_event_update(struct perf_event *event,
				       struct hw_perf_event *hwc,
				       int idx)
{
	u64 delta;
	u64 prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = loongarch_pmu.read_counter(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	delta = new_raw_count - prev_raw_count;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}

static void loongarch_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;

	/* Set the period for the event. */
	loongarch_pmu_event_set_period(event, hwc, hwc->idx);

	/* Enable the event. */
	loongarch_pmu_enable_event(hwc, hwc->idx);
}

static void loongarch_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		/* We are working on a local event. */
		loongarch_pmu_disable_event(hwc->idx);
		barrier();
		loongarch_pmu_event_update(event, hwc, hwc->idx);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

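/*
 * Bind the event to a free hardware counter. The counter is only started
 * here if perf core requests it via PERF_EF_START.
 */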
static int loongarch_pmu_add(struct perf_event *event, int flags)
{
	int idx, err = 0;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	perf_pmu_disable(event->pmu);

	/* Look for a free counter for this event. */
	idx = loongarch_pmu_alloc_counter(cpuc, hwc);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then
	 * make sure it is disabled.
	 */
	event->hw.idx = idx;
	loongarch_pmu_disable_event(idx);
	cpuc->events[idx] = event;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		loongarch_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

static void loongarch_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

	loongarch_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static void loongarch_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Don't read disabled counters! */
	if (hwc->idx < 0)
		return;

	loongarch_pmu_event_update(event, hwc, hwc->idx);
}

static void loongarch_pmu_enable(struct pmu *pmu)
{
	resume_local_counters();
}

static void loongarch_pmu_disable(struct pmu *pmu)
{
	pause_local_counters();
}

static DEFINE_MUTEX(pmu_reserve_mutex);
static atomic_t active_events = ATOMIC_INIT(0);

static void reset_counters(void *arg);
static int __hw_perf_event_init(struct perf_event *event);

static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
		on_each_cpu(reset_counters, NULL, 1);
		free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
		mutex_unlock(&pmu_reserve_mutex);
	}
}

static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
				    struct perf_sample_data *data, struct pt_regs *regs)
{
	struct perf_event *event = cpuc->events[idx];
	struct hw_perf_event *hwc = &event->hw;

	loongarch_pmu_event_update(event, hwc, idx);
	data->period = event->hw.last_period;
	if (!loongarch_pmu_event_set_period(event, hwc, idx))
		return;

	perf_event_overflow(event, data, regs);
}

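/*
 * Overflow interrupt handler: the local counters are paused while the
 * overflowed ones are identified and handed to handle_associated_event().
 */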
static irqreturn_t pmu_handle_irq(int irq, void *dev)
{
	int n;
	int handled = IRQ_NONE;
	uint64_t counter;
	struct pt_regs *regs;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * First we pause the local counters, so that when we are locked
	 * here, the counters are all paused. When it gets locked due to
	 * perf_disable(), the timer interrupt handler will be delayed.
	 *
	 * See also loongarch_pmu_start().
	 */
	pause_local_counters();

	regs = get_irq_regs();

	perf_sample_data_init(&data, 0, 0);

	for (n = 0; n < loongarch_pmu.num_counters; n++) {
		if (test_bit(n, cpuc->used_mask)) {
			counter = loongarch_pmu.read_counter(n);
			if (counter & loongarch_pmu.overflow) {
				handle_associated_event(cpuc, n, &data, regs);
				handled = IRQ_HANDLED;
			}
		}
	}

	resume_local_counters();

	/*
	 * Do all the work for the pending perf events. We can do this
	 * in here because the performance counter interrupt is a regular
	 * interrupt, not NMI.
	 */
	if (handled == IRQ_HANDLED)
		irq_work_run();

	return handled;
}

static int loongarch_pmu_event_init(struct perf_event *event)
{
	int r, irq;
	unsigned long flags;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
		break;

	default:
		/* Init it to avoid false validate_group */
		event->hw.event_base = 0xffffffff;
		return -ENOENT;
	}

	if (event->cpu >= 0 && !cpu_online(event->cpu))
		return -ENODEV;

	irq = get_percpu_irq(INT_PCOV);
	flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmu_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu);
			if (r < 0) {
				mutex_unlock(&pmu_reserve_mutex);
				pr_warn("PMU IRQ request failed\n");
				return -ENODEV;
			}
		}
		atomic_inc(&active_events);
		mutex_unlock(&pmu_reserve_mutex);
	}

	return __hw_perf_event_init(event);
}

static struct pmu pmu = {
	.pmu_enable	= loongarch_pmu_enable,
	.pmu_disable	= loongarch_pmu_disable,
	.event_init	= loongarch_pmu_event_init,
	.add		= loongarch_pmu_add,
	.del		= loongarch_pmu_del,
	.start		= loongarch_pmu_start,
	.stop		= loongarch_pmu_stop,
	.read		= loongarch_pmu_read,
};

static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev)
{
	return M_PERFCTL_EVENT(pev->event_id);
}

static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx)
{
	const struct loongarch_perf_event *pev;

	pev = &(*loongarch_pmu.general_event_map)[idx];

	if (pev->event_id == HW_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	return pev;
}

static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct loongarch_perf_event *pev;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	pev = &((*loongarch_pmu.cache_event_map)
					[cache_type]
					[cache_op]
					[cache_result]);

	if (pev->event_id == CACHE_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	return pev;
}

static inline bool loongarch_pmu_event_requires_counter(const struct perf_event *event)
{
	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
	case PERF_TYPE_RAW:
		return true;
	default:
		return false;
	}
}

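/*
 * Dry-run counter allocation against a scratch cpu_hw_events to check that
 * the whole group can be scheduled on the PMU at the same time.
 */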
static int validate_group(struct perf_event *event)
{
	struct cpu_hw_events fake_cpuc;
	struct perf_event *sibling, *leader = event->group_leader;

	memset(&fake_cpuc, 0, sizeof(fake_cpuc));

	if (loongarch_pmu_event_requires_counter(leader) &&
	    loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
		return -EINVAL;

	for_each_sibling_event(sibling, leader) {
		if (loongarch_pmu_event_requires_counter(sibling) &&
		    loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
			return -EINVAL;
	}

	if (loongarch_pmu_event_requires_counter(event) &&
	    loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
		return -EINVAL;

	return 0;
}

static void reset_counters(void *arg)
{
	int n;
	int counters = loongarch_pmu.num_counters;

	for (n = 0; n < counters; n++) {
		loongarch_pmu_write_control(n, 0);
		loongarch_pmu.write_counter(n, 0);
	}
}

static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = {
	PERF_MAP_ALL_UNSUPPORTED,
	[PERF_COUNT_HW_CPU_CYCLES]		= { 0x00 },
	[PERF_COUNT_HW_INSTRUCTIONS]		= { 0x01 },
	[PERF_COUNT_HW_CACHE_REFERENCES]	= { 0x08 },
	[PERF_COUNT_HW_CACHE_MISSES]		= { 0x09 },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= { 0x02 },
	[PERF_COUNT_HW_BRANCH_MISSES]		= { 0x03 },
};

static const struct loongarch_perf_event loongson_cache_map
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)] = {
	/*
	 * Like some other architectures (e.g. ARM), the performance
	 * counters don't differentiate between read and write
	 * accesses/misses, so this isn't strictly correct, but it's the
	 * best we can do. Writes and reads get combined.
	 */
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)]	= { 0x8 },
		[C(RESULT_MISS)]	= { 0x9 },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)]	= { 0x8 },
		[C(RESULT_MISS)]	= { 0x9 },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)]	= { 0xaa },
		[C(RESULT_MISS)]	= { 0xa9 },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)]	= { 0x6 },
		[C(RESULT_MISS)]	= { 0x7 },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)]	= { 0xc },
		[C(RESULT_MISS)]	= { 0xd },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)]	= { 0xc },
		[C(RESULT_MISS)]	= { 0xd },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_MISS)]	= { 0x3b },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)]	= { 0x4 },
		[C(RESULT_MISS)]	= { 0x3c },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)]	= { 0x4 },
		[C(RESULT_MISS)]	= { 0x3c },
	},
},
[C(BPU)] = {
	/* Using the same code for *HW_BRANCH* */
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)]	= { 0x02 },
		[C(RESULT_MISS)]	= { 0x03 },
	},
},
};

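/*
 * Translate the generic perf attributes into a LoongArch event descriptor
 * and the privilege-level filter bits kept in hwc->config_base.
 */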
static int __hw_perf_event_init(struct perf_event *event)
{
	int err;
	struct hw_perf_event *hwc = &event->hw;
	struct perf_event_attr *attr = &event->attr;
	const struct loongarch_perf_event *pev;

	/* Returning LoongArch event descriptor for generic perf event. */
	if (PERF_TYPE_HARDWARE == event->attr.type) {
		if (event->attr.config >= PERF_COUNT_HW_MAX)
			return -EINVAL;
		pev = loongarch_pmu_map_general_event(event->attr.config);
	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
		pev = loongarch_pmu_map_cache_event(event->attr.config);
	} else if (PERF_TYPE_RAW == event->attr.type) {
		/* We are working on the global raw event. */
		mutex_lock(&raw_event_mutex);
		pev = loongarch_pmu.map_raw_event(event->attr.config);
	} else {
		/* The event type is not (yet) supported. */
		return -EOPNOTSUPP;
	}

	if (IS_ERR(pev)) {
		if (PERF_TYPE_RAW == event->attr.type)
			mutex_unlock(&raw_event_mutex);
		return PTR_ERR(pev);
	}

	/*
	 * We allow max flexibility on how each individual counter shared
	 * by the single CPU operates (the mode exclusion and the range).
	 */
	hwc->config_base = CSR_PERFCTRL_IE;

	hwc->event_base = loongarch_pmu_perf_event_encode(pev);
	if (PERF_TYPE_RAW == event->attr.type)
		mutex_unlock(&raw_event_mutex);

	if (!attr->exclude_user) {
		hwc->config_base |= CSR_PERFCTRL_PLV3;
		hwc->config_base |= CSR_PERFCTRL_PLV2;
	}
	if (!attr->exclude_kernel) {
		hwc->config_base |= CSR_PERFCTRL_PLV0;
	}
	if (!attr->exclude_hv) {
		hwc->config_base |= CSR_PERFCTRL_PLV1;
	}

	hwc->config_base &= M_PERFCTL_CONFIG_MASK;
	/*
	 * The event can belong to another cpu. We do not assign a local
	 * counter for it for now.
	 */
	hwc->idx = -1;
	hwc->config = 0;

	if (!hwc->sample_period) {
		hwc->sample_period = loongarch_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	err = 0;
	if (event->group_leader != event)
		err = validate_group(event);

	event->destroy = hw_perf_event_destroy;

	if (err)
		event->destroy(event);

	return err;
}

static void pause_local_counters(void)
{
	unsigned long flags;
	int ctr = loongarch_pmu.num_counters;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	local_irq_save(flags);
	do {
		ctr--;
		cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr);
		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
					    ~M_PERFCTL_COUNT_EVENT_WHENEVER);
	} while (ctr > 0);
	local_irq_restore(flags);
}

static void resume_local_counters(void)
{
	int ctr = loongarch_pmu.num_counters;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	do {
		ctr--;
		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
	} while (ctr > 0);
}

static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
{
	raw_event.event_id = M_PERFCTL_EVENT(config);

	return &raw_event;
}

static int __init init_hw_perf_events(void)
{
	int bits, counters;

	if (!cpu_has_pmp)
		return -ENODEV;

	pr_info("Performance counters: ");
	bits = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1;
	counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1;

	loongarch_pmu.num_counters = counters;
	loongarch_pmu.max_period = (1ULL << 63) - 1;
	loongarch_pmu.valid_count = (1ULL << 63) - 1;
	loongarch_pmu.overflow = 1ULL << 63;
	loongarch_pmu.name = "loongarch/loongson64";
	loongarch_pmu.read_counter = loongarch_pmu_read_counter;
	loongarch_pmu.write_counter = loongarch_pmu_write_counter;
	loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event;
	loongarch_pmu.general_event_map = &loongson_event_map;
	loongarch_pmu.cache_event_map = &loongson_cache_map;

	on_each_cpu(reset_counters, NULL, 1);

	pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
		loongarch_pmu.name, counters, bits);

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

	return 0;
}
pure_initcall(init_hw_perf_events);