// SPDX-License-Identifier: GPL-2.0
/*
 * Linux performance counter support for LoongArch.
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 *
 * Derived from MIPS:
 * Copyright (C) 2010 MIPS Technologies, Inc.
 * Copyright (C) 2011 Cavium Networks, Inc.
 * Author: Deng-Cheng Zhu
 */

#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/sched/task_stack.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static unsigned long
user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
{
	unsigned long err;
	unsigned long __user *user_frame_tail;
	struct stack_frame buftail;

	user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame));

	/* Also check accessibility of one struct stack_frame beyond */
	if (!access_ok(user_frame_tail, sizeof(buftail)))
		return 0;

	pagefault_disable();
	err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail));
	pagefault_enable();

	if (err || (unsigned long)user_frame_tail >= buftail.fp)
		return 0;

	perf_callchain_store(entry, buftail.ra);

	return buftail.fp;
}

void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
			 struct pt_regs *regs)
{
	unsigned long fp;

	if (perf_guest_state()) {
		/* We don't support guest OS callchains yet */
		return;
	}

	perf_callchain_store(entry, regs->csr_era);

	fp = regs->regs[22];

	while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf))
		fp = user_backtrace(entry, fp);
}

void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
			   struct pt_regs *regs)
{
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, current, regs);
	     !unwind_done(&state); unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr || perf_callchain_store(entry, addr))
			return;
	}
}

#define LOONGARCH_MAX_HWEVENTS 32

struct cpu_hw_events {
	/* Array of events on this cpu. */
	struct perf_event *events[LOONGARCH_MAX_HWEVENTS];

	/*
	 * Set the bit (indexed by the counter number) when the counter
	 * is used for an event.
	 */
	unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)];

	/*
	 * Software copy of the control register for each performance counter.
	 */
	unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.saved_ctrl = {0},
};

/* The description of LoongArch performance events. */
struct loongarch_perf_event {
	unsigned int event_id;
};

static struct loongarch_perf_event raw_event;
static DEFINE_MUTEX(raw_event_mutex);

#define C(x) PERF_COUNT_HW_CACHE_##x
#define HW_OP_UNSUPPORTED	0xffffffff
#define CACHE_OP_UNSUPPORTED	0xffffffff

#define PERF_MAP_ALL_UNSUPPORTED					\
	[0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED}

#define PERF_CACHE_MAP_ALL_UNSUPPORTED					\
[0 ... C(MAX) - 1] = {							\
	[0 ... C(OP_MAX) - 1] = {					\
		[0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED},	\
	},								\
}
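/*
 * "[0 ... N - 1]" above is a GNU C designated range initializer: the two
 * macros mark every slot of an event map as unsupported by default, so the
 * per-model tables further down only need to list the events this PMU
 * actually implements, e.g.:
 *
 *	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00 },
 */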
struct loongarch_pmu {
	u64		max_period;
	u64		valid_count;
	u64		overflow;
	const char	*name;
	unsigned int	num_counters;
	u64		(*read_counter)(unsigned int idx);
	void		(*write_counter)(unsigned int idx, u64 val);
	const struct loongarch_perf_event *(*map_raw_event)(u64 config);
	const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
	const struct loongarch_perf_event (*cache_event_map)
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
};

static struct loongarch_pmu loongarch_pmu;

#define M_PERFCTL_EVENT(event)	((event) & CSR_PERFCTRL_EVENT)

#define M_PERFCTL_COUNT_EVENT_WHENEVER	(CSR_PERFCTRL_PLV0 |	\
					 CSR_PERFCTRL_PLV1 |	\
					 CSR_PERFCTRL_PLV2 |	\
					 CSR_PERFCTRL_PLV3 |	\
					 CSR_PERFCTRL_IE)

#define M_PERFCTL_CONFIG_MASK		0x1f0000

static void pause_local_counters(void);
static void resume_local_counters(void);
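/*
 * Each hardware counter is a dedicated CSR pair (PERFCNTR0..3 for the
 * count, PERFCTRL0..3 for the control). CSR numbers are instruction
 * immediates on LoongArch, so the accessors below dispatch on the counter
 * index with a switch instead of computing a register address at run time.
 */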
static u64 loongarch_pmu_read_counter(unsigned int idx)
{
	u64 val = -1;

	switch (idx) {
	case 0:
		val = read_csr_perfcntr0();
		break;
	case 1:
		val = read_csr_perfcntr1();
		break;
	case 2:
		val = read_csr_perfcntr2();
		break;
	case 3:
		val = read_csr_perfcntr3();
		break;
	default:
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
		return 0;
	}

	return val;
}

static void loongarch_pmu_write_counter(unsigned int idx, u64 val)
{
	switch (idx) {
	case 0:
		write_csr_perfcntr0(val);
		return;
	case 1:
		write_csr_perfcntr1(val);
		return;
	case 2:
		write_csr_perfcntr2(val);
		return;
	case 3:
		write_csr_perfcntr3(val);
		return;
	default:
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
		return;
	}
}

static unsigned int loongarch_pmu_read_control(unsigned int idx)
{
	unsigned int val = -1;

	switch (idx) {
	case 0:
		val = read_csr_perfctrl0();
		break;
	case 1:
		val = read_csr_perfctrl1();
		break;
	case 2:
		val = read_csr_perfctrl2();
		break;
	case 3:
		val = read_csr_perfctrl3();
		break;
	default:
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
		return 0;
	}

	return val;
}

static void loongarch_pmu_write_control(unsigned int idx, unsigned int val)
{
	switch (idx) {
	case 0:
		write_csr_perfctrl0(val);
		return;
	case 1:
		write_csr_perfctrl1(val);
		return;
	case 2:
		write_csr_perfctrl2(val);
		return;
	case 3:
		write_csr_perfctrl3(val);
		return;
	default:
		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
		return;
	}
}

static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
	int i;

	for (i = 0; i < loongarch_pmu.num_counters; i++) {
		if (!test_and_set_bit(i, cpuc->used_mask))
			return i;
	}

	return -EAGAIN;
}

static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx)
{
	unsigned int cpu;
	struct perf_event *event = container_of(evt, struct perf_event, hw);
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

	/* Make sure the interrupt is enabled. */
	cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) |
		(evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE;

	cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();

	/*
	 * We do not actually let the counter run. Leave it until start().
	 */
	pr_debug("Enabling perf counter for CPU%d\n", cpu);
}

static void loongarch_pmu_disable_event(int idx)
{
	unsigned long flags;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

	local_irq_save(flags);
	cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) &
		~M_PERFCTL_COUNT_EVENT_WHENEVER;
	loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
	local_irq_restore(flags);
}

static int loongarch_pmu_event_set_period(struct perf_event *event,
					  struct hw_perf_event *hwc,
					  int idx)
{
	int ret = 0;
	u64 left = local64_read(&hwc->period_left);
	u64 period = hwc->sample_period;

	if (unlikely((left + period) & (1ULL << 63))) {
		/* left underflowed by more than period. */
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	} else if (unlikely((left + period) <= period)) {
		/* left underflowed by less than period. */
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > loongarch_pmu.max_period) {
		left = loongarch_pmu.max_period;
		local64_set(&hwc->period_left, left);
	}

	local64_set(&hwc->prev_count, loongarch_pmu.overflow - left);

	loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left);

	perf_event_update_userpage(event);

	return ret;
}

static void loongarch_pmu_event_update(struct perf_event *event,
				       struct hw_perf_event *hwc,
				       int idx)
{
	u64 delta;
	u64 prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = loongarch_pmu.read_counter(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	delta = new_raw_count - prev_raw_count;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}

static void loongarch_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;

	/* Set the period for the event. */
	loongarch_pmu_event_set_period(event, hwc, hwc->idx);

	/* Enable the event. */
	loongarch_pmu_enable_event(hwc, hwc->idx);
}

static void loongarch_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		/* We are working on a local event. */
		loongarch_pmu_disable_event(hwc->idx);
		barrier();
		loongarch_pmu_event_update(event, hwc, hwc->idx);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}
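/*
 * The perf core schedules an event onto a counter with pmu->add() (which
 * may immediately call loongarch_pmu_start()) and takes it off again with
 * pmu->stop() followed by pmu->del(): add()/del() allocate and release the
 * counter, while start()/stop() arm it and drain the final count.
 */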
static int loongarch_pmu_add(struct perf_event *event, int flags)
{
	int idx, err = 0;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	perf_pmu_disable(event->pmu);

	/* Look for a free counter for this event. */
	idx = loongarch_pmu_alloc_counter(cpuc, hwc);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then
	 * make sure it is disabled.
	 */
	event->hw.idx = idx;
	loongarch_pmu_disable_event(idx);
	cpuc->events[idx] = event;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		loongarch_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

static void loongarch_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

	loongarch_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static void loongarch_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Don't read disabled counters! */
	if (hwc->idx < 0)
		return;

	loongarch_pmu_event_update(event, hwc, hwc->idx);
}

static void loongarch_pmu_enable(struct pmu *pmu)
{
	resume_local_counters();
}

static void loongarch_pmu_disable(struct pmu *pmu)
{
	pause_local_counters();
}

static DEFINE_MUTEX(pmu_reserve_mutex);
static atomic_t active_events = ATOMIC_INIT(0);

static void reset_counters(void *arg);
static int __hw_perf_event_init(struct perf_event *event);

static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
		on_each_cpu(reset_counters, NULL, 1);
		free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
		mutex_unlock(&pmu_reserve_mutex);
	}
}

static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
				    struct perf_sample_data *data, struct pt_regs *regs)
{
	struct perf_event *event = cpuc->events[idx];
	struct hw_perf_event *hwc = &event->hw;

	loongarch_pmu_event_update(event, hwc, idx);
	data->period = event->hw.last_period;
	if (!loongarch_pmu_event_set_period(event, hwc, idx))
		return;

	if (perf_event_overflow(event, data, regs))
		loongarch_pmu_disable_event(idx);
}
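/*
 * A counter is seeded with (overflow - left), which keeps bit 63 clear, so
 * after "left" more events the count reaches 1ULL << 63. The handler below
 * therefore treats a set bit 63 (loongarch_pmu.overflow) in the raw count
 * as "this counter overflowed".
 */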
static irqreturn_t pmu_handle_irq(int irq, void *dev)
{
	int n;
	int handled = IRQ_NONE;
	uint64_t counter;
	struct pt_regs *regs;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * First we pause the local counters, so that when we are locked
	 * here, the counters are all paused. When it gets locked due to
	 * perf_disable(), the timer interrupt handler will be delayed.
	 *
	 * See also loongarch_pmu_start().
	 */
	pause_local_counters();

	regs = get_irq_regs();

	perf_sample_data_init(&data, 0, 0);

	for (n = 0; n < loongarch_pmu.num_counters; n++) {
		if (test_bit(n, cpuc->used_mask)) {
			counter = loongarch_pmu.read_counter(n);
			if (counter & loongarch_pmu.overflow) {
				handle_associated_event(cpuc, n, &data, regs);
				handled = IRQ_HANDLED;
			}
		}
	}

	resume_local_counters();

	/*
	 * Do all the work for the pending perf events. We can do this in
	 * here because the performance counter interrupt is a regular
	 * interrupt, not an NMI.
	 */
	if (handled == IRQ_HANDLED)
		irq_work_run();

	return handled;
}

static int loongarch_pmu_event_init(struct perf_event *event)
{
	int r, irq;
	unsigned long flags;

	/* We do not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
		break;

	default:
		/* Init it to avoid false validate_group */
		event->hw.event_base = 0xffffffff;
		return -ENOENT;
	}

	if (event->cpu >= 0 && !cpu_online(event->cpu))
		return -ENODEV;

	irq = get_percpu_irq(INT_PCOV);
	flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmu_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu);
			if (r < 0) {
				mutex_unlock(&pmu_reserve_mutex);
				pr_warn("PMU IRQ request failed\n");
				return -ENODEV;
			}
		}
		atomic_inc(&active_events);
		mutex_unlock(&pmu_reserve_mutex);
	}

	return __hw_perf_event_init(event);
}

static struct pmu pmu = {
	.pmu_enable	= loongarch_pmu_enable,
	.pmu_disable	= loongarch_pmu_disable,
	.event_init	= loongarch_pmu_event_init,
	.add		= loongarch_pmu_add,
	.del		= loongarch_pmu_del,
	.start		= loongarch_pmu_start,
	.stop		= loongarch_pmu_stop,
	.read		= loongarch_pmu_read,
};

static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev)
{
	return M_PERFCTL_EVENT(pev->event_id);
}

static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx)
{
	const struct loongarch_perf_event *pev;

	pev = &(*loongarch_pmu.general_event_map)[idx];

	if (pev->event_id == HW_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	return pev;
}

static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct loongarch_perf_event *pev;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	pev = &((*loongarch_pmu.cache_event_map)
		[cache_type]
		[cache_op]
		[cache_result]);

	if (pev->event_id == CACHE_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	return pev;
}
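/*
 * validate_group() below dry-runs counter allocation against a zeroed
 * cpu_hw_events on the stack: the group is only accepted if the leader,
 * every sibling and the new event can all get a counter at the same time.
 */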
static int validate_group(struct perf_event *event)
{
	struct cpu_hw_events fake_cpuc;
	struct perf_event *sibling, *leader = event->group_leader;

	memset(&fake_cpuc, 0, sizeof(fake_cpuc));

	if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
		return -EINVAL;

	for_each_sibling_event(sibling, leader) {
		if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
			return -EINVAL;
	}

	if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
		return -EINVAL;

	return 0;
}

static void reset_counters(void *arg)
{
	int n;
	int counters = loongarch_pmu.num_counters;

	for (n = 0; n < counters; n++) {
		loongarch_pmu_write_control(n, 0);
		loongarch_pmu.write_counter(n, 0);
	}
}

static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = {
	PERF_MAP_ALL_UNSUPPORTED,
	[PERF_COUNT_HW_CPU_CYCLES]		= { 0x00 },
	[PERF_COUNT_HW_INSTRUCTIONS]		= { 0x01 },
	[PERF_COUNT_HW_CACHE_REFERENCES]	= { 0x08 },
	[PERF_COUNT_HW_CACHE_MISSES]		= { 0x09 },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= { 0x02 },
	[PERF_COUNT_HW_BRANCH_MISSES]		= { 0x03 },
};

static const struct loongarch_perf_event loongson_cache_map
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	PERF_CACHE_MAP_ALL_UNSUPPORTED,
	[C(L1D)] = {
		/*
		 * Like some other architectures (e.g. ARM), the performance
		 * counters don't differentiate between read and write
		 * accesses/misses, so this isn't strictly correct, but it's the
		 * best we can do. Writes and reads get combined.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= { 0x8 },
			[C(RESULT_MISS)]	= { 0x9 },
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= { 0x8 },
			[C(RESULT_MISS)]	= { 0x9 },
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= { 0xaa },
			[C(RESULT_MISS)]	= { 0xa9 },
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= { 0x6 },
			[C(RESULT_MISS)]	= { 0x7 },
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= { 0xc },
			[C(RESULT_MISS)]	= { 0xd },
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= { 0xc },
			[C(RESULT_MISS)]	= { 0xd },
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_MISS)]	= { 0x3b },
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= { 0x4 },
			[C(RESULT_MISS)]	= { 0x3c },
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= { 0x4 },
			[C(RESULT_MISS)]	= { 0x3c },
		},
	},
	[C(BPU)] = {
		/* Using the same code for *HW_BRANCH* */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= { 0x02 },
			[C(RESULT_MISS)]	= { 0x03 },
		},
	},
};
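/*
 * For PERF_TYPE_HW_CACHE events the perf ABI packs the cache type, the
 * operation and the result into the low three bytes of attr.config; an
 * L1D read miss, for example, is:
 *
 *	config = C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16);
 *
 * loongarch_pmu_map_cache_event() unpacks this to index the map above.
 */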
static int __hw_perf_event_init(struct perf_event *event)
{
	int err;
	struct hw_perf_event *hwc = &event->hw;
	struct perf_event_attr *attr = &event->attr;
	const struct loongarch_perf_event *pev;

	/* Return the LoongArch event descriptor for this generic perf event. */
	if (PERF_TYPE_HARDWARE == event->attr.type) {
		if (event->attr.config >= PERF_COUNT_HW_MAX)
			return -EINVAL;
		pev = loongarch_pmu_map_general_event(event->attr.config);
	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
		pev = loongarch_pmu_map_cache_event(event->attr.config);
	} else if (PERF_TYPE_RAW == event->attr.type) {
		/* We are working on the global raw event. */
		mutex_lock(&raw_event_mutex);
		pev = loongarch_pmu.map_raw_event(event->attr.config);
	} else {
		/* The event type is not (yet) supported. */
		return -EOPNOTSUPP;
	}

	if (IS_ERR(pev)) {
		if (PERF_TYPE_RAW == event->attr.type)
			mutex_unlock(&raw_event_mutex);
		return PTR_ERR(pev);
	}

	/*
	 * We allow maximum flexibility in how each individual counter shared
	 * by the single CPU operates (the mode exclusion and the range).
	 */
	hwc->config_base = CSR_PERFCTRL_IE;

	hwc->event_base = loongarch_pmu_perf_event_encode(pev);
	if (PERF_TYPE_RAW == event->attr.type)
		mutex_unlock(&raw_event_mutex);

	if (!attr->exclude_user) {
		hwc->config_base |= CSR_PERFCTRL_PLV3;
		hwc->config_base |= CSR_PERFCTRL_PLV2;
	}
	if (!attr->exclude_kernel)
		hwc->config_base |= CSR_PERFCTRL_PLV0;
	if (!attr->exclude_hv)
		hwc->config_base |= CSR_PERFCTRL_PLV1;

	hwc->config_base &= M_PERFCTL_CONFIG_MASK;
	/*
	 * The event can belong to another cpu. We do not assign a local
	 * counter for it for now.
	 */
	hwc->idx = -1;
	hwc->config = 0;

	if (!hwc->sample_period) {
		hwc->sample_period = loongarch_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	err = 0;
	if (event->group_leader != event)
		err = validate_group(event);

	event->destroy = hw_perf_event_destroy;

	if (err)
		event->destroy(event);

	return err;
}

static void pause_local_counters(void)
{
	unsigned long flags;
	int ctr = loongarch_pmu.num_counters;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	local_irq_save(flags);
	do {
		ctr--;
		cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr);
		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
					    ~M_PERFCTL_COUNT_EVENT_WHENEVER);
	} while (ctr > 0);
	local_irq_restore(flags);
}

static void resume_local_counters(void)
{
	int ctr = loongarch_pmu.num_counters;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	do {
		ctr--;
		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
	} while (ctr > 0);
}

static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
{
	/* The raw event id is the low bits of config, masked by CSR_PERFCTRL_EVENT. */
	raw_event.event_id = M_PERFCTL_EVENT(config);

	return &raw_event;
}

static int __init init_hw_perf_events(void)
{
	int counters;

	if (!cpu_has_pmp)
		return -ENODEV;

	pr_info("Performance counters: ");
	counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1;

	loongarch_pmu.num_counters	= counters;
	loongarch_pmu.max_period	= (1ULL << 63) - 1;
	loongarch_pmu.valid_count	= (1ULL << 63) - 1;
	loongarch_pmu.overflow		= 1ULL << 63;
	loongarch_pmu.name		= "loongarch/loongson64";
	loongarch_pmu.read_counter	= loongarch_pmu_read_counter;
	loongarch_pmu.write_counter	= loongarch_pmu_write_counter;
	loongarch_pmu.map_raw_event	= loongarch_pmu_map_raw_event;
	loongarch_pmu.general_event_map = &loongson_event_map;
	loongarch_pmu.cache_event_map	= &loongson_cache_map;

	on_each_cpu(reset_counters, NULL, 1);

	pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
		loongarch_pmu.name, counters, 64);

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

	return 0;
}
pure_initcall(init_hw_perf_events);