1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Linux performance counter support for LoongArch. 4 * 5 * Copyright (C) 2022 Loongson Technology Corporation Limited 6 * 7 * Derived from MIPS: 8 * Copyright (C) 2010 MIPS Technologies, Inc. 9 * Copyright (C) 2011 Cavium Networks, Inc. 10 * Author: Deng-Cheng Zhu 11 */ 12 13 #include <linux/cpumask.h> 14 #include <linux/interrupt.h> 15 #include <linux/smp.h> 16 #include <linux/kernel.h> 17 #include <linux/perf_event.h> 18 #include <linux/uaccess.h> 19 #include <linux/sched/task_stack.h> 20 21 #include <asm/irq.h> 22 #include <asm/irq_regs.h> 23 #include <asm/stacktrace.h> 24 #include <asm/unwind.h> 25 26 /* 27 * Get the return address for a single stackframe and return a pointer to the 28 * next frame tail. 29 */ 30 static unsigned long 31 user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) 32 { 33 unsigned long err; 34 unsigned long __user *user_frame_tail; 35 struct stack_frame buftail; 36 37 user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame)); 38 39 /* Also check accessibility of one struct frame_tail beyond */ 40 if (!access_ok(user_frame_tail, sizeof(buftail))) 41 return 0; 42 43 pagefault_disable(); 44 err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail)); 45 pagefault_enable(); 46 47 if (err || (unsigned long)user_frame_tail >= buftail.fp) 48 return 0; 49 50 perf_callchain_store(entry, buftail.ra); 51 52 return buftail.fp; 53 } 54 55 void perf_callchain_user(struct perf_callchain_entry_ctx *entry, 56 struct pt_regs *regs) 57 { 58 unsigned long fp; 59 60 if (perf_guest_state()) { 61 /* We don't support guest os callchain now */ 62 return; 63 } 64 65 perf_callchain_store(entry, regs->csr_era); 66 67 fp = regs->regs[22]; 68 69 while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf)) 70 fp = user_backtrace(entry, fp); 71 } 72 73 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, 74 struct pt_regs *regs) 75 { 76 struct unwind_state state; 77 unsigned long addr; 78 79 for (unwind_start(&state, current, regs); 80 !unwind_done(&state); unwind_next_frame(&state)) { 81 addr = unwind_get_return_address(&state); 82 if (!addr || perf_callchain_store(entry, addr)) 83 return; 84 } 85 } 86 87 #define LOONGARCH_MAX_HWEVENTS 32 88 89 struct cpu_hw_events { 90 /* Array of events on this cpu. */ 91 struct perf_event *events[LOONGARCH_MAX_HWEVENTS]; 92 93 /* 94 * Set the bit (indexed by the counter number) when the counter 95 * is used for an event. 96 */ 97 unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)]; 98 99 /* 100 * Software copy of the control register for each performance counter. 101 */ 102 unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS]; 103 }; 104 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 105 .saved_ctrl = {0}, 106 }; 107 108 /* The description of LoongArch performance events. */ 109 struct loongarch_perf_event { 110 unsigned int event_id; 111 }; 112 113 static struct loongarch_perf_event raw_event; 114 static DEFINE_MUTEX(raw_event_mutex); 115 116 #define C(x) PERF_COUNT_HW_CACHE_##x 117 #define HW_OP_UNSUPPORTED 0xffffffff 118 #define CACHE_OP_UNSUPPORTED 0xffffffff 119 120 #define PERF_MAP_ALL_UNSUPPORTED \ 121 [0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED} 122 123 #define PERF_CACHE_MAP_ALL_UNSUPPORTED \ 124 [0 ... C(MAX) - 1] = { \ 125 [0 ... C(OP_MAX) - 1] = { \ 126 [0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED}, \ 127 }, \ 128 } 129 130 struct loongarch_pmu { 131 u64 max_period; 132 u64 valid_count; 133 u64 overflow; 134 const char *name; 135 unsigned int num_counters; 136 u64 (*read_counter)(unsigned int idx); 137 void (*write_counter)(unsigned int idx, u64 val); 138 const struct loongarch_perf_event *(*map_raw_event)(u64 config); 139 const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX]; 140 const struct loongarch_perf_event (*cache_event_map) 141 [PERF_COUNT_HW_CACHE_MAX] 142 [PERF_COUNT_HW_CACHE_OP_MAX] 143 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 144 }; 145 146 static struct loongarch_pmu loongarch_pmu; 147 148 #define M_PERFCTL_EVENT(event) (event & CSR_PERFCTRL_EVENT) 149 150 #define M_PERFCTL_COUNT_EVENT_WHENEVER (CSR_PERFCTRL_PLV0 | \ 151 CSR_PERFCTRL_PLV1 | \ 152 CSR_PERFCTRL_PLV2 | \ 153 CSR_PERFCTRL_PLV3 | \ 154 CSR_PERFCTRL_IE) 155 156 #define M_PERFCTL_CONFIG_MASK 0x1f0000 157 158 static void pause_local_counters(void); 159 static void resume_local_counters(void); 160 161 static u64 loongarch_pmu_read_counter(unsigned int idx) 162 { 163 u64 val = -1; 164 165 switch (idx) { 166 case 0: 167 val = read_csr_perfcntr0(); 168 break; 169 case 1: 170 val = read_csr_perfcntr1(); 171 break; 172 case 2: 173 val = read_csr_perfcntr2(); 174 break; 175 case 3: 176 val = read_csr_perfcntr3(); 177 break; 178 default: 179 WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 180 return 0; 181 } 182 183 return val; 184 } 185 186 static void loongarch_pmu_write_counter(unsigned int idx, u64 val) 187 { 188 switch (idx) { 189 case 0: 190 write_csr_perfcntr0(val); 191 return; 192 case 1: 193 write_csr_perfcntr1(val); 194 return; 195 case 2: 196 write_csr_perfcntr2(val); 197 return; 198 case 3: 199 write_csr_perfcntr3(val); 200 return; 201 default: 202 WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 203 return; 204 } 205 } 206 207 static unsigned int loongarch_pmu_read_control(unsigned int idx) 208 { 209 unsigned int val = -1; 210 211 switch (idx) { 212 case 0: 213 val = read_csr_perfctrl0(); 214 break; 215 case 1: 216 val = read_csr_perfctrl1(); 217 break; 218 case 2: 219 val = read_csr_perfctrl2(); 220 break; 221 case 3: 222 val = read_csr_perfctrl3(); 223 break; 224 default: 225 WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 226 return 0; 227 } 228 229 return val; 230 } 231 232 static void loongarch_pmu_write_control(unsigned int idx, unsigned int val) 233 { 234 switch (idx) { 235 case 0: 236 write_csr_perfctrl0(val); 237 return; 238 case 1: 239 write_csr_perfctrl1(val); 240 return; 241 case 2: 242 write_csr_perfctrl2(val); 243 return; 244 case 3: 245 write_csr_perfctrl3(val); 246 return; 247 default: 248 WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 249 return; 250 } 251 } 252 253 static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) 254 { 255 int i; 256 257 for (i = 0; i < loongarch_pmu.num_counters; i++) { 258 if (!test_and_set_bit(i, cpuc->used_mask)) 259 return i; 260 } 261 262 return -EAGAIN; 263 } 264 265 static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) 266 { 267 unsigned int cpu; 268 struct perf_event *event = container_of(evt, struct perf_event, hw); 269 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 270 271 WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 272 273 /* Make sure interrupt enabled. */ 274 cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | 275 (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; 276 277 cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); 278 279 /* 280 * We do not actually let the counter run. Leave it until start(). 281 */ 282 pr_debug("Enabling perf counter for CPU%d\n", cpu); 283 } 284 285 static void loongarch_pmu_disable_event(int idx) 286 { 287 unsigned long flags; 288 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 289 290 WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 291 292 local_irq_save(flags); 293 cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) & 294 ~M_PERFCTL_COUNT_EVENT_WHENEVER; 295 loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]); 296 local_irq_restore(flags); 297 } 298 299 static int loongarch_pmu_event_set_period(struct perf_event *event, 300 struct hw_perf_event *hwc, 301 int idx) 302 { 303 int ret = 0; 304 u64 left = local64_read(&hwc->period_left); 305 u64 period = hwc->sample_period; 306 307 if (unlikely((left + period) & (1ULL << 63))) { 308 /* left underflowed by more than period. */ 309 left = period; 310 local64_set(&hwc->period_left, left); 311 hwc->last_period = period; 312 ret = 1; 313 } else if (unlikely((left + period) <= period)) { 314 /* left underflowed by less than period. */ 315 left += period; 316 local64_set(&hwc->period_left, left); 317 hwc->last_period = period; 318 ret = 1; 319 } 320 321 if (left > loongarch_pmu.max_period) { 322 left = loongarch_pmu.max_period; 323 local64_set(&hwc->period_left, left); 324 } 325 326 local64_set(&hwc->prev_count, loongarch_pmu.overflow - left); 327 328 loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left); 329 330 perf_event_update_userpage(event); 331 332 return ret; 333 } 334 335 static void loongarch_pmu_event_update(struct perf_event *event, 336 struct hw_perf_event *hwc, 337 int idx) 338 { 339 u64 delta; 340 u64 prev_raw_count, new_raw_count; 341 342 again: 343 prev_raw_count = local64_read(&hwc->prev_count); 344 new_raw_count = loongarch_pmu.read_counter(idx); 345 346 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 347 new_raw_count) != prev_raw_count) 348 goto again; 349 350 delta = new_raw_count - prev_raw_count; 351 352 local64_add(delta, &event->count); 353 local64_sub(delta, &hwc->period_left); 354 } 355 356 static void loongarch_pmu_start(struct perf_event *event, int flags) 357 { 358 struct hw_perf_event *hwc = &event->hw; 359 360 if (flags & PERF_EF_RELOAD) 361 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); 362 363 hwc->state = 0; 364 365 /* Set the period for the event. */ 366 loongarch_pmu_event_set_period(event, hwc, hwc->idx); 367 368 /* Enable the event. */ 369 loongarch_pmu_enable_event(hwc, hwc->idx); 370 } 371 372 static void loongarch_pmu_stop(struct perf_event *event, int flags) 373 { 374 struct hw_perf_event *hwc = &event->hw; 375 376 if (!(hwc->state & PERF_HES_STOPPED)) { 377 /* We are working on a local event. */ 378 loongarch_pmu_disable_event(hwc->idx); 379 barrier(); 380 loongarch_pmu_event_update(event, hwc, hwc->idx); 381 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 382 } 383 } 384 385 static int loongarch_pmu_add(struct perf_event *event, int flags) 386 { 387 int idx, err = 0; 388 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 389 struct hw_perf_event *hwc = &event->hw; 390 391 perf_pmu_disable(event->pmu); 392 393 /* To look for a free counter for this event. */ 394 idx = loongarch_pmu_alloc_counter(cpuc, hwc); 395 if (idx < 0) { 396 err = idx; 397 goto out; 398 } 399 400 /* 401 * If there is an event in the counter we are going to use then 402 * make sure it is disabled. 403 */ 404 event->hw.idx = idx; 405 loongarch_pmu_disable_event(idx); 406 cpuc->events[idx] = event; 407 408 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; 409 if (flags & PERF_EF_START) 410 loongarch_pmu_start(event, PERF_EF_RELOAD); 411 412 /* Propagate our changes to the userspace mapping. */ 413 perf_event_update_userpage(event); 414 415 out: 416 perf_pmu_enable(event->pmu); 417 return err; 418 } 419 420 static void loongarch_pmu_del(struct perf_event *event, int flags) 421 { 422 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 423 struct hw_perf_event *hwc = &event->hw; 424 int idx = hwc->idx; 425 426 WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 427 428 loongarch_pmu_stop(event, PERF_EF_UPDATE); 429 cpuc->events[idx] = NULL; 430 clear_bit(idx, cpuc->used_mask); 431 432 perf_event_update_userpage(event); 433 } 434 435 static void loongarch_pmu_read(struct perf_event *event) 436 { 437 struct hw_perf_event *hwc = &event->hw; 438 439 /* Don't read disabled counters! */ 440 if (hwc->idx < 0) 441 return; 442 443 loongarch_pmu_event_update(event, hwc, hwc->idx); 444 } 445 446 static void loongarch_pmu_enable(struct pmu *pmu) 447 { 448 resume_local_counters(); 449 } 450 451 static void loongarch_pmu_disable(struct pmu *pmu) 452 { 453 pause_local_counters(); 454 } 455 456 static DEFINE_MUTEX(pmu_reserve_mutex); 457 static atomic_t active_events = ATOMIC_INIT(0); 458 459 static int get_pmc_irq(void) 460 { 461 struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); 462 463 if (d) 464 return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START); 465 466 return -EINVAL; 467 } 468 469 static void reset_counters(void *arg); 470 static int __hw_perf_event_init(struct perf_event *event); 471 472 static void hw_perf_event_destroy(struct perf_event *event) 473 { 474 if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { 475 on_each_cpu(reset_counters, NULL, 1); 476 free_irq(get_pmc_irq(), &loongarch_pmu); 477 mutex_unlock(&pmu_reserve_mutex); 478 } 479 } 480 481 static void handle_associated_event(struct cpu_hw_events *cpuc, int idx, 482 struct perf_sample_data *data, struct pt_regs *regs) 483 { 484 struct perf_event *event = cpuc->events[idx]; 485 struct hw_perf_event *hwc = &event->hw; 486 487 loongarch_pmu_event_update(event, hwc, idx); 488 data->period = event->hw.last_period; 489 if (!loongarch_pmu_event_set_period(event, hwc, idx)) 490 return; 491 492 if (perf_event_overflow(event, data, regs)) 493 loongarch_pmu_disable_event(idx); 494 } 495 496 static irqreturn_t pmu_handle_irq(int irq, void *dev) 497 { 498 int n; 499 int handled = IRQ_NONE; 500 uint64_t counter; 501 struct pt_regs *regs; 502 struct perf_sample_data data; 503 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 504 505 /* 506 * First we pause the local counters, so that when we are locked 507 * here, the counters are all paused. When it gets locked due to 508 * perf_disable(), the timer interrupt handler will be delayed. 509 * 510 * See also loongarch_pmu_start(). 511 */ 512 pause_local_counters(); 513 514 regs = get_irq_regs(); 515 516 perf_sample_data_init(&data, 0, 0); 517 518 for (n = 0; n < loongarch_pmu.num_counters; n++) { 519 if (test_bit(n, cpuc->used_mask)) { 520 counter = loongarch_pmu.read_counter(n); 521 if (counter & loongarch_pmu.overflow) { 522 handle_associated_event(cpuc, n, &data, regs); 523 handled = IRQ_HANDLED; 524 } 525 } 526 } 527 528 resume_local_counters(); 529 530 /* 531 * Do all the work for the pending perf events. We can do this 532 * in here because the performance counter interrupt is a regular 533 * interrupt, not NMI. 534 */ 535 if (handled == IRQ_HANDLED) 536 irq_work_run(); 537 538 return handled; 539 } 540 541 static int loongarch_pmu_event_init(struct perf_event *event) 542 { 543 int r, irq; 544 unsigned long flags; 545 546 /* does not support taken branch sampling */ 547 if (has_branch_stack(event)) 548 return -EOPNOTSUPP; 549 550 switch (event->attr.type) { 551 case PERF_TYPE_RAW: 552 case PERF_TYPE_HARDWARE: 553 case PERF_TYPE_HW_CACHE: 554 break; 555 556 default: 557 /* Init it to avoid false validate_group */ 558 event->hw.event_base = 0xffffffff; 559 return -ENOENT; 560 } 561 562 if (event->cpu >= 0 && !cpu_online(event->cpu)) 563 return -ENODEV; 564 565 irq = get_pmc_irq(); 566 flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; 567 if (!atomic_inc_not_zero(&active_events)) { 568 mutex_lock(&pmu_reserve_mutex); 569 if (atomic_read(&active_events) == 0) { 570 r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu); 571 if (r < 0) { 572 mutex_unlock(&pmu_reserve_mutex); 573 pr_warn("PMU IRQ request failed\n"); 574 return -ENODEV; 575 } 576 } 577 atomic_inc(&active_events); 578 mutex_unlock(&pmu_reserve_mutex); 579 } 580 581 return __hw_perf_event_init(event); 582 } 583 584 static struct pmu pmu = { 585 .pmu_enable = loongarch_pmu_enable, 586 .pmu_disable = loongarch_pmu_disable, 587 .event_init = loongarch_pmu_event_init, 588 .add = loongarch_pmu_add, 589 .del = loongarch_pmu_del, 590 .start = loongarch_pmu_start, 591 .stop = loongarch_pmu_stop, 592 .read = loongarch_pmu_read, 593 }; 594 595 static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) 596 { 597 return (pev->event_id & 0xff); 598 } 599 600 static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) 601 { 602 const struct loongarch_perf_event *pev; 603 604 pev = &(*loongarch_pmu.general_event_map)[idx]; 605 606 if (pev->event_id == HW_OP_UNSUPPORTED) 607 return ERR_PTR(-ENOENT); 608 609 return pev; 610 } 611 612 static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config) 613 { 614 unsigned int cache_type, cache_op, cache_result; 615 const struct loongarch_perf_event *pev; 616 617 cache_type = (config >> 0) & 0xff; 618 if (cache_type >= PERF_COUNT_HW_CACHE_MAX) 619 return ERR_PTR(-EINVAL); 620 621 cache_op = (config >> 8) & 0xff; 622 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) 623 return ERR_PTR(-EINVAL); 624 625 cache_result = (config >> 16) & 0xff; 626 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 627 return ERR_PTR(-EINVAL); 628 629 pev = &((*loongarch_pmu.cache_event_map) 630 [cache_type] 631 [cache_op] 632 [cache_result]); 633 634 if (pev->event_id == CACHE_OP_UNSUPPORTED) 635 return ERR_PTR(-ENOENT); 636 637 return pev; 638 } 639 640 static int validate_group(struct perf_event *event) 641 { 642 struct cpu_hw_events fake_cpuc; 643 struct perf_event *sibling, *leader = event->group_leader; 644 645 memset(&fake_cpuc, 0, sizeof(fake_cpuc)); 646 647 if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) 648 return -EINVAL; 649 650 for_each_sibling_event(sibling, leader) { 651 if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) 652 return -EINVAL; 653 } 654 655 if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0) 656 return -EINVAL; 657 658 return 0; 659 } 660 661 static void reset_counters(void *arg) 662 { 663 int n; 664 int counters = loongarch_pmu.num_counters; 665 666 for (n = 0; n < counters; n++) { 667 loongarch_pmu_write_control(n, 0); 668 loongarch_pmu.write_counter(n, 0); 669 } 670 } 671 672 static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = { 673 PERF_MAP_ALL_UNSUPPORTED, 674 [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 }, 675 [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 }, 676 [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 }, 677 [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 }, 678 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 }, 679 [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 }, 680 }; 681 682 static const struct loongarch_perf_event loongson_cache_map 683 [PERF_COUNT_HW_CACHE_MAX] 684 [PERF_COUNT_HW_CACHE_OP_MAX] 685 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 686 PERF_CACHE_MAP_ALL_UNSUPPORTED, 687 [C(L1D)] = { 688 /* 689 * Like some other architectures (e.g. ARM), the performance 690 * counters don't differentiate between read and write 691 * accesses/misses, so this isn't strictly correct, but it's the 692 * best we can do. Writes and reads get combined. 693 */ 694 [C(OP_READ)] = { 695 [C(RESULT_ACCESS)] = { 0x8 }, 696 [C(RESULT_MISS)] = { 0x9 }, 697 }, 698 [C(OP_WRITE)] = { 699 [C(RESULT_ACCESS)] = { 0x8 }, 700 [C(RESULT_MISS)] = { 0x9 }, 701 }, 702 [C(OP_PREFETCH)] = { 703 [C(RESULT_ACCESS)] = { 0xaa }, 704 [C(RESULT_MISS)] = { 0xa9 }, 705 }, 706 }, 707 [C(L1I)] = { 708 [C(OP_READ)] = { 709 [C(RESULT_ACCESS)] = { 0x6 }, 710 [C(RESULT_MISS)] = { 0x7 }, 711 }, 712 }, 713 [C(LL)] = { 714 [C(OP_READ)] = { 715 [C(RESULT_ACCESS)] = { 0xc }, 716 [C(RESULT_MISS)] = { 0xd }, 717 }, 718 [C(OP_WRITE)] = { 719 [C(RESULT_ACCESS)] = { 0xc }, 720 [C(RESULT_MISS)] = { 0xd }, 721 }, 722 }, 723 [C(ITLB)] = { 724 [C(OP_READ)] = { 725 [C(RESULT_MISS)] = { 0x3b }, 726 }, 727 }, 728 [C(DTLB)] = { 729 [C(OP_READ)] = { 730 [C(RESULT_ACCESS)] = { 0x4 }, 731 [C(RESULT_MISS)] = { 0x3c }, 732 }, 733 [C(OP_WRITE)] = { 734 [C(RESULT_ACCESS)] = { 0x4 }, 735 [C(RESULT_MISS)] = { 0x3c }, 736 }, 737 }, 738 [C(BPU)] = { 739 /* Using the same code for *HW_BRANCH* */ 740 [C(OP_READ)] = { 741 [C(RESULT_ACCESS)] = { 0x02 }, 742 [C(RESULT_MISS)] = { 0x03 }, 743 }, 744 }, 745 }; 746 747 static int __hw_perf_event_init(struct perf_event *event) 748 { 749 int err; 750 struct hw_perf_event *hwc = &event->hw; 751 struct perf_event_attr *attr = &event->attr; 752 const struct loongarch_perf_event *pev; 753 754 /* Returning LoongArch event descriptor for generic perf event. */ 755 if (PERF_TYPE_HARDWARE == event->attr.type) { 756 if (event->attr.config >= PERF_COUNT_HW_MAX) 757 return -EINVAL; 758 pev = loongarch_pmu_map_general_event(event->attr.config); 759 } else if (PERF_TYPE_HW_CACHE == event->attr.type) { 760 pev = loongarch_pmu_map_cache_event(event->attr.config); 761 } else if (PERF_TYPE_RAW == event->attr.type) { 762 /* We are working on the global raw event. */ 763 mutex_lock(&raw_event_mutex); 764 pev = loongarch_pmu.map_raw_event(event->attr.config); 765 } else { 766 /* The event type is not (yet) supported. */ 767 return -EOPNOTSUPP; 768 } 769 770 if (IS_ERR(pev)) { 771 if (PERF_TYPE_RAW == event->attr.type) 772 mutex_unlock(&raw_event_mutex); 773 return PTR_ERR(pev); 774 } 775 776 /* 777 * We allow max flexibility on how each individual counter shared 778 * by the single CPU operates (the mode exclusion and the range). 779 */ 780 hwc->config_base = CSR_PERFCTRL_IE; 781 782 hwc->event_base = loongarch_pmu_perf_event_encode(pev); 783 if (PERF_TYPE_RAW == event->attr.type) 784 mutex_unlock(&raw_event_mutex); 785 786 if (!attr->exclude_user) { 787 hwc->config_base |= CSR_PERFCTRL_PLV3; 788 hwc->config_base |= CSR_PERFCTRL_PLV2; 789 } 790 if (!attr->exclude_kernel) { 791 hwc->config_base |= CSR_PERFCTRL_PLV0; 792 } 793 if (!attr->exclude_hv) { 794 hwc->config_base |= CSR_PERFCTRL_PLV1; 795 } 796 797 hwc->config_base &= M_PERFCTL_CONFIG_MASK; 798 /* 799 * The event can belong to another cpu. We do not assign a local 800 * counter for it for now. 801 */ 802 hwc->idx = -1; 803 hwc->config = 0; 804 805 if (!hwc->sample_period) { 806 hwc->sample_period = loongarch_pmu.max_period; 807 hwc->last_period = hwc->sample_period; 808 local64_set(&hwc->period_left, hwc->sample_period); 809 } 810 811 err = 0; 812 if (event->group_leader != event) 813 err = validate_group(event); 814 815 event->destroy = hw_perf_event_destroy; 816 817 if (err) 818 event->destroy(event); 819 820 return err; 821 } 822 823 static void pause_local_counters(void) 824 { 825 unsigned long flags; 826 int ctr = loongarch_pmu.num_counters; 827 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 828 829 local_irq_save(flags); 830 do { 831 ctr--; 832 cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr); 833 loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] & 834 ~M_PERFCTL_COUNT_EVENT_WHENEVER); 835 } while (ctr > 0); 836 local_irq_restore(flags); 837 } 838 839 static void resume_local_counters(void) 840 { 841 int ctr = loongarch_pmu.num_counters; 842 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 843 844 do { 845 ctr--; 846 loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]); 847 } while (ctr > 0); 848 } 849 850 static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) 851 { 852 raw_event.event_id = config & 0xff; 853 854 return &raw_event; 855 } 856 857 static int __init init_hw_perf_events(void) 858 { 859 int counters; 860 861 if (!cpu_has_pmp) 862 return -ENODEV; 863 864 pr_info("Performance counters: "); 865 counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; 866 867 loongarch_pmu.num_counters = counters; 868 loongarch_pmu.max_period = (1ULL << 63) - 1; 869 loongarch_pmu.valid_count = (1ULL << 63) - 1; 870 loongarch_pmu.overflow = 1ULL << 63; 871 loongarch_pmu.name = "loongarch/loongson64"; 872 loongarch_pmu.read_counter = loongarch_pmu_read_counter; 873 loongarch_pmu.write_counter = loongarch_pmu_write_counter; 874 loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event; 875 loongarch_pmu.general_event_map = &loongson_event_map; 876 loongarch_pmu.cache_event_map = &loongson_cache_map; 877 878 on_each_cpu(reset_counters, NULL, 1); 879 880 pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", 881 loongarch_pmu.name, counters, 64); 882 883 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); 884 885 return 0; 886 } 887 early_initcall(init_hw_perf_events); 888