1 /* 2 * Performance event support - powerpc architecture code 3 * 4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 #include <linux/kernel.h> 12 #include <linux/sched.h> 13 #include <linux/perf_event.h> 14 #include <linux/percpu.h> 15 #include <linux/hardirq.h> 16 #include <asm/reg.h> 17 #include <asm/pmc.h> 18 #include <asm/machdep.h> 19 #include <asm/firmware.h> 20 #include <asm/ptrace.h> 21 22 struct cpu_hw_events { 23 int n_events; 24 int n_percpu; 25 int disabled; 26 int n_added; 27 int n_limited; 28 u8 pmcs_enabled; 29 struct perf_event *event[MAX_HWEVENTS]; 30 u64 events[MAX_HWEVENTS]; 31 unsigned int flags[MAX_HWEVENTS]; 32 unsigned long mmcr[3]; 33 struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; 34 u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; 35 u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; 36 unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; 37 unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; 38 39 unsigned int group_flag; 40 int n_txn_start; 41 }; 42 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); 43 44 struct power_pmu *ppmu; 45 46 /* 47 * Normally, to ignore kernel events we set the FCS (freeze counters 48 * in supervisor mode) bit in MMCR0, but if the kernel runs with the 49 * hypervisor bit set in the MSR, or if we are running on a processor 50 * where the hypervisor bit is forced to 1 (as on Apple G5 processors), 51 * then we need to use the FCHV bit to ignore kernel events. 52 */ 53 static unsigned int freeze_events_kernel = MMCR0_FCS; 54 55 /* 56 * 32-bit doesn't have MMCRA but does have an MMCR2, 57 * and a few other names are different. 58 */ 59 #ifdef CONFIG_PPC32 60 61 #define MMCR0_FCHV 0 62 #define MMCR0_PMCjCE MMCR0_PMCnCE 63 64 #define SPRN_MMCRA SPRN_MMCR2 65 #define MMCRA_SAMPLE_ENABLE 0 66 67 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) 68 { 69 return 0; 70 } 71 static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } 72 static inline u32 perf_get_misc_flags(struct pt_regs *regs) 73 { 74 return 0; 75 } 76 static inline void perf_read_regs(struct pt_regs *regs) 77 { 78 regs->result = 0; 79 } 80 static inline int perf_intr_is_nmi(struct pt_regs *regs) 81 { 82 return 0; 83 } 84 85 #endif /* CONFIG_PPC32 */ 86 87 /* 88 * Things that are specific to 64-bit implementations. 89 */ 90 #ifdef CONFIG_PPC64 91 92 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) 93 { 94 unsigned long mmcra = regs->dsisr; 95 96 if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { 97 unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; 98 if (slot > 1) 99 return 4 * (slot - 1); 100 } 101 return 0; 102 } 103 104 /* 105 * The user wants a data address recorded. 106 * If we're not doing instruction sampling, give them the SDAR 107 * (sampled data address). If we are doing instruction sampling, then 108 * only give them the SDAR if it corresponds to the instruction 109 * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC 110 * bit in MMCRA. 111 */ 112 static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) 113 { 114 unsigned long mmcra = regs->dsisr; 115 unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? 116 POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; 117 118 if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) 119 *addrp = mfspr(SPRN_SDAR); 120 } 121 122 static bool mmcra_sihv(unsigned long mmcra) 123 { 124 unsigned long sihv = MMCRA_SIHV; 125 126 if (ppmu->flags & PPMU_ALT_SIPR) 127 sihv = POWER6_MMCRA_SIHV; 128 129 return !!(mmcra & sihv); 130 } 131 132 static bool mmcra_sipr(unsigned long mmcra) 133 { 134 unsigned long sipr = MMCRA_SIPR; 135 136 if (ppmu->flags & PPMU_ALT_SIPR) 137 sipr = POWER6_MMCRA_SIPR; 138 139 return !!(mmcra & sipr); 140 } 141 142 static inline u32 perf_flags_from_msr(struct pt_regs *regs) 143 { 144 if (regs->msr & MSR_PR) 145 return PERF_RECORD_MISC_USER; 146 if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV) 147 return PERF_RECORD_MISC_HYPERVISOR; 148 return PERF_RECORD_MISC_KERNEL; 149 } 150 151 static inline u32 perf_get_misc_flags(struct pt_regs *regs) 152 { 153 unsigned long mmcra = regs->dsisr; 154 unsigned long use_siar = regs->result; 155 156 if (!use_siar) 157 return perf_flags_from_msr(regs); 158 159 /* 160 * If we don't have flags in MMCRA, rather than using 161 * the MSR, we intuit the flags from the address in 162 * SIAR which should give slightly more reliable 163 * results 164 */ 165 if (ppmu->flags & PPMU_NO_SIPR) { 166 unsigned long siar = mfspr(SPRN_SIAR); 167 if (siar >= PAGE_OFFSET) 168 return PERF_RECORD_MISC_KERNEL; 169 return PERF_RECORD_MISC_USER; 170 } 171 172 /* PR has priority over HV, so order below is important */ 173 if (mmcra_sipr(mmcra)) 174 return PERF_RECORD_MISC_USER; 175 if (mmcra_sihv(mmcra) && (freeze_events_kernel != MMCR0_FCHV)) 176 return PERF_RECORD_MISC_HYPERVISOR; 177 return PERF_RECORD_MISC_KERNEL; 178 } 179 180 /* 181 * Overload regs->dsisr to store MMCRA so we only need to read it once 182 * on each interrupt. 183 * Overload regs->result to specify whether we should use the MSR (result 184 * is zero) or the SIAR (result is non zero). 185 */ 186 static inline void perf_read_regs(struct pt_regs *regs) 187 { 188 unsigned long mmcra = mfspr(SPRN_MMCRA); 189 int marked = mmcra & MMCRA_SAMPLE_ENABLE; 190 int use_siar; 191 192 /* 193 * If this isn't a PMU exception (eg a software event) the SIAR is 194 * not valid. Use pt_regs. 195 * 196 * If it is a marked event use the SIAR. 197 * 198 * If the PMU doesn't update the SIAR for non marked events use 199 * pt_regs. 200 * 201 * If the PMU has HV/PR flags then check to see if they 202 * place the exception in userspace. If so, use pt_regs. In 203 * continuous sampling mode the SIAR and the PMU exception are 204 * not synchronised, so they may be many instructions apart. 205 * This can result in confusing backtraces. We still want 206 * hypervisor samples as well as samples in the kernel with 207 * interrupts off hence the userspace check. 208 */ 209 if (TRAP(regs) != 0xf00) 210 use_siar = 0; 211 else if (marked) 212 use_siar = 1; 213 else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING)) 214 use_siar = 0; 215 else if (!(ppmu->flags & PPMU_NO_SIPR) && mmcra_sipr(mmcra)) 216 use_siar = 0; 217 else 218 use_siar = 1; 219 220 regs->dsisr = mmcra; 221 regs->result = use_siar; 222 } 223 224 /* 225 * If interrupts were soft-disabled when a PMU interrupt occurs, treat 226 * it as an NMI. 227 */ 228 static inline int perf_intr_is_nmi(struct pt_regs *regs) 229 { 230 return !regs->softe; 231 } 232 233 #endif /* CONFIG_PPC64 */ 234 235 static void perf_event_interrupt(struct pt_regs *regs); 236 237 void perf_event_print_debug(void) 238 { 239 } 240 241 /* 242 * Read one performance monitor counter (PMC). 243 */ 244 static unsigned long read_pmc(int idx) 245 { 246 unsigned long val; 247 248 switch (idx) { 249 case 1: 250 val = mfspr(SPRN_PMC1); 251 break; 252 case 2: 253 val = mfspr(SPRN_PMC2); 254 break; 255 case 3: 256 val = mfspr(SPRN_PMC3); 257 break; 258 case 4: 259 val = mfspr(SPRN_PMC4); 260 break; 261 case 5: 262 val = mfspr(SPRN_PMC5); 263 break; 264 case 6: 265 val = mfspr(SPRN_PMC6); 266 break; 267 #ifdef CONFIG_PPC64 268 case 7: 269 val = mfspr(SPRN_PMC7); 270 break; 271 case 8: 272 val = mfspr(SPRN_PMC8); 273 break; 274 #endif /* CONFIG_PPC64 */ 275 default: 276 printk(KERN_ERR "oops trying to read PMC%d\n", idx); 277 val = 0; 278 } 279 return val; 280 } 281 282 /* 283 * Write one PMC. 284 */ 285 static void write_pmc(int idx, unsigned long val) 286 { 287 switch (idx) { 288 case 1: 289 mtspr(SPRN_PMC1, val); 290 break; 291 case 2: 292 mtspr(SPRN_PMC2, val); 293 break; 294 case 3: 295 mtspr(SPRN_PMC3, val); 296 break; 297 case 4: 298 mtspr(SPRN_PMC4, val); 299 break; 300 case 5: 301 mtspr(SPRN_PMC5, val); 302 break; 303 case 6: 304 mtspr(SPRN_PMC6, val); 305 break; 306 #ifdef CONFIG_PPC64 307 case 7: 308 mtspr(SPRN_PMC7, val); 309 break; 310 case 8: 311 mtspr(SPRN_PMC8, val); 312 break; 313 #endif /* CONFIG_PPC64 */ 314 default: 315 printk(KERN_ERR "oops trying to write PMC%d\n", idx); 316 } 317 } 318 319 /* 320 * Check if a set of events can all go on the PMU at once. 321 * If they can't, this will look at alternative codes for the events 322 * and see if any combination of alternative codes is feasible. 323 * The feasible set is returned in event_id[]. 324 */ 325 static int power_check_constraints(struct cpu_hw_events *cpuhw, 326 u64 event_id[], unsigned int cflags[], 327 int n_ev) 328 { 329 unsigned long mask, value, nv; 330 unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; 331 int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS]; 332 int i, j; 333 unsigned long addf = ppmu->add_fields; 334 unsigned long tadd = ppmu->test_adder; 335 336 if (n_ev > ppmu->n_counter) 337 return -1; 338 339 /* First see if the events will go on as-is */ 340 for (i = 0; i < n_ev; ++i) { 341 if ((cflags[i] & PPMU_LIMITED_PMC_REQD) 342 && !ppmu->limited_pmc_event(event_id[i])) { 343 ppmu->get_alternatives(event_id[i], cflags[i], 344 cpuhw->alternatives[i]); 345 event_id[i] = cpuhw->alternatives[i][0]; 346 } 347 if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], 348 &cpuhw->avalues[i][0])) 349 return -1; 350 } 351 value = mask = 0; 352 for (i = 0; i < n_ev; ++i) { 353 nv = (value | cpuhw->avalues[i][0]) + 354 (value & cpuhw->avalues[i][0] & addf); 355 if ((((nv + tadd) ^ value) & mask) != 0 || 356 (((nv + tadd) ^ cpuhw->avalues[i][0]) & 357 cpuhw->amasks[i][0]) != 0) 358 break; 359 value = nv; 360 mask |= cpuhw->amasks[i][0]; 361 } 362 if (i == n_ev) 363 return 0; /* all OK */ 364 365 /* doesn't work, gather alternatives... */ 366 if (!ppmu->get_alternatives) 367 return -1; 368 for (i = 0; i < n_ev; ++i) { 369 choice[i] = 0; 370 n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i], 371 cpuhw->alternatives[i]); 372 for (j = 1; j < n_alt[i]; ++j) 373 ppmu->get_constraint(cpuhw->alternatives[i][j], 374 &cpuhw->amasks[i][j], 375 &cpuhw->avalues[i][j]); 376 } 377 378 /* enumerate all possibilities and see if any will work */ 379 i = 0; 380 j = -1; 381 value = mask = nv = 0; 382 while (i < n_ev) { 383 if (j >= 0) { 384 /* we're backtracking, restore context */ 385 value = svalues[i]; 386 mask = smasks[i]; 387 j = choice[i]; 388 } 389 /* 390 * See if any alternative k for event_id i, 391 * where k > j, will satisfy the constraints. 392 */ 393 while (++j < n_alt[i]) { 394 nv = (value | cpuhw->avalues[i][j]) + 395 (value & cpuhw->avalues[i][j] & addf); 396 if ((((nv + tadd) ^ value) & mask) == 0 && 397 (((nv + tadd) ^ cpuhw->avalues[i][j]) 398 & cpuhw->amasks[i][j]) == 0) 399 break; 400 } 401 if (j >= n_alt[i]) { 402 /* 403 * No feasible alternative, backtrack 404 * to event_id i-1 and continue enumerating its 405 * alternatives from where we got up to. 406 */ 407 if (--i < 0) 408 return -1; 409 } else { 410 /* 411 * Found a feasible alternative for event_id i, 412 * remember where we got up to with this event_id, 413 * go on to the next event_id, and start with 414 * the first alternative for it. 415 */ 416 choice[i] = j; 417 svalues[i] = value; 418 smasks[i] = mask; 419 value = nv; 420 mask |= cpuhw->amasks[i][j]; 421 ++i; 422 j = -1; 423 } 424 } 425 426 /* OK, we have a feasible combination, tell the caller the solution */ 427 for (i = 0; i < n_ev; ++i) 428 event_id[i] = cpuhw->alternatives[i][choice[i]]; 429 return 0; 430 } 431 432 /* 433 * Check if newly-added events have consistent settings for 434 * exclude_{user,kernel,hv} with each other and any previously 435 * added events. 436 */ 437 static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], 438 int n_prev, int n_new) 439 { 440 int eu = 0, ek = 0, eh = 0; 441 int i, n, first; 442 struct perf_event *event; 443 444 n = n_prev + n_new; 445 if (n <= 1) 446 return 0; 447 448 first = 1; 449 for (i = 0; i < n; ++i) { 450 if (cflags[i] & PPMU_LIMITED_PMC_OK) { 451 cflags[i] &= ~PPMU_LIMITED_PMC_REQD; 452 continue; 453 } 454 event = ctrs[i]; 455 if (first) { 456 eu = event->attr.exclude_user; 457 ek = event->attr.exclude_kernel; 458 eh = event->attr.exclude_hv; 459 first = 0; 460 } else if (event->attr.exclude_user != eu || 461 event->attr.exclude_kernel != ek || 462 event->attr.exclude_hv != eh) { 463 return -EAGAIN; 464 } 465 } 466 467 if (eu || ek || eh) 468 for (i = 0; i < n; ++i) 469 if (cflags[i] & PPMU_LIMITED_PMC_OK) 470 cflags[i] |= PPMU_LIMITED_PMC_REQD; 471 472 return 0; 473 } 474 475 static u64 check_and_compute_delta(u64 prev, u64 val) 476 { 477 u64 delta = (val - prev) & 0xfffffffful; 478 479 /* 480 * POWER7 can roll back counter values, if the new value is smaller 481 * than the previous value it will cause the delta and the counter to 482 * have bogus values unless we rolled a counter over. If a coutner is 483 * rolled back, it will be smaller, but within 256, which is the maximum 484 * number of events to rollback at once. If we dectect a rollback 485 * return 0. This can lead to a small lack of precision in the 486 * counters. 487 */ 488 if (prev > val && (prev - val) < 256) 489 delta = 0; 490 491 return delta; 492 } 493 494 static void power_pmu_read(struct perf_event *event) 495 { 496 s64 val, delta, prev; 497 498 if (event->hw.state & PERF_HES_STOPPED) 499 return; 500 501 if (!event->hw.idx) 502 return; 503 /* 504 * Performance monitor interrupts come even when interrupts 505 * are soft-disabled, as long as interrupts are hard-enabled. 506 * Therefore we treat them like NMIs. 507 */ 508 do { 509 prev = local64_read(&event->hw.prev_count); 510 barrier(); 511 val = read_pmc(event->hw.idx); 512 delta = check_and_compute_delta(prev, val); 513 if (!delta) 514 return; 515 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); 516 517 local64_add(delta, &event->count); 518 local64_sub(delta, &event->hw.period_left); 519 } 520 521 /* 522 * On some machines, PMC5 and PMC6 can't be written, don't respect 523 * the freeze conditions, and don't generate interrupts. This tells 524 * us if `event' is using such a PMC. 525 */ 526 static int is_limited_pmc(int pmcnum) 527 { 528 return (ppmu->flags & PPMU_LIMITED_PMC5_6) 529 && (pmcnum == 5 || pmcnum == 6); 530 } 531 532 static void freeze_limited_counters(struct cpu_hw_events *cpuhw, 533 unsigned long pmc5, unsigned long pmc6) 534 { 535 struct perf_event *event; 536 u64 val, prev, delta; 537 int i; 538 539 for (i = 0; i < cpuhw->n_limited; ++i) { 540 event = cpuhw->limited_counter[i]; 541 if (!event->hw.idx) 542 continue; 543 val = (event->hw.idx == 5) ? pmc5 : pmc6; 544 prev = local64_read(&event->hw.prev_count); 545 event->hw.idx = 0; 546 delta = check_and_compute_delta(prev, val); 547 if (delta) 548 local64_add(delta, &event->count); 549 } 550 } 551 552 static void thaw_limited_counters(struct cpu_hw_events *cpuhw, 553 unsigned long pmc5, unsigned long pmc6) 554 { 555 struct perf_event *event; 556 u64 val, prev; 557 int i; 558 559 for (i = 0; i < cpuhw->n_limited; ++i) { 560 event = cpuhw->limited_counter[i]; 561 event->hw.idx = cpuhw->limited_hwidx[i]; 562 val = (event->hw.idx == 5) ? pmc5 : pmc6; 563 prev = local64_read(&event->hw.prev_count); 564 if (check_and_compute_delta(prev, val)) 565 local64_set(&event->hw.prev_count, val); 566 perf_event_update_userpage(event); 567 } 568 } 569 570 /* 571 * Since limited events don't respect the freeze conditions, we 572 * have to read them immediately after freezing or unfreezing the 573 * other events. We try to keep the values from the limited 574 * events as consistent as possible by keeping the delay (in 575 * cycles and instructions) between freezing/unfreezing and reading 576 * the limited events as small and consistent as possible. 577 * Therefore, if any limited events are in use, we read them 578 * both, and always in the same order, to minimize variability, 579 * and do it inside the same asm that writes MMCR0. 580 */ 581 static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) 582 { 583 unsigned long pmc5, pmc6; 584 585 if (!cpuhw->n_limited) { 586 mtspr(SPRN_MMCR0, mmcr0); 587 return; 588 } 589 590 /* 591 * Write MMCR0, then read PMC5 and PMC6 immediately. 592 * To ensure we don't get a performance monitor interrupt 593 * between writing MMCR0 and freezing/thawing the limited 594 * events, we first write MMCR0 with the event overflow 595 * interrupt enable bits turned off. 596 */ 597 asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" 598 : "=&r" (pmc5), "=&r" (pmc6) 599 : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), 600 "i" (SPRN_MMCR0), 601 "i" (SPRN_PMC5), "i" (SPRN_PMC6)); 602 603 if (mmcr0 & MMCR0_FC) 604 freeze_limited_counters(cpuhw, pmc5, pmc6); 605 else 606 thaw_limited_counters(cpuhw, pmc5, pmc6); 607 608 /* 609 * Write the full MMCR0 including the event overflow interrupt 610 * enable bits, if necessary. 611 */ 612 if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) 613 mtspr(SPRN_MMCR0, mmcr0); 614 } 615 616 /* 617 * Disable all events to prevent PMU interrupts and to allow 618 * events to be added or removed. 619 */ 620 static void power_pmu_disable(struct pmu *pmu) 621 { 622 struct cpu_hw_events *cpuhw; 623 unsigned long flags; 624 625 if (!ppmu) 626 return; 627 local_irq_save(flags); 628 cpuhw = &__get_cpu_var(cpu_hw_events); 629 630 if (!cpuhw->disabled) { 631 cpuhw->disabled = 1; 632 cpuhw->n_added = 0; 633 634 /* 635 * Check if we ever enabled the PMU on this cpu. 636 */ 637 if (!cpuhw->pmcs_enabled) { 638 ppc_enable_pmcs(); 639 cpuhw->pmcs_enabled = 1; 640 } 641 642 /* 643 * Disable instruction sampling if it was enabled 644 */ 645 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { 646 mtspr(SPRN_MMCRA, 647 cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); 648 mb(); 649 } 650 651 /* 652 * Set the 'freeze counters' bit. 653 * The barrier is to make sure the mtspr has been 654 * executed and the PMU has frozen the events 655 * before we return. 656 */ 657 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); 658 mb(); 659 } 660 local_irq_restore(flags); 661 } 662 663 /* 664 * Re-enable all events if disable == 0. 665 * If we were previously disabled and events were added, then 666 * put the new config on the PMU. 667 */ 668 static void power_pmu_enable(struct pmu *pmu) 669 { 670 struct perf_event *event; 671 struct cpu_hw_events *cpuhw; 672 unsigned long flags; 673 long i; 674 unsigned long val; 675 s64 left; 676 unsigned int hwc_index[MAX_HWEVENTS]; 677 int n_lim; 678 int idx; 679 680 if (!ppmu) 681 return; 682 local_irq_save(flags); 683 cpuhw = &__get_cpu_var(cpu_hw_events); 684 if (!cpuhw->disabled) { 685 local_irq_restore(flags); 686 return; 687 } 688 cpuhw->disabled = 0; 689 690 /* 691 * If we didn't change anything, or only removed events, 692 * no need to recalculate MMCR* settings and reset the PMCs. 693 * Just reenable the PMU with the current MMCR* settings 694 * (possibly updated for removal of events). 695 */ 696 if (!cpuhw->n_added) { 697 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); 698 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); 699 if (cpuhw->n_events == 0) 700 ppc_set_pmu_inuse(0); 701 goto out_enable; 702 } 703 704 /* 705 * Compute MMCR* values for the new set of events 706 */ 707 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, 708 cpuhw->mmcr)) { 709 /* shouldn't ever get here */ 710 printk(KERN_ERR "oops compute_mmcr failed\n"); 711 goto out; 712 } 713 714 /* 715 * Add in MMCR0 freeze bits corresponding to the 716 * attr.exclude_* bits for the first event. 717 * We have already checked that all events have the 718 * same values for these bits as the first event. 719 */ 720 event = cpuhw->event[0]; 721 if (event->attr.exclude_user) 722 cpuhw->mmcr[0] |= MMCR0_FCP; 723 if (event->attr.exclude_kernel) 724 cpuhw->mmcr[0] |= freeze_events_kernel; 725 if (event->attr.exclude_hv) 726 cpuhw->mmcr[0] |= MMCR0_FCHV; 727 728 /* 729 * Write the new configuration to MMCR* with the freeze 730 * bit set and set the hardware events to their initial values. 731 * Then unfreeze the events. 732 */ 733 ppc_set_pmu_inuse(1); 734 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); 735 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); 736 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) 737 | MMCR0_FC); 738 739 /* 740 * Read off any pre-existing events that need to move 741 * to another PMC. 742 */ 743 for (i = 0; i < cpuhw->n_events; ++i) { 744 event = cpuhw->event[i]; 745 if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) { 746 power_pmu_read(event); 747 write_pmc(event->hw.idx, 0); 748 event->hw.idx = 0; 749 } 750 } 751 752 /* 753 * Initialize the PMCs for all the new and moved events. 754 */ 755 cpuhw->n_limited = n_lim = 0; 756 for (i = 0; i < cpuhw->n_events; ++i) { 757 event = cpuhw->event[i]; 758 if (event->hw.idx) 759 continue; 760 idx = hwc_index[i] + 1; 761 if (is_limited_pmc(idx)) { 762 cpuhw->limited_counter[n_lim] = event; 763 cpuhw->limited_hwidx[n_lim] = idx; 764 ++n_lim; 765 continue; 766 } 767 val = 0; 768 if (event->hw.sample_period) { 769 left = local64_read(&event->hw.period_left); 770 if (left < 0x80000000L) 771 val = 0x80000000L - left; 772 } 773 local64_set(&event->hw.prev_count, val); 774 event->hw.idx = idx; 775 if (event->hw.state & PERF_HES_STOPPED) 776 val = 0; 777 write_pmc(idx, val); 778 perf_event_update_userpage(event); 779 } 780 cpuhw->n_limited = n_lim; 781 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; 782 783 out_enable: 784 mb(); 785 write_mmcr0(cpuhw, cpuhw->mmcr[0]); 786 787 /* 788 * Enable instruction sampling if necessary 789 */ 790 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { 791 mb(); 792 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); 793 } 794 795 out: 796 local_irq_restore(flags); 797 } 798 799 static int collect_events(struct perf_event *group, int max_count, 800 struct perf_event *ctrs[], u64 *events, 801 unsigned int *flags) 802 { 803 int n = 0; 804 struct perf_event *event; 805 806 if (!is_software_event(group)) { 807 if (n >= max_count) 808 return -1; 809 ctrs[n] = group; 810 flags[n] = group->hw.event_base; 811 events[n++] = group->hw.config; 812 } 813 list_for_each_entry(event, &group->sibling_list, group_entry) { 814 if (!is_software_event(event) && 815 event->state != PERF_EVENT_STATE_OFF) { 816 if (n >= max_count) 817 return -1; 818 ctrs[n] = event; 819 flags[n] = event->hw.event_base; 820 events[n++] = event->hw.config; 821 } 822 } 823 return n; 824 } 825 826 /* 827 * Add a event to the PMU. 828 * If all events are not already frozen, then we disable and 829 * re-enable the PMU in order to get hw_perf_enable to do the 830 * actual work of reconfiguring the PMU. 831 */ 832 static int power_pmu_add(struct perf_event *event, int ef_flags) 833 { 834 struct cpu_hw_events *cpuhw; 835 unsigned long flags; 836 int n0; 837 int ret = -EAGAIN; 838 839 local_irq_save(flags); 840 perf_pmu_disable(event->pmu); 841 842 /* 843 * Add the event to the list (if there is room) 844 * and check whether the total set is still feasible. 845 */ 846 cpuhw = &__get_cpu_var(cpu_hw_events); 847 n0 = cpuhw->n_events; 848 if (n0 >= ppmu->n_counter) 849 goto out; 850 cpuhw->event[n0] = event; 851 cpuhw->events[n0] = event->hw.config; 852 cpuhw->flags[n0] = event->hw.event_base; 853 854 if (!(ef_flags & PERF_EF_START)) 855 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; 856 857 /* 858 * If group events scheduling transaction was started, 859 * skip the schedulability test here, it will be performed 860 * at commit time(->commit_txn) as a whole 861 */ 862 if (cpuhw->group_flag & PERF_EVENT_TXN) 863 goto nocheck; 864 865 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) 866 goto out; 867 if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) 868 goto out; 869 event->hw.config = cpuhw->events[n0]; 870 871 nocheck: 872 ++cpuhw->n_events; 873 ++cpuhw->n_added; 874 875 ret = 0; 876 out: 877 perf_pmu_enable(event->pmu); 878 local_irq_restore(flags); 879 return ret; 880 } 881 882 /* 883 * Remove a event from the PMU. 884 */ 885 static void power_pmu_del(struct perf_event *event, int ef_flags) 886 { 887 struct cpu_hw_events *cpuhw; 888 long i; 889 unsigned long flags; 890 891 local_irq_save(flags); 892 perf_pmu_disable(event->pmu); 893 894 power_pmu_read(event); 895 896 cpuhw = &__get_cpu_var(cpu_hw_events); 897 for (i = 0; i < cpuhw->n_events; ++i) { 898 if (event == cpuhw->event[i]) { 899 while (++i < cpuhw->n_events) { 900 cpuhw->event[i-1] = cpuhw->event[i]; 901 cpuhw->events[i-1] = cpuhw->events[i]; 902 cpuhw->flags[i-1] = cpuhw->flags[i]; 903 } 904 --cpuhw->n_events; 905 ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); 906 if (event->hw.idx) { 907 write_pmc(event->hw.idx, 0); 908 event->hw.idx = 0; 909 } 910 perf_event_update_userpage(event); 911 break; 912 } 913 } 914 for (i = 0; i < cpuhw->n_limited; ++i) 915 if (event == cpuhw->limited_counter[i]) 916 break; 917 if (i < cpuhw->n_limited) { 918 while (++i < cpuhw->n_limited) { 919 cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; 920 cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; 921 } 922 --cpuhw->n_limited; 923 } 924 if (cpuhw->n_events == 0) { 925 /* disable exceptions if no events are running */ 926 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); 927 } 928 929 perf_pmu_enable(event->pmu); 930 local_irq_restore(flags); 931 } 932 933 /* 934 * POWER-PMU does not support disabling individual counters, hence 935 * program their cycle counter to their max value and ignore the interrupts. 936 */ 937 938 static void power_pmu_start(struct perf_event *event, int ef_flags) 939 { 940 unsigned long flags; 941 s64 left; 942 unsigned long val; 943 944 if (!event->hw.idx || !event->hw.sample_period) 945 return; 946 947 if (!(event->hw.state & PERF_HES_STOPPED)) 948 return; 949 950 if (ef_flags & PERF_EF_RELOAD) 951 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 952 953 local_irq_save(flags); 954 perf_pmu_disable(event->pmu); 955 956 event->hw.state = 0; 957 left = local64_read(&event->hw.period_left); 958 959 val = 0; 960 if (left < 0x80000000L) 961 val = 0x80000000L - left; 962 963 write_pmc(event->hw.idx, val); 964 965 perf_event_update_userpage(event); 966 perf_pmu_enable(event->pmu); 967 local_irq_restore(flags); 968 } 969 970 static void power_pmu_stop(struct perf_event *event, int ef_flags) 971 { 972 unsigned long flags; 973 974 if (!event->hw.idx || !event->hw.sample_period) 975 return; 976 977 if (event->hw.state & PERF_HES_STOPPED) 978 return; 979 980 local_irq_save(flags); 981 perf_pmu_disable(event->pmu); 982 983 power_pmu_read(event); 984 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 985 write_pmc(event->hw.idx, 0); 986 987 perf_event_update_userpage(event); 988 perf_pmu_enable(event->pmu); 989 local_irq_restore(flags); 990 } 991 992 /* 993 * Start group events scheduling transaction 994 * Set the flag to make pmu::enable() not perform the 995 * schedulability test, it will be performed at commit time 996 */ 997 void power_pmu_start_txn(struct pmu *pmu) 998 { 999 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1000 1001 perf_pmu_disable(pmu); 1002 cpuhw->group_flag |= PERF_EVENT_TXN; 1003 cpuhw->n_txn_start = cpuhw->n_events; 1004 } 1005 1006 /* 1007 * Stop group events scheduling transaction 1008 * Clear the flag and pmu::enable() will perform the 1009 * schedulability test. 1010 */ 1011 void power_pmu_cancel_txn(struct pmu *pmu) 1012 { 1013 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1014 1015 cpuhw->group_flag &= ~PERF_EVENT_TXN; 1016 perf_pmu_enable(pmu); 1017 } 1018 1019 /* 1020 * Commit group events scheduling transaction 1021 * Perform the group schedulability test as a whole 1022 * Return 0 if success 1023 */ 1024 int power_pmu_commit_txn(struct pmu *pmu) 1025 { 1026 struct cpu_hw_events *cpuhw; 1027 long i, n; 1028 1029 if (!ppmu) 1030 return -EAGAIN; 1031 cpuhw = &__get_cpu_var(cpu_hw_events); 1032 n = cpuhw->n_events; 1033 if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) 1034 return -EAGAIN; 1035 i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); 1036 if (i < 0) 1037 return -EAGAIN; 1038 1039 for (i = cpuhw->n_txn_start; i < n; ++i) 1040 cpuhw->event[i]->hw.config = cpuhw->events[i]; 1041 1042 cpuhw->group_flag &= ~PERF_EVENT_TXN; 1043 perf_pmu_enable(pmu); 1044 return 0; 1045 } 1046 1047 /* 1048 * Return 1 if we might be able to put event on a limited PMC, 1049 * or 0 if not. 1050 * A event can only go on a limited PMC if it counts something 1051 * that a limited PMC can count, doesn't require interrupts, and 1052 * doesn't exclude any processor mode. 1053 */ 1054 static int can_go_on_limited_pmc(struct perf_event *event, u64 ev, 1055 unsigned int flags) 1056 { 1057 int n; 1058 u64 alt[MAX_EVENT_ALTERNATIVES]; 1059 1060 if (event->attr.exclude_user 1061 || event->attr.exclude_kernel 1062 || event->attr.exclude_hv 1063 || event->attr.sample_period) 1064 return 0; 1065 1066 if (ppmu->limited_pmc_event(ev)) 1067 return 1; 1068 1069 /* 1070 * The requested event_id isn't on a limited PMC already; 1071 * see if any alternative code goes on a limited PMC. 1072 */ 1073 if (!ppmu->get_alternatives) 1074 return 0; 1075 1076 flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; 1077 n = ppmu->get_alternatives(ev, flags, alt); 1078 1079 return n > 0; 1080 } 1081 1082 /* 1083 * Find an alternative event_id that goes on a normal PMC, if possible, 1084 * and return the event_id code, or 0 if there is no such alternative. 1085 * (Note: event_id code 0 is "don't count" on all machines.) 1086 */ 1087 static u64 normal_pmc_alternative(u64 ev, unsigned long flags) 1088 { 1089 u64 alt[MAX_EVENT_ALTERNATIVES]; 1090 int n; 1091 1092 flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); 1093 n = ppmu->get_alternatives(ev, flags, alt); 1094 if (!n) 1095 return 0; 1096 return alt[0]; 1097 } 1098 1099 /* Number of perf_events counting hardware events */ 1100 static atomic_t num_events; 1101 /* Used to avoid races in calling reserve/release_pmc_hardware */ 1102 static DEFINE_MUTEX(pmc_reserve_mutex); 1103 1104 /* 1105 * Release the PMU if this is the last perf_event. 1106 */ 1107 static void hw_perf_event_destroy(struct perf_event *event) 1108 { 1109 if (!atomic_add_unless(&num_events, -1, 1)) { 1110 mutex_lock(&pmc_reserve_mutex); 1111 if (atomic_dec_return(&num_events) == 0) 1112 release_pmc_hardware(); 1113 mutex_unlock(&pmc_reserve_mutex); 1114 } 1115 } 1116 1117 /* 1118 * Translate a generic cache event_id config to a raw event_id code. 1119 */ 1120 static int hw_perf_cache_event(u64 config, u64 *eventp) 1121 { 1122 unsigned long type, op, result; 1123 int ev; 1124 1125 if (!ppmu->cache_events) 1126 return -EINVAL; 1127 1128 /* unpack config */ 1129 type = config & 0xff; 1130 op = (config >> 8) & 0xff; 1131 result = (config >> 16) & 0xff; 1132 1133 if (type >= PERF_COUNT_HW_CACHE_MAX || 1134 op >= PERF_COUNT_HW_CACHE_OP_MAX || 1135 result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 1136 return -EINVAL; 1137 1138 ev = (*ppmu->cache_events)[type][op][result]; 1139 if (ev == 0) 1140 return -EOPNOTSUPP; 1141 if (ev == -1) 1142 return -EINVAL; 1143 *eventp = ev; 1144 return 0; 1145 } 1146 1147 static int power_pmu_event_init(struct perf_event *event) 1148 { 1149 u64 ev; 1150 unsigned long flags; 1151 struct perf_event *ctrs[MAX_HWEVENTS]; 1152 u64 events[MAX_HWEVENTS]; 1153 unsigned int cflags[MAX_HWEVENTS]; 1154 int n; 1155 int err; 1156 struct cpu_hw_events *cpuhw; 1157 1158 if (!ppmu) 1159 return -ENOENT; 1160 1161 /* does not support taken branch sampling */ 1162 if (has_branch_stack(event)) 1163 return -EOPNOTSUPP; 1164 1165 switch (event->attr.type) { 1166 case PERF_TYPE_HARDWARE: 1167 ev = event->attr.config; 1168 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 1169 return -EOPNOTSUPP; 1170 ev = ppmu->generic_events[ev]; 1171 break; 1172 case PERF_TYPE_HW_CACHE: 1173 err = hw_perf_cache_event(event->attr.config, &ev); 1174 if (err) 1175 return err; 1176 break; 1177 case PERF_TYPE_RAW: 1178 ev = event->attr.config; 1179 break; 1180 default: 1181 return -ENOENT; 1182 } 1183 1184 event->hw.config_base = ev; 1185 event->hw.idx = 0; 1186 1187 /* 1188 * If we are not running on a hypervisor, force the 1189 * exclude_hv bit to 0 so that we don't care what 1190 * the user set it to. 1191 */ 1192 if (!firmware_has_feature(FW_FEATURE_LPAR)) 1193 event->attr.exclude_hv = 0; 1194 1195 /* 1196 * If this is a per-task event, then we can use 1197 * PM_RUN_* events interchangeably with their non RUN_* 1198 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. 1199 * XXX we should check if the task is an idle task. 1200 */ 1201 flags = 0; 1202 if (event->attach_state & PERF_ATTACH_TASK) 1203 flags |= PPMU_ONLY_COUNT_RUN; 1204 1205 /* 1206 * If this machine has limited events, check whether this 1207 * event_id could go on a limited event. 1208 */ 1209 if (ppmu->flags & PPMU_LIMITED_PMC5_6) { 1210 if (can_go_on_limited_pmc(event, ev, flags)) { 1211 flags |= PPMU_LIMITED_PMC_OK; 1212 } else if (ppmu->limited_pmc_event(ev)) { 1213 /* 1214 * The requested event_id is on a limited PMC, 1215 * but we can't use a limited PMC; see if any 1216 * alternative goes on a normal PMC. 1217 */ 1218 ev = normal_pmc_alternative(ev, flags); 1219 if (!ev) 1220 return -EINVAL; 1221 } 1222 } 1223 1224 /* 1225 * If this is in a group, check if it can go on with all the 1226 * other hardware events in the group. We assume the event 1227 * hasn't been linked into its leader's sibling list at this point. 1228 */ 1229 n = 0; 1230 if (event->group_leader != event) { 1231 n = collect_events(event->group_leader, ppmu->n_counter - 1, 1232 ctrs, events, cflags); 1233 if (n < 0) 1234 return -EINVAL; 1235 } 1236 events[n] = ev; 1237 ctrs[n] = event; 1238 cflags[n] = flags; 1239 if (check_excludes(ctrs, cflags, n, 1)) 1240 return -EINVAL; 1241 1242 cpuhw = &get_cpu_var(cpu_hw_events); 1243 err = power_check_constraints(cpuhw, events, cflags, n + 1); 1244 put_cpu_var(cpu_hw_events); 1245 if (err) 1246 return -EINVAL; 1247 1248 event->hw.config = events[n]; 1249 event->hw.event_base = cflags[n]; 1250 event->hw.last_period = event->hw.sample_period; 1251 local64_set(&event->hw.period_left, event->hw.last_period); 1252 1253 /* 1254 * See if we need to reserve the PMU. 1255 * If no events are currently in use, then we have to take a 1256 * mutex to ensure that we don't race with another task doing 1257 * reserve_pmc_hardware or release_pmc_hardware. 1258 */ 1259 err = 0; 1260 if (!atomic_inc_not_zero(&num_events)) { 1261 mutex_lock(&pmc_reserve_mutex); 1262 if (atomic_read(&num_events) == 0 && 1263 reserve_pmc_hardware(perf_event_interrupt)) 1264 err = -EBUSY; 1265 else 1266 atomic_inc(&num_events); 1267 mutex_unlock(&pmc_reserve_mutex); 1268 } 1269 event->destroy = hw_perf_event_destroy; 1270 1271 return err; 1272 } 1273 1274 static int power_pmu_event_idx(struct perf_event *event) 1275 { 1276 return event->hw.idx; 1277 } 1278 1279 struct pmu power_pmu = { 1280 .pmu_enable = power_pmu_enable, 1281 .pmu_disable = power_pmu_disable, 1282 .event_init = power_pmu_event_init, 1283 .add = power_pmu_add, 1284 .del = power_pmu_del, 1285 .start = power_pmu_start, 1286 .stop = power_pmu_stop, 1287 .read = power_pmu_read, 1288 .start_txn = power_pmu_start_txn, 1289 .cancel_txn = power_pmu_cancel_txn, 1290 .commit_txn = power_pmu_commit_txn, 1291 .event_idx = power_pmu_event_idx, 1292 }; 1293 1294 /* 1295 * A counter has overflowed; update its count and record 1296 * things if requested. Note that interrupts are hard-disabled 1297 * here so there is no possibility of being interrupted. 1298 */ 1299 static void record_and_restart(struct perf_event *event, unsigned long val, 1300 struct pt_regs *regs) 1301 { 1302 u64 period = event->hw.sample_period; 1303 s64 prev, delta, left; 1304 int record = 0; 1305 1306 if (event->hw.state & PERF_HES_STOPPED) { 1307 write_pmc(event->hw.idx, 0); 1308 return; 1309 } 1310 1311 /* we don't have to worry about interrupts here */ 1312 prev = local64_read(&event->hw.prev_count); 1313 delta = check_and_compute_delta(prev, val); 1314 local64_add(delta, &event->count); 1315 1316 /* 1317 * See if the total period for this event has expired, 1318 * and update for the next period. 1319 */ 1320 val = 0; 1321 left = local64_read(&event->hw.period_left) - delta; 1322 if (period) { 1323 if (left <= 0) { 1324 left += period; 1325 if (left <= 0) 1326 left = period; 1327 record = 1; 1328 event->hw.last_period = event->hw.sample_period; 1329 } 1330 if (left < 0x80000000LL) 1331 val = 0x80000000LL - left; 1332 } 1333 1334 write_pmc(event->hw.idx, val); 1335 local64_set(&event->hw.prev_count, val); 1336 local64_set(&event->hw.period_left, left); 1337 perf_event_update_userpage(event); 1338 1339 /* 1340 * Finally record data if requested. 1341 */ 1342 if (record) { 1343 struct perf_sample_data data; 1344 1345 perf_sample_data_init(&data, ~0ULL, event->hw.last_period); 1346 1347 if (event->attr.sample_type & PERF_SAMPLE_ADDR) 1348 perf_get_data_addr(regs, &data.addr); 1349 1350 if (perf_event_overflow(event, &data, regs)) 1351 power_pmu_stop(event, 0); 1352 } 1353 } 1354 1355 /* 1356 * Called from generic code to get the misc flags (i.e. processor mode) 1357 * for an event_id. 1358 */ 1359 unsigned long perf_misc_flags(struct pt_regs *regs) 1360 { 1361 u32 flags = perf_get_misc_flags(regs); 1362 1363 if (flags) 1364 return flags; 1365 return user_mode(regs) ? PERF_RECORD_MISC_USER : 1366 PERF_RECORD_MISC_KERNEL; 1367 } 1368 1369 /* 1370 * Called from generic code to get the instruction pointer 1371 * for an event_id. 1372 */ 1373 unsigned long perf_instruction_pointer(struct pt_regs *regs) 1374 { 1375 unsigned long use_siar = regs->result; 1376 1377 if (use_siar) 1378 return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); 1379 else 1380 return regs->nip; 1381 } 1382 1383 static bool pmc_overflow(unsigned long val) 1384 { 1385 if ((int)val < 0) 1386 return true; 1387 1388 /* 1389 * Events on POWER7 can roll back if a speculative event doesn't 1390 * eventually complete. Unfortunately in some rare cases they will 1391 * raise a performance monitor exception. We need to catch this to 1392 * ensure we reset the PMC. In all cases the PMC will be 256 or less 1393 * cycles from overflow. 1394 * 1395 * We only do this if the first pass fails to find any overflowing 1396 * PMCs because a user might set a period of less than 256 and we 1397 * don't want to mistakenly reset them. 1398 */ 1399 if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) 1400 return true; 1401 1402 return false; 1403 } 1404 1405 /* 1406 * Performance monitor interrupt stuff 1407 */ 1408 static void perf_event_interrupt(struct pt_regs *regs) 1409 { 1410 int i; 1411 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1412 struct perf_event *event; 1413 unsigned long val; 1414 int found = 0; 1415 int nmi; 1416 1417 if (cpuhw->n_limited) 1418 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), 1419 mfspr(SPRN_PMC6)); 1420 1421 perf_read_regs(regs); 1422 1423 nmi = perf_intr_is_nmi(regs); 1424 if (nmi) 1425 nmi_enter(); 1426 else 1427 irq_enter(); 1428 1429 for (i = 0; i < cpuhw->n_events; ++i) { 1430 event = cpuhw->event[i]; 1431 if (!event->hw.idx || is_limited_pmc(event->hw.idx)) 1432 continue; 1433 val = read_pmc(event->hw.idx); 1434 if (pmc_overflow(val)) { 1435 /* event has overflowed */ 1436 found = 1; 1437 record_and_restart(event, val, regs); 1438 } 1439 } 1440 1441 /* 1442 * In case we didn't find and reset the event that caused 1443 * the interrupt, scan all events and reset any that are 1444 * negative, to avoid getting continual interrupts. 1445 * Any that we processed in the previous loop will not be negative. 1446 */ 1447 if (!found) { 1448 for (i = 0; i < ppmu->n_counter; ++i) { 1449 if (is_limited_pmc(i + 1)) 1450 continue; 1451 val = read_pmc(i + 1); 1452 if (pmc_overflow(val)) 1453 write_pmc(i + 1, 0); 1454 } 1455 } 1456 1457 /* 1458 * Reset MMCR0 to its normal value. This will set PMXE and 1459 * clear FC (freeze counters) and PMAO (perf mon alert occurred) 1460 * and thus allow interrupts to occur again. 1461 * XXX might want to use MSR.PM to keep the events frozen until 1462 * we get back out of this interrupt. 1463 */ 1464 write_mmcr0(cpuhw, cpuhw->mmcr[0]); 1465 1466 if (nmi) 1467 nmi_exit(); 1468 else 1469 irq_exit(); 1470 } 1471 1472 static void power_pmu_setup(int cpu) 1473 { 1474 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); 1475 1476 if (!ppmu) 1477 return; 1478 memset(cpuhw, 0, sizeof(*cpuhw)); 1479 cpuhw->mmcr[0] = MMCR0_FC; 1480 } 1481 1482 static int __cpuinit 1483 power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) 1484 { 1485 unsigned int cpu = (long)hcpu; 1486 1487 switch (action & ~CPU_TASKS_FROZEN) { 1488 case CPU_UP_PREPARE: 1489 power_pmu_setup(cpu); 1490 break; 1491 1492 default: 1493 break; 1494 } 1495 1496 return NOTIFY_OK; 1497 } 1498 1499 int __cpuinit register_power_pmu(struct power_pmu *pmu) 1500 { 1501 if (ppmu) 1502 return -EBUSY; /* something's already registered */ 1503 1504 ppmu = pmu; 1505 pr_info("%s performance monitor hardware support registered\n", 1506 pmu->name); 1507 1508 #ifdef MSR_HV 1509 /* 1510 * Use FCHV to ignore kernel events if MSR.HV is set. 1511 */ 1512 if (mfmsr() & MSR_HV) 1513 freeze_events_kernel = MMCR0_FCHV; 1514 #endif /* CONFIG_PPC64 */ 1515 1516 perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); 1517 perf_cpu_notifier(power_pmu_notifier); 1518 1519 return 0; 1520 } 1521