/* Performance event support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */
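
/* Illustrative example (an editor's sketch based only on the ultra3
 * constants defined later in this file, not on chip documentation):
 * with upper_shift = 11, lower_shift = 4 and event_mask = 0x3f,
 * selecting event 0x09 on the lower counter while leaving the upper
 * counter's field untouched amounts to roughly:
 *
 *	pcr &= ~((u64) 0x3f << 4);
 *	pcr |= (u64) 0x09 << 4;
 *	pcr |= PCR_UTRACE | PCR_STRACE;	(the global enable bits)
 *
 * which is what event_encoding(), mask_for_index(),
 * sparc_pmu_enable_event() and hw_perf_enable() below end up
 * composing.
 */
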
#define MAX_HWEVENTS			2
#define MAX_PERIOD			((1UL << 32) - 1)

#define PIC_UPPER_INDEX			0
#define PIC_LOWER_INDEX			1

struct cpu_hw_events {
	struct perf_event	*events[MAX_HWEVENTS];
	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	u64			pcr;
	int			enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

struct perf_event_map {
	u16	encoding;
	u8	pic_mask;
#define PIC_NONE	0x00
#define PIC_UPPER	0x01
#define PIC_LOWER	0x02
};

static unsigned long perf_event_encode(const struct perf_event_map *pmap)
{
	return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
}

static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk)
{
	*msk = val & 0xff;
	*enc = val >> 16;
}

#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED	0xfffe
#define CACHE_OP_NONSENSE	0xffff

typedef struct perf_event_map cache_map_t
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

struct sparc_pmu {
	const struct perf_event_map	*(*event_map)(int);
	const cache_map_t		*cache_map;
	int				max_events;
	int				upper_shift;
	int				lower_shift;
	int				event_mask;
	int				hv_bit;
	int				irq_bit;
	int				upper_nop;
	int				lower_nop;
};

static const struct perf_event_map ultra3_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
	return &ultra3_perfmon_event_map[event_id];
}
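
/* The tables below map the generic perf cache events, indexed by
 * (cache type, operation, result), onto raw chip encodings.
 * CACHE_OP_UNSUPPORTED marks combinations the chip cannot count
 * (sparc_map_cache_event() reports these as -ENOENT), while
 * CACHE_OP_NONSENSE marks combinations that are meaningless for the
 * hardware, such as writes to the instruction cache (-EINVAL).
 */
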
static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
		[ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu ultra3_pmu = {
	.event_map	= ultra3_event_map,
	.cache_map	= &ultra3_cache_map,
	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map),
	.upper_shift	= 11,
	.lower_shift	= 4,
	.event_mask	= 0x3f,
	.upper_nop	= 0x1c,
	.lower_nop	= 0x14,
};

/* Niagara1 is very limited.  The upper PIC is hard-locked to count
 * only instructions, so it is free running which creates all kinds of
 * problems.  Some hardware designs make one wonder if the creator
 * even looked at how this stuff gets used by software.
 */
static const struct perf_event_map niagara1_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
};

static const struct perf_event_map *niagara1_event_map(int event_id)
{
	return &niagara1_perfmon_event_map[event_id];
}

static const cache_map_t niagara1_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
		[C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
		[ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara1_pmu = {
	.event_map	= niagara1_event_map,
	.cache_map	= &niagara1_cache_map,
	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map),
	.upper_shift	= 0,
	.lower_shift	= 4,
	.event_mask	= 0x7,
	.upper_nop	= 0x0,
	.lower_nop	= 0x0,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
	return &niagara2_perfmon_event_map[event_id];
}

static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
		[ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara2_pmu = {
	.event_map	= niagara2_event_map,
	.cache_map	= &niagara2_cache_map,
	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
	.upper_shift	= 19,
	.lower_shift	= 6,
	.event_mask	= 0xfff,
	.hv_bit		= 0x8,
	.irq_bit	= 0x30,
	.upper_nop	= 0x220,
	.lower_nop	= 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;
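
/* The following helpers build per-counter values for the PCR:
 * event_encoding() shifts an event number into the upper or lower
 * event field, mask_for_index() produces the mask covering that
 * field, and nop_for_index() yields the "software count" NOP event
 * described at the top of this file.  Note that
 * sparc_pmu_disable_event() does not actually stop a counter, it
 * simply reprograms its field with that do-nothing event.
 */
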
static u64 event_encoding(u64 event_id, int idx)
{
	if (idx == PIC_UPPER_INDEX)
		event_id <<= sparc_pmu->upper_shift;
	else
		event_id <<= sparc_pmu->lower_shift;
	return event_id;
}

static u64 mask_for_index(int idx)
{
	return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
	return event_encoding(idx == PIC_UPPER_INDEX ?
			      sparc_pmu->upper_nop :
			      sparc_pmu->lower_nop, idx);
}

static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc,
					  struct hw_perf_event *hwc, int idx)
{
	u64 val, mask = mask_for_index(idx);

	val = cpuc->pcr;
	val &= ~mask;
	val |= hwc->config;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc,
					   struct hw_perf_event *hwc, int idx)
{
	u64 mask = mask_for_index(idx);
	u64 nop = nop_for_index(idx);
	u64 val;

	val = cpuc->pcr;
	val &= ~mask;
	val |= nop;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

void hw_perf_enable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;
	int i;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	val = cpuc->pcr;

	for (i = 0; i < MAX_HWEVENTS; i++) {
		struct perf_event *cp = cpuc->events[i];
		struct hw_perf_event *hwc;

		if (!cp)
			continue;
		hwc = &cp->hw;
		val |= hwc->config_base;
	}

	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

void hw_perf_disable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;

	val = cpuc->pcr;
	val &= ~(PCR_UTRACE | PCR_STRACE |
		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static u32 read_pmc(int idx)
{
	u64 val;

	read_pic(val);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	read_pic(pic);
	pic &= ~mask;
	pic |= val;
	write_pic(pic);
}
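
/* Reload a counter for sampling.  The PICs count upward and raise an
 * interrupt on the 0xffffffff -> 0 transition, so the counter is
 * programmed with -left (truncated to 32 bits) and thus overflows
 * after 'left' more events; a remaining period of 0x1000, for
 * example, is written as 0xfffff000.  'left' is clamped to
 * MAX_PERIOD because each PIC is only 32 bits wide.
 */
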
static int sparc_perf_event_set_period(struct perf_event *event,
				       struct hw_perf_event *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	if (left > MAX_PERIOD)
		left = MAX_PERIOD;

	atomic64_set(&hwc->prev_count, (u64)-left);

	write_pmc(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

static int sparc_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (test_and_set_bit(idx, cpuc->used_mask))
		return -EAGAIN;

	sparc_pmu_disable_event(cpuc, hwc, idx);

	cpuc->events[idx] = event;
	set_bit(idx, cpuc->active_mask);

	sparc_perf_event_set_period(event, hwc, idx);
	sparc_pmu_enable_event(cpuc, hwc, idx);
	perf_event_update_userpage(event);
	return 0;
}

static u64 sparc_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - 32;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	new_raw_count = read_pmc(idx);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static void sparc_pmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	clear_bit(idx, cpuc->active_mask);
	sparc_pmu_disable_event(cpuc, hwc, idx);

	barrier();

	sparc_perf_event_update(event, hwc, idx);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static void sparc_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	sparc_perf_event_update(event, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	sparc_pmu_enable_event(cpuc, hwc, hwc->idx);
}
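
/* The performance counters are shared with the NMI watchdog, which
 * also programs the PCR.  The watchdog is therefore stopped on all
 * cpus while any perf events exist and restarted when the last event
 * is released; active_events counts the events and pmc_grab_mutex
 * serializes the handoff.
 */
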
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

static void perf_stop_nmi_watchdog(void *unused)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	stop_nmi_watchdog(NULL);
	cpuc->pcr = pcr_ops->read();
}

void perf_event_grab_pmc(void)
{
	if (atomic_inc_not_zero(&active_events))
		return;

	mutex_lock(&pmc_grab_mutex);
	if (atomic_read(&active_events) == 0) {
		if (atomic_read(&nmi_active) > 0) {
			on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
			BUG_ON(atomic_read(&nmi_active) != 0);
		}
		atomic_inc(&active_events);
	}
	mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
		if (atomic_read(&nmi_active) == 0)
			on_each_cpu(start_nmi_watchdog, NULL, 1);
		mutex_unlock(&pmc_grab_mutex);
	}
}

static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct perf_event_map *pmap;

	if (!sparc_pmu->cache_map)
		return ERR_PTR(-ENOENT);

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

	if (pmap->encoding == CACHE_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	if (pmap->encoding == CACHE_OP_NONSENSE)
		return ERR_PTR(-EINVAL);

	return pmap;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	perf_event_release_pmc();
}

/* Make sure all events can be scheduled into the hardware at
 * the same time.  This is simplified by the fact that we only
 * need to support 2 simultaneous HW events.
 */
static int sparc_check_constraints(unsigned long *events, int n_ev)
{
	if (n_ev <= perf_max_events) {
		u8 msk1, msk2;
		u16 dummy;

		if (n_ev == 1)
			return 0;
		BUG_ON(n_ev != 2);
		perf_event_decode(events[0], &dummy, &msk1);
		perf_event_decode(events[1], &dummy, &msk2);

		/* If both events can go on any counter, OK. */
		if (msk1 == (PIC_UPPER | PIC_LOWER) &&
		    msk2 == (PIC_UPPER | PIC_LOWER))
			return 0;

		/* If one event is limited to a specific counter,
		 * and the other can go on both, OK.
		 */
		if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
		    msk2 == (PIC_UPPER | PIC_LOWER))
			return 0;
		if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) &&
		    msk1 == (PIC_UPPER | PIC_LOWER))
			return 0;

		/* If the events are fixed to different counters, OK. */
		if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) ||
		    (msk1 == PIC_LOWER && msk2 == PIC_UPPER))
			return 0;

		/* Otherwise, there is a conflict. */
	}

	return -1;
}
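
/* The user/supervisor/hypervisor trace enables in the PCR are global
 * rather than per-counter, so all events scheduled together must
 * agree on their exclude_user/exclude_kernel/exclude_hv settings.
 */
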
static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
{
	int eu = 0, ek = 0, eh = 0;
	struct perf_event *event;
	int i, n, first;

	n = n_prev + n_new;
	if (n <= 1)
		return 0;

	first = 1;
	for (i = 0; i < n; i++) {
		event = evts[i];
		if (first) {
			eu = event->attr.exclude_user;
			ek = event->attr.exclude_kernel;
			eh = event->attr.exclude_hv;
			first = 0;
		} else if (event->attr.exclude_user != eu ||
			   event->attr.exclude_kernel != ek ||
			   event->attr.exclude_hv != eh) {
			return -EAGAIN;
		}
	}

	return 0;
}

static int collect_events(struct perf_event *group, int max_count,
			  struct perf_event *evts[], unsigned long *events)
{
	struct perf_event *event;
	int n = 0;

	if (!is_software_event(group)) {
		if (n >= max_count)
			return -1;
		evts[n] = group;
		events[n++] = group->hw.event_base;
	}
	list_for_each_entry(event, &group->sibling_list, group_entry) {
		if (!is_software_event(event) &&
		    event->state != PERF_EVENT_STATE_OFF) {
			if (n >= max_count)
				return -1;
			evts[n] = event;
			events[n++] = event->hw.event_base;
		}
	}
	return n;
}

static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct perf_event *evts[MAX_HWEVENTS];
	struct hw_perf_event *hwc = &event->hw;
	unsigned long events[MAX_HWEVENTS];
	const struct perf_event_map *pmap;
	u64 enc;
	int n;

	if (atomic_read(&nmi_active) < 0)
		return -ENODEV;

	if (attr->type == PERF_TYPE_HARDWARE) {
		if (attr->config >= sparc_pmu->max_events)
			return -EINVAL;
		pmap = sparc_pmu->event_map(attr->config);
	} else if (attr->type == PERF_TYPE_HW_CACHE) {
		pmap = sparc_map_cache_event(attr->config);
		if (IS_ERR(pmap))
			return PTR_ERR(pmap);
	} else
		return -EOPNOTSUPP;

	/* We save the enable bits in the config_base.  So to
	 * turn off sampling just write 'config', and to enable
	 * things write 'config | config_base'.
	 */
	hwc->config_base = sparc_pmu->irq_bit;
	if (!attr->exclude_user)
		hwc->config_base |= PCR_UTRACE;
	if (!attr->exclude_kernel)
		hwc->config_base |= PCR_STRACE;
	if (!attr->exclude_hv)
		hwc->config_base |= sparc_pmu->hv_bit;

	hwc->event_base = perf_event_encode(pmap);

	enc = pmap->encoding;

	n = 0;
	if (event->group_leader != event) {
		n = collect_events(event->group_leader,
				   perf_max_events - 1,
				   evts, events);
		if (n < 0)
			return -EINVAL;
	}
	events[n] = hwc->event_base;
	evts[n] = event;

	if (check_excludes(evts, n, 1))
		return -EINVAL;

	if (sparc_check_constraints(events, n + 1))
		return -EINVAL;

	/* Try to do all error checking before this point, as unwinding
	 * state after grabbing the PMC is difficult.
	 */
	perf_event_grab_pmc();
	event->destroy = hw_perf_event_destroy;

	if (!hwc->sample_period) {
		hwc->sample_period = MAX_PERIOD;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	}

	if (pmap->pic_mask & PIC_UPPER) {
		hwc->idx = PIC_UPPER_INDEX;
		enc <<= sparc_pmu->upper_shift;
	} else {
		hwc->idx = PIC_LOWER_INDEX;
		enc <<= sparc_pmu->lower_shift;
	}

	hwc->config |= enc;
	return 0;
}

static const struct pmu pmu = {
	.enable		= sparc_pmu_enable,
	.disable	= sparc_pmu_disable,
	.read		= sparc_pmu_read,
	.unthrottle	= sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
	int err = __hw_perf_event_init(event);

	if (err)
		return ERR_PTR(err);
	return &pmu;
}

void perf_event_print_debug(void)
{
	unsigned long flags;
	u64 pcr, pic;
	int cpu;

	if (!sparc_pmu)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();

	pcr = pcr_ops->read();
	read_pic(pic);

	pr_info("\n");
	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
		cpu, pcr, pic);

	local_irq_restore(flags);
}
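
/* Counter overflow arrives via the NMI path.  As noted at the top of
 * this file, the interrupt enables are shared, so an NMI may fire for
 * a counter that is merely parked on the NOP event.  A counter whose
 * freshly read value still has bit 31 set is assumed not to have
 * wrapped yet (it was loaded with a negative period) and is skipped.
 */
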
static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
					    unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx < MAX_HWEVENTS; idx++) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		hwc = &event->hw;
		val = sparc_perf_event_update(event, hwc, idx);
		if (val & (1ULL << 31))
			continue;

		data.period = event->hw.last_period;
		if (!sparc_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			sparc_pmu_disable_event(cpuc, hwc, idx);
	}

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
};

static bool __init supported_pmu(void)
{
	if (!strcmp(sparc_pmu_type, "ultra3") ||
	    !strcmp(sparc_pmu_type, "ultra3+") ||
	    !strcmp(sparc_pmu_type, "ultra3i") ||
	    !strcmp(sparc_pmu_type, "ultra4+")) {
		sparc_pmu = &ultra3_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara")) {
		sparc_pmu = &niagara1_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara2")) {
		sparc_pmu = &niagara2_pmu;
		return true;
	}
	return false;
}

void __init init_hw_perf_events(void)
{
	pr_info("Performance events: ");

	if (!supported_pmu()) {
		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
		return;
	}

	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

	/* All sparc64 PMUs currently have 2 events.  But this simple
	 * driver only supports one active event at a time.
	 */
	perf_max_events = 1;

	register_die_notifier(&perf_event_nmi_notifier);
}