/* Performance event support for sparc64.
 *
 * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/stacktrace.h>
#include <asm/cpudata.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

#include "kstack.h"

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */

#define MAX_HWEVENTS		2
#define MAX_PERIOD		((1UL << 32) - 1)

#define PIC_UPPER_INDEX		0
#define PIC_LOWER_INDEX		1
#define PIC_NO_INDEX		-1

struct cpu_hw_events {
	/* Number of events currently scheduled onto this cpu.
	 * This tells how many entries in the arrays below
	 * are valid.
	 */
	int			n_events;

	/* Number of new events added since the last hw_perf_disable().
	 * This works because the perf event layer always adds new
	 * events inside of a perf_{disable,enable}() sequence.
	 */
	int			n_added;

	/* Array of events currently scheduled on this cpu.  */
	struct perf_event	*event[MAX_HWEVENTS];

	/* Array of encoded longs, specifying the %pcr register
	 * encoding and the mask of PIC counters this event can
	 * be scheduled on.  See perf_event_encode() et al.
	 */
	unsigned long		events[MAX_HWEVENTS];

	/* The current counter index assigned to an event.  When the
	 * event hasn't been programmed into the cpu yet, this will
	 * hold PIC_NO_INDEX.  The event->hw.idx value tells us where
	 * we ought to schedule the event.
	 */
	int			current_idx[MAX_HWEVENTS];

	/* Software copy of %pcr register on this cpu.  */
	u64			pcr;

	/* Enabled/disabled state.  */
	int			enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
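
/* Illustrative note (not part of the original code): with both 32-bit
 * counters packed into the single 64-bit %pic register, a read via
 * read_pic() (used later in this file) conceptually yields
 *
 *	pic = ((u64) upper_count << 32) | lower_count;
 *
 * so PIC_UPPER_INDEX selects bits 63:32 and PIC_LOWER_INDEX bits 31:0.
 * MAX_PERIOD matches the 32-bit counter width: the largest number of
 * events that can elapse before the 0xffffffff -> 0 overflow interrupt.
 */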

/* An event map describes the characteristics of a performance
 * counter event.  In particular it gives the encoding as well as
 * a mask telling which counters the event can be measured on.
 */
struct perf_event_map {
	u16	encoding;
	u8	pic_mask;
#define PIC_NONE	0x00
#define PIC_UPPER	0x01
#define PIC_LOWER	0x02
};

/* Encode a perf_event_map entry into a long.  */
static unsigned long perf_event_encode(const struct perf_event_map *pmap)
{
	return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
}

static u8 perf_event_get_msk(unsigned long val)
{
	return val & 0xff;
}

static u64 perf_event_get_enc(unsigned long val)
{
	return val >> 16;
}

#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED	0xfffe
#define CACHE_OP_NONSENSE	0xffff

typedef struct perf_event_map cache_map_t
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

struct sparc_pmu {
	const struct perf_event_map	*(*event_map)(int);
	const cache_map_t		*cache_map;
	int				max_events;
	int				upper_shift;
	int				lower_shift;
	int				event_mask;
	int				hv_bit;
	int				irq_bit;
	int				upper_nop;
	int				lower_nop;
};

static const struct perf_event_map ultra3_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
	return &ultra3_perfmon_event_map[event_id];
}
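
/* Illustrative example (not part of the original code): for the ultra3
 * cache-references event above, { 0x0009, PIC_LOWER } encodes as
 *
 *	perf_event_encode(pmap) == (0x0009UL << 16) | 0x02 == 0x00090002
 *
 * and the helpers recover the pieces again:
 *
 *	perf_event_get_enc(0x00090002) == 0x0009
 *	perf_event_get_msk(0x00090002) == PIC_LOWER
 */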

static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu ultra3_pmu = {
	.event_map	= ultra3_event_map,
	.cache_map	= &ultra3_cache_map,
	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map),
	.upper_shift	= 11,
	.lower_shift	= 4,
	.event_mask	= 0x3f,
	.upper_nop	= 0x1c,
	.lower_nop	= 0x14,
};

/* Niagara1 is very limited.  The upper PIC is hard-locked to count
 * only instructions, so it is free running which creates all kinds of
 * problems.  Some hardware designs make one wonder if the creator
 * even looked at how this stuff gets used by software.
 */
static const struct perf_event_map niagara1_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
};

static const struct perf_event_map *niagara1_event_map(int event_id)
{
	return &niagara1_perfmon_event_map[event_id];
}

static const cache_map_t niagara1_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
		[C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara1_pmu = {
	.event_map	= niagara1_event_map,
	.cache_map	= &niagara1_cache_map,
	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map),
	.upper_shift	= 0,
	.lower_shift	= 4,
	.event_mask	= 0x7,
	.upper_nop	= 0x0,
	.lower_nop	= 0x0,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
	return &niagara2_perfmon_event_map[event_id];
}

static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara2_pmu = {
	.event_map	= niagara2_event_map,
	.cache_map	= &niagara2_cache_map,
	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
	.upper_shift	= 19,
	.lower_shift	= 6,
	.event_mask	= 0xfff,
	.hv_bit		= 0x8,
	.irq_bit	= 0x30,
	.upper_nop	= 0x220,
	.lower_nop	= 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

static u64 event_encoding(u64 event_id, int idx)
{
	if (idx == PIC_UPPER_INDEX)
		event_id <<= sparc_pmu->upper_shift;
	else
		event_id <<= sparc_pmu->lower_shift;
	return event_id;
}

static u64 mask_for_index(int idx)
{
	return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
	return event_encoding(idx == PIC_UPPER_INDEX ?
			      sparc_pmu->upper_nop :
			      sparc_pmu->lower_nop, idx);
}

static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 val, mask = mask_for_index(idx);

	val = cpuc->pcr;
	val &= ~mask;
	val |= hwc->config;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 mask = mask_for_index(idx);
	u64 nop = nop_for_index(idx);
	u64 val;

	val = cpuc->pcr;
	val &= ~mask;
	val |= nop;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static u32 read_pmc(int idx)
{
	u64 val;

	read_pic(val);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	read_pic(pic);
	pic &= ~mask;
	pic |= val;
	write_pic(pic);
}

static u64 sparc_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - 32;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	new_raw_count = read_pmc(idx);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
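
/* Worked example (illustrative, not part of the original code): the
 * shift-by-32 in sparc_perf_event_update() sign-extends the 32-bit
 * hardware counts so that wraparound is handled.  If the counter
 * wrapped from prev_raw_count == 0xfffffff0 to new_raw_count == 0x10,
 * then
 *
 *	delta = ((0x10ULL << 32) - (0xfffffff0ULL << 32)) >> 32 == 0x20
 *
 * i.e. 32 events elapsed, rather than a huge negative difference.
 */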

static int sparc_perf_event_set_period(struct perf_event *event,
				       struct hw_perf_event *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	if (left > MAX_PERIOD)
		left = MAX_PERIOD;

	atomic64_set(&hwc->prev_count, (u64)-left);

	write_pmc(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}
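
/* Worked example (illustrative, not part of the original code): to get
 * an overflow interrupt after `left` more events, the counter is seeded
 * with the 32-bit two's complement of `left`.  For left == 1000:
 *
 *	write_pmc(idx, (u64)(-1000) & 0xffffffff);	 i.e. 0xfffffc18
 *
 * so the counter reaches 0xffffffff and wraps to 0 (raising the
 * interrupt) exactly 1000 events later.
 */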

/* If performance event entries have been added, move existing
 * events around (if necessary) and then assign new entries to
 * counters.
 */
static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
{
	int i;

	if (!cpuc->n_added)
		goto out;

	/* Read in the counters which are moving.  */
	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];

		if (cpuc->current_idx[i] != PIC_NO_INDEX &&
		    cpuc->current_idx[i] != cp->hw.idx) {
			sparc_perf_event_update(cp, &cp->hw,
						cpuc->current_idx[i]);
			cpuc->current_idx[i] = PIC_NO_INDEX;
		}
	}

	/* Assign to counters all unassigned events.  */
	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];
		struct hw_perf_event *hwc = &cp->hw;
		int idx = hwc->idx;
		u64 enc;

		if (cpuc->current_idx[i] != PIC_NO_INDEX)
			continue;

		sparc_perf_event_set_period(cp, hwc, idx);
		cpuc->current_idx[i] = idx;

		enc = perf_event_get_enc(cpuc->events[i]);
		pcr |= event_encoding(enc, idx);
	}
out:
	return pcr;
}

void hw_perf_enable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 pcr;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	pcr = cpuc->pcr;
	if (!cpuc->n_events) {
		pcr = 0;
	} else {
		pcr = maybe_change_configuration(cpuc, pcr);

		/* We require that all of the events have the same
		 * configuration, so just fetch the settings from the
		 * first entry.
		 */
		cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
	}

	pcr_ops->write(cpuc->pcr);
}

void hw_perf_disable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	cpuc->n_added = 0;

	val = cpuc->pcr;
	val &= ~(PCR_UTRACE | PCR_STRACE |
		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}

static void sparc_pmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	unsigned long flags;
	int i;

	local_irq_save(flags);
	perf_disable();

	for (i = 0; i < cpuc->n_events; i++) {
		if (event == cpuc->event[i]) {
			int idx = cpuc->current_idx[i];

			/* Shift remaining entries down into
			 * the existing slot.
			 */
			while (++i < cpuc->n_events) {
				cpuc->event[i - 1] = cpuc->event[i];
				cpuc->events[i - 1] = cpuc->events[i];
				cpuc->current_idx[i - 1] =
					cpuc->current_idx[i];
			}

			/* Absorb the final count and turn off the
			 * event.
			 */
			sparc_pmu_disable_event(cpuc, hwc, idx);
			barrier();
			sparc_perf_event_update(event, hwc, idx);

			perf_event_update_userpage(event);

			cpuc->n_events--;
			break;
		}
	}

	perf_enable();
	local_irq_restore(flags);
}

static int active_event_index(struct cpu_hw_events *cpuc,
			      struct perf_event *event)
{
	int i;

	for (i = 0; i < cpuc->n_events; i++) {
		if (cpuc->event[i] == event)
			break;
	}
	BUG_ON(i == cpuc->n_events);
	return cpuc->current_idx[i];
}

static void sparc_pmu_read(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = active_event_index(cpuc, event);
	struct hw_perf_event *hwc = &event->hw;

	sparc_perf_event_update(event, hwc, idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = active_event_index(cpuc, event);
	struct hw_perf_event *hwc = &event->hw;

	sparc_pmu_enable_event(cpuc, hwc, idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

static void perf_stop_nmi_watchdog(void *unused)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	stop_nmi_watchdog(NULL);
	cpuc->pcr = pcr_ops->read();
}

void perf_event_grab_pmc(void)
{
	if (atomic_inc_not_zero(&active_events))
		return;

	mutex_lock(&pmc_grab_mutex);
	if (atomic_read(&active_events) == 0) {
		if (atomic_read(&nmi_active) > 0) {
			on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
			BUG_ON(atomic_read(&nmi_active) != 0);
		}
		atomic_inc(&active_events);
	}
	mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
		if (atomic_read(&nmi_active) == 0)
			on_each_cpu(start_nmi_watchdog, NULL, 1);
		mutex_unlock(&pmc_grab_mutex);
	}
}

static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct perf_event_map *pmap;

	if (!sparc_pmu->cache_map)
		return ERR_PTR(-ENOENT);

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

	if (pmap->encoding == CACHE_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	if (pmap->encoding == CACHE_OP_NONSENSE)
		return ERR_PTR(-EINVAL);

	return pmap;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	perf_event_release_pmc();
}
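
/* Illustrative example (not part of the original code): the cache event
 * config packs type, op and result into the low three bytes, exactly as
 * decoded above:
 *
 *	config = type | (op << 8) | (result << 16)
 *
 * so an L1D read-miss event is
 *
 *	PERF_COUNT_HW_CACHE_L1D |
 *	(PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *	(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) == 0x10000
 */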

/* Make sure all events can be scheduled into the hardware at
 * the same time.  This is simplified by the fact that we only
 * need to support 2 simultaneous HW events.
 *
 * As a side effect, the evts[]->hw.idx values will be assigned
 * on success.  These are pending indexes.  When the events are
 * actually programmed into the chip, these values will propagate
 * to the per-cpu cpuc->current_idx[] slots, see the code in
 * maybe_change_configuration() for details.
 */
static int sparc_check_constraints(struct perf_event **evts,
				   unsigned long *events, int n_ev)
{
	u8 msk0 = 0, msk1 = 0;
	int idx0 = 0;

	/* This case is possible when we are invoked from
	 * hw_perf_group_sched_in().
	 */
	if (!n_ev)
		return 0;

	if (n_ev > perf_max_events)
		return -1;

	msk0 = perf_event_get_msk(events[0]);
	if (n_ev == 1) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}
	BUG_ON(n_ev != 2);
	msk1 = perf_event_get_msk(events[1]);

	/* If both events can go on any counter, OK.  */
	if (msk0 == (PIC_UPPER | PIC_LOWER) &&
	    msk1 == (PIC_UPPER | PIC_LOWER))
		goto success;

	/* If one event is limited to a specific counter,
	 * and the other can go on both, OK.
	 */
	if ((msk0 == PIC_UPPER || msk0 == PIC_LOWER) &&
	    msk1 == (PIC_UPPER | PIC_LOWER)) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}

	if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
	    msk0 == (PIC_UPPER | PIC_LOWER)) {
		if (msk1 & PIC_UPPER)
			idx0 = 1;
		goto success;
	}

	/* If the events are fixed to different counters, OK.  */
	if ((msk0 == PIC_UPPER && msk1 == PIC_LOWER) ||
	    (msk0 == PIC_LOWER && msk1 == PIC_UPPER)) {
		if (msk0 & PIC_LOWER)
			idx0 = 1;
		goto success;
	}

	/* Otherwise, there is a conflict.  */
	return -1;

success:
	evts[0]->hw.idx = idx0;
	if (n_ev == 2)
		evts[1]->hw.idx = idx0 ^ 1;
	return 0;
}
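
/* Illustrative example (not part of the original code): on ultra3,
 * PERF_COUNT_HW_CACHE_REFERENCES is fixed to PIC_LOWER and
 * PERF_COUNT_HW_CACHE_MISSES to PIC_UPPER.  Scheduling both at once
 * hits the "fixed to different counters" case above: msk0 == PIC_LOWER
 * forces idx0 = 1 (PIC_LOWER_INDEX), and the other event gets
 * idx0 ^ 1 == 0 (PIC_UPPER_INDEX).
 */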

static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
{
	int eu = 0, ek = 0, eh = 0;
	struct perf_event *event;
	int i, n, first;

	n = n_prev + n_new;
	if (n <= 1)
		return 0;

	first = 1;
	for (i = 0; i < n; i++) {
		event = evts[i];
		if (first) {
			eu = event->attr.exclude_user;
			ek = event->attr.exclude_kernel;
			eh = event->attr.exclude_hv;
			first = 0;
		} else if (event->attr.exclude_user != eu ||
			   event->attr.exclude_kernel != ek ||
			   event->attr.exclude_hv != eh) {
			return -EAGAIN;
		}
	}

	return 0;
}

static int collect_events(struct perf_event *group, int max_count,
			  struct perf_event *evts[], unsigned long *events,
			  int *current_idx)
{
	struct perf_event *event;
	int n = 0;

	if (!is_software_event(group)) {
		if (n >= max_count)
			return -1;
		evts[n] = group;
		events[n] = group->hw.event_base;
		current_idx[n++] = PIC_NO_INDEX;
	}
	list_for_each_entry(event, &group->sibling_list, group_entry) {
		if (!is_software_event(event) &&
		    event->state != PERF_EVENT_STATE_OFF) {
			if (n >= max_count)
				return -1;
			evts[n] = event;
			events[n] = event->hw.event_base;
			current_idx[n++] = PIC_NO_INDEX;
		}
	}
	return n;
}

static void event_sched_in(struct perf_event *event)
{
	event->state = PERF_EVENT_STATE_ACTIVE;
	event->oncpu = smp_processor_id();
	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
	if (is_software_event(event))
		event->pmu->enable(event);
}

int hw_perf_group_sched_in(struct perf_event *group_leader,
			   struct perf_cpu_context *cpuctx,
			   struct perf_event_context *ctx)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct perf_event *sub;
	int n0, n;

	if (!sparc_pmu)
		return 0;

	n0 = cpuc->n_events;
	n = collect_events(group_leader, perf_max_events - n0,
			   &cpuc->event[n0], &cpuc->events[n0],
			   &cpuc->current_idx[n0]);
	if (n < 0)
		return -EAGAIN;
	if (check_excludes(cpuc->event, n0, n))
		return -EINVAL;
	if (sparc_check_constraints(cpuc->event, cpuc->events, n + n0))
		return -EAGAIN;
	cpuc->n_events = n0 + n;
	cpuc->n_added += n;

	cpuctx->active_oncpu += n;
	n = 1;
	event_sched_in(group_leader);
	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
		if (sub->state != PERF_EVENT_STATE_OFF) {
			event_sched_in(sub);
			n++;
		}
	}
	ctx->nr_active += n;

	return 1;
}

static int sparc_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int n0, ret = -EAGAIN;
	unsigned long flags;

	local_irq_save(flags);
	perf_disable();

	n0 = cpuc->n_events;
	if (n0 >= perf_max_events)
		goto out;

	cpuc->event[n0] = event;
	cpuc->events[n0] = event->hw.event_base;
	cpuc->current_idx[n0] = PIC_NO_INDEX;

	if (check_excludes(cpuc->event, n0, 1))
		goto out;
	if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1))
		goto out;

	cpuc->n_events++;
	cpuc->n_added++;

	ret = 0;
out:
	perf_enable();
	local_irq_restore(flags);
	return ret;
}

static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct perf_event *evts[MAX_HWEVENTS];
	struct hw_perf_event *hwc = &event->hw;
	unsigned long events[MAX_HWEVENTS];
	int current_idx_dmy[MAX_HWEVENTS];
	const struct perf_event_map *pmap;
	int n;

	if (atomic_read(&nmi_active) < 0)
		return -ENODEV;

	if (attr->type == PERF_TYPE_HARDWARE) {
		if (attr->config >= sparc_pmu->max_events)
			return -EINVAL;
		pmap = sparc_pmu->event_map(attr->config);
	} else if (attr->type == PERF_TYPE_HW_CACHE) {
		pmap = sparc_map_cache_event(attr->config);
		if (IS_ERR(pmap))
			return PTR_ERR(pmap);
	} else
		return -EOPNOTSUPP;

	/* We save the enable bits in the config_base.  */
	hwc->config_base = sparc_pmu->irq_bit;
	if (!attr->exclude_user)
		hwc->config_base |= PCR_UTRACE;
	if (!attr->exclude_kernel)
		hwc->config_base |= PCR_STRACE;
	if (!attr->exclude_hv)
		hwc->config_base |= sparc_pmu->hv_bit;

	hwc->event_base = perf_event_encode(pmap);

	n = 0;
	if (event->group_leader != event) {
		n = collect_events(event->group_leader,
				   perf_max_events - 1,
				   evts, events, current_idx_dmy);
		if (n < 0)
			return -EINVAL;
	}
	events[n] = hwc->event_base;
	evts[n] = event;

	if (check_excludes(evts, n, 1))
		return -EINVAL;

	if (sparc_check_constraints(evts, events, n + 1))
		return -EINVAL;

	hwc->idx = PIC_NO_INDEX;

	/* Try to do all error checking before this point, as unwinding
	 * state after grabbing the PMC is difficult.
	 */
	perf_event_grab_pmc();
	event->destroy = hw_perf_event_destroy;

	if (!hwc->sample_period) {
		hwc->sample_period = MAX_PERIOD;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

static const struct pmu pmu = {
	.enable		= sparc_pmu_enable,
	.disable	= sparc_pmu_disable,
	.read		= sparc_pmu_read,
	.unthrottle	= sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
	int err = __hw_perf_event_init(event);

	if (err)
		return ERR_PTR(err);
	return &pmu;
}

void perf_event_print_debug(void)
{
	unsigned long flags;
	u64 pcr, pic;
	int cpu;

	if (!sparc_pmu)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();

	pcr = pcr_ops->read();
	read_pic(pic);

	pr_info("\n");
	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
		cpu, pcr, pic);

	local_irq_restore(flags);
}

static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
					    unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int i;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);

	/* If the PMU has the TOE IRQ enable bits, we need to do a
	 * dummy write to the %pcr to clear the overflow bits and thus
	 * the interrupt.
	 *
	 * Do this before we peek at the counters to determine
	 * overflow so we don't lose any events.
	 */
	if (sparc_pmu->irq_bit)
		pcr_ops->write(cpuc->pcr);

	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *event = cpuc->event[i];
		int idx = cpuc->current_idx[i];
		struct hw_perf_event *hwc;
		u64 val;

		hwc = &event->hw;
		val = sparc_perf_event_update(event, hwc, idx);
		if (val & (1ULL << 31))
			continue;

		data.period = event->hw.last_period;
		if (!sparc_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			sparc_pmu_disable_event(cpuc, hwc, idx);
	}

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call	= perf_event_nmi_handler,
};
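
/* Descriptive note (not part of the original code): counters are seeded
 * by sparc_perf_event_set_period() with -left and count up toward the
 * 0xffffffff -> 0 wrap.  Treating bit 31 as the sign bit of the 32-bit
 * count, a value that is still "negative" is assumed not to have
 * crossed zero yet, so that counter is skipped in the handler above
 * (see the comment at the top of this file about overflow interrupts
 * arriving for counters not currently in use).
 */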

static bool __init supported_pmu(void)
{
	if (!strcmp(sparc_pmu_type, "ultra3") ||
	    !strcmp(sparc_pmu_type, "ultra3+") ||
	    !strcmp(sparc_pmu_type, "ultra3i") ||
	    !strcmp(sparc_pmu_type, "ultra4+")) {
		sparc_pmu = &ultra3_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara")) {
		sparc_pmu = &niagara1_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara2")) {
		sparc_pmu = &niagara2_pmu;
		return true;
	}
	return false;
}

void __init init_hw_perf_events(void)
{
	pr_info("Performance events: ");

	if (!supported_pmu()) {
		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
		return;
	}

	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

	/* All sparc64 PMUs currently have 2 events.  */
	perf_max_events = 2;

	register_die_notifier(&perf_event_nmi_notifier);
}

static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

static void perf_callchain_kernel(struct pt_regs *regs,
				  struct perf_callchain_entry *entry)
{
	unsigned long ksp, fp;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	int graph = 0;
#endif

	callchain_store(entry, PERF_CONTEXT_KERNEL);
	callchain_store(entry, regs->tpc);

	ksp = regs->u_regs[UREG_I6];
	fp = ksp + STACK_BIAS;
	do {
		struct sparc_stackf *sf;
		struct pt_regs *regs;
		unsigned long pc;

		if (!kstack_valid(current_thread_info(), fp))
			break;

		sf = (struct sparc_stackf *) fp;
		regs = (struct pt_regs *) (sf + 1);

		if (kstack_is_trap_frame(current_thread_info(), regs)) {
			if (user_mode(regs))
				break;
			pc = regs->tpc;
			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
		} else {
			pc = sf->callers_pc;
			fp = (unsigned long)sf->fp + STACK_BIAS;
		}
		callchain_store(entry, pc);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
		if ((pc + 8UL) == (unsigned long) &return_to_handler) {
			int index = current->curr_ret_stack;
			if (current->ret_stack && index >= graph) {
				pc = current->ret_stack[index - graph].ret;
				callchain_store(entry, pc);
				graph++;
			}
		}
#endif
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}

static void perf_callchain_user_64(struct pt_regs *regs,
				   struct perf_callchain_entry *entry)
{
	unsigned long ufp;

	callchain_store(entry, PERF_CONTEXT_USER);
	callchain_store(entry, regs->tpc);

	ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
	do {
		struct sparc_stackf *usf, sf;
		unsigned long pc;

		usf = (struct sparc_stackf *) ufp;
		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
			break;

		pc = sf.callers_pc;
		ufp = (unsigned long)sf.fp + STACK_BIAS;
		callchain_store(entry, pc);
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}

static void perf_callchain_user_32(struct pt_regs *regs,
				   struct perf_callchain_entry *entry)
{
	unsigned long ufp;

	callchain_store(entry, PERF_CONTEXT_USER);
	callchain_store(entry, regs->tpc);

	ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
	do {
		struct sparc_stackf32 *usf, sf;
		unsigned long pc;

		usf = (struct sparc_stackf32 *) ufp;
		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
			break;

		pc = sf.callers_pc;
		ufp = (unsigned long)sf.fp;
		callchain_store(entry, pc);
	} while (entry->nr < PERF_MAX_STACK_DEPTH);
}
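
/* Descriptive note (not part of the original code): the two user-space
 * walkers above differ only in frame layout.  64-bit tasks keep frame
 * pointers offset by STACK_BIAS (the sparc v9 stack bias), so it is
 * added back before each dereference; 32-bit compat tasks use unbiased
 * 32-bit frame pointers, hence the 0xffffffff mask and the use of
 * struct sparc_stackf32.
 */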

/* Like powerpc we can't get PMU interrupts within the PMU handler,
 * so no need for separate NMI and IRQ chains as on x86.
 */
static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);

struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry = &__get_cpu_var(callchain);

	entry->nr = 0;
	if (!user_mode(regs)) {
		stack_trace_flush();
		perf_callchain_kernel(regs, entry);
		if (current->mm)
			regs = task_pt_regs(current);
		else
			regs = NULL;
	}
	if (regs) {
		flushw_user();
		if (test_thread_flag(TIF_32BIT))
			perf_callchain_user_32(regs, entry);
		else
			perf_callchain_user_64(regs, entry);
	}
	return entry;
}