1 /* 2 * Xtensa Performance Monitor Module driver 3 * See Tensilica Debug User's Guide for PMU registers documentation. 4 * 5 * Copyright (C) 2015 Cadence Design Systems Inc. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 12 #include <linux/interrupt.h> 13 #include <linux/irqdomain.h> 14 #include <linux/module.h> 15 #include <linux/of.h> 16 #include <linux/perf_event.h> 17 #include <linux/platform_device.h> 18 19 #include <asm/processor.h> 20 #include <asm/stacktrace.h> 21 22 /* Global control/status for all perf counters */ 23 #define XTENSA_PMU_PMG 0x1000 24 /* Perf counter values */ 25 #define XTENSA_PMU_PM(i) (0x1080 + (i) * 4) 26 /* Perf counter control registers */ 27 #define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4) 28 /* Perf counter status registers */ 29 #define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4) 30 31 #define XTENSA_PMU_PMG_PMEN 0x1 32 33 #define XTENSA_PMU_COUNTER_MASK 0xffffffffULL 34 #define XTENSA_PMU_COUNTER_MAX 0x7fffffff 35 36 #define XTENSA_PMU_PMCTRL_INTEN 0x00000001 37 #define XTENSA_PMU_PMCTRL_KRNLCNT 0x00000008 38 #define XTENSA_PMU_PMCTRL_TRACELEVEL 0x000000f0 39 #define XTENSA_PMU_PMCTRL_SELECT_SHIFT 8 40 #define XTENSA_PMU_PMCTRL_SELECT 0x00001f00 41 #define XTENSA_PMU_PMCTRL_MASK_SHIFT 16 42 #define XTENSA_PMU_PMCTRL_MASK 0xffff0000 43 44 #define XTENSA_PMU_MASK(select, mask) \ 45 (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \ 46 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \ 47 XTENSA_PMU_PMCTRL_TRACELEVEL | \ 48 XTENSA_PMU_PMCTRL_INTEN) 49 50 #define XTENSA_PMU_PMSTAT_OVFL 0x00000001 51 #define XTENSA_PMU_PMSTAT_INTASRT 0x00000010 52 53 struct xtensa_pmu_events { 54 /* Array of events currently on this core */ 55 struct perf_event *event[XCHAL_NUM_PERF_COUNTERS]; 56 /* Bitmap of used hardware counters */ 57 unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)]; 58 }; 59 static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events); 60 61 static const u32 xtensa_hw_ctl[] = { 62 [PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1), 63 [PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff), 64 [PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1), 65 [PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1), 66 /* Taken and non-taken branches + taken loop ends */ 67 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490), 68 /* Instruction-related + other global stall cycles */ 69 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff), 70 /* Data-related global stall cycles */ 71 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff), 72 }; 73 74 #define C(_x) PERF_COUNT_HW_CACHE_##_x 75 76 static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = { 77 [C(L1D)] = { 78 [C(OP_READ)] = { 79 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1), 80 [C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2), 81 }, 82 [C(OP_WRITE)] = { 83 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1), 84 [C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2), 85 }, 86 }, 87 [C(L1I)] = { 88 [C(OP_READ)] = { 89 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1), 90 [C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2), 91 }, 92 }, 93 [C(DTLB)] = { 94 [C(OP_READ)] = { 95 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1), 96 [C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8), 97 }, 98 }, 99 [C(ITLB)] = { 100 [C(OP_READ)] = { 101 [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1), 102 [C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8), 103 }, 104 }, 105 }; 106 107 static int xtensa_pmu_cache_event(u64 config) 108 { 109 unsigned int cache_type, cache_op, cache_result; 110 int ret; 111 112 cache_type = (config >> 0) & 0xff; 113 cache_op = (config >> 8) & 0xff; 114 cache_result = (config >> 16) & 0xff; 115 116 if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) || 117 cache_op >= C(OP_MAX) || 118 cache_result >= C(RESULT_MAX)) 119 return -EINVAL; 120 121 ret = xtensa_cache_ctl[cache_type][cache_op][cache_result]; 122 123 if (ret == 0) 124 return -EINVAL; 125 126 return ret; 127 } 128 129 static inline uint32_t xtensa_pmu_read_counter(int idx) 130 { 131 return get_er(XTENSA_PMU_PM(idx)); 132 } 133 134 static inline void xtensa_pmu_write_counter(int idx, uint32_t v) 135 { 136 set_er(v, XTENSA_PMU_PM(idx)); 137 } 138 139 static void xtensa_perf_event_update(struct perf_event *event, 140 struct hw_perf_event *hwc, int idx) 141 { 142 uint64_t prev_raw_count, new_raw_count; 143 int64_t delta; 144 145 do { 146 prev_raw_count = local64_read(&hwc->prev_count); 147 new_raw_count = xtensa_pmu_read_counter(event->hw.idx); 148 } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 149 new_raw_count) != prev_raw_count); 150 151 delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK; 152 153 local64_add(delta, &event->count); 154 local64_sub(delta, &hwc->period_left); 155 } 156 157 static bool xtensa_perf_event_set_period(struct perf_event *event, 158 struct hw_perf_event *hwc, int idx) 159 { 160 bool rc = false; 161 s64 left; 162 163 if (!is_sampling_event(event)) { 164 left = XTENSA_PMU_COUNTER_MAX; 165 } else { 166 s64 period = hwc->sample_period; 167 168 left = local64_read(&hwc->period_left); 169 if (left <= -period) { 170 left = period; 171 local64_set(&hwc->period_left, left); 172 hwc->last_period = period; 173 rc = true; 174 } else if (left <= 0) { 175 left += period; 176 local64_set(&hwc->period_left, left); 177 hwc->last_period = period; 178 rc = true; 179 } 180 if (left > XTENSA_PMU_COUNTER_MAX) 181 left = XTENSA_PMU_COUNTER_MAX; 182 } 183 184 local64_set(&hwc->prev_count, -left); 185 xtensa_pmu_write_counter(idx, -left); 186 perf_event_update_userpage(event); 187 188 return rc; 189 } 190 191 static void xtensa_pmu_enable(struct pmu *pmu) 192 { 193 set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); 194 } 195 196 static void xtensa_pmu_disable(struct pmu *pmu) 197 { 198 set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); 199 } 200 201 static int xtensa_pmu_event_init(struct perf_event *event) 202 { 203 int ret; 204 205 switch (event->attr.type) { 206 case PERF_TYPE_HARDWARE: 207 if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) || 208 xtensa_hw_ctl[event->attr.config] == 0) 209 return -EINVAL; 210 event->hw.config = xtensa_hw_ctl[event->attr.config]; 211 return 0; 212 213 case PERF_TYPE_HW_CACHE: 214 ret = xtensa_pmu_cache_event(event->attr.config); 215 if (ret < 0) 216 return ret; 217 event->hw.config = ret; 218 return 0; 219 220 case PERF_TYPE_RAW: 221 /* Not 'previous counter' select */ 222 if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) == 223 (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT)) 224 return -EINVAL; 225 event->hw.config = (event->attr.config & 226 (XTENSA_PMU_PMCTRL_KRNLCNT | 227 XTENSA_PMU_PMCTRL_TRACELEVEL | 228 XTENSA_PMU_PMCTRL_SELECT | 229 XTENSA_PMU_PMCTRL_MASK)) | 230 XTENSA_PMU_PMCTRL_INTEN; 231 return 0; 232 233 default: 234 return -ENOENT; 235 } 236 } 237 238 /* 239 * Starts/Stops a counter present on the PMU. The PMI handler 240 * should stop the counter when perf_event_overflow() returns 241 * !0. ->start() will be used to continue. 242 */ 243 static void xtensa_pmu_start(struct perf_event *event, int flags) 244 { 245 struct hw_perf_event *hwc = &event->hw; 246 int idx = hwc->idx; 247 248 if (WARN_ON_ONCE(idx == -1)) 249 return; 250 251 if (flags & PERF_EF_RELOAD) { 252 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 253 xtensa_perf_event_set_period(event, hwc, idx); 254 } 255 256 hwc->state = 0; 257 258 set_er(hwc->config, XTENSA_PMU_PMCTRL(idx)); 259 } 260 261 static void xtensa_pmu_stop(struct perf_event *event, int flags) 262 { 263 struct hw_perf_event *hwc = &event->hw; 264 int idx = hwc->idx; 265 266 if (!(hwc->state & PERF_HES_STOPPED)) { 267 set_er(0, XTENSA_PMU_PMCTRL(idx)); 268 set_er(get_er(XTENSA_PMU_PMSTAT(idx)), 269 XTENSA_PMU_PMSTAT(idx)); 270 hwc->state |= PERF_HES_STOPPED; 271 } 272 273 if ((flags & PERF_EF_UPDATE) && 274 !(event->hw.state & PERF_HES_UPTODATE)) { 275 xtensa_perf_event_update(event, &event->hw, idx); 276 event->hw.state |= PERF_HES_UPTODATE; 277 } 278 } 279 280 /* 281 * Adds/Removes a counter to/from the PMU, can be done inside 282 * a transaction, see the ->*_txn() methods. 283 */ 284 static int xtensa_pmu_add(struct perf_event *event, int flags) 285 { 286 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 287 struct hw_perf_event *hwc = &event->hw; 288 int idx = hwc->idx; 289 290 if (__test_and_set_bit(idx, ev->used_mask)) { 291 idx = find_first_zero_bit(ev->used_mask, 292 XCHAL_NUM_PERF_COUNTERS); 293 if (idx == XCHAL_NUM_PERF_COUNTERS) 294 return -EAGAIN; 295 296 __set_bit(idx, ev->used_mask); 297 hwc->idx = idx; 298 } 299 ev->event[idx] = event; 300 301 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 302 303 if (flags & PERF_EF_START) 304 xtensa_pmu_start(event, PERF_EF_RELOAD); 305 306 perf_event_update_userpage(event); 307 return 0; 308 } 309 310 static void xtensa_pmu_del(struct perf_event *event, int flags) 311 { 312 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 313 314 xtensa_pmu_stop(event, PERF_EF_UPDATE); 315 __clear_bit(event->hw.idx, ev->used_mask); 316 perf_event_update_userpage(event); 317 } 318 319 static void xtensa_pmu_read(struct perf_event *event) 320 { 321 xtensa_perf_event_update(event, &event->hw, event->hw.idx); 322 } 323 324 static int callchain_trace(struct stackframe *frame, void *data) 325 { 326 struct perf_callchain_entry *entry = data; 327 328 perf_callchain_store(entry, frame->pc); 329 return 0; 330 } 331 332 void perf_callchain_kernel(struct perf_callchain_entry *entry, 333 struct pt_regs *regs) 334 { 335 xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH, 336 callchain_trace, NULL, entry); 337 } 338 339 void perf_callchain_user(struct perf_callchain_entry *entry, 340 struct pt_regs *regs) 341 { 342 xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH, 343 callchain_trace, entry); 344 } 345 346 void perf_event_print_debug(void) 347 { 348 unsigned long flags; 349 unsigned i; 350 351 local_irq_save(flags); 352 pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(), 353 get_er(XTENSA_PMU_PMG)); 354 for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) 355 pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n", 356 i, get_er(XTENSA_PMU_PM(i)), 357 i, get_er(XTENSA_PMU_PMCTRL(i)), 358 i, get_er(XTENSA_PMU_PMSTAT(i))); 359 local_irq_restore(flags); 360 } 361 362 irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id) 363 { 364 irqreturn_t rc = IRQ_NONE; 365 struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); 366 unsigned i; 367 368 for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS); 369 i < XCHAL_NUM_PERF_COUNTERS; 370 i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) { 371 uint32_t v = get_er(XTENSA_PMU_PMSTAT(i)); 372 struct perf_event *event = ev->event[i]; 373 struct hw_perf_event *hwc = &event->hw; 374 u64 last_period; 375 376 if (!(v & XTENSA_PMU_PMSTAT_OVFL)) 377 continue; 378 379 set_er(v, XTENSA_PMU_PMSTAT(i)); 380 xtensa_perf_event_update(event, hwc, i); 381 last_period = hwc->last_period; 382 if (xtensa_perf_event_set_period(event, hwc, i)) { 383 struct perf_sample_data data; 384 struct pt_regs *regs = get_irq_regs(); 385 386 perf_sample_data_init(&data, 0, last_period); 387 if (perf_event_overflow(event, &data, regs)) 388 xtensa_pmu_stop(event, 0); 389 } 390 391 rc = IRQ_HANDLED; 392 } 393 return rc; 394 } 395 396 static struct pmu xtensa_pmu = { 397 .pmu_enable = xtensa_pmu_enable, 398 .pmu_disable = xtensa_pmu_disable, 399 .event_init = xtensa_pmu_event_init, 400 .add = xtensa_pmu_add, 401 .del = xtensa_pmu_del, 402 .start = xtensa_pmu_start, 403 .stop = xtensa_pmu_stop, 404 .read = xtensa_pmu_read, 405 }; 406 407 static void xtensa_pmu_setup(void) 408 { 409 unsigned i; 410 411 set_er(0, XTENSA_PMU_PMG); 412 for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) { 413 set_er(0, XTENSA_PMU_PMCTRL(i)); 414 set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i)); 415 } 416 } 417 418 static int xtensa_pmu_notifier(struct notifier_block *self, 419 unsigned long action, void *data) 420 { 421 switch (action & ~CPU_TASKS_FROZEN) { 422 case CPU_STARTING: 423 xtensa_pmu_setup(); 424 break; 425 426 default: 427 break; 428 } 429 430 return NOTIFY_OK; 431 } 432 433 static int __init xtensa_pmu_init(void) 434 { 435 int ret; 436 int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT); 437 438 perf_cpu_notifier(xtensa_pmu_notifier); 439 #if XTENSA_FAKE_NMI 440 enable_irq(irq); 441 #else 442 ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU, 443 "pmu", NULL); 444 if (ret < 0) 445 return ret; 446 #endif 447 448 ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW); 449 if (ret) 450 free_irq(irq, NULL); 451 452 return ret; 453 } 454 early_initcall(xtensa_pmu_init); 455