// SPDX-License-Identifier: GPL-2.0-only
/*
 * Xtensa Performance Monitor Module driver
 * See Tensilica Debug User's Guide for PMU registers documentation.
 *
 * Copyright (C) 2015 Cadence Design Systems Inc.
 */

#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

#include <asm/core.h>
#include <asm/processor.h>
#include <asm/stacktrace.h>

#define XTENSA_HWVERSION_RG_2015_0      260000

#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
#define XTENSA_PMU_ERI_BASE             0x00101000
#else
#define XTENSA_PMU_ERI_BASE             0x00001000
#endif

/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG                  XTENSA_PMU_ERI_BASE
/* Perf counter values */
#define XTENSA_PMU_PM(i)                (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
/* Perf counter control registers */
#define XTENSA_PMU_PMCTRL(i)            (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
/* Perf counter status registers */
#define XTENSA_PMU_PMSTAT(i)            (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)

#define XTENSA_PMU_PMG_PMEN             0x1

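/*
 * Counters are 32 bits wide. Sample periods are clamped to
 * XTENSA_PMU_COUNTER_MAX so that the counter, which is programmed with
 * the negated period (see xtensa_perf_event_set_period()), can always
 * count up to the overflow point within a single wrap.
 */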
#define XTENSA_PMU_COUNTER_MASK         0xffffffffULL
#define XTENSA_PMU_COUNTER_MAX          0x7fffffff

#define XTENSA_PMU_PMCTRL_INTEN         0x00000001
#define XTENSA_PMU_PMCTRL_KRNLCNT       0x00000008
#define XTENSA_PMU_PMCTRL_TRACELEVEL    0x000000f0
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT  8
#define XTENSA_PMU_PMCTRL_SELECT        0x00001f00
#define XTENSA_PMU_PMCTRL_MASK_SHIFT    16
#define XTENSA_PMU_PMCTRL_MASK          0xffff0000

#define XTENSA_PMU_MASK(select, mask) \
        (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
         ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
         XTENSA_PMU_PMCTRL_TRACELEVEL | \
         XTENSA_PMU_PMCTRL_INTEN)

#define XTENSA_PMU_PMSTAT_OVFL          0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT       0x00000010

struct xtensa_pmu_events {
        /* Array of events currently on this core */
        struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
        /* Bitmap of used hardware counters */
        unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
};
static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);

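/*
 * Mapping of generic hardware perf events to PMU select/mask pairs
 * (see XTENSA_PMU_MASK). The select and mask values follow the event
 * encoding described in the Tensilica Debug User's Guide.
 */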
static const u32 xtensa_hw_ctl[] = {
        [PERF_COUNT_HW_CPU_CYCLES]              = XTENSA_PMU_MASK(0, 0x1),
        [PERF_COUNT_HW_INSTRUCTIONS]            = XTENSA_PMU_MASK(2, 0xffff),
        [PERF_COUNT_HW_CACHE_REFERENCES]        = XTENSA_PMU_MASK(10, 0x1),
        [PERF_COUNT_HW_CACHE_MISSES]            = XTENSA_PMU_MASK(12, 0x1),
        /* Taken and non-taken branches + taken loop ends */
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = XTENSA_PMU_MASK(2, 0x490),
        /* Instruction-related + other global stall cycles */
        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff),
        /* Data-related global stall cycles */
        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]  = XTENSA_PMU_MASK(3, 0x1ff),
};

#define C(_x) PERF_COUNT_HW_CACHE_##_x

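/*
 * Generic cache events, indexed by cache type, operation and result.
 * Entries left zero are combinations this PMU cannot count; they are
 * rejected by xtensa_pmu_cache_event().
 */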
static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
        [C(L1D)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(10, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(10, 0x2),
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(11, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(11, 0x2),
                },
        },
        [C(L1I)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(8, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(8, 0x2),
                },
        },
        [C(DTLB)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(9, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(9, 0x8),
                },
        },
        [C(ITLB)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(7, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(7, 0x8),
                },
        },
};

static int xtensa_pmu_cache_event(u64 config)
{
        unsigned int cache_type, cache_op, cache_result;
        int ret;

        cache_type = (config >> 0) & 0xff;
        cache_op = (config >> 8) & 0xff;
        cache_result = (config >> 16) & 0xff;

        if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
            cache_op >= C(OP_MAX) ||
            cache_result >= C(RESULT_MAX))
                return -EINVAL;

        ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];

        if (ret == 0)
                return -EINVAL;

        return ret;
}

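/*
 * Counter registers live in the ERI address space and are accessed
 * through get_er()/set_er().
 */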
static inline uint32_t xtensa_pmu_read_counter(int idx)
{
        return get_er(XTENSA_PMU_PM(idx));
}

static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
{
        set_er(v, XTENSA_PMU_PM(idx));
}

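/*
 * Fold the hardware counter into the running event count: read the
 * current counter value, publish it as the new prev_count with a
 * local64_cmpxchg() retry loop, and account the 32-bit delta to
 * event->count and period_left.
 */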
static void xtensa_perf_event_update(struct perf_event *event,
                                     struct hw_perf_event *hwc, int idx)
{
        uint64_t prev_raw_count, new_raw_count;
        int64_t delta;

        do {
                prev_raw_count = local64_read(&hwc->prev_count);
                new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
        } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                 new_raw_count) != prev_raw_count);

        delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);
}

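/*
 * Re-arm the counter for the next period. The counter is programmed
 * with -left so that it overflows after 'left' more events;
 * non-sampling events simply get the maximum period. Returns true when
 * a full sample period has elapsed and was restarted, which the
 * interrupt handler uses to decide whether to emit a sample.
 */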
static bool xtensa_perf_event_set_period(struct perf_event *event,
                                         struct hw_perf_event *hwc, int idx)
{
        bool rc = false;
        s64 left;

        if (!is_sampling_event(event)) {
                left = XTENSA_PMU_COUNTER_MAX;
        } else {
                s64 period = hwc->sample_period;

                left = local64_read(&hwc->period_left);
                if (left <= -period) {
                        left = period;
                        local64_set(&hwc->period_left, left);
                        hwc->last_period = period;
                        rc = true;
                } else if (left <= 0) {
                        left += period;
                        local64_set(&hwc->period_left, left);
                        hwc->last_period = period;
                        rc = true;
                }
                if (left > XTENSA_PMU_COUNTER_MAX)
                        left = XTENSA_PMU_COUNTER_MAX;
        }

        local64_set(&hwc->prev_count, -left);
        xtensa_pmu_write_counter(idx, -left);
        perf_event_update_userpage(event);

        return rc;
}

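/*
 * PMG.PMEN gates all counters at once; the perf core uses these hooks
 * to batch counter updates around add/del and scheduling operations.
 */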
static void xtensa_pmu_enable(struct pmu *pmu)
{
        set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
}

static void xtensa_pmu_disable(struct pmu *pmu)
{
        set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
}

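/*
 * Translate a generic, cache or raw perf event into a PMCTRL value.
 * Unsupported event types return -ENOENT so other PMUs may claim the
 * event; unsupported configs within a known type return -EINVAL.
 */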
static int xtensa_pmu_event_init(struct perf_event *event)
{
        int ret;

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
                    xtensa_hw_ctl[event->attr.config] == 0)
                        return -EINVAL;
                event->hw.config = xtensa_hw_ctl[event->attr.config];
                return 0;

        case PERF_TYPE_HW_CACHE:
                ret = xtensa_pmu_cache_event(event->attr.config);
                if (ret < 0)
                        return ret;
                event->hw.config = ret;
                return 0;

        case PERF_TYPE_RAW:
                /* Not 'previous counter' select */
                if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
                    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
                        return -EINVAL;
                event->hw.config = (event->attr.config &
                                    (XTENSA_PMU_PMCTRL_KRNLCNT |
                                     XTENSA_PMU_PMCTRL_TRACELEVEL |
                                     XTENSA_PMU_PMCTRL_SELECT |
                                     XTENSA_PMU_PMCTRL_MASK)) |
                        XTENSA_PMU_PMCTRL_INTEN;
                return 0;

        default:
                return -ENOENT;
        }
}

/*
 * Starts/Stops a counter present on the PMU. The PMI handler
 * should stop the counter when perf_event_overflow() returns
 * !0. ->start() will be used to continue.
 */
static void xtensa_pmu_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (WARN_ON_ONCE(idx == -1))
                return;

        if (flags & PERF_EF_RELOAD) {
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
                xtensa_perf_event_set_period(event, hwc, idx);
        }

        hwc->state = 0;

        set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
}

static void xtensa_pmu_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (!(hwc->state & PERF_HES_STOPPED)) {
                set_er(0, XTENSA_PMU_PMCTRL(idx));
                set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
                       XTENSA_PMU_PMSTAT(idx));
                hwc->state |= PERF_HES_STOPPED;
        }

        if ((flags & PERF_EF_UPDATE) &&
            !(event->hw.state & PERF_HES_UPTODATE)) {
                xtensa_perf_event_update(event, &event->hw, idx);
                event->hw.state |= PERF_HES_UPTODATE;
        }
}

/*
 * Adds/Removes a counter to/from the PMU, can be done inside
 * a transaction, see the ->*_txn() methods.
 */
static int xtensa_pmu_add(struct perf_event *event, int flags)
{
        struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (__test_and_set_bit(idx, ev->used_mask)) {
                idx = find_first_zero_bit(ev->used_mask,
                                          XCHAL_NUM_PERF_COUNTERS);
                if (idx == XCHAL_NUM_PERF_COUNTERS)
                        return -EAGAIN;

                __set_bit(idx, ev->used_mask);
                hwc->idx = idx;
        }
        ev->event[idx] = event;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                xtensa_pmu_start(event, PERF_EF_RELOAD);

        perf_event_update_userpage(event);
        return 0;
}

static void xtensa_pmu_del(struct perf_event *event, int flags)
{
        struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);

        xtensa_pmu_stop(event, PERF_EF_UPDATE);
        __clear_bit(event->hw.idx, ev->used_mask);
        perf_event_update_userpage(event);
}

static void xtensa_pmu_read(struct perf_event *event)
{
        xtensa_perf_event_update(event, &event->hw, event->hw.idx);
}

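/*
 * Callchain support: callchain_trace() is the per-frame callback that
 * stores each unwound PC into the perf callchain; the kernel and user
 * variants walk the stack via xtensa_backtrace_kernel()/_user().
 */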
static int callchain_trace(struct stackframe *frame, void *data)
{
        struct perf_callchain_entry_ctx *entry = data;

        perf_callchain_store(entry, frame->pc);
        return 0;
}

void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
                           struct pt_regs *regs)
{
        xtensa_backtrace_kernel(regs, entry->max_stack,
                                callchain_trace, NULL, entry);
}

void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
                         struct pt_regs *regs)
{
        xtensa_backtrace_user(regs, entry->max_stack,
                              callchain_trace, entry);
}

void perf_event_print_debug(void)
{
        unsigned long flags;
        unsigned i;

        local_irq_save(flags);
        pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
                get_er(XTENSA_PMU_PMG));
        for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
                pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
                        i, get_er(XTENSA_PMU_PM(i)),
                        i, get_er(XTENSA_PMU_PMCTRL(i)),
                        i, get_er(XTENSA_PMU_PMSTAT(i)));
        local_irq_restore(flags);
}

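/*
 * Profiling interrupt handler: for every active counter whose PMSTAT
 * overflow bit is set, acknowledge the status by writing it back, fold
 * the counter into the event count, re-arm the period and, for
 * sampling events, hand a sample to perf_event_overflow().
 */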
irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
{
        irqreturn_t rc = IRQ_NONE;
        struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
        unsigned i;

        for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) {
                uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
                struct perf_event *event = ev->event[i];
                struct hw_perf_event *hwc = &event->hw;
                u64 last_period;

                if (!(v & XTENSA_PMU_PMSTAT_OVFL))
                        continue;

                set_er(v, XTENSA_PMU_PMSTAT(i));
                xtensa_perf_event_update(event, hwc, i);
                last_period = hwc->last_period;
                if (xtensa_perf_event_set_period(event, hwc, i)) {
                        struct perf_sample_data data;
                        struct pt_regs *regs = get_irq_regs();

                        perf_sample_data_init(&data, 0, last_period);
                        if (perf_event_overflow(event, &data, regs))
                                xtensa_pmu_stop(event, 0);
                }

                rc = IRQ_HANDLED;
        }
        return rc;
}

static struct pmu xtensa_pmu = {
        .pmu_enable = xtensa_pmu_enable,
        .pmu_disable = xtensa_pmu_disable,
        .event_init = xtensa_pmu_event_init,
        .add = xtensa_pmu_add,
        .del = xtensa_pmu_del,
        .start = xtensa_pmu_start,
        .stop = xtensa_pmu_stop,
        .read = xtensa_pmu_read,
};

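/*
 * CPU hotplug "starting" callback: bring the local PMU into a known
 * state by clearing the global control register and every counter's
 * control and status registers.
 */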
static int xtensa_pmu_setup(unsigned int cpu)
{
        unsigned i;

        set_er(0, XTENSA_PMU_PMG);
        for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
                set_er(0, XTENSA_PMU_PMCTRL(i));
                set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
        }
        return 0;
}

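/*
 * Map the profiling interrupt, register the hotplug callback that
 * resets the PMU on each CPU, then either rely on the fake-NMI setup
 * (the IRQ is only enabled here) or request a regular per-CPU
 * interrupt, and finally register the PMU with the perf core.
 */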
static int __init xtensa_pmu_init(void)
{
        int ret;
        int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);

        ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
                                "perf/xtensa:starting", xtensa_pmu_setup,
                                NULL);
        if (ret) {
                pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
                return ret;
        }
#if XTENSA_FAKE_NMI
        enable_irq(irq);
#else
        ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
                          "pmu", NULL);
        if (ret < 0)
                return ret;
#endif

        ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
        if (ret)
                free_irq(irq, NULL);

        return ret;
}
early_initcall(xtensa_pmu_init);