xref: /linux/arch/xtensa/kernel/perf_event.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xtensa Performance Monitor Module driver
4  * See Tensilica Debug User's Guide for PMU registers documentation.
5  *
6  * Copyright (C) 2015 Cadence Design Systems Inc.
7  */
8 
9 #include <linux/interrupt.h>
10 #include <linux/irqdomain.h>
11 #include <linux/module.h>
12 #include <linux/of.h>
13 #include <linux/perf_event.h>
14 #include <linux/platform_device.h>
15 
16 #include <asm/core.h>
17 #include <asm/processor.h>
18 #include <asm/stacktrace.h>
19 
/*
 * Hardware version value used as the threshold for choosing the PMU
 * register base address below.
 */
#define XTENSA_HWVERSION_RG_2015_0	260000

/*
 * Base address of the PMU register block, as passed to get_er()/set_er().
 * Configurations whose minimum hardware version is at least
 * XTENSA_HWVERSION_RG_2015_0 use a different base than older ones.
 */
#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
#define XTENSA_PMU_ERI_BASE		0x00101000
#else
#define XTENSA_PMU_ERI_BASE		0x00001000
#endif

/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG			XTENSA_PMU_ERI_BASE
/* Perf counter values */
#define XTENSA_PMU_PM(i)		(XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
/* Perf counter control registers */
#define XTENSA_PMU_PMCTRL(i)		(XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
/* Perf counter status registers */
#define XTENSA_PMU_PMSTAT(i)		(XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)

/* PMG bit set by xtensa_pmu_enable() and cleared by xtensa_pmu_disable() */
#define XTENSA_PMU_PMG_PMEN		0x1

/* Mask applied to counter deltas; counters are read as 32-bit values */
#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
/* Largest period that is programmed into a counter in one go */
#define XTENSA_PMU_COUNTER_MAX		0x7fffffff

/*
 * Fields of the per-counter PMCTRL registers. See the Tensilica Debug
 * User's Guide for their exact semantics.
 */
#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
#define XTENSA_PMU_PMCTRL_MASK		0xffff0000

/*
 * Build the PMCTRL value for a predefined event: @select and @mask are
 * shifted into their fields, and every TRACELEVEL bit plus INTEN is set.
 * KRNLCNT is left clear.
 */
#define XTENSA_PMU_MASK(select, mask) \
	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
	 XTENSA_PMU_PMCTRL_INTEN)

/*
 * Bits of the per-counter PMSTAT registers. OVFL is the bit the interrupt
 * handler tests to find the counters that need servicing.
 */
#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
58 
/*
 * Bookkeeping for the hardware counters of one CPU.
 *
 * event[i] is only meaningful while bit i is set in used_mask:
 * xtensa_pmu_del() clears the bit but leaves the pointer in place.
 */
struct xtensa_pmu_events {
	/* Array of events currently on this core */
	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
	/* Bitmap of used hardware counters */
	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
};
/* One instance per CPU, reached through this_cpu_ptr() */
static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
66 
/*
 * PMCTRL values for the generic PERF_TYPE_HARDWARE events, indexed by
 * PERF_COUNT_HW_* id. Ids without an initializer stay 0, which
 * xtensa_pmu_event_init() rejects with -EINVAL.
 */
static const u32 xtensa_hw_ctl[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
	/* Taken and non-taken branches + taken loop ends */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
	/* Instruction-related + other global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
	/* Data-related global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
};
79 
/* Shorthand for the PERF_COUNT_HW_CACHE_* constants */
#define C(_x) PERF_COUNT_HW_CACHE_##_x

/*
 * PMCTRL values for PERF_TYPE_HW_CACHE events, indexed by
 * [cache id][operation][result]. Combinations that are not listed stay 0
 * and are reported as -EINVAL by xtensa_pmu_cache_event().
 */
static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
		},
	},
};
112 
113 static int xtensa_pmu_cache_event(u64 config)
114 {
115 	unsigned int cache_type, cache_op, cache_result;
116 	int ret;
117 
118 	cache_type = (config >>  0) & 0xff;
119 	cache_op = (config >>  8) & 0xff;
120 	cache_result = (config >> 16) & 0xff;
121 
122 	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
123 	    cache_op >= C(OP_MAX) ||
124 	    cache_result >= C(RESULT_MAX))
125 		return -EINVAL;
126 
127 	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
128 
129 	if (ret == 0)
130 		return -EINVAL;
131 
132 	return ret;
133 }
134 
135 static inline uint32_t xtensa_pmu_read_counter(int idx)
136 {
137 	return get_er(XTENSA_PMU_PM(idx));
138 }
139 
140 static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
141 {
142 	set_er(v, XTENSA_PMU_PM(idx));
143 }
144 
145 static void xtensa_perf_event_update(struct perf_event *event,
146 				     struct hw_perf_event *hwc, int idx)
147 {
148 	uint64_t prev_raw_count, new_raw_count;
149 	int64_t delta;
150 
151 	do {
152 		prev_raw_count = local64_read(&hwc->prev_count);
153 		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
154 	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
155 				 new_raw_count) != prev_raw_count);
156 
157 	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
158 
159 	local64_add(delta, &event->count);
160 	local64_sub(delta, &hwc->period_left);
161 }
162 
163 static bool xtensa_perf_event_set_period(struct perf_event *event,
164 					 struct hw_perf_event *hwc, int idx)
165 {
166 	bool rc = false;
167 	s64 left;
168 
169 	if (!is_sampling_event(event)) {
170 		left = XTENSA_PMU_COUNTER_MAX;
171 	} else {
172 		s64 period = hwc->sample_period;
173 
174 		left = local64_read(&hwc->period_left);
175 		if (left <= -period) {
176 			left = period;
177 			local64_set(&hwc->period_left, left);
178 			hwc->last_period = period;
179 			rc = true;
180 		} else if (left <= 0) {
181 			left += period;
182 			local64_set(&hwc->period_left, left);
183 			hwc->last_period = period;
184 			rc = true;
185 		}
186 		if (left > XTENSA_PMU_COUNTER_MAX)
187 			left = XTENSA_PMU_COUNTER_MAX;
188 	}
189 
190 	local64_set(&hwc->prev_count, -left);
191 	xtensa_pmu_write_counter(idx, -left);
192 	perf_event_update_userpage(event);
193 
194 	return rc;
195 }
196 
197 static void xtensa_pmu_enable(struct pmu *pmu)
198 {
199 	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
200 }
201 
202 static void xtensa_pmu_disable(struct pmu *pmu)
203 {
204 	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
205 }
206 
207 static int xtensa_pmu_event_init(struct perf_event *event)
208 {
209 	int ret;
210 
211 	switch (event->attr.type) {
212 	case PERF_TYPE_HARDWARE:
213 		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
214 		    xtensa_hw_ctl[event->attr.config] == 0)
215 			return -EINVAL;
216 		event->hw.config = xtensa_hw_ctl[event->attr.config];
217 		return 0;
218 
219 	case PERF_TYPE_HW_CACHE:
220 		ret = xtensa_pmu_cache_event(event->attr.config);
221 		if (ret < 0)
222 			return ret;
223 		event->hw.config = ret;
224 		return 0;
225 
226 	case PERF_TYPE_RAW:
227 		/* Not 'previous counter' select */
228 		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
229 		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
230 			return -EINVAL;
231 		event->hw.config = (event->attr.config &
232 				    (XTENSA_PMU_PMCTRL_KRNLCNT |
233 				     XTENSA_PMU_PMCTRL_TRACELEVEL |
234 				     XTENSA_PMU_PMCTRL_SELECT |
235 				     XTENSA_PMU_PMCTRL_MASK)) |
236 			XTENSA_PMU_PMCTRL_INTEN;
237 		return 0;
238 
239 	default:
240 		return -ENOENT;
241 	}
242 }
243 
244 /*
245  * Starts/Stops a counter present on the PMU. The PMI handler
246  * should stop the counter when perf_event_overflow() returns
247  * !0. ->start() will be used to continue.
248  */
249 static void xtensa_pmu_start(struct perf_event *event, int flags)
250 {
251 	struct hw_perf_event *hwc = &event->hw;
252 	int idx = hwc->idx;
253 
254 	if (WARN_ON_ONCE(idx == -1))
255 		return;
256 
257 	if (flags & PERF_EF_RELOAD) {
258 		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
259 		xtensa_perf_event_set_period(event, hwc, idx);
260 	}
261 
262 	hwc->state = 0;
263 
264 	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
265 }
266 
267 static void xtensa_pmu_stop(struct perf_event *event, int flags)
268 {
269 	struct hw_perf_event *hwc = &event->hw;
270 	int idx = hwc->idx;
271 
272 	if (!(hwc->state & PERF_HES_STOPPED)) {
273 		set_er(0, XTENSA_PMU_PMCTRL(idx));
274 		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
275 		       XTENSA_PMU_PMSTAT(idx));
276 		hwc->state |= PERF_HES_STOPPED;
277 	}
278 
279 	if ((flags & PERF_EF_UPDATE) &&
280 	    !(event->hw.state & PERF_HES_UPTODATE)) {
281 		xtensa_perf_event_update(event, &event->hw, idx);
282 		event->hw.state |= PERF_HES_UPTODATE;
283 	}
284 }
285 
286 /*
287  * Adds/Removes a counter to/from the PMU, can be done inside
288  * a transaction, see the ->*_txn() methods.
289  */
290 static int xtensa_pmu_add(struct perf_event *event, int flags)
291 {
292 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
293 	struct hw_perf_event *hwc = &event->hw;
294 	int idx = hwc->idx;
295 
296 	if (__test_and_set_bit(idx, ev->used_mask)) {
297 		idx = find_first_zero_bit(ev->used_mask,
298 					  XCHAL_NUM_PERF_COUNTERS);
299 		if (idx == XCHAL_NUM_PERF_COUNTERS)
300 			return -EAGAIN;
301 
302 		__set_bit(idx, ev->used_mask);
303 		hwc->idx = idx;
304 	}
305 	ev->event[idx] = event;
306 
307 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
308 
309 	if (flags & PERF_EF_START)
310 		xtensa_pmu_start(event, PERF_EF_RELOAD);
311 
312 	perf_event_update_userpage(event);
313 	return 0;
314 }
315 
316 static void xtensa_pmu_del(struct perf_event *event, int flags)
317 {
318 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
319 
320 	xtensa_pmu_stop(event, PERF_EF_UPDATE);
321 	__clear_bit(event->hw.idx, ev->used_mask);
322 	perf_event_update_userpage(event);
323 }
324 
325 static void xtensa_pmu_read(struct perf_event *event)
326 {
327 	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
328 }
329 
330 static int callchain_trace(struct stackframe *frame, void *data)
331 {
332 	struct perf_callchain_entry_ctx *entry = data;
333 
334 	perf_callchain_store(entry, frame->pc);
335 	return 0;
336 }
337 
338 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
339 			   struct pt_regs *regs)
340 {
341 	xtensa_backtrace_kernel(regs, entry->max_stack,
342 				callchain_trace, NULL, entry);
343 }
344 
345 void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
346 			 struct pt_regs *regs)
347 {
348 	xtensa_backtrace_user(regs, entry->max_stack,
349 			      callchain_trace, entry);
350 }
351 
352 void perf_event_print_debug(void)
353 {
354 	unsigned long flags;
355 	unsigned i;
356 
357 	local_irq_save(flags);
358 	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
359 		get_er(XTENSA_PMU_PMG));
360 	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
361 		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
362 			i, get_er(XTENSA_PMU_PM(i)),
363 			i, get_er(XTENSA_PMU_PMCTRL(i)),
364 			i, get_er(XTENSA_PMU_PMSTAT(i)));
365 	local_irq_restore(flags);
366 }
367 
368 irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
369 {
370 	irqreturn_t rc = IRQ_NONE;
371 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
372 	unsigned i;
373 
374 	for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) {
375 		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
376 		struct perf_event *event = ev->event[i];
377 		struct hw_perf_event *hwc = &event->hw;
378 		u64 last_period;
379 
380 		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
381 			continue;
382 
383 		set_er(v, XTENSA_PMU_PMSTAT(i));
384 		xtensa_perf_event_update(event, hwc, i);
385 		last_period = hwc->last_period;
386 		if (xtensa_perf_event_set_period(event, hwc, i)) {
387 			struct perf_sample_data data;
388 			struct pt_regs *regs = get_irq_regs();
389 
390 			perf_sample_data_init(&data, 0, last_period);
391 			if (perf_event_overflow(event, &data, regs))
392 				xtensa_pmu_stop(event, 0);
393 		}
394 
395 		rc = IRQ_HANDLED;
396 	}
397 	return rc;
398 }
399 
/*
 * Callback table handed to perf_pmu_register() by xtensa_pmu_init().
 * No transaction (->*_txn) callbacks are provided.
 */
static struct pmu xtensa_pmu = {
	.pmu_enable = xtensa_pmu_enable,
	.pmu_disable = xtensa_pmu_disable,
	.event_init = xtensa_pmu_event_init,
	.add = xtensa_pmu_add,
	.del = xtensa_pmu_del,
	.start = xtensa_pmu_start,
	.stop = xtensa_pmu_stop,
	.read = xtensa_pmu_read,
};
410 
411 static int xtensa_pmu_setup(unsigned int cpu)
412 {
413 	unsigned i;
414 
415 	set_er(0, XTENSA_PMU_PMG);
416 	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
417 		set_er(0, XTENSA_PMU_PMCTRL(i));
418 		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
419 	}
420 	return 0;
421 }
422 
423 static int __init xtensa_pmu_init(void)
424 {
425 	int ret;
426 	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
427 
428 	ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
429 				"perf/xtensa:starting", xtensa_pmu_setup,
430 				NULL);
431 	if (ret) {
432 		pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
433 		return ret;
434 	}
435 #if XTENSA_FAKE_NMI
436 	enable_irq(irq);
437 #else
438 	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
439 			  "pmu", NULL);
440 	if (ret < 0)
441 		return ret;
442 #endif
443 
444 	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
445 	if (ret)
446 		free_irq(irq, NULL);
447 
448 	return ret;
449 }
450 early_initcall(xtensa_pmu_init);
451