xref: /linux/drivers/perf/arm_v6_pmu.c (revision 114143a595895c03fbefccfd8346fc51fb4908ed)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ARMv6 Performance counter handling code.
4  *
5  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
6  *
7  * ARMv6 has 2 configurable performance counters and a single cycle counter.
8  * They all share a single reset bit but can be written to zero so we can use
9  * that for a reset.
10  *
11  * The counters can't be individually enabled or disabled so when we remove
12  * one event and replace it with another we could get spurious counts from the
13  * wrong event. However, we can take advantage of the fact that the
14  * performance counters can export events to the event bus, and the event bus
15  * itself can be monitored. This requires that we *don't* export the events to
16  * the event bus. The procedure for disabling a configurable counter is:
17  *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
18  *	  effectively stops the counter from counting.
19  *	- disable the counter's interrupt generation (each counter has its
20  *	  own interrupt enable bit).
21  * Once stopped, the counter value can be written as 0 to reset.
22  *
23  * To enable a counter:
24  *	- enable the counter's interrupt generation.
25  *	- set the new event type.
26  *
27  * Note: the dedicated cycle counter only counts cycles and can't be
28  * enabled/disabled independently of the others. When we want to disable the
29  * cycle counter, we have to just disable the interrupt reporting and start
30  * ignoring that counter. When re-enabling, we have to reset the value and
31  * enable the interrupt.
32  */
33 
34 #include <asm/cputype.h>
35 #include <asm/irq_regs.h>
36 
37 #include <linux/of.h>
38 #include <linux/perf/arm_pmu.h>
39 #include <linux/platform_device.h>
40 
/*
 * ARMv6 event numbers, listed in numeric order. These are the values the
 * perf mapping tables below translate generic events into.
 */
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS	= 0x00,
	ARMV6_PERFCTR_IBUF_STALL	= 0x01,
	ARMV6_PERFCTR_DDEP_STALL	= 0x02,
	ARMV6_PERFCTR_ITLB_MISS		= 0x03,
	ARMV6_PERFCTR_DTLB_MISS		= 0x04,
	ARMV6_PERFCTR_BR_EXEC		= 0x05,
	ARMV6_PERFCTR_BR_MISPREDICT	= 0x06,
	ARMV6_PERFCTR_INSTR_EXEC	= 0x07,
	ARMV6_PERFCTR_DCACHE_HIT	= 0x09,
	ARMV6_PERFCTR_DCACHE_ACCESS	= 0x0A,
	ARMV6_PERFCTR_DCACHE_MISS	= 0x0B,
	ARMV6_PERFCTR_DCACHE_WBACK	= 0x0C,
	ARMV6_PERFCTR_SW_PC_CHANGE	= 0x0D,
	ARMV6_PERFCTR_MAIN_TLB_MISS	= 0x0F,
	ARMV6_PERFCTR_EXPL_D_ACCESS	= 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL	= 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED	= 0x12,
	/* ETMEXTOUT[0] signal; selected to park a disabled counter. */
	ARMV6_PERFCTR_NOP		= 0x20,
	ARMV6_PERFCTR_CPU_CYCLES	= 0xFF,
};
62 
/*
 * Counter indices as stored in hw_perf_event::idx: the dedicated cycle
 * counter first, then the two configurable counters.
 */
enum armv6_counters {
	ARMV6_CYCLE_COUNTER	= 0,
	ARMV6_COUNTER0		= 1,
	ARMV6_COUNTER1		= 2,
	ARMV6_NUM_COUNTERS	= 3,
};
69 
70 /*
71  * The hardware events that we support. We do support cache operations but
72  * we have harvard caches and no way to combine instruction and data
73  * accesses/misses in hardware.
74  */
75 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
76 	PERF_MAP_ALL_UNSUPPORTED,
77 	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6_PERFCTR_CPU_CYCLES,
78 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6_PERFCTR_INSTR_EXEC,
79 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6_PERFCTR_BR_EXEC,
80 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6_PERFCTR_BR_MISPREDICT,
81 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6_PERFCTR_IBUF_STALL,
82 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6_PERFCTR_LSU_FULL_STALL,
83 };
84 
85 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
86 					  [PERF_COUNT_HW_CACHE_OP_MAX]
87 					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
88 	PERF_CACHE_MAP_ALL_UNSUPPORTED,
89 
90 	/*
91 	 * The performance counters don't differentiate between read and write
92 	 * accesses/misses so this isn't strictly correct, but it's the best we
93 	 * can do. Writes and reads get combined.
94 	 */
95 	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
96 	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
97 	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
98 	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
99 
100 	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
101 
102 	/*
103 	 * The ARM performance counters can count micro DTLB misses, micro ITLB
104 	 * misses and main TLB misses. There isn't an event for TLB misses, so
105 	 * use the micro misses here and if users want the main TLB misses they
106 	 * can use a raw counter.
107 	 */
108 	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
109 	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
110 
111 	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
112 	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
113 };
114 
115 static inline unsigned long
armv6_pmcr_read(void)116 armv6_pmcr_read(void)
117 {
118 	u32 val;
119 	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
120 	return val;
121 }
122 
123 static inline void
armv6_pmcr_write(unsigned long val)124 armv6_pmcr_write(unsigned long val)
125 {
126 	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
127 }
128 
/* PMCR: global enable, reset and cycle-counter divider bits. */
#define ARMV6_PMCR_ENABLE		(1 << 0)
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)

/* PMCR: per-counter interrupt enable bits. */
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)

/* PMCR: per-counter overflow flags. */
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)

/* PMCR: 8-bit event selector fields for the two configurable counters. */
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)

/* All three overflow flags together. */
#define ARMV6_PMCR_OVERFLOWED_MASK	\
	(ARMV6_PMCR_COUNT0_OVERFLOW |	\
	 ARMV6_PMCR_COUNT1_OVERFLOW |	\
	 ARMV6_PMCR_CCOUNT_OVERFLOW)
147 
148 static inline int
armv6_pmcr_has_overflowed(unsigned long pmcr)149 armv6_pmcr_has_overflowed(unsigned long pmcr)
150 {
151 	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
152 }
153 
154 static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,enum armv6_counters counter)155 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
156 				  enum armv6_counters counter)
157 {
158 	int ret = 0;
159 
160 	if (ARMV6_CYCLE_COUNTER == counter)
161 		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
162 	else if (ARMV6_COUNTER0 == counter)
163 		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
164 	else if (ARMV6_COUNTER1 == counter)
165 		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
166 	else
167 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
168 
169 	return ret;
170 }
171 
armv6pmu_read_counter(struct perf_event * event)172 static inline u64 armv6pmu_read_counter(struct perf_event *event)
173 {
174 	struct hw_perf_event *hwc = &event->hw;
175 	int counter = hwc->idx;
176 	unsigned long value = 0;
177 
178 	if (ARMV6_CYCLE_COUNTER == counter)
179 		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
180 	else if (ARMV6_COUNTER0 == counter)
181 		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
182 	else if (ARMV6_COUNTER1 == counter)
183 		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
184 	else
185 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
186 
187 	return value;
188 }
189 
/*
 * Write @value to the hardware counter assigned to @event (selected by
 * event->hw.idx). An unknown index triggers a one-time warning and nothing
 * is written.
 *
 * Only the low 32 bits of @value are written: mcr transfers one core
 * register.
 */
static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
{
	struct hw_perf_event *hwc = &event->hw;
	int counter = hwc->idx;
	/*
	 * Narrow explicitly so the asm operand is a single 32-bit register.
	 * Passing the u64 straight to an "r" constraint leaves it to the
	 * compiler's register-pair layout which half %0 ends up naming.
	 */
	u32 val = (u32)value;

	if (ARMV6_CYCLE_COUNTER == counter)
		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(val));
	else if (ARMV6_COUNTER0 == counter)
		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(val));
	else if (ARMV6_COUNTER1 == counter)
		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(val));
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
204 
armv6pmu_enable_event(struct perf_event * event)205 static void armv6pmu_enable_event(struct perf_event *event)
206 {
207 	unsigned long val, mask, evt;
208 	struct hw_perf_event *hwc = &event->hw;
209 	int idx = hwc->idx;
210 
211 	if (ARMV6_CYCLE_COUNTER == idx) {
212 		mask	= 0;
213 		evt	= ARMV6_PMCR_CCOUNT_IEN;
214 	} else if (ARMV6_COUNTER0 == idx) {
215 		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
216 		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
217 			  ARMV6_PMCR_COUNT0_IEN;
218 	} else if (ARMV6_COUNTER1 == idx) {
219 		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
220 		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
221 			  ARMV6_PMCR_COUNT1_IEN;
222 	} else {
223 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
224 		return;
225 	}
226 
227 	/*
228 	 * Mask out the current event and set the counter to count the event
229 	 * that we're interested in.
230 	 */
231 	val = armv6_pmcr_read();
232 	val &= ~mask;
233 	val |= evt;
234 	armv6_pmcr_write(val);
235 }
236 
237 static irqreturn_t
armv6pmu_handle_irq(struct arm_pmu * cpu_pmu)238 armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
239 {
240 	unsigned long pmcr = armv6_pmcr_read();
241 	struct perf_sample_data data;
242 	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
243 	struct pt_regs *regs;
244 	int idx;
245 
246 	if (!armv6_pmcr_has_overflowed(pmcr))
247 		return IRQ_NONE;
248 
249 	regs = get_irq_regs();
250 
251 	/*
252 	 * The interrupts are cleared by writing the overflow flags back to
253 	 * the control register. All of the other bits don't have any effect
254 	 * if they are rewritten, so write the whole value back.
255 	 */
256 	armv6_pmcr_write(pmcr);
257 
258 	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV6_NUM_COUNTERS) {
259 		struct perf_event *event = cpuc->events[idx];
260 		struct hw_perf_event *hwc;
261 
262 		/* Ignore if we don't have an event. */
263 		if (!event)
264 			continue;
265 
266 		/*
267 		 * We have a single interrupt for all counters. Check that
268 		 * each counter has overflowed before we process it.
269 		 */
270 		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
271 			continue;
272 
273 		hwc = &event->hw;
274 		armpmu_event_update(event);
275 		perf_sample_data_init(&data, 0, hwc->last_period);
276 		if (!armpmu_event_set_period(event))
277 			continue;
278 
279 		if (perf_event_overflow(event, &data, regs))
280 			cpu_pmu->disable(event);
281 	}
282 
283 	/*
284 	 * Handle the pending perf events.
285 	 *
286 	 * Note: this call *must* be run with interrupts disabled. For
287 	 * platforms that can have the PMU interrupts raised as an NMI, this
288 	 * will not work.
289 	 */
290 	irq_work_run();
291 
292 	return IRQ_HANDLED;
293 }
294 
armv6pmu_start(struct arm_pmu * cpu_pmu)295 static void armv6pmu_start(struct arm_pmu *cpu_pmu)
296 {
297 	unsigned long val;
298 
299 	val = armv6_pmcr_read();
300 	val |= ARMV6_PMCR_ENABLE;
301 	armv6_pmcr_write(val);
302 }
303 
armv6pmu_stop(struct arm_pmu * cpu_pmu)304 static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
305 {
306 	unsigned long val;
307 
308 	val = armv6_pmcr_read();
309 	val &= ~ARMV6_PMCR_ENABLE;
310 	armv6_pmcr_write(val);
311 }
312 
313 static int
armv6pmu_get_event_idx(struct pmu_hw_events * cpuc,struct perf_event * event)314 armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
315 				struct perf_event *event)
316 {
317 	struct hw_perf_event *hwc = &event->hw;
318 	/* Always place a cycle counter into the cycle counter. */
319 	if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
320 		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
321 			return -EAGAIN;
322 
323 		return ARMV6_CYCLE_COUNTER;
324 	} else {
325 		/*
326 		 * For anything other than a cycle counter, try and use
327 		 * counter0 and counter1.
328 		 */
329 		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
330 			return ARMV6_COUNTER1;
331 
332 		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
333 			return ARMV6_COUNTER0;
334 
335 		/* The counters are all in use. */
336 		return -EAGAIN;
337 	}
338 }
339 
armv6pmu_clear_event_idx(struct pmu_hw_events * cpuc,struct perf_event * event)340 static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
341 				     struct perf_event *event)
342 {
343 	clear_bit(event->hw.idx, cpuc->used_mask);
344 }
345 
armv6pmu_disable_event(struct perf_event * event)346 static void armv6pmu_disable_event(struct perf_event *event)
347 {
348 	unsigned long val, mask, evt;
349 	struct hw_perf_event *hwc = &event->hw;
350 	int idx = hwc->idx;
351 
352 	if (ARMV6_CYCLE_COUNTER == idx) {
353 		mask	= ARMV6_PMCR_CCOUNT_IEN;
354 		evt	= 0;
355 	} else if (ARMV6_COUNTER0 == idx) {
356 		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
357 		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
358 	} else if (ARMV6_COUNTER1 == idx) {
359 		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
360 		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
361 	} else {
362 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
363 		return;
364 	}
365 
366 	/*
367 	 * Mask out the current event and set the counter to count the number
368 	 * of ETM bus signal assertion cycles. The external reporting should
369 	 * be disabled and so this should never increment.
370 	 */
371 	val = armv6_pmcr_read();
372 	val &= ~mask;
373 	val |= evt;
374 	armv6_pmcr_write(val);
375 }
376 
armv6_map_event(struct perf_event * event)377 static int armv6_map_event(struct perf_event *event)
378 {
379 	return armpmu_map_event(event, &armv6_perf_map,
380 				&armv6_perf_cache_map, 0xFF);
381 }
382 
armv6pmu_init(struct arm_pmu * cpu_pmu)383 static void armv6pmu_init(struct arm_pmu *cpu_pmu)
384 {
385 	cpu_pmu->handle_irq	= armv6pmu_handle_irq;
386 	cpu_pmu->enable		= armv6pmu_enable_event;
387 	cpu_pmu->disable	= armv6pmu_disable_event;
388 	cpu_pmu->read_counter	= armv6pmu_read_counter;
389 	cpu_pmu->write_counter	= armv6pmu_write_counter;
390 	cpu_pmu->get_event_idx	= armv6pmu_get_event_idx;
391 	cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx;
392 	cpu_pmu->start		= armv6pmu_start;
393 	cpu_pmu->stop		= armv6pmu_stop;
394 	cpu_pmu->map_event	= armv6_map_event;
395 
396 	bitmap_set(cpu_pmu->cntr_mask, 0, ARMV6_NUM_COUNTERS);
397 }
398 
armv6_1136_pmu_init(struct arm_pmu * cpu_pmu)399 static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
400 {
401 	armv6pmu_init(cpu_pmu);
402 	cpu_pmu->name		= "armv6_1136";
403 	return 0;
404 }
405 
armv6_1176_pmu_init(struct arm_pmu * cpu_pmu)406 static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
407 {
408 	armv6pmu_init(cpu_pmu);
409 	cpu_pmu->name		= "armv6_1176";
410 	return 0;
411 }
412 
413 static const struct of_device_id armv6_pmu_of_device_ids[] = {
414 	{.compatible = "arm,arm1176-pmu",	.data = armv6_1176_pmu_init},
415 	{.compatible = "arm,arm1136-pmu",	.data = armv6_1136_pmu_init},
416 	{ /* sentinel value */ }
417 };
418 
armv6_pmu_device_probe(struct platform_device * pdev)419 static int armv6_pmu_device_probe(struct platform_device *pdev)
420 {
421 	return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, NULL);
422 }
423 
424 static struct platform_driver armv6_pmu_driver = {
425 	.driver		= {
426 		.name	= "armv6-pmu",
427 		.of_match_table = armv6_pmu_of_device_ids,
428 	},
429 	.probe		= armv6_pmu_device_probe,
430 };
431 
432 builtin_platform_driver(armv6_pmu_driver);
433