xref: /linux/drivers/hwtracing/coresight/coresight-trbe.c (revision e3966940559d52aa1800a008dcfeec218dd31f88)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This driver enables Trace Buffer Extension (TRBE) as a per-cpu coresight
4  * sink device could then pair with an appropriate per-cpu coresight source
5  * device (ETE) thus generating required trace data. Trace can be enabled
6  * via the perf framework.
7  *
8  * The AUX buffer handling is inspired from Arm SPE PMU driver.
9  *
10  * Copyright (C) 2020 ARM Ltd.
11  *
12  * Author: Anshuman Khandual <anshuman.khandual@arm.com>
13  */
14 #define DRVNAME "arm_trbe"
15 
16 #define pr_fmt(fmt) DRVNAME ": " fmt
17 
18 #include <asm/barrier.h>
19 #include <asm/cpufeature.h>
20 #include <linux/kvm_host.h>
21 #include <linux/vmalloc.h>
22 
23 #include "coresight-self-hosted-trace.h"
24 #include "coresight-trbe.h"
25 
26 #define PERF_IDX2OFF(idx, buf) \
27 	((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))
28 
29 /*
30  * A padding packet that will help the user space tools
31  * in skipping relevant sections in the captured trace
32  * data which could not be decoded. TRBE doesn't support
33  * formatting the trace data, unlike the legacy CoreSight
34  * sinks and thus we use ETE trace packets to pad the
35  * sections of the buffer.
36  */
37 #define ETE_IGNORE_PACKET		0x70
38 
39 /*
40  * Minimum amount of meaningful trace will contain:
41  * A-Sync, Trace Info, Trace On, Address, Atom.
42  * This is about 44bytes of ETE trace. To be on
43  * the safer side, we assume 64bytes is the minimum
44  * space required for a meaningful session, before
45  * we hit a "WRAP" event.
46  */
47 #define TRBE_TRACE_MIN_BUF_SIZE		64
48 
49 enum trbe_fault_action {
50 	TRBE_FAULT_ACT_WRAP,
51 	TRBE_FAULT_ACT_SPURIOUS,
52 	TRBE_FAULT_ACT_FATAL,
53 };
54 
55 struct trbe_buf {
56 	/*
57 	 * Even though trbe_base represents vmap()
58 	 * mapped allocated buffer's start address,
59 	 * it's being as unsigned long for various
60 	 * arithmetic and comparision operations &
61 	 * also to be consistent with trbe_write &
62 	 * trbe_limit sibling pointers.
63 	 */
64 	unsigned long trbe_base;
65 	/* The base programmed into the TRBE */
66 	unsigned long trbe_hw_base;
67 	unsigned long trbe_limit;
68 	unsigned long trbe_write;
69 	int nr_pages;
70 	void **pages;
71 	bool snapshot;
72 	struct trbe_cpudata *cpudata;
73 };
74 
75 /*
76  * TRBE erratum list
77  *
78  * The errata are defined in arm64 generic cpu_errata framework.
79  * Since the errata work arounds could be applied individually
80  * to the affected CPUs inside the TRBE driver, we need to know if
81  * a given CPU is affected by the erratum. Unlike the other erratum
82  * work arounds, TRBE driver needs to check multiple times during
83  * a trace session. Thus we need a quicker access to per-CPU
84  * errata and not issue costly this_cpu_has_cap() everytime.
85  * We keep a set of the affected errata in trbe_cpudata, per TRBE.
86  *
87  * We rely on the corresponding cpucaps to be defined for a given
88  * TRBE erratum. We map the given cpucap into a TRBE internal number
89  * to make the tracking of the errata lean.
90  *
91  * This helps in :
92  *   - Not duplicating the detection logic
93  *   - Streamlined detection of erratum across the system
94  */
95 #define TRBE_WORKAROUND_OVERWRITE_FILL_MODE	0
96 #define TRBE_WORKAROUND_WRITE_OUT_OF_RANGE	1
97 #define TRBE_NEEDS_DRAIN_AFTER_DISABLE		2
98 #define TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE	3
99 #define TRBE_IS_BROKEN				4
100 
101 static int trbe_errata_cpucaps[] = {
102 	[TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
103 	[TRBE_WORKAROUND_WRITE_OUT_OF_RANGE] = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE,
104 	[TRBE_NEEDS_DRAIN_AFTER_DISABLE] = ARM64_WORKAROUND_2064142,
105 	[TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE] = ARM64_WORKAROUND_2038923,
106 	[TRBE_IS_BROKEN] = ARM64_WORKAROUND_1902691,
107 	-1,		/* Sentinel, must be the last entry */
108 };
109 
110 /* The total number of listed errata in trbe_errata_cpucaps */
111 #define TRBE_ERRATA_MAX			(ARRAY_SIZE(trbe_errata_cpucaps) - 1)
112 
113 /*
114  * Safe limit for the number of bytes that may be overwritten
115  * when ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE is triggered.
116  */
117 #define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES	256
118 
119 /*
120  * struct trbe_cpudata: TRBE instance specific data
121  * @trbe_flag		- TRBE dirty/access flag support
122  * @trbe_hw_align	- Actual TRBE alignment required for TRBPTR_EL1.
123  * @trbe_align		- Software alignment used for the TRBPTR_EL1.
124  * @cpu			- CPU this TRBE belongs to.
125  * @mode		- Mode of current operation. (perf/disabled)
126  * @drvdata		- TRBE specific drvdata
127  * @errata		- Bit map for the errata on this TRBE.
128  */
129 struct trbe_cpudata {
130 	bool trbe_flag;
131 	u64 trbe_hw_align;
132 	u64 trbe_align;
133 	int cpu;
134 	enum cs_mode mode;
135 	struct trbe_buf *buf;
136 	struct trbe_drvdata *drvdata;
137 	DECLARE_BITMAP(errata, TRBE_ERRATA_MAX);
138 };
139 
140 struct trbe_drvdata {
141 	struct trbe_cpudata __percpu *cpudata;
142 	struct perf_output_handle * __percpu *handle;
143 	struct hlist_node hotplug_node;
144 	int irq;
145 	cpumask_t supported_cpus;
146 	enum cpuhp_state trbe_online;
147 	struct platform_device *pdev;
148 };
149 
150 static void trbe_check_errata(struct trbe_cpudata *cpudata)
151 {
152 	int i;
153 
154 	for (i = 0; i < TRBE_ERRATA_MAX; i++) {
155 		int cap = trbe_errata_cpucaps[i];
156 
157 		if (WARN_ON_ONCE(cap < 0))
158 			return;
159 		if (this_cpu_has_cap(cap))
160 			set_bit(i, cpudata->errata);
161 	}
162 }
163 
164 static bool trbe_has_erratum(struct trbe_cpudata *cpudata, int i)
165 {
166 	return (i < TRBE_ERRATA_MAX) && test_bit(i, cpudata->errata);
167 }
168 
169 static bool trbe_may_overwrite_in_fill_mode(struct trbe_cpudata *cpudata)
170 {
171 	return trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE);
172 }
173 
174 static bool trbe_may_write_out_of_range(struct trbe_cpudata *cpudata)
175 {
176 	return trbe_has_erratum(cpudata, TRBE_WORKAROUND_WRITE_OUT_OF_RANGE);
177 }
178 
179 static bool trbe_needs_drain_after_disable(struct trbe_cpudata *cpudata)
180 {
181 	/*
182 	 * Errata affected TRBE implementation will need TSB CSYNC and
183 	 * DSB in order to prevent subsequent writes into certain TRBE
184 	 * system registers from being ignored and not effected.
185 	 */
186 	return trbe_has_erratum(cpudata, TRBE_NEEDS_DRAIN_AFTER_DISABLE);
187 }
188 
189 static bool trbe_needs_ctxt_sync_after_enable(struct trbe_cpudata *cpudata)
190 {
191 	/*
192 	 * Errata affected TRBE implementation will need an additional
193 	 * context synchronization in order to prevent an inconsistent
194 	 * TRBE prohibited region view on the CPU which could possibly
195 	 * corrupt the TRBE buffer or the TRBE state.
196 	 */
197 	return trbe_has_erratum(cpudata, TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE);
198 }
199 
200 static bool trbe_is_broken(struct trbe_cpudata *cpudata)
201 {
202 	return trbe_has_erratum(cpudata, TRBE_IS_BROKEN);
203 }
204 
205 static int trbe_alloc_node(struct perf_event *event)
206 {
207 	if (event->cpu == -1)
208 		return NUMA_NO_NODE;
209 	return cpu_to_node(event->cpu);
210 }
211 
212 static void trbe_drain_buffer(void)
213 {
214 	tsb_csync();
215 	dsb(nsh);
216 }
217 
218 static void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr)
219 {
220 	/*
221 	 * Enable the TRBE without clearing LIMITPTR which
222 	 * might be required for fetching the buffer limits.
223 	 */
224 	trblimitr |= TRBLIMITR_EL1_E;
225 	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
226 	kvm_enable_trbe();
227 
228 	/* Synchronize the TRBE enable event */
229 	isb();
230 
231 	if (trbe_needs_ctxt_sync_after_enable(cpudata))
232 		isb();
233 }
234 
235 static void set_trbe_disabled(struct trbe_cpudata *cpudata)
236 {
237 	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
238 
239 	/*
240 	 * Disable the TRBE without clearing LIMITPTR which
241 	 * might be required for fetching the buffer limits.
242 	 */
243 	trblimitr &= ~TRBLIMITR_EL1_E;
244 	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
245 	kvm_disable_trbe();
246 
247 	if (trbe_needs_drain_after_disable(cpudata))
248 		trbe_drain_buffer();
249 	isb();
250 }
251 
252 static void trbe_drain_and_disable_local(struct trbe_cpudata *cpudata)
253 {
254 	trbe_drain_buffer();
255 	set_trbe_disabled(cpudata);
256 }
257 
258 static void trbe_reset_local(struct trbe_cpudata *cpudata)
259 {
260 	write_sysreg_s(0, SYS_TRBLIMITR_EL1);
261 	isb();
262 	trbe_drain_buffer();
263 	write_sysreg_s(0, SYS_TRBPTR_EL1);
264 	write_sysreg_s(0, SYS_TRBBASER_EL1);
265 	write_sysreg_s(0, SYS_TRBSR_EL1);
266 }
267 
268 static void trbe_report_wrap_event(struct perf_output_handle *handle)
269 {
270 	/*
271 	 * Mark the buffer to indicate that there was a WRAP event by
272 	 * setting the COLLISION flag. This indicates to the user that
273 	 * the TRBE trace collection was stopped without stopping the
274 	 * ETE and thus there might be some amount of trace that was
275 	 * lost between the time the WRAP was detected and the IRQ
276 	 * was consumed by the CPU.
277 	 *
278 	 * Setting the TRUNCATED flag would move the event to STOPPED
279 	 * state unnecessarily, even when there is space left in the
280 	 * ring buffer. Using the COLLISION flag doesn't have this side
281 	 * effect. We only set TRUNCATED flag when there is no space
282 	 * left in the ring buffer.
283 	 */
284 	perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
285 }
286 
287 static void trbe_stop_and_truncate_event(struct perf_output_handle *handle)
288 {
289 	struct trbe_buf *buf = etm_perf_sink_config(handle);
290 
291 	/*
292 	 * We cannot proceed with the buffer collection and we
293 	 * do not have any data for the current session. The
294 	 * etm_perf driver expects to close out the aux_buffer
295 	 * at event_stop(). So disable the TRBE here and leave
296 	 * the update_buffer() to return a 0 size.
297 	 */
298 	trbe_drain_and_disable_local(buf->cpudata);
299 	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
300 	perf_aux_output_end(handle, 0);
301 	*this_cpu_ptr(buf->cpudata->drvdata->handle) = NULL;
302 }
303 
304 /*
305  * TRBE Buffer Management
306  *
307  * The TRBE buffer spans from the base pointer till the limit pointer. When enabled,
308  * it starts writing trace data from the write pointer onward till the limit pointer.
309  * When the write pointer reaches the address just before the limit pointer, it gets
310  * wrapped around again to the base pointer. This is called a TRBE wrap event, which
311  * generates a maintenance interrupt when operated in WRAP or FILL mode. This driver
312  * uses FILL mode, where the TRBE stops the trace collection at wrap event. The IRQ
313  * handler updates the AUX buffer and re-enables the TRBE with updated WRITE and
314  * LIMIT pointers.
315  *
316  *	Wrap around with an IRQ
317  *	------ < ------ < ------- < ----- < -----
318  *	|					|
319  *	------ > ------ > ------- > ----- > -----
320  *
321  *	+---------------+-----------------------+
322  *	|		|			|
323  *	+---------------+-----------------------+
324  *	Base Pointer	Write Pointer		Limit Pointer
325  *
326  * The base and limit pointers always needs to be PAGE_SIZE aligned. But the write
327  * pointer can be aligned to the implementation defined TRBE trace buffer alignment
328  * as captured in trbe_cpudata->trbe_align.
329  *
330  *
331  *		head		tail		wakeup
332  *	+---------------------------------------+----- ~ ~ ------
333  *	|$$$$$$$|################|$$$$$$$$$$$$$$|		|
334  *	+---------------------------------------+----- ~ ~ ------
335  *	Base Pointer	Write Pointer		Limit Pointer
336  *
337  * The perf_output_handle indices (head, tail, wakeup) are monotonically increasing
338  * values which tracks all the driver writes and user reads from the perf auxiliary
339  * buffer. Generally [head..tail] is the area where the driver can write into unless
340  * the wakeup is behind the tail. Enabled TRBE buffer span needs to be adjusted and
341  * configured depending on the perf_output_handle indices, so that the driver does
342  * not override into areas in the perf auxiliary buffer which is being or yet to be
343  * consumed from the user space. The enabled TRBE buffer area is a moving subset of
344  * the allocated perf auxiliary buffer.
345  */
346 
347 static void __trbe_pad_buf(struct trbe_buf *buf, u64 offset, int len)
348 {
349 	memset((void *)buf->trbe_base + offset, ETE_IGNORE_PACKET, len);
350 }
351 
352 static void trbe_pad_buf(struct perf_output_handle *handle, int len)
353 {
354 	struct trbe_buf *buf = etm_perf_sink_config(handle);
355 	u64 head = PERF_IDX2OFF(handle->head, buf);
356 
357 	__trbe_pad_buf(buf, head, len);
358 	if (!buf->snapshot)
359 		perf_aux_output_skip(handle, len);
360 }
361 
362 static unsigned long trbe_snapshot_offset(struct perf_output_handle *handle)
363 {
364 	struct trbe_buf *buf = etm_perf_sink_config(handle);
365 
366 	/*
367 	 * The ETE trace has alignment synchronization packets allowing
368 	 * the decoder to reset in case of an overflow or corruption.
369 	 * So we can use the entire buffer for the snapshot mode.
370 	 */
371 	return buf->nr_pages * PAGE_SIZE;
372 }
373 
374 static u64 trbe_min_trace_buf_size(struct perf_output_handle *handle)
375 {
376 	u64 size = TRBE_TRACE_MIN_BUF_SIZE;
377 	struct trbe_buf *buf = etm_perf_sink_config(handle);
378 	struct trbe_cpudata *cpudata = buf->cpudata;
379 
380 	/*
381 	 * When the TRBE is affected by an erratum that could make it
382 	 * write to the next "virtually addressed" page beyond the LIMIT.
383 	 * We need to make sure there is always a PAGE after the LIMIT,
384 	 * within the buffer. Thus we ensure there is at least an extra
385 	 * page than normal. With this we could then adjust the LIMIT
386 	 * pointer down by a PAGE later.
387 	 */
388 	if (trbe_may_write_out_of_range(cpudata))
389 		size += PAGE_SIZE;
390 	return size;
391 }
392 
393 /*
394  * TRBE Limit Calculation
395  *
396  * The following markers are used to illustrate various TRBE buffer situations.
397  *
398  * $$$$ - Data area, unconsumed captured trace data, not to be overridden
399  * #### - Free area, enabled, trace will be written
400  * %%%% - Free area, disabled, trace will not be written
401  * ==== - Free area, padded with ETE_IGNORE_PACKET, trace will be skipped
402  */
403 static unsigned long __trbe_normal_offset(struct perf_output_handle *handle)
404 {
405 	struct trbe_buf *buf = etm_perf_sink_config(handle);
406 	struct trbe_cpudata *cpudata = buf->cpudata;
407 	const u64 bufsize = buf->nr_pages * PAGE_SIZE;
408 	u64 limit = bufsize;
409 	u64 head, tail, wakeup;
410 
411 	head = PERF_IDX2OFF(handle->head, buf);
412 
413 	/*
414 	 *		head
415 	 *	------->|
416 	 *	|
417 	 *	head	TRBE align	tail
418 	 * +----|-------|---------------|-------+
419 	 * |$$$$|=======|###############|$$$$$$$|
420 	 * +----|-------|---------------|-------+
421 	 * trbe_base				trbe_base + nr_pages
422 	 *
423 	 * Perf aux buffer output head position can be misaligned depending on
424 	 * various factors including user space reads. In case misaligned, head
425 	 * needs to be aligned before TRBE can be configured. Pad the alignment
426 	 * gap with ETE_IGNORE_PACKET bytes that will be ignored by user tools
427 	 * and skip this section thus advancing the head.
428 	 */
429 	if (!IS_ALIGNED(head, cpudata->trbe_align)) {
430 		unsigned long delta = roundup(head, cpudata->trbe_align) - head;
431 
432 		delta = min(delta, handle->size);
433 		trbe_pad_buf(handle, delta);
434 		head = PERF_IDX2OFF(handle->head, buf);
435 	}
436 
437 	/*
438 	 *	head = tail (size = 0)
439 	 * +----|-------------------------------+
440 	 * |$$$$|$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$	|
441 	 * +----|-------------------------------+
442 	 * trbe_base				trbe_base + nr_pages
443 	 *
444 	 * Perf aux buffer does not have any space for the driver to write into.
445 	 */
446 	if (!handle->size)
447 		return 0;
448 
449 	/* Compute the tail and wakeup indices now that we've aligned head */
450 	tail = PERF_IDX2OFF(handle->head + handle->size, buf);
451 	wakeup = PERF_IDX2OFF(handle->wakeup, buf);
452 
453 	/*
454 	 * Lets calculate the buffer area which TRBE could write into. There
455 	 * are three possible scenarios here. Limit needs to be aligned with
456 	 * PAGE_SIZE per the TRBE requirement. Always avoid clobbering the
457 	 * unconsumed data.
458 	 *
459 	 * 1) head < tail
460 	 *
461 	 *	head			tail
462 	 * +----|-----------------------|-------+
463 	 * |$$$$|#######################|$$$$$$$|
464 	 * +----|-----------------------|-------+
465 	 * trbe_base			limit	trbe_base + nr_pages
466 	 *
467 	 * TRBE could write into [head..tail] area. Unless the tail is right at
468 	 * the end of the buffer, neither an wrap around nor an IRQ is expected
469 	 * while being enabled.
470 	 *
471 	 * 2) head == tail
472 	 *
473 	 *	head = tail (size > 0)
474 	 * +----|-------------------------------+
475 	 * |%%%%|###############################|
476 	 * +----|-------------------------------+
477 	 * trbe_base				limit = trbe_base + nr_pages
478 	 *
479 	 * TRBE should just write into [head..base + nr_pages] area even though
480 	 * the entire buffer is empty. Reason being, when the trace reaches the
481 	 * end of the buffer, it will just wrap around with an IRQ giving an
482 	 * opportunity to reconfigure the buffer.
483 	 *
484 	 * 3) tail < head
485 	 *
486 	 *	tail			head
487 	 * +----|-----------------------|-------+
488 	 * |%%%%|$$$$$$$$$$$$$$$$$$$$$$$|#######|
489 	 * +----|-----------------------|-------+
490 	 * trbe_base				limit = trbe_base + nr_pages
491 	 *
492 	 * TRBE should just write into [head..base + nr_pages] area even though
493 	 * the [trbe_base..tail] is also empty. Reason being, when the trace
494 	 * reaches the end of the buffer, it will just wrap around with an IRQ
495 	 * giving an opportunity to reconfigure the buffer.
496 	 */
497 	if (head < tail)
498 		limit = round_down(tail, PAGE_SIZE);
499 
500 	/*
501 	 * Wakeup may be arbitrarily far into the future. If it's not in the
502 	 * current generation, either we'll wrap before hitting it, or it's
503 	 * in the past and has been handled already.
504 	 *
505 	 * If there's a wakeup before we wrap, arrange to be woken up by the
506 	 * page boundary following it. Keep the tail boundary if that's lower.
507 	 *
508 	 *	head		wakeup	tail
509 	 * +----|---------------|-------|-------+
510 	 * |$$$$|###############|%%%%%%%|$$$$$$$|
511 	 * +----|---------------|-------|-------+
512 	 * trbe_base		limit		trbe_base + nr_pages
513 	 */
514 	if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
515 		limit = min(limit, round_up(wakeup, PAGE_SIZE));
516 
517 	/*
518 	 * There are two situation when this can happen i.e limit is before
519 	 * the head and hence TRBE cannot be configured.
520 	 *
521 	 * 1) head < tail (aligned down with PAGE_SIZE) and also they are both
522 	 * within the same PAGE size range.
523 	 *
524 	 *			PAGE_SIZE
525 	 *		|----------------------|
526 	 *
527 	 *		limit	head	tail
528 	 * +------------|------|--------|-------+
529 	 * |$$$$$$$$$$$$$$$$$$$|========|$$$$$$$|
530 	 * +------------|------|--------|-------+
531 	 * trbe_base				trbe_base + nr_pages
532 	 *
533 	 * 2) head < wakeup (aligned up with PAGE_SIZE) < tail and also both
534 	 * head and wakeup are within same PAGE size range.
535 	 *
536 	 *		PAGE_SIZE
537 	 *	|----------------------|
538 	 *
539 	 *	limit	head	wakeup  tail
540 	 * +----|------|-------|--------|-------+
541 	 * |$$$$$$$$$$$|=======|========|$$$$$$$|
542 	 * +----|------|-------|--------|-------+
543 	 * trbe_base				trbe_base + nr_pages
544 	 */
545 	if (limit > head)
546 		return limit;
547 
548 	trbe_pad_buf(handle, handle->size);
549 	return 0;
550 }
551 
552 static unsigned long trbe_normal_offset(struct perf_output_handle *handle)
553 {
554 	struct trbe_buf *buf = etm_perf_sink_config(handle);
555 	u64 limit = __trbe_normal_offset(handle);
556 	u64 head = PERF_IDX2OFF(handle->head, buf);
557 
558 	/*
559 	 * If the head is too close to the limit and we don't
560 	 * have space for a meaningful run, we rather pad it
561 	 * and start fresh.
562 	 *
563 	 * We might have to do this more than once to make sure
564 	 * we have enough required space.
565 	 */
566 	while (limit && ((limit - head) < trbe_min_trace_buf_size(handle))) {
567 		trbe_pad_buf(handle, limit - head);
568 		limit = __trbe_normal_offset(handle);
569 		head = PERF_IDX2OFF(handle->head, buf);
570 	}
571 	return limit;
572 }
573 
574 static unsigned long compute_trbe_buffer_limit(struct perf_output_handle *handle)
575 {
576 	struct trbe_buf *buf = etm_perf_sink_config(handle);
577 	unsigned long offset;
578 
579 	if (buf->snapshot)
580 		offset = trbe_snapshot_offset(handle);
581 	else
582 		offset = trbe_normal_offset(handle);
583 	return buf->trbe_base + offset;
584 }
585 
586 static void clr_trbe_status(void)
587 {
588 	u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
589 
590 	WARN_ON(is_trbe_enabled());
591 	trbsr &= ~TRBSR_EL1_IRQ;
592 	trbsr &= ~TRBSR_EL1_TRG;
593 	trbsr &= ~TRBSR_EL1_WRAP;
594 	trbsr &= ~TRBSR_EL1_EC_MASK;
595 	trbsr &= ~TRBSR_EL1_BSC_MASK;
596 	trbsr &= ~TRBSR_EL1_S;
597 	write_sysreg_s(trbsr, SYS_TRBSR_EL1);
598 }
599 
600 static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf)
601 {
602 	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
603 	unsigned long addr = buf->trbe_limit;
604 
605 	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_EL1_LIMIT_SHIFT)));
606 	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
607 
608 	trblimitr &= ~TRBLIMITR_EL1_nVM;
609 	trblimitr &= ~TRBLIMITR_EL1_FM_MASK;
610 	trblimitr &= ~TRBLIMITR_EL1_TM_MASK;
611 	trblimitr &= ~TRBLIMITR_EL1_LIMIT_MASK;
612 
613 	/*
614 	 * Fill trace buffer mode is used here while configuring the
615 	 * TRBE for trace capture. In this particular mode, the trace
616 	 * collection is stopped and a maintenance interrupt is raised
617 	 * when the current write pointer wraps. This pause in trace
618 	 * collection gives the software an opportunity to capture the
619 	 * trace data in the interrupt handler, before reconfiguring
620 	 * the TRBE.
621 	 */
622 	trblimitr |= (TRBLIMITR_EL1_FM_FILL << TRBLIMITR_EL1_FM_SHIFT) &
623 		     TRBLIMITR_EL1_FM_MASK;
624 
625 	/*
626 	 * Trigger mode is not used here while configuring the TRBE for
627 	 * the trace capture. Hence just keep this in the ignore mode.
628 	 */
629 	trblimitr |= (TRBLIMITR_EL1_TM_IGNR << TRBLIMITR_EL1_TM_SHIFT) &
630 		     TRBLIMITR_EL1_TM_MASK;
631 	trblimitr |= (addr & PAGE_MASK);
632 	set_trbe_enabled(buf->cpudata, trblimitr);
633 }
634 
635 static void trbe_enable_hw(struct trbe_buf *buf)
636 {
637 	WARN_ON(buf->trbe_hw_base < buf->trbe_base);
638 	WARN_ON(buf->trbe_write < buf->trbe_hw_base);
639 	WARN_ON(buf->trbe_write >= buf->trbe_limit);
640 	set_trbe_disabled(buf->cpudata);
641 	clr_trbe_status();
642 	set_trbe_base_pointer(buf->trbe_hw_base);
643 	set_trbe_write_pointer(buf->trbe_write);
644 
645 	/*
646 	 * Synchronize all the register updates
647 	 * till now before enabling the TRBE.
648 	 */
649 	isb();
650 	set_trbe_limit_pointer_enabled(buf);
651 }
652 
653 static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
654 						 u64 trbsr)
655 {
656 	int ec = get_trbe_ec(trbsr);
657 	int bsc = get_trbe_bsc(trbsr);
658 	struct trbe_buf *buf = etm_perf_sink_config(handle);
659 	struct trbe_cpudata *cpudata = buf->cpudata;
660 
661 	WARN_ON(is_trbe_running(trbsr));
662 	if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
663 		return TRBE_FAULT_ACT_FATAL;
664 
665 	if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
666 		return TRBE_FAULT_ACT_FATAL;
667 
668 	/*
669 	 * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
670 	 * it might write data after a WRAP event in the fill mode.
671 	 * Thus the check TRBPTR == TRBBASER will not be honored.
672 	 */
673 	if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
674 	    (trbe_may_overwrite_in_fill_mode(cpudata) ||
675 	     get_trbe_write_pointer() == get_trbe_base_pointer()))
676 		return TRBE_FAULT_ACT_WRAP;
677 
678 	return TRBE_FAULT_ACT_SPURIOUS;
679 }
680 
681 static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
682 					 struct trbe_buf *buf, bool wrap)
683 {
684 	u64 write;
685 	u64 start_off, end_off;
686 	u64 size;
687 	u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
688 
689 	/*
690 	 * If the TRBE has wrapped around the write pointer has
691 	 * wrapped and should be treated as limit.
692 	 *
693 	 * When the TRBE is affected by TRBE_WORKAROUND_WRITE_OUT_OF_RANGE,
694 	 * it may write upto 64bytes beyond the "LIMIT". The driver already
695 	 * keeps a valid page next to the LIMIT and we could potentially
696 	 * consume the trace data that may have been collected there. But we
697 	 * cannot be really sure it is available, and the TRBPTR may not
698 	 * indicate the same. Also, affected cores are also affected by another
699 	 * erratum which forces the PAGE_SIZE alignment on the TRBPTR, and thus
700 	 * could potentially pad an entire PAGE_SIZE - 64bytes, to get those
701 	 * 64bytes. Thus we ignore the potential triggering of the erratum
702 	 * on WRAP and limit the data to LIMIT.
703 	 */
704 	if (wrap)
705 		write = get_trbe_limit_pointer();
706 	else
707 		write = get_trbe_write_pointer();
708 
709 	/*
710 	 * TRBE may use a different base address than the base
711 	 * of the ring buffer. Thus use the beginning of the ring
712 	 * buffer to compute the offsets.
713 	 */
714 	end_off = write - buf->trbe_base;
715 	start_off = PERF_IDX2OFF(handle->head, buf);
716 
717 	if (WARN_ON_ONCE(end_off < start_off))
718 		return 0;
719 
720 	size = end_off - start_off;
721 	/*
722 	 * If the TRBE is affected by the following erratum, we must fill
723 	 * the space we skipped with IGNORE packets. And we are always
724 	 * guaranteed to have at least a PAGE_SIZE space in the buffer.
725 	 */
726 	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
727 	    !WARN_ON(size < overwrite_skip))
728 		__trbe_pad_buf(buf, start_off, overwrite_skip);
729 
730 	return size;
731 }
732 
733 static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
734 				   struct perf_event *event, void **pages,
735 				   int nr_pages, bool snapshot)
736 {
737 	struct trbe_buf *buf;
738 	struct page **pglist;
739 	int i;
740 
741 	/*
742 	 * TRBE LIMIT and TRBE WRITE pointers must be page aligned. But with
743 	 * just a single page, there would not be any room left while writing
744 	 * into a partially filled TRBE buffer after the page size alignment.
745 	 * Hence restrict the minimum buffer size as two pages.
746 	 */
747 	if (nr_pages < 2)
748 		return NULL;
749 
750 	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, trbe_alloc_node(event));
751 	if (!buf)
752 		return NULL;
753 
754 	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
755 	if (!pglist) {
756 		kfree(buf);
757 		return NULL;
758 	}
759 
760 	for (i = 0; i < nr_pages; i++)
761 		pglist[i] = virt_to_page(pages[i]);
762 
763 	buf->trbe_base = (unsigned long)vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
764 	if (!buf->trbe_base) {
765 		kfree(pglist);
766 		kfree(buf);
767 		return NULL;
768 	}
769 	buf->trbe_limit = buf->trbe_base + nr_pages * PAGE_SIZE;
770 	buf->trbe_write = buf->trbe_base;
771 	buf->snapshot = snapshot;
772 	buf->nr_pages = nr_pages;
773 	buf->pages = pages;
774 	kfree(pglist);
775 	return buf;
776 }
777 
778 static void arm_trbe_free_buffer(void *config)
779 {
780 	struct trbe_buf *buf = config;
781 
782 	vunmap((void *)buf->trbe_base);
783 	kfree(buf);
784 }
785 
786 static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
787 					    struct perf_output_handle *handle,
788 					    void *config)
789 {
790 	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
791 	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
792 	struct trbe_buf *buf = config;
793 	enum trbe_fault_action act;
794 	unsigned long size, status;
795 	unsigned long flags;
796 	bool wrap = false;
797 
798 	WARN_ON(buf->cpudata != cpudata);
799 	WARN_ON(cpudata->cpu != smp_processor_id());
800 	WARN_ON(cpudata->drvdata != drvdata);
801 	if (cpudata->mode != CS_MODE_PERF)
802 		return 0;
803 
804 	/*
805 	 * We are about to disable the TRBE. And this could in turn
806 	 * fill up the buffer triggering, an IRQ. This could be consumed
807 	 * by the PE asynchronously, causing a race here against
808 	 * the IRQ handler in closing out the handle. So, let us
809 	 * make sure the IRQ can't trigger while we are collecting
810 	 * the buffer. We also make sure that a WRAP event is handled
811 	 * accordingly.
812 	 */
813 	local_irq_save(flags);
814 
815 	/*
816 	 * If the TRBE was disabled due to lack of space in the AUX buffer or a
817 	 * spurious fault, the driver leaves it disabled, truncating the buffer.
818 	 * Since the etm_perf driver expects to close out the AUX buffer, the
819 	 * driver skips it. Thus, just pass in 0 size here to indicate that the
820 	 * buffer was truncated.
821 	 */
822 	if (!is_trbe_enabled()) {
823 		size = 0;
824 		goto done;
825 	}
826 	/*
827 	 * perf handle structure needs to be shared with the TRBE IRQ handler for
828 	 * capturing trace data and restarting the handle. There is a probability
829 	 * of an undefined reference based crash when etm event is being stopped
830 	 * while a TRBE IRQ also getting processed. This happens due the release
831 	 * of perf handle via perf_aux_output_end() in etm_event_stop(). Stopping
832 	 * the TRBE here will ensure that no IRQ could be generated when the perf
833 	 * handle gets freed in etm_event_stop().
834 	 */
835 	trbe_drain_and_disable_local(cpudata);
836 
837 	/* Check if there is a pending interrupt and handle it here */
838 	status = read_sysreg_s(SYS_TRBSR_EL1);
839 	if (is_trbe_irq(status)) {
840 
841 		/*
842 		 * Now that we are handling the IRQ here, clear the IRQ
843 		 * from the status, to let the irq handler know that it
844 		 * is taken care of.
845 		 */
846 		clr_trbe_irq();
847 		isb();
848 
849 		act = trbe_get_fault_act(handle, status);
850 		/*
851 		 * If this was not due to a WRAP event, we have some
852 		 * errors and as such buffer is empty.
853 		 */
854 		if (act != TRBE_FAULT_ACT_WRAP) {
855 			size = 0;
856 			goto done;
857 		}
858 
859 		trbe_report_wrap_event(handle);
860 		wrap = true;
861 	}
862 
863 	size = trbe_get_trace_size(handle, buf, wrap);
864 
865 done:
866 	local_irq_restore(flags);
867 
868 	if (buf->snapshot)
869 		handle->head += size;
870 	return size;
871 }
872 
873 
874 static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
875 {
876 	/*
877 	 * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
878 	 * line size from the "TRBBASER_EL1" in the event of a "FILL".
879 	 * Thus, we could loose some amount of the trace at the base.
880 	 *
881 	 * Before Fix:
882 	 *
883 	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
884 	 *  |                   \/                       /
885 	 *   -------------------------------------------------------------
886 	 *  |   Pg0      |   Pg1       |           |          |  PgN     |
887 	 *   -------------------------------------------------------------
888 	 *
889 	 * In the normal course of action, we would set the TRBBASER to the
890 	 * beginning of the ring-buffer (normal-BASE). But with the erratum,
891 	 * the TRBE could overwrite the contents at the "normal-BASE", after
892 	 * hitting the "normal-LIMIT", since it doesn't stop as expected. And
893 	 * this is wrong. This could result in overwriting trace collected in
894 	 * one of the previous runs, being consumed by the user. So we must
895 	 * always make sure that the TRBBASER is within the region
896 	 * [head, head+size]. Note that TRBBASER must be PAGE aligned,
897 	 *
898 	 *  After moving the BASE:
899 	 *
900 	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
901 	 *  |                   \/                       /
902 	 *   -------------------------------------------------------------
903 	 *  |         |          |xyzdef.     |..   tuvw|                |
904 	 *   -------------------------------------------------------------
905 	 *                      /
906 	 *              New-BASER
907 	 *
908 	 * Also, we would set the TRBPTR to head (after adjusting for
909 	 * alignment) at normal-PTR. This would mean that the last few bytes
910 	 * of the trace (say, "xyz") might overwrite the first few bytes of
911 	 * trace written ("abc"). More importantly they will appear in what
912 	 * userspace sees as the beginning of the trace, which is wrong. We may
913 	 * not always have space to move the latest trace "xyz" to the correct
914 	 * order as it must appear beyond the LIMIT. (i.e, [head..head+size]).
915 	 * Thus it is easier to ignore those bytes than to complicate the
916 	 * driver to move it, assuming that the erratum was triggered and
917 	 * doing additional checks to see if there is indeed allowed space at
918 	 * TRBLIMITR.LIMIT.
919 	 *
920 	 *  Thus the full workaround will move the BASE and the PTR and would
921 	 *  look like (after padding at the skipped bytes at the end of
922 	 *  session) :
923 	 *
924 	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
925 	 *  |                   \/                       /
926 	 *   -------------------------------------------------------------
927 	 *  |         |          |///abc..     |..  rst|                |
928 	 *   -------------------------------------------------------------
929 	 *                      /    |
930 	 *              New-BASER    New-TRBPTR
931 	 *
932 	 * To summarize, with the work around:
933 	 *
934 	 *  - We always align the offset for the next session to PAGE_SIZE
935 	 *    (This is to ensure we can program the TRBBASER to this offset
936 	 *    within the region [head...head+size]).
937 	 *
938 	 *  - At TRBE enable:
939 	 *     - Set the TRBBASER to the page aligned offset of the current
940 	 *       proposed write offset. (which is guaranteed to be aligned
941 	 *       as above)
942 	 *     - Move the TRBPTR to skip first 256bytes (that might be
943 	 *       overwritten with the erratum). This ensures that the trace
944 	 *       generated in the session is not re-written.
945 	 *
946 	 *  - At trace collection:
947 	 *     - Pad the 256bytes skipped above again with IGNORE packets.
948 	 */
949 	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
950 		if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
951 			return -EINVAL;
952 		buf->trbe_hw_base = buf->trbe_write;
953 		buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
954 	}
955 
956 	/*
957 	 * TRBE_WORKAROUND_WRITE_OUT_OF_RANGE could cause the TRBE to write to
958 	 * the next page after the TRBLIMITR.LIMIT. For perf, the "next page"
959 	 * may be:
960 	 *     - The page beyond the ring buffer. This could mean, TRBE could
961 	 *       corrupt another entity (kernel / user)
962 	 *     - A portion of the "ring buffer" consumed by the userspace.
963 	 *       i.e, a page outisde [head, head + size].
964 	 *
965 	 * We work around this by:
966 	 *     - Making sure that we have at least an extra space of PAGE left
967 	 *       in the ring buffer [head, head + size], than we normally do
968 	 *       without the erratum. See trbe_min_trace_buf_size().
969 	 *
970 	 *     - Adjust the TRBLIMITR.LIMIT to leave the extra PAGE outside
971 	 *       the TRBE's range (i.e [TRBBASER, TRBLIMITR.LIMI] ).
972 	 */
973 	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_WRITE_OUT_OF_RANGE)) {
974 		s64 space = buf->trbe_limit - buf->trbe_write;
975 		/*
976 		 * We must have more than a PAGE_SIZE worth space in the proposed
977 		 * range for the TRBE.
978 		 */
979 		if (WARN_ON(space <= PAGE_SIZE ||
980 			    !IS_ALIGNED(buf->trbe_limit, PAGE_SIZE)))
981 			return -EINVAL;
982 		buf->trbe_limit -= PAGE_SIZE;
983 	}
984 
985 	return 0;
986 }
987 
988 static int __arm_trbe_enable(struct trbe_buf *buf,
989 			     struct perf_output_handle *handle)
990 {
991 	int ret = 0;
992 
993 	perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
994 	buf->trbe_limit = compute_trbe_buffer_limit(handle);
995 	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
996 	if (buf->trbe_limit == buf->trbe_base) {
997 		ret = -ENOSPC;
998 		goto err;
999 	}
1000 	/* Set the base of the TRBE to the buffer base */
1001 	buf->trbe_hw_base = buf->trbe_base;
1002 
1003 	ret = trbe_apply_work_around_before_enable(buf);
1004 	if (ret)
1005 		goto err;
1006 
1007 	*this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
1008 	trbe_enable_hw(buf);
1009 	return 0;
1010 err:
1011 	trbe_stop_and_truncate_event(handle);
1012 	return ret;
1013 }
1014 
1015 static int arm_trbe_enable(struct coresight_device *csdev, enum cs_mode mode,
1016 			   void *data)
1017 {
1018 	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
1019 	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
1020 	struct perf_output_handle *handle = data;
1021 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1022 
1023 	WARN_ON(cpudata->cpu != smp_processor_id());
1024 	WARN_ON(cpudata->drvdata != drvdata);
1025 	if (mode != CS_MODE_PERF)
1026 		return -EINVAL;
1027 
1028 	cpudata->buf = buf;
1029 	cpudata->mode = mode;
1030 	buf->cpudata = cpudata;
1031 
1032 	return __arm_trbe_enable(buf, handle);
1033 }
1034 
1035 static int arm_trbe_disable(struct coresight_device *csdev)
1036 {
1037 	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
1038 	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
1039 	struct trbe_buf *buf = cpudata->buf;
1040 
1041 	WARN_ON(buf->cpudata != cpudata);
1042 	WARN_ON(cpudata->cpu != smp_processor_id());
1043 	WARN_ON(cpudata->drvdata != drvdata);
1044 	if (cpudata->mode != CS_MODE_PERF)
1045 		return -EINVAL;
1046 
1047 	trbe_drain_and_disable_local(cpudata);
1048 	buf->cpudata = NULL;
1049 	cpudata->buf = NULL;
1050 	cpudata->mode = CS_MODE_DISABLED;
1051 	return 0;
1052 }
1053 
1054 static void trbe_handle_spurious(struct perf_output_handle *handle)
1055 {
1056 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1057 	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
1058 
1059 	/*
1060 	 * If the IRQ was spurious, simply re-enable the TRBE
1061 	 * back without modifying the buffer parameters to
1062 	 * retain the trace collected so far.
1063 	 */
1064 	set_trbe_enabled(buf->cpudata, trblimitr);
1065 }
1066 
1067 static int trbe_handle_overflow(struct perf_output_handle *handle)
1068 {
1069 	struct perf_event *event = handle->event;
1070 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1071 	unsigned long size;
1072 	struct etm_event_data *event_data;
1073 
1074 	size = trbe_get_trace_size(handle, buf, true);
1075 	if (buf->snapshot)
1076 		handle->head += size;
1077 
1078 	trbe_report_wrap_event(handle);
1079 	perf_aux_output_end(handle, size);
1080 	event_data = perf_aux_output_begin(handle, event);
1081 	if (!event_data) {
1082 		/*
1083 		 * We are unable to restart the trace collection,
1084 		 * thus leave the TRBE disabled. The etm-perf driver
1085 		 * is able to detect this with a disconnected handle
1086 		 * (handle->event = NULL).
1087 		 */
1088 		trbe_drain_and_disable_local(buf->cpudata);
1089 		*this_cpu_ptr(buf->cpudata->drvdata->handle) = NULL;
1090 		return -EINVAL;
1091 	}
1092 
1093 	return __arm_trbe_enable(buf, handle);
1094 }
1095 
1096 static bool is_perf_trbe(struct perf_output_handle *handle)
1097 {
1098 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1099 	struct trbe_cpudata *cpudata = buf->cpudata;
1100 	struct trbe_drvdata *drvdata = cpudata->drvdata;
1101 	int cpu = smp_processor_id();
1102 
1103 	WARN_ON(buf->trbe_hw_base != get_trbe_base_pointer());
1104 	WARN_ON(buf->trbe_limit != get_trbe_limit_pointer());
1105 
1106 	if (cpudata->mode != CS_MODE_PERF)
1107 		return false;
1108 
1109 	if (cpudata->cpu != cpu)
1110 		return false;
1111 
1112 	if (!cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1113 		return false;
1114 
1115 	return true;
1116 }
1117 
1118 static u64 cpu_prohibit_trace(void)
1119 {
1120 	u64 trfcr = read_trfcr();
1121 
1122 	/* Prohibit tracing at EL0 & the kernel EL */
1123 	write_trfcr(trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE));
1124 	/* Return the original value of the TRFCR */
1125 	return trfcr;
1126 }
1127 
1128 static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
1129 {
1130 	struct perf_output_handle **handle_ptr = dev;
1131 	struct perf_output_handle *handle = *handle_ptr;
1132 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1133 	enum trbe_fault_action act;
1134 	u64 status;
1135 	bool truncated = false;
1136 	u64 trfcr;
1137 
1138 	/* Reads to TRBSR_EL1 is fine when TRBE is active */
1139 	status = read_sysreg_s(SYS_TRBSR_EL1);
1140 	/*
1141 	 * If the pending IRQ was handled by update_buffer callback
1142 	 * we have nothing to do here.
1143 	 */
1144 	if (!is_trbe_irq(status))
1145 		return IRQ_NONE;
1146 
1147 	/* Prohibit the CPU from tracing before we disable the TRBE */
1148 	trfcr = cpu_prohibit_trace();
1149 	/*
1150 	 * Ensure the trace is visible to the CPUs and
1151 	 * any external aborts have been resolved.
1152 	 */
1153 	trbe_drain_and_disable_local(buf->cpudata);
1154 	clr_trbe_irq();
1155 	isb();
1156 
1157 	if (WARN_ON_ONCE(!handle) || !perf_get_aux(handle))
1158 		return IRQ_NONE;
1159 
1160 	if (!is_perf_trbe(handle))
1161 		return IRQ_NONE;
1162 
1163 	act = trbe_get_fault_act(handle, status);
1164 	switch (act) {
1165 	case TRBE_FAULT_ACT_WRAP:
1166 		truncated = !!trbe_handle_overflow(handle);
1167 		break;
1168 	case TRBE_FAULT_ACT_SPURIOUS:
1169 		trbe_handle_spurious(handle);
1170 		break;
1171 	case TRBE_FAULT_ACT_FATAL:
1172 		trbe_stop_and_truncate_event(handle);
1173 		truncated = true;
1174 		break;
1175 	}
1176 
1177 	/*
1178 	 * If the buffer was truncated, ensure perf callbacks
1179 	 * have completed, which will disable the event.
1180 	 *
1181 	 * Otherwise, restore the trace filter controls to
1182 	 * allow the tracing.
1183 	 */
1184 	if (truncated)
1185 		irq_work_run();
1186 	else
1187 		write_trfcr(trfcr);
1188 
1189 	return IRQ_HANDLED;
1190 }
1191 
1192 static const struct coresight_ops_sink arm_trbe_sink_ops = {
1193 	.enable		= arm_trbe_enable,
1194 	.disable	= arm_trbe_disable,
1195 	.alloc_buffer	= arm_trbe_alloc_buffer,
1196 	.free_buffer	= arm_trbe_free_buffer,
1197 	.update_buffer	= arm_trbe_update_buffer,
1198 };
1199 
1200 static const struct coresight_ops arm_trbe_cs_ops = {
1201 	.sink_ops	= &arm_trbe_sink_ops,
1202 };
1203 
1204 static ssize_t align_show(struct device *dev, struct device_attribute *attr, char *buf)
1205 {
1206 	struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
1207 
1208 	return sprintf(buf, "%llx\n", cpudata->trbe_hw_align);
1209 }
1210 static DEVICE_ATTR_RO(align);
1211 
1212 static ssize_t flag_show(struct device *dev, struct device_attribute *attr, char *buf)
1213 {
1214 	struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
1215 
1216 	return sprintf(buf, "%d\n", cpudata->trbe_flag);
1217 }
1218 static DEVICE_ATTR_RO(flag);
1219 
1220 static struct attribute *arm_trbe_attrs[] = {
1221 	&dev_attr_align.attr,
1222 	&dev_attr_flag.attr,
1223 	NULL,
1224 };
1225 
1226 static const struct attribute_group arm_trbe_group = {
1227 	.attrs = arm_trbe_attrs,
1228 };
1229 
1230 static const struct attribute_group *arm_trbe_groups[] = {
1231 	&arm_trbe_group,
1232 	NULL,
1233 };
1234 
1235 static void arm_trbe_enable_cpu(void *info)
1236 {
1237 	struct trbe_drvdata *drvdata = info;
1238 	struct trbe_cpudata *cpudata = this_cpu_ptr(drvdata->cpudata);
1239 
1240 	trbe_reset_local(cpudata);
1241 	enable_percpu_irq(drvdata->irq, IRQ_TYPE_NONE);
1242 }
1243 
1244 static void arm_trbe_disable_cpu(void *info)
1245 {
1246 	struct trbe_drvdata *drvdata = info;
1247 	struct trbe_cpudata *cpudata = this_cpu_ptr(drvdata->cpudata);
1248 
1249 	disable_percpu_irq(drvdata->irq);
1250 	trbe_reset_local(cpudata);
1251 }
1252 
1253 
1254 static void arm_trbe_register_coresight_cpu(struct trbe_drvdata *drvdata, int cpu)
1255 {
1256 	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
1257 	struct coresight_device *trbe_csdev = coresight_get_percpu_sink(cpu);
1258 	struct coresight_desc desc = { 0 };
1259 	struct device *dev;
1260 
1261 	if (WARN_ON(trbe_csdev))
1262 		return;
1263 
1264 	/* If the TRBE was not probed on the CPU, we shouldn't be here */
1265 	if (WARN_ON(!cpudata->drvdata))
1266 		return;
1267 
1268 	dev = &cpudata->drvdata->pdev->dev;
1269 	desc.name = devm_kasprintf(dev, GFP_KERNEL, "trbe%d", cpu);
1270 	if (!desc.name)
1271 		goto cpu_clear;
1272 	/*
1273 	 * TRBE coresight devices do not need regular connections
1274 	 * information, as the paths get built between all percpu
1275 	 * source and their respective percpu sink devices. Though
1276 	 * coresight_register() expect device connections via the
1277 	 * platform_data, which TRBE devices do not have. As they
1278 	 * are not real ACPI devices, coresight_get_platform_data()
1279 	 * ends up failing. Instead let's allocate a dummy zeroed
1280 	 * coresight_platform_data structure and assign that back
1281 	 * into the device for that purpose.
1282 	 */
1283 	desc.pdata = devm_kzalloc(dev, sizeof(*desc.pdata), GFP_KERNEL);
1284 	if (!desc.pdata)
1285 		goto cpu_clear;
1286 
1287 	desc.type = CORESIGHT_DEV_TYPE_SINK;
1288 	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM;
1289 	desc.ops = &arm_trbe_cs_ops;
1290 	desc.groups = arm_trbe_groups;
1291 	desc.dev = dev;
1292 	trbe_csdev = coresight_register(&desc);
1293 	if (IS_ERR(trbe_csdev))
1294 		goto cpu_clear;
1295 
1296 	dev_set_drvdata(&trbe_csdev->dev, cpudata);
1297 	coresight_set_percpu_sink(cpu, trbe_csdev);
1298 	return;
1299 cpu_clear:
1300 	cpumask_clear_cpu(cpu, &drvdata->supported_cpus);
1301 }
1302 
1303 /*
1304  * Must be called with preemption disabled, for trbe_check_errata().
1305  */
1306 static void arm_trbe_probe_cpu(void *info)
1307 {
1308 	struct trbe_drvdata *drvdata = info;
1309 	int cpu = smp_processor_id();
1310 	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
1311 	u64 trbidr;
1312 
1313 	if (WARN_ON(!cpudata))
1314 		goto cpu_clear;
1315 
1316 	if (!is_trbe_available()) {
1317 		pr_err("TRBE is not implemented on cpu %d\n", cpu);
1318 		goto cpu_clear;
1319 	}
1320 
1321 	trbidr = read_sysreg_s(SYS_TRBIDR_EL1);
1322 	if (!is_trbe_programmable(trbidr)) {
1323 		pr_err("TRBE is owned in higher exception level on cpu %d\n", cpu);
1324 		goto cpu_clear;
1325 	}
1326 
1327 	cpudata->trbe_hw_align = 1ULL << get_trbe_address_align(trbidr);
1328 	if (cpudata->trbe_hw_align > SZ_2K) {
1329 		pr_err("Unsupported alignment on cpu %d\n", cpu);
1330 		goto cpu_clear;
1331 	}
1332 
1333 	/*
1334 	 * Run the TRBE erratum checks, now that we know
1335 	 * this instance is about to be registered.
1336 	 */
1337 	trbe_check_errata(cpudata);
1338 
1339 	if (trbe_is_broken(cpudata)) {
1340 		pr_err("Disabling TRBE on cpu%d due to erratum\n", cpu);
1341 		goto cpu_clear;
1342 	}
1343 
1344 	/*
1345 	 * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
1346 	 * we must always program the TBRPTR_EL1, 256bytes from a page
1347 	 * boundary, with TRBBASER_EL1 set to the page, to prevent
1348 	 * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
1349 	 *
1350 	 * Thus make sure we always align our write pointer to a PAGE_SIZE,
1351 	 * which also guarantees that we have at least a PAGE_SIZE space in
1352 	 * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
1353 	 * the required bytes at the base.
1354 	 */
1355 	if (trbe_may_overwrite_in_fill_mode(cpudata))
1356 		cpudata->trbe_align = PAGE_SIZE;
1357 	else
1358 		cpudata->trbe_align = cpudata->trbe_hw_align;
1359 
1360 	cpudata->trbe_flag = get_trbe_flag_update(trbidr);
1361 	cpudata->cpu = cpu;
1362 	cpudata->drvdata = drvdata;
1363 	return;
1364 cpu_clear:
1365 	cpumask_clear_cpu(cpu, &drvdata->supported_cpus);
1366 }
1367 
1368 static void arm_trbe_remove_coresight_cpu(struct trbe_drvdata *drvdata, int cpu)
1369 {
1370 	struct coresight_device *trbe_csdev = coresight_get_percpu_sink(cpu);
1371 
1372 	if (trbe_csdev) {
1373 		coresight_unregister(trbe_csdev);
1374 		coresight_set_percpu_sink(cpu, NULL);
1375 	}
1376 }
1377 
1378 static int arm_trbe_probe_coresight(struct trbe_drvdata *drvdata)
1379 {
1380 	int cpu;
1381 
1382 	drvdata->cpudata = alloc_percpu(typeof(*drvdata->cpudata));
1383 	if (!drvdata->cpudata)
1384 		return -ENOMEM;
1385 
1386 	for_each_cpu(cpu, &drvdata->supported_cpus) {
1387 		/* If we fail to probe the CPU, let us defer it to hotplug callbacks */
1388 		if (smp_call_function_single(cpu, arm_trbe_probe_cpu, drvdata, 1))
1389 			continue;
1390 		if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1391 			arm_trbe_register_coresight_cpu(drvdata, cpu);
1392 		if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1393 			smp_call_function_single(cpu, arm_trbe_enable_cpu, drvdata, 1);
1394 	}
1395 	return 0;
1396 }
1397 
1398 static int arm_trbe_remove_coresight(struct trbe_drvdata *drvdata)
1399 {
1400 	int cpu;
1401 
1402 	for_each_cpu(cpu, &drvdata->supported_cpus) {
1403 		smp_call_function_single(cpu, arm_trbe_disable_cpu, drvdata, 1);
1404 		arm_trbe_remove_coresight_cpu(drvdata, cpu);
1405 	}
1406 	free_percpu(drvdata->cpudata);
1407 	return 0;
1408 }
1409 
1410 static void arm_trbe_probe_hotplugged_cpu(struct trbe_drvdata *drvdata)
1411 {
1412 	preempt_disable();
1413 	arm_trbe_probe_cpu(drvdata);
1414 	preempt_enable();
1415 }
1416 
1417 static int arm_trbe_cpu_startup(unsigned int cpu, struct hlist_node *node)
1418 {
1419 	struct trbe_drvdata *drvdata = hlist_entry_safe(node, struct trbe_drvdata, hotplug_node);
1420 
1421 	if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) {
1422 
1423 		/*
1424 		 * If this CPU was not probed for TRBE,
1425 		 * initialize it now.
1426 		 */
1427 		if (!coresight_get_percpu_sink(cpu)) {
1428 			arm_trbe_probe_hotplugged_cpu(drvdata);
1429 			if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1430 				arm_trbe_register_coresight_cpu(drvdata, cpu);
1431 			if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1432 				arm_trbe_enable_cpu(drvdata);
1433 		} else {
1434 			arm_trbe_enable_cpu(drvdata);
1435 		}
1436 	}
1437 	return 0;
1438 }
1439 
1440 static int arm_trbe_cpu_teardown(unsigned int cpu, struct hlist_node *node)
1441 {
1442 	struct trbe_drvdata *drvdata = hlist_entry_safe(node, struct trbe_drvdata, hotplug_node);
1443 
1444 	if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1445 		arm_trbe_disable_cpu(drvdata);
1446 	return 0;
1447 }
1448 
1449 static int arm_trbe_probe_cpuhp(struct trbe_drvdata *drvdata)
1450 {
1451 	enum cpuhp_state trbe_online;
1452 	int ret;
1453 
1454 	trbe_online = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
1455 					      arm_trbe_cpu_startup, arm_trbe_cpu_teardown);
1456 	if (trbe_online < 0)
1457 		return trbe_online;
1458 
1459 	ret = cpuhp_state_add_instance(trbe_online, &drvdata->hotplug_node);
1460 	if (ret) {
1461 		cpuhp_remove_multi_state(trbe_online);
1462 		return ret;
1463 	}
1464 	drvdata->trbe_online = trbe_online;
1465 	return 0;
1466 }
1467 
1468 static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata)
1469 {
1470 	cpuhp_state_remove_instance(drvdata->trbe_online, &drvdata->hotplug_node);
1471 	cpuhp_remove_multi_state(drvdata->trbe_online);
1472 }
1473 
1474 static int arm_trbe_probe_irq(struct platform_device *pdev,
1475 			      struct trbe_drvdata *drvdata)
1476 {
1477 	int ret;
1478 
1479 	drvdata->irq = platform_get_irq(pdev, 0);
1480 	if (drvdata->irq < 0) {
1481 		pr_err("IRQ not found for the platform device\n");
1482 		return drvdata->irq;
1483 	}
1484 
1485 	if (!irq_is_percpu(drvdata->irq)) {
1486 		pr_err("IRQ is not a PPI\n");
1487 		return -EINVAL;
1488 	}
1489 
1490 	if (irq_get_percpu_devid_partition(drvdata->irq, &drvdata->supported_cpus))
1491 		return -EINVAL;
1492 
1493 	drvdata->handle = alloc_percpu(struct perf_output_handle *);
1494 	if (!drvdata->handle)
1495 		return -ENOMEM;
1496 
1497 	ret = request_percpu_irq(drvdata->irq, arm_trbe_irq_handler, DRVNAME, drvdata->handle);
1498 	if (ret) {
1499 		free_percpu(drvdata->handle);
1500 		return ret;
1501 	}
1502 	return 0;
1503 }
1504 
1505 static void arm_trbe_remove_irq(struct trbe_drvdata *drvdata)
1506 {
1507 	free_percpu_irq(drvdata->irq, drvdata->handle);
1508 	free_percpu(drvdata->handle);
1509 }
1510 
1511 static int arm_trbe_device_probe(struct platform_device *pdev)
1512 {
1513 	struct trbe_drvdata *drvdata;
1514 	struct device *dev = &pdev->dev;
1515 	int ret;
1516 
1517 	/* Trace capture is not possible with kernel page table isolation */
1518 	if (arm64_kernel_unmapped_at_el0()) {
1519 		pr_err("TRBE wouldn't work if kernel gets unmapped at EL0\n");
1520 		return -EOPNOTSUPP;
1521 	}
1522 
1523 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
1524 	if (!drvdata)
1525 		return -ENOMEM;
1526 
1527 	dev_set_drvdata(dev, drvdata);
1528 	drvdata->pdev = pdev;
1529 	ret = arm_trbe_probe_irq(pdev, drvdata);
1530 	if (ret)
1531 		return ret;
1532 
1533 	ret = arm_trbe_probe_coresight(drvdata);
1534 	if (ret)
1535 		goto probe_failed;
1536 
1537 	ret = arm_trbe_probe_cpuhp(drvdata);
1538 	if (ret)
1539 		goto cpuhp_failed;
1540 
1541 	return 0;
1542 cpuhp_failed:
1543 	arm_trbe_remove_coresight(drvdata);
1544 probe_failed:
1545 	arm_trbe_remove_irq(drvdata);
1546 	return ret;
1547 }
1548 
1549 static void arm_trbe_device_remove(struct platform_device *pdev)
1550 {
1551 	struct trbe_drvdata *drvdata = platform_get_drvdata(pdev);
1552 
1553 	arm_trbe_remove_cpuhp(drvdata);
1554 	arm_trbe_remove_coresight(drvdata);
1555 	arm_trbe_remove_irq(drvdata);
1556 }
1557 
1558 static const struct of_device_id arm_trbe_of_match[] = {
1559 	{ .compatible = "arm,trace-buffer-extension"},
1560 	{},
1561 };
1562 MODULE_DEVICE_TABLE(of, arm_trbe_of_match);
1563 
1564 #ifdef CONFIG_ACPI
1565 static const struct platform_device_id arm_trbe_acpi_match[] = {
1566 	{ ARMV8_TRBE_PDEV_NAME, 0 },
1567 	{ }
1568 };
1569 MODULE_DEVICE_TABLE(platform, arm_trbe_acpi_match);
1570 #endif
1571 
1572 static struct platform_driver arm_trbe_driver = {
1573 	.id_table = ACPI_PTR(arm_trbe_acpi_match),
1574 	.driver	= {
1575 		.name = DRVNAME,
1576 		.of_match_table = of_match_ptr(arm_trbe_of_match),
1577 		.suppress_bind_attrs = true,
1578 	},
1579 	.probe	= arm_trbe_device_probe,
1580 	.remove = arm_trbe_device_remove,
1581 };
1582 
1583 static int __init arm_trbe_init(void)
1584 {
1585 	int ret;
1586 
1587 	ret = platform_driver_register(&arm_trbe_driver);
1588 	if (!ret)
1589 		return 0;
1590 
1591 	pr_err("Error registering %s platform driver\n", DRVNAME);
1592 	return ret;
1593 }
1594 
1595 static void __exit arm_trbe_exit(void)
1596 {
1597 	platform_driver_unregister(&arm_trbe_driver);
1598 }
1599 module_init(arm_trbe_init);
1600 module_exit(arm_trbe_exit);
1601 
1602 MODULE_AUTHOR("Anshuman Khandual <anshuman.khandual@arm.com>");
1603 MODULE_DESCRIPTION("Arm Trace Buffer Extension (TRBE) driver");
1604 MODULE_LICENSE("GPL v2");
1605