xref: /linux/drivers/hwtracing/coresight/coresight-trbe.c (revision da06d6eb523bdd20d063395d6cf7f4c873d338e8)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
 * This driver enables the Trace Buffer Extension (TRBE) as a per-cpu
 * coresight sink device, which can then be paired with an appropriate
 * per-cpu coresight source device (ETE) to generate the required trace
 * data. Trace can be enabled via the perf framework.
7  *
8  * The AUX buffer handling is inspired from Arm SPE PMU driver.
9  *
10  * Copyright (C) 2020 ARM Ltd.
11  *
12  * Author: Anshuman Khandual <anshuman.khandual@arm.com>
13  */
14 #define DRVNAME "arm_trbe"
15 
16 #define pr_fmt(fmt) DRVNAME ": " fmt
17 
18 #include <asm/barrier.h>
19 #include <asm/cpufeature.h>
20 #include <linux/kvm_host.h>
21 #include <linux/vmalloc.h>
22 
23 #include "coresight-self-hosted-trace.h"
24 #include "coresight-trbe.h"
25 
/*
 * Convert a perf AUX buffer index (@idx) into a byte offset within the
 * buffer described by @buf, i.e. the index modulo the buffer size
 * (nr_pages << PAGE_SHIFT bytes).
 */
#define PERF_IDX2OFF(idx, buf) \
	((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))
28 
/*
 * A padding packet that helps the user space tools skip the
 * sections of the captured trace data which could not be
 * decoded. TRBE doesn't support formatting the trace data,
 * unlike the legacy CoreSight sinks, and thus we use ETE
 * trace packets to pad those sections of the buffer.
 */
#define ETE_IGNORE_PACKET		0x70
38 
/*
 * Minimum amount of meaningful trace will contain:
 * A-Sync, Trace Info, Trace On, Address, Atom.
 * This is about 44 bytes of ETE trace. To be on
 * the safer side, we assume 64 bytes is the minimum
 * space required for a meaningful session, before
 * we hit a "WRAP" event.
 */
#define TRBE_TRACE_MIN_BUF_SIZE		64
48 
/*
 * Classification of a TRBE status value, as decided by
 * trbe_get_fault_act() from TRBSR_EL1.
 */
enum trbe_fault_action {
	/* The buffer was filled and the TRBE wrapped */
	TRBE_FAULT_ACT_WRAP,
	/* Neither a recognised wrap nor a fatal condition */
	TRBE_FAULT_ACT_SPURIOUS,
	/* Trigger, abort or stage 1/stage 2 abort was reported */
	TRBE_FAULT_ACT_FATAL,
};
54 
/* Per-session description of the AUX buffer as seen by the TRBE */
struct trbe_buf {
	/*
	 * Even though trbe_base represents the start address
	 * of the vmap() mapped buffer, it is kept as an
	 * unsigned long for various arithmetic and comparison
	 * operations & also to be consistent with the
	 * trbe_write & trbe_limit sibling pointers.
	 */
	unsigned long trbe_base;
	/* The base programmed into the TRBE */
	unsigned long trbe_hw_base;
	/* The limit programmed into TRBLIMITR_EL1 when enabling the TRBE */
	unsigned long trbe_limit;
	/* The write pointer programmed into the TRBE when enabling it */
	unsigned long trbe_write;
	/* Number of pages handed to arm_trbe_alloc_buffer() */
	int nr_pages;
	/* Page array handed to arm_trbe_alloc_buffer() */
	void **pages;
	/* Snapshot mode flag, as requested at allocation time */
	bool snapshot;
	/*
	 * TRBE instance using this buffer.
	 * NOTE(review): not set by arm_trbe_alloc_buffer(); presumably
	 * assigned when the sink is enabled - confirm.
	 */
	struct trbe_cpudata *cpudata;
};
74 
/*
 * TRBE erratum list
 *
 * The errata are defined in the arm64 generic cpu_errata framework.
 * Since the errata workarounds could be applied individually
 * to the affected CPUs inside the TRBE driver, we need to know if
 * a given CPU is affected by the erratum. Unlike the other erratum
 * workarounds, the TRBE driver needs to check multiple times during
 * a trace session. Thus we need quicker access to the per-CPU
 * errata and must not issue a costly this_cpu_has_cap() every time.
 * We keep a set of the affected errata in trbe_cpudata, per TRBE.
 *
 * We rely on the corresponding cpucaps to be defined for a given
 * TRBE erratum. We map the given cpucap into a TRBE internal number
 * to make the tracking of the errata lean.
 *
 * This helps in :
 *   - Not duplicating the detection logic
 *   - Streamlined detection of erratum across the system
 *
 * The numbers below are both the index into trbe_errata_cpucaps[]
 * and the bit position in trbe_cpudata->errata.
 */
#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE	0
#define TRBE_WORKAROUND_WRITE_OUT_OF_RANGE	1
#define TRBE_NEEDS_DRAIN_AFTER_DISABLE		2
#define TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE	3
#define TRBE_IS_BROKEN				4
100 
101 static int trbe_errata_cpucaps[] = {
102 	[TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
103 	[TRBE_WORKAROUND_WRITE_OUT_OF_RANGE] = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE,
104 	[TRBE_NEEDS_DRAIN_AFTER_DISABLE] = ARM64_WORKAROUND_2064142,
105 	[TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE] = ARM64_WORKAROUND_2038923,
106 	[TRBE_IS_BROKEN] = ARM64_WORKAROUND_1902691,
107 	-1,		/* Sentinel, must be the last entry */
108 };
109 
/*
 * The total number of listed errata in trbe_errata_cpucaps;
 * the trailing sentinel entry is not counted.
 */
#define TRBE_ERRATA_MAX			(ARRAY_SIZE(trbe_errata_cpucaps) - 1)
112 
/*
 * Safe limit for the number of bytes that may be overwritten
 * when ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE is triggered.
 * trbe_get_trace_size() pads this many bytes at the start of the
 * collected trace with ETE_IGNORE_PACKET on affected TRBEs.
 */
#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES	256
118 
/*
 * struct trbe_save_state: Register values representing TRBE state
 * @trblimitr		- Trace Buffer Limit Address Register value
 * @trbbaser		- Trace Buffer Base Register value
 * @trbptr		- Trace Buffer Write Pointer Register value
 * @trbsr		- Trace Buffer Status Register value
 *
 * One instance is embedded in each struct trbe_cpudata.
 * NOTE(review): presumably filled and consumed by save/restore code
 * that is not part of the section reviewed here - confirm.
 */
struct trbe_save_state {
	u64 trblimitr;
	u64 trbbaser;
	u64 trbptr;
	u64 trbsr;
};
132 
/*
 * struct trbe_cpudata: TRBE instance specific data
 * @trbe_flag		- TRBE dirty/access flag support
 * @trbe_hw_align	- Actual TRBE alignment required for TRBPTR_EL1.
 * @trbe_align		- Software alignment used for the TRBPTR_EL1.
 * @cpu			- CPU this TRBE belongs to.
 * @mode		- Mode of current operation. (perf/disabled)
 * @buf			- NOTE(review): presumably the trace buffer currently
 *			  attached to this TRBE - confirm against the
 *			  enable/disable paths.
 * @drvdata		- TRBE specific drvdata
 * @save_state		- Saved TRBE register values, see struct
 *			  trbe_save_state.
 * @errata		- Bit map for the errata on this TRBE, indexed by the
 *			  TRBE internal erratum numbers and populated by
 *			  trbe_check_errata().
 */
struct trbe_cpudata {
	bool trbe_flag;
	u64 trbe_hw_align;
	u64 trbe_align;
	int cpu;
	enum cs_mode mode;
	struct trbe_buf *buf;
	struct trbe_drvdata *drvdata;
	struct trbe_save_state save_state;
	DECLARE_BITMAP(errata, TRBE_ERRATA_MAX);
};
154 
/*
 * struct trbe_drvdata: Driver wide TRBE data
 * @cpudata		- Per-cpu TRBE instance data.
 * @handle		- Per-cpu pointer to the perf output handle; reset to
 *			  NULL by trbe_stop_and_truncate_event().
 * @hotplug_node	- NOTE(review): presumably the CPU hotplug instance
 *			  node - confirm against the registration code.
 * @irq			- NOTE(review): presumably the TRBE maintenance
 *			  interrupt number - confirm.
 * @supported_cpus	- NOTE(review): presumably the CPUs with a usable
 *			  TRBE - confirm.
 * @trbe_online		- NOTE(review): presumably the CPU hotplug state used
 *			  by this driver - confirm.
 * @pdev		- NOTE(review): presumably the platform device this
 *			  data belongs to - confirm.
 */
struct trbe_drvdata {
	struct trbe_cpudata __percpu *cpudata;
	struct perf_output_handle * __percpu *handle;
	struct hlist_node hotplug_node;
	int irq;
	cpumask_t supported_cpus;
	enum cpuhp_state trbe_online;
	struct platform_device *pdev;
};
164 
165 static void trbe_check_errata(struct trbe_cpudata *cpudata)
166 {
167 	int i;
168 
169 	for (i = 0; i < TRBE_ERRATA_MAX; i++) {
170 		int cap = trbe_errata_cpucaps[i];
171 
172 		if (WARN_ON_ONCE(cap < 0))
173 			return;
174 		if (this_cpu_has_cap(cap))
175 			set_bit(i, cpudata->errata);
176 	}
177 }
178 
179 static bool trbe_has_erratum(struct trbe_cpudata *cpudata, int i)
180 {
181 	return (i < TRBE_ERRATA_MAX) && test_bit(i, cpudata->errata);
182 }
183 
184 static bool trbe_may_overwrite_in_fill_mode(struct trbe_cpudata *cpudata)
185 {
186 	return trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE);
187 }
188 
189 static bool trbe_may_write_out_of_range(struct trbe_cpudata *cpudata)
190 {
191 	return trbe_has_erratum(cpudata, TRBE_WORKAROUND_WRITE_OUT_OF_RANGE);
192 }
193 
194 static bool trbe_needs_drain_after_disable(struct trbe_cpudata *cpudata)
195 {
196 	/*
197 	 * Errata affected TRBE implementation will need TSB CSYNC and
198 	 * DSB in order to prevent subsequent writes into certain TRBE
199 	 * system registers from being ignored and not effected.
200 	 */
201 	return trbe_has_erratum(cpudata, TRBE_NEEDS_DRAIN_AFTER_DISABLE);
202 }
203 
204 static bool trbe_needs_ctxt_sync_after_enable(struct trbe_cpudata *cpudata)
205 {
206 	/*
207 	 * Errata affected TRBE implementation will need an additional
208 	 * context synchronization in order to prevent an inconsistent
209 	 * TRBE prohibited region view on the CPU which could possibly
210 	 * corrupt the TRBE buffer or the TRBE state.
211 	 */
212 	return trbe_has_erratum(cpudata, TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE);
213 }
214 
215 static bool trbe_is_broken(struct trbe_cpudata *cpudata)
216 {
217 	return trbe_has_erratum(cpudata, TRBE_IS_BROKEN);
218 }
219 
220 static int trbe_alloc_node(struct perf_event *event)
221 {
222 	if (event->cpu == -1)
223 		return NUMA_NO_NODE;
224 	return cpu_to_node(event->cpu);
225 }
226 
/*
 * Issue a TSB CSYNC followed by a DSB (nsh).
 * NOTE(review): presumably this pushes any trace still in flight out
 * to the buffer before the TRBE is inspected or reprogrammed - confirm
 * against the architecture documentation.
 */
static void trbe_drain_buffer(void)
{
	tsb_csync();
	dsb(nsh);
}
232 
/*
 * Enable the TRBE on the current CPU.
 * @cpudata:	TRBE instance, consulted for the context-sync erratum.
 * @trblimitr:	Value to program into TRBLIMITR_EL1; the enable bit is
 *		set on top of it here.
 *
 * Also calls kvm_enable_trbe() after the register write.
 */
static void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr)
{
	/*
	 * Enable the TRBE without clearing LIMITPTR which
	 * might be required for fetching the buffer limits.
	 */
	trblimitr |= TRBLIMITR_EL1_E;
	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
	kvm_enable_trbe();

	/* Synchronize the TRBE enable event */
	isb();

	/* Affected implementations need one more context synchronization */
	if (trbe_needs_ctxt_sync_after_enable(cpudata))
		isb();
}
249 
/*
 * Disable the TRBE on the current CPU by clearing only the enable bit
 * of TRBLIMITR_EL1, then call kvm_disable_trbe().
 * @cpudata:	TRBE instance, consulted for the drain-after-disable
 *		erratum.
 */
static void set_trbe_disabled(struct trbe_cpudata *cpudata)
{
	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);

	/*
	 * Disable the TRBE without clearing LIMITPTR which
	 * might be required for fetching the buffer limits.
	 */
	trblimitr &= ~TRBLIMITR_EL1_E;
	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
	kvm_disable_trbe();

	/* Affected implementations need a TSB CSYNC + DSB after the disable */
	if (trbe_needs_drain_after_disable(cpudata))
		trbe_drain_buffer();
	isb();
}
266 
/*
 * Drain the trace buffer unit first and only then disable the TRBE
 * on the current CPU. The order of the two steps must be kept.
 */
static void trbe_drain_and_disable_local(struct trbe_cpudata *cpudata)
{
	trbe_drain_buffer();
	set_trbe_disabled(cpudata);
}
272 
/*
 * Reset the TRBE registers of the current CPU to zero.
 *
 * TRBLIMITR_EL1 is zeroed first (which also clears its enable bit),
 * followed by an ISB and a drain, before the write pointer, base and
 * status registers are cleared. @cpudata is not used here.
 */
static void trbe_reset_local(struct trbe_cpudata *cpudata)
{
	write_sysreg_s(0, SYS_TRBLIMITR_EL1);
	isb();
	trbe_drain_buffer();
	write_sysreg_s(0, SYS_TRBPTR_EL1);
	write_sysreg_s(0, SYS_TRBBASER_EL1);
	write_sysreg_s(0, SYS_TRBSR_EL1);
}
282 
/* Flag a TRBE WRAP event on the perf AUX @handle (PERF_AUX_FLAG_COLLISION) */
static void trbe_report_wrap_event(struct perf_output_handle *handle)
{
	/*
	 * Mark the buffer to indicate that there was a WRAP event by
	 * setting the COLLISION flag. This indicates to the user that
	 * the TRBE trace collection was stopped without stopping the
	 * ETE and thus there might be some amount of trace that was
	 * lost between the time the WRAP was detected and the IRQ
	 * was consumed by the CPU.
	 *
	 * Setting the TRUNCATED flag would move the event to STOPPED
	 * state unnecessarily, even when there is space left in the
	 * ring buffer. Using the COLLISION flag doesn't have this side
	 * effect. We only set the TRUNCATED flag when there is no space
	 * left in the ring buffer.
	 */
	perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
}
301 
302 static void trbe_stop_and_truncate_event(struct perf_output_handle *handle)
303 {
304 	struct trbe_buf *buf = etm_perf_sink_config(handle);
305 
306 	/*
307 	 * We cannot proceed with the buffer collection and we
308 	 * do not have any data for the current session. The
309 	 * etm_perf driver expects to close out the aux_buffer
310 	 * at event_stop(). So disable the TRBE here and leave
311 	 * the update_buffer() to return a 0 size.
312 	 */
313 	trbe_drain_and_disable_local(buf->cpudata);
314 	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
315 	perf_aux_output_end(handle, 0);
316 	*this_cpu_ptr(buf->cpudata->drvdata->handle) = NULL;
317 }
318 
319 /*
320  * TRBE Buffer Management
321  *
322  * The TRBE buffer spans from the base pointer till the limit pointer. When enabled,
323  * it starts writing trace data from the write pointer onward till the limit pointer.
324  * When the write pointer reaches the address just before the limit pointer, it gets
325  * wrapped around again to the base pointer. This is called a TRBE wrap event, which
326  * generates a maintenance interrupt when operated in WRAP or FILL mode. This driver
327  * uses FILL mode, where the TRBE stops the trace collection at wrap event. The IRQ
328  * handler updates the AUX buffer and re-enables the TRBE with updated WRITE and
329  * LIMIT pointers.
330  *
331  *	Wrap around with an IRQ
332  *	------ < ------ < ------- < ----- < -----
333  *	|					|
334  *	------ > ------ > ------- > ----- > -----
335  *
336  *	+---------------+-----------------------+
337  *	|		|			|
338  *	+---------------+-----------------------+
339  *	Base Pointer	Write Pointer		Limit Pointer
340  *
 * The base and limit pointers always need to be PAGE_SIZE aligned. But the write
 * pointer can be aligned to the implementation defined TRBE trace buffer alignment
 * as captured in trbe_cpudata->trbe_align.
344  *
345  *
346  *		head		tail		wakeup
347  *	+---------------------------------------+----- ~ ~ ------
348  *	|$$$$$$$|################|$$$$$$$$$$$$$$|		|
349  *	+---------------------------------------+----- ~ ~ ------
350  *	Base Pointer	Write Pointer		Limit Pointer
351  *
 * The perf_output_handle indices (head, tail, wakeup) are monotonically increasing
 * values which track all the driver writes and user reads from the perf auxiliary
 * buffer. Generally [head..tail] is the area where the driver can write into unless
 * the wakeup is behind the tail. The enabled TRBE buffer span needs to be adjusted
 * and configured depending on the perf_output_handle indices, so that the driver
 * does not overwrite areas in the perf auxiliary buffer which are being, or are yet
 * to be, consumed from the user space. The enabled TRBE buffer area is a moving
 * subset of the allocated perf auxiliary buffer.
360  */
361 
362 static void __trbe_pad_buf(struct trbe_buf *buf, u64 offset, int len)
363 {
364 	memset((void *)buf->trbe_base + offset, ETE_IGNORE_PACKET, len);
365 }
366 
367 static void trbe_pad_buf(struct perf_output_handle *handle, int len)
368 {
369 	struct trbe_buf *buf = etm_perf_sink_config(handle);
370 	u64 head = PERF_IDX2OFF(handle->head, buf);
371 
372 	__trbe_pad_buf(buf, head, len);
373 	if (!buf->snapshot)
374 		perf_aux_output_skip(handle, len);
375 }
376 
377 static unsigned long trbe_snapshot_offset(struct perf_output_handle *handle)
378 {
379 	struct trbe_buf *buf = etm_perf_sink_config(handle);
380 
381 	/*
382 	 * The ETE trace has alignment synchronization packets allowing
383 	 * the decoder to reset in case of an overflow or corruption.
384 	 * So we can use the entire buffer for the snapshot mode.
385 	 */
386 	return buf->nr_pages * PAGE_SIZE;
387 }
388 
389 static u64 trbe_min_trace_buf_size(struct perf_output_handle *handle)
390 {
391 	u64 size = TRBE_TRACE_MIN_BUF_SIZE;
392 	struct trbe_buf *buf = etm_perf_sink_config(handle);
393 	struct trbe_cpudata *cpudata = buf->cpudata;
394 
395 	/*
396 	 * When the TRBE is affected by an erratum that could make it
397 	 * write to the next "virtually addressed" page beyond the LIMIT.
398 	 * We need to make sure there is always a PAGE after the LIMIT,
399 	 * within the buffer. Thus we ensure there is at least an extra
400 	 * page than normal. With this we could then adjust the LIMIT
401 	 * pointer down by a PAGE later.
402 	 */
403 	if (trbe_may_write_out_of_range(cpudata))
404 		size += PAGE_SIZE;
405 	return size;
406 }
407 
408 /*
409  * TRBE Limit Calculation
410  *
411  * The following markers are used to illustrate various TRBE buffer situations.
412  *
413  * $$$$ - Data area, unconsumed captured trace data, not to be overridden
414  * #### - Free area, enabled, trace will be written
415  * %%%% - Free area, disabled, trace will not be written
416  * ==== - Free area, padded with ETE_IGNORE_PACKET, trace will be skipped
417  */
/*
 * Compute the limit, as an offset from trbe_base, up to which the TRBE
 * may write for a non-snapshot session, based on the head, size and
 * wakeup of the perf @handle.
 *
 * Returns 0 when the TRBE cannot be configured, i.e. there is no space
 * or the computed limit is not beyond the head. As a side effect this
 * may pad and skip part of the AUX buffer through trbe_pad_buf(), which
 * advances the handle.
 */
static unsigned long __trbe_normal_offset(struct perf_output_handle *handle)
{
	struct trbe_buf *buf = etm_perf_sink_config(handle);
	struct trbe_cpudata *cpudata = buf->cpudata;
	const u64 bufsize = buf->nr_pages * PAGE_SIZE;
	u64 limit = bufsize;
	u64 head, tail, wakeup;

	head = PERF_IDX2OFF(handle->head, buf);

	/*
	 *		head
	 *	------->|
	 *	|
	 *	head	TRBE align	tail
	 * +----|-------|---------------|-------+
	 * |$$$$|=======|###############|$$$$$$$|
	 * +----|-------|---------------|-------+
	 * trbe_base				trbe_base + nr_pages
	 *
	 * Perf aux buffer output head position can be misaligned depending on
	 * various factors including user space reads. If it is misaligned, the
	 * head needs to be aligned before the TRBE can be configured. Pad the
	 * alignment gap with ETE_IGNORE_PACKET bytes that will be ignored by
	 * user tools and skip this section, thus advancing the head.
	 */
	if (!IS_ALIGNED(head, cpudata->trbe_align)) {
		unsigned long delta = roundup(head, cpudata->trbe_align) - head;

		/* Never pad beyond the space available in the handle */
		delta = min(delta, handle->size);
		trbe_pad_buf(handle, delta);
		head = PERF_IDX2OFF(handle->head, buf);
	}

	/*
	 *	head = tail (size = 0)
	 * +----|-------------------------------+
	 * |$$$$|$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$	|
	 * +----|-------------------------------+
	 * trbe_base				trbe_base + nr_pages
	 *
	 * Perf aux buffer does not have any space for the driver to write into.
	 */
	if (!handle->size)
		return 0;

	/* Compute the tail and wakeup indices now that we've aligned head */
	tail = PERF_IDX2OFF(handle->head + handle->size, buf);
	wakeup = PERF_IDX2OFF(handle->wakeup, buf);

	/*
	 * Let's calculate the buffer area which the TRBE could write into.
	 * There are three possible scenarios here. The limit needs to be
	 * aligned with PAGE_SIZE per the TRBE requirement. Always avoid
	 * clobbering the unconsumed data.
	 *
	 * 1) head < tail
	 *
	 *	head			tail
	 * +----|-----------------------|-------+
	 * |$$$$|#######################|$$$$$$$|
	 * +----|-----------------------|-------+
	 * trbe_base			limit	trbe_base + nr_pages
	 *
	 * TRBE could write into [head..tail] area. Unless the tail is right at
	 * the end of the buffer, neither a wrap around nor an IRQ is expected
	 * while being enabled.
	 *
	 * 2) head == tail
	 *
	 *	head = tail (size > 0)
	 * +----|-------------------------------+
	 * |%%%%|###############################|
	 * +----|-------------------------------+
	 * trbe_base				limit = trbe_base + nr_pages
	 *
	 * TRBE should just write into [head..base + nr_pages] area even though
	 * the entire buffer is empty. Reason being, when the trace reaches the
	 * end of the buffer, it will just wrap around with an IRQ giving an
	 * opportunity to reconfigure the buffer.
	 *
	 * 3) tail < head
	 *
	 *	tail			head
	 * +----|-----------------------|-------+
	 * |%%%%|$$$$$$$$$$$$$$$$$$$$$$$|#######|
	 * +----|-----------------------|-------+
	 * trbe_base				limit = trbe_base + nr_pages
	 *
	 * TRBE should just write into [head..base + nr_pages] area even though
	 * the [trbe_base..tail] is also empty. Reason being, when the trace
	 * reaches the end of the buffer, it will just wrap around with an IRQ
	 * giving an opportunity to reconfigure the buffer.
	 */
	if (head < tail)
		limit = round_down(tail, PAGE_SIZE);

	/*
	 * Wakeup may be arbitrarily far into the future. If it's not in the
	 * current generation, either we'll wrap before hitting it, or it's
	 * in the past and has been handled already.
	 *
	 * If there's a wakeup before we wrap, arrange to be woken up by the
	 * page boundary following it. Keep the tail boundary if that's lower.
	 *
	 *	head		wakeup	tail
	 * +----|---------------|-------|-------+
	 * |$$$$|###############|%%%%%%%|$$$$$$$|
	 * +----|---------------|-------|-------+
	 * trbe_base		limit		trbe_base + nr_pages
	 */
	if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
		limit = min(limit, round_up(wakeup, PAGE_SIZE));

	/*
	 * There are two situations in which the limit can end up before
	 * the head, and hence the TRBE cannot be configured.
	 *
	 * 1) head < tail (aligned down with PAGE_SIZE) and also they are both
	 * within the same PAGE size range.
	 *
	 *			PAGE_SIZE
	 *		|----------------------|
	 *
	 *		limit	head	tail
	 * +------------|------|--------|-------+
	 * |$$$$$$$$$$$$$$$$$$$|========|$$$$$$$|
	 * +------------|------|--------|-------+
	 * trbe_base				trbe_base + nr_pages
	 *
	 * 2) head < wakeup (aligned up with PAGE_SIZE) < tail and also both
	 * head and wakeup are within same PAGE size range.
	 *
	 *		PAGE_SIZE
	 *	|----------------------|
	 *
	 *	limit	head	wakeup  tail
	 * +----|------|-------|--------|-------+
	 * |$$$$$$$$$$$|=======|========|$$$$$$$|
	 * +----|------|-------|--------|-------+
	 * trbe_base				trbe_base + nr_pages
	 */
	if (limit > head)
		return limit;

	/* No usable room: pad out whatever space the handle has left */
	trbe_pad_buf(handle, handle->size);
	return 0;
}
566 
567 static unsigned long trbe_normal_offset(struct perf_output_handle *handle)
568 {
569 	struct trbe_buf *buf = etm_perf_sink_config(handle);
570 	u64 limit = __trbe_normal_offset(handle);
571 	u64 head = PERF_IDX2OFF(handle->head, buf);
572 
573 	/*
574 	 * If the head is too close to the limit and we don't
575 	 * have space for a meaningful run, we rather pad it
576 	 * and start fresh.
577 	 *
578 	 * We might have to do this more than once to make sure
579 	 * we have enough required space.
580 	 */
581 	while (limit && ((limit - head) < trbe_min_trace_buf_size(handle))) {
582 		trbe_pad_buf(handle, limit - head);
583 		limit = __trbe_normal_offset(handle);
584 		head = PERF_IDX2OFF(handle->head, buf);
585 	}
586 	return limit;
587 }
588 
589 static unsigned long compute_trbe_buffer_limit(struct perf_output_handle *handle)
590 {
591 	struct trbe_buf *buf = etm_perf_sink_config(handle);
592 	unsigned long offset;
593 
594 	if (buf->snapshot)
595 		offset = trbe_snapshot_offset(handle);
596 	else
597 		offset = trbe_normal_offset(handle);
598 	return buf->trbe_base + offset;
599 }
600 
/*
 * Clear the IRQ, TRG, WRAP, EC, BSC and S fields of TRBSR_EL1 on the
 * current CPU, leaving the other bits as read. Warns if the TRBE is
 * still enabled when called.
 */
static void clr_trbe_status(void)
{
	u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);

	WARN_ON(is_trbe_enabled());
	trbsr &= ~TRBSR_EL1_IRQ;
	trbsr &= ~TRBSR_EL1_TRG;
	trbsr &= ~TRBSR_EL1_WRAP;
	trbsr &= ~TRBSR_EL1_EC_MASK;
	trbsr &= ~TRBSR_EL1_BSC_MASK;
	trbsr &= ~TRBSR_EL1_S;
	write_sysreg_s(trbsr, SYS_TRBSR_EL1);
}
614 
/*
 * Program TRBLIMITR_EL1 with buf->trbe_limit, FILL buffer mode and the
 * IGNORE trigger mode, and enable the TRBE in the same register write
 * (through set_trbe_enabled()). Warns if the limit is not aligned to
 * the LIMIT field or to PAGE_SIZE.
 */
static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf)
{
	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
	unsigned long addr = buf->trbe_limit;

	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_EL1_LIMIT_SHIFT)));
	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));

	/* Start from a clean slate for every field set up below */
	trblimitr &= ~TRBLIMITR_EL1_nVM;
	trblimitr &= ~TRBLIMITR_EL1_FM_MASK;
	trblimitr &= ~TRBLIMITR_EL1_TM_MASK;
	trblimitr &= ~TRBLIMITR_EL1_LIMIT_MASK;

	/*
	 * Fill trace buffer mode is used here while configuring the
	 * TRBE for trace capture. In this particular mode, the trace
	 * collection is stopped and a maintenance interrupt is raised
	 * when the current write pointer wraps. This pause in trace
	 * collection gives the software an opportunity to capture the
	 * trace data in the interrupt handler, before reconfiguring
	 * the TRBE.
	 */
	trblimitr |= (TRBLIMITR_EL1_FM_FILL << TRBLIMITR_EL1_FM_SHIFT) &
		     TRBLIMITR_EL1_FM_MASK;

	/*
	 * Trigger mode is not used here while configuring the TRBE for
	 * the trace capture. Hence just keep this in the ignore mode.
	 */
	trblimitr |= (TRBLIMITR_EL1_TM_IGNR << TRBLIMITR_EL1_TM_SHIFT) &
		     TRBLIMITR_EL1_TM_MASK;
	trblimitr |= (addr & PAGE_MASK);
	set_trbe_enabled(buf->cpudata, trblimitr);
}
649 
/*
 * Program the TRBE of the current CPU from @buf and enable it.
 *
 * The TRBE is disabled and its status cleared first; the base and write
 * pointers are then written and synchronized before the limit is
 * programmed together with the enable bit. Warns if the pointers in
 * @buf are not ordered as
 * trbe_base <= trbe_hw_base <= trbe_write < trbe_limit.
 */
static void trbe_enable_hw(struct trbe_buf *buf)
{
	WARN_ON(buf->trbe_hw_base < buf->trbe_base);
	WARN_ON(buf->trbe_write < buf->trbe_hw_base);
	WARN_ON(buf->trbe_write >= buf->trbe_limit);
	set_trbe_disabled(buf->cpudata);
	clr_trbe_status();
	set_trbe_base_pointer(buf->trbe_hw_base);
	set_trbe_write_pointer(buf->trbe_write);

	/*
	 * Synchronize all the register updates
	 * till now before enabling the TRBE.
	 */
	isb();
	set_trbe_limit_pointer_enabled(buf);
}
667 
668 static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
669 						 u64 trbsr)
670 {
671 	int ec = get_trbe_ec(trbsr);
672 	int bsc = get_trbe_bsc(trbsr);
673 	struct trbe_buf *buf = etm_perf_sink_config(handle);
674 	struct trbe_cpudata *cpudata = buf->cpudata;
675 
676 	WARN_ON(is_trbe_running(trbsr));
677 	if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
678 		return TRBE_FAULT_ACT_FATAL;
679 
680 	if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
681 		return TRBE_FAULT_ACT_FATAL;
682 
683 	/*
684 	 * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
685 	 * it might write data after a WRAP event in the fill mode.
686 	 * Thus the check TRBPTR == TRBBASER will not be honored.
687 	 */
688 	if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
689 	    (trbe_may_overwrite_in_fill_mode(cpudata) ||
690 	     get_trbe_write_pointer() == get_trbe_base_pointer()))
691 		return TRBE_FAULT_ACT_WRAP;
692 
693 	return TRBE_FAULT_ACT_SPURIOUS;
694 }
695 
696 static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
697 					 struct trbe_buf *buf, bool wrap)
698 {
699 	u64 write;
700 	u64 start_off, end_off;
701 	u64 size;
702 	u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
703 
704 	/*
705 	 * If the TRBE has wrapped around the write pointer has
706 	 * wrapped and should be treated as limit.
707 	 *
708 	 * When the TRBE is affected by TRBE_WORKAROUND_WRITE_OUT_OF_RANGE,
709 	 * it may write upto 64bytes beyond the "LIMIT". The driver already
710 	 * keeps a valid page next to the LIMIT and we could potentially
711 	 * consume the trace data that may have been collected there. But we
712 	 * cannot be really sure it is available, and the TRBPTR may not
713 	 * indicate the same. Also, affected cores are also affected by another
714 	 * erratum which forces the PAGE_SIZE alignment on the TRBPTR, and thus
715 	 * could potentially pad an entire PAGE_SIZE - 64bytes, to get those
716 	 * 64bytes. Thus we ignore the potential triggering of the erratum
717 	 * on WRAP and limit the data to LIMIT.
718 	 */
719 	if (wrap)
720 		write = get_trbe_limit_pointer();
721 	else
722 		write = get_trbe_write_pointer();
723 
724 	/*
725 	 * TRBE may use a different base address than the base
726 	 * of the ring buffer. Thus use the beginning of the ring
727 	 * buffer to compute the offsets.
728 	 */
729 	end_off = write - buf->trbe_base;
730 	start_off = PERF_IDX2OFF(handle->head, buf);
731 
732 	if (WARN_ON_ONCE(end_off < start_off))
733 		return 0;
734 
735 	size = end_off - start_off;
736 	/*
737 	 * If the TRBE is affected by the following erratum, we must fill
738 	 * the space we skipped with IGNORE packets. And we are always
739 	 * guaranteed to have at least a PAGE_SIZE space in the buffer.
740 	 */
741 	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
742 	    !WARN_ON(size < overwrite_skip))
743 		__trbe_pad_buf(buf, start_off, overwrite_skip);
744 
745 	return size;
746 }
747 
748 static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
749 				   struct perf_event *event, void **pages,
750 				   int nr_pages, bool snapshot)
751 {
752 	struct trbe_buf *buf;
753 	struct page **pglist;
754 	int i;
755 
756 	/*
757 	 * TRBE LIMIT and TRBE WRITE pointers must be page aligned. But with
758 	 * just a single page, there would not be any room left while writing
759 	 * into a partially filled TRBE buffer after the page size alignment.
760 	 * Hence restrict the minimum buffer size as two pages.
761 	 */
762 	if (nr_pages < 2)
763 		return NULL;
764 
765 	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, trbe_alloc_node(event));
766 	if (!buf)
767 		return NULL;
768 
769 	pglist = kzalloc_objs(*pglist, nr_pages);
770 	if (!pglist) {
771 		kfree(buf);
772 		return NULL;
773 	}
774 
775 	for (i = 0; i < nr_pages; i++)
776 		pglist[i] = virt_to_page(pages[i]);
777 
778 	buf->trbe_base = (unsigned long)vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
779 	if (!buf->trbe_base) {
780 		kfree(pglist);
781 		kfree(buf);
782 		return NULL;
783 	}
784 	buf->trbe_limit = buf->trbe_base + nr_pages * PAGE_SIZE;
785 	buf->trbe_write = buf->trbe_base;
786 	buf->snapshot = snapshot;
787 	buf->nr_pages = nr_pages;
788 	buf->pages = pages;
789 	kfree(pglist);
790 	return buf;
791 }
792 
793 static void arm_trbe_free_buffer(void *config)
794 {
795 	struct trbe_buf *buf = config;
796 
797 	vunmap((void *)buf->trbe_base);
798 	kfree(buf);
799 }
800 
/*
 * Stop the TRBE of the current CPU and report how much trace was
 * collected for the session.
 * @csdev:	TRBE coresight device.
 * @handle:	perf AUX handle of the session.
 * @config:	the struct trbe_buf of the session.
 *
 * Returns the number of bytes collected, or 0 if the TRBE is not in perf
 * mode, was already disabled, or stopped for a reason other than a WRAP.
 * In snapshot mode handle->head is advanced by the returned size.
 */
static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
					    struct perf_output_handle *handle,
					    void *config)
{
	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
	struct trbe_buf *buf = config;
	enum trbe_fault_action act;
	unsigned long size, status;
	unsigned long flags;
	bool wrap = false;

	WARN_ON(buf->cpudata != cpudata);
	WARN_ON(cpudata->cpu != smp_processor_id());
	WARN_ON(cpudata->drvdata != drvdata);
	if (cpudata->mode != CS_MODE_PERF)
		return 0;

	/*
	 * We are about to disable the TRBE. And this could in turn
	 * fill up the buffer, triggering an IRQ. This could be consumed
	 * by the PE asynchronously, causing a race here against
	 * the IRQ handler in closing out the handle. So, let us
	 * make sure the IRQ can't trigger while we are collecting
	 * the buffer. We also make sure that a WRAP event is handled
	 * accordingly.
	 */
	local_irq_save(flags);

	/*
	 * If the TRBE was disabled due to lack of space in the AUX buffer or a
	 * spurious fault, the driver leaves it disabled, truncating the buffer.
	 * Since the etm_perf driver expects to close out the AUX buffer, the
	 * driver skips it. Thus, just pass in 0 size here to indicate that the
	 * buffer was truncated.
	 */
	if (!is_trbe_enabled()) {
		size = 0;
		goto done;
	}
	/*
	 * perf handle structure needs to be shared with the TRBE IRQ handler for
	 * capturing trace data and restarting the handle. There is a probability
	 * of an undefined reference based crash when etm event is being stopped
	 * while a TRBE IRQ is also getting processed. This happens due to the
	 * release of perf handle via perf_aux_output_end() in etm_event_stop().
	 * Stopping the TRBE here will ensure that no IRQ could be generated when
	 * the perf handle gets freed in etm_event_stop().
	 */
	trbe_drain_and_disable_local(cpudata);

	/* Check if there is a pending interrupt and handle it here */
	status = read_sysreg_s(SYS_TRBSR_EL1);
	if (is_trbe_irq(status)) {

		/*
		 * Now that we are handling the IRQ here, clear the IRQ
		 * from the status, to let the irq handler know that it
		 * is taken care of.
		 */
		clr_trbe_irq();
		isb();

		act = trbe_get_fault_act(handle, status);
		/*
		 * If this was not due to a WRAP event, we have some
		 * errors and as such buffer is empty.
		 */
		if (act != TRBE_FAULT_ACT_WRAP) {
			size = 0;
			goto done;
		}

		trbe_report_wrap_event(handle);
		wrap = true;
	}

	size = trbe_get_trace_size(handle, buf, wrap);

done:
	local_irq_restore(flags);

	/* In snapshot mode the head is advanced here, by the collected size */
	if (buf->snapshot)
		handle->head += size;
	return size;
}
887 
888 
889 static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
890 {
891 	/*
892 	 * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
893 	 * line size from the "TRBBASER_EL1" in the event of a "FILL".
894 	 * Thus, we could loose some amount of the trace at the base.
895 	 *
896 	 * Before Fix:
897 	 *
898 	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
899 	 *  |                   \/                       /
900 	 *   -------------------------------------------------------------
901 	 *  |   Pg0      |   Pg1       |           |          |  PgN     |
902 	 *   -------------------------------------------------------------
903 	 *
904 	 * In the normal course of action, we would set the TRBBASER to the
905 	 * beginning of the ring-buffer (normal-BASE). But with the erratum,
906 	 * the TRBE could overwrite the contents at the "normal-BASE", after
907 	 * hitting the "normal-LIMIT", since it doesn't stop as expected. And
908 	 * this is wrong. This could result in overwriting trace collected in
909 	 * one of the previous runs, being consumed by the user. So we must
910 	 * always make sure that the TRBBASER is within the region
911 	 * [head, head+size]. Note that TRBBASER must be PAGE aligned,
912 	 *
913 	 *  After moving the BASE:
914 	 *
915 	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
916 	 *  |                   \/                       /
917 	 *   -------------------------------------------------------------
918 	 *  |         |          |xyzdef.     |..   tuvw|                |
919 	 *   -------------------------------------------------------------
920 	 *                      /
921 	 *              New-BASER
922 	 *
923 	 * Also, we would set the TRBPTR to head (after adjusting for
924 	 * alignment) at normal-PTR. This would mean that the last few bytes
925 	 * of the trace (say, "xyz") might overwrite the first few bytes of
926 	 * trace written ("abc"). More importantly they will appear in what
927 	 * userspace sees as the beginning of the trace, which is wrong. We may
928 	 * not always have space to move the latest trace "xyz" to the correct
929 	 * order as it must appear beyond the LIMIT. (i.e, [head..head+size]).
930 	 * Thus it is easier to ignore those bytes than to complicate the
931 	 * driver to move it, assuming that the erratum was triggered and
932 	 * doing additional checks to see if there is indeed allowed space at
933 	 * TRBLIMITR.LIMIT.
934 	 *
935 	 *  Thus the full workaround will move the BASE and the PTR and would
936 	 *  look like (after padding at the skipped bytes at the end of
937 	 *  session) :
938 	 *
939 	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
940 	 *  |                   \/                       /
941 	 *   -------------------------------------------------------------
942 	 *  |         |          |///abc..     |..  rst|                |
943 	 *   -------------------------------------------------------------
944 	 *                      /    |
945 	 *              New-BASER    New-TRBPTR
946 	 *
947 	 * To summarize, with the work around:
948 	 *
949 	 *  - We always align the offset for the next session to PAGE_SIZE
950 	 *    (This is to ensure we can program the TRBBASER to this offset
951 	 *    within the region [head...head+size]).
952 	 *
953 	 *  - At TRBE enable:
954 	 *     - Set the TRBBASER to the page aligned offset of the current
955 	 *       proposed write offset. (which is guaranteed to be aligned
956 	 *       as above)
957 	 *     - Move the TRBPTR to skip first 256bytes (that might be
958 	 *       overwritten with the erratum). This ensures that the trace
959 	 *       generated in the session is not re-written.
960 	 *
961 	 *  - At trace collection:
962 	 *     - Pad the 256bytes skipped above again with IGNORE packets.
963 	 */
964 	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
965 		if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
966 			return -EINVAL;
967 		buf->trbe_hw_base = buf->trbe_write;
968 		buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
969 	}
970 
971 	/*
972 	 * TRBE_WORKAROUND_WRITE_OUT_OF_RANGE could cause the TRBE to write to
973 	 * the next page after the TRBLIMITR.LIMIT. For perf, the "next page"
974 	 * may be:
975 	 *     - The page beyond the ring buffer. This could mean, TRBE could
976 	 *       corrupt another entity (kernel / user)
977 	 *     - A portion of the "ring buffer" consumed by the userspace.
978 	 *       i.e, a page outisde [head, head + size].
979 	 *
980 	 * We work around this by:
981 	 *     - Making sure that we have at least an extra space of PAGE left
982 	 *       in the ring buffer [head, head + size], than we normally do
983 	 *       without the erratum. See trbe_min_trace_buf_size().
984 	 *
985 	 *     - Adjust the TRBLIMITR.LIMIT to leave the extra PAGE outside
986 	 *       the TRBE's range (i.e [TRBBASER, TRBLIMITR.LIMI] ).
987 	 */
988 	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_WRITE_OUT_OF_RANGE)) {
989 		s64 space = buf->trbe_limit - buf->trbe_write;
990 		/*
991 		 * We must have more than a PAGE_SIZE worth space in the proposed
992 		 * range for the TRBE.
993 		 */
994 		if (WARN_ON(space <= PAGE_SIZE ||
995 			    !IS_ALIGNED(buf->trbe_limit, PAGE_SIZE)))
996 			return -EINVAL;
997 		buf->trbe_limit -= PAGE_SIZE;
998 	}
999 
1000 	return 0;
1001 }
1002 
1003 static int __arm_trbe_enable(struct trbe_buf *buf,
1004 			     struct perf_output_handle *handle)
1005 {
1006 	int ret = 0;
1007 
1008 	perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
1009 	buf->trbe_limit = compute_trbe_buffer_limit(handle);
1010 	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
1011 	if (buf->trbe_limit == buf->trbe_base) {
1012 		ret = -ENOSPC;
1013 		goto err;
1014 	}
1015 	/* Set the base of the TRBE to the buffer base */
1016 	buf->trbe_hw_base = buf->trbe_base;
1017 
1018 	ret = trbe_apply_work_around_before_enable(buf);
1019 	if (ret)
1020 		goto err;
1021 
1022 	*this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
1023 	trbe_enable_hw(buf);
1024 	return 0;
1025 err:
1026 	trbe_stop_and_truncate_event(handle);
1027 	return ret;
1028 }
1029 
1030 static int arm_trbe_enable(struct coresight_device *csdev, enum cs_mode mode,
1031 			   struct coresight_path *path)
1032 {
1033 	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
1034 	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
1035 	struct perf_output_handle *handle = path->handle;
1036 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1037 
1038 	WARN_ON(cpudata->cpu != smp_processor_id());
1039 	WARN_ON(cpudata->drvdata != drvdata);
1040 	if (mode != CS_MODE_PERF)
1041 		return -EINVAL;
1042 
1043 	cpudata->buf = buf;
1044 	cpudata->mode = mode;
1045 	buf->cpudata = cpudata;
1046 
1047 	return __arm_trbe_enable(buf, handle);
1048 }
1049 
1050 static int arm_trbe_disable(struct coresight_device *csdev)
1051 {
1052 	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
1053 	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
1054 	struct trbe_buf *buf = cpudata->buf;
1055 
1056 	WARN_ON(buf->cpudata != cpudata);
1057 	WARN_ON(cpudata->cpu != smp_processor_id());
1058 	WARN_ON(cpudata->drvdata != drvdata);
1059 	if (cpudata->mode != CS_MODE_PERF)
1060 		return -EINVAL;
1061 
1062 	trbe_drain_and_disable_local(cpudata);
1063 	buf->cpudata = NULL;
1064 	cpudata->buf = NULL;
1065 	cpudata->mode = CS_MODE_DISABLED;
1066 	return 0;
1067 }
1068 
1069 static void trbe_handle_spurious(struct perf_output_handle *handle)
1070 {
1071 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1072 	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
1073 
1074 	/*
1075 	 * If the IRQ was spurious, simply re-enable the TRBE
1076 	 * back without modifying the buffer parameters to
1077 	 * retain the trace collected so far.
1078 	 */
1079 	set_trbe_enabled(buf->cpudata, trblimitr);
1080 }
1081 
1082 static int trbe_handle_overflow(struct perf_output_handle *handle)
1083 {
1084 	struct perf_event *event = handle->event;
1085 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1086 	unsigned long size;
1087 	struct etm_event_data *event_data;
1088 
1089 	size = trbe_get_trace_size(handle, buf, true);
1090 	if (buf->snapshot)
1091 		handle->head += size;
1092 
1093 	trbe_report_wrap_event(handle);
1094 	perf_aux_output_end(handle, size);
1095 	event_data = perf_aux_output_begin(handle, event);
1096 	if (!event_data) {
1097 		/*
1098 		 * We are unable to restart the trace collection,
1099 		 * thus leave the TRBE disabled. The etm-perf driver
1100 		 * is able to detect this with a disconnected handle
1101 		 * (handle->event = NULL).
1102 		 */
1103 		trbe_drain_and_disable_local(buf->cpudata);
1104 		*this_cpu_ptr(buf->cpudata->drvdata->handle) = NULL;
1105 		return -EINVAL;
1106 	}
1107 
1108 	return __arm_trbe_enable(buf, handle);
1109 }
1110 
1111 static bool is_perf_trbe(struct perf_output_handle *handle)
1112 {
1113 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1114 	struct trbe_cpudata *cpudata = buf->cpudata;
1115 	struct trbe_drvdata *drvdata = cpudata->drvdata;
1116 	int cpu = smp_processor_id();
1117 
1118 	WARN_ON(buf->trbe_hw_base != get_trbe_base_pointer());
1119 	WARN_ON(buf->trbe_limit != get_trbe_limit_pointer());
1120 
1121 	if (cpudata->mode != CS_MODE_PERF)
1122 		return false;
1123 
1124 	if (cpudata->cpu != cpu)
1125 		return false;
1126 
1127 	if (!cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1128 		return false;
1129 
1130 	return true;
1131 }
1132 
1133 static u64 cpu_prohibit_trace(void)
1134 {
1135 	u64 trfcr = read_trfcr();
1136 
1137 	/* Prohibit tracing at EL0 & the kernel EL */
1138 	write_trfcr(trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE));
1139 	/* Return the original value of the TRFCR */
1140 	return trfcr;
1141 }
1142 
1143 static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
1144 {
1145 	struct perf_output_handle **handle_ptr = dev;
1146 	struct perf_output_handle *handle = *handle_ptr;
1147 	struct trbe_buf *buf = etm_perf_sink_config(handle);
1148 	enum trbe_fault_action act;
1149 	u64 status;
1150 	bool truncated = false;
1151 	u64 trfcr;
1152 
1153 	/* Reads to TRBSR_EL1 is fine when TRBE is active */
1154 	status = read_sysreg_s(SYS_TRBSR_EL1);
1155 	/*
1156 	 * If the pending IRQ was handled by update_buffer callback
1157 	 * we have nothing to do here.
1158 	 */
1159 	if (!is_trbe_irq(status))
1160 		return IRQ_NONE;
1161 
1162 	/* Prohibit the CPU from tracing before we disable the TRBE */
1163 	trfcr = cpu_prohibit_trace();
1164 	/*
1165 	 * Ensure the trace is visible to the CPUs and
1166 	 * any external aborts have been resolved.
1167 	 */
1168 	trbe_drain_and_disable_local(buf->cpudata);
1169 	clr_trbe_irq();
1170 	isb();
1171 
1172 	if (WARN_ON_ONCE(!handle) || !perf_get_aux(handle))
1173 		return IRQ_NONE;
1174 
1175 	if (!is_perf_trbe(handle))
1176 		return IRQ_NONE;
1177 
1178 	act = trbe_get_fault_act(handle, status);
1179 	switch (act) {
1180 	case TRBE_FAULT_ACT_WRAP:
1181 		truncated = !!trbe_handle_overflow(handle);
1182 		break;
1183 	case TRBE_FAULT_ACT_SPURIOUS:
1184 		trbe_handle_spurious(handle);
1185 		break;
1186 	case TRBE_FAULT_ACT_FATAL:
1187 		trbe_stop_and_truncate_event(handle);
1188 		truncated = true;
1189 		break;
1190 	}
1191 
1192 	/*
1193 	 * If the buffer was truncated, ensure perf callbacks
1194 	 * have completed, which will disable the event.
1195 	 *
1196 	 * Otherwise, restore the trace filter controls to
1197 	 * allow the tracing.
1198 	 */
1199 	if (truncated)
1200 		irq_work_run();
1201 	else
1202 		write_trfcr(trfcr);
1203 
1204 	return IRQ_HANDLED;
1205 }
1206 
1207 static int arm_trbe_save(struct coresight_device *csdev)
1208 {
1209 	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
1210 	struct trbe_save_state *state = &cpudata->save_state;
1211 
1212 	state->trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
1213 
1214 	/* Disable the unit, ensure the writes to memory are complete */
1215 	if (state->trblimitr & TRBLIMITR_EL1_E)
1216 		trbe_drain_and_disable_local(cpudata);
1217 
1218 	state->trbbaser = read_sysreg_s(SYS_TRBBASER_EL1);
1219 	state->trbptr = read_sysreg_s(SYS_TRBPTR_EL1);
1220 	state->trbsr = read_sysreg_s(SYS_TRBSR_EL1);
1221 	return 0;
1222 }
1223 
1224 static void arm_trbe_restore(struct coresight_device *csdev)
1225 {
1226 	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
1227 	struct trbe_save_state *state = &cpudata->save_state;
1228 
1229 	write_sysreg_s(state->trbbaser, SYS_TRBBASER_EL1);
1230 	write_sysreg_s(state->trbptr, SYS_TRBPTR_EL1);
1231 	write_sysreg_s(state->trbsr, SYS_TRBSR_EL1);
1232 
1233 	if (!(state->trblimitr & TRBLIMITR_EL1_E)) {
1234 		write_sysreg_s(state->trblimitr, SYS_TRBLIMITR_EL1);
1235 	} else {
1236 		/*
1237 		 * The section K5.5 Context switching, Arm ARM (ARM DDI 0487
1238 		 * L.a), S_PKLXF requires a Context synchronization event to
1239 		 * guarantee the Trace Buffer Unit will observe the new values
1240 		 * of the system registers.
1241 		 */
1242 		isb();
1243 		set_trbe_enabled(cpudata, state->trblimitr);
1244 	}
1245 }
1246 
1247 static const struct coresight_ops_sink arm_trbe_sink_ops = {
1248 	.enable		= arm_trbe_enable,
1249 	.disable	= arm_trbe_disable,
1250 	.alloc_buffer	= arm_trbe_alloc_buffer,
1251 	.free_buffer	= arm_trbe_free_buffer,
1252 	.update_buffer	= arm_trbe_update_buffer,
1253 };
1254 
1255 static const struct coresight_ops arm_trbe_cs_ops = {
1256 	.pm_save_disable	= arm_trbe_save,
1257 	.pm_restore_enable	= arm_trbe_restore,
1258 	.sink_ops		= &arm_trbe_sink_ops,
1259 };
1260 
1261 static ssize_t align_show(struct device *dev, struct device_attribute *attr, char *buf)
1262 {
1263 	struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
1264 
1265 	return sprintf(buf, "%llx\n", cpudata->trbe_hw_align);
1266 }
1267 static DEVICE_ATTR_RO(align);
1268 
1269 static ssize_t flag_show(struct device *dev, struct device_attribute *attr, char *buf)
1270 {
1271 	struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
1272 
1273 	return sprintf(buf, "%d\n", cpudata->trbe_flag);
1274 }
1275 static DEVICE_ATTR_RO(flag);
1276 
1277 static struct attribute *arm_trbe_attrs[] = {
1278 	&dev_attr_align.attr,
1279 	&dev_attr_flag.attr,
1280 	NULL,
1281 };
1282 
1283 static const struct attribute_group arm_trbe_group = {
1284 	.attrs = arm_trbe_attrs,
1285 };
1286 
1287 static const struct attribute_group *arm_trbe_groups[] = {
1288 	&arm_trbe_group,
1289 	NULL,
1290 };
1291 
1292 static void arm_trbe_enable_cpu(void *info)
1293 {
1294 	struct trbe_drvdata *drvdata = info;
1295 	struct trbe_cpudata *cpudata = this_cpu_ptr(drvdata->cpudata);
1296 
1297 	trbe_reset_local(cpudata);
1298 	enable_percpu_irq(drvdata->irq, IRQ_TYPE_NONE);
1299 }
1300 
1301 static void arm_trbe_disable_cpu(void *info)
1302 {
1303 	struct trbe_drvdata *drvdata = info;
1304 	struct trbe_cpudata *cpudata = this_cpu_ptr(drvdata->cpudata);
1305 
1306 	disable_percpu_irq(drvdata->irq);
1307 	trbe_reset_local(cpudata);
1308 }
1309 
1310 
1311 static void arm_trbe_register_coresight_cpu(struct trbe_drvdata *drvdata, int cpu)
1312 {
1313 	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
1314 	struct coresight_device *trbe_csdev = coresight_get_percpu_sink(cpu);
1315 	struct coresight_desc desc = { 0 };
1316 	struct device *dev;
1317 
1318 	if (WARN_ON(trbe_csdev))
1319 		return;
1320 
1321 	/* If the TRBE was not probed on the CPU, we shouldn't be here */
1322 	if (WARN_ON(!cpudata->drvdata))
1323 		return;
1324 
1325 	dev = &cpudata->drvdata->pdev->dev;
1326 	desc.name = devm_kasprintf(dev, GFP_KERNEL, "trbe%d", cpu);
1327 	if (!desc.name)
1328 		goto cpu_clear;
1329 	/*
1330 	 * TRBE coresight devices do not need regular connections
1331 	 * information, as the paths get built between all percpu
1332 	 * source and their respective percpu sink devices. Though
1333 	 * coresight_register() expect device connections via the
1334 	 * platform_data, which TRBE devices do not have. As they
1335 	 * are not real ACPI devices, coresight_get_platform_data()
1336 	 * ends up failing. Instead let's allocate a dummy zeroed
1337 	 * coresight_platform_data structure and assign that back
1338 	 * into the device for that purpose.
1339 	 */
1340 	desc.pdata = devm_kzalloc(dev, sizeof(*desc.pdata), GFP_KERNEL);
1341 	if (!desc.pdata)
1342 		goto cpu_clear;
1343 
1344 	desc.type = CORESIGHT_DEV_TYPE_SINK;
1345 	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM;
1346 	desc.ops = &arm_trbe_cs_ops;
1347 	desc.groups = arm_trbe_groups;
1348 	desc.dev = dev;
1349 	desc.cpu = cpu;
1350 	desc.flags = CORESIGHT_DESC_CPU_BOUND;
1351 	trbe_csdev = coresight_register(&desc);
1352 	if (IS_ERR(trbe_csdev))
1353 		goto cpu_clear;
1354 
1355 	dev_set_drvdata(&trbe_csdev->dev, cpudata);
1356 	coresight_set_percpu_sink(cpu, trbe_csdev);
1357 	return;
1358 cpu_clear:
1359 	cpumask_clear_cpu(cpu, &drvdata->supported_cpus);
1360 }
1361 
1362 /*
1363  * Must be called with preemption disabled, for trbe_check_errata().
1364  */
1365 static void arm_trbe_probe_cpu(void *info)
1366 {
1367 	struct trbe_drvdata *drvdata = info;
1368 	int cpu = smp_processor_id();
1369 	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
1370 	u64 trbidr;
1371 
1372 	if (WARN_ON(!cpudata))
1373 		goto cpu_clear;
1374 
1375 	if (!is_trbe_available()) {
1376 		pr_err("TRBE is not implemented on cpu %d\n", cpu);
1377 		goto cpu_clear;
1378 	}
1379 
1380 	trbidr = read_sysreg_s(SYS_TRBIDR_EL1);
1381 	if (!is_trbe_programmable(trbidr)) {
1382 		pr_err("TRBE is owned in higher exception level on cpu %d\n", cpu);
1383 		goto cpu_clear;
1384 	}
1385 
1386 	cpudata->trbe_hw_align = 1ULL << get_trbe_address_align(trbidr);
1387 	if (cpudata->trbe_hw_align > SZ_2K) {
1388 		pr_err("Unsupported alignment on cpu %d\n", cpu);
1389 		goto cpu_clear;
1390 	}
1391 
1392 	/*
1393 	 * Run the TRBE erratum checks, now that we know
1394 	 * this instance is about to be registered.
1395 	 */
1396 	trbe_check_errata(cpudata);
1397 
1398 	if (trbe_is_broken(cpudata)) {
1399 		pr_err("Disabling TRBE on cpu%d due to erratum\n", cpu);
1400 		goto cpu_clear;
1401 	}
1402 
1403 	/*
1404 	 * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
1405 	 * we must always program the TBRPTR_EL1, 256bytes from a page
1406 	 * boundary, with TRBBASER_EL1 set to the page, to prevent
1407 	 * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
1408 	 *
1409 	 * Thus make sure we always align our write pointer to a PAGE_SIZE,
1410 	 * which also guarantees that we have at least a PAGE_SIZE space in
1411 	 * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
1412 	 * the required bytes at the base.
1413 	 */
1414 	if (trbe_may_overwrite_in_fill_mode(cpudata))
1415 		cpudata->trbe_align = PAGE_SIZE;
1416 	else
1417 		cpudata->trbe_align = cpudata->trbe_hw_align;
1418 
1419 	cpudata->trbe_flag = get_trbe_flag_update(trbidr);
1420 	cpudata->cpu = cpu;
1421 	cpudata->drvdata = drvdata;
1422 	return;
1423 cpu_clear:
1424 	cpumask_clear_cpu(cpu, &drvdata->supported_cpus);
1425 }
1426 
1427 static void arm_trbe_remove_coresight_cpu(struct trbe_drvdata *drvdata, int cpu)
1428 {
1429 	struct coresight_device *trbe_csdev = coresight_get_percpu_sink(cpu);
1430 
1431 	if (trbe_csdev) {
1432 		coresight_unregister(trbe_csdev);
1433 		coresight_set_percpu_sink(cpu, NULL);
1434 	}
1435 }
1436 
1437 static int arm_trbe_probe_coresight(struct trbe_drvdata *drvdata)
1438 {
1439 	int cpu;
1440 
1441 	drvdata->cpudata = alloc_percpu(typeof(*drvdata->cpudata));
1442 	if (!drvdata->cpudata)
1443 		return -ENOMEM;
1444 
1445 	for_each_cpu(cpu, &drvdata->supported_cpus) {
1446 		/* If we fail to probe the CPU, let us defer it to hotplug callbacks */
1447 		if (smp_call_function_single(cpu, arm_trbe_probe_cpu, drvdata, 1))
1448 			continue;
1449 		if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1450 			arm_trbe_register_coresight_cpu(drvdata, cpu);
1451 		if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1452 			smp_call_function_single(cpu, arm_trbe_enable_cpu, drvdata, 1);
1453 	}
1454 	return 0;
1455 }
1456 
1457 static int arm_trbe_remove_coresight(struct trbe_drvdata *drvdata)
1458 {
1459 	int cpu;
1460 
1461 	for_each_cpu(cpu, &drvdata->supported_cpus) {
1462 		smp_call_function_single(cpu, arm_trbe_disable_cpu, drvdata, 1);
1463 		arm_trbe_remove_coresight_cpu(drvdata, cpu);
1464 	}
1465 	free_percpu(drvdata->cpudata);
1466 	return 0;
1467 }
1468 
/*
 * Probe the TRBE of the current (hotplugged) CPU. arm_trbe_probe_cpu() must
 * run with preemption disabled, hence the preempt_disable()/enable() pair.
 */
static void arm_trbe_probe_hotplugged_cpu(struct trbe_drvdata *drvdata)
{
	preempt_disable();
	arm_trbe_probe_cpu(drvdata);
	preempt_enable();
}
1475 
1476 static int arm_trbe_cpu_startup(unsigned int cpu, struct hlist_node *node)
1477 {
1478 	struct trbe_drvdata *drvdata = hlist_entry_safe(node, struct trbe_drvdata, hotplug_node);
1479 
1480 	if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) {
1481 
1482 		/*
1483 		 * If this CPU was not probed for TRBE,
1484 		 * initialize it now.
1485 		 */
1486 		if (!coresight_get_percpu_sink(cpu)) {
1487 			arm_trbe_probe_hotplugged_cpu(drvdata);
1488 			if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1489 				arm_trbe_register_coresight_cpu(drvdata, cpu);
1490 			if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1491 				arm_trbe_enable_cpu(drvdata);
1492 		} else {
1493 			arm_trbe_enable_cpu(drvdata);
1494 		}
1495 	}
1496 	return 0;
1497 }
1498 
1499 static int arm_trbe_cpu_teardown(unsigned int cpu, struct hlist_node *node)
1500 {
1501 	struct trbe_drvdata *drvdata = hlist_entry_safe(node, struct trbe_drvdata, hotplug_node);
1502 
1503 	if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
1504 		arm_trbe_disable_cpu(drvdata);
1505 	return 0;
1506 }
1507 
1508 static int arm_trbe_probe_cpuhp(struct trbe_drvdata *drvdata)
1509 {
1510 	enum cpuhp_state trbe_online;
1511 	int ret;
1512 
1513 	trbe_online = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
1514 					      arm_trbe_cpu_startup, arm_trbe_cpu_teardown);
1515 	if (trbe_online < 0)
1516 		return trbe_online;
1517 
1518 	ret = cpuhp_state_add_instance(trbe_online, &drvdata->hotplug_node);
1519 	if (ret) {
1520 		cpuhp_remove_multi_state(trbe_online);
1521 		return ret;
1522 	}
1523 	drvdata->trbe_online = trbe_online;
1524 	return 0;
1525 }
1526 
1527 static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata)
1528 {
1529 	cpuhp_state_remove_instance(drvdata->trbe_online, &drvdata->hotplug_node);
1530 	cpuhp_remove_multi_state(drvdata->trbe_online);
1531 }
1532 
1533 static int arm_trbe_probe_irq(struct platform_device *pdev,
1534 			      struct trbe_drvdata *drvdata)
1535 {
1536 	const struct cpumask *affinity;
1537 	int ret;
1538 
1539 	drvdata->irq = platform_get_irq_affinity(pdev, 0, &affinity);
1540 	if (drvdata->irq < 0) {
1541 		pr_err("IRQ not found for the platform device\n");
1542 		return drvdata->irq;
1543 	}
1544 
1545 	if (!irq_is_percpu(drvdata->irq)) {
1546 		pr_err("IRQ is not a PPI\n");
1547 		return -EINVAL;
1548 	}
1549 
1550 	cpumask_copy(&drvdata->supported_cpus, affinity);
1551 
1552 	drvdata->handle = alloc_percpu(struct perf_output_handle *);
1553 	if (!drvdata->handle)
1554 		return -ENOMEM;
1555 
1556 	ret = request_percpu_irq_affinity(drvdata->irq, arm_trbe_irq_handler, DRVNAME,
1557 					  affinity, drvdata->handle);
1558 	if (ret) {
1559 		free_percpu(drvdata->handle);
1560 		return ret;
1561 	}
1562 	return 0;
1563 }
1564 
1565 static void arm_trbe_remove_irq(struct trbe_drvdata *drvdata)
1566 {
1567 	free_percpu_irq(drvdata->irq, drvdata->handle);
1568 	free_percpu(drvdata->handle);
1569 }
1570 
1571 static int arm_trbe_device_probe(struct platform_device *pdev)
1572 {
1573 	struct trbe_drvdata *drvdata;
1574 	struct device *dev = &pdev->dev;
1575 	int ret;
1576 
1577 	/* Trace capture is not possible with kernel page table isolation */
1578 	if (arm64_kernel_unmapped_at_el0()) {
1579 		pr_err("TRBE wouldn't work if kernel gets unmapped at EL0\n");
1580 		return -EOPNOTSUPP;
1581 	}
1582 
1583 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
1584 	if (!drvdata)
1585 		return -ENOMEM;
1586 
1587 	dev_set_drvdata(dev, drvdata);
1588 	drvdata->pdev = pdev;
1589 	ret = arm_trbe_probe_irq(pdev, drvdata);
1590 	if (ret)
1591 		return ret;
1592 
1593 	ret = arm_trbe_probe_coresight(drvdata);
1594 	if (ret)
1595 		goto probe_failed;
1596 
1597 	ret = arm_trbe_probe_cpuhp(drvdata);
1598 	if (ret)
1599 		goto cpuhp_failed;
1600 
1601 	return 0;
1602 cpuhp_failed:
1603 	arm_trbe_remove_coresight(drvdata);
1604 probe_failed:
1605 	arm_trbe_remove_irq(drvdata);
1606 	return ret;
1607 }
1608 
/*
 * Platform driver remove: tear everything down in the reverse order of
 * arm_trbe_device_probe() - hotplug callbacks, coresight devices, IRQ.
 */
static void arm_trbe_device_remove(struct platform_device *pdev)
{
	struct trbe_drvdata *drv = platform_get_drvdata(pdev);

	arm_trbe_remove_cpuhp(drv);
	arm_trbe_remove_coresight(drv);
	arm_trbe_remove_irq(drv);
}
1617 
1618 static const struct of_device_id arm_trbe_of_match[] = {
1619 	{ .compatible = "arm,trace-buffer-extension"},
1620 	{},
1621 };
1622 MODULE_DEVICE_TABLE(of, arm_trbe_of_match);
1623 
1624 #ifdef CONFIG_ACPI
1625 static const struct platform_device_id arm_trbe_acpi_match[] = {
1626 	{ ARMV8_TRBE_PDEV_NAME, 0 },
1627 	{ }
1628 };
1629 MODULE_DEVICE_TABLE(platform, arm_trbe_acpi_match);
1630 #endif
1631 
1632 static struct platform_driver arm_trbe_driver = {
1633 	.id_table = ACPI_PTR(arm_trbe_acpi_match),
1634 	.driver	= {
1635 		.name = DRVNAME,
1636 		.of_match_table = of_match_ptr(arm_trbe_of_match),
1637 		.suppress_bind_attrs = true,
1638 	},
1639 	.probe	= arm_trbe_device_probe,
1640 	.remove = arm_trbe_device_remove,
1641 };
1642 
1643 static int __init arm_trbe_init(void)
1644 {
1645 	int ret;
1646 
1647 	ret = platform_driver_register(&arm_trbe_driver);
1648 	if (!ret)
1649 		return 0;
1650 
1651 	pr_err("Error registering %s platform driver\n", DRVNAME);
1652 	return ret;
1653 }
1654 
1655 static void __exit arm_trbe_exit(void)
1656 {
1657 	platform_driver_unregister(&arm_trbe_driver);
1658 }
1659 module_init(arm_trbe_init);
1660 module_exit(arm_trbe_exit);
1661 
1662 MODULE_AUTHOR("Anshuman Khandual <anshuman.khandual@arm.com>");
1663 MODULE_DESCRIPTION("Arm Trace Buffer Extension (TRBE) driver");
1664 MODULE_LICENSE("GPL v2");
1665