xref: /linux/tools/perf/util/cs-etm.c (revision 5d83b9cbe7cf40746948a419c5018f4d617a86fa)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17 
18 #include <stdlib.h>
19 
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42 
/*
 * Session-wide CoreSight ETM decode state.
 *
 * The embedded 'auxtrace' member is what session->auxtrace points at; the
 * functions in this file recover the enclosing structure from it with
 * container_of().
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless has no timestamps in the trace so overlapping mmap lookups
	 * are less accurate but produces smaller trace data. We use context IDs
	 * in the trace instead of matching timestamps with fork records so
	 * they're not really needed in the general case. Overlapping mmaps
	 * happen in cases like between a fork and an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	/* Number of entries in the 'metadata' array below */
	int num_cpu;
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	/*
	 * Array of num_cpu metadata blocks; each block is an array of u64
	 * indexed by the CS_ETM_* / CS_ETMV4_* / CS_ETE_* constants.
	 */
	u64 **metadata;
	/* Compared against evsel->core.attr.type to recognise our events */
	unsigned int pmu_type;
	/* Cached PID format, returned by cs_etm__get_pid_fmt() */
	enum cs_etm_pid_fmt pid_fmt;
};
83 
/*
 * Decode state for a single trace ID within a cs_etm_queue. One of these is
 * created on demand by cs_etm__etmq_get_traceid_queue().
 */
struct cs_etm_traceid_queue {
	/* Trace ID this queue was created for */
	u8 trace_chan_id;
	u64 period_instructions;
	size_t last_branch_pos;
	/* Scratch buffer of PERF_SAMPLE_MAX_SIZE bytes */
	union perf_event *event_buf;
	/*
	 * Thread and exception level are tracked for both the current and the
	 * previous packet; cs_etm__packet_swap() moves the current values into
	 * the prev_packet_* members.
	 */
	struct thread *thread;
	struct thread *prev_packet_thread;
	ocsd_ex_level prev_packet_el;
	ocsd_ex_level el;
	/* Only allocated when synth_opts.last_branch is set */
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	/* Swapped with each other by cs_etm__packet_swap() */
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};
99 
/*
 * Per auxtrace queue decode state: one decoder plus the set of trace ID
 * queues that have been seen on this queue so far.
 */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	unsigned int queue_nr;
	/*
	 * Trace ID whose packet queue holds a pending timestamp; zero means
	 * nothing is pending (see cs_etm__etmq_get_timestamp()).
	 */
	u8 pending_timestamp_chan_id;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/*
	 * Conversion between traceID and index in traceid_queues array: each
	 * node's priv field stores the index cast to a pointer.
	 */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
};
113 
/*
 * RB tree for quick conversion between traceID and metadata pointers.
 * Nodes are keyed by trace ID and their priv field points at the metadata
 * block of the CPU that owns the ID (see cs_etm__map_trace_id()).
 */
static struct intlist *traceid_list;

/* Forward declarations for helpers that are used before their definition */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);

/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300
125 
/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify the auxtrace_heap_XYZ() API; the
 * simpler one used here is to encode the etm queue number in the upper
 * 16 bits and the trace channel ID in the lower 16 bits of the heap's
 * queue_nr.
 *
 * Every macro argument is parenthesised so that callers may pass arbitrary
 * expressions (e.g. 'a | b') without operator precedence changing the
 * result.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
		      ((queue_nr) << 16 | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
136 
137 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
138 {
139 	etmidr &= ETMIDR_PTM_VERSION;
140 
141 	if (etmidr == ETMIDR_PTM_VERSION)
142 		return CS_ETM_PROTO_PTM;
143 
144 	return CS_ETM_PROTO_ETMV3;
145 }
146 
147 static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
148 {
149 	struct int_node *inode;
150 	u64 *metadata;
151 
152 	inode = intlist__find(traceid_list, trace_chan_id);
153 	if (!inode)
154 		return -EINVAL;
155 
156 	metadata = inode->priv;
157 	*magic = metadata[CS_ETM_MAGIC];
158 	return 0;
159 }
160 
161 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
162 {
163 	struct int_node *inode;
164 	u64 *metadata;
165 
166 	inode = intlist__find(traceid_list, trace_chan_id);
167 	if (!inode)
168 		return -EINVAL;
169 
170 	metadata = inode->priv;
171 	*cpu = (int)metadata[CS_ETM_CPU];
172 	return 0;
173 }
174 
175 /*
176  * The returned PID format is presented as an enum:
177  *
178  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
179  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
180  *   CS_ETM_PIDFMT_NONE: No context IDs
181  *
182  * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
183  * are enabled at the same time when the session runs on an EL2 kernel.
184  * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
185  * recorded in the trace data, the tool will selectively use
186  * CONTEXTIDR_EL2 as PID.
187  *
188  * The result is cached in etm->pid_fmt so this function only needs to be called
189  * when processing the aux info.
190  */
191 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
192 {
193 	u64 val;
194 
195 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
196 		val = metadata[CS_ETM_ETMCR];
197 		/* CONTEXTIDR is traced */
198 		if (val & BIT(ETM_OPT_CTXTID))
199 			return CS_ETM_PIDFMT_CTXTID;
200 	} else {
201 		val = metadata[CS_ETMV4_TRCCONFIGR];
202 		/* CONTEXTIDR_EL2 is traced */
203 		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
204 			return CS_ETM_PIDFMT_CTXTID2;
205 		/* CONTEXTIDR_EL1 is traced */
206 		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
207 			return CS_ETM_PIDFMT_CTXTID;
208 	}
209 
210 	return CS_ETM_PIDFMT_NONE;
211 }
212 
213 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
214 {
215 	return etmq->etm->pid_fmt;
216 }
217 
218 static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
219 {
220 	struct int_node *inode;
221 
222 	/* Get an RB node for this CPU */
223 	inode = intlist__findnew(traceid_list, trace_chan_id);
224 
225 	/* Something went wrong, no need to continue */
226 	if (!inode)
227 		return -ENOMEM;
228 
229 	/*
230 	 * The node for that CPU should not be taken.
231 	 * Back out if that's the case.
232 	 */
233 	if (inode->priv)
234 		return -EINVAL;
235 
236 	/* All good, associate the traceID with the metadata pointer */
237 	inode->priv = cpu_metadata;
238 
239 	return 0;
240 }
241 
242 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
243 {
244 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
245 
246 	switch (cs_etm_magic) {
247 	case __perf_cs_etmv3_magic:
248 		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
249 				      CORESIGHT_TRACE_ID_VAL_MASK);
250 		break;
251 	case __perf_cs_etmv4_magic:
252 	case __perf_cs_ete_magic:
253 		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
254 				      CORESIGHT_TRACE_ID_VAL_MASK);
255 		break;
256 	default:
257 		return -EINVAL;
258 	}
259 	return 0;
260 }
261 
262 /*
263  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
264  * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
265  */
266 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
267 {
268 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
269 
270 	switch (cs_etm_magic) {
271 	case __perf_cs_etmv3_magic:
272 		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
273 		break;
274 	case __perf_cs_etmv4_magic:
275 	case __perf_cs_ete_magic:
276 		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
277 		break;
278 
279 	default:
280 		return -EINVAL;
281 	}
282 	return 0;
283 }
284 
285 /*
286  * Get a metadata index for a specific cpu from an array.
287  *
288  */
289 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
290 {
291 	int i;
292 
293 	for (i = 0; i < etm->num_cpu; i++) {
294 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
295 			return i;
296 		}
297 	}
298 
299 	return -1;
300 }
301 
302 /*
303  * Get a metadata for a specific cpu from an array.
304  *
305  */
306 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
307 {
308 	int idx = get_cpu_data_idx(etm, cpu);
309 
310 	return (idx != -1) ? etm->metadata[idx] : NULL;
311 }
312 
313 /*
314  * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
315  *
316  * The payload associates the Trace ID and the CPU.
317  * The routine is tolerant of seeing multiple packets with the same association,
318  * but a CPU / Trace ID association changing during a session is an error.
319  */
320 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
321 					    union perf_event *event)
322 {
323 	struct cs_etm_auxtrace *etm;
324 	struct perf_sample sample;
325 	struct int_node *inode;
326 	struct evsel *evsel;
327 	u64 *cpu_data;
328 	u64 hw_id;
329 	int cpu, version, err;
330 	u8 trace_chan_id, curr_chan_id;
331 
332 	/* extract and parse the HW ID */
333 	hw_id = event->aux_output_hw_id.hw_id;
334 	version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
335 	trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
336 
337 	/* check that we can handle this version */
338 	if (version > CS_AUX_HW_ID_CURR_VERSION)
339 		return -EINVAL;
340 
341 	/* get access to the etm metadata */
342 	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
343 	if (!etm || !etm->metadata)
344 		return -EINVAL;
345 
346 	/* parse the sample to get the CPU */
347 	evsel = evlist__event2evsel(session->evlist, event);
348 	if (!evsel)
349 		return -EINVAL;
350 	err = evsel__parse_sample(evsel, event, &sample);
351 	if (err)
352 		return err;
353 	cpu = sample.cpu;
354 	if (cpu == -1) {
355 		/* no CPU in the sample - possibly recorded with an old version of perf */
356 		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
357 		return -EINVAL;
358 	}
359 
360 	/* See if the ID is mapped to a CPU, and it matches the current CPU */
361 	inode = intlist__find(traceid_list, trace_chan_id);
362 	if (inode) {
363 		cpu_data = inode->priv;
364 		if ((int)cpu_data[CS_ETM_CPU] != cpu) {
365 			pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
366 			return -EINVAL;
367 		}
368 
369 		/* check that the mapped ID matches */
370 		err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
371 		if (err)
372 			return err;
373 		if (curr_chan_id != trace_chan_id) {
374 			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
375 			return -EINVAL;
376 		}
377 
378 		/* mapped and matched - return OK */
379 		return 0;
380 	}
381 
382 	cpu_data = get_cpu_data(etm, cpu);
383 	if (cpu_data == NULL)
384 		return err;
385 
386 	/* not one we've seen before - lets map it */
387 	err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
388 	if (err)
389 		return err;
390 
391 	/*
392 	 * if we are picking up the association from the packet, need to plug
393 	 * the correct trace ID into the metadata for setting up decoders later.
394 	 */
395 	err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
396 	return err;
397 }
398 
399 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
400 					      u8 trace_chan_id)
401 {
402 	/*
403 	 * When a timestamp packet is encountered the backend code
404 	 * is stopped so that the front end has time to process packets
405 	 * that were accumulated in the traceID queue.  Since there can
406 	 * be more than one channel per cs_etm_queue, we need to specify
407 	 * what traceID queue needs servicing.
408 	 */
409 	etmq->pending_timestamp_chan_id = trace_chan_id;
410 }
411 
412 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
413 				      u8 *trace_chan_id)
414 {
415 	struct cs_etm_packet_queue *packet_queue;
416 
417 	if (!etmq->pending_timestamp_chan_id)
418 		return 0;
419 
420 	if (trace_chan_id)
421 		*trace_chan_id = etmq->pending_timestamp_chan_id;
422 
423 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
424 						     etmq->pending_timestamp_chan_id);
425 	if (!packet_queue)
426 		return 0;
427 
428 	/* Acknowledge pending status */
429 	etmq->pending_timestamp_chan_id = 0;
430 
431 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
432 	return packet_queue->cs_timestamp;
433 }
434 
435 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
436 {
437 	int i;
438 
439 	queue->head = 0;
440 	queue->tail = 0;
441 	queue->packet_count = 0;
442 	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
443 		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
444 		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
445 		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
446 		queue->packet_buffer[i].instr_count = 0;
447 		queue->packet_buffer[i].last_instr_taken_branch = false;
448 		queue->packet_buffer[i].last_instr_size = 0;
449 		queue->packet_buffer[i].last_instr_type = 0;
450 		queue->packet_buffer[i].last_instr_subtype = 0;
451 		queue->packet_buffer[i].last_instr_cond = 0;
452 		queue->packet_buffer[i].flags = 0;
453 		queue->packet_buffer[i].exception_number = UINT32_MAX;
454 		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
455 		queue->packet_buffer[i].cpu = INT_MIN;
456 	}
457 }
458 
459 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
460 {
461 	int idx;
462 	struct int_node *inode;
463 	struct cs_etm_traceid_queue *tidq;
464 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
465 
466 	intlist__for_each_entry(inode, traceid_queues_list) {
467 		idx = (int)(intptr_t)inode->priv;
468 		tidq = etmq->traceid_queues[idx];
469 		cs_etm__clear_packet_queue(&tidq->packet_queue);
470 	}
471 }
472 
473 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
474 				      struct cs_etm_traceid_queue *tidq,
475 				      u8 trace_chan_id)
476 {
477 	int rc = -ENOMEM;
478 	struct auxtrace_queue *queue;
479 	struct cs_etm_auxtrace *etm = etmq->etm;
480 
481 	cs_etm__clear_packet_queue(&tidq->packet_queue);
482 
483 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
484 	tidq->trace_chan_id = trace_chan_id;
485 	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
486 	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
487 					       queue->tid);
488 	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
489 
490 	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
491 	if (!tidq->packet)
492 		goto out;
493 
494 	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
495 	if (!tidq->prev_packet)
496 		goto out_free;
497 
498 	if (etm->synth_opts.last_branch) {
499 		size_t sz = sizeof(struct branch_stack);
500 
501 		sz += etm->synth_opts.last_branch_sz *
502 		      sizeof(struct branch_entry);
503 		tidq->last_branch = zalloc(sz);
504 		if (!tidq->last_branch)
505 			goto out_free;
506 		tidq->last_branch_rb = zalloc(sz);
507 		if (!tidq->last_branch_rb)
508 			goto out_free;
509 	}
510 
511 	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
512 	if (!tidq->event_buf)
513 		goto out_free;
514 
515 	return 0;
516 
517 out_free:
518 	zfree(&tidq->last_branch_rb);
519 	zfree(&tidq->last_branch);
520 	zfree(&tidq->prev_packet);
521 	zfree(&tidq->packet);
522 out:
523 	return rc;
524 }
525 
526 static struct cs_etm_traceid_queue
527 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
528 {
529 	int idx;
530 	struct int_node *inode;
531 	struct intlist *traceid_queues_list;
532 	struct cs_etm_traceid_queue *tidq, **traceid_queues;
533 	struct cs_etm_auxtrace *etm = etmq->etm;
534 
535 	if (etm->per_thread_decoding)
536 		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
537 
538 	traceid_queues_list = etmq->traceid_queues_list;
539 
540 	/*
541 	 * Check if the traceid_queue exist for this traceID by looking
542 	 * in the queue list.
543 	 */
544 	inode = intlist__find(traceid_queues_list, trace_chan_id);
545 	if (inode) {
546 		idx = (int)(intptr_t)inode->priv;
547 		return etmq->traceid_queues[idx];
548 	}
549 
550 	/* We couldn't find a traceid_queue for this traceID, allocate one */
551 	tidq = malloc(sizeof(*tidq));
552 	if (!tidq)
553 		return NULL;
554 
555 	memset(tidq, 0, sizeof(*tidq));
556 
557 	/* Get a valid index for the new traceid_queue */
558 	idx = intlist__nr_entries(traceid_queues_list);
559 	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
560 	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
561 	if (!inode)
562 		goto out_free;
563 
564 	/* Associate this traceID with this index */
565 	inode->priv = (void *)(intptr_t)idx;
566 
567 	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
568 		goto out_free;
569 
570 	/* Grow the traceid_queues array by one unit */
571 	traceid_queues = etmq->traceid_queues;
572 	traceid_queues = reallocarray(traceid_queues,
573 				      idx + 1,
574 				      sizeof(*traceid_queues));
575 
576 	/*
577 	 * On failure reallocarray() returns NULL and the original block of
578 	 * memory is left untouched.
579 	 */
580 	if (!traceid_queues)
581 		goto out_free;
582 
583 	traceid_queues[idx] = tidq;
584 	etmq->traceid_queues = traceid_queues;
585 
586 	return etmq->traceid_queues[idx];
587 
588 out_free:
589 	/*
590 	 * Function intlist__remove() removes the inode from the list
591 	 * and delete the memory associated to it.
592 	 */
593 	intlist__remove(traceid_queues_list, inode);
594 	free(tidq);
595 
596 	return NULL;
597 }
598 
599 struct cs_etm_packet_queue
600 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
601 {
602 	struct cs_etm_traceid_queue *tidq;
603 
604 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
605 	if (tidq)
606 		return &tidq->packet_queue;
607 
608 	return NULL;
609 }
610 
611 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
612 				struct cs_etm_traceid_queue *tidq)
613 {
614 	struct cs_etm_packet *tmp;
615 
616 	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
617 	    etm->synth_opts.instructions) {
618 		/*
619 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
620 		 * the next incoming packet.
621 		 *
622 		 * Threads and exception levels are also tracked for both the
623 		 * previous and current packets. This is because the previous
624 		 * packet is used for the 'from' IP for branch samples, so the
625 		 * thread at that time must also be assigned to that sample.
626 		 * Across discontinuity packets the thread can change, so by
627 		 * tracking the thread for the previous packet the branch sample
628 		 * will have the correct info.
629 		 */
630 		tmp = tidq->packet;
631 		tidq->packet = tidq->prev_packet;
632 		tidq->prev_packet = tmp;
633 		tidq->prev_packet_el = tidq->el;
634 		thread__put(tidq->prev_packet_thread);
635 		tidq->prev_packet_thread = thread__get(tidq->thread);
636 	}
637 }
638 
639 static void cs_etm__packet_dump(const char *pkt_string)
640 {
641 	const char *color = PERF_COLOR_BLUE;
642 	int len = strlen(pkt_string);
643 
644 	if (len && (pkt_string[len-1] == '\n'))
645 		color_fprintf(stdout, color, "	%s", pkt_string);
646 	else
647 		color_fprintf(stdout, color, "	%s\n", pkt_string);
648 
649 	fflush(stdout);
650 }
651 
652 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
653 					  struct cs_etm_auxtrace *etm, int t_idx,
654 					  int m_idx, u32 etmidr)
655 {
656 	u64 **metadata = etm->metadata;
657 
658 	t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
659 	t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
660 	t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
661 }
662 
663 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
664 					  struct cs_etm_auxtrace *etm, int t_idx,
665 					  int m_idx)
666 {
667 	u64 **metadata = etm->metadata;
668 
669 	t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
670 	t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
671 	t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
672 	t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
673 	t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
674 	t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
675 	t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
676 }
677 
678 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
679 					  struct cs_etm_auxtrace *etm, int t_idx,
680 					  int m_idx)
681 {
682 	u64 **metadata = etm->metadata;
683 
684 	t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
685 	t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
686 	t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
687 	t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
688 	t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
689 	t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
690 	t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
691 	t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
692 }
693 
694 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
695 				     struct cs_etm_auxtrace *etm,
696 				     bool formatted,
697 				     int sample_cpu,
698 				     int decoders)
699 {
700 	int t_idx, m_idx;
701 	u32 etmidr;
702 	u64 architecture;
703 
704 	for (t_idx = 0; t_idx < decoders; t_idx++) {
705 		if (formatted)
706 			m_idx = t_idx;
707 		else {
708 			m_idx = get_cpu_data_idx(etm, sample_cpu);
709 			if (m_idx == -1) {
710 				pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
711 				m_idx = 0;
712 			}
713 		}
714 
715 		architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
716 
717 		switch (architecture) {
718 		case __perf_cs_etmv3_magic:
719 			etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
720 			cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
721 			break;
722 		case __perf_cs_etmv4_magic:
723 			cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
724 			break;
725 		case __perf_cs_ete_magic:
726 			cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
727 			break;
728 		default:
729 			return -EINVAL;
730 		}
731 	}
732 
733 	return 0;
734 }
735 
736 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
737 				       struct cs_etm_queue *etmq,
738 				       enum cs_etm_decoder_operation mode,
739 				       bool formatted)
740 {
741 	int ret = -EINVAL;
742 
743 	if (!(mode < CS_ETM_OPERATION_MAX))
744 		goto out;
745 
746 	d_params->packet_printer = cs_etm__packet_dump;
747 	d_params->operation = mode;
748 	d_params->data = etmq;
749 	d_params->formatted = formatted;
750 	d_params->fsyncs = false;
751 	d_params->hsyncs = false;
752 	d_params->frame_aligned = true;
753 
754 	ret = 0;
755 out:
756 	return ret;
757 }
758 
759 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
760 			       struct auxtrace_buffer *buffer)
761 {
762 	int ret;
763 	const char *color = PERF_COLOR_BLUE;
764 	size_t buffer_used = 0;
765 
766 	fprintf(stdout, "\n");
767 	color_fprintf(stdout, color,
768 		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
769 		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
770 
771 	do {
772 		size_t consumed;
773 
774 		ret = cs_etm_decoder__process_data_block(
775 				etmq->decoder, buffer->offset,
776 				&((u8 *)buffer->data)[buffer_used],
777 				buffer->size - buffer_used, &consumed);
778 		if (ret)
779 			break;
780 
781 		buffer_used += consumed;
782 	} while (buffer_used < buffer->size);
783 
784 	cs_etm_decoder__reset(etmq->decoder);
785 }
786 
787 static int cs_etm__flush_events(struct perf_session *session,
788 				struct perf_tool *tool)
789 {
790 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
791 						   struct cs_etm_auxtrace,
792 						   auxtrace);
793 	if (dump_trace)
794 		return 0;
795 
796 	if (!tool->ordered_events)
797 		return -EINVAL;
798 
799 	if (etm->timeless_decoding) {
800 		/*
801 		 * Pass tid = -1 to process all queues. But likely they will have
802 		 * already been processed on PERF_RECORD_EXIT anyway.
803 		 */
804 		return cs_etm__process_timeless_queues(etm, -1);
805 	}
806 
807 	return cs_etm__process_timestamped_queues(etm);
808 }
809 
810 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
811 {
812 	int idx;
813 	uintptr_t priv;
814 	struct int_node *inode, *tmp;
815 	struct cs_etm_traceid_queue *tidq;
816 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
817 
818 	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
819 		priv = (uintptr_t)inode->priv;
820 		idx = priv;
821 
822 		/* Free this traceid_queue from the array */
823 		tidq = etmq->traceid_queues[idx];
824 		thread__zput(tidq->thread);
825 		thread__zput(tidq->prev_packet_thread);
826 		zfree(&tidq->event_buf);
827 		zfree(&tidq->last_branch);
828 		zfree(&tidq->last_branch_rb);
829 		zfree(&tidq->prev_packet);
830 		zfree(&tidq->packet);
831 		zfree(&tidq);
832 
833 		/*
834 		 * Function intlist__remove() removes the inode from the list
835 		 * and delete the memory associated to it.
836 		 */
837 		intlist__remove(traceid_queues_list, inode);
838 	}
839 
840 	/* Then the RB tree itself */
841 	intlist__delete(traceid_queues_list);
842 	etmq->traceid_queues_list = NULL;
843 
844 	/* finally free the traceid_queues array */
845 	zfree(&etmq->traceid_queues);
846 }
847 
848 static void cs_etm__free_queue(void *priv)
849 {
850 	struct cs_etm_queue *etmq = priv;
851 
852 	if (!etmq)
853 		return;
854 
855 	cs_etm_decoder__free(etmq->decoder);
856 	cs_etm__free_traceid_queues(etmq);
857 	free(etmq);
858 }
859 
860 static void cs_etm__free_events(struct perf_session *session)
861 {
862 	unsigned int i;
863 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
864 						   struct cs_etm_auxtrace,
865 						   auxtrace);
866 	struct auxtrace_queues *queues = &aux->queues;
867 
868 	for (i = 0; i < queues->nr_queues; i++) {
869 		cs_etm__free_queue(queues->queue_array[i].priv);
870 		queues->queue_array[i].priv = NULL;
871 	}
872 
873 	auxtrace_queues__free(queues);
874 }
875 
876 static void cs_etm__free(struct perf_session *session)
877 {
878 	int i;
879 	struct int_node *inode, *tmp;
880 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
881 						   struct cs_etm_auxtrace,
882 						   auxtrace);
883 	cs_etm__free_events(session);
884 	session->auxtrace = NULL;
885 
886 	/* First remove all traceID/metadata nodes for the RB tree */
887 	intlist__for_each_entry_safe(inode, tmp, traceid_list)
888 		intlist__remove(traceid_list, inode);
889 	/* Then the RB tree itself */
890 	intlist__delete(traceid_list);
891 
892 	for (i = 0; i < aux->num_cpu; i++)
893 		zfree(&aux->metadata[i]);
894 
895 	zfree(&aux->metadata);
896 	zfree(&aux);
897 }
898 
899 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
900 				      struct evsel *evsel)
901 {
902 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
903 						   struct cs_etm_auxtrace,
904 						   auxtrace);
905 
906 	return evsel->core.attr.type == aux->pmu_type;
907 }
908 
909 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
910 					   ocsd_ex_level el)
911 {
912 	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
913 
914 	/*
915 	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
916 	 * running at EL1 assume everything is the host.
917 	 */
918 	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
919 		return &etmq->etm->session->machines.host;
920 
921 	/*
922 	 * Not perfect, but otherwise assume anything in EL1 is the default
923 	 * guest, and everything else is the host. Distinguishing between guest
924 	 * and host userspaces isn't currently supported either. Neither is
925 	 * multiple guest support. All this does is reduce the likeliness of
926 	 * decode errors where we look into the host kernel maps when it should
927 	 * have been the guest maps.
928 	 */
929 	switch (el) {
930 	case ocsd_EL1:
931 		return machines__find_guest(&etmq->etm->session->machines,
932 					    DEFAULT_GUEST_KERNEL_ID);
933 	case ocsd_EL3:
934 	case ocsd_EL2:
935 	case ocsd_EL0:
936 	case ocsd_EL_unknown:
937 	default:
938 		return &etmq->etm->session->machines.host;
939 	}
940 }
941 
942 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
943 			   ocsd_ex_level el)
944 {
945 	struct machine *machine = cs_etm__get_machine(etmq, el);
946 
947 	if (address >= machine__kernel_start(machine)) {
948 		if (machine__is_host(machine))
949 			return PERF_RECORD_MISC_KERNEL;
950 		else
951 			return PERF_RECORD_MISC_GUEST_KERNEL;
952 	} else {
953 		if (machine__is_host(machine))
954 			return PERF_RECORD_MISC_USER;
955 		else {
956 			/*
957 			 * Can't really happen at the moment because
958 			 * cs_etm__get_machine() will always return
959 			 * machines.host for any non EL1 trace.
960 			 */
961 			return PERF_RECORD_MISC_GUEST_USER;
962 		}
963 	}
964 }
965 
966 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
967 			      u64 address, size_t size, u8 *buffer,
968 			      const ocsd_mem_space_acc_t mem_space)
969 {
970 	u8  cpumode;
971 	u64 offset;
972 	int len;
973 	struct addr_location al;
974 	struct dso *dso;
975 	struct cs_etm_traceid_queue *tidq;
976 	int ret = 0;
977 
978 	if (!etmq)
979 		return 0;
980 
981 	addr_location__init(&al);
982 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
983 	if (!tidq)
984 		goto out;
985 
986 	/*
987 	 * We've already tracked EL along side the PID in cs_etm__set_thread()
988 	 * so double check that it matches what OpenCSD thinks as well. It
989 	 * doesn't distinguish between EL0 and EL1 for this mem access callback
990 	 * so we had to do the extra tracking. Skip validation if it's any of
991 	 * the 'any' values.
992 	 */
993 	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
994 	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
995 		if (mem_space & OCSD_MEM_SPACE_EL1N) {
996 			/* Includes both non secure EL1 and EL0 */
997 			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
998 		} else if (mem_space & OCSD_MEM_SPACE_EL2)
999 			assert(tidq->el == ocsd_EL2);
1000 		else if (mem_space & OCSD_MEM_SPACE_EL3)
1001 			assert(tidq->el == ocsd_EL3);
1002 	}
1003 
1004 	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1005 
1006 	if (!thread__find_map(tidq->thread, cpumode, address, &al))
1007 		goto out;
1008 
1009 	dso = map__dso(al.map);
1010 	if (!dso)
1011 		goto out;
1012 
1013 	if (dso->data.status == DSO_DATA_STATUS_ERROR &&
1014 	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1015 		goto out;
1016 
1017 	offset = map__map_ip(al.map, address);
1018 
1019 	map__load(al.map);
1020 
1021 	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1022 				    offset, buffer, size);
1023 
1024 	if (len <= 0) {
1025 		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1026 				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1027 		if (!dso->auxtrace_warned) {
1028 			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1029 				    address,
1030 				    dso->long_name ? dso->long_name : "Unknown");
1031 			dso->auxtrace_warned = true;
1032 		}
1033 		goto out;
1034 	}
1035 	ret = len;
1036 out:
1037 	addr_location__exit(&al);
1038 	return ret;
1039 }
1040 
1041 static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
1042 						bool formatted, int sample_cpu)
1043 {
1044 	struct cs_etm_decoder_params d_params;
1045 	struct cs_etm_trace_params  *t_params = NULL;
1046 	struct cs_etm_queue *etmq;
1047 	/*
1048 	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
1049 	 * needed.
1050 	 */
1051 	int decoders = formatted ? etm->num_cpu : 1;
1052 
1053 	etmq = zalloc(sizeof(*etmq));
1054 	if (!etmq)
1055 		return NULL;
1056 
1057 	etmq->traceid_queues_list = intlist__new(NULL);
1058 	if (!etmq->traceid_queues_list)
1059 		goto out_free;
1060 
1061 	/* Use metadata to fill in trace parameters for trace decoder */
1062 	t_params = zalloc(sizeof(*t_params) * decoders);
1063 
1064 	if (!t_params)
1065 		goto out_free;
1066 
1067 	if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
1068 		goto out_free;
1069 
1070 	/* Set decoder parameters to decode trace packets */
1071 	if (cs_etm__init_decoder_params(&d_params, etmq,
1072 					dump_trace ? CS_ETM_OPERATION_PRINT :
1073 						     CS_ETM_OPERATION_DECODE,
1074 					formatted))
1075 		goto out_free;
1076 
1077 	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
1078 					    t_params);
1079 
1080 	if (!etmq->decoder)
1081 		goto out_free;
1082 
1083 	/*
1084 	 * Register a function to handle all memory accesses required by
1085 	 * the trace decoder library.
1086 	 */
1087 	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
1088 					      0x0L, ((u64) -1L),
1089 					      cs_etm__mem_access))
1090 		goto out_free_decoder;
1091 
1092 	zfree(&t_params);
1093 	return etmq;
1094 
1095 out_free_decoder:
1096 	cs_etm_decoder__free(etmq->decoder);
1097 out_free:
1098 	intlist__delete(etmq->traceid_queues_list);
1099 	free(etmq);
1100 
1101 	return NULL;
1102 }
1103 
1104 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1105 			       struct auxtrace_queue *queue,
1106 			       unsigned int queue_nr,
1107 			       bool formatted,
1108 			       int sample_cpu)
1109 {
1110 	struct cs_etm_queue *etmq = queue->priv;
1111 
1112 	if (list_empty(&queue->head) || etmq)
1113 		return 0;
1114 
1115 	etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
1116 
1117 	if (!etmq)
1118 		return -ENOMEM;
1119 
1120 	queue->priv = etmq;
1121 	etmq->etm = etm;
1122 	etmq->queue_nr = queue_nr;
1123 	etmq->offset = 0;
1124 
1125 	return 0;
1126 }
1127 
1128 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1129 					    struct cs_etm_queue *etmq,
1130 					    unsigned int queue_nr)
1131 {
1132 	int ret = 0;
1133 	unsigned int cs_queue_nr;
1134 	u8 trace_chan_id;
1135 	u64 cs_timestamp;
1136 
1137 	/*
1138 	 * We are under a CPU-wide trace scenario.  As such we need to know
1139 	 * when the code that generated the traces started to execute so that
1140 	 * it can be correlated with execution on other CPUs.  So we get a
1141 	 * handle on the beginning of traces and decode until we find a
1142 	 * timestamp.  The timestamp is then added to the auxtrace min heap
1143 	 * in order to know what nibble (of all the etmqs) to decode first.
1144 	 */
1145 	while (1) {
1146 		/*
1147 		 * Fetch an aux_buffer from this etmq.  Bail if no more
1148 		 * blocks or an error has been encountered.
1149 		 */
1150 		ret = cs_etm__get_data_block(etmq);
1151 		if (ret <= 0)
1152 			goto out;
1153 
1154 		/*
1155 		 * Run decoder on the trace block.  The decoder will stop when
1156 		 * encountering a CS timestamp, a full packet queue or the end of
1157 		 * trace for that block.
1158 		 */
1159 		ret = cs_etm__decode_data_block(etmq);
1160 		if (ret)
1161 			goto out;
1162 
1163 		/*
1164 		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1165 		 * the timestamp calculation for us.
1166 		 */
1167 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1168 
1169 		/* We found a timestamp, no need to continue. */
1170 		if (cs_timestamp)
1171 			break;
1172 
1173 		/*
1174 		 * We didn't find a timestamp so empty all the traceid packet
1175 		 * queues before looking for another timestamp packet, either
1176 		 * in the current data block or a new one.  Packets that were
1177 		 * just decoded are useless since no timestamp has been
1178 		 * associated with them.  As such simply discard them.
1179 		 */
1180 		cs_etm__clear_all_packet_queues(etmq);
1181 	}
1182 
1183 	/*
1184 	 * We have a timestamp.  Add it to the min heap to reflect when
1185 	 * instructions conveyed by the range packets of this traceID queue
1186 	 * started to execute.  Once the same has been done for all the traceID
1187 	 * queues of each etmq, redenring and decoding can start in
1188 	 * chronological order.
1189 	 *
1190 	 * Note that packets decoded above are still in the traceID's packet
1191 	 * queue and will be processed in cs_etm__process_timestamped_queues().
1192 	 */
1193 	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1194 	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1195 out:
1196 	return ret;
1197 }
1198 
1199 static inline
1200 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1201 				 struct cs_etm_traceid_queue *tidq)
1202 {
1203 	struct branch_stack *bs_src = tidq->last_branch_rb;
1204 	struct branch_stack *bs_dst = tidq->last_branch;
1205 	size_t nr = 0;
1206 
1207 	/*
1208 	 * Set the number of records before early exit: ->nr is used to
1209 	 * determine how many branches to copy from ->entries.
1210 	 */
1211 	bs_dst->nr = bs_src->nr;
1212 
1213 	/*
1214 	 * Early exit when there is nothing to copy.
1215 	 */
1216 	if (!bs_src->nr)
1217 		return;
1218 
1219 	/*
1220 	 * As bs_src->entries is a circular buffer, we need to copy from it in
1221 	 * two steps.  First, copy the branches from the most recently inserted
1222 	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1223 	 */
1224 	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1225 	memcpy(&bs_dst->entries[0],
1226 	       &bs_src->entries[tidq->last_branch_pos],
1227 	       sizeof(struct branch_entry) * nr);
1228 
1229 	/*
1230 	 * If we wrapped around at least once, the branches from the beginning
1231 	 * of the bs_src->entries buffer and until the ->last_branch_pos element
1232 	 * are older valid branches: copy them over.  The total number of
1233 	 * branches copied over will be equal to the number of branches asked by
1234 	 * the user in last_branch_sz.
1235 	 */
1236 	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1237 		memcpy(&bs_dst->entries[nr],
1238 		       &bs_src->entries[0],
1239 		       sizeof(struct branch_entry) * tidq->last_branch_pos);
1240 	}
1241 }
1242 
1243 static inline
1244 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1245 {
1246 	tidq->last_branch_pos = 0;
1247 	tidq->last_branch_rb->nr = 0;
1248 }
1249 
1250 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1251 					 u8 trace_chan_id, u64 addr)
1252 {
1253 	u8 instrBytes[2];
1254 
1255 	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1256 			   instrBytes, 0);
1257 	/*
1258 	 * T32 instruction size is indicated by bits[15:11] of the first
1259 	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1260 	 * denote a 32-bit instruction.
1261 	 */
1262 	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1263 }
1264 
1265 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1266 {
1267 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1268 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1269 		return 0;
1270 
1271 	return packet->start_addr;
1272 }
1273 
1274 static inline
1275 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1276 {
1277 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1278 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1279 		return 0;
1280 
1281 	return packet->end_addr - packet->last_instr_size;
1282 }
1283 
1284 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1285 				     u64 trace_chan_id,
1286 				     const struct cs_etm_packet *packet,
1287 				     u64 offset)
1288 {
1289 	if (packet->isa == CS_ETM_ISA_T32) {
1290 		u64 addr = packet->start_addr;
1291 
1292 		while (offset) {
1293 			addr += cs_etm__t32_instr_size(etmq,
1294 						       trace_chan_id, addr);
1295 			offset--;
1296 		}
1297 		return addr;
1298 	}
1299 
1300 	/* Assume a 4 byte instruction size (A32/A64) */
1301 	return packet->start_addr + offset * 4;
1302 }
1303 
1304 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1305 					  struct cs_etm_traceid_queue *tidq)
1306 {
1307 	struct branch_stack *bs = tidq->last_branch_rb;
1308 	struct branch_entry *be;
1309 
1310 	/*
1311 	 * The branches are recorded in a circular buffer in reverse
1312 	 * chronological order: we start recording from the last element of the
1313 	 * buffer down.  After writing the first element of the stack, move the
1314 	 * insert position back to the end of the buffer.
1315 	 */
1316 	if (!tidq->last_branch_pos)
1317 		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1318 
1319 	tidq->last_branch_pos -= 1;
1320 
1321 	be       = &bs->entries[tidq->last_branch_pos];
1322 	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1323 	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1324 	/* No support for mispredict */
1325 	be->flags.mispred = 0;
1326 	be->flags.predicted = 1;
1327 
1328 	/*
1329 	 * Increment bs->nr until reaching the number of last branches asked by
1330 	 * the user on the command line.
1331 	 */
1332 	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1333 		bs->nr += 1;
1334 }
1335 
1336 static int cs_etm__inject_event(union perf_event *event,
1337 			       struct perf_sample *sample, u64 type)
1338 {
1339 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1340 	return perf_event__synthesize_sample(event, type, 0, sample);
1341 }
1342 
1343 
1344 static int
1345 cs_etm__get_trace(struct cs_etm_queue *etmq)
1346 {
1347 	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1348 	struct auxtrace_buffer *old_buffer = aux_buffer;
1349 	struct auxtrace_queue *queue;
1350 
1351 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1352 
1353 	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1354 
1355 	/* If no more data, drop the previous auxtrace_buffer and return */
1356 	if (!aux_buffer) {
1357 		if (old_buffer)
1358 			auxtrace_buffer__drop_data(old_buffer);
1359 		etmq->buf_len = 0;
1360 		return 0;
1361 	}
1362 
1363 	etmq->buffer = aux_buffer;
1364 
1365 	/* If the aux_buffer doesn't have data associated, try to load it */
1366 	if (!aux_buffer->data) {
1367 		/* get the file desc associated with the perf data file */
1368 		int fd = perf_data__fd(etmq->etm->session->data);
1369 
1370 		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1371 		if (!aux_buffer->data)
1372 			return -ENOMEM;
1373 	}
1374 
1375 	/* If valid, drop the previous buffer */
1376 	if (old_buffer)
1377 		auxtrace_buffer__drop_data(old_buffer);
1378 
1379 	etmq->buf_used = 0;
1380 	etmq->buf_len = aux_buffer->size;
1381 	etmq->buf = aux_buffer->data;
1382 
1383 	return etmq->buf_len;
1384 }
1385 
1386 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1387 			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1388 			       ocsd_ex_level el)
1389 {
1390 	struct machine *machine = cs_etm__get_machine(etmq, el);
1391 
1392 	if (tid != -1) {
1393 		thread__zput(tidq->thread);
1394 		tidq->thread = machine__find_thread(machine, -1, tid);
1395 	}
1396 
1397 	/* Couldn't find a known thread */
1398 	if (!tidq->thread)
1399 		tidq->thread = machine__idle_thread(machine);
1400 
1401 	tidq->el = el;
1402 }
1403 
1404 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1405 			    u8 trace_chan_id, ocsd_ex_level el)
1406 {
1407 	struct cs_etm_traceid_queue *tidq;
1408 
1409 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1410 	if (!tidq)
1411 		return -EINVAL;
1412 
1413 	cs_etm__set_thread(etmq, tidq, tid, el);
1414 	return 0;
1415 }
1416 
1417 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1418 {
1419 	return !!etmq->etm->timeless_decoding;
1420 }
1421 
1422 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1423 			      u64 trace_chan_id,
1424 			      const struct cs_etm_packet *packet,
1425 			      struct perf_sample *sample)
1426 {
1427 	/*
1428 	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1429 	 * packet, so directly bail out with 'insn_len' = 0.
1430 	 */
1431 	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1432 		sample->insn_len = 0;
1433 		return;
1434 	}
1435 
1436 	/*
1437 	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1438 	 * cs_etm__t32_instr_size().
1439 	 */
1440 	if (packet->isa == CS_ETM_ISA_T32)
1441 		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1442 							  sample->ip);
1443 	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
1444 	else
1445 		sample->insn_len = 4;
1446 
1447 	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1448 			   (void *)sample->insn, 0);
1449 }
1450 
1451 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1452 {
1453 	struct cs_etm_auxtrace *etm = etmq->etm;
1454 
1455 	if (etm->has_virtual_ts)
1456 		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1457 	else
1458 		return cs_timestamp;
1459 }
1460 
1461 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1462 					       struct cs_etm_traceid_queue *tidq)
1463 {
1464 	struct cs_etm_auxtrace *etm = etmq->etm;
1465 	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1466 
1467 	if (!etm->timeless_decoding && etm->has_virtual_ts)
1468 		return packet_queue->cs_timestamp;
1469 	else
1470 		return etm->latest_kernel_timestamp;
1471 }
1472 
1473 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1474 					    struct cs_etm_traceid_queue *tidq,
1475 					    u64 addr, u64 period)
1476 {
1477 	int ret = 0;
1478 	struct cs_etm_auxtrace *etm = etmq->etm;
1479 	union perf_event *event = tidq->event_buf;
1480 	struct perf_sample sample = {.ip = 0,};
1481 
1482 	event->sample.header.type = PERF_RECORD_SAMPLE;
1483 	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1484 	event->sample.header.size = sizeof(struct perf_event_header);
1485 
1486 	/* Set time field based on etm auxtrace config. */
1487 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1488 
1489 	sample.ip = addr;
1490 	sample.pid = thread__pid(tidq->thread);
1491 	sample.tid = thread__tid(tidq->thread);
1492 	sample.id = etmq->etm->instructions_id;
1493 	sample.stream_id = etmq->etm->instructions_id;
1494 	sample.period = period;
1495 	sample.cpu = tidq->packet->cpu;
1496 	sample.flags = tidq->prev_packet->flags;
1497 	sample.cpumode = event->sample.header.misc;
1498 
1499 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1500 
1501 	if (etm->synth_opts.last_branch)
1502 		sample.branch_stack = tidq->last_branch;
1503 
1504 	if (etm->synth_opts.inject) {
1505 		ret = cs_etm__inject_event(event, &sample,
1506 					   etm->instructions_sample_type);
1507 		if (ret)
1508 			return ret;
1509 	}
1510 
1511 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1512 
1513 	if (ret)
1514 		pr_err(
1515 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1516 			ret);
1517 
1518 	return ret;
1519 }
1520 
1521 /*
1522  * The cs etm packet encodes an instruction range between a branch target
1523  * and the next taken branch. Generate sample accordingly.
1524  */
1525 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1526 				       struct cs_etm_traceid_queue *tidq)
1527 {
1528 	int ret = 0;
1529 	struct cs_etm_auxtrace *etm = etmq->etm;
1530 	struct perf_sample sample = {.ip = 0,};
1531 	union perf_event *event = tidq->event_buf;
1532 	struct dummy_branch_stack {
1533 		u64			nr;
1534 		u64			hw_idx;
1535 		struct branch_entry	entries;
1536 	} dummy_bs;
1537 	u64 ip;
1538 
1539 	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1540 
1541 	event->sample.header.type = PERF_RECORD_SAMPLE;
1542 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1543 						     tidq->prev_packet_el);
1544 	event->sample.header.size = sizeof(struct perf_event_header);
1545 
1546 	/* Set time field based on etm auxtrace config. */
1547 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1548 
1549 	sample.ip = ip;
1550 	sample.pid = thread__pid(tidq->prev_packet_thread);
1551 	sample.tid = thread__tid(tidq->prev_packet_thread);
1552 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1553 	sample.id = etmq->etm->branches_id;
1554 	sample.stream_id = etmq->etm->branches_id;
1555 	sample.period = 1;
1556 	sample.cpu = tidq->packet->cpu;
1557 	sample.flags = tidq->prev_packet->flags;
1558 	sample.cpumode = event->sample.header.misc;
1559 
1560 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1561 			  &sample);
1562 
1563 	/*
1564 	 * perf report cannot handle events without a branch stack
1565 	 */
1566 	if (etm->synth_opts.last_branch) {
1567 		dummy_bs = (struct dummy_branch_stack){
1568 			.nr = 1,
1569 			.hw_idx = -1ULL,
1570 			.entries = {
1571 				.from = sample.ip,
1572 				.to = sample.addr,
1573 			},
1574 		};
1575 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1576 	}
1577 
1578 	if (etm->synth_opts.inject) {
1579 		ret = cs_etm__inject_event(event, &sample,
1580 					   etm->branches_sample_type);
1581 		if (ret)
1582 			return ret;
1583 	}
1584 
1585 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1586 
1587 	if (ret)
1588 		pr_err(
1589 		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1590 		ret);
1591 
1592 	return ret;
1593 }
1594 
1595 struct cs_etm_synth {
1596 	struct perf_tool dummy_tool;
1597 	struct perf_session *session;
1598 };
1599 
1600 static int cs_etm__event_synth(struct perf_tool *tool,
1601 			       union perf_event *event,
1602 			       struct perf_sample *sample __maybe_unused,
1603 			       struct machine *machine __maybe_unused)
1604 {
1605 	struct cs_etm_synth *cs_etm_synth =
1606 		      container_of(tool, struct cs_etm_synth, dummy_tool);
1607 
1608 	return perf_session__deliver_synth_event(cs_etm_synth->session,
1609 						 event, NULL);
1610 }
1611 
1612 static int cs_etm__synth_event(struct perf_session *session,
1613 			       struct perf_event_attr *attr, u64 id)
1614 {
1615 	struct cs_etm_synth cs_etm_synth;
1616 
1617 	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1618 	cs_etm_synth.session = session;
1619 
1620 	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1621 					   &id, cs_etm__event_synth);
1622 }
1623 
1624 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1625 				struct perf_session *session)
1626 {
1627 	struct evlist *evlist = session->evlist;
1628 	struct evsel *evsel;
1629 	struct perf_event_attr attr;
1630 	bool found = false;
1631 	u64 id;
1632 	int err;
1633 
1634 	evlist__for_each_entry(evlist, evsel) {
1635 		if (evsel->core.attr.type == etm->pmu_type) {
1636 			found = true;
1637 			break;
1638 		}
1639 	}
1640 
1641 	if (!found) {
1642 		pr_debug("No selected events with CoreSight Trace data\n");
1643 		return 0;
1644 	}
1645 
1646 	memset(&attr, 0, sizeof(struct perf_event_attr));
1647 	attr.size = sizeof(struct perf_event_attr);
1648 	attr.type = PERF_TYPE_HARDWARE;
1649 	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1650 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1651 			    PERF_SAMPLE_PERIOD;
1652 	if (etm->timeless_decoding)
1653 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1654 	else
1655 		attr.sample_type |= PERF_SAMPLE_TIME;
1656 
1657 	attr.exclude_user = evsel->core.attr.exclude_user;
1658 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1659 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1660 	attr.exclude_host = evsel->core.attr.exclude_host;
1661 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1662 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1663 	attr.read_format = evsel->core.attr.read_format;
1664 
1665 	/* create new id val to be a fixed offset from evsel id */
1666 	id = evsel->core.id[0] + 1000000000;
1667 
1668 	if (!id)
1669 		id = 1;
1670 
1671 	if (etm->synth_opts.branches) {
1672 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1673 		attr.sample_period = 1;
1674 		attr.sample_type |= PERF_SAMPLE_ADDR;
1675 		err = cs_etm__synth_event(session, &attr, id);
1676 		if (err)
1677 			return err;
1678 		etm->branches_sample_type = attr.sample_type;
1679 		etm->branches_id = id;
1680 		id += 1;
1681 		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1682 	}
1683 
1684 	if (etm->synth_opts.last_branch) {
1685 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1686 		/*
1687 		 * We don't use the hardware index, but the sample generation
1688 		 * code uses the new format branch_stack with this field,
1689 		 * so the event attributes must indicate that it's present.
1690 		 */
1691 		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1692 	}
1693 
1694 	if (etm->synth_opts.instructions) {
1695 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1696 		attr.sample_period = etm->synth_opts.period;
1697 		etm->instructions_sample_period = attr.sample_period;
1698 		err = cs_etm__synth_event(session, &attr, id);
1699 		if (err)
1700 			return err;
1701 		etm->instructions_sample_type = attr.sample_type;
1702 		etm->instructions_id = id;
1703 		id += 1;
1704 	}
1705 
1706 	return 0;
1707 }
1708 
1709 static int cs_etm__sample(struct cs_etm_queue *etmq,
1710 			  struct cs_etm_traceid_queue *tidq)
1711 {
1712 	struct cs_etm_auxtrace *etm = etmq->etm;
1713 	int ret;
1714 	u8 trace_chan_id = tidq->trace_chan_id;
1715 	u64 instrs_prev;
1716 
1717 	/* Get instructions remainder from previous packet */
1718 	instrs_prev = tidq->period_instructions;
1719 
1720 	tidq->period_instructions += tidq->packet->instr_count;
1721 
1722 	/*
1723 	 * Record a branch when the last instruction in
1724 	 * PREV_PACKET is a branch.
1725 	 */
1726 	if (etm->synth_opts.last_branch &&
1727 	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1728 	    tidq->prev_packet->last_instr_taken_branch)
1729 		cs_etm__update_last_branch_rb(etmq, tidq);
1730 
1731 	if (etm->synth_opts.instructions &&
1732 	    tidq->period_instructions >= etm->instructions_sample_period) {
1733 		/*
1734 		 * Emit instruction sample periodically
1735 		 * TODO: allow period to be defined in cycles and clock time
1736 		 */
1737 
1738 		/*
1739 		 * Below diagram demonstrates the instruction samples
1740 		 * generation flows:
1741 		 *
1742 		 *    Instrs     Instrs       Instrs       Instrs
1743 		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1744 		 *    |            |            |            |
1745 		 *    V            V            V            V
1746 		 *   --------------------------------------------------
1747 		 *            ^                                  ^
1748 		 *            |                                  |
1749 		 *         Period                             Period
1750 		 *    instructions(Pi)                   instructions(Pi')
1751 		 *
1752 		 *            |                                  |
1753 		 *            \---------------- -----------------/
1754 		 *                             V
1755 		 *                 tidq->packet->instr_count
1756 		 *
1757 		 * Instrs Sample(n...) are the synthesised samples occurring
1758 		 * every etm->instructions_sample_period instructions - as
1759 		 * defined on the perf command line.  Sample(n) is being the
1760 		 * last sample before the current etm packet, n+1 to n+3
1761 		 * samples are generated from the current etm packet.
1762 		 *
1763 		 * tidq->packet->instr_count represents the number of
1764 		 * instructions in the current etm packet.
1765 		 *
1766 		 * Period instructions (Pi) contains the number of
1767 		 * instructions executed after the sample point(n) from the
1768 		 * previous etm packet.  This will always be less than
1769 		 * etm->instructions_sample_period.
1770 		 *
1771 		 * When generate new samples, it combines with two parts
1772 		 * instructions, one is the tail of the old packet and another
1773 		 * is the head of the new coming packet, to generate
1774 		 * sample(n+1); sample(n+2) and sample(n+3) consume the
1775 		 * instructions with sample period.  After sample(n+3), the rest
1776 		 * instructions will be used by later packet and it is assigned
1777 		 * to tidq->period_instructions for next round calculation.
1778 		 */
1779 
1780 		/*
1781 		 * Get the initial offset into the current packet instructions;
1782 		 * entry conditions ensure that instrs_prev is less than
1783 		 * etm->instructions_sample_period.
1784 		 */
1785 		u64 offset = etm->instructions_sample_period - instrs_prev;
1786 		u64 addr;
1787 
1788 		/* Prepare last branches for instruction sample */
1789 		if (etm->synth_opts.last_branch)
1790 			cs_etm__copy_last_branch_rb(etmq, tidq);
1791 
1792 		while (tidq->period_instructions >=
1793 				etm->instructions_sample_period) {
1794 			/*
1795 			 * Calculate the address of the sampled instruction (-1
1796 			 * as sample is reported as though instruction has just
1797 			 * been executed, but PC has not advanced to next
1798 			 * instruction)
1799 			 */
1800 			addr = cs_etm__instr_addr(etmq, trace_chan_id,
1801 						  tidq->packet, offset - 1);
1802 			ret = cs_etm__synth_instruction_sample(
1803 				etmq, tidq, addr,
1804 				etm->instructions_sample_period);
1805 			if (ret)
1806 				return ret;
1807 
1808 			offset += etm->instructions_sample_period;
1809 			tidq->period_instructions -=
1810 				etm->instructions_sample_period;
1811 		}
1812 	}
1813 
1814 	if (etm->synth_opts.branches) {
1815 		bool generate_sample = false;
1816 
1817 		/* Generate sample for tracing on packet */
1818 		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1819 			generate_sample = true;
1820 
1821 		/* Generate sample for branch taken packet */
1822 		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1823 		    tidq->prev_packet->last_instr_taken_branch)
1824 			generate_sample = true;
1825 
1826 		if (generate_sample) {
1827 			ret = cs_etm__synth_branch_sample(etmq, tidq);
1828 			if (ret)
1829 				return ret;
1830 		}
1831 	}
1832 
1833 	cs_etm__packet_swap(etm, tidq);
1834 
1835 	return 0;
1836 }
1837 
1838 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1839 {
1840 	/*
1841 	 * When the exception packet is inserted, whether the last instruction
1842 	 * in previous range packet is taken branch or not, we need to force
1843 	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
1844 	 * to generate branch sample for the instruction range before the
1845 	 * exception is trapped to kernel or before the exception returning.
1846 	 *
1847 	 * The exception packet includes the dummy address values, so don't
1848 	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
1849 	 * for generating instruction and branch samples.
1850 	 */
1851 	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1852 		tidq->prev_packet->last_instr_taken_branch = true;
1853 
1854 	return 0;
1855 }
1856 
1857 static int cs_etm__flush(struct cs_etm_queue *etmq,
1858 			 struct cs_etm_traceid_queue *tidq)
1859 {
1860 	int err = 0;
1861 	struct cs_etm_auxtrace *etm = etmq->etm;
1862 
1863 	/* Handle start tracing packet */
1864 	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1865 		goto swap_packet;
1866 
1867 	if (etmq->etm->synth_opts.last_branch &&
1868 	    etmq->etm->synth_opts.instructions &&
1869 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1870 		u64 addr;
1871 
1872 		/* Prepare last branches for instruction sample */
1873 		cs_etm__copy_last_branch_rb(etmq, tidq);
1874 
1875 		/*
1876 		 * Generate a last branch event for the branches left in the
1877 		 * circular buffer at the end of the trace.
1878 		 *
1879 		 * Use the address of the end of the last reported execution
1880 		 * range
1881 		 */
1882 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1883 
1884 		err = cs_etm__synth_instruction_sample(
1885 			etmq, tidq, addr,
1886 			tidq->period_instructions);
1887 		if (err)
1888 			return err;
1889 
1890 		tidq->period_instructions = 0;
1891 
1892 	}
1893 
1894 	if (etm->synth_opts.branches &&
1895 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1896 		err = cs_etm__synth_branch_sample(etmq, tidq);
1897 		if (err)
1898 			return err;
1899 	}
1900 
1901 swap_packet:
1902 	cs_etm__packet_swap(etm, tidq);
1903 
1904 	/* Reset last branches after flush the trace */
1905 	if (etm->synth_opts.last_branch)
1906 		cs_etm__reset_last_branch_rb(tidq);
1907 
1908 	return err;
1909 }
1910 
1911 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1912 			     struct cs_etm_traceid_queue *tidq)
1913 {
1914 	int err;
1915 
1916 	/*
1917 	 * It has no new packet coming and 'etmq->packet' contains the stale
1918 	 * packet which was set at the previous time with packets swapping;
1919 	 * so skip to generate branch sample to avoid stale packet.
1920 	 *
1921 	 * For this case only flush branch stack and generate a last branch
1922 	 * event for the branches left in the circular buffer at the end of
1923 	 * the trace.
1924 	 */
1925 	if (etmq->etm->synth_opts.last_branch &&
1926 	    etmq->etm->synth_opts.instructions &&
1927 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1928 		u64 addr;
1929 
1930 		/* Prepare last branches for instruction sample */
1931 		cs_etm__copy_last_branch_rb(etmq, tidq);
1932 
1933 		/*
1934 		 * Use the address of the end of the last reported execution
1935 		 * range.
1936 		 */
1937 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1938 
1939 		err = cs_etm__synth_instruction_sample(
1940 			etmq, tidq, addr,
1941 			tidq->period_instructions);
1942 		if (err)
1943 			return err;
1944 
1945 		tidq->period_instructions = 0;
1946 	}
1947 
1948 	return 0;
1949 }
1950 /*
1951  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1952  *			   if need be.
1953  * Returns:	< 0	if error
1954  *		= 0	if no more auxtrace_buffer to read
1955  *		> 0	if the current buffer isn't empty yet
1956  */
1957 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1958 {
1959 	int ret;
1960 
1961 	if (!etmq->buf_len) {
1962 		ret = cs_etm__get_trace(etmq);
1963 		if (ret <= 0)
1964 			return ret;
1965 		/*
1966 		 * We cannot assume consecutive blocks in the data file
1967 		 * are contiguous, reset the decoder to force re-sync.
1968 		 */
1969 		ret = cs_etm_decoder__reset(etmq->decoder);
1970 		if (ret)
1971 			return ret;
1972 	}
1973 
1974 	return etmq->buf_len;
1975 }
1976 
1977 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1978 				 struct cs_etm_packet *packet,
1979 				 u64 end_addr)
1980 {
1981 	/* Initialise to keep compiler happy */
1982 	u16 instr16 = 0;
1983 	u32 instr32 = 0;
1984 	u64 addr;
1985 
1986 	switch (packet->isa) {
1987 	case CS_ETM_ISA_T32:
1988 		/*
1989 		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1990 		 *
1991 		 *  b'15         b'8
1992 		 * +-----------------+--------+
1993 		 * | 1 1 0 1 1 1 1 1 |  imm8  |
1994 		 * +-----------------+--------+
1995 		 *
1996 		 * According to the specification, it only defines SVC for T32
1997 		 * with 16 bits instruction and has no definition for 32bits;
1998 		 * so below only read 2 bytes as instruction size for T32.
1999 		 */
2000 		addr = end_addr - 2;
2001 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2002 				   (u8 *)&instr16, 0);
2003 		if ((instr16 & 0xFF00) == 0xDF00)
2004 			return true;
2005 
2006 		break;
2007 	case CS_ETM_ISA_A32:
2008 		/*
2009 		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2010 		 *
2011 		 *  b'31 b'28 b'27 b'24
2012 		 * +---------+---------+-------------------------+
2013 		 * |  !1111  | 1 1 1 1 |        imm24            |
2014 		 * +---------+---------+-------------------------+
2015 		 */
2016 		addr = end_addr - 4;
2017 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2018 				   (u8 *)&instr32, 0);
2019 		if ((instr32 & 0x0F000000) == 0x0F000000 &&
2020 		    (instr32 & 0xF0000000) != 0xF0000000)
2021 			return true;
2022 
2023 		break;
2024 	case CS_ETM_ISA_A64:
2025 		/*
2026 		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2027 		 *
2028 		 *  b'31               b'21           b'4     b'0
2029 		 * +-----------------------+---------+-----------+
2030 		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2031 		 * +-----------------------+---------+-----------+
2032 		 */
2033 		addr = end_addr - 4;
2034 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2035 				   (u8 *)&instr32, 0);
2036 		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2037 			return true;
2038 
2039 		break;
2040 	case CS_ETM_ISA_UNKNOWN:
2041 	default:
2042 		break;
2043 	}
2044 
2045 	return false;
2046 }
2047 
2048 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2049 			       struct cs_etm_traceid_queue *tidq, u64 magic)
2050 {
2051 	u8 trace_chan_id = tidq->trace_chan_id;
2052 	struct cs_etm_packet *packet = tidq->packet;
2053 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2054 
2055 	if (magic == __perf_cs_etmv3_magic)
2056 		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2057 			return true;
2058 
2059 	/*
2060 	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2061 	 * HVC cases; need to check if it's SVC instruction based on
2062 	 * packet address.
2063 	 */
2064 	if (magic == __perf_cs_etmv4_magic) {
2065 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2066 		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2067 					 prev_packet->end_addr))
2068 			return true;
2069 	}
2070 
2071 	return false;
2072 }
2073 
2074 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2075 				       u64 magic)
2076 {
2077 	struct cs_etm_packet *packet = tidq->packet;
2078 
2079 	if (magic == __perf_cs_etmv3_magic)
2080 		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2081 		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2082 		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2083 		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2084 		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2085 			return true;
2086 
2087 	if (magic == __perf_cs_etmv4_magic)
2088 		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2089 		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2090 		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2091 		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2092 		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2093 		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2094 		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2095 			return true;
2096 
2097 	return false;
2098 }
2099 
2100 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2101 				      struct cs_etm_traceid_queue *tidq,
2102 				      u64 magic)
2103 {
2104 	u8 trace_chan_id = tidq->trace_chan_id;
2105 	struct cs_etm_packet *packet = tidq->packet;
2106 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2107 
2108 	if (magic == __perf_cs_etmv3_magic)
2109 		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2110 		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2111 		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2112 		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2113 		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2114 		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2115 		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2116 			return true;
2117 
2118 	if (magic == __perf_cs_etmv4_magic) {
2119 		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2120 		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2121 		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2122 		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2123 			return true;
2124 
2125 		/*
2126 		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2127 		 * (SMC, HVC) are taken as sync exceptions.
2128 		 */
2129 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2130 		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2131 					  prev_packet->end_addr))
2132 			return true;
2133 
2134 		/*
2135 		 * ETMv4 has 5 bits for exception number; if the numbers
2136 		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2137 		 * they are implementation defined exceptions.
2138 		 *
2139 		 * For this case, simply take it as sync exception.
2140 		 */
2141 		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2142 		    packet->exception_number <= CS_ETMV4_EXC_END)
2143 			return true;
2144 	}
2145 
2146 	return false;
2147 }
2148 
2149 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2150 				    struct cs_etm_traceid_queue *tidq)
2151 {
2152 	struct cs_etm_packet *packet = tidq->packet;
2153 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2154 	u8 trace_chan_id = tidq->trace_chan_id;
2155 	u64 magic;
2156 	int ret;
2157 
2158 	switch (packet->sample_type) {
2159 	case CS_ETM_RANGE:
2160 		/*
2161 		 * Immediate branch instruction without neither link nor
2162 		 * return flag, it's normal branch instruction within
2163 		 * the function.
2164 		 */
2165 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2166 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2167 			packet->flags = PERF_IP_FLAG_BRANCH;
2168 
2169 			if (packet->last_instr_cond)
2170 				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2171 		}
2172 
2173 		/*
2174 		 * Immediate branch instruction with link (e.g. BL), this is
2175 		 * branch instruction for function call.
2176 		 */
2177 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2178 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2179 			packet->flags = PERF_IP_FLAG_BRANCH |
2180 					PERF_IP_FLAG_CALL;
2181 
2182 		/*
2183 		 * Indirect branch instruction with link (e.g. BLR), this is
2184 		 * branch instruction for function call.
2185 		 */
2186 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2187 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2188 			packet->flags = PERF_IP_FLAG_BRANCH |
2189 					PERF_IP_FLAG_CALL;
2190 
2191 		/*
2192 		 * Indirect branch instruction with subtype of
2193 		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
2194 		 * function return for A32/T32.
2195 		 */
2196 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2197 		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2198 			packet->flags = PERF_IP_FLAG_BRANCH |
2199 					PERF_IP_FLAG_RETURN;
2200 
2201 		/*
2202 		 * Indirect branch instruction without link (e.g. BR), usually
2203 		 * this is used for function return, especially for functions
2204 		 * within dynamic link lib.
2205 		 */
2206 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2207 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2208 			packet->flags = PERF_IP_FLAG_BRANCH |
2209 					PERF_IP_FLAG_RETURN;
2210 
2211 		/* Return instruction for function return. */
2212 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2213 		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2214 			packet->flags = PERF_IP_FLAG_BRANCH |
2215 					PERF_IP_FLAG_RETURN;
2216 
2217 		/*
2218 		 * Decoder might insert a discontinuity in the middle of
2219 		 * instruction packets, fixup prev_packet with flag
2220 		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2221 		 */
2222 		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2223 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2224 					      PERF_IP_FLAG_TRACE_BEGIN;
2225 
2226 		/*
2227 		 * If the previous packet is an exception return packet
2228 		 * and the return address just follows SVC instruction,
2229 		 * it needs to calibrate the previous packet sample flags
2230 		 * as PERF_IP_FLAG_SYSCALLRET.
2231 		 */
2232 		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2233 					   PERF_IP_FLAG_RETURN |
2234 					   PERF_IP_FLAG_INTERRUPT) &&
2235 		    cs_etm__is_svc_instr(etmq, trace_chan_id,
2236 					 packet, packet->start_addr))
2237 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2238 					     PERF_IP_FLAG_RETURN |
2239 					     PERF_IP_FLAG_SYSCALLRET;
2240 		break;
2241 	case CS_ETM_DISCONTINUITY:
2242 		/*
2243 		 * The trace is discontinuous, if the previous packet is
2244 		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
2245 		 * for previous packet.
2246 		 */
2247 		if (prev_packet->sample_type == CS_ETM_RANGE)
2248 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2249 					      PERF_IP_FLAG_TRACE_END;
2250 		break;
2251 	case CS_ETM_EXCEPTION:
2252 		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2253 		if (ret)
2254 			return ret;
2255 
2256 		/* The exception is for system call. */
2257 		if (cs_etm__is_syscall(etmq, tidq, magic))
2258 			packet->flags = PERF_IP_FLAG_BRANCH |
2259 					PERF_IP_FLAG_CALL |
2260 					PERF_IP_FLAG_SYSCALLRET;
2261 		/*
2262 		 * The exceptions are triggered by external signals from bus,
2263 		 * interrupt controller, debug module, PE reset or halt.
2264 		 */
2265 		else if (cs_etm__is_async_exception(tidq, magic))
2266 			packet->flags = PERF_IP_FLAG_BRANCH |
2267 					PERF_IP_FLAG_CALL |
2268 					PERF_IP_FLAG_ASYNC |
2269 					PERF_IP_FLAG_INTERRUPT;
2270 		/*
2271 		 * Otherwise, exception is caused by trap, instruction &
2272 		 * data fault, or alignment errors.
2273 		 */
2274 		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2275 			packet->flags = PERF_IP_FLAG_BRANCH |
2276 					PERF_IP_FLAG_CALL |
2277 					PERF_IP_FLAG_INTERRUPT;
2278 
2279 		/*
2280 		 * When the exception packet is inserted, since exception
2281 		 * packet is not used standalone for generating samples
2282 		 * and it's affiliation to the previous instruction range
2283 		 * packet; so set previous range packet flags to tell perf
2284 		 * it is an exception taken branch.
2285 		 */
2286 		if (prev_packet->sample_type == CS_ETM_RANGE)
2287 			prev_packet->flags = packet->flags;
2288 		break;
2289 	case CS_ETM_EXCEPTION_RET:
2290 		/*
2291 		 * When the exception return packet is inserted, since
2292 		 * exception return packet is not used standalone for
2293 		 * generating samples and it's affiliation to the previous
2294 		 * instruction range packet; so set previous range packet
2295 		 * flags to tell perf it is an exception return branch.
2296 		 *
2297 		 * The exception return can be for either system call or
2298 		 * other exception types; unfortunately the packet doesn't
2299 		 * contain exception type related info so we cannot decide
2300 		 * the exception type purely based on exception return packet.
2301 		 * If we record the exception number from exception packet and
2302 		 * reuse it for exception return packet, this is not reliable
2303 		 * due the trace can be discontinuity or the interrupt can
2304 		 * be nested, thus the recorded exception number cannot be
2305 		 * used for exception return packet for these two cases.
2306 		 *
2307 		 * For exception return packet, we only need to distinguish the
2308 		 * packet is for system call or for other types.  Thus the
2309 		 * decision can be deferred when receive the next packet which
2310 		 * contains the return address, based on the return address we
2311 		 * can read out the previous instruction and check if it's a
2312 		 * system call instruction and then calibrate the sample flag
2313 		 * as needed.
2314 		 */
2315 		if (prev_packet->sample_type == CS_ETM_RANGE)
2316 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2317 					     PERF_IP_FLAG_RETURN |
2318 					     PERF_IP_FLAG_INTERRUPT;
2319 		break;
2320 	case CS_ETM_EMPTY:
2321 	default:
2322 		break;
2323 	}
2324 
2325 	return 0;
2326 }
2327 
2328 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2329 {
2330 	int ret = 0;
2331 	size_t processed = 0;
2332 
2333 	/*
2334 	 * Packets are decoded and added to the decoder's packet queue
2335 	 * until the decoder packet processing callback has requested that
2336 	 * processing stops or there is nothing left in the buffer.  Normal
2337 	 * operations that stop processing are a timestamp packet or a full
2338 	 * decoder buffer queue.
2339 	 */
2340 	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2341 						 etmq->offset,
2342 						 &etmq->buf[etmq->buf_used],
2343 						 etmq->buf_len,
2344 						 &processed);
2345 	if (ret)
2346 		goto out;
2347 
2348 	etmq->offset += processed;
2349 	etmq->buf_used += processed;
2350 	etmq->buf_len -= processed;
2351 
2352 out:
2353 	return ret;
2354 }
2355 
2356 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2357 					 struct cs_etm_traceid_queue *tidq)
2358 {
2359 	int ret;
2360 	struct cs_etm_packet_queue *packet_queue;
2361 
2362 	packet_queue = &tidq->packet_queue;
2363 
2364 	/* Process each packet in this chunk */
2365 	while (1) {
2366 		ret = cs_etm_decoder__get_packet(packet_queue,
2367 						 tidq->packet);
2368 		if (ret <= 0)
2369 			/*
2370 			 * Stop processing this chunk on
2371 			 * end of data or error
2372 			 */
2373 			break;
2374 
2375 		/*
2376 		 * Since packet addresses are swapped in packet
2377 		 * handling within below switch() statements,
2378 		 * thus setting sample flags must be called
2379 		 * prior to switch() statement to use address
2380 		 * information before packets swapping.
2381 		 */
2382 		ret = cs_etm__set_sample_flags(etmq, tidq);
2383 		if (ret < 0)
2384 			break;
2385 
2386 		switch (tidq->packet->sample_type) {
2387 		case CS_ETM_RANGE:
2388 			/*
2389 			 * If the packet contains an instruction
2390 			 * range, generate instruction sequence
2391 			 * events.
2392 			 */
2393 			cs_etm__sample(etmq, tidq);
2394 			break;
2395 		case CS_ETM_EXCEPTION:
2396 		case CS_ETM_EXCEPTION_RET:
2397 			/*
2398 			 * If the exception packet is coming,
2399 			 * make sure the previous instruction
2400 			 * range packet to be handled properly.
2401 			 */
2402 			cs_etm__exception(tidq);
2403 			break;
2404 		case CS_ETM_DISCONTINUITY:
2405 			/*
2406 			 * Discontinuity in trace, flush
2407 			 * previous branch stack
2408 			 */
2409 			cs_etm__flush(etmq, tidq);
2410 			break;
2411 		case CS_ETM_EMPTY:
2412 			/*
2413 			 * Should not receive empty packet,
2414 			 * report error.
2415 			 */
2416 			pr_err("CS ETM Trace: empty packet\n");
2417 			return -EINVAL;
2418 		default:
2419 			break;
2420 		}
2421 	}
2422 
2423 	return ret;
2424 }
2425 
2426 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2427 {
2428 	int idx;
2429 	struct int_node *inode;
2430 	struct cs_etm_traceid_queue *tidq;
2431 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2432 
2433 	intlist__for_each_entry(inode, traceid_queues_list) {
2434 		idx = (int)(intptr_t)inode->priv;
2435 		tidq = etmq->traceid_queues[idx];
2436 
2437 		/* Ignore return value */
2438 		cs_etm__process_traceid_queue(etmq, tidq);
2439 
2440 		/*
2441 		 * Generate an instruction sample with the remaining
2442 		 * branchstack entries.
2443 		 */
2444 		cs_etm__flush(etmq, tidq);
2445 	}
2446 }
2447 
2448 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2449 {
2450 	int err = 0;
2451 	struct cs_etm_traceid_queue *tidq;
2452 
2453 	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2454 	if (!tidq)
2455 		return -EINVAL;
2456 
2457 	/* Go through each buffer in the queue and decode them one by one */
2458 	while (1) {
2459 		err = cs_etm__get_data_block(etmq);
2460 		if (err <= 0)
2461 			return err;
2462 
2463 		/* Run trace decoder until buffer consumed or end of trace */
2464 		do {
2465 			err = cs_etm__decode_data_block(etmq);
2466 			if (err)
2467 				return err;
2468 
2469 			/*
2470 			 * Process each packet in this chunk, nothing to do if
2471 			 * an error occurs other than hoping the next one will
2472 			 * be better.
2473 			 */
2474 			err = cs_etm__process_traceid_queue(etmq, tidq);
2475 
2476 		} while (etmq->buf_len);
2477 
2478 		if (err == 0)
2479 			/* Flush any remaining branch stack entries */
2480 			err = cs_etm__end_block(etmq, tidq);
2481 	}
2482 
2483 	return err;
2484 }
2485 
2486 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2487 {
2488 	int idx, err = 0;
2489 	struct cs_etm_traceid_queue *tidq;
2490 	struct int_node *inode;
2491 
2492 	/* Go through each buffer in the queue and decode them one by one */
2493 	while (1) {
2494 		err = cs_etm__get_data_block(etmq);
2495 		if (err <= 0)
2496 			return err;
2497 
2498 		/* Run trace decoder until buffer consumed or end of trace */
2499 		do {
2500 			err = cs_etm__decode_data_block(etmq);
2501 			if (err)
2502 				return err;
2503 
2504 			/*
2505 			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2506 			 * single traceID queue because each TID has a separate
2507 			 * buffer. But here in per-cpu mode we need to iterate
2508 			 * over each channel instead.
2509 			 */
2510 			intlist__for_each_entry(inode,
2511 						etmq->traceid_queues_list) {
2512 				idx = (int)(intptr_t)inode->priv;
2513 				tidq = etmq->traceid_queues[idx];
2514 				cs_etm__process_traceid_queue(etmq, tidq);
2515 			}
2516 		} while (etmq->buf_len);
2517 
2518 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2519 			idx = (int)(intptr_t)inode->priv;
2520 			tidq = etmq->traceid_queues[idx];
2521 			/* Flush any remaining branch stack entries */
2522 			err = cs_etm__end_block(etmq, tidq);
2523 			if (err)
2524 				return err;
2525 		}
2526 	}
2527 
2528 	return err;
2529 }
2530 
2531 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2532 					   pid_t tid)
2533 {
2534 	unsigned int i;
2535 	struct auxtrace_queues *queues = &etm->queues;
2536 
2537 	for (i = 0; i < queues->nr_queues; i++) {
2538 		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2539 		struct cs_etm_queue *etmq = queue->priv;
2540 		struct cs_etm_traceid_queue *tidq;
2541 
2542 		if (!etmq)
2543 			continue;
2544 
2545 		if (etm->per_thread_decoding) {
2546 			tidq = cs_etm__etmq_get_traceid_queue(
2547 				etmq, CS_ETM_PER_THREAD_TRACEID);
2548 
2549 			if (!tidq)
2550 				continue;
2551 
2552 			if (tid == -1 || thread__tid(tidq->thread) == tid)
2553 				cs_etm__run_per_thread_timeless_decoder(etmq);
2554 		} else
2555 			cs_etm__run_per_cpu_timeless_decoder(etmq);
2556 	}
2557 
2558 	return 0;
2559 }
2560 
2561 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2562 {
2563 	int ret = 0;
2564 	unsigned int cs_queue_nr, queue_nr, i;
2565 	u8 trace_chan_id;
2566 	u64 cs_timestamp;
2567 	struct auxtrace_queue *queue;
2568 	struct cs_etm_queue *etmq;
2569 	struct cs_etm_traceid_queue *tidq;
2570 
2571 	/*
2572 	 * Pre-populate the heap with one entry from each queue so that we can
2573 	 * start processing in time order across all queues.
2574 	 */
2575 	for (i = 0; i < etm->queues.nr_queues; i++) {
2576 		etmq = etm->queues.queue_array[i].priv;
2577 		if (!etmq)
2578 			continue;
2579 
2580 		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2581 		if (ret)
2582 			return ret;
2583 	}
2584 
2585 	while (1) {
2586 		if (!etm->heap.heap_cnt)
2587 			goto out;
2588 
2589 		/* Take the entry at the top of the min heap */
2590 		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2591 		queue_nr = TO_QUEUE_NR(cs_queue_nr);
2592 		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2593 		queue = &etm->queues.queue_array[queue_nr];
2594 		etmq = queue->priv;
2595 
2596 		/*
2597 		 * Remove the top entry from the heap since we are about
2598 		 * to process it.
2599 		 */
2600 		auxtrace_heap__pop(&etm->heap);
2601 
2602 		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2603 		if (!tidq) {
2604 			/*
2605 			 * No traceID queue has been allocated for this traceID,
2606 			 * which means something somewhere went very wrong.  No
2607 			 * other choice than simply exit.
2608 			 */
2609 			ret = -EINVAL;
2610 			goto out;
2611 		}
2612 
2613 		/*
2614 		 * Packets associated with this timestamp are already in
2615 		 * the etmq's traceID queue, so process them.
2616 		 */
2617 		ret = cs_etm__process_traceid_queue(etmq, tidq);
2618 		if (ret < 0)
2619 			goto out;
2620 
2621 		/*
2622 		 * Packets for this timestamp have been processed, time to
2623 		 * move on to the next timestamp, fetching a new auxtrace_buffer
2624 		 * if need be.
2625 		 */
2626 refetch:
2627 		ret = cs_etm__get_data_block(etmq);
2628 		if (ret < 0)
2629 			goto out;
2630 
2631 		/*
2632 		 * No more auxtrace_buffers to process in this etmq, simply
2633 		 * move on to another entry in the auxtrace_heap.
2634 		 */
2635 		if (!ret)
2636 			continue;
2637 
2638 		ret = cs_etm__decode_data_block(etmq);
2639 		if (ret)
2640 			goto out;
2641 
2642 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2643 
2644 		if (!cs_timestamp) {
2645 			/*
2646 			 * Function cs_etm__decode_data_block() returns when
2647 			 * there is no more traces to decode in the current
2648 			 * auxtrace_buffer OR when a timestamp has been
2649 			 * encountered on any of the traceID queues.  Since we
2650 			 * did not get a timestamp, there is no more traces to
2651 			 * process in this auxtrace_buffer.  As such empty and
2652 			 * flush all traceID queues.
2653 			 */
2654 			cs_etm__clear_all_traceid_queues(etmq);
2655 
2656 			/* Fetch another auxtrace_buffer for this etmq */
2657 			goto refetch;
2658 		}
2659 
2660 		/*
2661 		 * Add to the min heap the timestamp for packets that have
2662 		 * just been decoded.  They will be processed and synthesized
2663 		 * during the next call to cs_etm__process_traceid_queue() for
2664 		 * this queue/traceID.
2665 		 */
2666 		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2667 		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2668 	}
2669 
2670 out:
2671 	return ret;
2672 }
2673 
2674 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2675 					union perf_event *event)
2676 {
2677 	struct thread *th;
2678 
2679 	if (etm->timeless_decoding)
2680 		return 0;
2681 
2682 	/*
2683 	 * Add the tid/pid to the log so that we can get a match when we get a
2684 	 * contextID from the decoder. Only track for the host: only kernel
2685 	 * trace is supported for guests which wouldn't need pids so this should
2686 	 * be fine.
2687 	 */
2688 	th = machine__findnew_thread(&etm->session->machines.host,
2689 				     event->itrace_start.pid,
2690 				     event->itrace_start.tid);
2691 	if (!th)
2692 		return -ENOMEM;
2693 
2694 	thread__put(th);
2695 
2696 	return 0;
2697 }
2698 
2699 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2700 					   union perf_event *event)
2701 {
2702 	struct thread *th;
2703 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2704 
2705 	/*
2706 	 * Context switch in per-thread mode are irrelevant since perf
2707 	 * will start/stop tracing as the process is scheduled.
2708 	 */
2709 	if (etm->timeless_decoding)
2710 		return 0;
2711 
2712 	/*
2713 	 * SWITCH_IN events carry the next process to be switched out while
2714 	 * SWITCH_OUT events carry the process to be switched in.  As such
2715 	 * we don't care about IN events.
2716 	 */
2717 	if (!out)
2718 		return 0;
2719 
2720 	/*
2721 	 * Add the tid/pid to the log so that we can get a match when we get a
2722 	 * contextID from the decoder. Only track for the host: only kernel
2723 	 * trace is supported for guests which wouldn't need pids so this should
2724 	 * be fine.
2725 	 */
2726 	th = machine__findnew_thread(&etm->session->machines.host,
2727 				     event->context_switch.next_prev_pid,
2728 				     event->context_switch.next_prev_tid);
2729 	if (!th)
2730 		return -ENOMEM;
2731 
2732 	thread__put(th);
2733 
2734 	return 0;
2735 }
2736 
2737 static int cs_etm__process_event(struct perf_session *session,
2738 				 union perf_event *event,
2739 				 struct perf_sample *sample,
2740 				 struct perf_tool *tool)
2741 {
2742 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2743 						   struct cs_etm_auxtrace,
2744 						   auxtrace);
2745 
2746 	if (dump_trace)
2747 		return 0;
2748 
2749 	if (!tool->ordered_events) {
2750 		pr_err("CoreSight ETM Trace requires ordered events\n");
2751 		return -EINVAL;
2752 	}
2753 
2754 	switch (event->header.type) {
2755 	case PERF_RECORD_EXIT:
2756 		/*
2757 		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2758 		 * start the decode because we know there will be no more trace from
2759 		 * this thread. All this does is emit samples earlier than waiting for
2760 		 * the flush in other modes, but with timestamps it makes sense to wait
2761 		 * for flush so that events from different threads are interleaved
2762 		 * properly.
2763 		 */
2764 		if (etm->per_thread_decoding && etm->timeless_decoding)
2765 			return cs_etm__process_timeless_queues(etm,
2766 							       event->fork.tid);
2767 		break;
2768 
2769 	case PERF_RECORD_ITRACE_START:
2770 		return cs_etm__process_itrace_start(etm, event);
2771 
2772 	case PERF_RECORD_SWITCH_CPU_WIDE:
2773 		return cs_etm__process_switch_cpu_wide(etm, event);
2774 
2775 	case PERF_RECORD_AUX:
2776 		/*
2777 		 * Record the latest kernel timestamp available in the header
2778 		 * for samples so that synthesised samples occur from this point
2779 		 * onwards.
2780 		 */
2781 		if (sample->time && (sample->time != (u64)-1))
2782 			etm->latest_kernel_timestamp = sample->time;
2783 		break;
2784 
2785 	default:
2786 		break;
2787 	}
2788 
2789 	return 0;
2790 }
2791 
2792 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2793 			     struct perf_record_auxtrace *event)
2794 {
2795 	struct auxtrace_buffer *buf;
2796 	unsigned int i;
2797 	/*
2798 	 * Find all buffers with same reference in the queues and dump them.
2799 	 * This is because the queues can contain multiple entries of the same
2800 	 * buffer that were split on aux records.
2801 	 */
2802 	for (i = 0; i < etm->queues.nr_queues; ++i)
2803 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2804 			if (buf->reference == event->reference)
2805 				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2806 }
2807 
2808 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2809 					  union perf_event *event,
2810 					  struct perf_tool *tool __maybe_unused)
2811 {
2812 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2813 						   struct cs_etm_auxtrace,
2814 						   auxtrace);
2815 	if (!etm->data_queued) {
2816 		struct auxtrace_buffer *buffer;
2817 		off_t  data_offset;
2818 		int fd = perf_data__fd(session->data);
2819 		bool is_pipe = perf_data__is_pipe(session->data);
2820 		int err;
2821 		int idx = event->auxtrace.idx;
2822 
2823 		if (is_pipe)
2824 			data_offset = 0;
2825 		else {
2826 			data_offset = lseek(fd, 0, SEEK_CUR);
2827 			if (data_offset == -1)
2828 				return -errno;
2829 		}
2830 
2831 		err = auxtrace_queues__add_event(&etm->queues, session,
2832 						 event, data_offset, &buffer);
2833 		if (err)
2834 			return err;
2835 
2836 		/*
2837 		 * Knowing if the trace is formatted or not requires a lookup of
2838 		 * the aux record so only works in non-piped mode where data is
2839 		 * queued in cs_etm__queue_aux_records(). Always assume
2840 		 * formatted in piped mode (true).
2841 		 */
2842 		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2843 					  idx, true, -1);
2844 		if (err)
2845 			return err;
2846 
2847 		if (dump_trace)
2848 			if (auxtrace_buffer__get_data(buffer, fd)) {
2849 				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2850 				auxtrace_buffer__put_data(buffer);
2851 			}
2852 	} else if (dump_trace)
2853 		dump_queued_data(etm, &event->auxtrace);
2854 
2855 	return 0;
2856 }
2857 
2858 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2859 {
2860 	struct evsel *evsel;
2861 	struct evlist *evlist = etm->session->evlist;
2862 
2863 	/* Override timeless mode with user input from --itrace=Z */
2864 	if (etm->synth_opts.timeless_decoding) {
2865 		etm->timeless_decoding = true;
2866 		return 0;
2867 	}
2868 
2869 	/*
2870 	 * Find the cs_etm evsel and look at what its timestamp setting was
2871 	 */
2872 	evlist__for_each_entry(evlist, evsel)
2873 		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2874 			etm->timeless_decoding =
2875 				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2876 			return 0;
2877 		}
2878 
2879 	pr_err("CS ETM: Couldn't find ETM evsel\n");
2880 	return -EINVAL;
2881 }
2882 
2883 /*
2884  * Read a single cpu parameter block from the auxtrace_info priv block.
2885  *
2886  * For version 1 there is a per cpu nr_params entry. If we are handling
2887  * version 1 file, then there may be less, the same, or more params
2888  * indicated by this value than the compile time number we understand.
2889  *
2890  * For a version 0 info block, there are a fixed number, and we need to
2891  * fill out the nr_param value in the metadata we create.
2892  */
2893 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2894 				    int out_blk_size, int nr_params_v0)
2895 {
2896 	u64 *metadata = NULL;
2897 	int hdr_version;
2898 	int nr_in_params, nr_out_params, nr_cmn_params;
2899 	int i, k;
2900 
2901 	metadata = zalloc(sizeof(*metadata) * out_blk_size);
2902 	if (!metadata)
2903 		return NULL;
2904 
2905 	/* read block current index & version */
2906 	i = *buff_in_offset;
2907 	hdr_version = buff_in[CS_HEADER_VERSION];
2908 
2909 	if (!hdr_version) {
2910 	/* read version 0 info block into a version 1 metadata block  */
2911 		nr_in_params = nr_params_v0;
2912 		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2913 		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2914 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2915 		/* remaining block params at offset +1 from source */
2916 		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2917 			metadata[k + 1] = buff_in[i + k];
2918 		/* version 0 has 2 common params */
2919 		nr_cmn_params = 2;
2920 	} else {
2921 	/* read version 1 info block - input and output nr_params may differ */
2922 		/* version 1 has 3 common params */
2923 		nr_cmn_params = 3;
2924 		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2925 
2926 		/* if input has more params than output - skip excess */
2927 		nr_out_params = nr_in_params + nr_cmn_params;
2928 		if (nr_out_params > out_blk_size)
2929 			nr_out_params = out_blk_size;
2930 
2931 		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2932 			metadata[k] = buff_in[i + k];
2933 
2934 		/* record the actual nr params we copied */
2935 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2936 	}
2937 
2938 	/* adjust in offset by number of in params used */
2939 	i += nr_in_params + nr_cmn_params;
2940 	*buff_in_offset = i;
2941 	return metadata;
2942 }
2943 
2944 /**
2945  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2946  * on the bounds of aux_event, if it matches with the buffer that's at
2947  * file_offset.
2948  *
2949  * Normally, whole auxtrace buffers would be added to the queue. But we
2950  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2951  * is reset across each buffer, so splitting the buffers up in advance has
2952  * the same effect.
2953  */
2954 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2955 				      struct perf_record_aux *aux_event, struct perf_sample *sample)
2956 {
2957 	int err;
2958 	char buf[PERF_SAMPLE_MAX_SIZE];
2959 	union perf_event *auxtrace_event_union;
2960 	struct perf_record_auxtrace *auxtrace_event;
2961 	union perf_event auxtrace_fragment;
2962 	__u64 aux_offset, aux_size;
2963 	__u32 idx;
2964 	bool formatted;
2965 
2966 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2967 						   struct cs_etm_auxtrace,
2968 						   auxtrace);
2969 
2970 	/*
2971 	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2972 	 * from looping through the auxtrace index.
2973 	 */
2974 	err = perf_session__peek_event(session, file_offset, buf,
2975 				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2976 	if (err)
2977 		return err;
2978 	auxtrace_event = &auxtrace_event_union->auxtrace;
2979 	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2980 		return -EINVAL;
2981 
2982 	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2983 		auxtrace_event->header.size != sz) {
2984 		return -EINVAL;
2985 	}
2986 
2987 	/*
2988 	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
2989 	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
2990 	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
2991 	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
2992 	 * Return 'not found' if mismatch.
2993 	 */
2994 	if (auxtrace_event->cpu == (__u32) -1) {
2995 		etm->per_thread_decoding = true;
2996 		if (auxtrace_event->tid != sample->tid)
2997 			return 1;
2998 	} else if (auxtrace_event->cpu != sample->cpu) {
2999 		if (etm->per_thread_decoding) {
3000 			/*
3001 			 * Found a per-cpu buffer after a per-thread one was
3002 			 * already found
3003 			 */
3004 			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3005 			return -EINVAL;
3006 		}
3007 		return 1;
3008 	}
3009 
3010 	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3011 		/*
3012 		 * Clamp size in snapshot mode. The buffer size is clamped in
3013 		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3014 		 * the buffer size.
3015 		 */
3016 		aux_size = min(aux_event->aux_size, auxtrace_event->size);
3017 
3018 		/*
3019 		 * In this mode, the head also points to the end of the buffer so aux_offset
3020 		 * needs to have the size subtracted so it points to the beginning as in normal mode
3021 		 */
3022 		aux_offset = aux_event->aux_offset - aux_size;
3023 	} else {
3024 		aux_size = aux_event->aux_size;
3025 		aux_offset = aux_event->aux_offset;
3026 	}
3027 
3028 	if (aux_offset >= auxtrace_event->offset &&
3029 	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3030 		/*
3031 		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3032 		 * based on the sizes of the aux event, and queue that fragment.
3033 		 */
3034 		auxtrace_fragment.auxtrace = *auxtrace_event;
3035 		auxtrace_fragment.auxtrace.size = aux_size;
3036 		auxtrace_fragment.auxtrace.offset = aux_offset;
3037 		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3038 
3039 		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3040 			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3041 		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3042 						 file_offset, NULL);
3043 		if (err)
3044 			return err;
3045 
3046 		idx = auxtrace_event->idx;
3047 		formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
3048 		return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
3049 					   idx, formatted, sample->cpu);
3050 	}
3051 
3052 	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3053 	return 1;
3054 }
3055 
3056 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3057 					u64 offset __maybe_unused, void *data __maybe_unused)
3058 {
3059 	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3060 	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3061 		(*(int *)data)++; /* increment found count */
3062 		return cs_etm__process_aux_output_hw_id(session, event);
3063 	}
3064 	return 0;
3065 }
3066 
3067 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3068 					u64 offset __maybe_unused, void *data __maybe_unused)
3069 {
3070 	struct perf_sample sample;
3071 	int ret;
3072 	struct auxtrace_index_entry *ent;
3073 	struct auxtrace_index *auxtrace_index;
3074 	struct evsel *evsel;
3075 	size_t i;
3076 
3077 	/* Don't care about any other events, we're only queuing buffers for AUX events */
3078 	if (event->header.type != PERF_RECORD_AUX)
3079 		return 0;
3080 
3081 	if (event->header.size < sizeof(struct perf_record_aux))
3082 		return -EINVAL;
3083 
3084 	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3085 	if (!event->aux.aux_size)
3086 		return 0;
3087 
3088 	/*
3089 	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3090 	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3091 	 */
3092 	evsel = evlist__event2evsel(session->evlist, event);
3093 	if (!evsel)
3094 		return -EINVAL;
3095 	ret = evsel__parse_sample(evsel, event, &sample);
3096 	if (ret)
3097 		return ret;
3098 
3099 	/*
3100 	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3101 	 */
3102 	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3103 		for (i = 0; i < auxtrace_index->nr; i++) {
3104 			ent = &auxtrace_index->entries[i];
3105 			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3106 							 ent->sz, &event->aux, &sample);
3107 			/*
3108 			 * Stop search on error or successful values. Continue search on
3109 			 * 1 ('not found')
3110 			 */
3111 			if (ret != 1)
3112 				return ret;
3113 		}
3114 	}
3115 
3116 	/*
3117 	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3118 	 * don't exit with an error because it will still be possible to decode other aux records.
3119 	 */
3120 	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3121 	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3122 	return 0;
3123 }
3124 
3125 static int cs_etm__queue_aux_records(struct perf_session *session)
3126 {
3127 	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3128 								struct auxtrace_index, list);
3129 	if (index && index->nr > 0)
3130 		return perf_session__peek_events(session, session->header.data_offset,
3131 						 session->header.data_size,
3132 						 cs_etm__queue_aux_records_cb, NULL);
3133 
3134 	/*
3135 	 * We would get here if there are no entries in the index (either no auxtrace
3136 	 * buffers or no index at all). Fail silently as there is the possibility of
3137 	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3138 	 * false.
3139 	 *
3140 	 * In that scenario, buffers will not be split by AUX records.
3141 	 */
3142 	return 0;
3143 }
3144 
3145 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3146 				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3147 
3148 /*
3149  * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3150  * timestamps).
3151  */
3152 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3153 {
3154 	int j;
3155 
3156 	for (j = 0; j < num_cpu; j++) {
3157 		switch (metadata[j][CS_ETM_MAGIC]) {
3158 		case __perf_cs_etmv4_magic:
3159 			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3160 				return false;
3161 			break;
3162 		case __perf_cs_ete_magic:
3163 			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3164 				return false;
3165 			break;
3166 		default:
3167 			/* Unknown / unsupported magic number. */
3168 			return false;
3169 		}
3170 	}
3171 	return true;
3172 }
3173 
3174 /* map trace ids to correct metadata block, from information in metadata */
3175 static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
3176 {
3177 	u64 cs_etm_magic;
3178 	u8 trace_chan_id;
3179 	int i, err;
3180 
3181 	for (i = 0; i < num_cpu; i++) {
3182 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3183 		switch (cs_etm_magic) {
3184 		case __perf_cs_etmv3_magic:
3185 			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3186 			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3187 			break;
3188 		case __perf_cs_etmv4_magic:
3189 		case __perf_cs_ete_magic:
3190 			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3191 			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3192 			break;
3193 		default:
3194 			/* unknown magic number */
3195 			return -EINVAL;
3196 		}
3197 		err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
3198 		if (err)
3199 			return err;
3200 	}
3201 	return 0;
3202 }
3203 
3204 /*
3205  * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3206  * unused value to reduce the number of unneeded decoders created.
3207  */
3208 static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3209 {
3210 	u64 cs_etm_magic;
3211 	int i;
3212 
3213 	for (i = 0; i < num_cpu; i++) {
3214 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3215 		switch (cs_etm_magic) {
3216 		case __perf_cs_etmv3_magic:
3217 			if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3218 				metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3219 			break;
3220 		case __perf_cs_etmv4_magic:
3221 		case __perf_cs_ete_magic:
3222 			if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3223 				metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3224 			break;
3225 		default:
3226 			/* unknown magic number */
3227 			return -EINVAL;
3228 		}
3229 	}
3230 	return 0;
3231 }
3232 
3233 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3234 				       struct perf_session *session)
3235 {
3236 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3237 	struct cs_etm_auxtrace *etm = NULL;
3238 	struct perf_record_time_conv *tc = &session->time_conv;
3239 	int event_header_size = sizeof(struct perf_event_header);
3240 	int total_size = auxtrace_info->header.size;
3241 	int priv_size = 0;
3242 	int num_cpu;
3243 	int err = 0;
3244 	int aux_hw_id_found;
3245 	int i, j;
3246 	u64 *ptr = NULL;
3247 	u64 **metadata = NULL;
3248 
3249 	/*
3250 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
3251 	 * has to be made for each packet that gets decoded, optimizing access
3252 	 * in anything other than a sequential array is worth doing.
3253 	 */
3254 	traceid_list = intlist__new(NULL);
3255 	if (!traceid_list)
3256 		return -ENOMEM;
3257 
3258 	/* First the global part */
3259 	ptr = (u64 *) auxtrace_info->priv;
3260 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3261 	metadata = zalloc(sizeof(*metadata) * num_cpu);
3262 	if (!metadata) {
3263 		err = -ENOMEM;
3264 		goto err_free_traceid_list;
3265 	}
3266 
3267 	/* Start parsing after the common part of the header */
3268 	i = CS_HEADER_VERSION_MAX;
3269 
3270 	/*
3271 	 * The metadata is stored in the auxtrace_info section and encodes
3272 	 * the configuration of the ARM embedded trace macrocell which is
3273 	 * required by the trace decoder to properly decode the trace due
3274 	 * to its highly compressed nature.
3275 	 */
3276 	for (j = 0; j < num_cpu; j++) {
3277 		if (ptr[i] == __perf_cs_etmv3_magic) {
3278 			metadata[j] =
3279 				cs_etm__create_meta_blk(ptr, &i,
3280 							CS_ETM_PRIV_MAX,
3281 							CS_ETM_NR_TRC_PARAMS_V0);
3282 		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3283 			metadata[j] =
3284 				cs_etm__create_meta_blk(ptr, &i,
3285 							CS_ETMV4_PRIV_MAX,
3286 							CS_ETMV4_NR_TRC_PARAMS_V0);
3287 		} else if (ptr[i] == __perf_cs_ete_magic) {
3288 			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3289 		} else {
3290 			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3291 				  ptr[i]);
3292 			err = -EINVAL;
3293 			goto err_free_metadata;
3294 		}
3295 
3296 		if (!metadata[j]) {
3297 			err = -ENOMEM;
3298 			goto err_free_metadata;
3299 		}
3300 	}
3301 
3302 	/*
3303 	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3304 	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3305 	 * global metadata, and each cpu's metadata respectively.
3306 	 * The following tests if the correct number of double words was
3307 	 * present in the auxtrace info section.
3308 	 */
3309 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3310 	if (i * 8 != priv_size) {
3311 		err = -EINVAL;
3312 		goto err_free_metadata;
3313 	}
3314 
3315 	etm = zalloc(sizeof(*etm));
3316 
3317 	if (!etm) {
3318 		err = -ENOMEM;
3319 		goto err_free_metadata;
3320 	}
3321 
3322 	/*
3323 	 * As all the ETMs run at the same exception level, the system should
3324 	 * have the same PID format crossing CPUs.  So cache the PID format
3325 	 * and reuse it for sequential decoding.
3326 	 */
3327 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3328 
3329 	err = auxtrace_queues__init(&etm->queues);
3330 	if (err)
3331 		goto err_free_etm;
3332 
3333 	if (session->itrace_synth_opts->set) {
3334 		etm->synth_opts = *session->itrace_synth_opts;
3335 	} else {
3336 		itrace_synth_opts__set_default(&etm->synth_opts,
3337 				session->itrace_synth_opts->default_no_sample);
3338 		etm->synth_opts.callchain = false;
3339 	}
3340 
3341 	etm->session = session;
3342 
3343 	etm->num_cpu = num_cpu;
3344 	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3345 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3346 	etm->metadata = metadata;
3347 	etm->auxtrace_type = auxtrace_info->type;
3348 
3349 	if (etm->synth_opts.use_timestamp)
3350 		/*
3351 		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3352 		 * therefore the decoder cannot know if the timestamp trace is
3353 		 * same with the kernel time.
3354 		 *
3355 		 * If a user has knowledge for the working platform and can
3356 		 * specify itrace option 'T' to tell decoder to forcely use the
3357 		 * traced timestamp as the kernel time.
3358 		 */
3359 		etm->has_virtual_ts = true;
3360 	else
3361 		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3362 		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3363 
3364 	if (!etm->has_virtual_ts)
3365 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3366 			    "The time field of the samples will not be set accurately.\n"
3367 			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3368 			    "you can specify the itrace option 'T' for timestamp decoding\n"
3369 			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3370 
3371 	etm->auxtrace.process_event = cs_etm__process_event;
3372 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3373 	etm->auxtrace.flush_events = cs_etm__flush_events;
3374 	etm->auxtrace.free_events = cs_etm__free_events;
3375 	etm->auxtrace.free = cs_etm__free;
3376 	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3377 	session->auxtrace = &etm->auxtrace;
3378 
3379 	err = cs_etm__setup_timeless_decoding(etm);
3380 	if (err)
3381 		return err;
3382 
3383 	etm->tc.time_shift = tc->time_shift;
3384 	etm->tc.time_mult = tc->time_mult;
3385 	etm->tc.time_zero = tc->time_zero;
3386 	if (event_contains(*tc, time_cycles)) {
3387 		etm->tc.time_cycles = tc->time_cycles;
3388 		etm->tc.time_mask = tc->time_mask;
3389 		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3390 		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3391 	}
3392 	err = cs_etm__synth_events(etm, session);
3393 	if (err)
3394 		goto err_free_queues;
3395 
3396 	/*
3397 	 * Map Trace ID values to CPU metadata.
3398 	 *
3399 	 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3400 	 * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3401 	 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3402 	 *
3403 	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3404 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3405 	 * in which case a different value will be used. This means an older perf may still
3406 	 * be able to record and read files generate on a newer system.
3407 	 *
3408 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3409 	 * those packets. If they are there then the values will be mapped and plugged into
3410 	 * the metadata. We then set any remaining metadata values with the used flag to a
3411 	 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3412 	 *
3413 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3414 	 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
3415 	 * flags if present.
3416 	 */
3417 
3418 	/* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3419 	aux_hw_id_found = 0;
3420 	err = perf_session__peek_events(session, session->header.data_offset,
3421 					session->header.data_size,
3422 					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3423 	if (err)
3424 		goto err_free_queues;
3425 
3426 	/* if HW ID found then clear any unused metadata ID values */
3427 	if (aux_hw_id_found)
3428 		err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3429 	/* otherwise, this is a file with metadata values only, map from metadata */
3430 	else
3431 		err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3432 
3433 	if (err)
3434 		goto err_free_queues;
3435 
3436 	err = cs_etm__queue_aux_records(session);
3437 	if (err)
3438 		goto err_free_queues;
3439 
3440 	etm->data_queued = etm->queues.populated;
3441 	return 0;
3442 
3443 err_free_queues:
3444 	auxtrace_queues__free(&etm->queues);
3445 	session->auxtrace = NULL;
3446 err_free_etm:
3447 	zfree(&etm);
3448 err_free_metadata:
3449 	/* No need to check @metadata[j], free(NULL) is supported */
3450 	for (j = 0; j < num_cpu; j++)
3451 		zfree(&metadata[j]);
3452 	zfree(&metadata);
3453 err_free_traceid_list:
3454 	intlist__delete(traceid_list);
3455 	return err;
3456 }
3457