xref: /linux/tools/perf/util/cs-etm.c (revision d261f9ebcf424535fe04e720a1cfa023be409f52)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17 
18 #include <stdlib.h>
19 
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42 
43 struct cs_etm_auxtrace {
44 	struct auxtrace auxtrace;
45 	struct auxtrace_queues queues;
46 	struct auxtrace_heap heap;
47 	struct itrace_synth_opts synth_opts;
48 	struct perf_session *session;
49 	struct perf_tsc_conversion tc;
50 
51 	/*
52 	 * Timeless has no timestamps in the trace so overlapping mmap lookups
53 	 * are less accurate but produces smaller trace data. We use context IDs
54 	 * in the trace instead of matching timestamps with fork records so
55 	 * they're not really needed in the general case. Overlapping mmaps
56 	 * happen in cases like between a fork and an exec.
57 	 */
58 	bool timeless_decoding;
59 
60 	/*
61 	 * Per-thread ignores the trace channel ID and instead assumes that
62 	 * everything in a buffer comes from the same process regardless of
63 	 * which CPU it ran on. It also implies no context IDs so the TID is
64 	 * taken from the auxtrace buffer.
65 	 */
66 	bool per_thread_decoding;
67 	bool snapshot_mode;
68 	bool data_queued;
69 	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
70 
71 	int num_cpu;
72 	u64 latest_kernel_timestamp;
73 	u32 auxtrace_type;
74 	u64 branches_sample_type;
75 	u64 branches_id;
76 	u64 instructions_sample_type;
77 	u64 instructions_sample_period;
78 	u64 instructions_id;
79 	u64 **metadata;
80 	unsigned int pmu_type;
81 	enum cs_etm_pid_fmt pid_fmt;
82 };
83 
84 struct cs_etm_traceid_queue {
85 	u8 trace_chan_id;
86 	u64 period_instructions;
87 	size_t last_branch_pos;
88 	union perf_event *event_buf;
89 	struct thread *thread;
90 	struct thread *prev_packet_thread;
91 	ocsd_ex_level prev_packet_el;
92 	ocsd_ex_level el;
93 	struct branch_stack *last_branch;
94 	struct branch_stack *last_branch_rb;
95 	struct cs_etm_packet *prev_packet;
96 	struct cs_etm_packet *packet;
97 	struct cs_etm_packet_queue packet_queue;
98 };
99 
100 struct cs_etm_queue {
101 	struct cs_etm_auxtrace *etm;
102 	struct cs_etm_decoder *decoder;
103 	struct auxtrace_buffer *buffer;
104 	unsigned int queue_nr;
105 	u8 pending_timestamp_chan_id;
106 	u64 offset;
107 	const unsigned char *buf;
108 	size_t buf_len, buf_used;
109 	/* Conversion between traceID and index in traceid_queues array */
110 	struct intlist *traceid_queues_list;
111 	struct cs_etm_traceid_queue **traceid_queues;
112 };
113 
/* RB tree giving fast traceID -> metadata pointer lookups */
static struct intlist *traceid_list;

/* Forward declarations for helpers defined further down in this file */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);

/* A PTM is identified by ETMIDR [11:8] being b0011 */
#define ETMIDR_PTM_VERSION 0x00000300
125 
/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify the auxtrace_heap_XYZ() API, or simply
 * encode the etm queue number in the upper 16 bits and the trace channel ID
 * in the lower 16 bits.
 *
 * Every macro argument is parenthesised so that callers may pass arbitrary
 * expressions (e.g. "a | b") without operator precedence changing the result.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
		      ((queue_nr) << 16 | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
136 
137 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
138 {
139 	etmidr &= ETMIDR_PTM_VERSION;
140 
141 	if (etmidr == ETMIDR_PTM_VERSION)
142 		return CS_ETM_PROTO_PTM;
143 
144 	return CS_ETM_PROTO_ETMV3;
145 }
146 
147 static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
148 {
149 	struct int_node *inode;
150 	u64 *metadata;
151 
152 	inode = intlist__find(traceid_list, trace_chan_id);
153 	if (!inode)
154 		return -EINVAL;
155 
156 	metadata = inode->priv;
157 	*magic = metadata[CS_ETM_MAGIC];
158 	return 0;
159 }
160 
161 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
162 {
163 	struct int_node *inode;
164 	u64 *metadata;
165 
166 	inode = intlist__find(traceid_list, trace_chan_id);
167 	if (!inode)
168 		return -EINVAL;
169 
170 	metadata = inode->priv;
171 	*cpu = (int)metadata[CS_ETM_CPU];
172 	return 0;
173 }
174 
175 /*
176  * The returned PID format is presented as an enum:
177  *
178  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
179  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
180  *   CS_ETM_PIDFMT_NONE: No context IDs
181  *
182  * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
183  * are enabled at the same time when the session runs on an EL2 kernel.
184  * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
185  * recorded in the trace data, the tool will selectively use
186  * CONTEXTIDR_EL2 as PID.
187  *
188  * The result is cached in etm->pid_fmt so this function only needs to be called
189  * when processing the aux info.
190  */
191 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
192 {
193 	u64 val;
194 
195 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
196 		val = metadata[CS_ETM_ETMCR];
197 		/* CONTEXTIDR is traced */
198 		if (val & BIT(ETM_OPT_CTXTID))
199 			return CS_ETM_PIDFMT_CTXTID;
200 	} else {
201 		val = metadata[CS_ETMV4_TRCCONFIGR];
202 		/* CONTEXTIDR_EL2 is traced */
203 		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
204 			return CS_ETM_PIDFMT_CTXTID2;
205 		/* CONTEXTIDR_EL1 is traced */
206 		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
207 			return CS_ETM_PIDFMT_CTXTID;
208 	}
209 
210 	return CS_ETM_PIDFMT_NONE;
211 }
212 
213 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
214 {
215 	return etmq->etm->pid_fmt;
216 }
217 
218 static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
219 {
220 	struct int_node *inode;
221 
222 	/* Get an RB node for this CPU */
223 	inode = intlist__findnew(traceid_list, trace_chan_id);
224 
225 	/* Something went wrong, no need to continue */
226 	if (!inode)
227 		return -ENOMEM;
228 
229 	/*
230 	 * The node for that CPU should not be taken.
231 	 * Back out if that's the case.
232 	 */
233 	if (inode->priv)
234 		return -EINVAL;
235 
236 	/* All good, associate the traceID with the metadata pointer */
237 	inode->priv = cpu_metadata;
238 
239 	return 0;
240 }
241 
242 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
243 {
244 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
245 
246 	switch (cs_etm_magic) {
247 	case __perf_cs_etmv3_magic:
248 		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
249 				      CORESIGHT_TRACE_ID_VAL_MASK);
250 		break;
251 	case __perf_cs_etmv4_magic:
252 	case __perf_cs_ete_magic:
253 		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
254 				      CORESIGHT_TRACE_ID_VAL_MASK);
255 		break;
256 	default:
257 		return -EINVAL;
258 	}
259 	return 0;
260 }
261 
262 /*
263  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
264  * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
265  */
266 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
267 {
268 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
269 
270 	switch (cs_etm_magic) {
271 	case __perf_cs_etmv3_magic:
272 		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
273 		break;
274 	case __perf_cs_etmv4_magic:
275 	case __perf_cs_ete_magic:
276 		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
277 		break;
278 
279 	default:
280 		return -EINVAL;
281 	}
282 	return 0;
283 }
284 
285 /*
286  * Get a metadata index for a specific cpu from an array.
287  *
288  */
289 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
290 {
291 	int i;
292 
293 	for (i = 0; i < etm->num_cpu; i++) {
294 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
295 			return i;
296 		}
297 	}
298 
299 	return -1;
300 }
301 
302 /*
303  * Get a metadata for a specific cpu from an array.
304  *
305  */
306 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
307 {
308 	int idx = get_cpu_data_idx(etm, cpu);
309 
310 	return (idx != -1) ? etm->metadata[idx] : NULL;
311 }
312 
313 /*
314  * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
315  *
316  * The payload associates the Trace ID and the CPU.
317  * The routine is tolerant of seeing multiple packets with the same association,
318  * but a CPU / Trace ID association changing during a session is an error.
319  */
320 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
321 					    union perf_event *event)
322 {
323 	struct cs_etm_auxtrace *etm;
324 	struct perf_sample sample;
325 	struct int_node *inode;
326 	struct evsel *evsel;
327 	u64 *cpu_data;
328 	u64 hw_id;
329 	int cpu, version, err;
330 	u8 trace_chan_id, curr_chan_id;
331 
332 	/* extract and parse the HW ID */
333 	hw_id = event->aux_output_hw_id.hw_id;
334 	version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
335 	trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
336 
337 	/* check that we can handle this version */
338 	if (version > CS_AUX_HW_ID_CURR_VERSION) {
339 		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
340 		       version);
341 		return -EINVAL;
342 	}
343 
344 	/* get access to the etm metadata */
345 	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
346 	if (!etm || !etm->metadata)
347 		return -EINVAL;
348 
349 	/* parse the sample to get the CPU */
350 	evsel = evlist__event2evsel(session->evlist, event);
351 	if (!evsel)
352 		return -EINVAL;
353 	err = evsel__parse_sample(evsel, event, &sample);
354 	if (err)
355 		return err;
356 	cpu = sample.cpu;
357 	if (cpu == -1) {
358 		/* no CPU in the sample - possibly recorded with an old version of perf */
359 		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
360 		return -EINVAL;
361 	}
362 
363 	/* See if the ID is mapped to a CPU, and it matches the current CPU */
364 	inode = intlist__find(traceid_list, trace_chan_id);
365 	if (inode) {
366 		cpu_data = inode->priv;
367 		if ((int)cpu_data[CS_ETM_CPU] != cpu) {
368 			pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
369 			return -EINVAL;
370 		}
371 
372 		/* check that the mapped ID matches */
373 		err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
374 		if (err)
375 			return err;
376 		if (curr_chan_id != trace_chan_id) {
377 			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
378 			return -EINVAL;
379 		}
380 
381 		/* mapped and matched - return OK */
382 		return 0;
383 	}
384 
385 	cpu_data = get_cpu_data(etm, cpu);
386 	if (cpu_data == NULL)
387 		return err;
388 
389 	/* not one we've seen before - lets map it */
390 	err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
391 	if (err)
392 		return err;
393 
394 	/*
395 	 * if we are picking up the association from the packet, need to plug
396 	 * the correct trace ID into the metadata for setting up decoders later.
397 	 */
398 	err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
399 	return err;
400 }
401 
402 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
403 					      u8 trace_chan_id)
404 {
405 	/*
406 	 * When a timestamp packet is encountered the backend code
407 	 * is stopped so that the front end has time to process packets
408 	 * that were accumulated in the traceID queue.  Since there can
409 	 * be more than one channel per cs_etm_queue, we need to specify
410 	 * what traceID queue needs servicing.
411 	 */
412 	etmq->pending_timestamp_chan_id = trace_chan_id;
413 }
414 
415 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
416 				      u8 *trace_chan_id)
417 {
418 	struct cs_etm_packet_queue *packet_queue;
419 
420 	if (!etmq->pending_timestamp_chan_id)
421 		return 0;
422 
423 	if (trace_chan_id)
424 		*trace_chan_id = etmq->pending_timestamp_chan_id;
425 
426 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
427 						     etmq->pending_timestamp_chan_id);
428 	if (!packet_queue)
429 		return 0;
430 
431 	/* Acknowledge pending status */
432 	etmq->pending_timestamp_chan_id = 0;
433 
434 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
435 	return packet_queue->cs_timestamp;
436 }
437 
438 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
439 {
440 	int i;
441 
442 	queue->head = 0;
443 	queue->tail = 0;
444 	queue->packet_count = 0;
445 	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
446 		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
447 		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
448 		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
449 		queue->packet_buffer[i].instr_count = 0;
450 		queue->packet_buffer[i].last_instr_taken_branch = false;
451 		queue->packet_buffer[i].last_instr_size = 0;
452 		queue->packet_buffer[i].last_instr_type = 0;
453 		queue->packet_buffer[i].last_instr_subtype = 0;
454 		queue->packet_buffer[i].last_instr_cond = 0;
455 		queue->packet_buffer[i].flags = 0;
456 		queue->packet_buffer[i].exception_number = UINT32_MAX;
457 		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
458 		queue->packet_buffer[i].cpu = INT_MIN;
459 	}
460 }
461 
462 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
463 {
464 	int idx;
465 	struct int_node *inode;
466 	struct cs_etm_traceid_queue *tidq;
467 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
468 
469 	intlist__for_each_entry(inode, traceid_queues_list) {
470 		idx = (int)(intptr_t)inode->priv;
471 		tidq = etmq->traceid_queues[idx];
472 		cs_etm__clear_packet_queue(&tidq->packet_queue);
473 	}
474 }
475 
476 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
477 				      struct cs_etm_traceid_queue *tidq,
478 				      u8 trace_chan_id)
479 {
480 	int rc = -ENOMEM;
481 	struct auxtrace_queue *queue;
482 	struct cs_etm_auxtrace *etm = etmq->etm;
483 
484 	cs_etm__clear_packet_queue(&tidq->packet_queue);
485 
486 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
487 	tidq->trace_chan_id = trace_chan_id;
488 	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
489 	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
490 					       queue->tid);
491 	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
492 
493 	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
494 	if (!tidq->packet)
495 		goto out;
496 
497 	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
498 	if (!tidq->prev_packet)
499 		goto out_free;
500 
501 	if (etm->synth_opts.last_branch) {
502 		size_t sz = sizeof(struct branch_stack);
503 
504 		sz += etm->synth_opts.last_branch_sz *
505 		      sizeof(struct branch_entry);
506 		tidq->last_branch = zalloc(sz);
507 		if (!tidq->last_branch)
508 			goto out_free;
509 		tidq->last_branch_rb = zalloc(sz);
510 		if (!tidq->last_branch_rb)
511 			goto out_free;
512 	}
513 
514 	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
515 	if (!tidq->event_buf)
516 		goto out_free;
517 
518 	return 0;
519 
520 out_free:
521 	zfree(&tidq->last_branch_rb);
522 	zfree(&tidq->last_branch);
523 	zfree(&tidq->prev_packet);
524 	zfree(&tidq->packet);
525 out:
526 	return rc;
527 }
528 
529 static struct cs_etm_traceid_queue
530 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
531 {
532 	int idx;
533 	struct int_node *inode;
534 	struct intlist *traceid_queues_list;
535 	struct cs_etm_traceid_queue *tidq, **traceid_queues;
536 	struct cs_etm_auxtrace *etm = etmq->etm;
537 
538 	if (etm->per_thread_decoding)
539 		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
540 
541 	traceid_queues_list = etmq->traceid_queues_list;
542 
543 	/*
544 	 * Check if the traceid_queue exist for this traceID by looking
545 	 * in the queue list.
546 	 */
547 	inode = intlist__find(traceid_queues_list, trace_chan_id);
548 	if (inode) {
549 		idx = (int)(intptr_t)inode->priv;
550 		return etmq->traceid_queues[idx];
551 	}
552 
553 	/* We couldn't find a traceid_queue for this traceID, allocate one */
554 	tidq = malloc(sizeof(*tidq));
555 	if (!tidq)
556 		return NULL;
557 
558 	memset(tidq, 0, sizeof(*tidq));
559 
560 	/* Get a valid index for the new traceid_queue */
561 	idx = intlist__nr_entries(traceid_queues_list);
562 	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
563 	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
564 	if (!inode)
565 		goto out_free;
566 
567 	/* Associate this traceID with this index */
568 	inode->priv = (void *)(intptr_t)idx;
569 
570 	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
571 		goto out_free;
572 
573 	/* Grow the traceid_queues array by one unit */
574 	traceid_queues = etmq->traceid_queues;
575 	traceid_queues = reallocarray(traceid_queues,
576 				      idx + 1,
577 				      sizeof(*traceid_queues));
578 
579 	/*
580 	 * On failure reallocarray() returns NULL and the original block of
581 	 * memory is left untouched.
582 	 */
583 	if (!traceid_queues)
584 		goto out_free;
585 
586 	traceid_queues[idx] = tidq;
587 	etmq->traceid_queues = traceid_queues;
588 
589 	return etmq->traceid_queues[idx];
590 
591 out_free:
592 	/*
593 	 * Function intlist__remove() removes the inode from the list
594 	 * and delete the memory associated to it.
595 	 */
596 	intlist__remove(traceid_queues_list, inode);
597 	free(tidq);
598 
599 	return NULL;
600 }
601 
602 struct cs_etm_packet_queue
603 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
604 {
605 	struct cs_etm_traceid_queue *tidq;
606 
607 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
608 	if (tidq)
609 		return &tidq->packet_queue;
610 
611 	return NULL;
612 }
613 
614 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
615 				struct cs_etm_traceid_queue *tidq)
616 {
617 	struct cs_etm_packet *tmp;
618 
619 	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
620 	    etm->synth_opts.instructions) {
621 		/*
622 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
623 		 * the next incoming packet.
624 		 *
625 		 * Threads and exception levels are also tracked for both the
626 		 * previous and current packets. This is because the previous
627 		 * packet is used for the 'from' IP for branch samples, so the
628 		 * thread at that time must also be assigned to that sample.
629 		 * Across discontinuity packets the thread can change, so by
630 		 * tracking the thread for the previous packet the branch sample
631 		 * will have the correct info.
632 		 */
633 		tmp = tidq->packet;
634 		tidq->packet = tidq->prev_packet;
635 		tidq->prev_packet = tmp;
636 		tidq->prev_packet_el = tidq->el;
637 		thread__put(tidq->prev_packet_thread);
638 		tidq->prev_packet_thread = thread__get(tidq->thread);
639 	}
640 }
641 
642 static void cs_etm__packet_dump(const char *pkt_string)
643 {
644 	const char *color = PERF_COLOR_BLUE;
645 	int len = strlen(pkt_string);
646 
647 	if (len && (pkt_string[len-1] == '\n'))
648 		color_fprintf(stdout, color, "	%s", pkt_string);
649 	else
650 		color_fprintf(stdout, color, "	%s\n", pkt_string);
651 
652 	fflush(stdout);
653 }
654 
655 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
656 					  struct cs_etm_auxtrace *etm, int t_idx,
657 					  int m_idx, u32 etmidr)
658 {
659 	u64 **metadata = etm->metadata;
660 
661 	t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
662 	t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
663 	t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
664 }
665 
666 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
667 					  struct cs_etm_auxtrace *etm, int t_idx,
668 					  int m_idx)
669 {
670 	u64 **metadata = etm->metadata;
671 
672 	t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
673 	t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
674 	t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
675 	t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
676 	t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
677 	t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
678 	t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
679 }
680 
681 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
682 					  struct cs_etm_auxtrace *etm, int t_idx,
683 					  int m_idx)
684 {
685 	u64 **metadata = etm->metadata;
686 
687 	t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
688 	t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
689 	t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
690 	t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
691 	t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
692 	t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
693 	t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
694 	t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
695 }
696 
697 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
698 				     struct cs_etm_auxtrace *etm,
699 				     bool formatted,
700 				     int sample_cpu,
701 				     int decoders)
702 {
703 	int t_idx, m_idx;
704 	u32 etmidr;
705 	u64 architecture;
706 
707 	for (t_idx = 0; t_idx < decoders; t_idx++) {
708 		if (formatted)
709 			m_idx = t_idx;
710 		else {
711 			m_idx = get_cpu_data_idx(etm, sample_cpu);
712 			if (m_idx == -1) {
713 				pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
714 				m_idx = 0;
715 			}
716 		}
717 
718 		architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
719 
720 		switch (architecture) {
721 		case __perf_cs_etmv3_magic:
722 			etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
723 			cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
724 			break;
725 		case __perf_cs_etmv4_magic:
726 			cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
727 			break;
728 		case __perf_cs_ete_magic:
729 			cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
730 			break;
731 		default:
732 			return -EINVAL;
733 		}
734 	}
735 
736 	return 0;
737 }
738 
739 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
740 				       struct cs_etm_queue *etmq,
741 				       enum cs_etm_decoder_operation mode,
742 				       bool formatted)
743 {
744 	int ret = -EINVAL;
745 
746 	if (!(mode < CS_ETM_OPERATION_MAX))
747 		goto out;
748 
749 	d_params->packet_printer = cs_etm__packet_dump;
750 	d_params->operation = mode;
751 	d_params->data = etmq;
752 	d_params->formatted = formatted;
753 	d_params->fsyncs = false;
754 	d_params->hsyncs = false;
755 	d_params->frame_aligned = true;
756 
757 	ret = 0;
758 out:
759 	return ret;
760 }
761 
762 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
763 			       struct auxtrace_buffer *buffer)
764 {
765 	int ret;
766 	const char *color = PERF_COLOR_BLUE;
767 	size_t buffer_used = 0;
768 
769 	fprintf(stdout, "\n");
770 	color_fprintf(stdout, color,
771 		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
772 		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
773 
774 	do {
775 		size_t consumed;
776 
777 		ret = cs_etm_decoder__process_data_block(
778 				etmq->decoder, buffer->offset,
779 				&((u8 *)buffer->data)[buffer_used],
780 				buffer->size - buffer_used, &consumed);
781 		if (ret)
782 			break;
783 
784 		buffer_used += consumed;
785 	} while (buffer_used < buffer->size);
786 
787 	cs_etm_decoder__reset(etmq->decoder);
788 }
789 
790 static int cs_etm__flush_events(struct perf_session *session,
791 				struct perf_tool *tool)
792 {
793 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
794 						   struct cs_etm_auxtrace,
795 						   auxtrace);
796 	if (dump_trace)
797 		return 0;
798 
799 	if (!tool->ordered_events)
800 		return -EINVAL;
801 
802 	if (etm->timeless_decoding) {
803 		/*
804 		 * Pass tid = -1 to process all queues. But likely they will have
805 		 * already been processed on PERF_RECORD_EXIT anyway.
806 		 */
807 		return cs_etm__process_timeless_queues(etm, -1);
808 	}
809 
810 	return cs_etm__process_timestamped_queues(etm);
811 }
812 
813 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
814 {
815 	int idx;
816 	uintptr_t priv;
817 	struct int_node *inode, *tmp;
818 	struct cs_etm_traceid_queue *tidq;
819 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
820 
821 	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
822 		priv = (uintptr_t)inode->priv;
823 		idx = priv;
824 
825 		/* Free this traceid_queue from the array */
826 		tidq = etmq->traceid_queues[idx];
827 		thread__zput(tidq->thread);
828 		thread__zput(tidq->prev_packet_thread);
829 		zfree(&tidq->event_buf);
830 		zfree(&tidq->last_branch);
831 		zfree(&tidq->last_branch_rb);
832 		zfree(&tidq->prev_packet);
833 		zfree(&tidq->packet);
834 		zfree(&tidq);
835 
836 		/*
837 		 * Function intlist__remove() removes the inode from the list
838 		 * and delete the memory associated to it.
839 		 */
840 		intlist__remove(traceid_queues_list, inode);
841 	}
842 
843 	/* Then the RB tree itself */
844 	intlist__delete(traceid_queues_list);
845 	etmq->traceid_queues_list = NULL;
846 
847 	/* finally free the traceid_queues array */
848 	zfree(&etmq->traceid_queues);
849 }
850 
851 static void cs_etm__free_queue(void *priv)
852 {
853 	struct cs_etm_queue *etmq = priv;
854 
855 	if (!etmq)
856 		return;
857 
858 	cs_etm_decoder__free(etmq->decoder);
859 	cs_etm__free_traceid_queues(etmq);
860 	free(etmq);
861 }
862 
863 static void cs_etm__free_events(struct perf_session *session)
864 {
865 	unsigned int i;
866 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
867 						   struct cs_etm_auxtrace,
868 						   auxtrace);
869 	struct auxtrace_queues *queues = &aux->queues;
870 
871 	for (i = 0; i < queues->nr_queues; i++) {
872 		cs_etm__free_queue(queues->queue_array[i].priv);
873 		queues->queue_array[i].priv = NULL;
874 	}
875 
876 	auxtrace_queues__free(queues);
877 }
878 
879 static void cs_etm__free(struct perf_session *session)
880 {
881 	int i;
882 	struct int_node *inode, *tmp;
883 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
884 						   struct cs_etm_auxtrace,
885 						   auxtrace);
886 	cs_etm__free_events(session);
887 	session->auxtrace = NULL;
888 
889 	/* First remove all traceID/metadata nodes for the RB tree */
890 	intlist__for_each_entry_safe(inode, tmp, traceid_list)
891 		intlist__remove(traceid_list, inode);
892 	/* Then the RB tree itself */
893 	intlist__delete(traceid_list);
894 
895 	for (i = 0; i < aux->num_cpu; i++)
896 		zfree(&aux->metadata[i]);
897 
898 	zfree(&aux->metadata);
899 	zfree(&aux);
900 }
901 
902 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
903 				      struct evsel *evsel)
904 {
905 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
906 						   struct cs_etm_auxtrace,
907 						   auxtrace);
908 
909 	return evsel->core.attr.type == aux->pmu_type;
910 }
911 
912 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
913 					   ocsd_ex_level el)
914 {
915 	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
916 
917 	/*
918 	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
919 	 * running at EL1 assume everything is the host.
920 	 */
921 	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
922 		return &etmq->etm->session->machines.host;
923 
924 	/*
925 	 * Not perfect, but otherwise assume anything in EL1 is the default
926 	 * guest, and everything else is the host. Distinguishing between guest
927 	 * and host userspaces isn't currently supported either. Neither is
928 	 * multiple guest support. All this does is reduce the likeliness of
929 	 * decode errors where we look into the host kernel maps when it should
930 	 * have been the guest maps.
931 	 */
932 	switch (el) {
933 	case ocsd_EL1:
934 		return machines__find_guest(&etmq->etm->session->machines,
935 					    DEFAULT_GUEST_KERNEL_ID);
936 	case ocsd_EL3:
937 	case ocsd_EL2:
938 	case ocsd_EL0:
939 	case ocsd_EL_unknown:
940 	default:
941 		return &etmq->etm->session->machines.host;
942 	}
943 }
944 
945 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
946 			   ocsd_ex_level el)
947 {
948 	struct machine *machine = cs_etm__get_machine(etmq, el);
949 
950 	if (address >= machine__kernel_start(machine)) {
951 		if (machine__is_host(machine))
952 			return PERF_RECORD_MISC_KERNEL;
953 		else
954 			return PERF_RECORD_MISC_GUEST_KERNEL;
955 	} else {
956 		if (machine__is_host(machine))
957 			return PERF_RECORD_MISC_USER;
958 		else {
959 			/*
960 			 * Can't really happen at the moment because
961 			 * cs_etm__get_machine() will always return
962 			 * machines.host for any non EL1 trace.
963 			 */
964 			return PERF_RECORD_MISC_GUEST_USER;
965 		}
966 	}
967 }
968 
969 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
970 			      u64 address, size_t size, u8 *buffer,
971 			      const ocsd_mem_space_acc_t mem_space)
972 {
973 	u8  cpumode;
974 	u64 offset;
975 	int len;
976 	struct addr_location al;
977 	struct dso *dso;
978 	struct cs_etm_traceid_queue *tidq;
979 	int ret = 0;
980 
981 	if (!etmq)
982 		return 0;
983 
984 	addr_location__init(&al);
985 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
986 	if (!tidq)
987 		goto out;
988 
989 	/*
990 	 * We've already tracked EL along side the PID in cs_etm__set_thread()
991 	 * so double check that it matches what OpenCSD thinks as well. It
992 	 * doesn't distinguish between EL0 and EL1 for this mem access callback
993 	 * so we had to do the extra tracking. Skip validation if it's any of
994 	 * the 'any' values.
995 	 */
996 	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
997 	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
998 		if (mem_space & OCSD_MEM_SPACE_EL1N) {
999 			/* Includes both non secure EL1 and EL0 */
1000 			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
1001 		} else if (mem_space & OCSD_MEM_SPACE_EL2)
1002 			assert(tidq->el == ocsd_EL2);
1003 		else if (mem_space & OCSD_MEM_SPACE_EL3)
1004 			assert(tidq->el == ocsd_EL3);
1005 	}
1006 
1007 	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1008 
1009 	if (!thread__find_map(tidq->thread, cpumode, address, &al))
1010 		goto out;
1011 
1012 	dso = map__dso(al.map);
1013 	if (!dso)
1014 		goto out;
1015 
1016 	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
1017 	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1018 		goto out;
1019 
1020 	offset = map__map_ip(al.map, address);
1021 
1022 	map__load(al.map);
1023 
1024 	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1025 				    offset, buffer, size);
1026 
1027 	if (len <= 0) {
1028 		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1029 				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1030 		if (!dso__auxtrace_warned(dso)) {
1031 			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1032 				address,
1033 				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
1034 			dso__set_auxtrace_warned(dso);
1035 		}
1036 		goto out;
1037 	}
1038 	ret = len;
1039 out:
1040 	addr_location__exit(&al);
1041 	return ret;
1042 }
1043 
1044 static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
1045 						bool formatted, int sample_cpu)
1046 {
1047 	struct cs_etm_decoder_params d_params;
1048 	struct cs_etm_trace_params  *t_params = NULL;
1049 	struct cs_etm_queue *etmq;
1050 	/*
1051 	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
1052 	 * needed.
1053 	 */
1054 	int decoders = formatted ? etm->num_cpu : 1;
1055 
1056 	etmq = zalloc(sizeof(*etmq));
1057 	if (!etmq)
1058 		return NULL;
1059 
1060 	etmq->traceid_queues_list = intlist__new(NULL);
1061 	if (!etmq->traceid_queues_list)
1062 		goto out_free;
1063 
1064 	/* Use metadata to fill in trace parameters for trace decoder */
1065 	t_params = zalloc(sizeof(*t_params) * decoders);
1066 
1067 	if (!t_params)
1068 		goto out_free;
1069 
1070 	if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
1071 		goto out_free;
1072 
1073 	/* Set decoder parameters to decode trace packets */
1074 	if (cs_etm__init_decoder_params(&d_params, etmq,
1075 					dump_trace ? CS_ETM_OPERATION_PRINT :
1076 						     CS_ETM_OPERATION_DECODE,
1077 					formatted))
1078 		goto out_free;
1079 
1080 	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
1081 					    t_params);
1082 
1083 	if (!etmq->decoder)
1084 		goto out_free;
1085 
1086 	/*
1087 	 * Register a function to handle all memory accesses required by
1088 	 * the trace decoder library.
1089 	 */
1090 	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
1091 					      0x0L, ((u64) -1L),
1092 					      cs_etm__mem_access))
1093 		goto out_free_decoder;
1094 
1095 	zfree(&t_params);
1096 	return etmq;
1097 
1098 out_free_decoder:
1099 	cs_etm_decoder__free(etmq->decoder);
1100 out_free:
1101 	intlist__delete(etmq->traceid_queues_list);
1102 	free(etmq);
1103 
1104 	return NULL;
1105 }
1106 
1107 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1108 			       struct auxtrace_queue *queue,
1109 			       unsigned int queue_nr,
1110 			       bool formatted,
1111 			       int sample_cpu)
1112 {
1113 	struct cs_etm_queue *etmq = queue->priv;
1114 
1115 	if (list_empty(&queue->head) || etmq)
1116 		return 0;
1117 
1118 	etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
1119 
1120 	if (!etmq)
1121 		return -ENOMEM;
1122 
1123 	queue->priv = etmq;
1124 	etmq->etm = etm;
1125 	etmq->queue_nr = queue_nr;
1126 	etmq->offset = 0;
1127 
1128 	return 0;
1129 }
1130 
1131 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1132 					    struct cs_etm_queue *etmq,
1133 					    unsigned int queue_nr)
1134 {
1135 	int ret = 0;
1136 	unsigned int cs_queue_nr;
1137 	u8 trace_chan_id;
1138 	u64 cs_timestamp;
1139 
1140 	/*
1141 	 * We are under a CPU-wide trace scenario.  As such we need to know
1142 	 * when the code that generated the traces started to execute so that
1143 	 * it can be correlated with execution on other CPUs.  So we get a
1144 	 * handle on the beginning of traces and decode until we find a
1145 	 * timestamp.  The timestamp is then added to the auxtrace min heap
1146 	 * in order to know what nibble (of all the etmqs) to decode first.
1147 	 */
1148 	while (1) {
1149 		/*
1150 		 * Fetch an aux_buffer from this etmq.  Bail if no more
1151 		 * blocks or an error has been encountered.
1152 		 */
1153 		ret = cs_etm__get_data_block(etmq);
1154 		if (ret <= 0)
1155 			goto out;
1156 
1157 		/*
1158 		 * Run decoder on the trace block.  The decoder will stop when
1159 		 * encountering a CS timestamp, a full packet queue or the end of
1160 		 * trace for that block.
1161 		 */
1162 		ret = cs_etm__decode_data_block(etmq);
1163 		if (ret)
1164 			goto out;
1165 
1166 		/*
1167 		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1168 		 * the timestamp calculation for us.
1169 		 */
1170 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1171 
1172 		/* We found a timestamp, no need to continue. */
1173 		if (cs_timestamp)
1174 			break;
1175 
1176 		/*
1177 		 * We didn't find a timestamp so empty all the traceid packet
1178 		 * queues before looking for another timestamp packet, either
1179 		 * in the current data block or a new one.  Packets that were
1180 		 * just decoded are useless since no timestamp has been
1181 		 * associated with them.  As such simply discard them.
1182 		 */
1183 		cs_etm__clear_all_packet_queues(etmq);
1184 	}
1185 
1186 	/*
1187 	 * We have a timestamp.  Add it to the min heap to reflect when
1188 	 * instructions conveyed by the range packets of this traceID queue
1189 	 * started to execute.  Once the same has been done for all the traceID
1190 	 * queues of each etmq, redenring and decoding can start in
1191 	 * chronological order.
1192 	 *
1193 	 * Note that packets decoded above are still in the traceID's packet
1194 	 * queue and will be processed in cs_etm__process_timestamped_queues().
1195 	 */
1196 	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1197 	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1198 out:
1199 	return ret;
1200 }
1201 
1202 static inline
1203 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1204 				 struct cs_etm_traceid_queue *tidq)
1205 {
1206 	struct branch_stack *bs_src = tidq->last_branch_rb;
1207 	struct branch_stack *bs_dst = tidq->last_branch;
1208 	size_t nr = 0;
1209 
1210 	/*
1211 	 * Set the number of records before early exit: ->nr is used to
1212 	 * determine how many branches to copy from ->entries.
1213 	 */
1214 	bs_dst->nr = bs_src->nr;
1215 
1216 	/*
1217 	 * Early exit when there is nothing to copy.
1218 	 */
1219 	if (!bs_src->nr)
1220 		return;
1221 
1222 	/*
1223 	 * As bs_src->entries is a circular buffer, we need to copy from it in
1224 	 * two steps.  First, copy the branches from the most recently inserted
1225 	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1226 	 */
1227 	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1228 	memcpy(&bs_dst->entries[0],
1229 	       &bs_src->entries[tidq->last_branch_pos],
1230 	       sizeof(struct branch_entry) * nr);
1231 
1232 	/*
1233 	 * If we wrapped around at least once, the branches from the beginning
1234 	 * of the bs_src->entries buffer and until the ->last_branch_pos element
1235 	 * are older valid branches: copy them over.  The total number of
1236 	 * branches copied over will be equal to the number of branches asked by
1237 	 * the user in last_branch_sz.
1238 	 */
1239 	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1240 		memcpy(&bs_dst->entries[nr],
1241 		       &bs_src->entries[0],
1242 		       sizeof(struct branch_entry) * tidq->last_branch_pos);
1243 	}
1244 }
1245 
1246 static inline
1247 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1248 {
1249 	tidq->last_branch_pos = 0;
1250 	tidq->last_branch_rb->nr = 0;
1251 }
1252 
1253 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1254 					 u8 trace_chan_id, u64 addr)
1255 {
1256 	u8 instrBytes[2];
1257 
1258 	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1259 			   instrBytes, 0);
1260 	/*
1261 	 * T32 instruction size is indicated by bits[15:11] of the first
1262 	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1263 	 * denote a 32-bit instruction.
1264 	 */
1265 	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1266 }
1267 
1268 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1269 {
1270 	/*
1271 	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1272 	 * appear in samples.
1273 	 */
1274 	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1275 	    packet->sample_type == CS_ETM_EXCEPTION)
1276 		return 0;
1277 
1278 	return packet->start_addr;
1279 }
1280 
1281 static inline
1282 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1283 {
1284 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1285 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1286 		return 0;
1287 
1288 	return packet->end_addr - packet->last_instr_size;
1289 }
1290 
1291 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1292 				     u64 trace_chan_id,
1293 				     const struct cs_etm_packet *packet,
1294 				     u64 offset)
1295 {
1296 	if (packet->isa == CS_ETM_ISA_T32) {
1297 		u64 addr = packet->start_addr;
1298 
1299 		while (offset) {
1300 			addr += cs_etm__t32_instr_size(etmq,
1301 						       trace_chan_id, addr);
1302 			offset--;
1303 		}
1304 		return addr;
1305 	}
1306 
1307 	/* Assume a 4 byte instruction size (A32/A64) */
1308 	return packet->start_addr + offset * 4;
1309 }
1310 
1311 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1312 					  struct cs_etm_traceid_queue *tidq)
1313 {
1314 	struct branch_stack *bs = tidq->last_branch_rb;
1315 	struct branch_entry *be;
1316 
1317 	/*
1318 	 * The branches are recorded in a circular buffer in reverse
1319 	 * chronological order: we start recording from the last element of the
1320 	 * buffer down.  After writing the first element of the stack, move the
1321 	 * insert position back to the end of the buffer.
1322 	 */
1323 	if (!tidq->last_branch_pos)
1324 		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1325 
1326 	tidq->last_branch_pos -= 1;
1327 
1328 	be       = &bs->entries[tidq->last_branch_pos];
1329 	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1330 	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1331 	/* No support for mispredict */
1332 	be->flags.mispred = 0;
1333 	be->flags.predicted = 1;
1334 
1335 	/*
1336 	 * Increment bs->nr until reaching the number of last branches asked by
1337 	 * the user on the command line.
1338 	 */
1339 	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1340 		bs->nr += 1;
1341 }
1342 
1343 static int cs_etm__inject_event(union perf_event *event,
1344 			       struct perf_sample *sample, u64 type)
1345 {
1346 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1347 	return perf_event__synthesize_sample(event, type, 0, sample);
1348 }
1349 
1350 
1351 static int
1352 cs_etm__get_trace(struct cs_etm_queue *etmq)
1353 {
1354 	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1355 	struct auxtrace_buffer *old_buffer = aux_buffer;
1356 	struct auxtrace_queue *queue;
1357 
1358 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1359 
1360 	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1361 
1362 	/* If no more data, drop the previous auxtrace_buffer and return */
1363 	if (!aux_buffer) {
1364 		if (old_buffer)
1365 			auxtrace_buffer__drop_data(old_buffer);
1366 		etmq->buf_len = 0;
1367 		return 0;
1368 	}
1369 
1370 	etmq->buffer = aux_buffer;
1371 
1372 	/* If the aux_buffer doesn't have data associated, try to load it */
1373 	if (!aux_buffer->data) {
1374 		/* get the file desc associated with the perf data file */
1375 		int fd = perf_data__fd(etmq->etm->session->data);
1376 
1377 		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1378 		if (!aux_buffer->data)
1379 			return -ENOMEM;
1380 	}
1381 
1382 	/* If valid, drop the previous buffer */
1383 	if (old_buffer)
1384 		auxtrace_buffer__drop_data(old_buffer);
1385 
1386 	etmq->buf_used = 0;
1387 	etmq->buf_len = aux_buffer->size;
1388 	etmq->buf = aux_buffer->data;
1389 
1390 	return etmq->buf_len;
1391 }
1392 
1393 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1394 			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1395 			       ocsd_ex_level el)
1396 {
1397 	struct machine *machine = cs_etm__get_machine(etmq, el);
1398 
1399 	if (tid != -1) {
1400 		thread__zput(tidq->thread);
1401 		tidq->thread = machine__find_thread(machine, -1, tid);
1402 	}
1403 
1404 	/* Couldn't find a known thread */
1405 	if (!tidq->thread)
1406 		tidq->thread = machine__idle_thread(machine);
1407 
1408 	tidq->el = el;
1409 }
1410 
1411 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1412 			    u8 trace_chan_id, ocsd_ex_level el)
1413 {
1414 	struct cs_etm_traceid_queue *tidq;
1415 
1416 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1417 	if (!tidq)
1418 		return -EINVAL;
1419 
1420 	cs_etm__set_thread(etmq, tidq, tid, el);
1421 	return 0;
1422 }
1423 
1424 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1425 {
1426 	return !!etmq->etm->timeless_decoding;
1427 }
1428 
1429 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1430 			      u64 trace_chan_id,
1431 			      const struct cs_etm_packet *packet,
1432 			      struct perf_sample *sample)
1433 {
1434 	/*
1435 	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1436 	 * packet, so directly bail out with 'insn_len' = 0.
1437 	 */
1438 	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1439 		sample->insn_len = 0;
1440 		return;
1441 	}
1442 
1443 	/*
1444 	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1445 	 * cs_etm__t32_instr_size().
1446 	 */
1447 	if (packet->isa == CS_ETM_ISA_T32)
1448 		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1449 							  sample->ip);
1450 	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
1451 	else
1452 		sample->insn_len = 4;
1453 
1454 	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1455 			   (void *)sample->insn, 0);
1456 }
1457 
1458 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1459 {
1460 	struct cs_etm_auxtrace *etm = etmq->etm;
1461 
1462 	if (etm->has_virtual_ts)
1463 		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1464 	else
1465 		return cs_timestamp;
1466 }
1467 
1468 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1469 					       struct cs_etm_traceid_queue *tidq)
1470 {
1471 	struct cs_etm_auxtrace *etm = etmq->etm;
1472 	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1473 
1474 	if (!etm->timeless_decoding && etm->has_virtual_ts)
1475 		return packet_queue->cs_timestamp;
1476 	else
1477 		return etm->latest_kernel_timestamp;
1478 }
1479 
1480 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1481 					    struct cs_etm_traceid_queue *tidq,
1482 					    u64 addr, u64 period)
1483 {
1484 	int ret = 0;
1485 	struct cs_etm_auxtrace *etm = etmq->etm;
1486 	union perf_event *event = tidq->event_buf;
1487 	struct perf_sample sample = {.ip = 0,};
1488 
1489 	event->sample.header.type = PERF_RECORD_SAMPLE;
1490 	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1491 	event->sample.header.size = sizeof(struct perf_event_header);
1492 
1493 	/* Set time field based on etm auxtrace config. */
1494 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1495 
1496 	sample.ip = addr;
1497 	sample.pid = thread__pid(tidq->thread);
1498 	sample.tid = thread__tid(tidq->thread);
1499 	sample.id = etmq->etm->instructions_id;
1500 	sample.stream_id = etmq->etm->instructions_id;
1501 	sample.period = period;
1502 	sample.cpu = tidq->packet->cpu;
1503 	sample.flags = tidq->prev_packet->flags;
1504 	sample.cpumode = event->sample.header.misc;
1505 
1506 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1507 
1508 	if (etm->synth_opts.last_branch)
1509 		sample.branch_stack = tidq->last_branch;
1510 
1511 	if (etm->synth_opts.inject) {
1512 		ret = cs_etm__inject_event(event, &sample,
1513 					   etm->instructions_sample_type);
1514 		if (ret)
1515 			return ret;
1516 	}
1517 
1518 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1519 
1520 	if (ret)
1521 		pr_err(
1522 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1523 			ret);
1524 
1525 	return ret;
1526 }
1527 
1528 /*
1529  * The cs etm packet encodes an instruction range between a branch target
1530  * and the next taken branch. Generate sample accordingly.
1531  */
1532 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1533 				       struct cs_etm_traceid_queue *tidq)
1534 {
1535 	int ret = 0;
1536 	struct cs_etm_auxtrace *etm = etmq->etm;
1537 	struct perf_sample sample = {.ip = 0,};
1538 	union perf_event *event = tidq->event_buf;
1539 	struct dummy_branch_stack {
1540 		u64			nr;
1541 		u64			hw_idx;
1542 		struct branch_entry	entries;
1543 	} dummy_bs;
1544 	u64 ip;
1545 
1546 	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1547 
1548 	event->sample.header.type = PERF_RECORD_SAMPLE;
1549 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1550 						     tidq->prev_packet_el);
1551 	event->sample.header.size = sizeof(struct perf_event_header);
1552 
1553 	/* Set time field based on etm auxtrace config. */
1554 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1555 
1556 	sample.ip = ip;
1557 	sample.pid = thread__pid(tidq->prev_packet_thread);
1558 	sample.tid = thread__tid(tidq->prev_packet_thread);
1559 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1560 	sample.id = etmq->etm->branches_id;
1561 	sample.stream_id = etmq->etm->branches_id;
1562 	sample.period = 1;
1563 	sample.cpu = tidq->packet->cpu;
1564 	sample.flags = tidq->prev_packet->flags;
1565 	sample.cpumode = event->sample.header.misc;
1566 
1567 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1568 			  &sample);
1569 
1570 	/*
1571 	 * perf report cannot handle events without a branch stack
1572 	 */
1573 	if (etm->synth_opts.last_branch) {
1574 		dummy_bs = (struct dummy_branch_stack){
1575 			.nr = 1,
1576 			.hw_idx = -1ULL,
1577 			.entries = {
1578 				.from = sample.ip,
1579 				.to = sample.addr,
1580 			},
1581 		};
1582 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1583 	}
1584 
1585 	if (etm->synth_opts.inject) {
1586 		ret = cs_etm__inject_event(event, &sample,
1587 					   etm->branches_sample_type);
1588 		if (ret)
1589 			return ret;
1590 	}
1591 
1592 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1593 
1594 	if (ret)
1595 		pr_err(
1596 		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1597 		ret);
1598 
1599 	return ret;
1600 }
1601 
1602 struct cs_etm_synth {
1603 	struct perf_tool dummy_tool;
1604 	struct perf_session *session;
1605 };
1606 
1607 static int cs_etm__event_synth(struct perf_tool *tool,
1608 			       union perf_event *event,
1609 			       struct perf_sample *sample __maybe_unused,
1610 			       struct machine *machine __maybe_unused)
1611 {
1612 	struct cs_etm_synth *cs_etm_synth =
1613 		      container_of(tool, struct cs_etm_synth, dummy_tool);
1614 
1615 	return perf_session__deliver_synth_event(cs_etm_synth->session,
1616 						 event, NULL);
1617 }
1618 
1619 static int cs_etm__synth_event(struct perf_session *session,
1620 			       struct perf_event_attr *attr, u64 id)
1621 {
1622 	struct cs_etm_synth cs_etm_synth;
1623 
1624 	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1625 	cs_etm_synth.session = session;
1626 
1627 	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1628 					   &id, cs_etm__event_synth);
1629 }
1630 
1631 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1632 				struct perf_session *session)
1633 {
1634 	struct evlist *evlist = session->evlist;
1635 	struct evsel *evsel;
1636 	struct perf_event_attr attr;
1637 	bool found = false;
1638 	u64 id;
1639 	int err;
1640 
1641 	evlist__for_each_entry(evlist, evsel) {
1642 		if (evsel->core.attr.type == etm->pmu_type) {
1643 			found = true;
1644 			break;
1645 		}
1646 	}
1647 
1648 	if (!found) {
1649 		pr_debug("No selected events with CoreSight Trace data\n");
1650 		return 0;
1651 	}
1652 
1653 	memset(&attr, 0, sizeof(struct perf_event_attr));
1654 	attr.size = sizeof(struct perf_event_attr);
1655 	attr.type = PERF_TYPE_HARDWARE;
1656 	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1657 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1658 			    PERF_SAMPLE_PERIOD;
1659 	if (etm->timeless_decoding)
1660 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1661 	else
1662 		attr.sample_type |= PERF_SAMPLE_TIME;
1663 
1664 	attr.exclude_user = evsel->core.attr.exclude_user;
1665 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1666 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1667 	attr.exclude_host = evsel->core.attr.exclude_host;
1668 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1669 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1670 	attr.read_format = evsel->core.attr.read_format;
1671 
1672 	/* create new id val to be a fixed offset from evsel id */
1673 	id = evsel->core.id[0] + 1000000000;
1674 
1675 	if (!id)
1676 		id = 1;
1677 
1678 	if (etm->synth_opts.branches) {
1679 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1680 		attr.sample_period = 1;
1681 		attr.sample_type |= PERF_SAMPLE_ADDR;
1682 		err = cs_etm__synth_event(session, &attr, id);
1683 		if (err)
1684 			return err;
1685 		etm->branches_sample_type = attr.sample_type;
1686 		etm->branches_id = id;
1687 		id += 1;
1688 		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1689 	}
1690 
1691 	if (etm->synth_opts.last_branch) {
1692 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1693 		/*
1694 		 * We don't use the hardware index, but the sample generation
1695 		 * code uses the new format branch_stack with this field,
1696 		 * so the event attributes must indicate that it's present.
1697 		 */
1698 		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1699 	}
1700 
1701 	if (etm->synth_opts.instructions) {
1702 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1703 		attr.sample_period = etm->synth_opts.period;
1704 		etm->instructions_sample_period = attr.sample_period;
1705 		err = cs_etm__synth_event(session, &attr, id);
1706 		if (err)
1707 			return err;
1708 		etm->instructions_sample_type = attr.sample_type;
1709 		etm->instructions_id = id;
1710 		id += 1;
1711 	}
1712 
1713 	return 0;
1714 }
1715 
1716 static int cs_etm__sample(struct cs_etm_queue *etmq,
1717 			  struct cs_etm_traceid_queue *tidq)
1718 {
1719 	struct cs_etm_auxtrace *etm = etmq->etm;
1720 	int ret;
1721 	u8 trace_chan_id = tidq->trace_chan_id;
1722 	u64 instrs_prev;
1723 
1724 	/* Get instructions remainder from previous packet */
1725 	instrs_prev = tidq->period_instructions;
1726 
1727 	tidq->period_instructions += tidq->packet->instr_count;
1728 
1729 	/*
1730 	 * Record a branch when the last instruction in
1731 	 * PREV_PACKET is a branch.
1732 	 */
1733 	if (etm->synth_opts.last_branch &&
1734 	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1735 	    tidq->prev_packet->last_instr_taken_branch)
1736 		cs_etm__update_last_branch_rb(etmq, tidq);
1737 
1738 	if (etm->synth_opts.instructions &&
1739 	    tidq->period_instructions >= etm->instructions_sample_period) {
1740 		/*
1741 		 * Emit instruction sample periodically
1742 		 * TODO: allow period to be defined in cycles and clock time
1743 		 */
1744 
1745 		/*
1746 		 * Below diagram demonstrates the instruction samples
1747 		 * generation flows:
1748 		 *
1749 		 *    Instrs     Instrs       Instrs       Instrs
1750 		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1751 		 *    |            |            |            |
1752 		 *    V            V            V            V
1753 		 *   --------------------------------------------------
1754 		 *            ^                                  ^
1755 		 *            |                                  |
1756 		 *         Period                             Period
1757 		 *    instructions(Pi)                   instructions(Pi')
1758 		 *
1759 		 *            |                                  |
1760 		 *            \---------------- -----------------/
1761 		 *                             V
1762 		 *                 tidq->packet->instr_count
1763 		 *
1764 		 * Instrs Sample(n...) are the synthesised samples occurring
1765 		 * every etm->instructions_sample_period instructions - as
1766 		 * defined on the perf command line.  Sample(n) is being the
1767 		 * last sample before the current etm packet, n+1 to n+3
1768 		 * samples are generated from the current etm packet.
1769 		 *
1770 		 * tidq->packet->instr_count represents the number of
1771 		 * instructions in the current etm packet.
1772 		 *
1773 		 * Period instructions (Pi) contains the number of
1774 		 * instructions executed after the sample point(n) from the
1775 		 * previous etm packet.  This will always be less than
1776 		 * etm->instructions_sample_period.
1777 		 *
1778 		 * When generate new samples, it combines with two parts
1779 		 * instructions, one is the tail of the old packet and another
1780 		 * is the head of the new coming packet, to generate
1781 		 * sample(n+1); sample(n+2) and sample(n+3) consume the
1782 		 * instructions with sample period.  After sample(n+3), the rest
1783 		 * instructions will be used by later packet and it is assigned
1784 		 * to tidq->period_instructions for next round calculation.
1785 		 */
1786 
1787 		/*
1788 		 * Get the initial offset into the current packet instructions;
1789 		 * entry conditions ensure that instrs_prev is less than
1790 		 * etm->instructions_sample_period.
1791 		 */
1792 		u64 offset = etm->instructions_sample_period - instrs_prev;
1793 		u64 addr;
1794 
1795 		/* Prepare last branches for instruction sample */
1796 		if (etm->synth_opts.last_branch)
1797 			cs_etm__copy_last_branch_rb(etmq, tidq);
1798 
1799 		while (tidq->period_instructions >=
1800 				etm->instructions_sample_period) {
1801 			/*
1802 			 * Calculate the address of the sampled instruction (-1
1803 			 * as sample is reported as though instruction has just
1804 			 * been executed, but PC has not advanced to next
1805 			 * instruction)
1806 			 */
1807 			addr = cs_etm__instr_addr(etmq, trace_chan_id,
1808 						  tidq->packet, offset - 1);
1809 			ret = cs_etm__synth_instruction_sample(
1810 				etmq, tidq, addr,
1811 				etm->instructions_sample_period);
1812 			if (ret)
1813 				return ret;
1814 
1815 			offset += etm->instructions_sample_period;
1816 			tidq->period_instructions -=
1817 				etm->instructions_sample_period;
1818 		}
1819 	}
1820 
1821 	if (etm->synth_opts.branches) {
1822 		bool generate_sample = false;
1823 
1824 		/* Generate sample for tracing on packet */
1825 		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1826 			generate_sample = true;
1827 
1828 		/* Generate sample for branch taken packet */
1829 		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1830 		    tidq->prev_packet->last_instr_taken_branch)
1831 			generate_sample = true;
1832 
1833 		if (generate_sample) {
1834 			ret = cs_etm__synth_branch_sample(etmq, tidq);
1835 			if (ret)
1836 				return ret;
1837 		}
1838 	}
1839 
1840 	cs_etm__packet_swap(etm, tidq);
1841 
1842 	return 0;
1843 }
1844 
1845 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1846 {
1847 	/*
1848 	 * When the exception packet is inserted, whether the last instruction
1849 	 * in previous range packet is taken branch or not, we need to force
1850 	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
1851 	 * to generate branch sample for the instruction range before the
1852 	 * exception is trapped to kernel or before the exception returning.
1853 	 *
1854 	 * The exception packet includes the dummy address values, so don't
1855 	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
1856 	 * for generating instruction and branch samples.
1857 	 */
1858 	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1859 		tidq->prev_packet->last_instr_taken_branch = true;
1860 
1861 	return 0;
1862 }
1863 
1864 static int cs_etm__flush(struct cs_etm_queue *etmq,
1865 			 struct cs_etm_traceid_queue *tidq)
1866 {
1867 	int err = 0;
1868 	struct cs_etm_auxtrace *etm = etmq->etm;
1869 
1870 	/* Handle start tracing packet */
1871 	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1872 		goto swap_packet;
1873 
1874 	if (etmq->etm->synth_opts.last_branch &&
1875 	    etmq->etm->synth_opts.instructions &&
1876 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1877 		u64 addr;
1878 
1879 		/* Prepare last branches for instruction sample */
1880 		cs_etm__copy_last_branch_rb(etmq, tidq);
1881 
1882 		/*
1883 		 * Generate a last branch event for the branches left in the
1884 		 * circular buffer at the end of the trace.
1885 		 *
1886 		 * Use the address of the end of the last reported execution
1887 		 * range
1888 		 */
1889 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1890 
1891 		err = cs_etm__synth_instruction_sample(
1892 			etmq, tidq, addr,
1893 			tidq->period_instructions);
1894 		if (err)
1895 			return err;
1896 
1897 		tidq->period_instructions = 0;
1898 
1899 	}
1900 
1901 	if (etm->synth_opts.branches &&
1902 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1903 		err = cs_etm__synth_branch_sample(etmq, tidq);
1904 		if (err)
1905 			return err;
1906 	}
1907 
1908 swap_packet:
1909 	cs_etm__packet_swap(etm, tidq);
1910 
1911 	/* Reset last branches after flush the trace */
1912 	if (etm->synth_opts.last_branch)
1913 		cs_etm__reset_last_branch_rb(tidq);
1914 
1915 	return err;
1916 }
1917 
1918 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1919 			     struct cs_etm_traceid_queue *tidq)
1920 {
1921 	int err;
1922 
1923 	/*
1924 	 * It has no new packet coming and 'etmq->packet' contains the stale
1925 	 * packet which was set at the previous time with packets swapping;
1926 	 * so skip to generate branch sample to avoid stale packet.
1927 	 *
1928 	 * For this case only flush branch stack and generate a last branch
1929 	 * event for the branches left in the circular buffer at the end of
1930 	 * the trace.
1931 	 */
1932 	if (etmq->etm->synth_opts.last_branch &&
1933 	    etmq->etm->synth_opts.instructions &&
1934 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1935 		u64 addr;
1936 
1937 		/* Prepare last branches for instruction sample */
1938 		cs_etm__copy_last_branch_rb(etmq, tidq);
1939 
1940 		/*
1941 		 * Use the address of the end of the last reported execution
1942 		 * range.
1943 		 */
1944 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1945 
1946 		err = cs_etm__synth_instruction_sample(
1947 			etmq, tidq, addr,
1948 			tidq->period_instructions);
1949 		if (err)
1950 			return err;
1951 
1952 		tidq->period_instructions = 0;
1953 	}
1954 
1955 	return 0;
1956 }
1957 /*
1958  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1959  *			   if need be.
1960  * Returns:	< 0	if error
1961  *		= 0	if no more auxtrace_buffer to read
1962  *		> 0	if the current buffer isn't empty yet
1963  */
1964 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1965 {
1966 	int ret;
1967 
1968 	if (!etmq->buf_len) {
1969 		ret = cs_etm__get_trace(etmq);
1970 		if (ret <= 0)
1971 			return ret;
1972 		/*
1973 		 * We cannot assume consecutive blocks in the data file
1974 		 * are contiguous, reset the decoder to force re-sync.
1975 		 */
1976 		ret = cs_etm_decoder__reset(etmq->decoder);
1977 		if (ret)
1978 			return ret;
1979 	}
1980 
1981 	return etmq->buf_len;
1982 }
1983 
1984 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1985 				 struct cs_etm_packet *packet,
1986 				 u64 end_addr)
1987 {
1988 	/* Initialise to keep compiler happy */
1989 	u16 instr16 = 0;
1990 	u32 instr32 = 0;
1991 	u64 addr;
1992 
1993 	switch (packet->isa) {
1994 	case CS_ETM_ISA_T32:
1995 		/*
1996 		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1997 		 *
1998 		 *  b'15         b'8
1999 		 * +-----------------+--------+
2000 		 * | 1 1 0 1 1 1 1 1 |  imm8  |
2001 		 * +-----------------+--------+
2002 		 *
2003 		 * According to the specification, it only defines SVC for T32
2004 		 * with 16 bits instruction and has no definition for 32bits;
2005 		 * so below only read 2 bytes as instruction size for T32.
2006 		 */
2007 		addr = end_addr - 2;
2008 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2009 				   (u8 *)&instr16, 0);
2010 		if ((instr16 & 0xFF00) == 0xDF00)
2011 			return true;
2012 
2013 		break;
2014 	case CS_ETM_ISA_A32:
2015 		/*
2016 		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2017 		 *
2018 		 *  b'31 b'28 b'27 b'24
2019 		 * +---------+---------+-------------------------+
2020 		 * |  !1111  | 1 1 1 1 |        imm24            |
2021 		 * +---------+---------+-------------------------+
2022 		 */
2023 		addr = end_addr - 4;
2024 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2025 				   (u8 *)&instr32, 0);
2026 		if ((instr32 & 0x0F000000) == 0x0F000000 &&
2027 		    (instr32 & 0xF0000000) != 0xF0000000)
2028 			return true;
2029 
2030 		break;
2031 	case CS_ETM_ISA_A64:
2032 		/*
2033 		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2034 		 *
2035 		 *  b'31               b'21           b'4     b'0
2036 		 * +-----------------------+---------+-----------+
2037 		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2038 		 * +-----------------------+---------+-----------+
2039 		 */
2040 		addr = end_addr - 4;
2041 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2042 				   (u8 *)&instr32, 0);
2043 		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2044 			return true;
2045 
2046 		break;
2047 	case CS_ETM_ISA_UNKNOWN:
2048 	default:
2049 		break;
2050 	}
2051 
2052 	return false;
2053 }
2054 
2055 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2056 			       struct cs_etm_traceid_queue *tidq, u64 magic)
2057 {
2058 	u8 trace_chan_id = tidq->trace_chan_id;
2059 	struct cs_etm_packet *packet = tidq->packet;
2060 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2061 
2062 	if (magic == __perf_cs_etmv3_magic)
2063 		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2064 			return true;
2065 
2066 	/*
2067 	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2068 	 * HVC cases; need to check if it's SVC instruction based on
2069 	 * packet address.
2070 	 */
2071 	if (magic == __perf_cs_etmv4_magic) {
2072 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2073 		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2074 					 prev_packet->end_addr))
2075 			return true;
2076 	}
2077 
2078 	return false;
2079 }
2080 
2081 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2082 				       u64 magic)
2083 {
2084 	struct cs_etm_packet *packet = tidq->packet;
2085 
2086 	if (magic == __perf_cs_etmv3_magic)
2087 		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2088 		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2089 		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2090 		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2091 		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2092 			return true;
2093 
2094 	if (magic == __perf_cs_etmv4_magic)
2095 		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2096 		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2097 		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2098 		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2099 		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2100 		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2101 		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2102 			return true;
2103 
2104 	return false;
2105 }
2106 
2107 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2108 				      struct cs_etm_traceid_queue *tidq,
2109 				      u64 magic)
2110 {
2111 	u8 trace_chan_id = tidq->trace_chan_id;
2112 	struct cs_etm_packet *packet = tidq->packet;
2113 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2114 
2115 	if (magic == __perf_cs_etmv3_magic)
2116 		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2117 		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2118 		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2119 		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2120 		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2121 		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2122 		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2123 			return true;
2124 
2125 	if (magic == __perf_cs_etmv4_magic) {
2126 		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2127 		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2128 		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2129 		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2130 			return true;
2131 
2132 		/*
2133 		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2134 		 * (SMC, HVC) are taken as sync exceptions.
2135 		 */
2136 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2137 		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2138 					  prev_packet->end_addr))
2139 			return true;
2140 
2141 		/*
2142 		 * ETMv4 has 5 bits for exception number; if the numbers
2143 		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2144 		 * they are implementation defined exceptions.
2145 		 *
2146 		 * For this case, simply take it as sync exception.
2147 		 */
2148 		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2149 		    packet->exception_number <= CS_ETMV4_EXC_END)
2150 			return true;
2151 	}
2152 
2153 	return false;
2154 }
2155 
2156 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2157 				    struct cs_etm_traceid_queue *tidq)
2158 {
2159 	struct cs_etm_packet *packet = tidq->packet;
2160 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2161 	u8 trace_chan_id = tidq->trace_chan_id;
2162 	u64 magic;
2163 	int ret;
2164 
2165 	switch (packet->sample_type) {
2166 	case CS_ETM_RANGE:
2167 		/*
2168 		 * Immediate branch instruction without neither link nor
2169 		 * return flag, it's normal branch instruction within
2170 		 * the function.
2171 		 */
2172 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2173 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2174 			packet->flags = PERF_IP_FLAG_BRANCH;
2175 
2176 			if (packet->last_instr_cond)
2177 				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2178 		}
2179 
2180 		/*
2181 		 * Immediate branch instruction with link (e.g. BL), this is
2182 		 * branch instruction for function call.
2183 		 */
2184 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2185 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2186 			packet->flags = PERF_IP_FLAG_BRANCH |
2187 					PERF_IP_FLAG_CALL;
2188 
2189 		/*
2190 		 * Indirect branch instruction with link (e.g. BLR), this is
2191 		 * branch instruction for function call.
2192 		 */
2193 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2194 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2195 			packet->flags = PERF_IP_FLAG_BRANCH |
2196 					PERF_IP_FLAG_CALL;
2197 
2198 		/*
2199 		 * Indirect branch instruction with subtype of
2200 		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
2201 		 * function return for A32/T32.
2202 		 */
2203 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2204 		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2205 			packet->flags = PERF_IP_FLAG_BRANCH |
2206 					PERF_IP_FLAG_RETURN;
2207 
2208 		/*
2209 		 * Indirect branch instruction without link (e.g. BR), usually
2210 		 * this is used for function return, especially for functions
2211 		 * within dynamic link lib.
2212 		 */
2213 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2214 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2215 			packet->flags = PERF_IP_FLAG_BRANCH |
2216 					PERF_IP_FLAG_RETURN;
2217 
2218 		/* Return instruction for function return. */
2219 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2220 		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2221 			packet->flags = PERF_IP_FLAG_BRANCH |
2222 					PERF_IP_FLAG_RETURN;
2223 
2224 		/*
2225 		 * Decoder might insert a discontinuity in the middle of
2226 		 * instruction packets, fixup prev_packet with flag
2227 		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2228 		 */
2229 		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2230 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2231 					      PERF_IP_FLAG_TRACE_BEGIN;
2232 
2233 		/*
2234 		 * If the previous packet is an exception return packet
2235 		 * and the return address just follows SVC instruction,
2236 		 * it needs to calibrate the previous packet sample flags
2237 		 * as PERF_IP_FLAG_SYSCALLRET.
2238 		 */
2239 		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2240 					   PERF_IP_FLAG_RETURN |
2241 					   PERF_IP_FLAG_INTERRUPT) &&
2242 		    cs_etm__is_svc_instr(etmq, trace_chan_id,
2243 					 packet, packet->start_addr))
2244 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2245 					     PERF_IP_FLAG_RETURN |
2246 					     PERF_IP_FLAG_SYSCALLRET;
2247 		break;
2248 	case CS_ETM_DISCONTINUITY:
2249 		/*
2250 		 * The trace is discontinuous, if the previous packet is
2251 		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
2252 		 * for previous packet.
2253 		 */
2254 		if (prev_packet->sample_type == CS_ETM_RANGE)
2255 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2256 					      PERF_IP_FLAG_TRACE_END;
2257 		break;
2258 	case CS_ETM_EXCEPTION:
2259 		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2260 		if (ret)
2261 			return ret;
2262 
2263 		/* The exception is for system call. */
2264 		if (cs_etm__is_syscall(etmq, tidq, magic))
2265 			packet->flags = PERF_IP_FLAG_BRANCH |
2266 					PERF_IP_FLAG_CALL |
2267 					PERF_IP_FLAG_SYSCALLRET;
2268 		/*
2269 		 * The exceptions are triggered by external signals from bus,
2270 		 * interrupt controller, debug module, PE reset or halt.
2271 		 */
2272 		else if (cs_etm__is_async_exception(tidq, magic))
2273 			packet->flags = PERF_IP_FLAG_BRANCH |
2274 					PERF_IP_FLAG_CALL |
2275 					PERF_IP_FLAG_ASYNC |
2276 					PERF_IP_FLAG_INTERRUPT;
2277 		/*
2278 		 * Otherwise, exception is caused by trap, instruction &
2279 		 * data fault, or alignment errors.
2280 		 */
2281 		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2282 			packet->flags = PERF_IP_FLAG_BRANCH |
2283 					PERF_IP_FLAG_CALL |
2284 					PERF_IP_FLAG_INTERRUPT;
2285 
2286 		/*
2287 		 * When the exception packet is inserted, since exception
2288 		 * packet is not used standalone for generating samples
2289 		 * and it's affiliation to the previous instruction range
2290 		 * packet; so set previous range packet flags to tell perf
2291 		 * it is an exception taken branch.
2292 		 */
2293 		if (prev_packet->sample_type == CS_ETM_RANGE)
2294 			prev_packet->flags = packet->flags;
2295 		break;
2296 	case CS_ETM_EXCEPTION_RET:
2297 		/*
2298 		 * When the exception return packet is inserted, since
2299 		 * exception return packet is not used standalone for
2300 		 * generating samples and it's affiliation to the previous
2301 		 * instruction range packet; so set previous range packet
2302 		 * flags to tell perf it is an exception return branch.
2303 		 *
2304 		 * The exception return can be for either system call or
2305 		 * other exception types; unfortunately the packet doesn't
2306 		 * contain exception type related info so we cannot decide
2307 		 * the exception type purely based on exception return packet.
2308 		 * If we record the exception number from exception packet and
2309 		 * reuse it for exception return packet, this is not reliable
2310 		 * due the trace can be discontinuity or the interrupt can
2311 		 * be nested, thus the recorded exception number cannot be
2312 		 * used for exception return packet for these two cases.
2313 		 *
2314 		 * For exception return packet, we only need to distinguish the
2315 		 * packet is for system call or for other types.  Thus the
2316 		 * decision can be deferred when receive the next packet which
2317 		 * contains the return address, based on the return address we
2318 		 * can read out the previous instruction and check if it's a
2319 		 * system call instruction and then calibrate the sample flag
2320 		 * as needed.
2321 		 */
2322 		if (prev_packet->sample_type == CS_ETM_RANGE)
2323 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2324 					     PERF_IP_FLAG_RETURN |
2325 					     PERF_IP_FLAG_INTERRUPT;
2326 		break;
2327 	case CS_ETM_EMPTY:
2328 	default:
2329 		break;
2330 	}
2331 
2332 	return 0;
2333 }
2334 
2335 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2336 {
2337 	int ret = 0;
2338 	size_t processed = 0;
2339 
2340 	/*
2341 	 * Packets are decoded and added to the decoder's packet queue
2342 	 * until the decoder packet processing callback has requested that
2343 	 * processing stops or there is nothing left in the buffer.  Normal
2344 	 * operations that stop processing are a timestamp packet or a full
2345 	 * decoder buffer queue.
2346 	 */
2347 	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2348 						 etmq->offset,
2349 						 &etmq->buf[etmq->buf_used],
2350 						 etmq->buf_len,
2351 						 &processed);
2352 	if (ret)
2353 		goto out;
2354 
2355 	etmq->offset += processed;
2356 	etmq->buf_used += processed;
2357 	etmq->buf_len -= processed;
2358 
2359 out:
2360 	return ret;
2361 }
2362 
2363 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2364 					 struct cs_etm_traceid_queue *tidq)
2365 {
2366 	int ret;
2367 	struct cs_etm_packet_queue *packet_queue;
2368 
2369 	packet_queue = &tidq->packet_queue;
2370 
2371 	/* Process each packet in this chunk */
2372 	while (1) {
2373 		ret = cs_etm_decoder__get_packet(packet_queue,
2374 						 tidq->packet);
2375 		if (ret <= 0)
2376 			/*
2377 			 * Stop processing this chunk on
2378 			 * end of data or error
2379 			 */
2380 			break;
2381 
2382 		/*
2383 		 * Since packet addresses are swapped in packet
2384 		 * handling within below switch() statements,
2385 		 * thus setting sample flags must be called
2386 		 * prior to switch() statement to use address
2387 		 * information before packets swapping.
2388 		 */
2389 		ret = cs_etm__set_sample_flags(etmq, tidq);
2390 		if (ret < 0)
2391 			break;
2392 
2393 		switch (tidq->packet->sample_type) {
2394 		case CS_ETM_RANGE:
2395 			/*
2396 			 * If the packet contains an instruction
2397 			 * range, generate instruction sequence
2398 			 * events.
2399 			 */
2400 			cs_etm__sample(etmq, tidq);
2401 			break;
2402 		case CS_ETM_EXCEPTION:
2403 		case CS_ETM_EXCEPTION_RET:
2404 			/*
2405 			 * If the exception packet is coming,
2406 			 * make sure the previous instruction
2407 			 * range packet to be handled properly.
2408 			 */
2409 			cs_etm__exception(tidq);
2410 			break;
2411 		case CS_ETM_DISCONTINUITY:
2412 			/*
2413 			 * Discontinuity in trace, flush
2414 			 * previous branch stack
2415 			 */
2416 			cs_etm__flush(etmq, tidq);
2417 			break;
2418 		case CS_ETM_EMPTY:
2419 			/*
2420 			 * Should not receive empty packet,
2421 			 * report error.
2422 			 */
2423 			pr_err("CS ETM Trace: empty packet\n");
2424 			return -EINVAL;
2425 		default:
2426 			break;
2427 		}
2428 	}
2429 
2430 	return ret;
2431 }
2432 
2433 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2434 {
2435 	int idx;
2436 	struct int_node *inode;
2437 	struct cs_etm_traceid_queue *tidq;
2438 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2439 
2440 	intlist__for_each_entry(inode, traceid_queues_list) {
2441 		idx = (int)(intptr_t)inode->priv;
2442 		tidq = etmq->traceid_queues[idx];
2443 
2444 		/* Ignore return value */
2445 		cs_etm__process_traceid_queue(etmq, tidq);
2446 
2447 		/*
2448 		 * Generate an instruction sample with the remaining
2449 		 * branchstack entries.
2450 		 */
2451 		cs_etm__flush(etmq, tidq);
2452 	}
2453 }
2454 
2455 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2456 {
2457 	int err = 0;
2458 	struct cs_etm_traceid_queue *tidq;
2459 
2460 	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2461 	if (!tidq)
2462 		return -EINVAL;
2463 
2464 	/* Go through each buffer in the queue and decode them one by one */
2465 	while (1) {
2466 		err = cs_etm__get_data_block(etmq);
2467 		if (err <= 0)
2468 			return err;
2469 
2470 		/* Run trace decoder until buffer consumed or end of trace */
2471 		do {
2472 			err = cs_etm__decode_data_block(etmq);
2473 			if (err)
2474 				return err;
2475 
2476 			/*
2477 			 * Process each packet in this chunk, nothing to do if
2478 			 * an error occurs other than hoping the next one will
2479 			 * be better.
2480 			 */
2481 			err = cs_etm__process_traceid_queue(etmq, tidq);
2482 
2483 		} while (etmq->buf_len);
2484 
2485 		if (err == 0)
2486 			/* Flush any remaining branch stack entries */
2487 			err = cs_etm__end_block(etmq, tidq);
2488 	}
2489 
2490 	return err;
2491 }
2492 
2493 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2494 {
2495 	int idx, err = 0;
2496 	struct cs_etm_traceid_queue *tidq;
2497 	struct int_node *inode;
2498 
2499 	/* Go through each buffer in the queue and decode them one by one */
2500 	while (1) {
2501 		err = cs_etm__get_data_block(etmq);
2502 		if (err <= 0)
2503 			return err;
2504 
2505 		/* Run trace decoder until buffer consumed or end of trace */
2506 		do {
2507 			err = cs_etm__decode_data_block(etmq);
2508 			if (err)
2509 				return err;
2510 
2511 			/*
2512 			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2513 			 * single traceID queue because each TID has a separate
2514 			 * buffer. But here in per-cpu mode we need to iterate
2515 			 * over each channel instead.
2516 			 */
2517 			intlist__for_each_entry(inode,
2518 						etmq->traceid_queues_list) {
2519 				idx = (int)(intptr_t)inode->priv;
2520 				tidq = etmq->traceid_queues[idx];
2521 				cs_etm__process_traceid_queue(etmq, tidq);
2522 			}
2523 		} while (etmq->buf_len);
2524 
2525 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2526 			idx = (int)(intptr_t)inode->priv;
2527 			tidq = etmq->traceid_queues[idx];
2528 			/* Flush any remaining branch stack entries */
2529 			err = cs_etm__end_block(etmq, tidq);
2530 			if (err)
2531 				return err;
2532 		}
2533 	}
2534 
2535 	return err;
2536 }
2537 
2538 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2539 					   pid_t tid)
2540 {
2541 	unsigned int i;
2542 	struct auxtrace_queues *queues = &etm->queues;
2543 
2544 	for (i = 0; i < queues->nr_queues; i++) {
2545 		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2546 		struct cs_etm_queue *etmq = queue->priv;
2547 		struct cs_etm_traceid_queue *tidq;
2548 
2549 		if (!etmq)
2550 			continue;
2551 
2552 		if (etm->per_thread_decoding) {
2553 			tidq = cs_etm__etmq_get_traceid_queue(
2554 				etmq, CS_ETM_PER_THREAD_TRACEID);
2555 
2556 			if (!tidq)
2557 				continue;
2558 
2559 			if (tid == -1 || thread__tid(tidq->thread) == tid)
2560 				cs_etm__run_per_thread_timeless_decoder(etmq);
2561 		} else
2562 			cs_etm__run_per_cpu_timeless_decoder(etmq);
2563 	}
2564 
2565 	return 0;
2566 }
2567 
2568 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2569 {
2570 	int ret = 0;
2571 	unsigned int cs_queue_nr, queue_nr, i;
2572 	u8 trace_chan_id;
2573 	u64 cs_timestamp;
2574 	struct auxtrace_queue *queue;
2575 	struct cs_etm_queue *etmq;
2576 	struct cs_etm_traceid_queue *tidq;
2577 
2578 	/*
2579 	 * Pre-populate the heap with one entry from each queue so that we can
2580 	 * start processing in time order across all queues.
2581 	 */
2582 	for (i = 0; i < etm->queues.nr_queues; i++) {
2583 		etmq = etm->queues.queue_array[i].priv;
2584 		if (!etmq)
2585 			continue;
2586 
2587 		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2588 		if (ret)
2589 			return ret;
2590 	}
2591 
2592 	while (1) {
2593 		if (!etm->heap.heap_cnt)
2594 			goto out;
2595 
2596 		/* Take the entry at the top of the min heap */
2597 		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2598 		queue_nr = TO_QUEUE_NR(cs_queue_nr);
2599 		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2600 		queue = &etm->queues.queue_array[queue_nr];
2601 		etmq = queue->priv;
2602 
2603 		/*
2604 		 * Remove the top entry from the heap since we are about
2605 		 * to process it.
2606 		 */
2607 		auxtrace_heap__pop(&etm->heap);
2608 
2609 		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2610 		if (!tidq) {
2611 			/*
2612 			 * No traceID queue has been allocated for this traceID,
2613 			 * which means something somewhere went very wrong.  No
2614 			 * other choice than simply exit.
2615 			 */
2616 			ret = -EINVAL;
2617 			goto out;
2618 		}
2619 
2620 		/*
2621 		 * Packets associated with this timestamp are already in
2622 		 * the etmq's traceID queue, so process them.
2623 		 */
2624 		ret = cs_etm__process_traceid_queue(etmq, tidq);
2625 		if (ret < 0)
2626 			goto out;
2627 
2628 		/*
2629 		 * Packets for this timestamp have been processed, time to
2630 		 * move on to the next timestamp, fetching a new auxtrace_buffer
2631 		 * if need be.
2632 		 */
2633 refetch:
2634 		ret = cs_etm__get_data_block(etmq);
2635 		if (ret < 0)
2636 			goto out;
2637 
2638 		/*
2639 		 * No more auxtrace_buffers to process in this etmq, simply
2640 		 * move on to another entry in the auxtrace_heap.
2641 		 */
2642 		if (!ret)
2643 			continue;
2644 
2645 		ret = cs_etm__decode_data_block(etmq);
2646 		if (ret)
2647 			goto out;
2648 
2649 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2650 
2651 		if (!cs_timestamp) {
2652 			/*
2653 			 * Function cs_etm__decode_data_block() returns when
2654 			 * there is no more traces to decode in the current
2655 			 * auxtrace_buffer OR when a timestamp has been
2656 			 * encountered on any of the traceID queues.  Since we
2657 			 * did not get a timestamp, there is no more traces to
2658 			 * process in this auxtrace_buffer.  As such empty and
2659 			 * flush all traceID queues.
2660 			 */
2661 			cs_etm__clear_all_traceid_queues(etmq);
2662 
2663 			/* Fetch another auxtrace_buffer for this etmq */
2664 			goto refetch;
2665 		}
2666 
2667 		/*
2668 		 * Add to the min heap the timestamp for packets that have
2669 		 * just been decoded.  They will be processed and synthesized
2670 		 * during the next call to cs_etm__process_traceid_queue() for
2671 		 * this queue/traceID.
2672 		 */
2673 		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2674 		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2675 	}
2676 
2677 out:
2678 	return ret;
2679 }
2680 
2681 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2682 					union perf_event *event)
2683 {
2684 	struct thread *th;
2685 
2686 	if (etm->timeless_decoding)
2687 		return 0;
2688 
2689 	/*
2690 	 * Add the tid/pid to the log so that we can get a match when we get a
2691 	 * contextID from the decoder. Only track for the host: only kernel
2692 	 * trace is supported for guests which wouldn't need pids so this should
2693 	 * be fine.
2694 	 */
2695 	th = machine__findnew_thread(&etm->session->machines.host,
2696 				     event->itrace_start.pid,
2697 				     event->itrace_start.tid);
2698 	if (!th)
2699 		return -ENOMEM;
2700 
2701 	thread__put(th);
2702 
2703 	return 0;
2704 }
2705 
2706 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2707 					   union perf_event *event)
2708 {
2709 	struct thread *th;
2710 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2711 
2712 	/*
2713 	 * Context switch in per-thread mode are irrelevant since perf
2714 	 * will start/stop tracing as the process is scheduled.
2715 	 */
2716 	if (etm->timeless_decoding)
2717 		return 0;
2718 
2719 	/*
2720 	 * SWITCH_IN events carry the next process to be switched out while
2721 	 * SWITCH_OUT events carry the process to be switched in.  As such
2722 	 * we don't care about IN events.
2723 	 */
2724 	if (!out)
2725 		return 0;
2726 
2727 	/*
2728 	 * Add the tid/pid to the log so that we can get a match when we get a
2729 	 * contextID from the decoder. Only track for the host: only kernel
2730 	 * trace is supported for guests which wouldn't need pids so this should
2731 	 * be fine.
2732 	 */
2733 	th = machine__findnew_thread(&etm->session->machines.host,
2734 				     event->context_switch.next_prev_pid,
2735 				     event->context_switch.next_prev_tid);
2736 	if (!th)
2737 		return -ENOMEM;
2738 
2739 	thread__put(th);
2740 
2741 	return 0;
2742 }
2743 
2744 static int cs_etm__process_event(struct perf_session *session,
2745 				 union perf_event *event,
2746 				 struct perf_sample *sample,
2747 				 struct perf_tool *tool)
2748 {
2749 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2750 						   struct cs_etm_auxtrace,
2751 						   auxtrace);
2752 
2753 	if (dump_trace)
2754 		return 0;
2755 
2756 	if (!tool->ordered_events) {
2757 		pr_err("CoreSight ETM Trace requires ordered events\n");
2758 		return -EINVAL;
2759 	}
2760 
2761 	switch (event->header.type) {
2762 	case PERF_RECORD_EXIT:
2763 		/*
2764 		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2765 		 * start the decode because we know there will be no more trace from
2766 		 * this thread. All this does is emit samples earlier than waiting for
2767 		 * the flush in other modes, but with timestamps it makes sense to wait
2768 		 * for flush so that events from different threads are interleaved
2769 		 * properly.
2770 		 */
2771 		if (etm->per_thread_decoding && etm->timeless_decoding)
2772 			return cs_etm__process_timeless_queues(etm,
2773 							       event->fork.tid);
2774 		break;
2775 
2776 	case PERF_RECORD_ITRACE_START:
2777 		return cs_etm__process_itrace_start(etm, event);
2778 
2779 	case PERF_RECORD_SWITCH_CPU_WIDE:
2780 		return cs_etm__process_switch_cpu_wide(etm, event);
2781 
2782 	case PERF_RECORD_AUX:
2783 		/*
2784 		 * Record the latest kernel timestamp available in the header
2785 		 * for samples so that synthesised samples occur from this point
2786 		 * onwards.
2787 		 */
2788 		if (sample->time && (sample->time != (u64)-1))
2789 			etm->latest_kernel_timestamp = sample->time;
2790 		break;
2791 
2792 	default:
2793 		break;
2794 	}
2795 
2796 	return 0;
2797 }
2798 
2799 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2800 			     struct perf_record_auxtrace *event)
2801 {
2802 	struct auxtrace_buffer *buf;
2803 	unsigned int i;
2804 	/*
2805 	 * Find all buffers with same reference in the queues and dump them.
2806 	 * This is because the queues can contain multiple entries of the same
2807 	 * buffer that were split on aux records.
2808 	 */
2809 	for (i = 0; i < etm->queues.nr_queues; ++i)
2810 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2811 			if (buf->reference == event->reference)
2812 				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2813 }
2814 
2815 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2816 					  union perf_event *event,
2817 					  struct perf_tool *tool __maybe_unused)
2818 {
2819 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2820 						   struct cs_etm_auxtrace,
2821 						   auxtrace);
2822 	if (!etm->data_queued) {
2823 		struct auxtrace_buffer *buffer;
2824 		off_t  data_offset;
2825 		int fd = perf_data__fd(session->data);
2826 		bool is_pipe = perf_data__is_pipe(session->data);
2827 		int err;
2828 		int idx = event->auxtrace.idx;
2829 
2830 		if (is_pipe)
2831 			data_offset = 0;
2832 		else {
2833 			data_offset = lseek(fd, 0, SEEK_CUR);
2834 			if (data_offset == -1)
2835 				return -errno;
2836 		}
2837 
2838 		err = auxtrace_queues__add_event(&etm->queues, session,
2839 						 event, data_offset, &buffer);
2840 		if (err)
2841 			return err;
2842 
2843 		/*
2844 		 * Knowing if the trace is formatted or not requires a lookup of
2845 		 * the aux record so only works in non-piped mode where data is
2846 		 * queued in cs_etm__queue_aux_records(). Always assume
2847 		 * formatted in piped mode (true).
2848 		 */
2849 		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2850 					  idx, true, -1);
2851 		if (err)
2852 			return err;
2853 
2854 		if (dump_trace)
2855 			if (auxtrace_buffer__get_data(buffer, fd)) {
2856 				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2857 				auxtrace_buffer__put_data(buffer);
2858 			}
2859 	} else if (dump_trace)
2860 		dump_queued_data(etm, &event->auxtrace);
2861 
2862 	return 0;
2863 }
2864 
2865 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2866 {
2867 	struct evsel *evsel;
2868 	struct evlist *evlist = etm->session->evlist;
2869 
2870 	/* Override timeless mode with user input from --itrace=Z */
2871 	if (etm->synth_opts.timeless_decoding) {
2872 		etm->timeless_decoding = true;
2873 		return 0;
2874 	}
2875 
2876 	/*
2877 	 * Find the cs_etm evsel and look at what its timestamp setting was
2878 	 */
2879 	evlist__for_each_entry(evlist, evsel)
2880 		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2881 			etm->timeless_decoding =
2882 				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2883 			return 0;
2884 		}
2885 
2886 	pr_err("CS ETM: Couldn't find ETM evsel\n");
2887 	return -EINVAL;
2888 }
2889 
2890 /*
2891  * Read a single cpu parameter block from the auxtrace_info priv block.
2892  *
2893  * For version 1 there is a per cpu nr_params entry. If we are handling
2894  * version 1 file, then there may be less, the same, or more params
2895  * indicated by this value than the compile time number we understand.
2896  *
2897  * For a version 0 info block, there are a fixed number, and we need to
2898  * fill out the nr_param value in the metadata we create.
2899  */
2900 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2901 				    int out_blk_size, int nr_params_v0)
2902 {
2903 	u64 *metadata = NULL;
2904 	int hdr_version;
2905 	int nr_in_params, nr_out_params, nr_cmn_params;
2906 	int i, k;
2907 
2908 	metadata = zalloc(sizeof(*metadata) * out_blk_size);
2909 	if (!metadata)
2910 		return NULL;
2911 
2912 	/* read block current index & version */
2913 	i = *buff_in_offset;
2914 	hdr_version = buff_in[CS_HEADER_VERSION];
2915 
2916 	if (!hdr_version) {
2917 	/* read version 0 info block into a version 1 metadata block  */
2918 		nr_in_params = nr_params_v0;
2919 		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2920 		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2921 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2922 		/* remaining block params at offset +1 from source */
2923 		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2924 			metadata[k + 1] = buff_in[i + k];
2925 		/* version 0 has 2 common params */
2926 		nr_cmn_params = 2;
2927 	} else {
2928 	/* read version 1 info block - input and output nr_params may differ */
2929 		/* version 1 has 3 common params */
2930 		nr_cmn_params = 3;
2931 		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2932 
2933 		/* if input has more params than output - skip excess */
2934 		nr_out_params = nr_in_params + nr_cmn_params;
2935 		if (nr_out_params > out_blk_size)
2936 			nr_out_params = out_blk_size;
2937 
2938 		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2939 			metadata[k] = buff_in[i + k];
2940 
2941 		/* record the actual nr params we copied */
2942 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2943 	}
2944 
2945 	/* adjust in offset by number of in params used */
2946 	i += nr_in_params + nr_cmn_params;
2947 	*buff_in_offset = i;
2948 	return metadata;
2949 }
2950 
2951 /**
2952  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2953  * on the bounds of aux_event, if it matches with the buffer that's at
2954  * file_offset.
2955  *
2956  * Normally, whole auxtrace buffers would be added to the queue. But we
2957  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2958  * is reset across each buffer, so splitting the buffers up in advance has
2959  * the same effect.
2960  */
2961 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2962 				      struct perf_record_aux *aux_event, struct perf_sample *sample)
2963 {
2964 	int err;
2965 	char buf[PERF_SAMPLE_MAX_SIZE];
2966 	union perf_event *auxtrace_event_union;
2967 	struct perf_record_auxtrace *auxtrace_event;
2968 	union perf_event auxtrace_fragment;
2969 	__u64 aux_offset, aux_size;
2970 	__u32 idx;
2971 	bool formatted;
2972 
2973 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2974 						   struct cs_etm_auxtrace,
2975 						   auxtrace);
2976 
2977 	/*
2978 	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2979 	 * from looping through the auxtrace index.
2980 	 */
2981 	err = perf_session__peek_event(session, file_offset, buf,
2982 				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2983 	if (err)
2984 		return err;
2985 	auxtrace_event = &auxtrace_event_union->auxtrace;
2986 	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2987 		return -EINVAL;
2988 
2989 	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2990 		auxtrace_event->header.size != sz) {
2991 		return -EINVAL;
2992 	}
2993 
2994 	/*
2995 	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
2996 	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
2997 	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
2998 	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
2999 	 * Return 'not found' if mismatch.
3000 	 */
3001 	if (auxtrace_event->cpu == (__u32) -1) {
3002 		etm->per_thread_decoding = true;
3003 		if (auxtrace_event->tid != sample->tid)
3004 			return 1;
3005 	} else if (auxtrace_event->cpu != sample->cpu) {
3006 		if (etm->per_thread_decoding) {
3007 			/*
3008 			 * Found a per-cpu buffer after a per-thread one was
3009 			 * already found
3010 			 */
3011 			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3012 			return -EINVAL;
3013 		}
3014 		return 1;
3015 	}
3016 
3017 	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3018 		/*
3019 		 * Clamp size in snapshot mode. The buffer size is clamped in
3020 		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3021 		 * the buffer size.
3022 		 */
3023 		aux_size = min(aux_event->aux_size, auxtrace_event->size);
3024 
3025 		/*
3026 		 * In this mode, the head also points to the end of the buffer so aux_offset
3027 		 * needs to have the size subtracted so it points to the beginning as in normal mode
3028 		 */
3029 		aux_offset = aux_event->aux_offset - aux_size;
3030 	} else {
3031 		aux_size = aux_event->aux_size;
3032 		aux_offset = aux_event->aux_offset;
3033 	}
3034 
3035 	if (aux_offset >= auxtrace_event->offset &&
3036 	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3037 		/*
3038 		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3039 		 * based on the sizes of the aux event, and queue that fragment.
3040 		 */
3041 		auxtrace_fragment.auxtrace = *auxtrace_event;
3042 		auxtrace_fragment.auxtrace.size = aux_size;
3043 		auxtrace_fragment.auxtrace.offset = aux_offset;
3044 		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3045 
3046 		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3047 			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3048 		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3049 						 file_offset, NULL);
3050 		if (err)
3051 			return err;
3052 
3053 		idx = auxtrace_event->idx;
3054 		formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
3055 		return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
3056 					   idx, formatted, sample->cpu);
3057 	}
3058 
3059 	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3060 	return 1;
3061 }
3062 
3063 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3064 					u64 offset __maybe_unused, void *data __maybe_unused)
3065 {
3066 	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3067 	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3068 		(*(int *)data)++; /* increment found count */
3069 		return cs_etm__process_aux_output_hw_id(session, event);
3070 	}
3071 	return 0;
3072 }
3073 
3074 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3075 					u64 offset __maybe_unused, void *data __maybe_unused)
3076 {
3077 	struct perf_sample sample;
3078 	int ret;
3079 	struct auxtrace_index_entry *ent;
3080 	struct auxtrace_index *auxtrace_index;
3081 	struct evsel *evsel;
3082 	size_t i;
3083 
3084 	/* Don't care about any other events, we're only queuing buffers for AUX events */
3085 	if (event->header.type != PERF_RECORD_AUX)
3086 		return 0;
3087 
3088 	if (event->header.size < sizeof(struct perf_record_aux))
3089 		return -EINVAL;
3090 
3091 	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3092 	if (!event->aux.aux_size)
3093 		return 0;
3094 
3095 	/*
3096 	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3097 	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3098 	 */
3099 	evsel = evlist__event2evsel(session->evlist, event);
3100 	if (!evsel)
3101 		return -EINVAL;
3102 	ret = evsel__parse_sample(evsel, event, &sample);
3103 	if (ret)
3104 		return ret;
3105 
3106 	/*
3107 	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3108 	 */
3109 	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3110 		for (i = 0; i < auxtrace_index->nr; i++) {
3111 			ent = &auxtrace_index->entries[i];
3112 			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3113 							 ent->sz, &event->aux, &sample);
3114 			/*
3115 			 * Stop search on error or successful values. Continue search on
3116 			 * 1 ('not found')
3117 			 */
3118 			if (ret != 1)
3119 				return ret;
3120 		}
3121 	}
3122 
3123 	/*
3124 	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3125 	 * don't exit with an error because it will still be possible to decode other aux records.
3126 	 */
3127 	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3128 	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3129 	return 0;
3130 }
3131 
3132 static int cs_etm__queue_aux_records(struct perf_session *session)
3133 {
3134 	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3135 								struct auxtrace_index, list);
3136 	if (index && index->nr > 0)
3137 		return perf_session__peek_events(session, session->header.data_offset,
3138 						 session->header.data_size,
3139 						 cs_etm__queue_aux_records_cb, NULL);
3140 
3141 	/*
3142 	 * We would get here if there are no entries in the index (either no auxtrace
3143 	 * buffers or no index at all). Fail silently as there is the possibility of
3144 	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3145 	 * false.
3146 	 *
3147 	 * In that scenario, buffers will not be split by AUX records.
3148 	 */
3149 	return 0;
3150 }
3151 
3152 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3153 				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3154 
3155 /*
3156  * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3157  * timestamps).
3158  */
3159 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3160 {
3161 	int j;
3162 
3163 	for (j = 0; j < num_cpu; j++) {
3164 		switch (metadata[j][CS_ETM_MAGIC]) {
3165 		case __perf_cs_etmv4_magic:
3166 			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3167 				return false;
3168 			break;
3169 		case __perf_cs_ete_magic:
3170 			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3171 				return false;
3172 			break;
3173 		default:
3174 			/* Unknown / unsupported magic number. */
3175 			return false;
3176 		}
3177 	}
3178 	return true;
3179 }
3180 
3181 /* map trace ids to correct metadata block, from information in metadata */
3182 static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
3183 {
3184 	u64 cs_etm_magic;
3185 	u8 trace_chan_id;
3186 	int i, err;
3187 
3188 	for (i = 0; i < num_cpu; i++) {
3189 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3190 		switch (cs_etm_magic) {
3191 		case __perf_cs_etmv3_magic:
3192 			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3193 			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3194 			break;
3195 		case __perf_cs_etmv4_magic:
3196 		case __perf_cs_ete_magic:
3197 			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3198 			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3199 			break;
3200 		default:
3201 			/* unknown magic number */
3202 			return -EINVAL;
3203 		}
3204 		err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
3205 		if (err)
3206 			return err;
3207 	}
3208 	return 0;
3209 }
3210 
3211 /*
3212  * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3213  * unused value to reduce the number of unneeded decoders created.
3214  */
3215 static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3216 {
3217 	u64 cs_etm_magic;
3218 	int i;
3219 
3220 	for (i = 0; i < num_cpu; i++) {
3221 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3222 		switch (cs_etm_magic) {
3223 		case __perf_cs_etmv3_magic:
3224 			if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3225 				metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3226 			break;
3227 		case __perf_cs_etmv4_magic:
3228 		case __perf_cs_ete_magic:
3229 			if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3230 				metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3231 			break;
3232 		default:
3233 			/* unknown magic number */
3234 			return -EINVAL;
3235 		}
3236 	}
3237 	return 0;
3238 }
3239 
3240 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3241 				       struct perf_session *session)
3242 {
3243 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3244 	struct cs_etm_auxtrace *etm = NULL;
3245 	struct perf_record_time_conv *tc = &session->time_conv;
3246 	int event_header_size = sizeof(struct perf_event_header);
3247 	int total_size = auxtrace_info->header.size;
3248 	int priv_size = 0;
3249 	int num_cpu;
3250 	int err = 0;
3251 	int aux_hw_id_found;
3252 	int i, j;
3253 	u64 *ptr = NULL;
3254 	u64 **metadata = NULL;
3255 
3256 	/*
3257 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
3258 	 * has to be made for each packet that gets decoded, optimizing access
3259 	 * in anything other than a sequential array is worth doing.
3260 	 */
3261 	traceid_list = intlist__new(NULL);
3262 	if (!traceid_list)
3263 		return -ENOMEM;
3264 
3265 	/* First the global part */
3266 	ptr = (u64 *) auxtrace_info->priv;
3267 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3268 	metadata = zalloc(sizeof(*metadata) * num_cpu);
3269 	if (!metadata) {
3270 		err = -ENOMEM;
3271 		goto err_free_traceid_list;
3272 	}
3273 
3274 	/* Start parsing after the common part of the header */
3275 	i = CS_HEADER_VERSION_MAX;
3276 
3277 	/*
3278 	 * The metadata is stored in the auxtrace_info section and encodes
3279 	 * the configuration of the ARM embedded trace macrocell which is
3280 	 * required by the trace decoder to properly decode the trace due
3281 	 * to its highly compressed nature.
3282 	 */
3283 	for (j = 0; j < num_cpu; j++) {
3284 		if (ptr[i] == __perf_cs_etmv3_magic) {
3285 			metadata[j] =
3286 				cs_etm__create_meta_blk(ptr, &i,
3287 							CS_ETM_PRIV_MAX,
3288 							CS_ETM_NR_TRC_PARAMS_V0);
3289 		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3290 			metadata[j] =
3291 				cs_etm__create_meta_blk(ptr, &i,
3292 							CS_ETMV4_PRIV_MAX,
3293 							CS_ETMV4_NR_TRC_PARAMS_V0);
3294 		} else if (ptr[i] == __perf_cs_ete_magic) {
3295 			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3296 		} else {
3297 			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3298 				  ptr[i]);
3299 			err = -EINVAL;
3300 			goto err_free_metadata;
3301 		}
3302 
3303 		if (!metadata[j]) {
3304 			err = -ENOMEM;
3305 			goto err_free_metadata;
3306 		}
3307 	}
3308 
3309 	/*
3310 	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3311 	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3312 	 * global metadata, and each cpu's metadata respectively.
3313 	 * The following tests if the correct number of double words was
3314 	 * present in the auxtrace info section.
3315 	 */
3316 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3317 	if (i * 8 != priv_size) {
3318 		err = -EINVAL;
3319 		goto err_free_metadata;
3320 	}
3321 
3322 	etm = zalloc(sizeof(*etm));
3323 
3324 	if (!etm) {
3325 		err = -ENOMEM;
3326 		goto err_free_metadata;
3327 	}
3328 
3329 	/*
3330 	 * As all the ETMs run at the same exception level, the system should
3331 	 * have the same PID format crossing CPUs.  So cache the PID format
3332 	 * and reuse it for sequential decoding.
3333 	 */
3334 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3335 
3336 	err = auxtrace_queues__init(&etm->queues);
3337 	if (err)
3338 		goto err_free_etm;
3339 
3340 	if (session->itrace_synth_opts->set) {
3341 		etm->synth_opts = *session->itrace_synth_opts;
3342 	} else {
3343 		itrace_synth_opts__set_default(&etm->synth_opts,
3344 				session->itrace_synth_opts->default_no_sample);
3345 		etm->synth_opts.callchain = false;
3346 	}
3347 
3348 	etm->session = session;
3349 
3350 	etm->num_cpu = num_cpu;
3351 	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3352 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3353 	etm->metadata = metadata;
3354 	etm->auxtrace_type = auxtrace_info->type;
3355 
3356 	if (etm->synth_opts.use_timestamp)
3357 		/*
3358 		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3359 		 * therefore the decoder cannot know if the timestamp trace is
3360 		 * same with the kernel time.
3361 		 *
3362 		 * If a user has knowledge for the working platform and can
3363 		 * specify itrace option 'T' to tell decoder to forcely use the
3364 		 * traced timestamp as the kernel time.
3365 		 */
3366 		etm->has_virtual_ts = true;
3367 	else
3368 		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3369 		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3370 
3371 	if (!etm->has_virtual_ts)
3372 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3373 			    "The time field of the samples will not be set accurately.\n"
3374 			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3375 			    "you can specify the itrace option 'T' for timestamp decoding\n"
3376 			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3377 
3378 	etm->auxtrace.process_event = cs_etm__process_event;
3379 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3380 	etm->auxtrace.flush_events = cs_etm__flush_events;
3381 	etm->auxtrace.free_events = cs_etm__free_events;
3382 	etm->auxtrace.free = cs_etm__free;
3383 	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3384 	session->auxtrace = &etm->auxtrace;
3385 
3386 	err = cs_etm__setup_timeless_decoding(etm);
3387 	if (err)
3388 		return err;
3389 
3390 	etm->tc.time_shift = tc->time_shift;
3391 	etm->tc.time_mult = tc->time_mult;
3392 	etm->tc.time_zero = tc->time_zero;
3393 	if (event_contains(*tc, time_cycles)) {
3394 		etm->tc.time_cycles = tc->time_cycles;
3395 		etm->tc.time_mask = tc->time_mask;
3396 		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3397 		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3398 	}
3399 	err = cs_etm__synth_events(etm, session);
3400 	if (err)
3401 		goto err_free_queues;
3402 
3403 	/*
3404 	 * Map Trace ID values to CPU metadata.
3405 	 *
3406 	 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3407 	 * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3408 	 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3409 	 *
3410 	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3411 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3412 	 * in which case a different value will be used. This means an older perf may still
3413 	 * be able to record and read files generate on a newer system.
3414 	 *
3415 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3416 	 * those packets. If they are there then the values will be mapped and plugged into
3417 	 * the metadata. We then set any remaining metadata values with the used flag to a
3418 	 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3419 	 *
3420 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3421 	 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
3422 	 * flags if present.
3423 	 */
3424 
3425 	/* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3426 	aux_hw_id_found = 0;
3427 	err = perf_session__peek_events(session, session->header.data_offset,
3428 					session->header.data_size,
3429 					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3430 	if (err)
3431 		goto err_free_queues;
3432 
3433 	/* if HW ID found then clear any unused metadata ID values */
3434 	if (aux_hw_id_found)
3435 		err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3436 	/* otherwise, this is a file with metadata values only, map from metadata */
3437 	else
3438 		err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3439 
3440 	if (err)
3441 		goto err_free_queues;
3442 
3443 	err = cs_etm__queue_aux_records(session);
3444 	if (err)
3445 		goto err_free_queues;
3446 
3447 	etm->data_queued = etm->queues.populated;
3448 	return 0;
3449 
3450 err_free_queues:
3451 	auxtrace_queues__free(&etm->queues);
3452 	session->auxtrace = NULL;
3453 err_free_etm:
3454 	zfree(&etm);
3455 err_free_metadata:
3456 	/* No need to check @metadata[j], free(NULL) is supported */
3457 	for (j = 0; j < num_cpu; j++)
3458 		zfree(&metadata[j]);
3459 	zfree(&metadata);
3460 err_free_traceid_list:
3461 	intlist__delete(traceid_list);
3462 	return err;
3463 }
3464