xref: /linux/tools/perf/util/cs-etm.c (revision cdd30ebb1b9f36159d66f088b61aee264e649d7a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17 
18 #include <stdlib.h>
19 
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42 
43 struct cs_etm_auxtrace {
44 	struct auxtrace auxtrace;
45 	struct auxtrace_queues queues;
46 	struct auxtrace_heap heap;
47 	struct itrace_synth_opts synth_opts;
48 	struct perf_session *session;
49 	struct perf_tsc_conversion tc;
50 
51 	/*
52 	 * Timeless has no timestamps in the trace so overlapping mmap lookups
53 	 * are less accurate but produces smaller trace data. We use context IDs
54 	 * in the trace instead of matching timestamps with fork records so
55 	 * they're not really needed in the general case. Overlapping mmaps
56 	 * happen in cases like between a fork and an exec.
57 	 */
58 	bool timeless_decoding;
59 
60 	/*
61 	 * Per-thread ignores the trace channel ID and instead assumes that
62 	 * everything in a buffer comes from the same process regardless of
63 	 * which CPU it ran on. It also implies no context IDs so the TID is
64 	 * taken from the auxtrace buffer.
65 	 */
66 	bool per_thread_decoding;
67 	bool snapshot_mode;
68 	bool data_queued;
69 	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
70 
71 	int num_cpu;
72 	u64 latest_kernel_timestamp;
73 	u32 auxtrace_type;
74 	u64 branches_sample_type;
75 	u64 branches_id;
76 	u64 instructions_sample_type;
77 	u64 instructions_sample_period;
78 	u64 instructions_id;
79 	u64 **metadata;
80 	unsigned int pmu_type;
81 	enum cs_etm_pid_fmt pid_fmt;
82 };
83 
84 struct cs_etm_traceid_queue {
85 	u8 trace_chan_id;
86 	u64 period_instructions;
87 	size_t last_branch_pos;
88 	union perf_event *event_buf;
89 	struct thread *thread;
90 	struct thread *prev_packet_thread;
91 	ocsd_ex_level prev_packet_el;
92 	ocsd_ex_level el;
93 	struct branch_stack *last_branch;
94 	struct branch_stack *last_branch_rb;
95 	struct cs_etm_packet *prev_packet;
96 	struct cs_etm_packet *packet;
97 	struct cs_etm_packet_queue packet_queue;
98 };
99 
/* Whether a queue's trace data is wrapped in CoreSight frames. */
enum cs_etm_format {
	UNSET       = 0,	/* Not determined yet */
	FORMATTED   = 1,
	UNFORMATTED = 2
};
105 
106 struct cs_etm_queue {
107 	struct cs_etm_auxtrace *etm;
108 	struct cs_etm_decoder *decoder;
109 	struct auxtrace_buffer *buffer;
110 	unsigned int queue_nr;
111 	u8 pending_timestamp_chan_id;
112 	enum cs_etm_format format;
113 	u64 offset;
114 	const unsigned char *buf;
115 	size_t buf_len, buf_used;
116 	/* Conversion between traceID and index in traceid_queues array */
117 	struct intlist *traceid_queues_list;
118 	struct cs_etm_traceid_queue **traceid_queues;
119 	/* Conversion between traceID and metadata pointers */
120 	struct intlist *traceid_list;
121 	/*
122 	 * Same as traceid_list, but traceid_list may be a reference to another
123 	 * queue's which has a matching sink ID.
124 	 */
125 	struct intlist *own_traceid_list;
126 	u32 sink_id;
127 };
128 
129 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
130 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
131 					   pid_t tid);
132 static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
133 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
134 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
135 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
136 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
137 
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify the auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "a | b") are shifted or masked as a whole instead of being split
 * by operator precedence.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
		      (((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
/* Marker for a queue whose sink ID has not been learnt yet */
#define SINK_UNSET ((u32) -1)
152 
153 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
154 {
155 	etmidr &= ETMIDR_PTM_VERSION;
156 
157 	if (etmidr == ETMIDR_PTM_VERSION)
158 		return CS_ETM_PROTO_PTM;
159 
160 	return CS_ETM_PROTO_ETMV3;
161 }
162 
163 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
164 {
165 	struct int_node *inode;
166 	u64 *metadata;
167 
168 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
169 	if (!inode)
170 		return -EINVAL;
171 
172 	metadata = inode->priv;
173 	*magic = metadata[CS_ETM_MAGIC];
174 	return 0;
175 }
176 
177 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
178 {
179 	struct int_node *inode;
180 	u64 *metadata;
181 
182 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
183 	if (!inode)
184 		return -EINVAL;
185 
186 	metadata = inode->priv;
187 	*cpu = (int)metadata[CS_ETM_CPU];
188 	return 0;
189 }
190 
191 /*
192  * The returned PID format is presented as an enum:
193  *
194  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
195  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
196  *   CS_ETM_PIDFMT_NONE: No context IDs
197  *
198  * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
199  * are enabled at the same time when the session runs on an EL2 kernel.
200  * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
201  * recorded in the trace data, the tool will selectively use
202  * CONTEXTIDR_EL2 as PID.
203  *
204  * The result is cached in etm->pid_fmt so this function only needs to be called
205  * when processing the aux info.
206  */
207 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
208 {
209 	u64 val;
210 
211 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
212 		val = metadata[CS_ETM_ETMCR];
213 		/* CONTEXTIDR is traced */
214 		if (val & BIT(ETM_OPT_CTXTID))
215 			return CS_ETM_PIDFMT_CTXTID;
216 	} else {
217 		val = metadata[CS_ETMV4_TRCCONFIGR];
218 		/* CONTEXTIDR_EL2 is traced */
219 		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
220 			return CS_ETM_PIDFMT_CTXTID2;
221 		/* CONTEXTIDR_EL1 is traced */
222 		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
223 			return CS_ETM_PIDFMT_CTXTID;
224 	}
225 
226 	return CS_ETM_PIDFMT_NONE;
227 }
228 
229 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
230 {
231 	return etmq->etm->pid_fmt;
232 }
233 
234 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
235 					u8 trace_chan_id, u64 *cpu_metadata)
236 {
237 	/* Get an RB node for this CPU */
238 	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
239 
240 	/* Something went wrong, no need to continue */
241 	if (!inode)
242 		return -ENOMEM;
243 
244 	/* Disallow re-mapping a different traceID to metadata pair. */
245 	if (inode->priv) {
246 		u64 *curr_cpu_data = inode->priv;
247 		u8 curr_chan_id;
248 		int err;
249 
250 		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
251 			/*
252 			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
253 			 * are expected (but not supported) in per-thread mode,
254 			 * rather than signifying an error.
255 			 */
256 			if (etmq->etm->per_thread_decoding)
257 				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
258 			else
259 				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
260 
261 			return -EINVAL;
262 		}
263 
264 		/* check that the mapped ID matches */
265 		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
266 		if (err)
267 			return err;
268 
269 		if (curr_chan_id != trace_chan_id) {
270 			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
271 			return -EINVAL;
272 		}
273 
274 		/* Skip re-adding the same mappings if everything matched */
275 		return 0;
276 	}
277 
278 	/* Not one we've seen before, associate the traceID with the metadata pointer */
279 	inode->priv = cpu_metadata;
280 
281 	return 0;
282 }
283 
284 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
285 {
286 	if (etm->per_thread_decoding)
287 		return etm->queues.queue_array[0].priv;
288 	else
289 		return etm->queues.queue_array[cpu].priv;
290 }
291 
292 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
293 				   u64 *cpu_metadata)
294 {
295 	struct cs_etm_queue *etmq;
296 
297 	/*
298 	 * If the queue is unformatted then only save one mapping in the
299 	 * queue associated with that CPU so only one decoder is made.
300 	 */
301 	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
302 	if (etmq->format == UNFORMATTED)
303 		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
304 						    cpu_metadata);
305 
306 	/*
307 	 * Otherwise, version 0 trace IDs are global so save them into every
308 	 * queue.
309 	 */
310 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
311 		int ret;
312 
313 		etmq = etm->queues.queue_array[i].priv;
314 		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
315 						   cpu_metadata);
316 		if (ret)
317 			return ret;
318 	}
319 
320 	return 0;
321 }
322 
323 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
324 				       u64 hw_id)
325 {
326 	int err;
327 	u64 *cpu_data;
328 	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
329 
330 	cpu_data = get_cpu_data(etm, cpu);
331 	if (cpu_data == NULL)
332 		return -EINVAL;
333 
334 	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
335 	if (err)
336 		return err;
337 
338 	/*
339 	 * if we are picking up the association from the packet, need to plug
340 	 * the correct trace ID into the metadata for setting up decoders later.
341 	 */
342 	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
343 }
344 
345 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
346 					 u64 hw_id)
347 {
348 	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
349 	int ret;
350 	u64 *cpu_data;
351 	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
352 	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
353 
354 	/*
355 	 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
356 	 * let it pass for now until an actual overlapping trace ID is hit. In
357 	 * most cases IDs won't overlap even if the sink changes.
358 	 */
359 	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
360 	    etmq->sink_id != sink_id) {
361 		pr_err("CS_ETM: mismatch between sink IDs\n");
362 		return -EINVAL;
363 	}
364 
365 	etmq->sink_id = sink_id;
366 
367 	/* Find which other queues use this sink and link their ID maps */
368 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
369 		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
370 
371 		/* Different sinks, skip */
372 		if (other_etmq->sink_id != etmq->sink_id)
373 			continue;
374 
375 		/* Already linked, skip */
376 		if (other_etmq->traceid_list == etmq->traceid_list)
377 			continue;
378 
379 		/* At the point of first linking, this one should be empty */
380 		if (!intlist__empty(etmq->traceid_list)) {
381 			pr_err("CS_ETM: Can't link populated trace ID lists\n");
382 			return -EINVAL;
383 		}
384 
385 		etmq->own_traceid_list = NULL;
386 		intlist__delete(etmq->traceid_list);
387 		etmq->traceid_list = other_etmq->traceid_list;
388 		break;
389 	}
390 
391 	cpu_data = get_cpu_data(etm, cpu);
392 	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
393 	if (ret)
394 		return ret;
395 
396 	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
397 	if (ret)
398 		return ret;
399 
400 	return 0;
401 }
402 
403 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
404 {
405 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
406 
407 	switch (cs_etm_magic) {
408 	case __perf_cs_etmv3_magic:
409 		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
410 				      CORESIGHT_TRACE_ID_VAL_MASK);
411 		break;
412 	case __perf_cs_etmv4_magic:
413 	case __perf_cs_ete_magic:
414 		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
415 				      CORESIGHT_TRACE_ID_VAL_MASK);
416 		break;
417 	default:
418 		return -EINVAL;
419 	}
420 	return 0;
421 }
422 
423 /*
424  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
425  */
426 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
427 {
428 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
429 
430 	switch (cs_etm_magic) {
431 	case __perf_cs_etmv3_magic:
432 		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
433 		break;
434 	case __perf_cs_etmv4_magic:
435 	case __perf_cs_ete_magic:
436 		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
437 		break;
438 
439 	default:
440 		return -EINVAL;
441 	}
442 	return 0;
443 }
444 
445 /*
446  * Get a metadata index for a specific cpu from an array.
447  *
448  */
449 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
450 {
451 	int i;
452 
453 	for (i = 0; i < etm->num_cpu; i++) {
454 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
455 			return i;
456 		}
457 	}
458 
459 	return -1;
460 }
461 
462 /*
463  * Get a metadata for a specific cpu from an array.
464  *
465  */
466 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
467 {
468 	int idx = get_cpu_data_idx(etm, cpu);
469 
470 	return (idx != -1) ? etm->metadata[idx] : NULL;
471 }
472 
473 /*
474  * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
475  *
476  * The payload associates the Trace ID and the CPU.
477  * The routine is tolerant of seeing multiple packets with the same association,
478  * but a CPU / Trace ID association changing during a session is an error.
479  */
480 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
481 					    union perf_event *event)
482 {
483 	struct cs_etm_auxtrace *etm;
484 	struct perf_sample sample;
485 	struct evsel *evsel;
486 	u64 hw_id;
487 	int cpu, version, err;
488 
489 	/* extract and parse the HW ID */
490 	hw_id = event->aux_output_hw_id.hw_id;
491 	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
492 
493 	/* check that we can handle this version */
494 	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
495 		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
496 		       version);
497 		return -EINVAL;
498 	}
499 
500 	/* get access to the etm metadata */
501 	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
502 	if (!etm || !etm->metadata)
503 		return -EINVAL;
504 
505 	/* parse the sample to get the CPU */
506 	evsel = evlist__event2evsel(session->evlist, event);
507 	if (!evsel)
508 		return -EINVAL;
509 	err = evsel__parse_sample(evsel, event, &sample);
510 	if (err)
511 		return err;
512 	cpu = sample.cpu;
513 	if (cpu == -1) {
514 		/* no CPU in the sample - possibly recorded with an old version of perf */
515 		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
516 		return -EINVAL;
517 	}
518 
519 	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
520 		return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
521 
522 	return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
523 }
524 
525 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
526 					      u8 trace_chan_id)
527 {
528 	/*
529 	 * When a timestamp packet is encountered the backend code
530 	 * is stopped so that the front end has time to process packets
531 	 * that were accumulated in the traceID queue.  Since there can
532 	 * be more than one channel per cs_etm_queue, we need to specify
533 	 * what traceID queue needs servicing.
534 	 */
535 	etmq->pending_timestamp_chan_id = trace_chan_id;
536 }
537 
538 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
539 				      u8 *trace_chan_id)
540 {
541 	struct cs_etm_packet_queue *packet_queue;
542 
543 	if (!etmq->pending_timestamp_chan_id)
544 		return 0;
545 
546 	if (trace_chan_id)
547 		*trace_chan_id = etmq->pending_timestamp_chan_id;
548 
549 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
550 						     etmq->pending_timestamp_chan_id);
551 	if (!packet_queue)
552 		return 0;
553 
554 	/* Acknowledge pending status */
555 	etmq->pending_timestamp_chan_id = 0;
556 
557 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
558 	return packet_queue->cs_timestamp;
559 }
560 
561 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
562 {
563 	int i;
564 
565 	queue->head = 0;
566 	queue->tail = 0;
567 	queue->packet_count = 0;
568 	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
569 		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
570 		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
571 		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
572 		queue->packet_buffer[i].instr_count = 0;
573 		queue->packet_buffer[i].last_instr_taken_branch = false;
574 		queue->packet_buffer[i].last_instr_size = 0;
575 		queue->packet_buffer[i].last_instr_type = 0;
576 		queue->packet_buffer[i].last_instr_subtype = 0;
577 		queue->packet_buffer[i].last_instr_cond = 0;
578 		queue->packet_buffer[i].flags = 0;
579 		queue->packet_buffer[i].exception_number = UINT32_MAX;
580 		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
581 		queue->packet_buffer[i].cpu = INT_MIN;
582 	}
583 }
584 
585 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
586 {
587 	int idx;
588 	struct int_node *inode;
589 	struct cs_etm_traceid_queue *tidq;
590 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
591 
592 	intlist__for_each_entry(inode, traceid_queues_list) {
593 		idx = (int)(intptr_t)inode->priv;
594 		tidq = etmq->traceid_queues[idx];
595 		cs_etm__clear_packet_queue(&tidq->packet_queue);
596 	}
597 }
598 
599 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
600 				      struct cs_etm_traceid_queue *tidq,
601 				      u8 trace_chan_id)
602 {
603 	int rc = -ENOMEM;
604 	struct auxtrace_queue *queue;
605 	struct cs_etm_auxtrace *etm = etmq->etm;
606 
607 	cs_etm__clear_packet_queue(&tidq->packet_queue);
608 
609 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
610 	tidq->trace_chan_id = trace_chan_id;
611 	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
612 	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
613 					       queue->tid);
614 	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
615 
616 	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
617 	if (!tidq->packet)
618 		goto out;
619 
620 	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
621 	if (!tidq->prev_packet)
622 		goto out_free;
623 
624 	if (etm->synth_opts.last_branch) {
625 		size_t sz = sizeof(struct branch_stack);
626 
627 		sz += etm->synth_opts.last_branch_sz *
628 		      sizeof(struct branch_entry);
629 		tidq->last_branch = zalloc(sz);
630 		if (!tidq->last_branch)
631 			goto out_free;
632 		tidq->last_branch_rb = zalloc(sz);
633 		if (!tidq->last_branch_rb)
634 			goto out_free;
635 	}
636 
637 	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
638 	if (!tidq->event_buf)
639 		goto out_free;
640 
641 	return 0;
642 
643 out_free:
644 	zfree(&tidq->last_branch_rb);
645 	zfree(&tidq->last_branch);
646 	zfree(&tidq->prev_packet);
647 	zfree(&tidq->packet);
648 out:
649 	return rc;
650 }
651 
652 static struct cs_etm_traceid_queue
653 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
654 {
655 	int idx;
656 	struct int_node *inode;
657 	struct intlist *traceid_queues_list;
658 	struct cs_etm_traceid_queue *tidq, **traceid_queues;
659 	struct cs_etm_auxtrace *etm = etmq->etm;
660 
661 	if (etm->per_thread_decoding)
662 		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
663 
664 	traceid_queues_list = etmq->traceid_queues_list;
665 
666 	/*
667 	 * Check if the traceid_queue exist for this traceID by looking
668 	 * in the queue list.
669 	 */
670 	inode = intlist__find(traceid_queues_list, trace_chan_id);
671 	if (inode) {
672 		idx = (int)(intptr_t)inode->priv;
673 		return etmq->traceid_queues[idx];
674 	}
675 
676 	/* We couldn't find a traceid_queue for this traceID, allocate one */
677 	tidq = malloc(sizeof(*tidq));
678 	if (!tidq)
679 		return NULL;
680 
681 	memset(tidq, 0, sizeof(*tidq));
682 
683 	/* Get a valid index for the new traceid_queue */
684 	idx = intlist__nr_entries(traceid_queues_list);
685 	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
686 	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
687 	if (!inode)
688 		goto out_free;
689 
690 	/* Associate this traceID with this index */
691 	inode->priv = (void *)(intptr_t)idx;
692 
693 	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
694 		goto out_free;
695 
696 	/* Grow the traceid_queues array by one unit */
697 	traceid_queues = etmq->traceid_queues;
698 	traceid_queues = reallocarray(traceid_queues,
699 				      idx + 1,
700 				      sizeof(*traceid_queues));
701 
702 	/*
703 	 * On failure reallocarray() returns NULL and the original block of
704 	 * memory is left untouched.
705 	 */
706 	if (!traceid_queues)
707 		goto out_free;
708 
709 	traceid_queues[idx] = tidq;
710 	etmq->traceid_queues = traceid_queues;
711 
712 	return etmq->traceid_queues[idx];
713 
714 out_free:
715 	/*
716 	 * Function intlist__remove() removes the inode from the list
717 	 * and delete the memory associated to it.
718 	 */
719 	intlist__remove(traceid_queues_list, inode);
720 	free(tidq);
721 
722 	return NULL;
723 }
724 
725 struct cs_etm_packet_queue
726 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
727 {
728 	struct cs_etm_traceid_queue *tidq;
729 
730 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
731 	if (tidq)
732 		return &tidq->packet_queue;
733 
734 	return NULL;
735 }
736 
737 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
738 				struct cs_etm_traceid_queue *tidq)
739 {
740 	struct cs_etm_packet *tmp;
741 
742 	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
743 	    etm->synth_opts.instructions) {
744 		/*
745 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
746 		 * the next incoming packet.
747 		 *
748 		 * Threads and exception levels are also tracked for both the
749 		 * previous and current packets. This is because the previous
750 		 * packet is used for the 'from' IP for branch samples, so the
751 		 * thread at that time must also be assigned to that sample.
752 		 * Across discontinuity packets the thread can change, so by
753 		 * tracking the thread for the previous packet the branch sample
754 		 * will have the correct info.
755 		 */
756 		tmp = tidq->packet;
757 		tidq->packet = tidq->prev_packet;
758 		tidq->prev_packet = tmp;
759 		tidq->prev_packet_el = tidq->el;
760 		thread__put(tidq->prev_packet_thread);
761 		tidq->prev_packet_thread = thread__get(tidq->thread);
762 	}
763 }
764 
765 static void cs_etm__packet_dump(const char *pkt_string, void *data)
766 {
767 	const char *color = PERF_COLOR_BLUE;
768 	int len = strlen(pkt_string);
769 	struct cs_etm_queue *etmq = data;
770 	char queue_nr[64];
771 
772 	if (verbose)
773 		snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr);
774 	else
775 		queue_nr[0] = '\0';
776 
777 	if (len && (pkt_string[len-1] == '\n'))
778 		color_fprintf(stdout, color, "	%s%s", queue_nr, pkt_string);
779 	else
780 		color_fprintf(stdout, color, "	%s%s\n", queue_nr, pkt_string);
781 
782 	fflush(stdout);
783 }
784 
785 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
786 					  u64 *metadata, u32 etmidr)
787 {
788 	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
789 	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
790 	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
791 }
792 
793 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
794 					  u64 *metadata)
795 {
796 	t_params->protocol = CS_ETM_PROTO_ETMV4i;
797 	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
798 	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
799 	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
800 	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
801 	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
802 	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
803 }
804 
805 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
806 					u64 *metadata)
807 {
808 	t_params->protocol = CS_ETM_PROTO_ETE;
809 	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
810 	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
811 	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
812 	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
813 	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
814 	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
815 	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
816 }
817 
818 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
819 				     struct cs_etm_queue *etmq)
820 {
821 	struct int_node *inode;
822 
823 	intlist__for_each_entry(inode, etmq->traceid_list) {
824 		u64 *metadata = inode->priv;
825 		u64 architecture = metadata[CS_ETM_MAGIC];
826 		u32 etmidr;
827 
828 		switch (architecture) {
829 		case __perf_cs_etmv3_magic:
830 			etmidr = metadata[CS_ETM_ETMIDR];
831 			cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
832 			break;
833 		case __perf_cs_etmv4_magic:
834 			cs_etm__set_trace_param_etmv4(t_params++, metadata);
835 			break;
836 		case __perf_cs_ete_magic:
837 			cs_etm__set_trace_param_ete(t_params++, metadata);
838 			break;
839 		default:
840 			return -EINVAL;
841 		}
842 	}
843 
844 	return 0;
845 }
846 
847 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
848 				       struct cs_etm_queue *etmq,
849 				       enum cs_etm_decoder_operation mode)
850 {
851 	int ret = -EINVAL;
852 
853 	if (!(mode < CS_ETM_OPERATION_MAX))
854 		goto out;
855 
856 	d_params->packet_printer = cs_etm__packet_dump;
857 	d_params->operation = mode;
858 	d_params->data = etmq;
859 	d_params->formatted = etmq->format == FORMATTED;
860 	d_params->fsyncs = false;
861 	d_params->hsyncs = false;
862 	d_params->frame_aligned = true;
863 
864 	ret = 0;
865 out:
866 	return ret;
867 }
868 
869 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
870 			       struct auxtrace_buffer *buffer)
871 {
872 	int ret;
873 	const char *color = PERF_COLOR_BLUE;
874 	size_t buffer_used = 0;
875 
876 	fprintf(stdout, "\n");
877 	color_fprintf(stdout, color,
878 		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
879 		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
880 
881 	do {
882 		size_t consumed;
883 
884 		ret = cs_etm_decoder__process_data_block(
885 				etmq->decoder, buffer->offset,
886 				&((u8 *)buffer->data)[buffer_used],
887 				buffer->size - buffer_used, &consumed);
888 		if (ret)
889 			break;
890 
891 		buffer_used += consumed;
892 	} while (buffer_used < buffer->size);
893 
894 	cs_etm_decoder__reset(etmq->decoder);
895 }
896 
897 static int cs_etm__flush_events(struct perf_session *session,
898 				const struct perf_tool *tool)
899 {
900 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
901 						   struct cs_etm_auxtrace,
902 						   auxtrace);
903 	if (dump_trace)
904 		return 0;
905 
906 	if (!tool->ordered_events)
907 		return -EINVAL;
908 
909 	if (etm->timeless_decoding) {
910 		/*
911 		 * Pass tid = -1 to process all queues. But likely they will have
912 		 * already been processed on PERF_RECORD_EXIT anyway.
913 		 */
914 		return cs_etm__process_timeless_queues(etm, -1);
915 	}
916 
917 	return cs_etm__process_timestamped_queues(etm);
918 }
919 
920 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
921 {
922 	int idx;
923 	uintptr_t priv;
924 	struct int_node *inode, *tmp;
925 	struct cs_etm_traceid_queue *tidq;
926 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
927 
928 	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
929 		priv = (uintptr_t)inode->priv;
930 		idx = priv;
931 
932 		/* Free this traceid_queue from the array */
933 		tidq = etmq->traceid_queues[idx];
934 		thread__zput(tidq->thread);
935 		thread__zput(tidq->prev_packet_thread);
936 		zfree(&tidq->event_buf);
937 		zfree(&tidq->last_branch);
938 		zfree(&tidq->last_branch_rb);
939 		zfree(&tidq->prev_packet);
940 		zfree(&tidq->packet);
941 		zfree(&tidq);
942 
943 		/*
944 		 * Function intlist__remove() removes the inode from the list
945 		 * and delete the memory associated to it.
946 		 */
947 		intlist__remove(traceid_queues_list, inode);
948 	}
949 
950 	/* Then the RB tree itself */
951 	intlist__delete(traceid_queues_list);
952 	etmq->traceid_queues_list = NULL;
953 
954 	/* finally free the traceid_queues array */
955 	zfree(&etmq->traceid_queues);
956 }
957 
958 static void cs_etm__free_queue(void *priv)
959 {
960 	struct int_node *inode, *tmp;
961 	struct cs_etm_queue *etmq = priv;
962 
963 	if (!etmq)
964 		return;
965 
966 	cs_etm_decoder__free(etmq->decoder);
967 	cs_etm__free_traceid_queues(etmq);
968 
969 	if (etmq->own_traceid_list) {
970 		/* First remove all traceID/metadata nodes for the RB tree */
971 		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
972 			intlist__remove(etmq->own_traceid_list, inode);
973 
974 		/* Then the RB tree itself */
975 		intlist__delete(etmq->own_traceid_list);
976 	}
977 
978 	free(etmq);
979 }
980 
981 static void cs_etm__free_events(struct perf_session *session)
982 {
983 	unsigned int i;
984 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
985 						   struct cs_etm_auxtrace,
986 						   auxtrace);
987 	struct auxtrace_queues *queues = &aux->queues;
988 
989 	for (i = 0; i < queues->nr_queues; i++) {
990 		cs_etm__free_queue(queues->queue_array[i].priv);
991 		queues->queue_array[i].priv = NULL;
992 	}
993 
994 	auxtrace_queues__free(queues);
995 }
996 
997 static void cs_etm__free(struct perf_session *session)
998 {
999 	int i;
1000 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1001 						   struct cs_etm_auxtrace,
1002 						   auxtrace);
1003 	cs_etm__free_events(session);
1004 	session->auxtrace = NULL;
1005 
1006 	for (i = 0; i < aux->num_cpu; i++)
1007 		zfree(&aux->metadata[i]);
1008 
1009 	zfree(&aux->metadata);
1010 	zfree(&aux);
1011 }
1012 
1013 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
1014 				      struct evsel *evsel)
1015 {
1016 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1017 						   struct cs_etm_auxtrace,
1018 						   auxtrace);
1019 
1020 	return evsel->core.attr.type == aux->pmu_type;
1021 }
1022 
1023 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
1024 					   ocsd_ex_level el)
1025 {
1026 	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
1027 
1028 	/*
1029 	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
1030 	 * running at EL1 assume everything is the host.
1031 	 */
1032 	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
1033 		return &etmq->etm->session->machines.host;
1034 
1035 	/*
1036 	 * Not perfect, but otherwise assume anything in EL1 is the default
1037 	 * guest, and everything else is the host. Distinguishing between guest
1038 	 * and host userspaces isn't currently supported either. Neither is
1039 	 * multiple guest support. All this does is reduce the likeliness of
1040 	 * decode errors where we look into the host kernel maps when it should
1041 	 * have been the guest maps.
1042 	 */
1043 	switch (el) {
1044 	case ocsd_EL1:
1045 		return machines__find_guest(&etmq->etm->session->machines,
1046 					    DEFAULT_GUEST_KERNEL_ID);
1047 	case ocsd_EL3:
1048 	case ocsd_EL2:
1049 	case ocsd_EL0:
1050 	case ocsd_EL_unknown:
1051 	default:
1052 		return &etmq->etm->session->machines.host;
1053 	}
1054 }
1055 
1056 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
1057 			   ocsd_ex_level el)
1058 {
1059 	struct machine *machine = cs_etm__get_machine(etmq, el);
1060 
1061 	if (address >= machine__kernel_start(machine)) {
1062 		if (machine__is_host(machine))
1063 			return PERF_RECORD_MISC_KERNEL;
1064 		else
1065 			return PERF_RECORD_MISC_GUEST_KERNEL;
1066 	} else {
1067 		if (machine__is_host(machine))
1068 			return PERF_RECORD_MISC_USER;
1069 		else {
1070 			/*
1071 			 * Can't really happen at the moment because
1072 			 * cs_etm__get_machine() will always return
1073 			 * machines.host for any non EL1 trace.
1074 			 */
1075 			return PERF_RECORD_MISC_GUEST_USER;
1076 		}
1077 	}
1078 }
1079 
1080 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
1081 			      u64 address, size_t size, u8 *buffer,
1082 			      const ocsd_mem_space_acc_t mem_space)
1083 {
1084 	u8  cpumode;
1085 	u64 offset;
1086 	int len;
1087 	struct addr_location al;
1088 	struct dso *dso;
1089 	struct cs_etm_traceid_queue *tidq;
1090 	int ret = 0;
1091 
1092 	if (!etmq)
1093 		return 0;
1094 
1095 	addr_location__init(&al);
1096 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1097 	if (!tidq)
1098 		goto out;
1099 
1100 	/*
1101 	 * We've already tracked EL along side the PID in cs_etm__set_thread()
1102 	 * so double check that it matches what OpenCSD thinks as well. It
1103 	 * doesn't distinguish between EL0 and EL1 for this mem access callback
1104 	 * so we had to do the extra tracking. Skip validation if it's any of
1105 	 * the 'any' values.
1106 	 */
1107 	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
1108 	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
1109 		if (mem_space & OCSD_MEM_SPACE_EL1N) {
1110 			/* Includes both non secure EL1 and EL0 */
1111 			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
1112 		} else if (mem_space & OCSD_MEM_SPACE_EL2)
1113 			assert(tidq->el == ocsd_EL2);
1114 		else if (mem_space & OCSD_MEM_SPACE_EL3)
1115 			assert(tidq->el == ocsd_EL3);
1116 	}
1117 
1118 	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1119 
1120 	if (!thread__find_map(tidq->thread, cpumode, address, &al))
1121 		goto out;
1122 
1123 	dso = map__dso(al.map);
1124 	if (!dso)
1125 		goto out;
1126 
1127 	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
1128 	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1129 		goto out;
1130 
1131 	offset = map__map_ip(al.map, address);
1132 
1133 	map__load(al.map);
1134 
1135 	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1136 				    offset, buffer, size);
1137 
1138 	if (len <= 0) {
1139 		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1140 				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1141 		if (!dso__auxtrace_warned(dso)) {
1142 			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1143 				address,
1144 				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
1145 			dso__set_auxtrace_warned(dso);
1146 		}
1147 		goto out;
1148 	}
1149 	ret = len;
1150 out:
1151 	addr_location__exit(&al);
1152 	return ret;
1153 }
1154 
1155 static struct cs_etm_queue *cs_etm__alloc_queue(void)
1156 {
1157 	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
1158 	if (!etmq)
1159 		return NULL;
1160 
1161 	etmq->traceid_queues_list = intlist__new(NULL);
1162 	if (!etmq->traceid_queues_list)
1163 		goto out_free;
1164 
1165 	/*
1166 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
1167 	 * has to be made for each packet that gets decoded, optimizing access
1168 	 * in anything other than a sequential array is worth doing.
1169 	 */
1170 	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
1171 	if (!etmq->traceid_list)
1172 		goto out_free;
1173 
1174 	return etmq;
1175 
1176 out_free:
1177 	intlist__delete(etmq->traceid_queues_list);
1178 	free(etmq);
1179 
1180 	return NULL;
1181 }
1182 
1183 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1184 			       struct auxtrace_queue *queue,
1185 			       unsigned int queue_nr)
1186 {
1187 	struct cs_etm_queue *etmq = queue->priv;
1188 
1189 	if (etmq)
1190 		return 0;
1191 
1192 	etmq = cs_etm__alloc_queue();
1193 
1194 	if (!etmq)
1195 		return -ENOMEM;
1196 
1197 	queue->priv = etmq;
1198 	etmq->etm = etm;
1199 	etmq->queue_nr = queue_nr;
1200 	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
1201 	etmq->offset = 0;
1202 	etmq->sink_id = SINK_UNSET;
1203 
1204 	return 0;
1205 }
1206 
1207 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1208 					    struct cs_etm_queue *etmq,
1209 					    unsigned int queue_nr)
1210 {
1211 	int ret = 0;
1212 	unsigned int cs_queue_nr;
1213 	u8 trace_chan_id;
1214 	u64 cs_timestamp;
1215 
1216 	/*
1217 	 * We are under a CPU-wide trace scenario.  As such we need to know
1218 	 * when the code that generated the traces started to execute so that
1219 	 * it can be correlated with execution on other CPUs.  So we get a
1220 	 * handle on the beginning of traces and decode until we find a
1221 	 * timestamp.  The timestamp is then added to the auxtrace min heap
1222 	 * in order to know what nibble (of all the etmqs) to decode first.
1223 	 */
1224 	while (1) {
1225 		/*
1226 		 * Fetch an aux_buffer from this etmq.  Bail if no more
1227 		 * blocks or an error has been encountered.
1228 		 */
1229 		ret = cs_etm__get_data_block(etmq);
1230 		if (ret <= 0)
1231 			goto out;
1232 
1233 		/*
1234 		 * Run decoder on the trace block.  The decoder will stop when
1235 		 * encountering a CS timestamp, a full packet queue or the end of
1236 		 * trace for that block.
1237 		 */
1238 		ret = cs_etm__decode_data_block(etmq);
1239 		if (ret)
1240 			goto out;
1241 
1242 		/*
1243 		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1244 		 * the timestamp calculation for us.
1245 		 */
1246 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1247 
1248 		/* We found a timestamp, no need to continue. */
1249 		if (cs_timestamp)
1250 			break;
1251 
1252 		/*
1253 		 * We didn't find a timestamp so empty all the traceid packet
1254 		 * queues before looking for another timestamp packet, either
1255 		 * in the current data block or a new one.  Packets that were
1256 		 * just decoded are useless since no timestamp has been
1257 		 * associated with them.  As such simply discard them.
1258 		 */
1259 		cs_etm__clear_all_packet_queues(etmq);
1260 	}
1261 
1262 	/*
1263 	 * We have a timestamp.  Add it to the min heap to reflect when
1264 	 * instructions conveyed by the range packets of this traceID queue
1265 	 * started to execute.  Once the same has been done for all the traceID
1266 	 * queues of each etmq, redenring and decoding can start in
1267 	 * chronological order.
1268 	 *
1269 	 * Note that packets decoded above are still in the traceID's packet
1270 	 * queue and will be processed in cs_etm__process_timestamped_queues().
1271 	 */
1272 	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1273 	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1274 out:
1275 	return ret;
1276 }
1277 
1278 static inline
1279 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1280 				 struct cs_etm_traceid_queue *tidq)
1281 {
1282 	struct branch_stack *bs_src = tidq->last_branch_rb;
1283 	struct branch_stack *bs_dst = tidq->last_branch;
1284 	size_t nr = 0;
1285 
1286 	/*
1287 	 * Set the number of records before early exit: ->nr is used to
1288 	 * determine how many branches to copy from ->entries.
1289 	 */
1290 	bs_dst->nr = bs_src->nr;
1291 
1292 	/*
1293 	 * Early exit when there is nothing to copy.
1294 	 */
1295 	if (!bs_src->nr)
1296 		return;
1297 
1298 	/*
1299 	 * As bs_src->entries is a circular buffer, we need to copy from it in
1300 	 * two steps.  First, copy the branches from the most recently inserted
1301 	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1302 	 */
1303 	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1304 	memcpy(&bs_dst->entries[0],
1305 	       &bs_src->entries[tidq->last_branch_pos],
1306 	       sizeof(struct branch_entry) * nr);
1307 
1308 	/*
1309 	 * If we wrapped around at least once, the branches from the beginning
1310 	 * of the bs_src->entries buffer and until the ->last_branch_pos element
1311 	 * are older valid branches: copy them over.  The total number of
1312 	 * branches copied over will be equal to the number of branches asked by
1313 	 * the user in last_branch_sz.
1314 	 */
1315 	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1316 		memcpy(&bs_dst->entries[nr],
1317 		       &bs_src->entries[0],
1318 		       sizeof(struct branch_entry) * tidq->last_branch_pos);
1319 	}
1320 }
1321 
1322 static inline
1323 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1324 {
1325 	tidq->last_branch_pos = 0;
1326 	tidq->last_branch_rb->nr = 0;
1327 }
1328 
1329 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1330 					 u8 trace_chan_id, u64 addr)
1331 {
1332 	u8 instrBytes[2];
1333 
1334 	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1335 			   instrBytes, 0);
1336 	/*
1337 	 * T32 instruction size is indicated by bits[15:11] of the first
1338 	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1339 	 * denote a 32-bit instruction.
1340 	 */
1341 	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1342 }
1343 
1344 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1345 {
1346 	/*
1347 	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1348 	 * appear in samples.
1349 	 */
1350 	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1351 	    packet->sample_type == CS_ETM_EXCEPTION)
1352 		return 0;
1353 
1354 	return packet->start_addr;
1355 }
1356 
1357 static inline
1358 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1359 {
1360 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1361 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1362 		return 0;
1363 
1364 	return packet->end_addr - packet->last_instr_size;
1365 }
1366 
1367 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1368 				     u64 trace_chan_id,
1369 				     const struct cs_etm_packet *packet,
1370 				     u64 offset)
1371 {
1372 	if (packet->isa == CS_ETM_ISA_T32) {
1373 		u64 addr = packet->start_addr;
1374 
1375 		while (offset) {
1376 			addr += cs_etm__t32_instr_size(etmq,
1377 						       trace_chan_id, addr);
1378 			offset--;
1379 		}
1380 		return addr;
1381 	}
1382 
1383 	/* Assume a 4 byte instruction size (A32/A64) */
1384 	return packet->start_addr + offset * 4;
1385 }
1386 
1387 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1388 					  struct cs_etm_traceid_queue *tidq)
1389 {
1390 	struct branch_stack *bs = tidq->last_branch_rb;
1391 	struct branch_entry *be;
1392 
1393 	/*
1394 	 * The branches are recorded in a circular buffer in reverse
1395 	 * chronological order: we start recording from the last element of the
1396 	 * buffer down.  After writing the first element of the stack, move the
1397 	 * insert position back to the end of the buffer.
1398 	 */
1399 	if (!tidq->last_branch_pos)
1400 		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1401 
1402 	tidq->last_branch_pos -= 1;
1403 
1404 	be       = &bs->entries[tidq->last_branch_pos];
1405 	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1406 	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1407 	/* No support for mispredict */
1408 	be->flags.mispred = 0;
1409 	be->flags.predicted = 1;
1410 
1411 	/*
1412 	 * Increment bs->nr until reaching the number of last branches asked by
1413 	 * the user on the command line.
1414 	 */
1415 	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1416 		bs->nr += 1;
1417 }
1418 
1419 static int cs_etm__inject_event(union perf_event *event,
1420 			       struct perf_sample *sample, u64 type)
1421 {
1422 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1423 	return perf_event__synthesize_sample(event, type, 0, sample);
1424 }
1425 
1426 
1427 static int
1428 cs_etm__get_trace(struct cs_etm_queue *etmq)
1429 {
1430 	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1431 	struct auxtrace_buffer *old_buffer = aux_buffer;
1432 	struct auxtrace_queue *queue;
1433 
1434 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1435 
1436 	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1437 
1438 	/* If no more data, drop the previous auxtrace_buffer and return */
1439 	if (!aux_buffer) {
1440 		if (old_buffer)
1441 			auxtrace_buffer__drop_data(old_buffer);
1442 		etmq->buf_len = 0;
1443 		return 0;
1444 	}
1445 
1446 	etmq->buffer = aux_buffer;
1447 
1448 	/* If the aux_buffer doesn't have data associated, try to load it */
1449 	if (!aux_buffer->data) {
1450 		/* get the file desc associated with the perf data file */
1451 		int fd = perf_data__fd(etmq->etm->session->data);
1452 
1453 		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1454 		if (!aux_buffer->data)
1455 			return -ENOMEM;
1456 	}
1457 
1458 	/* If valid, drop the previous buffer */
1459 	if (old_buffer)
1460 		auxtrace_buffer__drop_data(old_buffer);
1461 
1462 	etmq->buf_used = 0;
1463 	etmq->buf_len = aux_buffer->size;
1464 	etmq->buf = aux_buffer->data;
1465 
1466 	return etmq->buf_len;
1467 }
1468 
1469 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1470 			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1471 			       ocsd_ex_level el)
1472 {
1473 	struct machine *machine = cs_etm__get_machine(etmq, el);
1474 
1475 	if (tid != -1) {
1476 		thread__zput(tidq->thread);
1477 		tidq->thread = machine__find_thread(machine, -1, tid);
1478 	}
1479 
1480 	/* Couldn't find a known thread */
1481 	if (!tidq->thread)
1482 		tidq->thread = machine__idle_thread(machine);
1483 
1484 	tidq->el = el;
1485 }
1486 
1487 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1488 			    u8 trace_chan_id, ocsd_ex_level el)
1489 {
1490 	struct cs_etm_traceid_queue *tidq;
1491 
1492 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1493 	if (!tidq)
1494 		return -EINVAL;
1495 
1496 	cs_etm__set_thread(etmq, tidq, tid, el);
1497 	return 0;
1498 }
1499 
1500 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1501 {
1502 	return !!etmq->etm->timeless_decoding;
1503 }
1504 
1505 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1506 			      u64 trace_chan_id,
1507 			      const struct cs_etm_packet *packet,
1508 			      struct perf_sample *sample)
1509 {
1510 	/*
1511 	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1512 	 * packet, so directly bail out with 'insn_len' = 0.
1513 	 */
1514 	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1515 		sample->insn_len = 0;
1516 		return;
1517 	}
1518 
1519 	/*
1520 	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1521 	 * cs_etm__t32_instr_size().
1522 	 */
1523 	if (packet->isa == CS_ETM_ISA_T32)
1524 		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1525 							  sample->ip);
1526 	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
1527 	else
1528 		sample->insn_len = 4;
1529 
1530 	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1531 			   (void *)sample->insn, 0);
1532 }
1533 
1534 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1535 {
1536 	struct cs_etm_auxtrace *etm = etmq->etm;
1537 
1538 	if (etm->has_virtual_ts)
1539 		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1540 	else
1541 		return cs_timestamp;
1542 }
1543 
1544 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1545 					       struct cs_etm_traceid_queue *tidq)
1546 {
1547 	struct cs_etm_auxtrace *etm = etmq->etm;
1548 	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1549 
1550 	if (!etm->timeless_decoding && etm->has_virtual_ts)
1551 		return packet_queue->cs_timestamp;
1552 	else
1553 		return etm->latest_kernel_timestamp;
1554 }
1555 
1556 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1557 					    struct cs_etm_traceid_queue *tidq,
1558 					    u64 addr, u64 period)
1559 {
1560 	int ret = 0;
1561 	struct cs_etm_auxtrace *etm = etmq->etm;
1562 	union perf_event *event = tidq->event_buf;
1563 	struct perf_sample sample = {.ip = 0,};
1564 
1565 	event->sample.header.type = PERF_RECORD_SAMPLE;
1566 	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1567 	event->sample.header.size = sizeof(struct perf_event_header);
1568 
1569 	/* Set time field based on etm auxtrace config. */
1570 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1571 
1572 	sample.ip = addr;
1573 	sample.pid = thread__pid(tidq->thread);
1574 	sample.tid = thread__tid(tidq->thread);
1575 	sample.id = etmq->etm->instructions_id;
1576 	sample.stream_id = etmq->etm->instructions_id;
1577 	sample.period = period;
1578 	sample.cpu = tidq->packet->cpu;
1579 	sample.flags = tidq->prev_packet->flags;
1580 	sample.cpumode = event->sample.header.misc;
1581 
1582 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1583 
1584 	if (etm->synth_opts.last_branch)
1585 		sample.branch_stack = tidq->last_branch;
1586 
1587 	if (etm->synth_opts.inject) {
1588 		ret = cs_etm__inject_event(event, &sample,
1589 					   etm->instructions_sample_type);
1590 		if (ret)
1591 			return ret;
1592 	}
1593 
1594 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1595 
1596 	if (ret)
1597 		pr_err(
1598 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1599 			ret);
1600 
1601 	return ret;
1602 }
1603 
1604 /*
1605  * The cs etm packet encodes an instruction range between a branch target
1606  * and the next taken branch. Generate sample accordingly.
1607  */
1608 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1609 				       struct cs_etm_traceid_queue *tidq)
1610 {
1611 	int ret = 0;
1612 	struct cs_etm_auxtrace *etm = etmq->etm;
1613 	struct perf_sample sample = {.ip = 0,};
1614 	union perf_event *event = tidq->event_buf;
1615 	struct dummy_branch_stack {
1616 		u64			nr;
1617 		u64			hw_idx;
1618 		struct branch_entry	entries;
1619 	} dummy_bs;
1620 	u64 ip;
1621 
1622 	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1623 
1624 	event->sample.header.type = PERF_RECORD_SAMPLE;
1625 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1626 						     tidq->prev_packet_el);
1627 	event->sample.header.size = sizeof(struct perf_event_header);
1628 
1629 	/* Set time field based on etm auxtrace config. */
1630 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1631 
1632 	sample.ip = ip;
1633 	sample.pid = thread__pid(tidq->prev_packet_thread);
1634 	sample.tid = thread__tid(tidq->prev_packet_thread);
1635 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1636 	sample.id = etmq->etm->branches_id;
1637 	sample.stream_id = etmq->etm->branches_id;
1638 	sample.period = 1;
1639 	sample.cpu = tidq->packet->cpu;
1640 	sample.flags = tidq->prev_packet->flags;
1641 	sample.cpumode = event->sample.header.misc;
1642 
1643 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1644 			  &sample);
1645 
1646 	/*
1647 	 * perf report cannot handle events without a branch stack
1648 	 */
1649 	if (etm->synth_opts.last_branch) {
1650 		dummy_bs = (struct dummy_branch_stack){
1651 			.nr = 1,
1652 			.hw_idx = -1ULL,
1653 			.entries = {
1654 				.from = sample.ip,
1655 				.to = sample.addr,
1656 			},
1657 		};
1658 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1659 	}
1660 
1661 	if (etm->synth_opts.inject) {
1662 		ret = cs_etm__inject_event(event, &sample,
1663 					   etm->branches_sample_type);
1664 		if (ret)
1665 			return ret;
1666 	}
1667 
1668 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1669 
1670 	if (ret)
1671 		pr_err(
1672 		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1673 		ret);
1674 
1675 	return ret;
1676 }
1677 
1678 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1679 				struct perf_session *session)
1680 {
1681 	struct evlist *evlist = session->evlist;
1682 	struct evsel *evsel;
1683 	struct perf_event_attr attr;
1684 	bool found = false;
1685 	u64 id;
1686 	int err;
1687 
1688 	evlist__for_each_entry(evlist, evsel) {
1689 		if (evsel->core.attr.type == etm->pmu_type) {
1690 			found = true;
1691 			break;
1692 		}
1693 	}
1694 
1695 	if (!found) {
1696 		pr_debug("No selected events with CoreSight Trace data\n");
1697 		return 0;
1698 	}
1699 
1700 	memset(&attr, 0, sizeof(struct perf_event_attr));
1701 	attr.size = sizeof(struct perf_event_attr);
1702 	attr.type = PERF_TYPE_HARDWARE;
1703 	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1704 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1705 			    PERF_SAMPLE_PERIOD;
1706 	if (etm->timeless_decoding)
1707 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1708 	else
1709 		attr.sample_type |= PERF_SAMPLE_TIME;
1710 
1711 	attr.exclude_user = evsel->core.attr.exclude_user;
1712 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1713 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1714 	attr.exclude_host = evsel->core.attr.exclude_host;
1715 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1716 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1717 	attr.read_format = evsel->core.attr.read_format;
1718 
1719 	/* create new id val to be a fixed offset from evsel id */
1720 	id = evsel->core.id[0] + 1000000000;
1721 
1722 	if (!id)
1723 		id = 1;
1724 
1725 	if (etm->synth_opts.branches) {
1726 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1727 		attr.sample_period = 1;
1728 		attr.sample_type |= PERF_SAMPLE_ADDR;
1729 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1730 		if (err)
1731 			return err;
1732 		etm->branches_sample_type = attr.sample_type;
1733 		etm->branches_id = id;
1734 		id += 1;
1735 		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1736 	}
1737 
1738 	if (etm->synth_opts.last_branch) {
1739 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1740 		/*
1741 		 * We don't use the hardware index, but the sample generation
1742 		 * code uses the new format branch_stack with this field,
1743 		 * so the event attributes must indicate that it's present.
1744 		 */
1745 		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1746 	}
1747 
1748 	if (etm->synth_opts.instructions) {
1749 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1750 		attr.sample_period = etm->synth_opts.period;
1751 		etm->instructions_sample_period = attr.sample_period;
1752 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1753 		if (err)
1754 			return err;
1755 		etm->instructions_sample_type = attr.sample_type;
1756 		etm->instructions_id = id;
1757 		id += 1;
1758 	}
1759 
1760 	return 0;
1761 }
1762 
1763 static int cs_etm__sample(struct cs_etm_queue *etmq,
1764 			  struct cs_etm_traceid_queue *tidq)
1765 {
1766 	struct cs_etm_auxtrace *etm = etmq->etm;
1767 	int ret;
1768 	u8 trace_chan_id = tidq->trace_chan_id;
1769 	u64 instrs_prev;
1770 
1771 	/* Get instructions remainder from previous packet */
1772 	instrs_prev = tidq->period_instructions;
1773 
1774 	tidq->period_instructions += tidq->packet->instr_count;
1775 
1776 	/*
1777 	 * Record a branch when the last instruction in
1778 	 * PREV_PACKET is a branch.
1779 	 */
1780 	if (etm->synth_opts.last_branch &&
1781 	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1782 	    tidq->prev_packet->last_instr_taken_branch)
1783 		cs_etm__update_last_branch_rb(etmq, tidq);
1784 
1785 	if (etm->synth_opts.instructions &&
1786 	    tidq->period_instructions >= etm->instructions_sample_period) {
1787 		/*
1788 		 * Emit instruction sample periodically
1789 		 * TODO: allow period to be defined in cycles and clock time
1790 		 */
1791 
1792 		/*
1793 		 * Below diagram demonstrates the instruction samples
1794 		 * generation flows:
1795 		 *
1796 		 *    Instrs     Instrs       Instrs       Instrs
1797 		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1798 		 *    |            |            |            |
1799 		 *    V            V            V            V
1800 		 *   --------------------------------------------------
1801 		 *            ^                                  ^
1802 		 *            |                                  |
1803 		 *         Period                             Period
1804 		 *    instructions(Pi)                   instructions(Pi')
1805 		 *
1806 		 *            |                                  |
1807 		 *            \---------------- -----------------/
1808 		 *                             V
1809 		 *                 tidq->packet->instr_count
1810 		 *
1811 		 * Instrs Sample(n...) are the synthesised samples occurring
1812 		 * every etm->instructions_sample_period instructions - as
1813 		 * defined on the perf command line.  Sample(n) is being the
1814 		 * last sample before the current etm packet, n+1 to n+3
1815 		 * samples are generated from the current etm packet.
1816 		 *
1817 		 * tidq->packet->instr_count represents the number of
1818 		 * instructions in the current etm packet.
1819 		 *
1820 		 * Period instructions (Pi) contains the number of
1821 		 * instructions executed after the sample point(n) from the
1822 		 * previous etm packet.  This will always be less than
1823 		 * etm->instructions_sample_period.
1824 		 *
1825 		 * When generate new samples, it combines with two parts
1826 		 * instructions, one is the tail of the old packet and another
1827 		 * is the head of the new coming packet, to generate
1828 		 * sample(n+1); sample(n+2) and sample(n+3) consume the
1829 		 * instructions with sample period.  After sample(n+3), the rest
1830 		 * instructions will be used by later packet and it is assigned
1831 		 * to tidq->period_instructions for next round calculation.
1832 		 */
1833 
1834 		/*
1835 		 * Get the initial offset into the current packet instructions;
1836 		 * entry conditions ensure that instrs_prev is less than
1837 		 * etm->instructions_sample_period.
1838 		 */
1839 		u64 offset = etm->instructions_sample_period - instrs_prev;
1840 		u64 addr;
1841 
1842 		/* Prepare last branches for instruction sample */
1843 		if (etm->synth_opts.last_branch)
1844 			cs_etm__copy_last_branch_rb(etmq, tidq);
1845 
1846 		while (tidq->period_instructions >=
1847 				etm->instructions_sample_period) {
1848 			/*
1849 			 * Calculate the address of the sampled instruction (-1
1850 			 * as sample is reported as though instruction has just
1851 			 * been executed, but PC has not advanced to next
1852 			 * instruction)
1853 			 */
1854 			addr = cs_etm__instr_addr(etmq, trace_chan_id,
1855 						  tidq->packet, offset - 1);
1856 			ret = cs_etm__synth_instruction_sample(
1857 				etmq, tidq, addr,
1858 				etm->instructions_sample_period);
1859 			if (ret)
1860 				return ret;
1861 
1862 			offset += etm->instructions_sample_period;
1863 			tidq->period_instructions -=
1864 				etm->instructions_sample_period;
1865 		}
1866 	}
1867 
1868 	if (etm->synth_opts.branches) {
1869 		bool generate_sample = false;
1870 
1871 		/* Generate sample for tracing on packet */
1872 		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1873 			generate_sample = true;
1874 
1875 		/* Generate sample for branch taken packet */
1876 		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1877 		    tidq->prev_packet->last_instr_taken_branch)
1878 			generate_sample = true;
1879 
1880 		if (generate_sample) {
1881 			ret = cs_etm__synth_branch_sample(etmq, tidq);
1882 			if (ret)
1883 				return ret;
1884 		}
1885 	}
1886 
1887 	cs_etm__packet_swap(etm, tidq);
1888 
1889 	return 0;
1890 }
1891 
1892 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1893 {
1894 	/*
1895 	 * When the exception packet is inserted, whether the last instruction
1896 	 * in previous range packet is taken branch or not, we need to force
1897 	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
1898 	 * to generate branch sample for the instruction range before the
1899 	 * exception is trapped to kernel or before the exception returning.
1900 	 *
1901 	 * The exception packet includes the dummy address values, so don't
1902 	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
1903 	 * for generating instruction and branch samples.
1904 	 */
1905 	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1906 		tidq->prev_packet->last_instr_taken_branch = true;
1907 
1908 	return 0;
1909 }
1910 
1911 static int cs_etm__flush(struct cs_etm_queue *etmq,
1912 			 struct cs_etm_traceid_queue *tidq)
1913 {
1914 	int err = 0;
1915 	struct cs_etm_auxtrace *etm = etmq->etm;
1916 
1917 	/* Handle start tracing packet */
1918 	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1919 		goto swap_packet;
1920 
1921 	if (etmq->etm->synth_opts.last_branch &&
1922 	    etmq->etm->synth_opts.instructions &&
1923 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1924 		u64 addr;
1925 
1926 		/* Prepare last branches for instruction sample */
1927 		cs_etm__copy_last_branch_rb(etmq, tidq);
1928 
1929 		/*
1930 		 * Generate a last branch event for the branches left in the
1931 		 * circular buffer at the end of the trace.
1932 		 *
1933 		 * Use the address of the end of the last reported execution
1934 		 * range
1935 		 */
1936 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1937 
1938 		err = cs_etm__synth_instruction_sample(
1939 			etmq, tidq, addr,
1940 			tidq->period_instructions);
1941 		if (err)
1942 			return err;
1943 
1944 		tidq->period_instructions = 0;
1945 
1946 	}
1947 
1948 	if (etm->synth_opts.branches &&
1949 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1950 		err = cs_etm__synth_branch_sample(etmq, tidq);
1951 		if (err)
1952 			return err;
1953 	}
1954 
1955 swap_packet:
1956 	cs_etm__packet_swap(etm, tidq);
1957 
1958 	/* Reset last branches after flush the trace */
1959 	if (etm->synth_opts.last_branch)
1960 		cs_etm__reset_last_branch_rb(tidq);
1961 
1962 	return err;
1963 }
1964 
1965 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1966 			     struct cs_etm_traceid_queue *tidq)
1967 {
1968 	int err;
1969 
1970 	/*
1971 	 * It has no new packet coming and 'etmq->packet' contains the stale
1972 	 * packet which was set at the previous time with packets swapping;
1973 	 * so skip to generate branch sample to avoid stale packet.
1974 	 *
1975 	 * For this case only flush branch stack and generate a last branch
1976 	 * event for the branches left in the circular buffer at the end of
1977 	 * the trace.
1978 	 */
1979 	if (etmq->etm->synth_opts.last_branch &&
1980 	    etmq->etm->synth_opts.instructions &&
1981 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1982 		u64 addr;
1983 
1984 		/* Prepare last branches for instruction sample */
1985 		cs_etm__copy_last_branch_rb(etmq, tidq);
1986 
1987 		/*
1988 		 * Use the address of the end of the last reported execution
1989 		 * range.
1990 		 */
1991 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1992 
1993 		err = cs_etm__synth_instruction_sample(
1994 			etmq, tidq, addr,
1995 			tidq->period_instructions);
1996 		if (err)
1997 			return err;
1998 
1999 		tidq->period_instructions = 0;
2000 	}
2001 
2002 	return 0;
2003 }
2004 /*
2005  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2006  *			   if need be.
2007  * Returns:	< 0	if error
2008  *		= 0	if no more auxtrace_buffer to read
2009  *		> 0	if the current buffer isn't empty yet
2010  */
2011 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2012 {
2013 	int ret;
2014 
2015 	if (!etmq->buf_len) {
2016 		ret = cs_etm__get_trace(etmq);
2017 		if (ret <= 0)
2018 			return ret;
2019 		/*
2020 		 * We cannot assume consecutive blocks in the data file
2021 		 * are contiguous, reset the decoder to force re-sync.
2022 		 */
2023 		ret = cs_etm_decoder__reset(etmq->decoder);
2024 		if (ret)
2025 			return ret;
2026 	}
2027 
2028 	return etmq->buf_len;
2029 }
2030 
2031 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
2032 				 struct cs_etm_packet *packet,
2033 				 u64 end_addr)
2034 {
2035 	/* Initialise to keep compiler happy */
2036 	u16 instr16 = 0;
2037 	u32 instr32 = 0;
2038 	u64 addr;
2039 
2040 	switch (packet->isa) {
2041 	case CS_ETM_ISA_T32:
2042 		/*
2043 		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2044 		 *
2045 		 *  b'15         b'8
2046 		 * +-----------------+--------+
2047 		 * | 1 1 0 1 1 1 1 1 |  imm8  |
2048 		 * +-----------------+--------+
2049 		 *
2050 		 * According to the specification, it only defines SVC for T32
2051 		 * with 16 bits instruction and has no definition for 32bits;
2052 		 * so below only read 2 bytes as instruction size for T32.
2053 		 */
2054 		addr = end_addr - 2;
2055 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2056 				   (u8 *)&instr16, 0);
2057 		if ((instr16 & 0xFF00) == 0xDF00)
2058 			return true;
2059 
2060 		break;
2061 	case CS_ETM_ISA_A32:
2062 		/*
2063 		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2064 		 *
2065 		 *  b'31 b'28 b'27 b'24
2066 		 * +---------+---------+-------------------------+
2067 		 * |  !1111  | 1 1 1 1 |        imm24            |
2068 		 * +---------+---------+-------------------------+
2069 		 */
2070 		addr = end_addr - 4;
2071 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2072 				   (u8 *)&instr32, 0);
2073 		if ((instr32 & 0x0F000000) == 0x0F000000 &&
2074 		    (instr32 & 0xF0000000) != 0xF0000000)
2075 			return true;
2076 
2077 		break;
2078 	case CS_ETM_ISA_A64:
2079 		/*
2080 		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2081 		 *
2082 		 *  b'31               b'21           b'4     b'0
2083 		 * +-----------------------+---------+-----------+
2084 		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2085 		 * +-----------------------+---------+-----------+
2086 		 */
2087 		addr = end_addr - 4;
2088 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2089 				   (u8 *)&instr32, 0);
2090 		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2091 			return true;
2092 
2093 		break;
2094 	case CS_ETM_ISA_UNKNOWN:
2095 	default:
2096 		break;
2097 	}
2098 
2099 	return false;
2100 }
2101 
2102 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2103 			       struct cs_etm_traceid_queue *tidq, u64 magic)
2104 {
2105 	u8 trace_chan_id = tidq->trace_chan_id;
2106 	struct cs_etm_packet *packet = tidq->packet;
2107 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2108 
2109 	if (magic == __perf_cs_etmv3_magic)
2110 		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2111 			return true;
2112 
2113 	/*
2114 	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2115 	 * HVC cases; need to check if it's SVC instruction based on
2116 	 * packet address.
2117 	 */
2118 	if (magic == __perf_cs_etmv4_magic) {
2119 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2120 		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2121 					 prev_packet->end_addr))
2122 			return true;
2123 	}
2124 
2125 	return false;
2126 }
2127 
2128 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2129 				       u64 magic)
2130 {
2131 	struct cs_etm_packet *packet = tidq->packet;
2132 
2133 	if (magic == __perf_cs_etmv3_magic)
2134 		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2135 		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2136 		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2137 		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2138 		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2139 			return true;
2140 
2141 	if (magic == __perf_cs_etmv4_magic)
2142 		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2143 		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2144 		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2145 		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2146 		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2147 		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2148 		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2149 			return true;
2150 
2151 	return false;
2152 }
2153 
2154 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2155 				      struct cs_etm_traceid_queue *tidq,
2156 				      u64 magic)
2157 {
2158 	u8 trace_chan_id = tidq->trace_chan_id;
2159 	struct cs_etm_packet *packet = tidq->packet;
2160 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2161 
2162 	if (magic == __perf_cs_etmv3_magic)
2163 		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2164 		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2165 		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2166 		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2167 		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2168 		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2169 		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2170 			return true;
2171 
2172 	if (magic == __perf_cs_etmv4_magic) {
2173 		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2174 		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2175 		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2176 		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2177 			return true;
2178 
2179 		/*
2180 		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2181 		 * (SMC, HVC) are taken as sync exceptions.
2182 		 */
2183 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2184 		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2185 					  prev_packet->end_addr))
2186 			return true;
2187 
2188 		/*
2189 		 * ETMv4 has 5 bits for exception number; if the numbers
2190 		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2191 		 * they are implementation defined exceptions.
2192 		 *
2193 		 * For this case, simply take it as sync exception.
2194 		 */
2195 		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2196 		    packet->exception_number <= CS_ETMV4_EXC_END)
2197 			return true;
2198 	}
2199 
2200 	return false;
2201 }
2202 
2203 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2204 				    struct cs_etm_traceid_queue *tidq)
2205 {
2206 	struct cs_etm_packet *packet = tidq->packet;
2207 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2208 	u8 trace_chan_id = tidq->trace_chan_id;
2209 	u64 magic;
2210 	int ret;
2211 
2212 	switch (packet->sample_type) {
2213 	case CS_ETM_RANGE:
2214 		/*
2215 		 * Immediate branch instruction without neither link nor
2216 		 * return flag, it's normal branch instruction within
2217 		 * the function.
2218 		 */
2219 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2220 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2221 			packet->flags = PERF_IP_FLAG_BRANCH;
2222 
2223 			if (packet->last_instr_cond)
2224 				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2225 		}
2226 
2227 		/*
2228 		 * Immediate branch instruction with link (e.g. BL), this is
2229 		 * branch instruction for function call.
2230 		 */
2231 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2232 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2233 			packet->flags = PERF_IP_FLAG_BRANCH |
2234 					PERF_IP_FLAG_CALL;
2235 
2236 		/*
2237 		 * Indirect branch instruction with link (e.g. BLR), this is
2238 		 * branch instruction for function call.
2239 		 */
2240 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2241 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2242 			packet->flags = PERF_IP_FLAG_BRANCH |
2243 					PERF_IP_FLAG_CALL;
2244 
2245 		/*
2246 		 * Indirect branch instruction with subtype of
2247 		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
2248 		 * function return for A32/T32.
2249 		 */
2250 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2251 		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2252 			packet->flags = PERF_IP_FLAG_BRANCH |
2253 					PERF_IP_FLAG_RETURN;
2254 
2255 		/*
2256 		 * Indirect branch instruction without link (e.g. BR), usually
2257 		 * this is used for function return, especially for functions
2258 		 * within dynamic link lib.
2259 		 */
2260 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2261 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2262 			packet->flags = PERF_IP_FLAG_BRANCH |
2263 					PERF_IP_FLAG_RETURN;
2264 
2265 		/* Return instruction for function return. */
2266 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2267 		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2268 			packet->flags = PERF_IP_FLAG_BRANCH |
2269 					PERF_IP_FLAG_RETURN;
2270 
2271 		/*
2272 		 * Decoder might insert a discontinuity in the middle of
2273 		 * instruction packets, fixup prev_packet with flag
2274 		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2275 		 */
2276 		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2277 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2278 					      PERF_IP_FLAG_TRACE_BEGIN;
2279 
2280 		/*
2281 		 * If the previous packet is an exception return packet
2282 		 * and the return address just follows SVC instruction,
2283 		 * it needs to calibrate the previous packet sample flags
2284 		 * as PERF_IP_FLAG_SYSCALLRET.
2285 		 */
2286 		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2287 					   PERF_IP_FLAG_RETURN |
2288 					   PERF_IP_FLAG_INTERRUPT) &&
2289 		    cs_etm__is_svc_instr(etmq, trace_chan_id,
2290 					 packet, packet->start_addr))
2291 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2292 					     PERF_IP_FLAG_RETURN |
2293 					     PERF_IP_FLAG_SYSCALLRET;
2294 		break;
2295 	case CS_ETM_DISCONTINUITY:
2296 		/*
2297 		 * The trace is discontinuous, if the previous packet is
2298 		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
2299 		 * for previous packet.
2300 		 */
2301 		if (prev_packet->sample_type == CS_ETM_RANGE)
2302 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2303 					      PERF_IP_FLAG_TRACE_END;
2304 		break;
2305 	case CS_ETM_EXCEPTION:
2306 		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
2307 		if (ret)
2308 			return ret;
2309 
2310 		/* The exception is for system call. */
2311 		if (cs_etm__is_syscall(etmq, tidq, magic))
2312 			packet->flags = PERF_IP_FLAG_BRANCH |
2313 					PERF_IP_FLAG_CALL |
2314 					PERF_IP_FLAG_SYSCALLRET;
2315 		/*
2316 		 * The exceptions are triggered by external signals from bus,
2317 		 * interrupt controller, debug module, PE reset or halt.
2318 		 */
2319 		else if (cs_etm__is_async_exception(tidq, magic))
2320 			packet->flags = PERF_IP_FLAG_BRANCH |
2321 					PERF_IP_FLAG_CALL |
2322 					PERF_IP_FLAG_ASYNC |
2323 					PERF_IP_FLAG_INTERRUPT;
2324 		/*
2325 		 * Otherwise, exception is caused by trap, instruction &
2326 		 * data fault, or alignment errors.
2327 		 */
2328 		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2329 			packet->flags = PERF_IP_FLAG_BRANCH |
2330 					PERF_IP_FLAG_CALL |
2331 					PERF_IP_FLAG_INTERRUPT;
2332 
2333 		/*
2334 		 * When the exception packet is inserted, since exception
2335 		 * packet is not used standalone for generating samples
2336 		 * and it's affiliation to the previous instruction range
2337 		 * packet; so set previous range packet flags to tell perf
2338 		 * it is an exception taken branch.
2339 		 */
2340 		if (prev_packet->sample_type == CS_ETM_RANGE)
2341 			prev_packet->flags = packet->flags;
2342 		break;
2343 	case CS_ETM_EXCEPTION_RET:
2344 		/*
2345 		 * When the exception return packet is inserted, since
2346 		 * exception return packet is not used standalone for
2347 		 * generating samples and it's affiliation to the previous
2348 		 * instruction range packet; so set previous range packet
2349 		 * flags to tell perf it is an exception return branch.
2350 		 *
2351 		 * The exception return can be for either system call or
2352 		 * other exception types; unfortunately the packet doesn't
2353 		 * contain exception type related info so we cannot decide
2354 		 * the exception type purely based on exception return packet.
2355 		 * If we record the exception number from exception packet and
2356 		 * reuse it for exception return packet, this is not reliable
2357 		 * due the trace can be discontinuity or the interrupt can
2358 		 * be nested, thus the recorded exception number cannot be
2359 		 * used for exception return packet for these two cases.
2360 		 *
2361 		 * For exception return packet, we only need to distinguish the
2362 		 * packet is for system call or for other types.  Thus the
2363 		 * decision can be deferred when receive the next packet which
2364 		 * contains the return address, based on the return address we
2365 		 * can read out the previous instruction and check if it's a
2366 		 * system call instruction and then calibrate the sample flag
2367 		 * as needed.
2368 		 */
2369 		if (prev_packet->sample_type == CS_ETM_RANGE)
2370 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2371 					     PERF_IP_FLAG_RETURN |
2372 					     PERF_IP_FLAG_INTERRUPT;
2373 		break;
2374 	case CS_ETM_EMPTY:
2375 	default:
2376 		break;
2377 	}
2378 
2379 	return 0;
2380 }
2381 
2382 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2383 {
2384 	int ret = 0;
2385 	size_t processed = 0;
2386 
2387 	/*
2388 	 * Packets are decoded and added to the decoder's packet queue
2389 	 * until the decoder packet processing callback has requested that
2390 	 * processing stops or there is nothing left in the buffer.  Normal
2391 	 * operations that stop processing are a timestamp packet or a full
2392 	 * decoder buffer queue.
2393 	 */
2394 	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2395 						 etmq->offset,
2396 						 &etmq->buf[etmq->buf_used],
2397 						 etmq->buf_len,
2398 						 &processed);
2399 	if (ret)
2400 		goto out;
2401 
2402 	etmq->offset += processed;
2403 	etmq->buf_used += processed;
2404 	etmq->buf_len -= processed;
2405 
2406 out:
2407 	return ret;
2408 }
2409 
2410 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2411 					 struct cs_etm_traceid_queue *tidq)
2412 {
2413 	int ret;
2414 	struct cs_etm_packet_queue *packet_queue;
2415 
2416 	packet_queue = &tidq->packet_queue;
2417 
2418 	/* Process each packet in this chunk */
2419 	while (1) {
2420 		ret = cs_etm_decoder__get_packet(packet_queue,
2421 						 tidq->packet);
2422 		if (ret <= 0)
2423 			/*
2424 			 * Stop processing this chunk on
2425 			 * end of data or error
2426 			 */
2427 			break;
2428 
2429 		/*
2430 		 * Since packet addresses are swapped in packet
2431 		 * handling within below switch() statements,
2432 		 * thus setting sample flags must be called
2433 		 * prior to switch() statement to use address
2434 		 * information before packets swapping.
2435 		 */
2436 		ret = cs_etm__set_sample_flags(etmq, tidq);
2437 		if (ret < 0)
2438 			break;
2439 
2440 		switch (tidq->packet->sample_type) {
2441 		case CS_ETM_RANGE:
2442 			/*
2443 			 * If the packet contains an instruction
2444 			 * range, generate instruction sequence
2445 			 * events.
2446 			 */
2447 			cs_etm__sample(etmq, tidq);
2448 			break;
2449 		case CS_ETM_EXCEPTION:
2450 		case CS_ETM_EXCEPTION_RET:
2451 			/*
2452 			 * If the exception packet is coming,
2453 			 * make sure the previous instruction
2454 			 * range packet to be handled properly.
2455 			 */
2456 			cs_etm__exception(tidq);
2457 			break;
2458 		case CS_ETM_DISCONTINUITY:
2459 			/*
2460 			 * Discontinuity in trace, flush
2461 			 * previous branch stack
2462 			 */
2463 			cs_etm__flush(etmq, tidq);
2464 			break;
2465 		case CS_ETM_EMPTY:
2466 			/*
2467 			 * Should not receive empty packet,
2468 			 * report error.
2469 			 */
2470 			pr_err("CS ETM Trace: empty packet\n");
2471 			return -EINVAL;
2472 		default:
2473 			break;
2474 		}
2475 	}
2476 
2477 	return ret;
2478 }
2479 
2480 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2481 {
2482 	int idx;
2483 	struct int_node *inode;
2484 	struct cs_etm_traceid_queue *tidq;
2485 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2486 
2487 	intlist__for_each_entry(inode, traceid_queues_list) {
2488 		idx = (int)(intptr_t)inode->priv;
2489 		tidq = etmq->traceid_queues[idx];
2490 
2491 		/* Ignore return value */
2492 		cs_etm__process_traceid_queue(etmq, tidq);
2493 	}
2494 }
2495 
2496 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2497 {
2498 	int err = 0;
2499 	struct cs_etm_traceid_queue *tidq;
2500 
2501 	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2502 	if (!tidq)
2503 		return -EINVAL;
2504 
2505 	/* Go through each buffer in the queue and decode them one by one */
2506 	while (1) {
2507 		err = cs_etm__get_data_block(etmq);
2508 		if (err <= 0)
2509 			return err;
2510 
2511 		/* Run trace decoder until buffer consumed or end of trace */
2512 		do {
2513 			err = cs_etm__decode_data_block(etmq);
2514 			if (err)
2515 				return err;
2516 
2517 			/*
2518 			 * Process each packet in this chunk, nothing to do if
2519 			 * an error occurs other than hoping the next one will
2520 			 * be better.
2521 			 */
2522 			err = cs_etm__process_traceid_queue(etmq, tidq);
2523 
2524 		} while (etmq->buf_len);
2525 
2526 		if (err == 0)
2527 			/* Flush any remaining branch stack entries */
2528 			err = cs_etm__end_block(etmq, tidq);
2529 	}
2530 
2531 	return err;
2532 }
2533 
2534 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2535 {
2536 	int idx, err = 0;
2537 	struct cs_etm_traceid_queue *tidq;
2538 	struct int_node *inode;
2539 
2540 	/* Go through each buffer in the queue and decode them one by one */
2541 	while (1) {
2542 		err = cs_etm__get_data_block(etmq);
2543 		if (err <= 0)
2544 			return err;
2545 
2546 		/* Run trace decoder until buffer consumed or end of trace */
2547 		do {
2548 			err = cs_etm__decode_data_block(etmq);
2549 			if (err)
2550 				return err;
2551 
2552 			/*
2553 			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2554 			 * single traceID queue because each TID has a separate
2555 			 * buffer. But here in per-cpu mode we need to iterate
2556 			 * over each channel instead.
2557 			 */
2558 			intlist__for_each_entry(inode,
2559 						etmq->traceid_queues_list) {
2560 				idx = (int)(intptr_t)inode->priv;
2561 				tidq = etmq->traceid_queues[idx];
2562 				cs_etm__process_traceid_queue(etmq, tidq);
2563 			}
2564 		} while (etmq->buf_len);
2565 
2566 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2567 			idx = (int)(intptr_t)inode->priv;
2568 			tidq = etmq->traceid_queues[idx];
2569 			/* Flush any remaining branch stack entries */
2570 			err = cs_etm__end_block(etmq, tidq);
2571 			if (err)
2572 				return err;
2573 		}
2574 	}
2575 
2576 	return err;
2577 }
2578 
2579 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2580 					   pid_t tid)
2581 {
2582 	unsigned int i;
2583 	struct auxtrace_queues *queues = &etm->queues;
2584 
2585 	for (i = 0; i < queues->nr_queues; i++) {
2586 		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2587 		struct cs_etm_queue *etmq = queue->priv;
2588 		struct cs_etm_traceid_queue *tidq;
2589 
2590 		if (!etmq)
2591 			continue;
2592 
2593 		if (etm->per_thread_decoding) {
2594 			tidq = cs_etm__etmq_get_traceid_queue(
2595 				etmq, CS_ETM_PER_THREAD_TRACEID);
2596 
2597 			if (!tidq)
2598 				continue;
2599 
2600 			if (tid == -1 || thread__tid(tidq->thread) == tid)
2601 				cs_etm__run_per_thread_timeless_decoder(etmq);
2602 		} else
2603 			cs_etm__run_per_cpu_timeless_decoder(etmq);
2604 	}
2605 
2606 	return 0;
2607 }
2608 
2609 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2610 {
2611 	int ret = 0;
2612 	unsigned int cs_queue_nr, queue_nr, i;
2613 	u8 trace_chan_id;
2614 	u64 cs_timestamp;
2615 	struct auxtrace_queue *queue;
2616 	struct cs_etm_queue *etmq;
2617 	struct cs_etm_traceid_queue *tidq;
2618 
2619 	/*
2620 	 * Pre-populate the heap with one entry from each queue so that we can
2621 	 * start processing in time order across all queues.
2622 	 */
2623 	for (i = 0; i < etm->queues.nr_queues; i++) {
2624 		etmq = etm->queues.queue_array[i].priv;
2625 		if (!etmq)
2626 			continue;
2627 
2628 		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2629 		if (ret)
2630 			return ret;
2631 	}
2632 
2633 	while (1) {
2634 		if (!etm->heap.heap_cnt)
2635 			break;
2636 
2637 		/* Take the entry at the top of the min heap */
2638 		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2639 		queue_nr = TO_QUEUE_NR(cs_queue_nr);
2640 		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2641 		queue = &etm->queues.queue_array[queue_nr];
2642 		etmq = queue->priv;
2643 
2644 		/*
2645 		 * Remove the top entry from the heap since we are about
2646 		 * to process it.
2647 		 */
2648 		auxtrace_heap__pop(&etm->heap);
2649 
2650 		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2651 		if (!tidq) {
2652 			/*
2653 			 * No traceID queue has been allocated for this traceID,
2654 			 * which means something somewhere went very wrong.  No
2655 			 * other choice than simply exit.
2656 			 */
2657 			ret = -EINVAL;
2658 			goto out;
2659 		}
2660 
2661 		/*
2662 		 * Packets associated with this timestamp are already in
2663 		 * the etmq's traceID queue, so process them.
2664 		 */
2665 		ret = cs_etm__process_traceid_queue(etmq, tidq);
2666 		if (ret < 0)
2667 			goto out;
2668 
2669 		/*
2670 		 * Packets for this timestamp have been processed, time to
2671 		 * move on to the next timestamp, fetching a new auxtrace_buffer
2672 		 * if need be.
2673 		 */
2674 refetch:
2675 		ret = cs_etm__get_data_block(etmq);
2676 		if (ret < 0)
2677 			goto out;
2678 
2679 		/*
2680 		 * No more auxtrace_buffers to process in this etmq, simply
2681 		 * move on to another entry in the auxtrace_heap.
2682 		 */
2683 		if (!ret)
2684 			continue;
2685 
2686 		ret = cs_etm__decode_data_block(etmq);
2687 		if (ret)
2688 			goto out;
2689 
2690 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2691 
2692 		if (!cs_timestamp) {
2693 			/*
2694 			 * Function cs_etm__decode_data_block() returns when
2695 			 * there is no more traces to decode in the current
2696 			 * auxtrace_buffer OR when a timestamp has been
2697 			 * encountered on any of the traceID queues.  Since we
2698 			 * did not get a timestamp, there is no more traces to
2699 			 * process in this auxtrace_buffer.  As such empty and
2700 			 * flush all traceID queues.
2701 			 */
2702 			cs_etm__clear_all_traceid_queues(etmq);
2703 
2704 			/* Fetch another auxtrace_buffer for this etmq */
2705 			goto refetch;
2706 		}
2707 
2708 		/*
2709 		 * Add to the min heap the timestamp for packets that have
2710 		 * just been decoded.  They will be processed and synthesized
2711 		 * during the next call to cs_etm__process_traceid_queue() for
2712 		 * this queue/traceID.
2713 		 */
2714 		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2715 		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2716 	}
2717 
2718 	for (i = 0; i < etm->queues.nr_queues; i++) {
2719 		struct int_node *inode;
2720 
2721 		etmq = etm->queues.queue_array[i].priv;
2722 		if (!etmq)
2723 			continue;
2724 
2725 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2726 			int idx = (int)(intptr_t)inode->priv;
2727 
2728 			/* Flush any remaining branch stack entries */
2729 			tidq = etmq->traceid_queues[idx];
2730 			ret = cs_etm__end_block(etmq, tidq);
2731 			if (ret)
2732 				return ret;
2733 		}
2734 	}
2735 out:
2736 	return ret;
2737 }
2738 
2739 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2740 					union perf_event *event)
2741 {
2742 	struct thread *th;
2743 
2744 	if (etm->timeless_decoding)
2745 		return 0;
2746 
2747 	/*
2748 	 * Add the tid/pid to the log so that we can get a match when we get a
2749 	 * contextID from the decoder. Only track for the host: only kernel
2750 	 * trace is supported for guests which wouldn't need pids so this should
2751 	 * be fine.
2752 	 */
2753 	th = machine__findnew_thread(&etm->session->machines.host,
2754 				     event->itrace_start.pid,
2755 				     event->itrace_start.tid);
2756 	if (!th)
2757 		return -ENOMEM;
2758 
2759 	thread__put(th);
2760 
2761 	return 0;
2762 }
2763 
2764 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2765 					   union perf_event *event)
2766 {
2767 	struct thread *th;
2768 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2769 
2770 	/*
2771 	 * Context switch in per-thread mode are irrelevant since perf
2772 	 * will start/stop tracing as the process is scheduled.
2773 	 */
2774 	if (etm->timeless_decoding)
2775 		return 0;
2776 
2777 	/*
2778 	 * SWITCH_IN events carry the next process to be switched out while
2779 	 * SWITCH_OUT events carry the process to be switched in.  As such
2780 	 * we don't care about IN events.
2781 	 */
2782 	if (!out)
2783 		return 0;
2784 
2785 	/*
2786 	 * Add the tid/pid to the log so that we can get a match when we get a
2787 	 * contextID from the decoder. Only track for the host: only kernel
2788 	 * trace is supported for guests which wouldn't need pids so this should
2789 	 * be fine.
2790 	 */
2791 	th = machine__findnew_thread(&etm->session->machines.host,
2792 				     event->context_switch.next_prev_pid,
2793 				     event->context_switch.next_prev_tid);
2794 	if (!th)
2795 		return -ENOMEM;
2796 
2797 	thread__put(th);
2798 
2799 	return 0;
2800 }
2801 
2802 static int cs_etm__process_event(struct perf_session *session,
2803 				 union perf_event *event,
2804 				 struct perf_sample *sample,
2805 				 const struct perf_tool *tool)
2806 {
2807 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2808 						   struct cs_etm_auxtrace,
2809 						   auxtrace);
2810 
2811 	if (dump_trace)
2812 		return 0;
2813 
2814 	if (!tool->ordered_events) {
2815 		pr_err("CoreSight ETM Trace requires ordered events\n");
2816 		return -EINVAL;
2817 	}
2818 
2819 	switch (event->header.type) {
2820 	case PERF_RECORD_EXIT:
2821 		/*
2822 		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2823 		 * start the decode because we know there will be no more trace from
2824 		 * this thread. All this does is emit samples earlier than waiting for
2825 		 * the flush in other modes, but with timestamps it makes sense to wait
2826 		 * for flush so that events from different threads are interleaved
2827 		 * properly.
2828 		 */
2829 		if (etm->per_thread_decoding && etm->timeless_decoding)
2830 			return cs_etm__process_timeless_queues(etm,
2831 							       event->fork.tid);
2832 		break;
2833 
2834 	case PERF_RECORD_ITRACE_START:
2835 		return cs_etm__process_itrace_start(etm, event);
2836 
2837 	case PERF_RECORD_SWITCH_CPU_WIDE:
2838 		return cs_etm__process_switch_cpu_wide(etm, event);
2839 
2840 	case PERF_RECORD_AUX:
2841 		/*
2842 		 * Record the latest kernel timestamp available in the header
2843 		 * for samples so that synthesised samples occur from this point
2844 		 * onwards.
2845 		 */
2846 		if (sample->time && (sample->time != (u64)-1))
2847 			etm->latest_kernel_timestamp = sample->time;
2848 		break;
2849 
2850 	default:
2851 		break;
2852 	}
2853 
2854 	return 0;
2855 }
2856 
2857 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2858 			     struct perf_record_auxtrace *event)
2859 {
2860 	struct auxtrace_buffer *buf;
2861 	unsigned int i;
2862 	/*
2863 	 * Find all buffers with same reference in the queues and dump them.
2864 	 * This is because the queues can contain multiple entries of the same
2865 	 * buffer that were split on aux records.
2866 	 */
2867 	for (i = 0; i < etm->queues.nr_queues; ++i)
2868 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2869 			if (buf->reference == event->reference)
2870 				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2871 }
2872 
2873 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2874 					  union perf_event *event,
2875 					  const struct perf_tool *tool __maybe_unused)
2876 {
2877 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2878 						   struct cs_etm_auxtrace,
2879 						   auxtrace);
2880 	if (!etm->data_queued) {
2881 		struct auxtrace_buffer *buffer;
2882 		off_t  data_offset;
2883 		int fd = perf_data__fd(session->data);
2884 		bool is_pipe = perf_data__is_pipe(session->data);
2885 		int err;
2886 		int idx = event->auxtrace.idx;
2887 
2888 		if (is_pipe)
2889 			data_offset = 0;
2890 		else {
2891 			data_offset = lseek(fd, 0, SEEK_CUR);
2892 			if (data_offset == -1)
2893 				return -errno;
2894 		}
2895 
2896 		err = auxtrace_queues__add_event(&etm->queues, session,
2897 						 event, data_offset, &buffer);
2898 		if (err)
2899 			return err;
2900 
2901 		if (dump_trace)
2902 			if (auxtrace_buffer__get_data(buffer, fd)) {
2903 				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2904 				auxtrace_buffer__put_data(buffer);
2905 			}
2906 	} else if (dump_trace)
2907 		dump_queued_data(etm, &event->auxtrace);
2908 
2909 	return 0;
2910 }
2911 
2912 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2913 {
2914 	struct evsel *evsel;
2915 	struct evlist *evlist = etm->session->evlist;
2916 
2917 	/* Override timeless mode with user input from --itrace=Z */
2918 	if (etm->synth_opts.timeless_decoding) {
2919 		etm->timeless_decoding = true;
2920 		return 0;
2921 	}
2922 
2923 	/*
2924 	 * Find the cs_etm evsel and look at what its timestamp setting was
2925 	 */
2926 	evlist__for_each_entry(evlist, evsel)
2927 		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2928 			etm->timeless_decoding =
2929 				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2930 			return 0;
2931 		}
2932 
2933 	pr_err("CS ETM: Couldn't find ETM evsel\n");
2934 	return -EINVAL;
2935 }
2936 
2937 /*
2938  * Read a single cpu parameter block from the auxtrace_info priv block.
2939  *
2940  * For version 1 there is a per cpu nr_params entry. If we are handling
2941  * version 1 file, then there may be less, the same, or more params
2942  * indicated by this value than the compile time number we understand.
2943  *
2944  * For a version 0 info block, there are a fixed number, and we need to
2945  * fill out the nr_param value in the metadata we create.
2946  */
2947 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2948 				    int out_blk_size, int nr_params_v0)
2949 {
2950 	u64 *metadata = NULL;
2951 	int hdr_version;
2952 	int nr_in_params, nr_out_params, nr_cmn_params;
2953 	int i, k;
2954 
2955 	metadata = zalloc(sizeof(*metadata) * out_blk_size);
2956 	if (!metadata)
2957 		return NULL;
2958 
2959 	/* read block current index & version */
2960 	i = *buff_in_offset;
2961 	hdr_version = buff_in[CS_HEADER_VERSION];
2962 
2963 	if (!hdr_version) {
2964 	/* read version 0 info block into a version 1 metadata block  */
2965 		nr_in_params = nr_params_v0;
2966 		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2967 		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2968 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2969 		/* remaining block params at offset +1 from source */
2970 		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2971 			metadata[k + 1] = buff_in[i + k];
2972 		/* version 0 has 2 common params */
2973 		nr_cmn_params = 2;
2974 	} else {
2975 	/* read version 1 info block - input and output nr_params may differ */
2976 		/* version 1 has 3 common params */
2977 		nr_cmn_params = 3;
2978 		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2979 
2980 		/* if input has more params than output - skip excess */
2981 		nr_out_params = nr_in_params + nr_cmn_params;
2982 		if (nr_out_params > out_blk_size)
2983 			nr_out_params = out_blk_size;
2984 
2985 		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2986 			metadata[k] = buff_in[i + k];
2987 
2988 		/* record the actual nr params we copied */
2989 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2990 	}
2991 
2992 	/* adjust in offset by number of in params used */
2993 	i += nr_in_params + nr_cmn_params;
2994 	*buff_in_offset = i;
2995 	return metadata;
2996 }
2997 
2998 /**
2999  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
3000  * on the bounds of aux_event, if it matches with the buffer that's at
3001  * file_offset.
3002  *
3003  * Normally, whole auxtrace buffers would be added to the queue. But we
3004  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3005  * is reset across each buffer, so splitting the buffers up in advance has
3006  * the same effect.
3007  */
3008 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
3009 				      struct perf_record_aux *aux_event, struct perf_sample *sample)
3010 {
3011 	int err;
3012 	char buf[PERF_SAMPLE_MAX_SIZE];
3013 	union perf_event *auxtrace_event_union;
3014 	struct perf_record_auxtrace *auxtrace_event;
3015 	union perf_event auxtrace_fragment;
3016 	__u64 aux_offset, aux_size;
3017 	enum cs_etm_format format;
3018 
3019 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
3020 						   struct cs_etm_auxtrace,
3021 						   auxtrace);
3022 
3023 	/*
3024 	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
3025 	 * from looping through the auxtrace index.
3026 	 */
3027 	err = perf_session__peek_event(session, file_offset, buf,
3028 				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
3029 	if (err)
3030 		return err;
3031 	auxtrace_event = &auxtrace_event_union->auxtrace;
3032 	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
3033 		return -EINVAL;
3034 
3035 	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
3036 		auxtrace_event->header.size != sz) {
3037 		return -EINVAL;
3038 	}
3039 
3040 	/*
3041 	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
3042 	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
3043 	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
3044 	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
3045 	 * Return 'not found' if mismatch.
3046 	 */
3047 	if (auxtrace_event->cpu == (__u32) -1) {
3048 		etm->per_thread_decoding = true;
3049 		if (auxtrace_event->tid != sample->tid)
3050 			return 1;
3051 	} else if (auxtrace_event->cpu != sample->cpu) {
3052 		if (etm->per_thread_decoding) {
3053 			/*
3054 			 * Found a per-cpu buffer after a per-thread one was
3055 			 * already found
3056 			 */
3057 			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3058 			return -EINVAL;
3059 		}
3060 		return 1;
3061 	}
3062 
3063 	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3064 		/*
3065 		 * Clamp size in snapshot mode. The buffer size is clamped in
3066 		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3067 		 * the buffer size.
3068 		 */
3069 		aux_size = min(aux_event->aux_size, auxtrace_event->size);
3070 
3071 		/*
3072 		 * In this mode, the head also points to the end of the buffer so aux_offset
3073 		 * needs to have the size subtracted so it points to the beginning as in normal mode
3074 		 */
3075 		aux_offset = aux_event->aux_offset - aux_size;
3076 	} else {
3077 		aux_size = aux_event->aux_size;
3078 		aux_offset = aux_event->aux_offset;
3079 	}
3080 
3081 	if (aux_offset >= auxtrace_event->offset &&
3082 	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3083 		struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
3084 
3085 		/*
3086 		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3087 		 * based on the sizes of the aux event, and queue that fragment.
3088 		 */
3089 		auxtrace_fragment.auxtrace = *auxtrace_event;
3090 		auxtrace_fragment.auxtrace.size = aux_size;
3091 		auxtrace_fragment.auxtrace.offset = aux_offset;
3092 		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3093 
3094 		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3095 			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3096 		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3097 						 file_offset, NULL);
3098 		if (err)
3099 			return err;
3100 
3101 		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
3102 				UNFORMATTED : FORMATTED;
3103 		if (etmq->format != UNSET && format != etmq->format) {
3104 			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
3105 			return -EINVAL;
3106 		}
3107 		etmq->format = format;
3108 		return 0;
3109 	}
3110 
3111 	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3112 	return 1;
3113 }
3114 
3115 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3116 					u64 offset __maybe_unused, void *data __maybe_unused)
3117 {
3118 	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3119 	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3120 		(*(int *)data)++; /* increment found count */
3121 		return cs_etm__process_aux_output_hw_id(session, event);
3122 	}
3123 	return 0;
3124 }
3125 
3126 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3127 					u64 offset __maybe_unused, void *data __maybe_unused)
3128 {
3129 	struct perf_sample sample;
3130 	int ret;
3131 	struct auxtrace_index_entry *ent;
3132 	struct auxtrace_index *auxtrace_index;
3133 	struct evsel *evsel;
3134 	size_t i;
3135 
3136 	/* Don't care about any other events, we're only queuing buffers for AUX events */
3137 	if (event->header.type != PERF_RECORD_AUX)
3138 		return 0;
3139 
3140 	if (event->header.size < sizeof(struct perf_record_aux))
3141 		return -EINVAL;
3142 
3143 	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3144 	if (!event->aux.aux_size)
3145 		return 0;
3146 
3147 	/*
3148 	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3149 	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3150 	 */
3151 	evsel = evlist__event2evsel(session->evlist, event);
3152 	if (!evsel)
3153 		return -EINVAL;
3154 	ret = evsel__parse_sample(evsel, event, &sample);
3155 	if (ret)
3156 		return ret;
3157 
3158 	/*
3159 	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3160 	 */
3161 	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3162 		for (i = 0; i < auxtrace_index->nr; i++) {
3163 			ent = &auxtrace_index->entries[i];
3164 			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3165 							 ent->sz, &event->aux, &sample);
3166 			/*
3167 			 * Stop search on error or successful values. Continue search on
3168 			 * 1 ('not found')
3169 			 */
3170 			if (ret != 1)
3171 				return ret;
3172 		}
3173 	}
3174 
3175 	/*
3176 	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3177 	 * don't exit with an error because it will still be possible to decode other aux records.
3178 	 */
3179 	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3180 	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3181 	return 0;
3182 }
3183 
3184 static int cs_etm__queue_aux_records(struct perf_session *session)
3185 {
3186 	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3187 								struct auxtrace_index, list);
3188 	if (index && index->nr > 0)
3189 		return perf_session__peek_events(session, session->header.data_offset,
3190 						 session->header.data_size,
3191 						 cs_etm__queue_aux_records_cb, NULL);
3192 
3193 	/*
3194 	 * We would get here if there are no entries in the index (either no auxtrace
3195 	 * buffers or no index at all). Fail silently as there is the possibility of
3196 	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3197 	 * false.
3198 	 *
3199 	 * In that scenario, buffers will not be split by AUX records.
3200 	 */
3201 	return 0;
3202 }
3203 
3204 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3205 				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3206 
3207 /*
3208  * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3209  * timestamps).
3210  */
3211 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3212 {
3213 	int j;
3214 
3215 	for (j = 0; j < num_cpu; j++) {
3216 		switch (metadata[j][CS_ETM_MAGIC]) {
3217 		case __perf_cs_etmv4_magic:
3218 			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3219 				return false;
3220 			break;
3221 		case __perf_cs_ete_magic:
3222 			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3223 				return false;
3224 			break;
3225 		default:
3226 			/* Unknown / unsupported magic number. */
3227 			return false;
3228 		}
3229 	}
3230 	return true;
3231 }
3232 
3233 /* map trace ids to correct metadata block, from information in metadata */
3234 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3235 					  u64 **metadata)
3236 {
3237 	u64 cs_etm_magic;
3238 	u8 trace_chan_id;
3239 	int i, err;
3240 
3241 	for (i = 0; i < num_cpu; i++) {
3242 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3243 		switch (cs_etm_magic) {
3244 		case __perf_cs_etmv3_magic:
3245 			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3246 			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3247 			break;
3248 		case __perf_cs_etmv4_magic:
3249 		case __perf_cs_ete_magic:
3250 			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3251 			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3252 			break;
3253 		default:
3254 			/* unknown magic number */
3255 			return -EINVAL;
3256 		}
3257 		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3258 		if (err)
3259 			return err;
3260 	}
3261 	return 0;
3262 }
3263 
3264 /*
3265  * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3266  * (formatted or not) packets to create the decoders.
3267  */
3268 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
3269 {
3270 	struct cs_etm_decoder_params d_params;
3271 	struct cs_etm_trace_params  *t_params;
3272 	int decoders = intlist__nr_entries(etmq->traceid_list);
3273 
3274 	if (decoders == 0)
3275 		return 0;
3276 
3277 	/*
3278 	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
3279 	 * needed.
3280 	 */
3281 	if (etmq->format == UNFORMATTED)
3282 		assert(decoders == 1);
3283 
3284 	/* Use metadata to fill in trace parameters for trace decoder */
3285 	t_params = zalloc(sizeof(*t_params) * decoders);
3286 
3287 	if (!t_params)
3288 		goto out_free;
3289 
3290 	if (cs_etm__init_trace_params(t_params, etmq))
3291 		goto out_free;
3292 
3293 	/* Set decoder parameters to decode trace packets */
3294 	if (cs_etm__init_decoder_params(&d_params, etmq,
3295 					dump_trace ? CS_ETM_OPERATION_PRINT :
3296 						     CS_ETM_OPERATION_DECODE))
3297 		goto out_free;
3298 
3299 	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
3300 					    t_params);
3301 
3302 	if (!etmq->decoder)
3303 		goto out_free;
3304 
3305 	/*
3306 	 * Register a function to handle all memory accesses required by
3307 	 * the trace decoder library.
3308 	 */
3309 	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
3310 					      0x0L, ((u64) -1L),
3311 					      cs_etm__mem_access))
3312 		goto out_free_decoder;
3313 
3314 	zfree(&t_params);
3315 	return 0;
3316 
3317 out_free_decoder:
3318 	cs_etm_decoder__free(etmq->decoder);
3319 out_free:
3320 	zfree(&t_params);
3321 	return -EINVAL;
3322 }
3323 
3324 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3325 {
3326 	struct auxtrace_queues *queues = &etm->queues;
3327 
3328 	for (unsigned int i = 0; i < queues->nr_queues; i++) {
3329 		bool empty = list_empty(&queues->queue_array[i].head);
3330 		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3331 		int ret;
3332 
3333 		/*
3334 		 * Don't create decoders for empty queues, mainly because
3335 		 * etmq->format is unknown for empty queues.
3336 		 */
3337 		assert(empty || etmq->format != UNSET);
3338 		if (empty)
3339 			continue;
3340 
3341 		ret = cs_etm__create_queue_decoders(etmq);
3342 		if (ret)
3343 			return ret;
3344 	}
3345 	return 0;
3346 }
3347 
3348 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3349 				       struct perf_session *session)
3350 {
3351 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3352 	struct cs_etm_auxtrace *etm = NULL;
3353 	struct perf_record_time_conv *tc = &session->time_conv;
3354 	int event_header_size = sizeof(struct perf_event_header);
3355 	int total_size = auxtrace_info->header.size;
3356 	int priv_size = 0;
3357 	int num_cpu, max_cpu = 0;
3358 	int err = 0;
3359 	int aux_hw_id_found;
3360 	int i;
3361 	u64 *ptr = NULL;
3362 	u64 **metadata = NULL;
3363 
3364 	/* First the global part */
3365 	ptr = (u64 *) auxtrace_info->priv;
3366 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3367 	metadata = zalloc(sizeof(*metadata) * num_cpu);
3368 	if (!metadata)
3369 		return -ENOMEM;
3370 
3371 	/* Start parsing after the common part of the header */
3372 	i = CS_HEADER_VERSION_MAX;
3373 
3374 	/*
3375 	 * The metadata is stored in the auxtrace_info section and encodes
3376 	 * the configuration of the ARM embedded trace macrocell which is
3377 	 * required by the trace decoder to properly decode the trace due
3378 	 * to its highly compressed nature.
3379 	 */
3380 	for (int j = 0; j < num_cpu; j++) {
3381 		if (ptr[i] == __perf_cs_etmv3_magic) {
3382 			metadata[j] =
3383 				cs_etm__create_meta_blk(ptr, &i,
3384 							CS_ETM_PRIV_MAX,
3385 							CS_ETM_NR_TRC_PARAMS_V0);
3386 		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3387 			metadata[j] =
3388 				cs_etm__create_meta_blk(ptr, &i,
3389 							CS_ETMV4_PRIV_MAX,
3390 							CS_ETMV4_NR_TRC_PARAMS_V0);
3391 		} else if (ptr[i] == __perf_cs_ete_magic) {
3392 			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3393 		} else {
3394 			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3395 				  ptr[i]);
3396 			err = -EINVAL;
3397 			goto err_free_metadata;
3398 		}
3399 
3400 		if (!metadata[j]) {
3401 			err = -ENOMEM;
3402 			goto err_free_metadata;
3403 		}
3404 
3405 		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3406 			max_cpu = metadata[j][CS_ETM_CPU];
3407 	}
3408 
3409 	/*
3410 	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3411 	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3412 	 * global metadata, and each cpu's metadata respectively.
3413 	 * The following tests if the correct number of double words was
3414 	 * present in the auxtrace info section.
3415 	 */
3416 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3417 	if (i * 8 != priv_size) {
3418 		err = -EINVAL;
3419 		goto err_free_metadata;
3420 	}
3421 
3422 	etm = zalloc(sizeof(*etm));
3423 
3424 	if (!etm) {
3425 		err = -ENOMEM;
3426 		goto err_free_metadata;
3427 	}
3428 
3429 	/*
3430 	 * As all the ETMs run at the same exception level, the system should
3431 	 * have the same PID format crossing CPUs.  So cache the PID format
3432 	 * and reuse it for sequential decoding.
3433 	 */
3434 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3435 
3436 	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
3437 	if (err)
3438 		goto err_free_etm;
3439 
3440 	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
3441 		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
3442 		if (err)
3443 			goto err_free_queues;
3444 	}
3445 
3446 	if (session->itrace_synth_opts->set) {
3447 		etm->synth_opts = *session->itrace_synth_opts;
3448 	} else {
3449 		itrace_synth_opts__set_default(&etm->synth_opts,
3450 				session->itrace_synth_opts->default_no_sample);
3451 		etm->synth_opts.callchain = false;
3452 	}
3453 
3454 	etm->session = session;
3455 
3456 	etm->num_cpu = num_cpu;
3457 	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3458 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3459 	etm->metadata = metadata;
3460 	etm->auxtrace_type = auxtrace_info->type;
3461 
3462 	if (etm->synth_opts.use_timestamp)
3463 		/*
3464 		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3465 		 * therefore the decoder cannot know if the timestamp trace is
3466 		 * same with the kernel time.
3467 		 *
3468 		 * If a user has knowledge for the working platform and can
3469 		 * specify itrace option 'T' to tell decoder to forcely use the
3470 		 * traced timestamp as the kernel time.
3471 		 */
3472 		etm->has_virtual_ts = true;
3473 	else
3474 		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3475 		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3476 
3477 	if (!etm->has_virtual_ts)
3478 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3479 			    "The time field of the samples will not be set accurately.\n"
3480 			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3481 			    "you can specify the itrace option 'T' for timestamp decoding\n"
3482 			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3483 
3484 	etm->auxtrace.process_event = cs_etm__process_event;
3485 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3486 	etm->auxtrace.flush_events = cs_etm__flush_events;
3487 	etm->auxtrace.free_events = cs_etm__free_events;
3488 	etm->auxtrace.free = cs_etm__free;
3489 	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3490 	session->auxtrace = &etm->auxtrace;
3491 
3492 	err = cs_etm__setup_timeless_decoding(etm);
3493 	if (err)
3494 		return err;
3495 
3496 	etm->tc.time_shift = tc->time_shift;
3497 	etm->tc.time_mult = tc->time_mult;
3498 	etm->tc.time_zero = tc->time_zero;
3499 	if (event_contains(*tc, time_cycles)) {
3500 		etm->tc.time_cycles = tc->time_cycles;
3501 		etm->tc.time_mask = tc->time_mask;
3502 		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3503 		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3504 	}
3505 	err = cs_etm__synth_events(etm, session);
3506 	if (err)
3507 		goto err_free_queues;
3508 
3509 	err = cs_etm__queue_aux_records(session);
3510 	if (err)
3511 		goto err_free_queues;
3512 
3513 	/*
3514 	 * Map Trace ID values to CPU metadata.
3515 	 *
3516 	 * Trace metadata will always contain Trace ID values from the legacy algorithm
3517 	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
3518 	 * or the kernel doesn't emit them.
3519 	 *
3520 	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3521 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3522 	 * in which case a different value will be used. This means an older perf may still
3523 	 * be able to record and read files generate on a newer system.
3524 	 *
3525 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3526 	 * those packets. If they are there then the values will be mapped and plugged into
3527 	 * the metadata and decoders are only created for each mapping received.
3528 	 *
3529 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3530 	 * then we map Trace ID values to CPU directly from the metadata and create decoders
3531 	 * for all mappings.
3532 	 */
3533 
3534 	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3535 	aux_hw_id_found = 0;
3536 	err = perf_session__peek_events(session, session->header.data_offset,
3537 					session->header.data_size,
3538 					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3539 	if (err)
3540 		goto err_free_queues;
3541 
3542 	/* if no HW ID found this is a file with metadata values only, map from metadata */
3543 	if (!aux_hw_id_found) {
3544 		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3545 		if (err)
3546 			goto err_free_queues;
3547 	}
3548 
3549 	err = cs_etm__create_decoders(etm);
3550 	if (err)
3551 		goto err_free_queues;
3552 
3553 	etm->data_queued = etm->queues.populated;
3554 	return 0;
3555 
3556 err_free_queues:
3557 	auxtrace_queues__free(&etm->queues);
3558 	session->auxtrace = NULL;
3559 err_free_etm:
3560 	zfree(&etm);
3561 err_free_metadata:
3562 	/* No need to check @metadata[j], free(NULL) is supported */
3563 	for (int j = 0; j < num_cpu; j++)
3564 		zfree(&metadata[j]);
3565 	zfree(&metadata);
3566 	return err;
3567 }
3568