xref: /linux/tools/perf/util/cs-etm.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8 
9 #include <linux/bitfield.h>
10 #include <linux/bitops.h>
11 #include <linux/coresight-pmu.h>
12 #include <linux/err.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 
17 #include <stdlib.h>
18 
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "cs-etm.h"
22 #include "cs-etm-decoder/cs-etm-decoder.h"
23 #include "debug.h"
24 #include "dso.h"
25 #include "evlist.h"
26 #include "intlist.h"
27 #include "machine.h"
28 #include "map.h"
29 #include "perf.h"
30 #include "session.h"
31 #include "map_symbol.h"
32 #include "branch.h"
33 #include "symbol.h"
34 #include "tool.h"
35 #include "thread.h"
36 #include "thread-stack.h"
37 #include "tsc.h"
38 #include <tools/libc_compat.h>
39 #include "util/synthetic-events.h"
40 #include "util/util.h"
41 
/*
 * Per-session state for CoreSight ETM AUX trace processing. Embeds the
 * generic auxtrace handler and owns the decode queues, sample-synthesis
 * options and the per-CPU metadata parsed from the AUXTRACE_INFO event.
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless has no timestamps in the trace so overlapping mmap lookups
	 * are less accurate but produces smaller trace data. We use context IDs
	 * in the trace instead of matching timestamps with fork records so
	 * they're not really needed in the general case. Overlapping mmaps
	 * happen in cases like between a fork and an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	int num_cpu;
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	/* num_cpu metadata arrays; entries and array freed in cs_etm__free() */
	u64 **metadata;
	unsigned int pmu_type;
	enum cs_etm_pid_fmt pid_fmt;	/* Cached by cs_etm__init_pid_fmt() */
};
82 
/*
 * Per-traceID decode state inside a cs_etm_queue: the packet ring buffer,
 * the current/previous packet pair and the thread/exception-level each was
 * attributed to, plus the buffers used when synthesizing samples.
 */
struct cs_etm_traceid_queue {
	u8 trace_chan_id;
	u64 period_instructions;
	size_t last_branch_pos;
	union perf_event *event_buf;	/* PERF_SAMPLE_MAX_SIZE scratch buffer */
	struct thread *thread;
	struct thread *prev_packet_thread;
	ocsd_ex_level prev_packet_el;
	ocsd_ex_level el;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};
98 
/*
 * Framing mode of the trace data held by a queue. UNSET means it hasn't
 * been determined yet.
 */
enum cs_etm_format {
	UNSET,
	FORMATTED,
	UNFORMATTED
};
104 
/*
 * Per-auxtrace-queue decode state: the decoder instance, the data buffer
 * currently being consumed, and the traceID bookkeeping for every trace
 * channel seen on this queue.
 */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	unsigned int queue_nr;
	/* Non-zero when a timestamped channel is waiting to be serviced */
	u8 pending_timestamp_chan_id;
	enum cs_etm_format format;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
	/* Conversion between traceID and metadata pointers */
	struct intlist *traceid_list;
	/*
	 * Same as traceid_list, but traceid_list may be a reference to another
	 * queue's which has a matching sink ID.
	 */
	struct intlist *own_traceid_list;
	u32 sink_id;
};
127 
/* Forward declarations for routines referenced before their definition */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
136 
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify the auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Arguments are fully parenthesized so the macros remain correct when
 * handed compound expressions.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
		      (((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
#define SINK_UNSET ((u32) -1)
151 
152 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
153 {
154 	etmidr &= ETMIDR_PTM_VERSION;
155 
156 	if (etmidr == ETMIDR_PTM_VERSION)
157 		return CS_ETM_PROTO_PTM;
158 
159 	return CS_ETM_PROTO_ETMV3;
160 }
161 
162 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
163 {
164 	struct int_node *inode;
165 	u64 *metadata;
166 
167 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
168 	if (!inode)
169 		return -EINVAL;
170 
171 	metadata = inode->priv;
172 	*magic = metadata[CS_ETM_MAGIC];
173 	return 0;
174 }
175 
176 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
177 {
178 	struct int_node *inode;
179 	u64 *metadata;
180 
181 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
182 	if (!inode)
183 		return -EINVAL;
184 
185 	metadata = inode->priv;
186 	*cpu = (int)metadata[CS_ETM_CPU];
187 	return 0;
188 }
189 
190 /*
191  * The returned PID format is presented as an enum:
192  *
193  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
194  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
195  *   CS_ETM_PIDFMT_NONE: No context IDs
196  *
197  * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
198  * are enabled at the same time when the session runs on an EL2 kernel.
199  * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
200  * recorded in the trace data, the tool will selectively use
201  * CONTEXTIDR_EL2 as PID.
202  *
203  * The result is cached in etm->pid_fmt so this function only needs to be called
204  * when processing the aux info.
205  */
206 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
207 {
208 	u64 val;
209 
210 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
211 		val = metadata[CS_ETM_ETMCR];
212 		/* CONTEXTIDR is traced */
213 		if (val & BIT(ETM_OPT_CTXTID))
214 			return CS_ETM_PIDFMT_CTXTID;
215 	} else {
216 		val = metadata[CS_ETMV4_TRCCONFIGR];
217 		/* CONTEXTIDR_EL2 is traced */
218 		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
219 			return CS_ETM_PIDFMT_CTXTID2;
220 		/* CONTEXTIDR_EL1 is traced */
221 		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
222 			return CS_ETM_PIDFMT_CTXTID;
223 	}
224 
225 	return CS_ETM_PIDFMT_NONE;
226 }
227 
228 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
229 {
230 	return etmq->etm->pid_fmt;
231 }
232 
233 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
234 					u8 trace_chan_id, u64 *cpu_metadata)
235 {
236 	/* Get an RB node for this CPU */
237 	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
238 
239 	/* Something went wrong, no need to continue */
240 	if (!inode)
241 		return -ENOMEM;
242 
243 	/* Disallow re-mapping a different traceID to metadata pair. */
244 	if (inode->priv) {
245 		u64 *curr_cpu_data = inode->priv;
246 		u8 curr_chan_id;
247 		int err;
248 
249 		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
250 			/*
251 			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
252 			 * are expected (but not supported) in per-thread mode,
253 			 * rather than signifying an error.
254 			 */
255 			if (etmq->etm->per_thread_decoding)
256 				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
257 			else
258 				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
259 
260 			return -EINVAL;
261 		}
262 
263 		/* check that the mapped ID matches */
264 		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
265 		if (err)
266 			return err;
267 
268 		if (curr_chan_id != trace_chan_id) {
269 			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
270 			return -EINVAL;
271 		}
272 
273 		/* Skip re-adding the same mappings if everything matched */
274 		return 0;
275 	}
276 
277 	/* Not one we've seen before, associate the traceID with the metadata pointer */
278 	inode->priv = cpu_metadata;
279 
280 	return 0;
281 }
282 
283 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
284 {
285 	if (etm->per_thread_decoding)
286 		return etm->queues.queue_array[0].priv;
287 	else
288 		return etm->queues.queue_array[cpu].priv;
289 }
290 
291 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
292 				   u64 *cpu_metadata)
293 {
294 	struct cs_etm_queue *etmq;
295 
296 	/*
297 	 * If the queue is unformatted then only save one mapping in the
298 	 * queue associated with that CPU so only one decoder is made.
299 	 */
300 	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
301 	if (etmq->format == UNFORMATTED)
302 		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
303 						    cpu_metadata);
304 
305 	/*
306 	 * Otherwise, version 0 trace IDs are global so save them into every
307 	 * queue.
308 	 */
309 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
310 		int ret;
311 
312 		etmq = etm->queues.queue_array[i].priv;
313 		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
314 						   cpu_metadata);
315 		if (ret)
316 			return ret;
317 	}
318 
319 	return 0;
320 }
321 
322 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
323 				       u64 hw_id)
324 {
325 	int err;
326 	u64 *cpu_data;
327 	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
328 
329 	cpu_data = get_cpu_data(etm, cpu);
330 	if (cpu_data == NULL)
331 		return -EINVAL;
332 
333 	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
334 	if (err)
335 		return err;
336 
337 	/*
338 	 * if we are picking up the association from the packet, need to plug
339 	 * the correct trace ID into the metadata for setting up decoders later.
340 	 */
341 	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
342 }
343 
/*
 * Handle a minor-version 0.1 HW_ID packet. As well as the traceID <-> CPU
 * association, these carry a sink ID so queues feeding the same sink can
 * share a single traceID map (this queue's traceid_list is replaced by a
 * reference to the other queue's).
 */
static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
					 u64 hw_id)
{
	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
	int ret;
	u64 *cpu_data;
	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);

	/*
	 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
	 * let it pass for now until an actual overlapping trace ID is hit. In
	 * most cases IDs won't overlap even if the sink changes.
	 */
	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
	    etmq->sink_id != sink_id) {
		pr_err("CS_ETM: mismatch between sink IDs\n");
		return -EINVAL;
	}

	etmq->sink_id = sink_id;

	/* Find which other queues use this sink and link their ID maps */
	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;

		/* Different sinks, skip */
		if (other_etmq->sink_id != etmq->sink_id)
			continue;

		/* Already linked, skip */
		if (other_etmq->traceid_list == etmq->traceid_list)
			continue;

		/* At the point of first linking, this one should be empty */
		if (!intlist__empty(etmq->traceid_list)) {
			pr_err("CS_ETM: Can't link populated trace ID lists\n");
			return -EINVAL;
		}

		/* Drop ownership: the shared list is freed by its owner queue */
		etmq->own_traceid_list = NULL;
		intlist__delete(etmq->traceid_list);
		etmq->traceid_list = other_etmq->traceid_list;
		break;
	}

	cpu_data = get_cpu_data(etm, cpu);
	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
	if (ret)
		return ret;

	/* Plug the packet's trace ID into the metadata for decoder setup */
	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
	if (ret)
		return ret;

	return 0;
}
401 
402 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
403 {
404 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
405 
406 	switch (cs_etm_magic) {
407 	case __perf_cs_etmv3_magic:
408 		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
409 				      CORESIGHT_TRACE_ID_VAL_MASK);
410 		break;
411 	case __perf_cs_etmv4_magic:
412 	case __perf_cs_ete_magic:
413 		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
414 				      CORESIGHT_TRACE_ID_VAL_MASK);
415 		break;
416 	default:
417 		return -EINVAL;
418 	}
419 	return 0;
420 }
421 
422 /*
423  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
424  */
425 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
426 {
427 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
428 
429 	switch (cs_etm_magic) {
430 	case __perf_cs_etmv3_magic:
431 		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
432 		break;
433 	case __perf_cs_etmv4_magic:
434 	case __perf_cs_ete_magic:
435 		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
436 		break;
437 
438 	default:
439 		return -EINVAL;
440 	}
441 	return 0;
442 }
443 
444 /*
445  * Get a metadata index for a specific cpu from an array.
446  *
447  */
448 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
449 {
450 	int i;
451 
452 	for (i = 0; i < etm->num_cpu; i++) {
453 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
454 			return i;
455 		}
456 	}
457 
458 	return -1;
459 }
460 
461 /*
462  * Get a metadata for a specific cpu from an array.
463  *
464  */
465 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
466 {
467 	int idx = get_cpu_data_idx(etm, cpu);
468 
469 	return (idx != -1) ? etm->metadata[idx] : NULL;
470 }
471 
472 /*
473  * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
474  *
475  * The payload associates the Trace ID and the CPU.
476  * The routine is tolerant of seeing multiple packets with the same association,
477  * but a CPU / Trace ID association changing during a session is an error.
478  */
479 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
480 					    union perf_event *event)
481 {
482 	struct cs_etm_auxtrace *etm;
483 	struct perf_sample sample;
484 	struct evsel *evsel;
485 	u64 hw_id;
486 	int cpu, version, err;
487 
488 	/* extract and parse the HW ID */
489 	hw_id = event->aux_output_hw_id.hw_id;
490 	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
491 
492 	/* check that we can handle this version */
493 	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
494 		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
495 		       version);
496 		return -EINVAL;
497 	}
498 
499 	/* get access to the etm metadata */
500 	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
501 	if (!etm || !etm->metadata)
502 		return -EINVAL;
503 
504 	/* parse the sample to get the CPU */
505 	evsel = evlist__event2evsel(session->evlist, event);
506 	if (!evsel)
507 		return -EINVAL;
508 	perf_sample__init(&sample, /*all=*/false);
509 	err = evsel__parse_sample(evsel, event, &sample);
510 	if (err)
511 		goto out;
512 	cpu = sample.cpu;
513 	if (cpu == -1) {
514 		/* no CPU in the sample - possibly recorded with an old version of perf */
515 		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
516 		err = -EINVAL;
517 		goto out;
518 	}
519 
520 	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
521 		err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
522 		goto out;
523 	}
524 
525 	err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
526 out:
527 	perf_sample__exit(&sample);
528 	return err;
529 }
530 
/*
 * Flag which trace channel on this queue has a timestamp pending, so the
 * front end knows which traceID queue to service (consumed and cleared by
 * cs_etm__etmq_get_timestamp()).
 */
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
					      u8 trace_chan_id)
{
	/*
	 * When a timestamp packet is encountered the backend code
	 * is stopped so that the front end has time to process packets
	 * that were accumulated in the traceID queue.  Since there can
	 * be more than one channel per cs_etm_queue, we need to specify
	 * what traceID queue needs servicing.
	 */
	etmq->pending_timestamp_chan_id = trace_chan_id;
}
543 
544 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
545 				      u8 *trace_chan_id)
546 {
547 	struct cs_etm_packet_queue *packet_queue;
548 
549 	if (!etmq->pending_timestamp_chan_id)
550 		return 0;
551 
552 	if (trace_chan_id)
553 		*trace_chan_id = etmq->pending_timestamp_chan_id;
554 
555 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
556 						     etmq->pending_timestamp_chan_id);
557 	if (!packet_queue)
558 		return 0;
559 
560 	/* Acknowledge pending status */
561 	etmq->pending_timestamp_chan_id = 0;
562 
563 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
564 	return packet_queue->cs_timestamp;
565 }
566 
567 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
568 {
569 	int i;
570 
571 	queue->head = 0;
572 	queue->tail = 0;
573 	queue->packet_count = 0;
574 	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
575 		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
576 		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
577 		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
578 		queue->packet_buffer[i].instr_count = 0;
579 		queue->packet_buffer[i].last_instr_taken_branch = false;
580 		queue->packet_buffer[i].last_instr_size = 0;
581 		queue->packet_buffer[i].last_instr_type = 0;
582 		queue->packet_buffer[i].last_instr_subtype = 0;
583 		queue->packet_buffer[i].last_instr_cond = 0;
584 		queue->packet_buffer[i].flags = 0;
585 		queue->packet_buffer[i].exception_number = UINT32_MAX;
586 		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
587 		queue->packet_buffer[i].cpu = INT_MIN;
588 	}
589 }
590 
591 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
592 {
593 	int idx;
594 	struct int_node *inode;
595 	struct cs_etm_traceid_queue *tidq;
596 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
597 
598 	intlist__for_each_entry(inode, traceid_queues_list) {
599 		idx = (int)(intptr_t)inode->priv;
600 		tidq = etmq->traceid_queues[idx];
601 		cs_etm__clear_packet_queue(&tidq->packet_queue);
602 	}
603 }
604 
/*
 * Initialise a freshly allocated traceid_queue: reset its packet ring,
 * resolve the thread for the auxtrace queue's TID and allocate the packet
 * and sample buffers. Returns 0 on success, -ENOMEM on allocation failure
 * (partially allocated members are freed before returning).
 */
static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
{
	int rc = -ENOMEM;
	struct auxtrace_queue *queue;
	struct cs_etm_auxtrace *etm = etmq->etm;

	cs_etm__clear_packet_queue(&tidq->packet_queue);

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
	tidq->trace_chan_id = trace_chan_id;
	/* Exception level is unknown until decode reports it */
	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
					       queue->tid);
	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);

	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->packet)
		goto out;

	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->prev_packet)
		goto out_free;

	/* Branch stacks are only needed when synthesizing last-branch samples */
	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		tidq->last_branch = zalloc(sz);
		if (!tidq->last_branch)
			goto out_free;
		tidq->last_branch_rb = zalloc(sz);
		if (!tidq->last_branch_rb)
			goto out_free;
	}

	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!tidq->event_buf)
		goto out_free;

	return 0;

out_free:
	/* zfree() is a no-op for members that were never allocated */
	zfree(&tidq->last_branch_rb);
	zfree(&tidq->last_branch);
	zfree(&tidq->prev_packet);
	zfree(&tidq->packet);
out:
	return rc;
}
657 
658 static struct cs_etm_traceid_queue
659 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
660 {
661 	int idx;
662 	struct int_node *inode;
663 	struct intlist *traceid_queues_list;
664 	struct cs_etm_traceid_queue *tidq, **traceid_queues;
665 	struct cs_etm_auxtrace *etm = etmq->etm;
666 
667 	if (etm->per_thread_decoding)
668 		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
669 
670 	traceid_queues_list = etmq->traceid_queues_list;
671 
672 	/*
673 	 * Check if the traceid_queue exist for this traceID by looking
674 	 * in the queue list.
675 	 */
676 	inode = intlist__find(traceid_queues_list, trace_chan_id);
677 	if (inode) {
678 		idx = (int)(intptr_t)inode->priv;
679 		return etmq->traceid_queues[idx];
680 	}
681 
682 	/* We couldn't find a traceid_queue for this traceID, allocate one */
683 	tidq = malloc(sizeof(*tidq));
684 	if (!tidq)
685 		return NULL;
686 
687 	memset(tidq, 0, sizeof(*tidq));
688 
689 	/* Get a valid index for the new traceid_queue */
690 	idx = intlist__nr_entries(traceid_queues_list);
691 	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
692 	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
693 	if (!inode)
694 		goto out_free;
695 
696 	/* Associate this traceID with this index */
697 	inode->priv = (void *)(intptr_t)idx;
698 
699 	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
700 		goto out_free;
701 
702 	/* Grow the traceid_queues array by one unit */
703 	traceid_queues = etmq->traceid_queues;
704 	traceid_queues = reallocarray(traceid_queues,
705 				      idx + 1,
706 				      sizeof(*traceid_queues));
707 
708 	/*
709 	 * On failure reallocarray() returns NULL and the original block of
710 	 * memory is left untouched.
711 	 */
712 	if (!traceid_queues)
713 		goto out_free;
714 
715 	traceid_queues[idx] = tidq;
716 	etmq->traceid_queues = traceid_queues;
717 
718 	return etmq->traceid_queues[idx];
719 
720 out_free:
721 	/*
722 	 * Function intlist__remove() removes the inode from the list
723 	 * and delete the memory associated to it.
724 	 */
725 	intlist__remove(traceid_queues_list, inode);
726 	free(tidq);
727 
728 	return NULL;
729 }
730 
731 struct cs_etm_packet_queue
732 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
733 {
734 	struct cs_etm_traceid_queue *tidq;
735 
736 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
737 	if (tidq)
738 		return &tidq->packet_queue;
739 
740 	return NULL;
741 }
742 
743 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
744 				struct cs_etm_traceid_queue *tidq)
745 {
746 	struct cs_etm_packet *tmp;
747 
748 	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
749 	    etm->synth_opts.instructions) {
750 		/*
751 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
752 		 * the next incoming packet.
753 		 *
754 		 * Threads and exception levels are also tracked for both the
755 		 * previous and current packets. This is because the previous
756 		 * packet is used for the 'from' IP for branch samples, so the
757 		 * thread at that time must also be assigned to that sample.
758 		 * Across discontinuity packets the thread can change, so by
759 		 * tracking the thread for the previous packet the branch sample
760 		 * will have the correct info.
761 		 */
762 		tmp = tidq->packet;
763 		tidq->packet = tidq->prev_packet;
764 		tidq->prev_packet = tmp;
765 		tidq->prev_packet_el = tidq->el;
766 		thread__put(tidq->prev_packet_thread);
767 		tidq->prev_packet_thread = thread__get(tidq->thread);
768 	}
769 }
770 
771 static void cs_etm__packet_dump(const char *pkt_string, void *data)
772 {
773 	const char *color = PERF_COLOR_BLUE;
774 	int len = strlen(pkt_string);
775 	struct cs_etm_queue *etmq = data;
776 	char queue_nr[64];
777 
778 	if (verbose)
779 		snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr);
780 	else
781 		queue_nr[0] = '\0';
782 
783 	if (len && (pkt_string[len-1] == '\n'))
784 		color_fprintf(stdout, color, "	%s%s", queue_nr, pkt_string);
785 	else
786 		color_fprintf(stdout, color, "	%s%s\n", queue_nr, pkt_string);
787 
788 	fflush(stdout);
789 }
790 
791 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
792 					  u64 *metadata, u32 etmidr)
793 {
794 	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
795 	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
796 	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
797 }
798 
799 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
800 					  u64 *metadata)
801 {
802 	t_params->protocol = CS_ETM_PROTO_ETMV4i;
803 	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
804 	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
805 	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
806 	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
807 	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
808 	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
809 }
810 
811 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
812 					u64 *metadata)
813 {
814 	t_params->protocol = CS_ETM_PROTO_ETE;
815 	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
816 	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
817 	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
818 	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
819 	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
820 	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
821 	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
822 }
823 
824 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
825 				     struct cs_etm_queue *etmq)
826 {
827 	struct int_node *inode;
828 
829 	intlist__for_each_entry(inode, etmq->traceid_list) {
830 		u64 *metadata = inode->priv;
831 		u64 architecture = metadata[CS_ETM_MAGIC];
832 		u32 etmidr;
833 
834 		switch (architecture) {
835 		case __perf_cs_etmv3_magic:
836 			etmidr = metadata[CS_ETM_ETMIDR];
837 			cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
838 			break;
839 		case __perf_cs_etmv4_magic:
840 			cs_etm__set_trace_param_etmv4(t_params++, metadata);
841 			break;
842 		case __perf_cs_ete_magic:
843 			cs_etm__set_trace_param_ete(t_params++, metadata);
844 			break;
845 		default:
846 			return -EINVAL;
847 		}
848 	}
849 
850 	return 0;
851 }
852 
853 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
854 				       struct cs_etm_queue *etmq,
855 				       enum cs_etm_decoder_operation mode)
856 {
857 	int ret = -EINVAL;
858 
859 	if (!(mode < CS_ETM_OPERATION_MAX))
860 		goto out;
861 
862 	d_params->packet_printer = cs_etm__packet_dump;
863 	d_params->operation = mode;
864 	d_params->data = etmq;
865 	d_params->formatted = etmq->format == FORMATTED;
866 	d_params->fsyncs = false;
867 	d_params->hsyncs = false;
868 	d_params->frame_aligned = true;
869 
870 	ret = 0;
871 out:
872 	return ret;
873 }
874 
875 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
876 			       struct auxtrace_buffer *buffer)
877 {
878 	int ret;
879 	const char *color = PERF_COLOR_BLUE;
880 	size_t buffer_used = 0;
881 
882 	fprintf(stdout, "\n");
883 	color_fprintf(stdout, color,
884 		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
885 		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
886 
887 	do {
888 		size_t consumed;
889 
890 		ret = cs_etm_decoder__process_data_block(
891 				etmq->decoder, buffer->offset,
892 				&((u8 *)buffer->data)[buffer_used],
893 				buffer->size - buffer_used, &consumed);
894 		if (ret)
895 			break;
896 
897 		buffer_used += consumed;
898 	} while (buffer_used < buffer->size);
899 
900 	cs_etm_decoder__reset(etmq->decoder);
901 }
902 
903 static int cs_etm__flush_events(struct perf_session *session,
904 				const struct perf_tool *tool)
905 {
906 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
907 						   struct cs_etm_auxtrace,
908 						   auxtrace);
909 	if (dump_trace)
910 		return 0;
911 
912 	if (!tool->ordered_events)
913 		return -EINVAL;
914 
915 	if (etm->timeless_decoding) {
916 		/*
917 		 * Pass tid = -1 to process all queues. But likely they will have
918 		 * already been processed on PERF_RECORD_EXIT anyway.
919 		 */
920 		return cs_etm__process_timeless_queues(etm, -1);
921 	}
922 
923 	return cs_etm__process_timestamped_queues(etm);
924 }
925 
/*
 * Tear down every traceid_queue owned by this etm queue: the per-traceID
 * buffers and thread references first, then the traceID -> index RB tree,
 * and finally the traceid_queues pointer array itself.
 */
static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	uintptr_t priv;
	struct int_node *inode, *tmp;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
		priv = (uintptr_t)inode->priv;
		idx = priv;

		/* Free this traceid_queue from the array */
		tidq = etmq->traceid_queues[idx];
		thread__zput(tidq->thread);
		thread__zput(tidq->prev_packet_thread);
		zfree(&tidq->event_buf);
		zfree(&tidq->last_branch);
		zfree(&tidq->last_branch_rb);
		zfree(&tidq->prev_packet);
		zfree(&tidq->packet);
		zfree(&tidq);

		/*
		 * Function intlist__remove() removes the inode from the list
		 * and delete the memory associated to it.
		 */
		intlist__remove(traceid_queues_list, inode);
	}

	/* Then the RB tree itself */
	intlist__delete(traceid_queues_list);
	etmq->traceid_queues_list = NULL;

	/* finally free the traceid_queues array */
	zfree(&etmq->traceid_queues);
}
963 
964 static void cs_etm__free_queue(void *priv)
965 {
966 	struct int_node *inode, *tmp;
967 	struct cs_etm_queue *etmq = priv;
968 
969 	if (!etmq)
970 		return;
971 
972 	cs_etm_decoder__free(etmq->decoder);
973 	cs_etm__free_traceid_queues(etmq);
974 
975 	if (etmq->own_traceid_list) {
976 		/* First remove all traceID/metadata nodes for the RB tree */
977 		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
978 			intlist__remove(etmq->own_traceid_list, inode);
979 
980 		/* Then the RB tree itself */
981 		intlist__delete(etmq->own_traceid_list);
982 	}
983 
984 	free(etmq);
985 }
986 
987 static void cs_etm__free_events(struct perf_session *session)
988 {
989 	unsigned int i;
990 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
991 						   struct cs_etm_auxtrace,
992 						   auxtrace);
993 	struct auxtrace_queues *queues = &aux->queues;
994 
995 	for (i = 0; i < queues->nr_queues; i++) {
996 		cs_etm__free_queue(queues->queue_array[i].priv);
997 		queues->queue_array[i].priv = NULL;
998 	}
999 
1000 	auxtrace_queues__free(queues);
1001 }
1002 
1003 static void cs_etm__free(struct perf_session *session)
1004 {
1005 	int i;
1006 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1007 						   struct cs_etm_auxtrace,
1008 						   auxtrace);
1009 	cs_etm__free_events(session);
1010 	session->auxtrace = NULL;
1011 
1012 	for (i = 0; i < aux->num_cpu; i++)
1013 		zfree(&aux->metadata[i]);
1014 
1015 	zfree(&aux->metadata);
1016 	zfree(&aux);
1017 }
1018 
1019 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
1020 				      struct evsel *evsel)
1021 {
1022 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1023 						   struct cs_etm_auxtrace,
1024 						   auxtrace);
1025 
1026 	return evsel->core.attr.type == aux->pmu_type;
1027 }
1028 
1029 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
1030 					   ocsd_ex_level el)
1031 {
1032 	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
1033 
1034 	/*
1035 	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
1036 	 * running at EL1 assume everything is the host.
1037 	 */
1038 	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
1039 		return &etmq->etm->session->machines.host;
1040 
1041 	/*
1042 	 * Not perfect, but otherwise assume anything in EL1 is the default
1043 	 * guest, and everything else is the host. Distinguishing between guest
1044 	 * and host userspaces isn't currently supported either. Neither is
1045 	 * multiple guest support. All this does is reduce the likeliness of
1046 	 * decode errors where we look into the host kernel maps when it should
1047 	 * have been the guest maps.
1048 	 */
1049 	switch (el) {
1050 	case ocsd_EL1:
1051 		return machines__find_guest(&etmq->etm->session->machines,
1052 					    DEFAULT_GUEST_KERNEL_ID);
1053 	case ocsd_EL3:
1054 	case ocsd_EL2:
1055 	case ocsd_EL0:
1056 	case ocsd_EL_unknown:
1057 	default:
1058 		return &etmq->etm->session->machines.host;
1059 	}
1060 }
1061 
1062 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
1063 			   ocsd_ex_level el)
1064 {
1065 	struct machine *machine = cs_etm__get_machine(etmq, el);
1066 
1067 	if (address >= machine__kernel_start(machine)) {
1068 		if (machine__is_host(machine))
1069 			return PERF_RECORD_MISC_KERNEL;
1070 		else
1071 			return PERF_RECORD_MISC_GUEST_KERNEL;
1072 	} else {
1073 		if (machine__is_host(machine))
1074 			return PERF_RECORD_MISC_USER;
1075 		else {
1076 			/*
1077 			 * Can't really happen at the moment because
1078 			 * cs_etm__get_machine() will always return
1079 			 * machines.host for any non EL1 trace.
1080 			 */
1081 			return PERF_RECORD_MISC_GUEST_USER;
1082 		}
1083 	}
1084 }
1085 
/*
 * Memory access callback used by the OpenCSD decoder: read up to @size bytes
 * of program text at @address for the trace stream identified by
 * @trace_chan_id into @buffer.  Returns the number of bytes read, or 0 on
 * any failure (unknown traceID queue, unmapped address, missing DSO data).
 */
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8  cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked EL along side the PID in cs_etm__set_thread()
	 * so double check that it matches what OpenCSD thinks as well. It
	 * doesn't distinguish between EL0 and EL1 for this mem access callback
	 * so we had to do the extra tracking. Skip validation if it's any of
	 * the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	/* Resolve the address to a map in the owning thread's address space */
	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	/* Don't retry a DSO whose data has already failed to load for itrace */
	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		/* Only emit the per-DSO error once to avoid log spam */
		if (!dso__auxtrace_warned(dso)) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
				address,
				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
			dso__set_auxtrace_warned(dso);
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}
1160 
1161 static struct cs_etm_queue *cs_etm__alloc_queue(void)
1162 {
1163 	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
1164 	if (!etmq)
1165 		return NULL;
1166 
1167 	etmq->traceid_queues_list = intlist__new(NULL);
1168 	if (!etmq->traceid_queues_list)
1169 		goto out_free;
1170 
1171 	/*
1172 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
1173 	 * has to be made for each packet that gets decoded, optimizing access
1174 	 * in anything other than a sequential array is worth doing.
1175 	 */
1176 	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
1177 	if (!etmq->traceid_list)
1178 		goto out_free;
1179 
1180 	return etmq;
1181 
1182 out_free:
1183 	intlist__delete(etmq->traceid_queues_list);
1184 	free(etmq);
1185 
1186 	return NULL;
1187 }
1188 
1189 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1190 			       struct auxtrace_queue *queue,
1191 			       unsigned int queue_nr)
1192 {
1193 	struct cs_etm_queue *etmq = queue->priv;
1194 
1195 	if (etmq)
1196 		return 0;
1197 
1198 	etmq = cs_etm__alloc_queue();
1199 
1200 	if (!etmq)
1201 		return -ENOMEM;
1202 
1203 	queue->priv = etmq;
1204 	etmq->etm = etm;
1205 	etmq->queue_nr = queue_nr;
1206 	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
1207 	etmq->offset = 0;
1208 	etmq->sink_id = SINK_UNSET;
1209 
1210 	return 0;
1211 }
1212 
/*
 * Decode from the start of this etmq until the first CS timestamp is found,
 * then seed the auxtrace min heap with it so that all queues can later be
 * processed in chronological order.
 */
static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario.  As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs.  So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp.  The timestamp is then added to the auxtrace min heap
	 * in order to know what nibble (of all the etmqs) to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq.  Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block.  The decoder will stop when
		 * encountering a CS timestamp, a full packet queue or the end of
		 * trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one.  Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them.  As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp.  Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute.  Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}
1283 
/*
 * Flatten the circular last-branch buffer (tidq->last_branch_rb) into the
 * linear tidq->last_branch array, in reverse chronological order, ready to
 * be attached to a synthesised sample.
 */
static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps.  First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over.  The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}
1327 
1328 static inline
1329 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1330 {
1331 	tidq->last_branch_pos = 0;
1332 	tidq->last_branch_rb->nr = 0;
1333 }
1334 
1335 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1336 					 u8 trace_chan_id, u64 addr)
1337 {
1338 	u8 instrBytes[2];
1339 
1340 	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1341 			   instrBytes, 0);
1342 	/*
1343 	 * T32 instruction size is indicated by bits[15:11] of the first
1344 	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1345 	 * denote a 32-bit instruction.
1346 	 */
1347 	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1348 }
1349 
1350 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1351 {
1352 	/*
1353 	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1354 	 * appear in samples.
1355 	 */
1356 	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1357 	    packet->sample_type == CS_ETM_EXCEPTION)
1358 		return 0;
1359 
1360 	return packet->start_addr;
1361 }
1362 
1363 static inline
1364 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1365 {
1366 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1367 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1368 		return 0;
1369 
1370 	return packet->end_addr - packet->last_instr_size;
1371 }
1372 
1373 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1374 				     u64 trace_chan_id,
1375 				     const struct cs_etm_packet *packet,
1376 				     u64 offset)
1377 {
1378 	if (packet->isa == CS_ETM_ISA_T32) {
1379 		u64 addr = packet->start_addr;
1380 
1381 		while (offset) {
1382 			addr += cs_etm__t32_instr_size(etmq,
1383 						       trace_chan_id, addr);
1384 			offset--;
1385 		}
1386 		return addr;
1387 	}
1388 
1389 	/* Assume a 4 byte instruction size (A32/A64) */
1390 	return packet->start_addr + offset * 4;
1391 }
1392 
/*
 * Record the branch between PREV_PACKET (from) and PACKET (to) in the
 * circular last-branch buffer.  Entries are written backwards from the end
 * of the buffer so they come out in reverse chronological order.
 */
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
					  struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs = tidq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down.  After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!tidq->last_branch_pos)
		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	tidq->last_branch_pos -= 1;

	be       = &bs->entries[tidq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
	be->to	 = cs_etm__first_executed_instr(tidq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}
1424 
1425 static int cs_etm__inject_event(union perf_event *event,
1426 			       struct perf_sample *sample, u64 type)
1427 {
1428 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1429 	return perf_event__synthesize_sample(event, type, 0, sample);
1430 }
1431 
1432 
1433 static int
1434 cs_etm__get_trace(struct cs_etm_queue *etmq)
1435 {
1436 	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1437 	struct auxtrace_buffer *old_buffer = aux_buffer;
1438 	struct auxtrace_queue *queue;
1439 
1440 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1441 
1442 	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1443 
1444 	/* If no more data, drop the previous auxtrace_buffer and return */
1445 	if (!aux_buffer) {
1446 		if (old_buffer)
1447 			auxtrace_buffer__drop_data(old_buffer);
1448 		etmq->buf_len = 0;
1449 		return 0;
1450 	}
1451 
1452 	etmq->buffer = aux_buffer;
1453 
1454 	/* If the aux_buffer doesn't have data associated, try to load it */
1455 	if (!aux_buffer->data) {
1456 		/* get the file desc associated with the perf data file */
1457 		int fd = perf_data__fd(etmq->etm->session->data);
1458 
1459 		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1460 		if (!aux_buffer->data)
1461 			return -ENOMEM;
1462 	}
1463 
1464 	/* If valid, drop the previous buffer */
1465 	if (old_buffer)
1466 		auxtrace_buffer__drop_data(old_buffer);
1467 
1468 	etmq->buf_used = 0;
1469 	etmq->buf_len = aux_buffer->size;
1470 	etmq->buf = aux_buffer->data;
1471 
1472 	return etmq->buf_len;
1473 }
1474 
1475 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1476 			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1477 			       ocsd_ex_level el)
1478 {
1479 	struct machine *machine = cs_etm__get_machine(etmq, el);
1480 
1481 	if (tid != -1) {
1482 		thread__zput(tidq->thread);
1483 		tidq->thread = machine__find_thread(machine, -1, tid);
1484 	}
1485 
1486 	/* Couldn't find a known thread */
1487 	if (!tidq->thread)
1488 		tidq->thread = machine__idle_thread(machine);
1489 
1490 	tidq->el = el;
1491 }
1492 
1493 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1494 			    u8 trace_chan_id, ocsd_ex_level el)
1495 {
1496 	struct cs_etm_traceid_queue *tidq;
1497 
1498 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1499 	if (!tidq)
1500 		return -EINVAL;
1501 
1502 	cs_etm__set_thread(etmq, tidq, tid, el);
1503 	return 0;
1504 }
1505 
1506 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1507 {
1508 	return !!etmq->etm->timeless_decoding;
1509 }
1510 
1511 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1512 			      u64 trace_chan_id,
1513 			      const struct cs_etm_packet *packet,
1514 			      struct perf_sample *sample)
1515 {
1516 	/*
1517 	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1518 	 * packet, so directly bail out with 'insn_len' = 0.
1519 	 */
1520 	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1521 		sample->insn_len = 0;
1522 		return;
1523 	}
1524 
1525 	/*
1526 	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1527 	 * cs_etm__t32_instr_size().
1528 	 */
1529 	if (packet->isa == CS_ETM_ISA_T32)
1530 		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1531 							  sample->ip);
1532 	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
1533 	else
1534 		sample->insn_len = 4;
1535 
1536 	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1537 			   (void *)sample->insn, 0);
1538 }
1539 
1540 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1541 {
1542 	struct cs_etm_auxtrace *etm = etmq->etm;
1543 
1544 	if (etm->has_virtual_ts)
1545 		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1546 	else
1547 		return cs_timestamp;
1548 }
1549 
1550 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1551 					       struct cs_etm_traceid_queue *tidq)
1552 {
1553 	struct cs_etm_auxtrace *etm = etmq->etm;
1554 	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1555 
1556 	if (!etm->timeless_decoding && etm->has_virtual_ts)
1557 		return packet_queue->cs_timestamp;
1558 	else
1559 		return etm->latest_kernel_timestamp;
1560 }
1561 
/*
 * Synthesise a PERF_RECORD_SAMPLE instruction event at @addr covering
 * @period instructions and deliver it to the session.  Returns 0 on
 * success, or the injection/delivery error code.
 */
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    struct cs_etm_traceid_queue *tidq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = tidq->event_buf;
	struct perf_sample sample;

	perf_sample__init(&sample, /*all=*/true);
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
	event->sample.header.size = sizeof(struct perf_event_header);

	/* Set time field based on etm auxtrace config. */
	sample.time = cs_etm__resolve_sample_time(etmq, tidq);

	sample.ip = addr;
	sample.pid = thread__pid(tidq->thread);
	sample.tid = thread__tid(tidq->thread);
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = tidq->packet->cpu;
	sample.flags = tidq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	/* Fill in sample->insn/insn_len from the sampled address */
	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);

	if (etm->synth_opts.last_branch)
		sample.branch_stack = tidq->last_branch;

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	perf_sample__exit(&sample);
	return ret;
}
1611 
1612 /*
1613  * The cs etm packet encodes an instruction range between a branch target
1614  * and the next taken branch. Generate sample accordingly.
1615  */
1616 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1617 				       struct cs_etm_traceid_queue *tidq)
1618 {
1619 	int ret = 0;
1620 	struct cs_etm_auxtrace *etm = etmq->etm;
1621 	struct perf_sample sample = {.ip = 0,};
1622 	union perf_event *event = tidq->event_buf;
1623 	struct dummy_branch_stack {
1624 		u64			nr;
1625 		u64			hw_idx;
1626 		struct branch_entry	entries;
1627 	} dummy_bs;
1628 	u64 ip;
1629 
1630 	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1631 
1632 	event->sample.header.type = PERF_RECORD_SAMPLE;
1633 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1634 						     tidq->prev_packet_el);
1635 	event->sample.header.size = sizeof(struct perf_event_header);
1636 
1637 	/* Set time field based on etm auxtrace config. */
1638 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1639 
1640 	sample.ip = ip;
1641 	sample.pid = thread__pid(tidq->prev_packet_thread);
1642 	sample.tid = thread__tid(tidq->prev_packet_thread);
1643 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1644 	sample.id = etmq->etm->branches_id;
1645 	sample.stream_id = etmq->etm->branches_id;
1646 	sample.period = 1;
1647 	sample.cpu = tidq->packet->cpu;
1648 	sample.flags = tidq->prev_packet->flags;
1649 	sample.cpumode = event->sample.header.misc;
1650 
1651 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1652 			  &sample);
1653 
1654 	/*
1655 	 * perf report cannot handle events without a branch stack
1656 	 */
1657 	if (etm->synth_opts.last_branch) {
1658 		dummy_bs = (struct dummy_branch_stack){
1659 			.nr = 1,
1660 			.hw_idx = -1ULL,
1661 			.entries = {
1662 				.from = sample.ip,
1663 				.to = sample.addr,
1664 			},
1665 		};
1666 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1667 	}
1668 
1669 	if (etm->synth_opts.inject) {
1670 		ret = cs_etm__inject_event(event, &sample,
1671 					   etm->branches_sample_type);
1672 		if (ret)
1673 			return ret;
1674 	}
1675 
1676 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1677 
1678 	if (ret)
1679 		pr_err(
1680 		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1681 		ret);
1682 
1683 	return ret;
1684 }
1685 
/*
 * Create the synthetic branch/instruction events (per the itrace options)
 * that decoded samples will be attributed to, deriving their attributes
 * from the CoreSight evsel found in the session's evlist.
 */
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	/* Locate the evsel carrying CoreSight trace data */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	/* Timeless traces carry no timestamps, so drop PERF_SAMPLE_TIME */
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	/* Mirror the exclusion/format settings of the traced event */
	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = auxtrace_synth_id_range_start(evsel);

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		/* Branch samples carry a target address */
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}
1767 
/*
 * Main per-packet sampling routine: update the last-branch buffer, emit
 * periodic instruction samples and/or branch samples as requested by the
 * synth options, then swap PACKET with PREV_PACKET for the next round.
 */
static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_prev;

	/* Get instructions remainder from previous packet */
	instrs_prev = tidq->period_instructions;

	tidq->period_instructions += tidq->packet->instr_count;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->synth_opts.instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/*
		 * Below diagram demonstrates the instruction samples
		 * generation flows:
		 *
		 *    Instrs     Instrs       Instrs       Instrs
		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
		 *    |            |            |            |
		 *    V            V            V            V
		 *   --------------------------------------------------
		 *            ^                                  ^
		 *            |                                  |
		 *         Period                             Period
		 *    instructions(Pi)                   instructions(Pi')
		 *
		 *            |                                  |
		 *            \---------------- -----------------/
		 *                             V
		 *                 tidq->packet->instr_count
		 *
		 * Instrs Sample(n...) are the synthesised samples occurring
		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line.  Sample(n) is being the
		 * last sample before the current etm packet, n+1 to n+3
		 * samples are generated from the current etm packet.
		 *
		 * tidq->packet->instr_count represents the number of
		 * instructions in the current etm packet.
		 *
		 * Period instructions (Pi) contains the number of
		 * instructions executed after the sample point(n) from the
		 * previous etm packet.  This will always be less than
		 * etm->instructions_sample_period.
		 *
		 * When generate new samples, it combines with two parts
		 * instructions, one is the tail of the old packet and another
		 * is the head of the new coming packet, to generate
		 * sample(n+1); sample(n+2) and sample(n+3) consume the
		 * instructions with sample period.  After sample(n+3), the rest
		 * instructions will be used by later packet and it is assigned
		 * to tidq->period_instructions for next round calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
				etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	cs_etm__packet_swap(etm, tidq);

	return 0;
}
1896 
1897 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1898 {
1899 	/*
1900 	 * When the exception packet is inserted, whether the last instruction
1901 	 * in previous range packet is taken branch or not, we need to force
1902 	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
1903 	 * to generate branch sample for the instruction range before the
1904 	 * exception is trapped to kernel or before the exception returning.
1905 	 *
1906 	 * The exception packet includes the dummy address values, so don't
1907 	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
1908 	 * for generating instruction and branch samples.
1909 	 */
1910 	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1911 		tidq->prev_packet->last_instr_taken_branch = true;
1912 
1913 	return 0;
1914 }
1915 
/*
 * Flush pending state for a traceID queue: depending on the synth options,
 * emit a final instruction sample for the branches still in the circular
 * buffer and a branch sample for the last range packet, then swap packets
 * and reset the last-branch buffer.
 */
static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;

	}

	if (etm->synth_opts.branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	cs_etm__packet_swap(etm, tidq);

	/* Reset last branches after flush the trace */
	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return err;
}
1969 
/*
 * Called at the end of a trace data block: PACKET is stale at this point,
 * so only flush the remaining last-branch buffer (as one final instruction
 * sample) when the synth options require it.
 */
static int cs_etm__end_block(struct cs_etm_queue *etmq,
			     struct cs_etm_traceid_queue *tidq)
{
	int err;

	/*
	 * It has no new packet coming and 'etmq->packet' contains the stale
	 * packet which was set at the previous time with packets swapping;
	 * so skip to generate branch sample to avoid stale packet.
	 *
	 * For this case only flush branch stack and generate a last branch
	 * event for the branches left in the circular buffer at the end of
	 * the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;
	}

	return 0;
}
2009 /*
2010  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2011  *			   if need be.
2012  * Returns:	< 0	if error
2013  *		= 0	if no more auxtrace_buffer to read
2014  *		> 0	if the current buffer isn't empty yet
2015  */
2016 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2017 {
2018 	int ret;
2019 
2020 	if (!etmq->buf_len) {
2021 		ret = cs_etm__get_trace(etmq);
2022 		if (ret <= 0)
2023 			return ret;
2024 		/*
2025 		 * We cannot assume consecutive blocks in the data file
2026 		 * are contiguous, reset the decoder to force re-sync.
2027 		 */
2028 		ret = cs_etm_decoder__reset(etmq->decoder);
2029 		if (ret)
2030 			return ret;
2031 	}
2032 
2033 	return etmq->buf_len;
2034 }
2035 
2036 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
2037 				 struct cs_etm_packet *packet,
2038 				 u64 end_addr)
2039 {
2040 	/* Initialise to keep compiler happy */
2041 	u16 instr16 = 0;
2042 	u32 instr32 = 0;
2043 	u64 addr;
2044 
2045 	switch (packet->isa) {
2046 	case CS_ETM_ISA_T32:
2047 		/*
2048 		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2049 		 *
2050 		 *  b'15         b'8
2051 		 * +-----------------+--------+
2052 		 * | 1 1 0 1 1 1 1 1 |  imm8  |
2053 		 * +-----------------+--------+
2054 		 *
2055 		 * According to the specification, it only defines SVC for T32
2056 		 * with 16 bits instruction and has no definition for 32bits;
2057 		 * so below only read 2 bytes as instruction size for T32.
2058 		 */
2059 		addr = end_addr - 2;
2060 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2061 				   (u8 *)&instr16, 0);
2062 		if ((instr16 & 0xFF00) == 0xDF00)
2063 			return true;
2064 
2065 		break;
2066 	case CS_ETM_ISA_A32:
2067 		/*
2068 		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2069 		 *
2070 		 *  b'31 b'28 b'27 b'24
2071 		 * +---------+---------+-------------------------+
2072 		 * |  !1111  | 1 1 1 1 |        imm24            |
2073 		 * +---------+---------+-------------------------+
2074 		 */
2075 		addr = end_addr - 4;
2076 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2077 				   (u8 *)&instr32, 0);
2078 		if ((instr32 & 0x0F000000) == 0x0F000000 &&
2079 		    (instr32 & 0xF0000000) != 0xF0000000)
2080 			return true;
2081 
2082 		break;
2083 	case CS_ETM_ISA_A64:
2084 		/*
2085 		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2086 		 *
2087 		 *  b'31               b'21           b'4     b'0
2088 		 * +-----------------------+---------+-----------+
2089 		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2090 		 * +-----------------------+---------+-----------+
2091 		 */
2092 		addr = end_addr - 4;
2093 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2094 				   (u8 *)&instr32, 0);
2095 		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2096 			return true;
2097 
2098 		break;
2099 	case CS_ETM_ISA_UNKNOWN:
2100 	default:
2101 		break;
2102 	}
2103 
2104 	return false;
2105 }
2106 
2107 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2108 			       struct cs_etm_traceid_queue *tidq, u64 magic)
2109 {
2110 	u8 trace_chan_id = tidq->trace_chan_id;
2111 	struct cs_etm_packet *packet = tidq->packet;
2112 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2113 
2114 	if (magic == __perf_cs_etmv3_magic)
2115 		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2116 			return true;
2117 
2118 	/*
2119 	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2120 	 * HVC cases; need to check if it's SVC instruction based on
2121 	 * packet address.
2122 	 */
2123 	if (magic == __perf_cs_etmv4_magic) {
2124 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2125 		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2126 					 prev_packet->end_addr))
2127 			return true;
2128 	}
2129 
2130 	return false;
2131 }
2132 
2133 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2134 				       u64 magic)
2135 {
2136 	struct cs_etm_packet *packet = tidq->packet;
2137 
2138 	if (magic == __perf_cs_etmv3_magic)
2139 		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2140 		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2141 		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2142 		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2143 		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2144 			return true;
2145 
2146 	if (magic == __perf_cs_etmv4_magic)
2147 		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2148 		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2149 		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2150 		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2151 		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2152 		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2153 		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2154 			return true;
2155 
2156 	return false;
2157 }
2158 
2159 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2160 				      struct cs_etm_traceid_queue *tidq,
2161 				      u64 magic)
2162 {
2163 	u8 trace_chan_id = tidq->trace_chan_id;
2164 	struct cs_etm_packet *packet = tidq->packet;
2165 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2166 
2167 	if (magic == __perf_cs_etmv3_magic)
2168 		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2169 		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2170 		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2171 		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2172 		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2173 		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2174 		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2175 			return true;
2176 
2177 	if (magic == __perf_cs_etmv4_magic) {
2178 		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2179 		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2180 		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2181 		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2182 			return true;
2183 
2184 		/*
2185 		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2186 		 * (SMC, HVC) are taken as sync exceptions.
2187 		 */
2188 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2189 		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2190 					  prev_packet->end_addr))
2191 			return true;
2192 
2193 		/*
2194 		 * ETMv4 has 5 bits for exception number; if the numbers
2195 		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2196 		 * they are implementation defined exceptions.
2197 		 *
2198 		 * For this case, simply take it as sync exception.
2199 		 */
2200 		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2201 		    packet->exception_number <= CS_ETMV4_EXC_END)
2202 			return true;
2203 	}
2204 
2205 	return false;
2206 }
2207 
/*
 * cs_etm__set_sample_flags: Derive perf branch flags (PERF_IP_FLAG_*) for the
 * packet at the head of the traceID queue, and retroactively calibrate the
 * previous packet's flags when the current packet reveals what the previous
 * one really was (e.g. exception taken/returned, trace begin/end).
 *
 * Must be called before packets are swapped, while address information in
 * both 'packet' and 'prev_packet' is still valid.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
				    struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * Immediate branch instruction with neither link nor
		 * return flag: a normal branch within the function.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * Immediate branch instruction with link (e.g. BL), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with link (e.g. BLR), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with subtype of
		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
		 * function return for A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Indirect branch instruction without link (e.g. BR), usually
		 * this is used for function return, especially for functions
		 * within dynamic link lib.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Decoder might insert a discontinuity in the middle of
		 * instruction packets, fixup prev_packet with flag
		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;

		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows SVC instruction,
		 * it needs to calibrate the previous packet sample flags
		 * as PERF_IP_FLAG_SYSCALLRET.
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id,
					 packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous, if the previous packet is
		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
		 * for previous packet.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		/* Exception semantics differ between ETMv3 and ETMv4 */
		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for system call. */
		if (cs_etm__is_syscall(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exceptions are triggered by external signals from bus,
		 * interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, exception is caused by trap, instruction &
		 * data fault, or alignment errors.
		 */
		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * When the exception packet is inserted, since exception
		 * packet is not used standalone for generating samples
		 * and it's affiliation to the previous instruction range
		 * packet; so set previous range packet flags to tell perf
		 * it is an exception taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * When the exception return packet is inserted, since
		 * exception return packet is not used standalone for
		 * generating samples and it's affiliation to the previous
		 * instruction range packet; so set previous range packet
		 * flags to tell perf it is an exception return branch.
		 *
		 * The exception return can be for either system call or
		 * other exception types; unfortunately the packet doesn't
		 * contain exception type related info so we cannot decide
		 * the exception type purely based on exception return packet.
		 * If we record the exception number from exception packet and
		 * reuse it for exception return packet, this is not reliable
		 * due the trace can be discontinuity or the interrupt can
		 * be nested, thus the recorded exception number cannot be
		 * used for exception return packet for these two cases.
		 *
		 * For exception return packet, we only need to distinguish the
		 * packet is for system call or for other types.  Thus the
		 * decision can be deferred when receive the next packet which
		 * contains the return address, based on the return address we
		 * can read out the previous instruction and check if it's a
		 * system call instruction and then calibrate the sample flag
		 * as needed.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}
2386 
2387 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2388 {
2389 	int ret = 0;
2390 	size_t processed = 0;
2391 
2392 	/*
2393 	 * Packets are decoded and added to the decoder's packet queue
2394 	 * until the decoder packet processing callback has requested that
2395 	 * processing stops or there is nothing left in the buffer.  Normal
2396 	 * operations that stop processing are a timestamp packet or a full
2397 	 * decoder buffer queue.
2398 	 */
2399 	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2400 						 etmq->offset,
2401 						 &etmq->buf[etmq->buf_used],
2402 						 etmq->buf_len,
2403 						 &processed);
2404 	if (ret)
2405 		goto out;
2406 
2407 	etmq->offset += processed;
2408 	etmq->buf_used += processed;
2409 	etmq->buf_len -= processed;
2410 
2411 out:
2412 	return ret;
2413 }
2414 
/*
 * cs_etm__process_traceid_queue: Drain the traceID queue's packet queue,
 * synthesizing samples for each packet according to its type.
 *
 * Returns the last value from cs_etm_decoder__get_packet() (<= 0: end of
 * data or error), a negative value from cs_etm__set_sample_flags(), or
 * -EINVAL on an unexpected empty packet.
 */
static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
					 struct cs_etm_traceid_queue *tidq)
{
	int ret;
	struct cs_etm_packet_queue *packet_queue;

	packet_queue = &tidq->packet_queue;

	/* Process each packet in this chunk */
	while (1) {
		ret = cs_etm_decoder__get_packet(packet_queue,
						 tidq->packet);
		if (ret <= 0)
			/*
			 * Stop processing this chunk on
			 * end of data or error
			 */
			break;

		/*
		 * Since packet addresses are swapped in packet
		 * handling within below switch() statements,
		 * thus setting sample flags must be called
		 * prior to switch() statement to use address
		 * information before packets swapping.
		 */
		ret = cs_etm__set_sample_flags(etmq, tidq);
		if (ret < 0)
			break;

		switch (tidq->packet->sample_type) {
		case CS_ETM_RANGE:
			/*
			 * If the packet contains an instruction
			 * range, generate instruction sequence
			 * events.
			 */
			cs_etm__sample(etmq, tidq);
			break;
		case CS_ETM_EXCEPTION:
		case CS_ETM_EXCEPTION_RET:
			/*
			 * If the exception packet is coming,
			 * make sure the previous instruction
			 * range packet to be handled properly.
			 */
			cs_etm__exception(tidq);
			break;
		case CS_ETM_DISCONTINUITY:
			/*
			 * Discontinuity in trace, flush
			 * previous branch stack
			 */
			cs_etm__flush(etmq, tidq);
			break;
		case CS_ETM_EMPTY:
			/*
			 * Should not receive empty packet,
			 * report error.
			 */
			pr_err("CS ETM Trace: empty packet\n");
			return -EINVAL;
		default:
			break;
		}
	}

	return ret;
}
2484 
2485 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2486 {
2487 	int idx;
2488 	struct int_node *inode;
2489 	struct cs_etm_traceid_queue *tidq;
2490 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2491 
2492 	intlist__for_each_entry(inode, traceid_queues_list) {
2493 		idx = (int)(intptr_t)inode->priv;
2494 		tidq = etmq->traceid_queues[idx];
2495 
2496 		/* Ignore return value */
2497 		cs_etm__process_traceid_queue(etmq, tidq);
2498 	}
2499 }
2500 
2501 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2502 {
2503 	int err = 0;
2504 	struct cs_etm_traceid_queue *tidq;
2505 
2506 	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2507 	if (!tidq)
2508 		return -EINVAL;
2509 
2510 	/* Go through each buffer in the queue and decode them one by one */
2511 	while (1) {
2512 		err = cs_etm__get_data_block(etmq);
2513 		if (err <= 0)
2514 			return err;
2515 
2516 		/* Run trace decoder until buffer consumed or end of trace */
2517 		do {
2518 			err = cs_etm__decode_data_block(etmq);
2519 			if (err)
2520 				return err;
2521 
2522 			/*
2523 			 * Process each packet in this chunk, nothing to do if
2524 			 * an error occurs other than hoping the next one will
2525 			 * be better.
2526 			 */
2527 			err = cs_etm__process_traceid_queue(etmq, tidq);
2528 
2529 		} while (etmq->buf_len);
2530 
2531 		if (err == 0)
2532 			/* Flush any remaining branch stack entries */
2533 			err = cs_etm__end_block(etmq, tidq);
2534 	}
2535 
2536 	return err;
2537 }
2538 
2539 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2540 {
2541 	int idx, err = 0;
2542 	struct cs_etm_traceid_queue *tidq;
2543 	struct int_node *inode;
2544 
2545 	/* Go through each buffer in the queue and decode them one by one */
2546 	while (1) {
2547 		err = cs_etm__get_data_block(etmq);
2548 		if (err <= 0)
2549 			return err;
2550 
2551 		/* Run trace decoder until buffer consumed or end of trace */
2552 		do {
2553 			err = cs_etm__decode_data_block(etmq);
2554 			if (err)
2555 				return err;
2556 
2557 			/*
2558 			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2559 			 * single traceID queue because each TID has a separate
2560 			 * buffer. But here in per-cpu mode we need to iterate
2561 			 * over each channel instead.
2562 			 */
2563 			intlist__for_each_entry(inode,
2564 						etmq->traceid_queues_list) {
2565 				idx = (int)(intptr_t)inode->priv;
2566 				tidq = etmq->traceid_queues[idx];
2567 				cs_etm__process_traceid_queue(etmq, tidq);
2568 			}
2569 		} while (etmq->buf_len);
2570 
2571 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2572 			idx = (int)(intptr_t)inode->priv;
2573 			tidq = etmq->traceid_queues[idx];
2574 			/* Flush any remaining branch stack entries */
2575 			err = cs_etm__end_block(etmq, tidq);
2576 			if (err)
2577 				return err;
2578 		}
2579 	}
2580 
2581 	return err;
2582 }
2583 
2584 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2585 					   pid_t tid)
2586 {
2587 	unsigned int i;
2588 	struct auxtrace_queues *queues = &etm->queues;
2589 
2590 	for (i = 0; i < queues->nr_queues; i++) {
2591 		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2592 		struct cs_etm_queue *etmq = queue->priv;
2593 		struct cs_etm_traceid_queue *tidq;
2594 
2595 		if (!etmq)
2596 			continue;
2597 
2598 		if (etm->per_thread_decoding) {
2599 			tidq = cs_etm__etmq_get_traceid_queue(
2600 				etmq, CS_ETM_PER_THREAD_TRACEID);
2601 
2602 			if (!tidq)
2603 				continue;
2604 
2605 			if (tid == -1 || thread__tid(tidq->thread) == tid)
2606 				cs_etm__run_per_thread_timeless_decoder(etmq);
2607 		} else
2608 			cs_etm__run_per_cpu_timeless_decoder(etmq);
2609 	}
2610 
2611 	return 0;
2612 }
2613 
/*
 * cs_etm__process_timestamped_queues: Decode all queues in timestamp order.
 *
 * A min heap keyed on CoreSight timestamps interleaves the per-queue decode
 * so that samples are synthesized in time order across CPUs/channels.  After
 * the heap is drained, every traceID queue is flushed.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr, i;
	u8 trace_chan_id;
	u64 cs_timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	/*
	 * Pre-populate the heap with one entry from each queue so that we can
	 * start processing in time order across all queues.
	 */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
		if (ret)
			return ret;
	}

	while (1) {
		if (!etm->heap.heap_cnt)
			break;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this traceID,
			 * which means something somewhere went very wrong.  No
			 * other choice than simply exit.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new auxtrace_buffer
		 * if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!cs_timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there is no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues.  Since we
			 * did not get a timestamp, there is no more traces to
			 * process in this auxtrace_buffer.  As such empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded.  They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
	}

	/* Heap drained: flush what remains in every traceID queue */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		struct int_node *inode;

		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
			int idx = (int)(intptr_t)inode->priv;

			/* Flush any remaining branch stack entries */
			tidq = etmq->traceid_queues[idx];
			ret = cs_etm__end_block(etmq, tidq);
			if (ret)
				return ret;
		}
	}
out:
	return ret;
}
2743 
2744 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2745 					union perf_event *event)
2746 {
2747 	struct thread *th;
2748 
2749 	if (etm->timeless_decoding)
2750 		return 0;
2751 
2752 	/*
2753 	 * Add the tid/pid to the log so that we can get a match when we get a
2754 	 * contextID from the decoder. Only track for the host: only kernel
2755 	 * trace is supported for guests which wouldn't need pids so this should
2756 	 * be fine.
2757 	 */
2758 	th = machine__findnew_thread(&etm->session->machines.host,
2759 				     event->itrace_start.pid,
2760 				     event->itrace_start.tid);
2761 	if (!th)
2762 		return -ENOMEM;
2763 
2764 	thread__put(th);
2765 
2766 	return 0;
2767 }
2768 
2769 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2770 					   union perf_event *event)
2771 {
2772 	struct thread *th;
2773 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2774 
2775 	/*
2776 	 * Context switch in per-thread mode are irrelevant since perf
2777 	 * will start/stop tracing as the process is scheduled.
2778 	 */
2779 	if (etm->timeless_decoding)
2780 		return 0;
2781 
2782 	/*
2783 	 * SWITCH_IN events carry the next process to be switched out while
2784 	 * SWITCH_OUT events carry the process to be switched in.  As such
2785 	 * we don't care about IN events.
2786 	 */
2787 	if (!out)
2788 		return 0;
2789 
2790 	/*
2791 	 * Add the tid/pid to the log so that we can get a match when we get a
2792 	 * contextID from the decoder. Only track for the host: only kernel
2793 	 * trace is supported for guests which wouldn't need pids so this should
2794 	 * be fine.
2795 	 */
2796 	th = machine__findnew_thread(&etm->session->machines.host,
2797 				     event->context_switch.next_prev_pid,
2798 				     event->context_switch.next_prev_tid);
2799 	if (!th)
2800 		return -ENOMEM;
2801 
2802 	thread__put(th);
2803 
2804 	return 0;
2805 }
2806 
/*
 * cs_etm__process_event: auxtrace callback for ordinary perf events.
 *
 * Tracks side-band events the decoder needs (itrace start, cpu-wide context
 * switches, AUX timestamps) and, in per-thread timeless mode, kicks off
 * decoding on thread exit.  Requires ordered events.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	/* In dump mode the raw trace is printed elsewhere; nothing to do */
	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	switch (event->header.type) {
	case PERF_RECORD_EXIT:
		/*
		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
		 * start the decode because we know there will be no more trace from
		 * this thread. All this does is emit samples earlier than waiting for
		 * the flush in other modes, but with timestamps it makes sense to wait
		 * for flush so that events from different threads are interleaved
		 * properly.
		 */
		if (etm->per_thread_decoding && etm->timeless_decoding)
			return cs_etm__process_timeless_queues(etm,
							       event->fork.tid);
		break;

	case PERF_RECORD_ITRACE_START:
		return cs_etm__process_itrace_start(etm, event);

	case PERF_RECORD_SWITCH_CPU_WIDE:
		return cs_etm__process_switch_cpu_wide(etm, event);

	case PERF_RECORD_AUX:
		/*
		 * Record the latest kernel timestamp available in the header
		 * for samples so that synthesised samples occur from this point
		 * onwards.
		 */
		if (sample->time && (sample->time != (u64)-1))
			etm->latest_kernel_timestamp = sample->time;
		break;

	default:
		break;
	}

	return 0;
}
2861 
2862 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2863 			     struct perf_record_auxtrace *event)
2864 {
2865 	struct auxtrace_buffer *buf;
2866 	unsigned int i;
2867 	/*
2868 	 * Find all buffers with same reference in the queues and dump them.
2869 	 * This is because the queues can contain multiple entries of the same
2870 	 * buffer that were split on aux records.
2871 	 */
2872 	for (i = 0; i < etm->queues.nr_queues; ++i)
2873 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2874 			if (buf->reference == event->reference)
2875 				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2876 }
2877 
/*
 * cs_etm__process_auxtrace_event: auxtrace callback for AUXTRACE records.
 *
 * When data hasn't been queued up front, queue this event's trace data
 * (recording the current file offset for non-pipe input) and, in dump mode,
 * print it immediately.  When data was already queued, dump mode looks the
 * buffers up by reference instead.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t  data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;
		int idx = event->auxtrace.idx;

		/* A pipe has no seekable offset; trace data follows inline */
		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	} else if (dump_trace)
		dump_queued_data(etm, &event->auxtrace);

	return 0;
}
2916 
2917 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2918 {
2919 	struct evsel *evsel;
2920 	struct evlist *evlist = etm->session->evlist;
2921 
2922 	/* Override timeless mode with user input from --itrace=Z */
2923 	if (etm->synth_opts.timeless_decoding) {
2924 		etm->timeless_decoding = true;
2925 		return 0;
2926 	}
2927 
2928 	/*
2929 	 * Find the cs_etm evsel and look at what its timestamp setting was
2930 	 */
2931 	evlist__for_each_entry(evlist, evsel)
2932 		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2933 			etm->timeless_decoding =
2934 				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2935 			return 0;
2936 		}
2937 
2938 	pr_err("CS ETM: Couldn't find ETM evsel\n");
2939 	return -EINVAL;
2940 }
2941 
/*
 * Read a single cpu parameter block from the auxtrace_info priv block.
 *
 * For version 1 there is a per cpu nr_params entry. If we are handling
 * version 1 file, then there may be less, the same, or more params
 * indicated by this value than the compile time number we understand.
 *
 * For a version 0 info block, there are a fixed number, and we need to
 * fill out the nr_param value in the metadata we create.
 *
 * @buff_in:		start of the auxtrace_info priv data.
 * @buff_in_offset:	in/out - index into @buff_in where this cpu's block
 *			starts on entry; advanced past the consumed entries
 *			on return.
 * @out_blk_size:	number of u64 entries to allocate for the output block.
 * @nr_params_v0:	param count to assume when the info block is version 0
 *			(v0 blocks carry no per-cpu NR_TRC_PARAMS entry).
 *
 * Return: newly allocated (zalloc'd, caller-owned) metadata block, or NULL
 * on allocation failure. Unfilled trailing entries remain zero.
 */
static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
				    int out_blk_size, int nr_params_v0)
{
	u64 *metadata = NULL;
	int hdr_version;
	int nr_in_params, nr_out_params, nr_cmn_params;
	int i, k;

	/* Output block is always full current-version size, zero initialised */
	metadata = zalloc(sizeof(*metadata) * out_blk_size);
	if (!metadata)
		return NULL;

	/* read block current index & version */
	i = *buff_in_offset;
	hdr_version = buff_in[CS_HEADER_VERSION];

	if (!hdr_version) {
	/* read version 0 info block into a version 1 metadata block  */
		nr_in_params = nr_params_v0;
		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
		/*
		 * remaining block params at offset +1 from source: v0 has no
		 * NR_TRC_PARAMS slot, so input entry (i + k) lands at output
		 * index (k + 1).
		 * NOTE(review): the bound `k < nr_in_params` looks like it may
		 * stop short of the last input params for v0 blocks — TODO
		 * confirm against the v0 per-cpu layout.
		 */
		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
			metadata[k + 1] = buff_in[i + k];
		/* version 0 has 2 common params */
		nr_cmn_params = 2;
	} else {
	/* read version 1 info block - input and output nr_params may differ */
		/* version 1 has 3 common params */
		nr_cmn_params = 3;
		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];

		/* if input has more params than output - skip excess */
		nr_out_params = nr_in_params + nr_cmn_params;
		if (nr_out_params > out_blk_size)
			nr_out_params = out_blk_size;

		/* v1 input and output layouts agree, straight copy */
		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
			metadata[k] = buff_in[i + k];

		/* record the actual nr params we copied */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
	}

	/* adjust in offset by number of in params used */
	i += nr_in_params + nr_cmn_params;
	*buff_in_offset = i;
	return metadata;
}
3002 
/*
 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
 * on the bounds of aux_event, if it matches with the buffer that's at
 * file_offset.
 *
 * Normally, whole auxtrace buffers would be added to the queue. But we
 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
 * is reset across each buffer, so splitting the buffers up in advance has
 * the same effect.
 *
 * Return: 0 if the fragment was queued, 1 if the AUX event does not fall
 * inside the buffer at @file_offset ('not found', caller keeps searching),
 * negative error code otherwise.
 */
static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
				      struct perf_record_aux *aux_event, struct perf_sample *sample)
{
	int err;
	char buf[PERF_SAMPLE_MAX_SIZE];
	union perf_event *auxtrace_event_union;
	struct perf_record_auxtrace *auxtrace_event;
	union perf_event auxtrace_fragment;
	__u64 aux_offset, aux_size;
	enum cs_etm_format format;

	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	/*
	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
	 * from looping through the auxtrace index.
	 */
	err = perf_session__peek_event(session, file_offset, buf,
				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
	if (err)
		return err;
	auxtrace_event = &auxtrace_event_union->auxtrace;
	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
		return -EINVAL;

	/* Reject malformed events or a size mismatch with the auxtrace index entry */
	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
		auxtrace_event->header.size != sz) {
		return -EINVAL;
	}

	/*
	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
	 * Return 'not found' if mismatch.
	 */
	if (auxtrace_event->cpu == (__u32) -1) {
		etm->per_thread_decoding = true;
		if (auxtrace_event->tid != sample->tid)
			return 1;
	} else if (auxtrace_event->cpu != sample->cpu) {
		if (etm->per_thread_decoding) {
			/*
			 * Found a per-cpu buffer after a per-thread one was
			 * already found
			 */
			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
			return -EINVAL;
		}
		return 1;
	}

	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
		/*
		 * Clamp size in snapshot mode. The buffer size is clamped in
		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
		 * the buffer size.
		 */
		aux_size = min(aux_event->aux_size, auxtrace_event->size);

		/*
		 * In this mode, the head also points to the end of the buffer so aux_offset
		 * needs to have the size subtracted so it points to the beginning as in normal mode
		 */
		aux_offset = aux_event->aux_offset - aux_size;
	} else {
		aux_size = aux_event->aux_size;
		aux_offset = aux_event->aux_offset;
	}

	/* Does the AUX event lie entirely within this auxtrace buffer? */
	if (aux_offset >= auxtrace_event->offset &&
	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
		struct cs_etm_queue *etmq = cs_etm__get_queue(etm, auxtrace_event->cpu);

		/*
		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
		 * based on the sizes of the aux event, and queue that fragment.
		 */
		auxtrace_fragment.auxtrace = *auxtrace_event;
		auxtrace_fragment.auxtrace.size = aux_size;
		auxtrace_fragment.auxtrace.offset = aux_offset;
		/* Point file_offset at the fragment's data: skip the event header
		 * plus the part of the buffer that precedes aux_offset. */
		auxtrace_fragment.auxtrace.idx = etmq->queue_nr;
		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;

		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
						 file_offset, NULL);
		if (err)
			return err;

		/*
		 * Record whether this queue holds raw (unformatted) or frame-formatted
		 * trace; a single queue must never mix the two.
		 */
		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
				UNFORMATTED : FORMATTED;
		if (etmq->format != UNSET && format != etmq->format) {
			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
			return -EINVAL;
		}
		etmq->format = format;
		return 0;
	}

	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
	return 1;
}
3120 
3121 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3122 					u64 offset __maybe_unused, void *data __maybe_unused)
3123 {
3124 	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3125 	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3126 		(*(int *)data)++; /* increment found count */
3127 		return cs_etm__process_aux_output_hw_id(session, event);
3128 	}
3129 	return 0;
3130 }
3131 
/*
 * Callback for perf_session__peek_events(): for each PERF_RECORD_AUX event,
 * find the auxtrace buffer it falls inside and queue the matching fragment.
 * Returns 0 even when no buffer matches, so the scan continues over the
 * remaining records; negative error codes abort the scan.
 */
static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
					u64 offset __maybe_unused, void *data __maybe_unused)
{
	struct perf_sample sample;
	int ret;
	struct auxtrace_index_entry *ent;
	struct auxtrace_index *auxtrace_index;
	struct evsel *evsel;
	size_t i;

	/* Don't care about any other events, we're only queuing buffers for AUX events */
	if (event->header.type != PERF_RECORD_AUX)
		return 0;

	if (event->header.size < sizeof(struct perf_record_aux))
		return -EINVAL;

	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
	if (!event->aux.aux_size)
		return 0;

	/*
	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
	 */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	perf_sample__init(&sample, /*all=*/false);
	ret = evsel__parse_sample(evsel, event, &sample);
	if (ret)
		goto out;

	/*
	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
	 */
	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
		for (i = 0; i < auxtrace_index->nr; i++) {
			ent = &auxtrace_index->entries[i];
			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
							 ent->sz, &event->aux, &sample);
			/*
			 * Stop search on error or successful values. Continue search on
			 * 1 ('not found')
			 */
			if (ret != 1)
				goto out;
		}
	}

	/*
	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
	 * don't exit with an error because it will still be possible to decode other aux records.
	 */
	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
	ret = 0;
out:
	/* Release any storage evsel__parse_sample() attached to the sample */
	perf_sample__exit(&sample);
	return ret;
}
3193 
3194 static int cs_etm__queue_aux_records(struct perf_session *session)
3195 {
3196 	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3197 								struct auxtrace_index, list);
3198 	if (index && index->nr > 0)
3199 		return perf_session__peek_events(session, session->header.data_offset,
3200 						 session->header.data_size,
3201 						 cs_etm__queue_aux_records_cb, NULL);
3202 
3203 	/*
3204 	 * We would get here if there are no entries in the index (either no auxtrace
3205 	 * buffers or no index at all). Fail silently as there is the possibility of
3206 	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3207 	 * false.
3208 	 *
3209 	 * In that scenario, buffers will not be split by AUX records.
3210 	 */
3211 	return 0;
3212 }
3213 
/*
 * Despite its name, HAS_PARAM() evaluates to true when the metadata block
 * for ETM @j does *not* contain @param: the block's param count is too
 * small for the param's (common-block-relative) index to be present.
 */
#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))

/*
 * Loop through the ETMs; return false if at least one lacks a TS_SOURCE
 * param or reports ts_source != 1 (virtual timestamps), or has an unknown
 * magic number (which includes ETMv3/PTM blocks).
 */
static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
{
	int j;

	for (j = 0; j < num_cpu; j++) {
		switch (metadata[j][CS_ETM_MAGIC]) {
		case __perf_cs_etmv4_magic:
			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
				return false;
			break;
		case __perf_cs_ete_magic:
			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
				return false;
			break;
		default:
			/* Unknown / unsupported magic number. */
			return false;
		}
	}
	return true;
}
3242 
3243 /* map trace ids to correct metadata block, from information in metadata */
3244 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3245 					  u64 **metadata)
3246 {
3247 	u64 cs_etm_magic;
3248 	u8 trace_chan_id;
3249 	int i, err;
3250 
3251 	for (i = 0; i < num_cpu; i++) {
3252 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3253 		switch (cs_etm_magic) {
3254 		case __perf_cs_etmv3_magic:
3255 			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3256 			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3257 			break;
3258 		case __perf_cs_etmv4_magic:
3259 		case __perf_cs_ete_magic:
3260 			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3261 			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3262 			break;
3263 		default:
3264 			/* unknown magic number */
3265 			return -EINVAL;
3266 		}
3267 		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3268 		if (err)
3269 			return err;
3270 	}
3271 	return 0;
3272 }
3273 
/*
 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
 * (formatted or not) packets to create the decoders.
 *
 * Returns 0 on success (including a queue with no mapped trace IDs, where
 * there is nothing to create), -EINVAL on any failure.
 */
static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
{
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params  *t_params;
	int decoders = intlist__nr_entries(etmq->traceid_list);

	/* No trace IDs mapped to this queue: nothing to decode */
	if (decoders == 0)
		return 0;

	/*
	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
	 * needed.
	 */
	if (etmq->format == UNFORMATTED)
		assert(decoders == 1);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * decoders);

	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etmq))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					dump_trace ? CS_ETM_OPERATION_PRINT :
						     CS_ETM_OPERATION_DECODE))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
					    t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	/* t_params is freed on success and failure alike once creation is done */
	zfree(&t_params);
	return 0;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	/*
	 * NOTE(review): every failure, including the zalloc() above, is
	 * reported as -EINVAL — consider -ENOMEM for the allocation path.
	 */
	zfree(&t_params);
	return -EINVAL;
}
3333 
3334 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3335 {
3336 	struct auxtrace_queues *queues = &etm->queues;
3337 
3338 	for (unsigned int i = 0; i < queues->nr_queues; i++) {
3339 		bool empty = list_empty(&queues->queue_array[i].head);
3340 		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3341 		int ret;
3342 
3343 		/*
3344 		 * Don't create decoders for empty queues, mainly because
3345 		 * etmq->format is unknown for empty queues.
3346 		 */
3347 		assert(empty || etmq->format != UNSET);
3348 		if (empty)
3349 			continue;
3350 
3351 		ret = cs_etm__create_queue_decoders(etmq);
3352 		if (ret)
3353 			return ret;
3354 	}
3355 	return 0;
3356 }
3357 
3358 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3359 				       struct perf_session *session)
3360 {
3361 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3362 	struct cs_etm_auxtrace *etm = NULL;
3363 	struct perf_record_time_conv *tc = &session->time_conv;
3364 	int event_header_size = sizeof(struct perf_event_header);
3365 	int total_size = auxtrace_info->header.size;
3366 	int priv_size = 0;
3367 	int num_cpu, max_cpu = 0;
3368 	int err = 0;
3369 	int aux_hw_id_found;
3370 	int i;
3371 	u64 *ptr = NULL;
3372 	u64 **metadata = NULL;
3373 
3374 	/* First the global part */
3375 	ptr = (u64 *) auxtrace_info->priv;
3376 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3377 	metadata = zalloc(sizeof(*metadata) * num_cpu);
3378 	if (!metadata)
3379 		return -ENOMEM;
3380 
3381 	/* Start parsing after the common part of the header */
3382 	i = CS_HEADER_VERSION_MAX;
3383 
3384 	/*
3385 	 * The metadata is stored in the auxtrace_info section and encodes
3386 	 * the configuration of the ARM embedded trace macrocell which is
3387 	 * required by the trace decoder to properly decode the trace due
3388 	 * to its highly compressed nature.
3389 	 */
3390 	for (int j = 0; j < num_cpu; j++) {
3391 		if (ptr[i] == __perf_cs_etmv3_magic) {
3392 			metadata[j] =
3393 				cs_etm__create_meta_blk(ptr, &i,
3394 							CS_ETM_PRIV_MAX,
3395 							CS_ETM_NR_TRC_PARAMS_V0);
3396 		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3397 			metadata[j] =
3398 				cs_etm__create_meta_blk(ptr, &i,
3399 							CS_ETMV4_PRIV_MAX,
3400 							CS_ETMV4_NR_TRC_PARAMS_V0);
3401 		} else if (ptr[i] == __perf_cs_ete_magic) {
3402 			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3403 		} else {
3404 			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3405 				  ptr[i]);
3406 			err = -EINVAL;
3407 			goto err_free_metadata;
3408 		}
3409 
3410 		if (!metadata[j]) {
3411 			err = -ENOMEM;
3412 			goto err_free_metadata;
3413 		}
3414 
3415 		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3416 			max_cpu = metadata[j][CS_ETM_CPU];
3417 	}
3418 
3419 	/*
3420 	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3421 	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3422 	 * global metadata, and each cpu's metadata respectively.
3423 	 * The following tests if the correct number of double words was
3424 	 * present in the auxtrace info section.
3425 	 */
3426 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3427 	if (i * 8 != priv_size) {
3428 		err = -EINVAL;
3429 		goto err_free_metadata;
3430 	}
3431 
3432 	etm = zalloc(sizeof(*etm));
3433 
3434 	if (!etm) {
3435 		err = -ENOMEM;
3436 		goto err_free_metadata;
3437 	}
3438 
3439 	/*
3440 	 * As all the ETMs run at the same exception level, the system should
3441 	 * have the same PID format crossing CPUs.  So cache the PID format
3442 	 * and reuse it for sequential decoding.
3443 	 */
3444 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3445 
3446 	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
3447 	if (err)
3448 		goto err_free_etm;
3449 
3450 	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
3451 		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
3452 		if (err)
3453 			goto err_free_queues;
3454 	}
3455 
3456 	if (session->itrace_synth_opts->set) {
3457 		etm->synth_opts = *session->itrace_synth_opts;
3458 	} else {
3459 		itrace_synth_opts__set_default(&etm->synth_opts,
3460 				session->itrace_synth_opts->default_no_sample);
3461 		etm->synth_opts.callchain = false;
3462 	}
3463 
3464 	etm->session = session;
3465 
3466 	etm->num_cpu = num_cpu;
3467 	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3468 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3469 	etm->metadata = metadata;
3470 	etm->auxtrace_type = auxtrace_info->type;
3471 
3472 	if (etm->synth_opts.use_timestamp)
3473 		/*
3474 		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3475 		 * therefore the decoder cannot know if the timestamp trace is
3476 		 * same with the kernel time.
3477 		 *
3478 		 * If a user has knowledge for the working platform and can
3479 		 * specify itrace option 'T' to tell decoder to forcely use the
3480 		 * traced timestamp as the kernel time.
3481 		 */
3482 		etm->has_virtual_ts = true;
3483 	else
3484 		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3485 		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3486 
3487 	if (!etm->has_virtual_ts)
3488 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3489 			    "The time field of the samples will not be set accurately.\n"
3490 			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3491 			    "you can specify the itrace option 'T' for timestamp decoding\n"
3492 			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3493 
3494 	etm->auxtrace.process_event = cs_etm__process_event;
3495 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3496 	etm->auxtrace.flush_events = cs_etm__flush_events;
3497 	etm->auxtrace.free_events = cs_etm__free_events;
3498 	etm->auxtrace.free = cs_etm__free;
3499 	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3500 	session->auxtrace = &etm->auxtrace;
3501 
3502 	err = cs_etm__setup_timeless_decoding(etm);
3503 	if (err)
3504 		return err;
3505 
3506 	etm->tc.time_shift = tc->time_shift;
3507 	etm->tc.time_mult = tc->time_mult;
3508 	etm->tc.time_zero = tc->time_zero;
3509 	if (event_contains(*tc, time_cycles)) {
3510 		etm->tc.time_cycles = tc->time_cycles;
3511 		etm->tc.time_mask = tc->time_mask;
3512 		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3513 		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3514 	}
3515 	err = cs_etm__synth_events(etm, session);
3516 	if (err)
3517 		goto err_free_queues;
3518 
3519 	err = cs_etm__queue_aux_records(session);
3520 	if (err)
3521 		goto err_free_queues;
3522 
3523 	/*
3524 	 * Map Trace ID values to CPU metadata.
3525 	 *
3526 	 * Trace metadata will always contain Trace ID values from the legacy algorithm
3527 	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
3528 	 * or the kernel doesn't emit them.
3529 	 *
3530 	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3531 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3532 	 * in which case a different value will be used. This means an older perf may still
3533 	 * be able to record and read files generate on a newer system.
3534 	 *
3535 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3536 	 * those packets. If they are there then the values will be mapped and plugged into
3537 	 * the metadata and decoders are only created for each mapping received.
3538 	 *
3539 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3540 	 * then we map Trace ID values to CPU directly from the metadata and create decoders
3541 	 * for all mappings.
3542 	 */
3543 
3544 	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3545 	aux_hw_id_found = 0;
3546 	err = perf_session__peek_events(session, session->header.data_offset,
3547 					session->header.data_size,
3548 					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3549 	if (err)
3550 		goto err_free_queues;
3551 
3552 	/* if no HW ID found this is a file with metadata values only, map from metadata */
3553 	if (!aux_hw_id_found) {
3554 		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3555 		if (err)
3556 			goto err_free_queues;
3557 	}
3558 
3559 	err = cs_etm__create_decoders(etm);
3560 	if (err)
3561 		goto err_free_queues;
3562 
3563 	etm->data_queued = etm->queues.populated;
3564 	return 0;
3565 
3566 err_free_queues:
3567 	auxtrace_queues__free(&etm->queues);
3568 	session->auxtrace = NULL;
3569 err_free_etm:
3570 	zfree(&etm);
3571 err_free_metadata:
3572 	/* No need to check @metadata[j], free(NULL) is supported */
3573 	for (int j = 0; j < num_cpu; j++)
3574 		zfree(&metadata[j]);
3575 	zfree(&metadata);
3576 	return err;
3577 }
3578