xref: /linux/tools/perf/util/cs-etm.c (revision 77c123f53e97ad4bde0271eb671b71774a99ebf6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17 
18 #include <stdlib.h>
19 
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42 
/*
 * Session-wide CoreSight ETM decode state.  One instance is embedded in
 * (and recovered from) perf_session->auxtrace via container_of().
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;	/* what samples to synthesize */
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless has no timestamps in the trace so overlapping mmap lookups
	 * are less accurate but produces smaller trace data. We use context IDs
	 * in the trace instead of matching timestamps with fork records so
	 * they're not really needed in the general case. Overlapping mmaps
	 * happen in cases like between a fork and an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	int num_cpu;	/* number of entries in metadata[] */
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;	/* per-CPU metadata blocks, indexed by CS_ETM* constants */
	unsigned int pmu_type;
	enum cs_etm_pid_fmt pid_fmt;	/* cached by cs_etm__init_pid_fmt() */
};
83 
/*
 * Per-traceID decode state within a cs_etm_queue: one instance per trace
 * source seen in the queue's buffers.
 */
struct cs_etm_traceid_queue {
	u8 trace_chan_id;	/* traceID this queue services */
	u64 period_instructions;
	size_t last_branch_pos;
	union perf_event *event_buf;	/* scratch space for synthesized samples */
	struct thread *thread;		/* thread of the current packet */
	struct thread *prev_packet_thread;	/* thread at the time of prev_packet */
	ocsd_ex_level prev_packet_el;	/* exception level of prev_packet */
	ocsd_ex_level el;		/* exception level of the current packet */
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	struct cs_etm_packet *prev_packet;	/* the 'from' side for branch samples */
	struct cs_etm_packet *packet;		/* packet currently being processed */
	struct cs_etm_packet_queue packet_queue;
};
99 
/*
 * Whether the AUX data went through the CoreSight frame formatter
 * (trace IDs multiplexed in the stream) or is raw single-source trace.
 */
enum cs_etm_format {
	UNSET,
	FORMATTED,
	UNFORMATTED
};
105 
/*
 * Per-auxtrace-queue decoder state (stored in auxtrace_queue->priv).
 */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;	/* back pointer to session state */
	struct cs_etm_decoder *decoder;	/* OpenCSD decoder instance */
	struct auxtrace_buffer *buffer;	/* AUX buffer currently being decoded */
	unsigned int queue_nr;
	u8 pending_timestamp_chan_id;	/* traceID with a pending timestamp, 0 = none */
	enum cs_etm_format format;
	u64 offset;
	const unsigned char *buf;	/* current data block */
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
	/* Conversion between traceID and metadata pointers */
	struct intlist *traceid_list;
};
122 
123 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
124 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
125 					   pid_t tid);
126 static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
127 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
128 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
129 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
130 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
131 
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify to auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Arguments and expansions are fully parenthesized so expression
 * arguments (e.g. "i + 1") associate correctly.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
		      (((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
145 
146 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
147 {
148 	etmidr &= ETMIDR_PTM_VERSION;
149 
150 	if (etmidr == ETMIDR_PTM_VERSION)
151 		return CS_ETM_PROTO_PTM;
152 
153 	return CS_ETM_PROTO_ETMV3;
154 }
155 
156 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
157 {
158 	struct int_node *inode;
159 	u64 *metadata;
160 
161 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
162 	if (!inode)
163 		return -EINVAL;
164 
165 	metadata = inode->priv;
166 	*magic = metadata[CS_ETM_MAGIC];
167 	return 0;
168 }
169 
170 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
171 {
172 	struct int_node *inode;
173 	u64 *metadata;
174 
175 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
176 	if (!inode)
177 		return -EINVAL;
178 
179 	metadata = inode->priv;
180 	*cpu = (int)metadata[CS_ETM_CPU];
181 	return 0;
182 }
183 
184 /*
185  * The returned PID format is presented as an enum:
186  *
187  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
188  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
189  *   CS_ETM_PIDFMT_NONE: No context IDs
190  *
191  * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
192  * are enabled at the same time when the session runs on an EL2 kernel.
193  * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
194  * recorded in the trace data, the tool will selectively use
195  * CONTEXTIDR_EL2 as PID.
196  *
197  * The result is cached in etm->pid_fmt so this function only needs to be called
198  * when processing the aux info.
199  */
200 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
201 {
202 	u64 val;
203 
204 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
205 		val = metadata[CS_ETM_ETMCR];
206 		/* CONTEXTIDR is traced */
207 		if (val & BIT(ETM_OPT_CTXTID))
208 			return CS_ETM_PIDFMT_CTXTID;
209 	} else {
210 		val = metadata[CS_ETMV4_TRCCONFIGR];
211 		/* CONTEXTIDR_EL2 is traced */
212 		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
213 			return CS_ETM_PIDFMT_CTXTID2;
214 		/* CONTEXTIDR_EL1 is traced */
215 		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
216 			return CS_ETM_PIDFMT_CTXTID;
217 	}
218 
219 	return CS_ETM_PIDFMT_NONE;
220 }
221 
/* Return the PID format cached at AUX info time by cs_etm__init_pid_fmt(). */
enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
{
	return etmq->etm->pid_fmt;
}
226 
227 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
228 					u8 trace_chan_id, u64 *cpu_metadata)
229 {
230 	/* Get an RB node for this CPU */
231 	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
232 
233 	/* Something went wrong, no need to continue */
234 	if (!inode)
235 		return -ENOMEM;
236 
237 	/* Disallow re-mapping a different traceID to metadata pair. */
238 	if (inode->priv) {
239 		u64 *curr_cpu_data = inode->priv;
240 		u8 curr_chan_id;
241 		int err;
242 
243 		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
244 			pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
245 			return -EINVAL;
246 		}
247 
248 		/* check that the mapped ID matches */
249 		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
250 		if (err)
251 			return err;
252 
253 		if (curr_chan_id != trace_chan_id) {
254 			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
255 			return -EINVAL;
256 		}
257 
258 		/* Skip re-adding the same mappings if everything matched */
259 		return 0;
260 	}
261 
262 	/* Not one we've seen before, associate the traceID with the metadata pointer */
263 	inode->priv = cpu_metadata;
264 
265 	return 0;
266 }
267 
268 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
269 {
270 	if (etm->per_thread_decoding)
271 		return etm->queues.queue_array[0].priv;
272 	else
273 		return etm->queues.queue_array[cpu].priv;
274 }
275 
276 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
277 				   u64 *cpu_metadata)
278 {
279 	struct cs_etm_queue *etmq;
280 
281 	/*
282 	 * If the queue is unformatted then only save one mapping in the
283 	 * queue associated with that CPU so only one decoder is made.
284 	 */
285 	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
286 	if (etmq->format == UNFORMATTED)
287 		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
288 						    cpu_metadata);
289 
290 	/*
291 	 * Otherwise, version 0 trace IDs are global so save them into every
292 	 * queue.
293 	 */
294 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
295 		int ret;
296 
297 		etmq = etm->queues.queue_array[i].priv;
298 		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
299 						   cpu_metadata);
300 		if (ret)
301 			return ret;
302 	}
303 
304 	return 0;
305 }
306 
307 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
308 				       u64 hw_id)
309 {
310 	int err;
311 	u64 *cpu_data;
312 	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
313 
314 	cpu_data = get_cpu_data(etm, cpu);
315 	if (cpu_data == NULL)
316 		return -EINVAL;
317 
318 	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
319 	if (err)
320 		return err;
321 
322 	/*
323 	 * if we are picking up the association from the packet, need to plug
324 	 * the correct trace ID into the metadata for setting up decoders later.
325 	 */
326 	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
327 }
328 
329 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
330 {
331 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
332 
333 	switch (cs_etm_magic) {
334 	case __perf_cs_etmv3_magic:
335 		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
336 				      CORESIGHT_TRACE_ID_VAL_MASK);
337 		break;
338 	case __perf_cs_etmv4_magic:
339 	case __perf_cs_ete_magic:
340 		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
341 				      CORESIGHT_TRACE_ID_VAL_MASK);
342 		break;
343 	default:
344 		return -EINVAL;
345 	}
346 	return 0;
347 }
348 
349 /*
350  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
351  * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
352  */
353 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
354 {
355 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
356 
357 	switch (cs_etm_magic) {
358 	case __perf_cs_etmv3_magic:
359 		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
360 		break;
361 	case __perf_cs_etmv4_magic:
362 	case __perf_cs_ete_magic:
363 		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
364 		break;
365 
366 	default:
367 		return -EINVAL;
368 	}
369 	return 0;
370 }
371 
372 /*
373  * Get a metadata index for a specific cpu from an array.
374  *
375  */
376 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
377 {
378 	int i;
379 
380 	for (i = 0; i < etm->num_cpu; i++) {
381 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
382 			return i;
383 		}
384 	}
385 
386 	return -1;
387 }
388 
389 /*
390  * Get a metadata for a specific cpu from an array.
391  *
392  */
393 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
394 {
395 	int idx = get_cpu_data_idx(etm, cpu);
396 
397 	return (idx != -1) ? etm->metadata[idx] : NULL;
398 }
399 
400 /*
401  * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
402  *
403  * The payload associates the Trace ID and the CPU.
404  * The routine is tolerant of seeing multiple packets with the same association,
405  * but a CPU / Trace ID association changing during a session is an error.
406  */
407 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
408 					    union perf_event *event)
409 {
410 	struct cs_etm_auxtrace *etm;
411 	struct perf_sample sample;
412 	struct evsel *evsel;
413 	u64 hw_id;
414 	int cpu, version, err;
415 
416 	/* extract and parse the HW ID */
417 	hw_id = event->aux_output_hw_id.hw_id;
418 	version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
419 
420 	/* check that we can handle this version */
421 	if (version > CS_AUX_HW_ID_CURR_VERSION) {
422 		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
423 		       version);
424 		return -EINVAL;
425 	}
426 
427 	/* get access to the etm metadata */
428 	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
429 	if (!etm || !etm->metadata)
430 		return -EINVAL;
431 
432 	/* parse the sample to get the CPU */
433 	evsel = evlist__event2evsel(session->evlist, event);
434 	if (!evsel)
435 		return -EINVAL;
436 	err = evsel__parse_sample(evsel, event, &sample);
437 	if (err)
438 		return err;
439 	cpu = sample.cpu;
440 	if (cpu == -1) {
441 		/* no CPU in the sample - possibly recorded with an old version of perf */
442 		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
443 		return -EINVAL;
444 	}
445 
446 	return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
447 }
448 
/*
 * Flag @trace_chan_id as the traceID queue with a pending timestamp; read
 * back (and cleared) by cs_etm__etmq_get_timestamp().
 */
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
					      u8 trace_chan_id)
{
	/*
	 * When a timestamp packet is encountered the backend code
	 * is stopped so that the front end has time to process packets
	 * that were accumulated in the traceID queue.  Since there can
	 * be more than one channel per cs_etm_queue, we need to specify
	 * what traceID queue needs servicing.
	 */
	etmq->pending_timestamp_chan_id = trace_chan_id;
}
461 
462 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
463 				      u8 *trace_chan_id)
464 {
465 	struct cs_etm_packet_queue *packet_queue;
466 
467 	if (!etmq->pending_timestamp_chan_id)
468 		return 0;
469 
470 	if (trace_chan_id)
471 		*trace_chan_id = etmq->pending_timestamp_chan_id;
472 
473 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
474 						     etmq->pending_timestamp_chan_id);
475 	if (!packet_queue)
476 		return 0;
477 
478 	/* Acknowledge pending status */
479 	etmq->pending_timestamp_chan_id = 0;
480 
481 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
482 	return packet_queue->cs_timestamp;
483 }
484 
485 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
486 {
487 	int i;
488 
489 	queue->head = 0;
490 	queue->tail = 0;
491 	queue->packet_count = 0;
492 	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
493 		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
494 		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
495 		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
496 		queue->packet_buffer[i].instr_count = 0;
497 		queue->packet_buffer[i].last_instr_taken_branch = false;
498 		queue->packet_buffer[i].last_instr_size = 0;
499 		queue->packet_buffer[i].last_instr_type = 0;
500 		queue->packet_buffer[i].last_instr_subtype = 0;
501 		queue->packet_buffer[i].last_instr_cond = 0;
502 		queue->packet_buffer[i].flags = 0;
503 		queue->packet_buffer[i].exception_number = UINT32_MAX;
504 		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
505 		queue->packet_buffer[i].cpu = INT_MIN;
506 	}
507 }
508 
509 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
510 {
511 	int idx;
512 	struct int_node *inode;
513 	struct cs_etm_traceid_queue *tidq;
514 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
515 
516 	intlist__for_each_entry(inode, traceid_queues_list) {
517 		idx = (int)(intptr_t)inode->priv;
518 		tidq = etmq->traceid_queues[idx];
519 		cs_etm__clear_packet_queue(&tidq->packet_queue);
520 	}
521 }
522 
/*
 * Initialise a freshly allocated traceid_queue: clear its packet queue,
 * resolve the thread for the owning auxtrace queue, and allocate the
 * packet, last-branch (only when requested) and event buffers.  Returns 0
 * on success or -ENOMEM; on failure everything allocated so far is freed.
 */
static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
{
	int rc = -ENOMEM;
	struct auxtrace_queue *queue;
	struct cs_etm_auxtrace *etm = etmq->etm;

	cs_etm__clear_packet_queue(&tidq->packet_queue);

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
	tidq->trace_chan_id = trace_chan_id;
	/* Exception level is unknown until the decoder reports it. */
	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
					       queue->tid);
	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);

	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->packet)
		goto out;

	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->prev_packet)
		goto out_free;

	if (etm->synth_opts.last_branch) {
		/* Room for the header plus last_branch_sz entries. */
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		tidq->last_branch = zalloc(sz);
		if (!tidq->last_branch)
			goto out_free;
		tidq->last_branch_rb = zalloc(sz);
		if (!tidq->last_branch_rb)
			goto out_free;
	}

	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!tidq->event_buf)
		goto out_free;

	return 0;

out_free:
	/* zfree() tolerates members that were never allocated. */
	zfree(&tidq->last_branch_rb);
	zfree(&tidq->last_branch);
	zfree(&tidq->prev_packet);
	zfree(&tidq->packet);
out:
	return rc;
}
575 
576 static struct cs_etm_traceid_queue
577 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
578 {
579 	int idx;
580 	struct int_node *inode;
581 	struct intlist *traceid_queues_list;
582 	struct cs_etm_traceid_queue *tidq, **traceid_queues;
583 	struct cs_etm_auxtrace *etm = etmq->etm;
584 
585 	if (etm->per_thread_decoding)
586 		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
587 
588 	traceid_queues_list = etmq->traceid_queues_list;
589 
590 	/*
591 	 * Check if the traceid_queue exist for this traceID by looking
592 	 * in the queue list.
593 	 */
594 	inode = intlist__find(traceid_queues_list, trace_chan_id);
595 	if (inode) {
596 		idx = (int)(intptr_t)inode->priv;
597 		return etmq->traceid_queues[idx];
598 	}
599 
600 	/* We couldn't find a traceid_queue for this traceID, allocate one */
601 	tidq = malloc(sizeof(*tidq));
602 	if (!tidq)
603 		return NULL;
604 
605 	memset(tidq, 0, sizeof(*tidq));
606 
607 	/* Get a valid index for the new traceid_queue */
608 	idx = intlist__nr_entries(traceid_queues_list);
609 	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
610 	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
611 	if (!inode)
612 		goto out_free;
613 
614 	/* Associate this traceID with this index */
615 	inode->priv = (void *)(intptr_t)idx;
616 
617 	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
618 		goto out_free;
619 
620 	/* Grow the traceid_queues array by one unit */
621 	traceid_queues = etmq->traceid_queues;
622 	traceid_queues = reallocarray(traceid_queues,
623 				      idx + 1,
624 				      sizeof(*traceid_queues));
625 
626 	/*
627 	 * On failure reallocarray() returns NULL and the original block of
628 	 * memory is left untouched.
629 	 */
630 	if (!traceid_queues)
631 		goto out_free;
632 
633 	traceid_queues[idx] = tidq;
634 	etmq->traceid_queues = traceid_queues;
635 
636 	return etmq->traceid_queues[idx];
637 
638 out_free:
639 	/*
640 	 * Function intlist__remove() removes the inode from the list
641 	 * and delete the memory associated to it.
642 	 */
643 	intlist__remove(traceid_queues_list, inode);
644 	free(tidq);
645 
646 	return NULL;
647 }
648 
649 struct cs_etm_packet_queue
650 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
651 {
652 	struct cs_etm_traceid_queue *tidq;
653 
654 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
655 	if (tidq)
656 		return &tidq->packet_queue;
657 
658 	return NULL;
659 }
660 
661 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
662 				struct cs_etm_traceid_queue *tidq)
663 {
664 	struct cs_etm_packet *tmp;
665 
666 	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
667 	    etm->synth_opts.instructions) {
668 		/*
669 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
670 		 * the next incoming packet.
671 		 *
672 		 * Threads and exception levels are also tracked for both the
673 		 * previous and current packets. This is because the previous
674 		 * packet is used for the 'from' IP for branch samples, so the
675 		 * thread at that time must also be assigned to that sample.
676 		 * Across discontinuity packets the thread can change, so by
677 		 * tracking the thread for the previous packet the branch sample
678 		 * will have the correct info.
679 		 */
680 		tmp = tidq->packet;
681 		tidq->packet = tidq->prev_packet;
682 		tidq->prev_packet = tmp;
683 		tidq->prev_packet_el = tidq->el;
684 		thread__put(tidq->prev_packet_thread);
685 		tidq->prev_packet_thread = thread__get(tidq->thread);
686 	}
687 }
688 
689 static void cs_etm__packet_dump(const char *pkt_string)
690 {
691 	const char *color = PERF_COLOR_BLUE;
692 	int len = strlen(pkt_string);
693 
694 	if (len && (pkt_string[len-1] == '\n'))
695 		color_fprintf(stdout, color, "	%s", pkt_string);
696 	else
697 		color_fprintf(stdout, color, "	%s\n", pkt_string);
698 
699 	fflush(stdout);
700 }
701 
702 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
703 					  struct cs_etm_auxtrace *etm, int t_idx,
704 					  int m_idx, u32 etmidr)
705 {
706 	u64 **metadata = etm->metadata;
707 
708 	t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
709 	t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
710 	t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
711 }
712 
713 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
714 					  struct cs_etm_auxtrace *etm, int t_idx,
715 					  int m_idx)
716 {
717 	u64 **metadata = etm->metadata;
718 
719 	t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
720 	t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
721 	t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
722 	t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
723 	t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
724 	t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
725 	t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
726 }
727 
728 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
729 					  struct cs_etm_auxtrace *etm, int t_idx,
730 					  int m_idx)
731 {
732 	u64 **metadata = etm->metadata;
733 
734 	t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
735 	t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
736 	t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
737 	t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
738 	t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
739 	t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
740 	t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
741 	t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
742 }
743 
/*
 * Fill one cs_etm_trace_params entry per decoder from the metadata of its
 * source CPU.  For FORMATTED trace the decoder index maps 1:1 onto the
 * metadata index; for unformatted trace every decoder uses @sample_cpu's
 * metadata (falling back to index 0 if the CPU is unknown).  Returns 0 on
 * success, -EINVAL on an unrecognised metadata magic value.
 */
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
				     struct cs_etm_auxtrace *etm,
				     enum cs_etm_format format,
				     int sample_cpu,
				     int decoders)
{
	int t_idx, m_idx;
	u32 etmidr;
	u64 architecture;

	for (t_idx = 0; t_idx < decoders; t_idx++) {
		if (format == FORMATTED)
			m_idx = t_idx;
		else {
			m_idx = get_cpu_data_idx(etm, sample_cpu);
			if (m_idx == -1) {
				pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
				m_idx = 0;
			}
		}

		/* The magic value selects which tracer's layout to read. */
		architecture = etm->metadata[m_idx][CS_ETM_MAGIC];

		switch (architecture) {
		case __perf_cs_etmv3_magic:
			etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
			cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
			break;
		case __perf_cs_etmv4_magic:
			cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
			break;
		case __perf_cs_ete_magic:
			cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}
785 
786 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
787 				       struct cs_etm_queue *etmq,
788 				       enum cs_etm_decoder_operation mode)
789 {
790 	int ret = -EINVAL;
791 
792 	if (!(mode < CS_ETM_OPERATION_MAX))
793 		goto out;
794 
795 	d_params->packet_printer = cs_etm__packet_dump;
796 	d_params->operation = mode;
797 	d_params->data = etmq;
798 	d_params->formatted = etmq->format == FORMATTED;
799 	d_params->fsyncs = false;
800 	d_params->hsyncs = false;
801 	d_params->frame_aligned = true;
802 
803 	ret = 0;
804 out:
805 	return ret;
806 }
807 
/*
 * Dump the raw contents of an AUX buffer through the decoder's packet
 * printer, then reset the decoder so normal decoding can restart cleanly.
 */
static void cs_etm__dump_event(struct cs_etm_queue *etmq,
			       struct auxtrace_buffer *buffer)
{
	int ret;
	const char *color = PERF_COLOR_BLUE;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);

	/* Feed the buffer in chunks until consumed or the decoder errors out. */
	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
				etmq->decoder, buffer->offset,
				&((u8 *)buffer->data)[buffer_used],
				buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__reset(etmq->decoder);
}
835 
836 static int cs_etm__flush_events(struct perf_session *session,
837 				const struct perf_tool *tool)
838 {
839 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
840 						   struct cs_etm_auxtrace,
841 						   auxtrace);
842 	if (dump_trace)
843 		return 0;
844 
845 	if (!tool->ordered_events)
846 		return -EINVAL;
847 
848 	if (etm->timeless_decoding) {
849 		/*
850 		 * Pass tid = -1 to process all queues. But likely they will have
851 		 * already been processed on PERF_RECORD_EXIT anyway.
852 		 */
853 		return cs_etm__process_timeless_queues(etm, -1);
854 	}
855 
856 	return cs_etm__process_timestamped_queues(etm);
857 }
858 
/*
 * Tear down every traceid_queue attached to @etmq: drop the thread
 * references, free all per-queue buffers, delete the traceID -> index
 * list, and finally free the traceid_queues array itself.
 */
static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	uintptr_t priv;
	struct int_node *inode, *tmp;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	/* _safe variant: nodes are removed while iterating. */
	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
		priv = (uintptr_t)inode->priv;
		idx = priv;

		/* Free this traceid_queue from the array */
		tidq = etmq->traceid_queues[idx];
		thread__zput(tidq->thread);
		thread__zput(tidq->prev_packet_thread);
		zfree(&tidq->event_buf);
		zfree(&tidq->last_branch);
		zfree(&tidq->last_branch_rb);
		zfree(&tidq->prev_packet);
		zfree(&tidq->packet);
		zfree(&tidq);

		/*
		 * Function intlist__remove() removes the inode from the list
		 * and delete the memory associated to it.
		 */
		intlist__remove(traceid_queues_list, inode);
	}

	/* Then the RB tree itself */
	intlist__delete(traceid_queues_list);
	etmq->traceid_queues_list = NULL;

	/* finally free the traceid_queues array */
	zfree(&etmq->traceid_queues);
}
896 
/*
 * Free one cs_etm_queue: its decoder, traceid queues and the traceID ->
 * metadata list.  The metadata blocks themselves are owned by
 * cs_etm_auxtrace and released in cs_etm__free(), so only the list nodes
 * are removed here.
 */
static void cs_etm__free_queue(void *priv)
{
	struct int_node *inode, *tmp;
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	cs_etm_decoder__free(etmq->decoder);
	cs_etm__free_traceid_queues(etmq);

	/* First remove all traceID/metadata nodes for the RB tree */
	intlist__for_each_entry_safe(inode, tmp, etmq->traceid_list)
		intlist__remove(etmq->traceid_list, inode);

	/* Then the RB tree itself */
	intlist__delete(etmq->traceid_list);

	free(etmq);
}
917 
918 static void cs_etm__free_events(struct perf_session *session)
919 {
920 	unsigned int i;
921 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
922 						   struct cs_etm_auxtrace,
923 						   auxtrace);
924 	struct auxtrace_queues *queues = &aux->queues;
925 
926 	for (i = 0; i < queues->nr_queues; i++) {
927 		cs_etm__free_queue(queues->queue_array[i].priv);
928 		queues->queue_array[i].priv = NULL;
929 	}
930 
931 	auxtrace_queues__free(queues);
932 }
933 
934 static void cs_etm__free(struct perf_session *session)
935 {
936 	int i;
937 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
938 						   struct cs_etm_auxtrace,
939 						   auxtrace);
940 	cs_etm__free_events(session);
941 	session->auxtrace = NULL;
942 
943 	for (i = 0; i < aux->num_cpu; i++)
944 		zfree(&aux->metadata[i]);
945 
946 	zfree(&aux->metadata);
947 	zfree(&aux);
948 }
949 
950 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
951 				      struct evsel *evsel)
952 {
953 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
954 						   struct cs_etm_auxtrace,
955 						   auxtrace);
956 
957 	return evsel->core.attr.type == aux->pmu_type;
958 }
959 
/*
 * Map an exception level reported by the decoder to the perf machine
 * (host or default guest) whose maps should be used for address lookup.
 */
static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
					   ocsd_ex_level el)
{
	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);

	/*
	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
	 * running at EL1 assume everything is the host.
	 */
	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
		return &etmq->etm->session->machines.host;

	/*
	 * Not perfect, but otherwise assume anything in EL1 is the default
	 * guest, and everything else is the host. Distinguishing between guest
	 * and host userspaces isn't currently supported either. Neither is
	 * multiple guest support. All this does is reduce the likeliness of
	 * decode errors where we look into the host kernel maps when it should
	 * have been the guest maps.
	 */
	switch (el) {
	case ocsd_EL1:
		return machines__find_guest(&etmq->etm->session->machines,
					    DEFAULT_GUEST_KERNEL_ID);
	case ocsd_EL3:
	case ocsd_EL2:
	case ocsd_EL0:
	case ocsd_EL_unknown:
	default:
		return &etmq->etm->session->machines.host;
	}
}
992 
993 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
994 			   ocsd_ex_level el)
995 {
996 	struct machine *machine = cs_etm__get_machine(etmq, el);
997 
998 	if (address >= machine__kernel_start(machine)) {
999 		if (machine__is_host(machine))
1000 			return PERF_RECORD_MISC_KERNEL;
1001 		else
1002 			return PERF_RECORD_MISC_GUEST_KERNEL;
1003 	} else {
1004 		if (machine__is_host(machine))
1005 			return PERF_RECORD_MISC_USER;
1006 		else {
1007 			/*
1008 			 * Can't really happen at the moment because
1009 			 * cs_etm__get_machine() will always return
1010 			 * machines.host for any non EL1 trace.
1011 			 */
1012 			return PERF_RECORD_MISC_GUEST_USER;
1013 		}
1014 	}
1015 }
1016 
/*
 * OpenCSD memory access callback: read up to 'size' bytes of program memory
 * at 'address' into 'buffer' on behalf of the decoder, resolving the address
 * through the maps of the thread currently associated with 'trace_chan_id'.
 *
 * Returns the number of bytes read, or 0 if the address could not be
 * resolved or the backing DSO data is unavailable.
 */
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8  cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked EL along side the PID in cs_etm__set_thread()
	 * so double check that it matches what OpenCSD thinks as well. It
	 * doesn't distinguish between EL0 and EL1 for this mem access callback
	 * so we had to do the extra tracking. Skip validation if it's any of
	 * the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	/* Kernel vs user (host vs guest) determines which maps to search. */
	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	/* Don't retry a DSO whose data already failed to load for itrace. */
	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	/* Translate the runtime address into an offset within the DSO. */
	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		/* Warn once globally, and once per DSO with the address. */
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		if (!dso__auxtrace_warned(dso)) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
				address,
				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
			dso__set_auxtrace_warned(dso);
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}
1091 
1092 static struct cs_etm_queue *cs_etm__alloc_queue(void)
1093 {
1094 	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
1095 	if (!etmq)
1096 		return NULL;
1097 
1098 	etmq->traceid_queues_list = intlist__new(NULL);
1099 	if (!etmq->traceid_queues_list)
1100 		goto out_free;
1101 
1102 	/*
1103 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
1104 	 * has to be made for each packet that gets decoded, optimizing access
1105 	 * in anything other than a sequential array is worth doing.
1106 	 */
1107 	etmq->traceid_list = intlist__new(NULL);
1108 	if (!etmq->traceid_list)
1109 		goto out_free;
1110 
1111 	return etmq;
1112 
1113 out_free:
1114 	intlist__delete(etmq->traceid_queues_list);
1115 	free(etmq);
1116 
1117 	return NULL;
1118 }
1119 
1120 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1121 			       struct auxtrace_queue *queue,
1122 			       unsigned int queue_nr)
1123 {
1124 	struct cs_etm_queue *etmq = queue->priv;
1125 
1126 	if (etmq)
1127 		return 0;
1128 
1129 	etmq = cs_etm__alloc_queue();
1130 
1131 	if (!etmq)
1132 		return -ENOMEM;
1133 
1134 	queue->priv = etmq;
1135 	etmq->etm = etm;
1136 	etmq->queue_nr = queue_nr;
1137 	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
1138 	etmq->offset = 0;
1139 
1140 	return 0;
1141 }
1142 
/*
 * Prime the auxtrace min heap for CPU-wide decoding: decode from the start
 * of this etmq's data until a CS timestamp is found, then add it to
 * etm->heap so that queues can later be processed in chronological order.
 *
 * Returns < 0 on error, 0 otherwise (including when the queue has no data).
 */
static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario.  As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs.  So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp.  The timestamp is then added to the auxtrace min heap
	 * in order to know what nibble (of all the etmqs) to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq.  Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block.  The decoder will stop when
		 * encountering a CS timestamp, a full packet queue or the end of
		 * trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one.  Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them.  As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp.  Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute.  Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}
1213 
/*
 * Snapshot the circular last-branch buffer (tidq->last_branch_rb) into the
 * flat, newest-first array (tidq->last_branch) that gets attached to
 * synthesized samples.
 */
static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps.  First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over.  The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}
1257 
1258 static inline
1259 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1260 {
1261 	tidq->last_branch_pos = 0;
1262 	tidq->last_branch_rb->nr = 0;
1263 }
1264 
1265 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1266 					 u8 trace_chan_id, u64 addr)
1267 {
1268 	u8 instrBytes[2];
1269 
1270 	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1271 			   instrBytes, 0);
1272 	/*
1273 	 * T32 instruction size is indicated by bits[15:11] of the first
1274 	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1275 	 * denote a 32-bit instruction.
1276 	 */
1277 	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1278 }
1279 
1280 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1281 {
1282 	/*
1283 	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1284 	 * appear in samples.
1285 	 */
1286 	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1287 	    packet->sample_type == CS_ETM_EXCEPTION)
1288 		return 0;
1289 
1290 	return packet->start_addr;
1291 }
1292 
1293 static inline
1294 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1295 {
1296 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1297 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1298 		return 0;
1299 
1300 	return packet->end_addr - packet->last_instr_size;
1301 }
1302 
1303 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1304 				     u64 trace_chan_id,
1305 				     const struct cs_etm_packet *packet,
1306 				     u64 offset)
1307 {
1308 	if (packet->isa == CS_ETM_ISA_T32) {
1309 		u64 addr = packet->start_addr;
1310 
1311 		while (offset) {
1312 			addr += cs_etm__t32_instr_size(etmq,
1313 						       trace_chan_id, addr);
1314 			offset--;
1315 		}
1316 		return addr;
1317 	}
1318 
1319 	/* Assume a 4 byte instruction size (A32/A64) */
1320 	return packet->start_addr + offset * 4;
1321 }
1322 
/*
 * Record the branch implied by the packet transition — from the last
 * instruction of PREV_PACKET to the first instruction of PACKET — into the
 * circular last-branch buffer.
 */
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
					  struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs = tidq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down.  After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!tidq->last_branch_pos)
		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	tidq->last_branch_pos -= 1;

	be       = &bs->entries[tidq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
	be->to	 = cs_etm__first_executed_instr(tidq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}
1354 
1355 static int cs_etm__inject_event(union perf_event *event,
1356 			       struct perf_sample *sample, u64 type)
1357 {
1358 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1359 	return perf_event__synthesize_sample(event, type, 0, sample);
1360 }
1361 
1362 
/*
 * Advance to the next auxtrace buffer of this queue, pointing etmq->buf and
 * etmq->buf_len at its data and dropping the previous buffer's data.
 *
 * Returns:	< 0	on error (buffer data could not be loaded)
 *		= 0	when there is no more data in the queue
 *		> 0	the number of bytes now available in etmq->buf
 */
static int
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	/* Reset the read cursor to the start of the new buffer. */
	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}
1404 
1405 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1406 			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1407 			       ocsd_ex_level el)
1408 {
1409 	struct machine *machine = cs_etm__get_machine(etmq, el);
1410 
1411 	if (tid != -1) {
1412 		thread__zput(tidq->thread);
1413 		tidq->thread = machine__find_thread(machine, -1, tid);
1414 	}
1415 
1416 	/* Couldn't find a known thread */
1417 	if (!tidq->thread)
1418 		tidq->thread = machine__idle_thread(machine);
1419 
1420 	tidq->el = el;
1421 }
1422 
1423 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1424 			    u8 trace_chan_id, ocsd_ex_level el)
1425 {
1426 	struct cs_etm_traceid_queue *tidq;
1427 
1428 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1429 	if (!tidq)
1430 		return -EINVAL;
1431 
1432 	cs_etm__set_thread(etmq, tidq, tid, el);
1433 	return 0;
1434 }
1435 
1436 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1437 {
1438 	return !!etmq->etm->timeless_decoding;
1439 }
1440 
1441 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1442 			      u64 trace_chan_id,
1443 			      const struct cs_etm_packet *packet,
1444 			      struct perf_sample *sample)
1445 {
1446 	/*
1447 	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1448 	 * packet, so directly bail out with 'insn_len' = 0.
1449 	 */
1450 	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1451 		sample->insn_len = 0;
1452 		return;
1453 	}
1454 
1455 	/*
1456 	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1457 	 * cs_etm__t32_instr_size().
1458 	 */
1459 	if (packet->isa == CS_ETM_ISA_T32)
1460 		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1461 							  sample->ip);
1462 	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
1463 	else
1464 		sample->insn_len = 4;
1465 
1466 	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1467 			   (void *)sample->insn, 0);
1468 }
1469 
1470 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1471 {
1472 	struct cs_etm_auxtrace *etm = etmq->etm;
1473 
1474 	if (etm->has_virtual_ts)
1475 		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1476 	else
1477 		return cs_timestamp;
1478 }
1479 
1480 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1481 					       struct cs_etm_traceid_queue *tidq)
1482 {
1483 	struct cs_etm_auxtrace *etm = etmq->etm;
1484 	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1485 
1486 	if (!etm->timeless_decoding && etm->has_virtual_ts)
1487 		return packet_queue->cs_timestamp;
1488 	else
1489 		return etm->latest_kernel_timestamp;
1490 }
1491 
/*
 * Synthesize a PERF_RECORD_SAMPLE for the "instructions" event at address
 * 'addr', covering 'period' instructions, and deliver it to the session.
 * Returns 0 on success, an error code otherwise.
 */
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    struct cs_etm_traceid_queue *tidq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = tidq->event_buf;
	struct perf_sample sample = {.ip = 0,};

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
	event->sample.header.size = sizeof(struct perf_event_header);

	/* Set time field based on etm auxtrace config. */
	sample.time = cs_etm__resolve_sample_time(etmq, tidq);

	sample.ip = addr;
	sample.pid = thread__pid(tidq->thread);
	sample.tid = thread__tid(tidq->thread);
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = tidq->packet->cpu;
	sample.flags = tidq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	/* Attach the instruction bytes at sample.ip to the sample. */
	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);

	if (etm->synth_opts.last_branch)
		sample.branch_stack = tidq->last_branch;

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	return ret;
}
1539 
1540 /*
1541  * The cs etm packet encodes an instruction range between a branch target
1542  * and the next taken branch. Generate sample accordingly.
1543  */
1544 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1545 				       struct cs_etm_traceid_queue *tidq)
1546 {
1547 	int ret = 0;
1548 	struct cs_etm_auxtrace *etm = etmq->etm;
1549 	struct perf_sample sample = {.ip = 0,};
1550 	union perf_event *event = tidq->event_buf;
1551 	struct dummy_branch_stack {
1552 		u64			nr;
1553 		u64			hw_idx;
1554 		struct branch_entry	entries;
1555 	} dummy_bs;
1556 	u64 ip;
1557 
1558 	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1559 
1560 	event->sample.header.type = PERF_RECORD_SAMPLE;
1561 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1562 						     tidq->prev_packet_el);
1563 	event->sample.header.size = sizeof(struct perf_event_header);
1564 
1565 	/* Set time field based on etm auxtrace config. */
1566 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1567 
1568 	sample.ip = ip;
1569 	sample.pid = thread__pid(tidq->prev_packet_thread);
1570 	sample.tid = thread__tid(tidq->prev_packet_thread);
1571 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1572 	sample.id = etmq->etm->branches_id;
1573 	sample.stream_id = etmq->etm->branches_id;
1574 	sample.period = 1;
1575 	sample.cpu = tidq->packet->cpu;
1576 	sample.flags = tidq->prev_packet->flags;
1577 	sample.cpumode = event->sample.header.misc;
1578 
1579 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1580 			  &sample);
1581 
1582 	/*
1583 	 * perf report cannot handle events without a branch stack
1584 	 */
1585 	if (etm->synth_opts.last_branch) {
1586 		dummy_bs = (struct dummy_branch_stack){
1587 			.nr = 1,
1588 			.hw_idx = -1ULL,
1589 			.entries = {
1590 				.from = sample.ip,
1591 				.to = sample.addr,
1592 			},
1593 		};
1594 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1595 	}
1596 
1597 	if (etm->synth_opts.inject) {
1598 		ret = cs_etm__inject_event(event, &sample,
1599 					   etm->branches_sample_type);
1600 		if (ret)
1601 			return ret;
1602 	}
1603 
1604 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1605 
1606 	if (ret)
1607 		pr_err(
1608 		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1609 		ret);
1610 
1611 	return ret;
1612 }
1613 
/*
 * Register the synthetic "branches" and "instructions" events that decoded
 * trace will be reported against, deriving their attributes from the first
 * CoreSight evsel found in the session's evlist.  Stores the resulting
 * sample types and event ids in 'etm'.  Returns 0 on success or when no
 * CoreSight event is selected, an error code otherwise.
 */
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	/* Locate the evsel that carries the CoreSight AUX data. */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	/* Timeless traces have no timestamps, so don't advertise them. */
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	/* Guard against the (unlikely) wrap to 0 so the id stays valid. */
	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		/* ADDR is only meaningful for the branches event. */
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}
1698 
/*
 * Process the current packet of a traceID queue: update the last-branch
 * circular buffer, emit periodic instruction samples and branch samples as
 * configured in synth_opts, then swap PACKET and PREV_PACKET for the next
 * round.  Returns 0 on success, an error code otherwise.
 */
static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_prev;

	/* Get instructions remainder from previous packet */
	instrs_prev = tidq->period_instructions;

	tidq->period_instructions += tidq->packet->instr_count;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->synth_opts.instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/*
		 * Below diagram demonstrates the instruction samples
		 * generation flows:
		 *
		 *    Instrs     Instrs       Instrs       Instrs
		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
		 *    |            |            |            |
		 *    V            V            V            V
		 *   --------------------------------------------------
		 *            ^                                  ^
		 *            |                                  |
		 *         Period                             Period
		 *    instructions(Pi)                   instructions(Pi')
		 *
		 *            |                                  |
		 *            \---------------- -----------------/
		 *                             V
		 *                 tidq->packet->instr_count
		 *
		 * Instrs Sample(n...) are the synthesised samples occurring
		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line.  Sample(n) is being the
		 * last sample before the current etm packet, n+1 to n+3
		 * samples are generated from the current etm packet.
		 *
		 * tidq->packet->instr_count represents the number of
		 * instructions in the current etm packet.
		 *
		 * Period instructions (Pi) contains the number of
		 * instructions executed after the sample point(n) from the
		 * previous etm packet.  This will always be less than
		 * etm->instructions_sample_period.
		 *
		 * When generate new samples, it combines with two parts
		 * instructions, one is the tail of the old packet and another
		 * is the head of the new coming packet, to generate
		 * sample(n+1); sample(n+2) and sample(n+3) consume the
		 * instructions with sample period.  After sample(n+3), the rest
		 * instructions will be used by later packet and it is assigned
		 * to tidq->period_instructions for next round calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
				etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	/* PACKET becomes PREV_PACKET for the next packet from the decoder. */
	cs_etm__packet_swap(etm, tidq);

	return 0;
}
1827 
1828 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1829 {
1830 	/*
1831 	 * When the exception packet is inserted, whether the last instruction
1832 	 * in previous range packet is taken branch or not, we need to force
1833 	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
1834 	 * to generate branch sample for the instruction range before the
1835 	 * exception is trapped to kernel or before the exception returning.
1836 	 *
1837 	 * The exception packet includes the dummy address values, so don't
1838 	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
1839 	 * for generating instruction and branch samples.
1840 	 */
1841 	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1842 		tidq->prev_packet->last_instr_taken_branch = true;
1843 
1844 	return 0;
1845 }
1846 
/*
 * Flush a traceID queue at a discontinuity or at the end of trace data:
 * emit the pending last-branch instruction sample and/or branch sample for
 * PREV_PACKET, swap packets and reset the last-branch circular buffer.
 * Returns 0 on success, an error code otherwise.
 */
static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		/* The flushed instructions are all accounted for now. */
		tidq->period_instructions = 0;

	}

	if (etm->synth_opts.branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	cs_etm__packet_swap(etm, tidq);

	/* Reset last branches after flush the trace */
	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return err;
}
1900 
/*
 * Finalise a traceID queue when the trace block ends: flush the last-branch
 * buffer into one final instruction sample, but don't generate branch
 * samples from the now-stale packet.  Returns 0 on success, an error code
 * otherwise.
 */
static int cs_etm__end_block(struct cs_etm_queue *etmq,
			     struct cs_etm_traceid_queue *tidq)
{
	int err;

	/*
	 * It has no new packet coming and 'etmq->packet' contains the stale
	 * packet which was set at the previous time with packets swapping;
	 * so skip to generate branch sample to avoid stale packet.
	 *
	 * For this case only flush branch stack and generate a last branch
	 * event for the branches left in the circular buffer at the end of
	 * the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;
	}

	return 0;
}
1940 /*
1941  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1942  *			   if need be.
1943  * Returns:	< 0	if error
1944  *		= 0	if no more auxtrace_buffer to read
1945  *		> 0	if the current buffer isn't empty yet
1946  */
1947 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1948 {
1949 	int ret;
1950 
1951 	if (!etmq->buf_len) {
1952 		ret = cs_etm__get_trace(etmq);
1953 		if (ret <= 0)
1954 			return ret;
1955 		/*
1956 		 * We cannot assume consecutive blocks in the data file
1957 		 * are contiguous, reset the decoder to force re-sync.
1958 		 */
1959 		ret = cs_etm_decoder__reset(etmq->decoder);
1960 		if (ret)
1961 			return ret;
1962 	}
1963 
1964 	return etmq->buf_len;
1965 }
1966 
/*
 * Read back the instruction ending at 'end_addr' and return true if it is
 * an SVC (supervisor call) for the ISA recorded in 'packet'.
 */
static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
				 struct cs_etm_packet *packet,
				 u64 end_addr)
{
	/* Initialise to keep compiler happy */
	u16 instr16 = 0;
	u32 instr32 = 0;
	u64 addr;

	switch (packet->isa) {
	case CS_ETM_ISA_T32:
		/*
		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'15         b'8
		 * +-----------------+--------+
		 * | 1 1 0 1 1 1 1 1 |  imm8  |
		 * +-----------------+--------+
		 *
		 * According to the specification, it only defines SVC for T32
		 * with 16 bits instruction and has no definition for 32bits;
		 * so below only read 2 bytes as instruction size for T32.
		 */
		addr = end_addr - 2;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
				   (u8 *)&instr16, 0);
		if ((instr16 & 0xFF00) == 0xDF00)
			return true;

		break;
	case CS_ETM_ISA_A32:
		/*
		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'31 b'28 b'27 b'24
		 * +---------+---------+-------------------------+
		 * |  !1111  | 1 1 1 1 |        imm24            |
		 * +---------+---------+-------------------------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0x0F000000) == 0x0F000000 &&
		    (instr32 & 0xF0000000) != 0xF0000000)
			return true;

		break;
	case CS_ETM_ISA_A64:
		/*
		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
		 *
		 *  b'31               b'21           b'4     b'0
		 * +-----------------------+---------+-----------+
		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
		 * +-----------------------+---------+-----------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0xFFE0001F) == 0xd4000001)
			return true;

		break;
	case CS_ETM_ISA_UNKNOWN:
	default:
		break;
	}

	return false;
}
2037 
2038 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2039 			       struct cs_etm_traceid_queue *tidq, u64 magic)
2040 {
2041 	u8 trace_chan_id = tidq->trace_chan_id;
2042 	struct cs_etm_packet *packet = tidq->packet;
2043 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2044 
2045 	if (magic == __perf_cs_etmv3_magic)
2046 		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2047 			return true;
2048 
2049 	/*
2050 	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2051 	 * HVC cases; need to check if it's SVC instruction based on
2052 	 * packet address.
2053 	 */
2054 	if (magic == __perf_cs_etmv4_magic) {
2055 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2056 		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2057 					 prev_packet->end_addr))
2058 			return true;
2059 	}
2060 
2061 	return false;
2062 }
2063 
2064 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2065 				       u64 magic)
2066 {
2067 	struct cs_etm_packet *packet = tidq->packet;
2068 
2069 	if (magic == __perf_cs_etmv3_magic)
2070 		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2071 		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2072 		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2073 		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2074 		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2075 			return true;
2076 
2077 	if (magic == __perf_cs_etmv4_magic)
2078 		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2079 		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2080 		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2081 		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2082 		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2083 		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2084 		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2085 			return true;
2086 
2087 	return false;
2088 }
2089 
2090 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2091 				      struct cs_etm_traceid_queue *tidq,
2092 				      u64 magic)
2093 {
2094 	u8 trace_chan_id = tidq->trace_chan_id;
2095 	struct cs_etm_packet *packet = tidq->packet;
2096 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2097 
2098 	if (magic == __perf_cs_etmv3_magic)
2099 		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2100 		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2101 		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2102 		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2103 		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2104 		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2105 		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2106 			return true;
2107 
2108 	if (magic == __perf_cs_etmv4_magic) {
2109 		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2110 		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2111 		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2112 		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2113 			return true;
2114 
2115 		/*
2116 		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2117 		 * (SMC, HVC) are taken as sync exceptions.
2118 		 */
2119 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2120 		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2121 					  prev_packet->end_addr))
2122 			return true;
2123 
2124 		/*
2125 		 * ETMv4 has 5 bits for exception number; if the numbers
2126 		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2127 		 * they are implementation defined exceptions.
2128 		 *
2129 		 * For this case, simply take it as sync exception.
2130 		 */
2131 		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2132 		    packet->exception_number <= CS_ETMV4_EXC_END)
2133 			return true;
2134 	}
2135 
2136 	return false;
2137 }
2138 
/*
 * Derive perf branch flags (PERF_IP_FLAG_*) for the packet at the head of
 * the traceid queue, and retroactively fix up the flags of the previous
 * packet where only the current packet can disambiguate it (e.g. an
 * exception return whose landing address follows an SVC instruction).
 *
 * Must run before packet addresses are swapped by the packet handlers, as
 * it reads the pre-swap start/end addresses.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
				    struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * Immediate branch instruction with neither link nor
		 * return flag, it's normal branch instruction within
		 * the function.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * Immediate branch instruction with link (e.g. BL), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with link (e.g. BLR), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with subtype of
		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
		 * function return for A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Indirect branch instruction without link (e.g. BR), usually
		 * this is used for function return, especially for functions
		 * within dynamic link lib.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction (e.g. A64 RET) for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Decoder might insert a discontinuity in the middle of
		 * instruction packets, fixup prev_packet with flag
		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;

		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows SVC instruction,
		 * it needs to calibrate the previous packet sample flags
		 * as PERF_IP_FLAG_SYSCALLRET.
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id,
					 packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous, if the previous packet is
		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
		 * for previous packet.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		/* Fetch the trace magic to distinguish ETMv3 from ETMv4 */
		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for system call. */
		if (cs_etm__is_syscall(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exceptions are triggered by external signals from bus,
		 * interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, exception is caused by trap, instruction &
		 * data fault, or alignment errors.
		 */
		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * When the exception packet is inserted, since exception
		 * packet is not used standalone for generating samples
		 * and it's affiliation to the previous instruction range
		 * packet; so set previous range packet flags to tell perf
		 * it is an exception taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * When the exception return packet is inserted, since
		 * exception return packet is not used standalone for
		 * generating samples and it's affiliation to the previous
		 * instruction range packet; so set previous range packet
		 * flags to tell perf it is an exception return branch.
		 *
		 * The exception return can be for either system call or
		 * other exception types; unfortunately the packet doesn't
		 * contain exception type related info so we cannot decide
		 * the exception type purely based on exception return packet.
		 * If we record the exception number from exception packet and
		 * reuse it for exception return packet, this is not reliable
		 * due the trace can be discontinuity or the interrupt can
		 * be nested, thus the recorded exception number cannot be
		 * used for exception return packet for these two cases.
		 *
		 * For exception return packet, we only need to distinguish the
		 * packet is for system call or for other types.  Thus the
		 * decision can be deferred when receive the next packet which
		 * contains the return address, based on the return address we
		 * can read out the previous instruction and check if it's a
		 * system call instruction and then calibrate the sample flag
		 * as needed (see the CS_ETM_RANGE case above).
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}
2317 
2318 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2319 {
2320 	int ret = 0;
2321 	size_t processed = 0;
2322 
2323 	/*
2324 	 * Packets are decoded and added to the decoder's packet queue
2325 	 * until the decoder packet processing callback has requested that
2326 	 * processing stops or there is nothing left in the buffer.  Normal
2327 	 * operations that stop processing are a timestamp packet or a full
2328 	 * decoder buffer queue.
2329 	 */
2330 	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2331 						 etmq->offset,
2332 						 &etmq->buf[etmq->buf_used],
2333 						 etmq->buf_len,
2334 						 &processed);
2335 	if (ret)
2336 		goto out;
2337 
2338 	etmq->offset += processed;
2339 	etmq->buf_used += processed;
2340 	etmq->buf_len -= processed;
2341 
2342 out:
2343 	return ret;
2344 }
2345 
2346 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2347 					 struct cs_etm_traceid_queue *tidq)
2348 {
2349 	int ret;
2350 	struct cs_etm_packet_queue *packet_queue;
2351 
2352 	packet_queue = &tidq->packet_queue;
2353 
2354 	/* Process each packet in this chunk */
2355 	while (1) {
2356 		ret = cs_etm_decoder__get_packet(packet_queue,
2357 						 tidq->packet);
2358 		if (ret <= 0)
2359 			/*
2360 			 * Stop processing this chunk on
2361 			 * end of data or error
2362 			 */
2363 			break;
2364 
2365 		/*
2366 		 * Since packet addresses are swapped in packet
2367 		 * handling within below switch() statements,
2368 		 * thus setting sample flags must be called
2369 		 * prior to switch() statement to use address
2370 		 * information before packets swapping.
2371 		 */
2372 		ret = cs_etm__set_sample_flags(etmq, tidq);
2373 		if (ret < 0)
2374 			break;
2375 
2376 		switch (tidq->packet->sample_type) {
2377 		case CS_ETM_RANGE:
2378 			/*
2379 			 * If the packet contains an instruction
2380 			 * range, generate instruction sequence
2381 			 * events.
2382 			 */
2383 			cs_etm__sample(etmq, tidq);
2384 			break;
2385 		case CS_ETM_EXCEPTION:
2386 		case CS_ETM_EXCEPTION_RET:
2387 			/*
2388 			 * If the exception packet is coming,
2389 			 * make sure the previous instruction
2390 			 * range packet to be handled properly.
2391 			 */
2392 			cs_etm__exception(tidq);
2393 			break;
2394 		case CS_ETM_DISCONTINUITY:
2395 			/*
2396 			 * Discontinuity in trace, flush
2397 			 * previous branch stack
2398 			 */
2399 			cs_etm__flush(etmq, tidq);
2400 			break;
2401 		case CS_ETM_EMPTY:
2402 			/*
2403 			 * Should not receive empty packet,
2404 			 * report error.
2405 			 */
2406 			pr_err("CS ETM Trace: empty packet\n");
2407 			return -EINVAL;
2408 		default:
2409 			break;
2410 		}
2411 	}
2412 
2413 	return ret;
2414 }
2415 
2416 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2417 {
2418 	int idx;
2419 	struct int_node *inode;
2420 	struct cs_etm_traceid_queue *tidq;
2421 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2422 
2423 	intlist__for_each_entry(inode, traceid_queues_list) {
2424 		idx = (int)(intptr_t)inode->priv;
2425 		tidq = etmq->traceid_queues[idx];
2426 
2427 		/* Ignore return value */
2428 		cs_etm__process_traceid_queue(etmq, tidq);
2429 
2430 		/*
2431 		 * Generate an instruction sample with the remaining
2432 		 * branchstack entries.
2433 		 */
2434 		cs_etm__flush(etmq, tidq);
2435 	}
2436 }
2437 
2438 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2439 {
2440 	int err = 0;
2441 	struct cs_etm_traceid_queue *tidq;
2442 
2443 	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2444 	if (!tidq)
2445 		return -EINVAL;
2446 
2447 	/* Go through each buffer in the queue and decode them one by one */
2448 	while (1) {
2449 		err = cs_etm__get_data_block(etmq);
2450 		if (err <= 0)
2451 			return err;
2452 
2453 		/* Run trace decoder until buffer consumed or end of trace */
2454 		do {
2455 			err = cs_etm__decode_data_block(etmq);
2456 			if (err)
2457 				return err;
2458 
2459 			/*
2460 			 * Process each packet in this chunk, nothing to do if
2461 			 * an error occurs other than hoping the next one will
2462 			 * be better.
2463 			 */
2464 			err = cs_etm__process_traceid_queue(etmq, tidq);
2465 
2466 		} while (etmq->buf_len);
2467 
2468 		if (err == 0)
2469 			/* Flush any remaining branch stack entries */
2470 			err = cs_etm__end_block(etmq, tidq);
2471 	}
2472 
2473 	return err;
2474 }
2475 
2476 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2477 {
2478 	int idx, err = 0;
2479 	struct cs_etm_traceid_queue *tidq;
2480 	struct int_node *inode;
2481 
2482 	/* Go through each buffer in the queue and decode them one by one */
2483 	while (1) {
2484 		err = cs_etm__get_data_block(etmq);
2485 		if (err <= 0)
2486 			return err;
2487 
2488 		/* Run trace decoder until buffer consumed or end of trace */
2489 		do {
2490 			err = cs_etm__decode_data_block(etmq);
2491 			if (err)
2492 				return err;
2493 
2494 			/*
2495 			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2496 			 * single traceID queue because each TID has a separate
2497 			 * buffer. But here in per-cpu mode we need to iterate
2498 			 * over each channel instead.
2499 			 */
2500 			intlist__for_each_entry(inode,
2501 						etmq->traceid_queues_list) {
2502 				idx = (int)(intptr_t)inode->priv;
2503 				tidq = etmq->traceid_queues[idx];
2504 				cs_etm__process_traceid_queue(etmq, tidq);
2505 			}
2506 		} while (etmq->buf_len);
2507 
2508 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2509 			idx = (int)(intptr_t)inode->priv;
2510 			tidq = etmq->traceid_queues[idx];
2511 			/* Flush any remaining branch stack entries */
2512 			err = cs_etm__end_block(etmq, tidq);
2513 			if (err)
2514 				return err;
2515 		}
2516 	}
2517 
2518 	return err;
2519 }
2520 
2521 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2522 					   pid_t tid)
2523 {
2524 	unsigned int i;
2525 	struct auxtrace_queues *queues = &etm->queues;
2526 
2527 	for (i = 0; i < queues->nr_queues; i++) {
2528 		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2529 		struct cs_etm_queue *etmq = queue->priv;
2530 		struct cs_etm_traceid_queue *tidq;
2531 
2532 		if (!etmq)
2533 			continue;
2534 
2535 		if (etm->per_thread_decoding) {
2536 			tidq = cs_etm__etmq_get_traceid_queue(
2537 				etmq, CS_ETM_PER_THREAD_TRACEID);
2538 
2539 			if (!tidq)
2540 				continue;
2541 
2542 			if (tid == -1 || thread__tid(tidq->thread) == tid)
2543 				cs_etm__run_per_thread_timeless_decoder(etmq);
2544 		} else
2545 			cs_etm__run_per_cpu_timeless_decoder(etmq);
2546 	}
2547 
2548 	return 0;
2549 }
2550 
/*
 * Decode and process trace data from all queues in global timestamp order.
 * A min heap keyed on CoreSight timestamps arbitrates which queue/traceID
 * is handled next, so samples from different CPUs are synthesized in time
 * order.  The heap key packs both the auxtrace queue number and the trace
 * channel ID (see TO_CS_QUEUE_NR/TO_QUEUE_NR/TO_TRACE_CHAN_ID).
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr, i;
	u8 trace_chan_id;
	u64 cs_timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	/*
	 * Pre-populate the heap with one entry from each queue so that we can
	 * start processing in time order across all queues.
	 */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
		if (ret)
			return ret;
	}

	while (1) {
		if (!etm->heap.heap_cnt)
			goto out;

		/*
		 * Take the entry at the top of the min heap and unpack the
		 * queue number and trace channel ID from it.
		 */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this traceID,
			 * which means something somewhere went very wrong.  No
			 * other choice than simply exit.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new auxtrace_buffer
		 * if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!cs_timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there is no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues.  Since we
			 * did not get a timestamp, there is no more traces to
			 * process in this auxtrace_buffer.  As such empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded.  They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
	}

out:
	return ret;
}
2663 
2664 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2665 					union perf_event *event)
2666 {
2667 	struct thread *th;
2668 
2669 	if (etm->timeless_decoding)
2670 		return 0;
2671 
2672 	/*
2673 	 * Add the tid/pid to the log so that we can get a match when we get a
2674 	 * contextID from the decoder. Only track for the host: only kernel
2675 	 * trace is supported for guests which wouldn't need pids so this should
2676 	 * be fine.
2677 	 */
2678 	th = machine__findnew_thread(&etm->session->machines.host,
2679 				     event->itrace_start.pid,
2680 				     event->itrace_start.tid);
2681 	if (!th)
2682 		return -ENOMEM;
2683 
2684 	thread__put(th);
2685 
2686 	return 0;
2687 }
2688 
2689 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2690 					   union perf_event *event)
2691 {
2692 	struct thread *th;
2693 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2694 
2695 	/*
2696 	 * Context switch in per-thread mode are irrelevant since perf
2697 	 * will start/stop tracing as the process is scheduled.
2698 	 */
2699 	if (etm->timeless_decoding)
2700 		return 0;
2701 
2702 	/*
2703 	 * SWITCH_IN events carry the next process to be switched out while
2704 	 * SWITCH_OUT events carry the process to be switched in.  As such
2705 	 * we don't care about IN events.
2706 	 */
2707 	if (!out)
2708 		return 0;
2709 
2710 	/*
2711 	 * Add the tid/pid to the log so that we can get a match when we get a
2712 	 * contextID from the decoder. Only track for the host: only kernel
2713 	 * trace is supported for guests which wouldn't need pids so this should
2714 	 * be fine.
2715 	 */
2716 	th = machine__findnew_thread(&etm->session->machines.host,
2717 				     event->context_switch.next_prev_pid,
2718 				     event->context_switch.next_prev_tid);
2719 	if (!th)
2720 		return -ENOMEM;
2721 
2722 	thread__put(th);
2723 
2724 	return 0;
2725 }
2726 
2727 static int cs_etm__process_event(struct perf_session *session,
2728 				 union perf_event *event,
2729 				 struct perf_sample *sample,
2730 				 const struct perf_tool *tool)
2731 {
2732 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2733 						   struct cs_etm_auxtrace,
2734 						   auxtrace);
2735 
2736 	if (dump_trace)
2737 		return 0;
2738 
2739 	if (!tool->ordered_events) {
2740 		pr_err("CoreSight ETM Trace requires ordered events\n");
2741 		return -EINVAL;
2742 	}
2743 
2744 	switch (event->header.type) {
2745 	case PERF_RECORD_EXIT:
2746 		/*
2747 		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2748 		 * start the decode because we know there will be no more trace from
2749 		 * this thread. All this does is emit samples earlier than waiting for
2750 		 * the flush in other modes, but with timestamps it makes sense to wait
2751 		 * for flush so that events from different threads are interleaved
2752 		 * properly.
2753 		 */
2754 		if (etm->per_thread_decoding && etm->timeless_decoding)
2755 			return cs_etm__process_timeless_queues(etm,
2756 							       event->fork.tid);
2757 		break;
2758 
2759 	case PERF_RECORD_ITRACE_START:
2760 		return cs_etm__process_itrace_start(etm, event);
2761 
2762 	case PERF_RECORD_SWITCH_CPU_WIDE:
2763 		return cs_etm__process_switch_cpu_wide(etm, event);
2764 
2765 	case PERF_RECORD_AUX:
2766 		/*
2767 		 * Record the latest kernel timestamp available in the header
2768 		 * for samples so that synthesised samples occur from this point
2769 		 * onwards.
2770 		 */
2771 		if (sample->time && (sample->time != (u64)-1))
2772 			etm->latest_kernel_timestamp = sample->time;
2773 		break;
2774 
2775 	default:
2776 		break;
2777 	}
2778 
2779 	return 0;
2780 }
2781 
2782 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2783 			     struct perf_record_auxtrace *event)
2784 {
2785 	struct auxtrace_buffer *buf;
2786 	unsigned int i;
2787 	/*
2788 	 * Find all buffers with same reference in the queues and dump them.
2789 	 * This is because the queues can contain multiple entries of the same
2790 	 * buffer that were split on aux records.
2791 	 */
2792 	for (i = 0; i < etm->queues.nr_queues; ++i)
2793 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2794 			if (buf->reference == event->reference)
2795 				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2796 }
2797 
/*
 * Queue (and optionally dump) the trace data described by a
 * PERF_RECORD_AUXTRACE event.  In non-piped mode the payload follows the
 * event in the file, so the current file offset is recorded for lazy
 * reading; in piped mode the data has already been consumed inline.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t  data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;
		int idx = event->auxtrace.idx;

		if (is_pipe)
			data_offset = 0;
		else {
			/* Trace data immediately follows the event record */
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		/* Dump the raw trace for this buffer if requested */
		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	} else if (dump_trace)
		/* Data was queued up front; dump matching fragments instead */
		dump_queued_data(etm, &event->auxtrace);

	return 0;
}
2836 
2837 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2838 {
2839 	struct evsel *evsel;
2840 	struct evlist *evlist = etm->session->evlist;
2841 
2842 	/* Override timeless mode with user input from --itrace=Z */
2843 	if (etm->synth_opts.timeless_decoding) {
2844 		etm->timeless_decoding = true;
2845 		return 0;
2846 	}
2847 
2848 	/*
2849 	 * Find the cs_etm evsel and look at what its timestamp setting was
2850 	 */
2851 	evlist__for_each_entry(evlist, evsel)
2852 		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2853 			etm->timeless_decoding =
2854 				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2855 			return 0;
2856 		}
2857 
2858 	pr_err("CS ETM: Couldn't find ETM evsel\n");
2859 	return -EINVAL;
2860 }
2861 
2862 /*
2863  * Read a single cpu parameter block from the auxtrace_info priv block.
2864  *
2865  * For version 1 there is a per cpu nr_params entry. If we are handling
2866  * version 1 file, then there may be less, the same, or more params
2867  * indicated by this value than the compile time number we understand.
2868  *
2869  * For a version 0 info block, there are a fixed number, and we need to
2870  * fill out the nr_param value in the metadata we create.
2871  */
static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
				    int out_blk_size, int nr_params_v0)
{
	u64 *metadata = NULL;
	int hdr_version;
	int nr_in_params, nr_out_params, nr_cmn_params;
	int i, k;

	/* Output block is zero-filled so unread params default to 0 */
	metadata = zalloc(sizeof(*metadata) * out_blk_size);
	if (!metadata)
		return NULL;

	/* read block current index & version */
	i = *buff_in_offset;
	hdr_version = buff_in[CS_HEADER_VERSION];

	if (!hdr_version) {
		/* Read a version 0 info block into a version 1 metadata block */
		nr_in_params = nr_params_v0;
		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
		/* v0 has no NR_TRC_PARAMS field on disk - synthesize it */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
		/* remaining block params at offset +1 from source */
		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
			metadata[k + 1] = buff_in[i + k];
		/* version 0 has 2 common params */
		nr_cmn_params = 2;
	} else {
		/*
		 * Read a version 1 info block - the input and output
		 * nr_params may differ (see the comment above this function).
		 */
		/* version 1 has 3 common params */
		nr_cmn_params = 3;
		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];

		/* if input has more params than output - skip excess */
		nr_out_params = nr_in_params + nr_cmn_params;
		if (nr_out_params > out_blk_size)
			nr_out_params = out_blk_size;

		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
			metadata[k] = buff_in[i + k];

		/* record the actual nr params we copied */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
	}

	/* adjust in offset by number of in params used */
	i += nr_in_params + nr_cmn_params;
	*buff_in_offset = i;
	return metadata;
}
2922 
2923 /**
2924  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2925  * on the bounds of aux_event, if it matches with the buffer that's at
2926  * file_offset.
2927  *
2928  * Normally, whole auxtrace buffers would be added to the queue. But we
2929  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2930  * is reset across each buffer, so splitting the buffers up in advance has
2931  * the same effect.
2932  */
static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
				      struct perf_record_aux *aux_event, struct perf_sample *sample)
{
	int err;
	char buf[PERF_SAMPLE_MAX_SIZE];
	union perf_event *auxtrace_event_union;
	struct perf_record_auxtrace *auxtrace_event;
	union perf_event auxtrace_fragment;
	__u64 aux_offset, aux_size;
	enum cs_etm_format format;

	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	/*
	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
	 * from looping through the auxtrace index.
	 */
	err = perf_session__peek_event(session, file_offset, buf,
				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
	if (err)
		return err;
	auxtrace_event = &auxtrace_event_union->auxtrace;
	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
		return -EINVAL;

	/*
	 * Sanity check the peeked event: it must be big enough to be a real
	 * auxtrace record and its size must match what the index recorded.
	 */
	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
		auxtrace_event->header.size != sz) {
		return -EINVAL;
	}

	/*
	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
	 * Return 'not found' if mismatch.
	 */
	if (auxtrace_event->cpu == (__u32) -1) {
		etm->per_thread_decoding = true;
		if (auxtrace_event->tid != sample->tid)
			return 1;
	} else if (auxtrace_event->cpu != sample->cpu) {
		if (etm->per_thread_decoding) {
			/*
			 * Found a per-cpu buffer after a per-thread one was
			 * already found
			 */
			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
			return -EINVAL;
		}
		return 1;
	}

	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
		/*
		 * Clamp size in snapshot mode. The buffer size is clamped in
		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
		 * the buffer size.
		 */
		aux_size = min(aux_event->aux_size, auxtrace_event->size);

		/*
		 * In this mode, the head also points to the end of the buffer so aux_offset
		 * needs to have the size subtracted so it points to the beginning as in normal mode
		 */
		aux_offset = aux_event->aux_offset - aux_size;
	} else {
		aux_size = aux_event->aux_size;
		aux_offset = aux_event->aux_offset;
	}

	/* Queue the fragment only if the AUX event lies fully inside this buffer */
	if (aux_offset >= auxtrace_event->offset &&
	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
		struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;

		/*
		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
		 * based on the sizes of the aux event, and queue that fragment.
		 */
		auxtrace_fragment.auxtrace = *auxtrace_event;
		auxtrace_fragment.auxtrace.size = aux_size;
		auxtrace_fragment.auxtrace.offset = aux_offset;
		/*
		 * Point file_offset at the fragment's trace data: skip past the
		 * record header and any data preceding the fragment.
		 */
		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;

		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
						 file_offset, NULL);
		if (err)
			return err;

		/*
		 * The RAW flag on the AUX record tells whether the trace went
		 * through a CoreSight frame formatter. Each queue must see only
		 * one of the two formats.
		 */
		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
				UNFORMATTED : FORMATTED;
		if (etmq->format != UNSET && format != etmq->format) {
			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
			return -EINVAL;
		}
		etmq->format = format;
		return 0;
	}

	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
	return 1;
}
3039 
3040 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3041 					u64 offset __maybe_unused, void *data __maybe_unused)
3042 {
3043 	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3044 	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3045 		(*(int *)data)++; /* increment found count */
3046 		return cs_etm__process_aux_output_hw_id(session, event);
3047 	}
3048 	return 0;
3049 }
3050 
3051 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3052 					u64 offset __maybe_unused, void *data __maybe_unused)
3053 {
3054 	struct perf_sample sample;
3055 	int ret;
3056 	struct auxtrace_index_entry *ent;
3057 	struct auxtrace_index *auxtrace_index;
3058 	struct evsel *evsel;
3059 	size_t i;
3060 
3061 	/* Don't care about any other events, we're only queuing buffers for AUX events */
3062 	if (event->header.type != PERF_RECORD_AUX)
3063 		return 0;
3064 
3065 	if (event->header.size < sizeof(struct perf_record_aux))
3066 		return -EINVAL;
3067 
3068 	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3069 	if (!event->aux.aux_size)
3070 		return 0;
3071 
3072 	/*
3073 	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3074 	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3075 	 */
3076 	evsel = evlist__event2evsel(session->evlist, event);
3077 	if (!evsel)
3078 		return -EINVAL;
3079 	ret = evsel__parse_sample(evsel, event, &sample);
3080 	if (ret)
3081 		return ret;
3082 
3083 	/*
3084 	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3085 	 */
3086 	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3087 		for (i = 0; i < auxtrace_index->nr; i++) {
3088 			ent = &auxtrace_index->entries[i];
3089 			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3090 							 ent->sz, &event->aux, &sample);
3091 			/*
3092 			 * Stop search on error or successful values. Continue search on
3093 			 * 1 ('not found')
3094 			 */
3095 			if (ret != 1)
3096 				return ret;
3097 		}
3098 	}
3099 
3100 	/*
3101 	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3102 	 * don't exit with an error because it will still be possible to decode other aux records.
3103 	 */
3104 	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3105 	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3106 	return 0;
3107 }
3108 
3109 static int cs_etm__queue_aux_records(struct perf_session *session)
3110 {
3111 	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3112 								struct auxtrace_index, list);
3113 	if (index && index->nr > 0)
3114 		return perf_session__peek_events(session, session->header.data_offset,
3115 						 session->header.data_size,
3116 						 cs_etm__queue_aux_records_cb, NULL);
3117 
3118 	/*
3119 	 * We would get here if there are no entries in the index (either no auxtrace
3120 	 * buffers or no index at all). Fail silently as there is the possibility of
3121 	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3122 	 * false.
3123 	 *
3124 	 * In that scenario, buffers will not be split by AUX records.
3125 	 */
3126 	return 0;
3127 }
3128 
/*
 * NOTE(review): despite the name, HAS_PARAM() evaluates TRUE when CPU j's
 * metadata block is too short to contain 'param' - i.e. when the parameter is
 * *missing*. CS_ETM_NR_TRC_PARAMS counts the per-version params following the
 * common block, so an index at or beyond that count is not present.
 */
#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))

/*
 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
 * timestamps).
 */
static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
{
	int j;

	for (j = 0; j < num_cpu; j++) {
		switch (metadata[j][CS_ETM_MAGIC]) {
		case __perf_cs_etmv4_magic:
			/* TS_SOURCE param missing or not 1: no virtual timestamps */
			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
				return false;
			break;
		case __perf_cs_ete_magic:
			/* ETE has its own TS_SOURCE slot but the same semantics */
			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
				return false;
			break;
		default:
			/* Unknown / unsupported magic number. */
			return false;
		}
	}
	return true;
}
3157 
3158 /* map trace ids to correct metadata block, from information in metadata */
3159 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3160 					  u64 **metadata)
3161 {
3162 	u64 cs_etm_magic;
3163 	u8 trace_chan_id;
3164 	int i, err;
3165 
3166 	for (i = 0; i < num_cpu; i++) {
3167 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3168 		switch (cs_etm_magic) {
3169 		case __perf_cs_etmv3_magic:
3170 			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3171 			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3172 			break;
3173 		case __perf_cs_etmv4_magic:
3174 		case __perf_cs_ete_magic:
3175 			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3176 			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3177 			break;
3178 		default:
3179 			/* unknown magic number */
3180 			return -EINVAL;
3181 		}
3182 		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3183 		if (err)
3184 			return err;
3185 	}
3186 	return 0;
3187 }
3188 
3189 /*
3190  * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3191  * unused value to reduce the number of unneeded decoders created.
3192  */
3193 static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3194 {
3195 	u64 cs_etm_magic;
3196 	int i;
3197 
3198 	for (i = 0; i < num_cpu; i++) {
3199 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3200 		switch (cs_etm_magic) {
3201 		case __perf_cs_etmv3_magic:
3202 			if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3203 				metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3204 			break;
3205 		case __perf_cs_etmv4_magic:
3206 		case __perf_cs_ete_magic:
3207 			if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3208 				metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3209 			break;
3210 		default:
3211 			/* unknown magic number */
3212 			return -EINVAL;
3213 		}
3214 	}
3215 	return 0;
3216 }
3217 
3218 /*
3219  * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3220  * (formatted or not) packets to create the decoders.
3221  */
static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
{
	struct cs_etm_decoder_params d_params;

	/*
	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
	 * needed.
	 */
	int decoders = etmq->format == FORMATTED ? etmq->etm->num_cpu : 1;

	/* Use metadata to fill in trace parameters for trace decoder */
	struct cs_etm_trace_params  *t_params = zalloc(sizeof(*t_params) * decoders);

	/* out_free handles t_params == NULL: zfree(NULL) is a no-op */
	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->format,
				      etmq->queue_nr, decoders))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					dump_trace ? CS_ETM_OPERATION_PRINT :
						     CS_ETM_OPERATION_DECODE))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
					    t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	/* t_params was only needed to construct the decoder; free it on success too */
	zfree(&t_params);
	return 0;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	zfree(&t_params);
	/* All failure causes (alloc or init) collapse to -EINVAL here */
	return -EINVAL;
}
3272 
3273 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3274 {
3275 	struct auxtrace_queues *queues = &etm->queues;
3276 
3277 	for (unsigned int i = 0; i < queues->nr_queues; i++) {
3278 		bool empty = list_empty(&queues->queue_array[i].head);
3279 		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3280 		int ret;
3281 
3282 		/*
3283 		 * Don't create decoders for empty queues, mainly because
3284 		 * etmq->format is unknown for empty queues.
3285 		 */
3286 		assert(empty == (etmq->format == UNSET));
3287 		if (empty)
3288 			continue;
3289 
3290 		ret = cs_etm__create_queue_decoders(etmq);
3291 		if (ret)
3292 			return ret;
3293 	}
3294 	return 0;
3295 }
3296 
3297 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3298 				       struct perf_session *session)
3299 {
3300 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3301 	struct cs_etm_auxtrace *etm = NULL;
3302 	struct perf_record_time_conv *tc = &session->time_conv;
3303 	int event_header_size = sizeof(struct perf_event_header);
3304 	int total_size = auxtrace_info->header.size;
3305 	int priv_size = 0;
3306 	int num_cpu, max_cpu = 0;
3307 	int err = 0;
3308 	int aux_hw_id_found;
3309 	int i;
3310 	u64 *ptr = NULL;
3311 	u64 **metadata = NULL;
3312 
3313 	/* First the global part */
3314 	ptr = (u64 *) auxtrace_info->priv;
3315 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3316 	metadata = zalloc(sizeof(*metadata) * num_cpu);
3317 	if (!metadata)
3318 		return -ENOMEM;
3319 
3320 	/* Start parsing after the common part of the header */
3321 	i = CS_HEADER_VERSION_MAX;
3322 
3323 	/*
3324 	 * The metadata is stored in the auxtrace_info section and encodes
3325 	 * the configuration of the ARM embedded trace macrocell which is
3326 	 * required by the trace decoder to properly decode the trace due
3327 	 * to its highly compressed nature.
3328 	 */
3329 	for (int j = 0; j < num_cpu; j++) {
3330 		if (ptr[i] == __perf_cs_etmv3_magic) {
3331 			metadata[j] =
3332 				cs_etm__create_meta_blk(ptr, &i,
3333 							CS_ETM_PRIV_MAX,
3334 							CS_ETM_NR_TRC_PARAMS_V0);
3335 		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3336 			metadata[j] =
3337 				cs_etm__create_meta_blk(ptr, &i,
3338 							CS_ETMV4_PRIV_MAX,
3339 							CS_ETMV4_NR_TRC_PARAMS_V0);
3340 		} else if (ptr[i] == __perf_cs_ete_magic) {
3341 			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3342 		} else {
3343 			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3344 				  ptr[i]);
3345 			err = -EINVAL;
3346 			goto err_free_metadata;
3347 		}
3348 
3349 		if (!metadata[j]) {
3350 			err = -ENOMEM;
3351 			goto err_free_metadata;
3352 		}
3353 
3354 		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3355 			max_cpu = metadata[j][CS_ETM_CPU];
3356 	}
3357 
3358 	/*
3359 	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3360 	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3361 	 * global metadata, and each cpu's metadata respectively.
3362 	 * The following tests if the correct number of double words was
3363 	 * present in the auxtrace info section.
3364 	 */
3365 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3366 	if (i * 8 != priv_size) {
3367 		err = -EINVAL;
3368 		goto err_free_metadata;
3369 	}
3370 
3371 	etm = zalloc(sizeof(*etm));
3372 
3373 	if (!etm) {
3374 		err = -ENOMEM;
3375 		goto err_free_metadata;
3376 	}
3377 
3378 	/*
3379 	 * As all the ETMs run at the same exception level, the system should
3380 	 * have the same PID format crossing CPUs.  So cache the PID format
3381 	 * and reuse it for sequential decoding.
3382 	 */
3383 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3384 
3385 	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
3386 	if (err)
3387 		goto err_free_etm;
3388 
3389 	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
3390 		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
3391 		if (err)
3392 			goto err_free_queues;
3393 	}
3394 
3395 	if (session->itrace_synth_opts->set) {
3396 		etm->synth_opts = *session->itrace_synth_opts;
3397 	} else {
3398 		itrace_synth_opts__set_default(&etm->synth_opts,
3399 				session->itrace_synth_opts->default_no_sample);
3400 		etm->synth_opts.callchain = false;
3401 	}
3402 
3403 	etm->session = session;
3404 
3405 	etm->num_cpu = num_cpu;
3406 	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3407 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3408 	etm->metadata = metadata;
3409 	etm->auxtrace_type = auxtrace_info->type;
3410 
3411 	if (etm->synth_opts.use_timestamp)
3412 		/*
3413 		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3414 		 * therefore the decoder cannot know if the timestamp trace is
3415 		 * same with the kernel time.
3416 		 *
3417 		 * If a user has knowledge for the working platform and can
3418 		 * specify itrace option 'T' to tell decoder to forcely use the
3419 		 * traced timestamp as the kernel time.
3420 		 */
3421 		etm->has_virtual_ts = true;
3422 	else
3423 		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3424 		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3425 
3426 	if (!etm->has_virtual_ts)
3427 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3428 			    "The time field of the samples will not be set accurately.\n"
3429 			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3430 			    "you can specify the itrace option 'T' for timestamp decoding\n"
3431 			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3432 
3433 	etm->auxtrace.process_event = cs_etm__process_event;
3434 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3435 	etm->auxtrace.flush_events = cs_etm__flush_events;
3436 	etm->auxtrace.free_events = cs_etm__free_events;
3437 	etm->auxtrace.free = cs_etm__free;
3438 	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3439 	session->auxtrace = &etm->auxtrace;
3440 
3441 	err = cs_etm__setup_timeless_decoding(etm);
3442 	if (err)
3443 		return err;
3444 
3445 	etm->tc.time_shift = tc->time_shift;
3446 	etm->tc.time_mult = tc->time_mult;
3447 	etm->tc.time_zero = tc->time_zero;
3448 	if (event_contains(*tc, time_cycles)) {
3449 		etm->tc.time_cycles = tc->time_cycles;
3450 		etm->tc.time_mask = tc->time_mask;
3451 		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3452 		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3453 	}
3454 	err = cs_etm__synth_events(etm, session);
3455 	if (err)
3456 		goto err_free_queues;
3457 
3458 	err = cs_etm__queue_aux_records(session);
3459 	if (err)
3460 		goto err_free_queues;
3461 
3462 	/*
3463 	 * Map Trace ID values to CPU metadata.
3464 	 *
3465 	 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3466 	 * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3467 	 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3468 	 *
3469 	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3470 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3471 	 * in which case a different value will be used. This means an older perf may still
3472 	 * be able to record and read files generate on a newer system.
3473 	 *
3474 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3475 	 * those packets. If they are there then the values will be mapped and plugged into
3476 	 * the metadata. We then set any remaining metadata values with the used flag to a
3477 	 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3478 	 *
3479 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3480 	 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
3481 	 * flags if present.
3482 	 */
3483 
3484 	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3485 	aux_hw_id_found = 0;
3486 	err = perf_session__peek_events(session, session->header.data_offset,
3487 					session->header.data_size,
3488 					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3489 	if (err)
3490 		goto err_free_queues;
3491 
3492 	/* if HW ID found then clear any unused metadata ID values */
3493 	if (aux_hw_id_found)
3494 		err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3495 	/* otherwise, this is a file with metadata values only, map from metadata */
3496 	else
3497 		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3498 
3499 	if (err)
3500 		goto err_free_queues;
3501 
3502 	err = cs_etm__create_decoders(etm);
3503 	if (err)
3504 		goto err_free_queues;
3505 
3506 	etm->data_queued = etm->queues.populated;
3507 	return 0;
3508 
3509 err_free_queues:
3510 	auxtrace_queues__free(&etm->queues);
3511 	session->auxtrace = NULL;
3512 err_free_etm:
3513 	zfree(&etm);
3514 err_free_metadata:
3515 	/* No need to check @metadata[j], free(NULL) is supported */
3516 	for (int j = 0; j < num_cpu; j++)
3517 		zfree(&metadata[j]);
3518 	zfree(&metadata);
3519 	return err;
3520 }
3521