xref: /linux/tools/perf/util/cs-etm.c (revision 9a989e60cc6e29d98aed2087425cba53bf4b392d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8 
9 #include <limits.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17 
18 #include <stdlib.h>
19 
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42 
/*
 * Per-session CoreSight ETM decoding state. The generic 'struct auxtrace' is
 * embedded so that this structure can be recovered from session->auxtrace
 * with container_of().
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	/* Each queues.queue_array[i].priv holds a struct cs_etm_queue (or NULL) */
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless has no timestamps in the trace so overlapping mmap lookups
	 * are less accurate but produces smaller trace data. We use context IDs
	 * in the trace instead of matching timestamps with fork records so
	 * they're not really needed in the general case. Overlapping mmaps
	 * happen in cases like between a fork and an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	/* Number of entries in the 'metadata' array */
	int num_cpu;
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	/* One metadata array per CPU, each indexed with the CS_ETM_* offsets */
	u64 **metadata;
	/* Compared with evsel->core.attr.type in cs_etm__evsel_is_auxtrace() */
	unsigned int pmu_type;
	/* Cached PID format, see cs_etm__init_pid_fmt() */
	enum cs_etm_pid_fmt pid_fmt;
};
83 
/*
 * Decoding state for one trace ID within a cs_etm_queue. Instances are
 * created on demand by cs_etm__etmq_get_traceid_queue().
 */
struct cs_etm_traceid_queue {
	u8 trace_chan_id;
	u64 period_instructions;
	size_t last_branch_pos;
	/* Buffer of PERF_SAMPLE_MAX_SIZE bytes */
	union perf_event *event_buf;
	/* Both branch stacks are only allocated when synth_opts.last_branch is set */
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	/* 'packet' and 'prev_packet' are exchanged by cs_etm__packet_swap() */
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;

	struct thread *decode_thread;
	ocsd_ex_level decode_el;

	/*
	 * The frontend accesses the EL from '[prev_]packet' because it needs
	 * previous EL for branch and current EL for instruction samples. It's
	 * not possible to change thread in a single branch sample so no need to
	 * store or access the thread through the packet.
	 */
	struct thread *frontend_thread;
};
106 
/*
 * Format of the trace data held by a cs_etm_queue. FORMATTED is what gets
 * reported to the decoder as 'formatted' in cs_etm__init_decoder_params(),
 * and UNFORMATTED queues only receive a single trace ID mapping in
 * cs_etm__map_trace_id_v0().
 */
enum cs_etm_format {
	UNSET,
	FORMATTED,
	UNFORMATTED
};
112 
/*
 * Private data of one auxtrace queue (stored in auxtrace_queue::priv): the
 * decoder, the buffer currently being consumed and the trace ID lookup
 * tables.
 */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	/* Index of this queue in etm->queues.queue_array */
	unsigned int queue_nr;
	/* Trace ID with a timestamp waiting to be serviced, 0 when none */
	u8 pending_timestamp_chan_id;
	enum cs_etm_format format;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
	/* Conversion between traceID and metadata pointers */
	struct intlist *traceid_list;
	/*
	 * Same as traceid_list, but traceid_list may be a reference to another
	 * queue's which has a matching sink ID.
	 */
	struct intlist *own_traceid_list;
	/* Compared against SINK_UNSET before a sink ID mismatch is reported */
	u32 sink_id;
};
135 
/* Forward declarations of file-local helpers so they can be called ahead of their definitions. */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
144 
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify the auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Every macro argument is parenthesised so that callers may pass arbitrary
 * expressions (e.g. 'a | b') without operator precedence changing the
 * result.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
		      ((queue_nr) << 16 | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
/* Sentinel for a sink ID that has not been assigned */
#define SINK_UNSET ((u32) -1)
159 
160 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
161 {
162 	etmidr &= ETMIDR_PTM_VERSION;
163 
164 	if (etmidr == ETMIDR_PTM_VERSION)
165 		return CS_ETM_PROTO_PTM;
166 
167 	return CS_ETM_PROTO_ETMV3;
168 }
169 
170 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
171 {
172 	struct int_node *inode;
173 	u64 *metadata;
174 
175 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
176 	if (!inode)
177 		return -EINVAL;
178 
179 	metadata = inode->priv;
180 	*magic = metadata[CS_ETM_MAGIC];
181 	return 0;
182 }
183 
184 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
185 {
186 	struct int_node *inode;
187 	u64 *metadata;
188 
189 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
190 	if (!inode)
191 		return -EINVAL;
192 
193 	metadata = inode->priv;
194 	*cpu = (int)metadata[CS_ETM_CPU];
195 	return 0;
196 }
197 
198 /*
199  * The returned PID format is presented as an enum:
200  *
201  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
202  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
203  *   CS_ETM_PIDFMT_NONE: No context IDs
204  *
205  * It's possible that the two format attributes 'contextid1' and 'contextid2'
206  * are enabled at the same time when the session runs on an EL2 kernel.
207  * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
208  * recorded in the trace data, the tool will selectively use
209  * CONTEXTIDR_EL2 as PID.
210  *
211  * The result is cached in etm->pid_fmt so this function only needs to be called
212  * when processing the aux info.
213  */
214 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
215 {
216 	u64 val;
217 
218 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
219 		val = metadata[CS_ETM_ETMCR];
220 		/* CONTEXTIDR is traced */
221 		if (val & ETMCR_CTXTID)
222 			return CS_ETM_PIDFMT_CTXTID;
223 	} else {
224 		val = metadata[CS_ETMV4_TRCCONFIGR];
225 		/* CONTEXTIDR_EL2 is traced */
226 		if (val & (TRCCONFIGR_VMID | TRCCONFIGR_VMIDOPT))
227 			return CS_ETM_PIDFMT_CTXTID2;
228 		/* CONTEXTIDR_EL1 is traced */
229 		else if (val & TRCCONFIGR_CID)
230 			return CS_ETM_PIDFMT_CTXTID;
231 	}
232 
233 	return CS_ETM_PIDFMT_NONE;
234 }
235 
236 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
237 {
238 	return etmq->etm->pid_fmt;
239 }
240 
241 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
242 					u8 trace_chan_id, u64 *cpu_metadata)
243 {
244 	/* Get an RB node for this CPU */
245 	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
246 
247 	/* Something went wrong, no need to continue */
248 	if (!inode)
249 		return -ENOMEM;
250 
251 	/* Disallow re-mapping a different traceID to metadata pair. */
252 	if (inode->priv) {
253 		u64 *curr_cpu_data = inode->priv;
254 		u8 curr_chan_id;
255 		int err;
256 
257 		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
258 			/*
259 			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
260 			 * are expected (but not supported) in per-thread mode,
261 			 * rather than signifying an error.
262 			 */
263 			if (etmq->etm->per_thread_decoding)
264 				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
265 			else
266 				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
267 
268 			return -EINVAL;
269 		}
270 
271 		/* check that the mapped ID matches */
272 		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
273 		if (err)
274 			return err;
275 
276 		if (curr_chan_id != trace_chan_id) {
277 			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
278 			return -EINVAL;
279 		}
280 
281 		/* Skip re-adding the same mappings if everything matched */
282 		return 0;
283 	}
284 
285 	/* Not one we've seen before, associate the traceID with the metadata pointer */
286 	inode->priv = cpu_metadata;
287 
288 	return 0;
289 }
290 
291 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
292 {
293 	if (etm->per_thread_decoding)
294 		return etm->queues.queue_array[0].priv;
295 
296 	if (cpu < 0 || cpu >= (int)etm->queues.nr_queues)
297 		return NULL;
298 
299 	return etm->queues.queue_array[cpu].priv;
300 }
301 
302 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
303 				   u64 *cpu_metadata)
304 {
305 	struct cs_etm_queue *etmq;
306 
307 	/*
308 	 * If the queue is unformatted then only save one mapping in the
309 	 * queue associated with that CPU so only one decoder is made.
310 	 */
311 	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
312 	if (!etmq)
313 		return -EINVAL;
314 
315 	if (etmq->format == UNFORMATTED)
316 		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
317 						    cpu_metadata);
318 
319 	/*
320 	 * Otherwise, version 0 trace IDs are global so save them into every
321 	 * queue.
322 	 */
323 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
324 		int ret;
325 
326 		etmq = etm->queues.queue_array[i].priv;
327 		if (!etmq)
328 			continue;
329 
330 		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
331 						   cpu_metadata);
332 		if (ret)
333 			return ret;
334 	}
335 
336 	return 0;
337 }
338 
339 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
340 				       u64 hw_id)
341 {
342 	int err;
343 	u64 *cpu_data;
344 	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
345 
346 	cpu_data = get_cpu_data(etm, cpu);
347 	if (cpu_data == NULL)
348 		return -EINVAL;
349 
350 	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
351 	if (err)
352 		return err;
353 
354 	/*
355 	 * if we are picking up the association from the packet, need to plug
356 	 * the correct trace ID into the metadata for setting up decoders later.
357 	 */
358 	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
359 }
360 
361 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
362 					 u64 hw_id)
363 {
364 	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
365 	int ret;
366 	u64 *cpu_data;
367 	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
368 	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
369 
370 	if (!etmq)
371 		return -EINVAL;
372 
373 	/*
374 	 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
375 	 * let it pass for now until an actual overlapping trace ID is hit. In
376 	 * most cases IDs won't overlap even if the sink changes.
377 	 */
378 	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
379 	    etmq->sink_id != sink_id) {
380 		pr_err("CS_ETM: mismatch between sink IDs\n");
381 		return -EINVAL;
382 	}
383 
384 	etmq->sink_id = sink_id;
385 
386 	/* Find which other queues use this sink and link their ID maps */
387 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
388 		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
389 
390 		if (!other_etmq)
391 			continue;
392 
393 		/* Different sinks, skip */
394 		if (other_etmq->sink_id != etmq->sink_id)
395 			continue;
396 
397 		/* Already linked, skip */
398 		if (other_etmq->traceid_list == etmq->traceid_list)
399 			continue;
400 
401 		/* At the point of first linking, this one should be empty */
402 		if (!intlist__empty(etmq->traceid_list)) {
403 			pr_err("CS_ETM: Can't link populated trace ID lists\n");
404 			return -EINVAL;
405 		}
406 
407 		etmq->own_traceid_list = NULL;
408 		intlist__delete(etmq->traceid_list);
409 		etmq->traceid_list = other_etmq->traceid_list;
410 		break;
411 	}
412 
413 	cpu_data = get_cpu_data(etm, cpu);
414 	if (!cpu_data)
415 		return -EINVAL;
416 
417 	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
418 	if (ret)
419 		return ret;
420 
421 	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
422 	if (ret)
423 		return ret;
424 
425 	return 0;
426 }
427 
428 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
429 {
430 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
431 
432 	switch (cs_etm_magic) {
433 	case __perf_cs_etmv3_magic:
434 		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
435 				      CORESIGHT_TRACE_ID_VAL_MASK);
436 		break;
437 	case __perf_cs_etmv4_magic:
438 	case __perf_cs_ete_magic:
439 		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
440 				      CORESIGHT_TRACE_ID_VAL_MASK);
441 		break;
442 	default:
443 		return -EINVAL;
444 	}
445 	return 0;
446 }
447 
448 /*
449  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
450  */
451 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
452 {
453 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
454 
455 	switch (cs_etm_magic) {
456 	case __perf_cs_etmv3_magic:
457 		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
458 		break;
459 	case __perf_cs_etmv4_magic:
460 	case __perf_cs_ete_magic:
461 		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
462 		break;
463 
464 	default:
465 		return -EINVAL;
466 	}
467 	return 0;
468 }
469 
470 /*
471  * Get a metadata index for a specific cpu from an array.
472  *
473  */
474 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
475 {
476 	int i;
477 
478 	for (i = 0; i < etm->num_cpu; i++) {
479 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
480 			return i;
481 		}
482 	}
483 
484 	return -1;
485 }
486 
487 /*
488  * Get a metadata for a specific cpu from an array.
489  *
490  */
491 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
492 {
493 	int idx = get_cpu_data_idx(etm, cpu);
494 
495 	return (idx != -1) ? etm->metadata[idx] : NULL;
496 }
497 
498 /*
499  * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
500  *
501  * The payload associates the Trace ID and the CPU.
502  * The routine is tolerant of seeing multiple packets with the same association,
503  * but a CPU / Trace ID association changing during a session is an error.
504  */
505 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
506 					    union perf_event *event)
507 {
508 	struct cs_etm_auxtrace *etm;
509 	struct perf_sample sample;
510 	struct evsel *evsel;
511 	u64 hw_id;
512 	int cpu, version, err;
513 
514 	/* extract and parse the HW ID */
515 	hw_id = event->aux_output_hw_id.hw_id;
516 	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
517 
518 	/* check that we can handle this version */
519 	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
520 		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
521 		       version);
522 		return -EINVAL;
523 	}
524 
525 	/* get access to the etm metadata */
526 	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
527 	if (!etm || !etm->metadata)
528 		return -EINVAL;
529 
530 	/* parse the sample to get the CPU */
531 	evsel = evlist__event2evsel(session->evlist, event);
532 	if (!evsel)
533 		return -EINVAL;
534 	perf_sample__init(&sample, /*all=*/false);
535 	err = evsel__parse_sample(evsel, event, &sample);
536 	if (err)
537 		goto out;
538 	cpu = sample.cpu;
539 	if (cpu == -1) {
540 		/* no CPU in the sample - possibly recorded with an old version of perf */
541 		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
542 		err = -EINVAL;
543 		goto out;
544 	}
545 
546 	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
547 		err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
548 		goto out;
549 	}
550 
551 	err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
552 out:
553 	perf_sample__exit(&sample);
554 	return err;
555 }
556 
557 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
558 					      u8 trace_chan_id)
559 {
560 	/*
561 	 * When a timestamp packet is encountered the backend code
562 	 * is stopped so that the front end has time to process packets
563 	 * that were accumulated in the traceID queue.  Since there can
564 	 * be more than one channel per cs_etm_queue, we need to specify
565 	 * what traceID queue needs servicing.
566 	 */
567 	etmq->pending_timestamp_chan_id = trace_chan_id;
568 }
569 
570 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
571 				      u8 *trace_chan_id)
572 {
573 	struct cs_etm_packet_queue *packet_queue;
574 
575 	if (!etmq->pending_timestamp_chan_id)
576 		return 0;
577 
578 	if (trace_chan_id)
579 		*trace_chan_id = etmq->pending_timestamp_chan_id;
580 
581 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
582 						     etmq->pending_timestamp_chan_id);
583 	if (!packet_queue)
584 		return 0;
585 
586 	/* Acknowledge pending status */
587 	etmq->pending_timestamp_chan_id = 0;
588 
589 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
590 	return packet_queue->cs_timestamp;
591 }
592 
593 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
594 {
595 	int i;
596 
597 	queue->head = 0;
598 	queue->tail = 0;
599 	queue->packet_count = 0;
600 	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
601 		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
602 		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
603 		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
604 		queue->packet_buffer[i].instr_count = 0;
605 		queue->packet_buffer[i].last_instr_taken_branch = false;
606 		queue->packet_buffer[i].last_instr_size = 0;
607 		queue->packet_buffer[i].last_instr_type = 0;
608 		queue->packet_buffer[i].last_instr_subtype = 0;
609 		queue->packet_buffer[i].last_instr_cond = 0;
610 		queue->packet_buffer[i].flags = 0;
611 		queue->packet_buffer[i].exception_number = UINT32_MAX;
612 		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
613 		queue->packet_buffer[i].cpu = INT_MIN;
614 	}
615 }
616 
617 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
618 {
619 	int idx;
620 	struct int_node *inode;
621 	struct cs_etm_traceid_queue *tidq;
622 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
623 
624 	intlist__for_each_entry(inode, traceid_queues_list) {
625 		idx = (int)(intptr_t)inode->priv;
626 		tidq = etmq->traceid_queues[idx];
627 		cs_etm__clear_packet_queue(&tidq->packet_queue);
628 	}
629 }
630 
631 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
632 				      struct cs_etm_traceid_queue *tidq,
633 				      u8 trace_chan_id)
634 {
635 	int rc = -ENOMEM;
636 	struct auxtrace_queue *queue;
637 	struct cs_etm_auxtrace *etm = etmq->etm;
638 
639 	cs_etm__clear_packet_queue(&tidq->packet_queue);
640 
641 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
642 	tidq->trace_chan_id = trace_chan_id;
643 	tidq->decode_el = ocsd_EL_unknown;
644 	tidq->frontend_thread = machine__findnew_thread(&etm->session->machines.host, -1,
645 					       queue->tid);
646 	tidq->decode_thread = machine__findnew_thread(&etm->session->machines.host, -1,
647 					       queue->tid);
648 
649 	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
650 	if (!tidq->packet)
651 		goto out;
652 
653 	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
654 	if (!tidq->prev_packet)
655 		goto out_free;
656 
657 	if (etm->synth_opts.last_branch) {
658 		size_t sz = sizeof(struct branch_stack);
659 
660 		sz += etm->synth_opts.last_branch_sz *
661 		      sizeof(struct branch_entry);
662 		tidq->last_branch = zalloc(sz);
663 		if (!tidq->last_branch)
664 			goto out_free;
665 		tidq->last_branch_rb = zalloc(sz);
666 		if (!tidq->last_branch_rb)
667 			goto out_free;
668 	}
669 
670 	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
671 	if (!tidq->event_buf)
672 		goto out_free;
673 
674 	return 0;
675 
676 out_free:
677 	zfree(&tidq->last_branch_rb);
678 	zfree(&tidq->last_branch);
679 	zfree(&tidq->prev_packet);
680 	zfree(&tidq->packet);
681 out:
682 	return rc;
683 }
684 
685 static struct cs_etm_traceid_queue
686 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
687 {
688 	int idx;
689 	struct int_node *inode;
690 	struct intlist *traceid_queues_list;
691 	struct cs_etm_traceid_queue *tidq, **traceid_queues;
692 	struct cs_etm_auxtrace *etm = etmq->etm;
693 
694 	if (etm->per_thread_decoding)
695 		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
696 
697 	traceid_queues_list = etmq->traceid_queues_list;
698 
699 	/*
700 	 * Check if the traceid_queue exist for this traceID by looking
701 	 * in the queue list.
702 	 */
703 	inode = intlist__find(traceid_queues_list, trace_chan_id);
704 	if (inode) {
705 		idx = (int)(intptr_t)inode->priv;
706 		return etmq->traceid_queues[idx];
707 	}
708 
709 	/* We couldn't find a traceid_queue for this traceID, allocate one */
710 	tidq = malloc(sizeof(*tidq));
711 	if (!tidq)
712 		return NULL;
713 
714 	memset(tidq, 0, sizeof(*tidq));
715 
716 	/* Get a valid index for the new traceid_queue */
717 	idx = intlist__nr_entries(traceid_queues_list);
718 	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
719 	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
720 	if (!inode)
721 		goto out_free;
722 
723 	/* Associate this traceID with this index */
724 	inode->priv = (void *)(intptr_t)idx;
725 
726 	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
727 		goto out_free;
728 
729 	/* Grow the traceid_queues array by one unit */
730 	traceid_queues = etmq->traceid_queues;
731 	traceid_queues = reallocarray(traceid_queues,
732 				      idx + 1,
733 				      sizeof(*traceid_queues));
734 
735 	/*
736 	 * On failure reallocarray() returns NULL and the original block of
737 	 * memory is left untouched.
738 	 */
739 	if (!traceid_queues)
740 		goto out_free;
741 
742 	traceid_queues[idx] = tidq;
743 	etmq->traceid_queues = traceid_queues;
744 
745 	return etmq->traceid_queues[idx];
746 
747 out_free:
748 	/*
749 	 * Function intlist__remove() removes the inode from the list
750 	 * and delete the memory associated to it.
751 	 */
752 	intlist__remove(traceid_queues_list, inode);
753 	free(tidq);
754 
755 	return NULL;
756 }
757 
758 struct cs_etm_packet_queue
759 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
760 {
761 	struct cs_etm_traceid_queue *tidq;
762 
763 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
764 	if (tidq)
765 		return &tidq->packet_queue;
766 
767 	return NULL;
768 }
769 
770 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
771 				struct cs_etm_traceid_queue *tidq)
772 {
773 	struct cs_etm_packet *tmp;
774 
775 	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
776 	    etm->synth_opts.instructions) {
777 		/*
778 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
779 		 * the next incoming packet.
780 		 */
781 		tmp = tidq->packet;
782 		tidq->packet = tidq->prev_packet;
783 		tidq->prev_packet = tmp;
784 	}
785 }
786 
787 static void cs_etm__packet_dump(const char *pkt_string, void *data)
788 {
789 	const char *color = PERF_COLOR_BLUE;
790 	int len = strlen(pkt_string);
791 	struct cs_etm_queue *etmq = data;
792 	char queue_nr[64];
793 
794 	if (verbose)
795 		snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr);
796 	else
797 		queue_nr[0] = '\0';
798 
799 	if (len && (pkt_string[len-1] == '\n'))
800 		color_fprintf(stdout, color, "	%s%s", queue_nr, pkt_string);
801 	else
802 		color_fprintf(stdout, color, "	%s%s\n", queue_nr, pkt_string);
803 
804 	fflush(stdout);
805 }
806 
807 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
808 					  u64 *metadata, u32 etmidr)
809 {
810 	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
811 	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
812 	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
813 }
814 
815 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
816 					  u64 *metadata)
817 {
818 	t_params->protocol = CS_ETM_PROTO_ETMV4i;
819 	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
820 	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
821 	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
822 	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
823 	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
824 	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
825 }
826 
827 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
828 					u64 *metadata)
829 {
830 	t_params->protocol = CS_ETM_PROTO_ETE;
831 	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
832 	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
833 	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
834 	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
835 	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
836 	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
837 	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
838 }
839 
840 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
841 				     struct cs_etm_queue *etmq)
842 {
843 	struct int_node *inode;
844 
845 	intlist__for_each_entry(inode, etmq->traceid_list) {
846 		u64 *metadata = inode->priv;
847 		u64 architecture = metadata[CS_ETM_MAGIC];
848 		u32 etmidr;
849 
850 		switch (architecture) {
851 		case __perf_cs_etmv3_magic:
852 			etmidr = metadata[CS_ETM_ETMIDR];
853 			cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
854 			break;
855 		case __perf_cs_etmv4_magic:
856 			cs_etm__set_trace_param_etmv4(t_params++, metadata);
857 			break;
858 		case __perf_cs_ete_magic:
859 			cs_etm__set_trace_param_ete(t_params++, metadata);
860 			break;
861 		default:
862 			return -EINVAL;
863 		}
864 	}
865 
866 	return 0;
867 }
868 
869 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
870 				       struct cs_etm_queue *etmq,
871 				       enum cs_etm_decoder_operation mode)
872 {
873 	int ret = -EINVAL;
874 
875 	if (!(mode < CS_ETM_OPERATION_MAX))
876 		goto out;
877 
878 	d_params->packet_printer = cs_etm__packet_dump;
879 	d_params->operation = mode;
880 	d_params->data = etmq;
881 	d_params->formatted = etmq->format == FORMATTED;
882 	d_params->fsyncs = false;
883 	d_params->hsyncs = false;
884 	d_params->frame_aligned = true;
885 
886 	ret = 0;
887 out:
888 	return ret;
889 }
890 
891 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
892 			       struct auxtrace_buffer *buffer)
893 {
894 	int ret;
895 	const char *color = PERF_COLOR_BLUE;
896 	size_t buffer_used = 0;
897 
898 	fprintf(stdout, "\n");
899 	color_fprintf(stdout, color,
900 		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
901 		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
902 
903 	do {
904 		size_t consumed;
905 
906 		ret = cs_etm_decoder__process_data_block(
907 				etmq->decoder, buffer->offset,
908 				&((u8 *)buffer->data)[buffer_used],
909 				buffer->size - buffer_used, &consumed);
910 		if (ret)
911 			break;
912 
913 		buffer_used += consumed;
914 	} while (buffer_used < buffer->size);
915 
916 	cs_etm_decoder__reset(etmq->decoder);
917 }
918 
919 static int cs_etm__flush_events(struct perf_session *session,
920 				const struct perf_tool *tool)
921 {
922 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
923 						   struct cs_etm_auxtrace,
924 						   auxtrace);
925 	if (dump_trace)
926 		return 0;
927 
928 	if (!tool->ordered_events)
929 		return -EINVAL;
930 
931 	if (etm->timeless_decoding) {
932 		/*
933 		 * Pass tid = -1 to process all queues. But likely they will have
934 		 * already been processed on PERF_RECORD_EXIT anyway.
935 		 */
936 		return cs_etm__process_timeless_queues(etm, -1);
937 	}
938 
939 	return cs_etm__process_timestamped_queues(etm);
940 }
941 
942 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
943 {
944 	int idx;
945 	uintptr_t priv;
946 	struct int_node *inode, *tmp;
947 	struct cs_etm_traceid_queue *tidq;
948 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
949 
950 	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
951 		priv = (uintptr_t)inode->priv;
952 		idx = priv;
953 
954 		/* Free this traceid_queue from the array */
955 		tidq = etmq->traceid_queues[idx];
956 		thread__zput(tidq->frontend_thread);
957 		thread__zput(tidq->decode_thread);
958 		zfree(&tidq->event_buf);
959 		zfree(&tidq->last_branch);
960 		zfree(&tidq->last_branch_rb);
961 		zfree(&tidq->prev_packet);
962 		zfree(&tidq->packet);
963 		zfree(&tidq);
964 
965 		/*
966 		 * Function intlist__remove() removes the inode from the list
967 		 * and delete the memory associated to it.
968 		 */
969 		intlist__remove(traceid_queues_list, inode);
970 	}
971 
972 	/* Then the RB tree itself */
973 	intlist__delete(traceid_queues_list);
974 	etmq->traceid_queues_list = NULL;
975 
976 	/* finally free the traceid_queues array */
977 	zfree(&etmq->traceid_queues);
978 }
979 
980 static void cs_etm__free_queue(void *priv)
981 {
982 	struct int_node *inode, *tmp;
983 	struct cs_etm_queue *etmq = priv;
984 
985 	if (!etmq)
986 		return;
987 
988 	cs_etm_decoder__free(etmq->decoder);
989 	cs_etm__free_traceid_queues(etmq);
990 
991 	if (etmq->own_traceid_list) {
992 		/* First remove all traceID/metadata nodes for the RB tree */
993 		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
994 			intlist__remove(etmq->own_traceid_list, inode);
995 
996 		/* Then the RB tree itself */
997 		intlist__delete(etmq->own_traceid_list);
998 	}
999 
1000 	free(etmq);
1001 }
1002 
1003 static void cs_etm__free_events(struct perf_session *session)
1004 {
1005 	unsigned int i;
1006 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1007 						   struct cs_etm_auxtrace,
1008 						   auxtrace);
1009 	struct auxtrace_queues *queues = &aux->queues;
1010 
1011 	for (i = 0; i < queues->nr_queues; i++) {
1012 		cs_etm__free_queue(queues->queue_array[i].priv);
1013 		queues->queue_array[i].priv = NULL;
1014 	}
1015 
1016 	auxtrace_queues__free(queues);
1017 }
1018 
1019 static void cs_etm__free(struct perf_session *session)
1020 {
1021 	int i;
1022 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1023 						   struct cs_etm_auxtrace,
1024 						   auxtrace);
1025 	cs_etm__free_events(session);
1026 	session->auxtrace = NULL;
1027 
1028 	for (i = 0; i < aux->num_cpu; i++)
1029 		zfree(&aux->metadata[i]);
1030 
1031 	zfree(&aux->metadata);
1032 	zfree(&aux);
1033 }
1034 
1035 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
1036 				      struct evsel *evsel)
1037 {
1038 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1039 						   struct cs_etm_auxtrace,
1040 						   auxtrace);
1041 
1042 	return evsel->core.attr.type == aux->pmu_type;
1043 }
1044 
1045 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
1046 					   ocsd_ex_level el)
1047 {
1048 	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
1049 
1050 	/*
1051 	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
1052 	 * running at EL1 assume everything is the host.
1053 	 */
1054 	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
1055 		return &etmq->etm->session->machines.host;
1056 
1057 	/*
1058 	 * Not perfect, but otherwise assume anything in EL1 is the default
1059 	 * guest, and everything else is the host. Distinguishing between guest
1060 	 * and host userspaces isn't currently supported either. Neither is
1061 	 * multiple guest support. All this does is reduce the likeliness of
1062 	 * decode errors where we look into the host kernel maps when it should
1063 	 * have been the guest maps.
1064 	 */
1065 	switch (el) {
1066 	case ocsd_EL1:
1067 		return machines__find_guest(&etmq->etm->session->machines,
1068 					    DEFAULT_GUEST_KERNEL_ID);
1069 	case ocsd_EL3:
1070 	case ocsd_EL2:
1071 	case ocsd_EL0:
1072 	case ocsd_EL_unknown:
1073 	default:
1074 		return &etmq->etm->session->machines.host;
1075 	}
1076 }
1077 
1078 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
1079 			   ocsd_ex_level el)
1080 {
1081 	struct machine *machine = cs_etm__get_machine(etmq, el);
1082 
1083 	if (address >= machine__kernel_start(machine)) {
1084 		if (machine__is_host(machine))
1085 			return PERF_RECORD_MISC_KERNEL;
1086 		else
1087 			return PERF_RECORD_MISC_GUEST_KERNEL;
1088 	} else {
1089 		if (machine__is_host(machine))
1090 			return PERF_RECORD_MISC_USER;
1091 		else {
1092 			/*
1093 			 * Can't really happen at the moment because
1094 			 * cs_etm__get_machine() will always return
1095 			 * machines.host for any non EL1 trace.
1096 			 */
1097 			return PERF_RECORD_MISC_GUEST_USER;
1098 		}
1099 	}
1100 }
1101 
1102 static u32 __cs_etm__mem_access(struct cs_etm_queue *etmq,
1103 				u64 address, size_t size, u8 *buffer,
1104 				const ocsd_mem_space_acc_t mem_space,
1105 				ocsd_ex_level el, struct thread *thread)
1106 {
1107 	u8  cpumode;
1108 	u64 offset;
1109 	int len;
1110 	struct addr_location al;
1111 	struct dso *dso;
1112 	int ret = 0;
1113 
1114 	if (!etmq)
1115 		return 0;
1116 
1117 	addr_location__init(&al);
1118 
1119 	/*
1120 	 * We track EL for the frontend and the backend when receiving context
1121 	 * and range packets. OpenCSD doesn't distinguish between EL0 and EL1
1122 	 * for this mem access callback so we had to do the extra tracking. Skip
1123 	 * validation if it's any of the 'any' values.
1124 	 */
1125 	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
1126 	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
1127 		if (mem_space & OCSD_MEM_SPACE_EL1N) {
1128 			/* Includes both non secure EL1 and EL0 */
1129 			assert(el == ocsd_EL1 || el == ocsd_EL0);
1130 		} else if (mem_space & OCSD_MEM_SPACE_EL2)
1131 			assert(el == ocsd_EL2);
1132 		else if (mem_space & OCSD_MEM_SPACE_EL3)
1133 			assert(el == ocsd_EL3);
1134 	}
1135 
1136 	cpumode = cs_etm__cpu_mode(etmq, address, el);
1137 
1138 	if (!thread__find_map(thread, cpumode, address, &al))
1139 		goto out;
1140 
1141 	dso = map__dso(al.map);
1142 	if (!dso)
1143 		goto out;
1144 
1145 	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
1146 	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1147 		goto out;
1148 
1149 	offset = map__map_ip(al.map, address);
1150 
1151 	map__load(al.map);
1152 
1153 	len = dso__data_read_offset(dso, maps__machine(thread__maps(thread)),
1154 				    offset, buffer, size);
1155 
1156 	if (len <= 0) {
1157 		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1158 				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1159 		if (!dso__auxtrace_warned(dso)) {
1160 			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1161 				address,
1162 				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
1163 			dso__set_auxtrace_warned(dso);
1164 		}
1165 		goto out;
1166 	}
1167 	ret = len;
1168 out:
1169 	addr_location__exit(&al);
1170 	return ret;
1171 }
1172 
1173 static u32 cs_etm__frontend_mem_access(struct cs_etm_queue *etmq,
1174 				       struct cs_etm_traceid_queue *tidq,
1175 				       struct cs_etm_packet *packet,
1176 				       u64 address, size_t size, u8 *buffer)
1177 {
1178 	return __cs_etm__mem_access(etmq, address, size, buffer, 0, packet->el,
1179 				    tidq->frontend_thread);
1180 }
1181 
1182 static u32 cs_etm__decoder_mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
1183 				      u64 address, size_t size, u8 *buffer,
1184 				      const ocsd_mem_space_acc_t mem_space)
1185 {
1186 	struct cs_etm_traceid_queue *tidq;
1187 
1188 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1189 	if (!tidq)
1190 		return 0;
1191 
1192 	return __cs_etm__mem_access(etmq, address, size, buffer,
1193 				    mem_space, tidq->decode_el,
1194 				    tidq->decode_thread);
1195 }
1196 
1197 static struct cs_etm_queue *cs_etm__alloc_queue(void)
1198 {
1199 	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
1200 	if (!etmq)
1201 		return NULL;
1202 
1203 	etmq->traceid_queues_list = intlist__new(NULL);
1204 	if (!etmq->traceid_queues_list)
1205 		goto out_free;
1206 
1207 	/*
1208 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
1209 	 * has to be made for each packet that gets decoded, optimizing access
1210 	 * in anything other than a sequential array is worth doing.
1211 	 */
1212 	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
1213 	if (!etmq->traceid_list)
1214 		goto out_free;
1215 
1216 	return etmq;
1217 
1218 out_free:
1219 	intlist__delete(etmq->traceid_queues_list);
1220 	free(etmq);
1221 
1222 	return NULL;
1223 }
1224 
1225 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1226 			       struct auxtrace_queue *queue,
1227 			       unsigned int queue_nr)
1228 {
1229 	struct cs_etm_queue *etmq = queue->priv;
1230 
1231 	if (etmq)
1232 		return 0;
1233 
1234 	etmq = cs_etm__alloc_queue();
1235 
1236 	if (!etmq)
1237 		return -ENOMEM;
1238 
1239 	queue->priv = etmq;
1240 	etmq->etm = etm;
1241 	etmq->queue_nr = queue_nr;
1242 	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
1243 	etmq->offset = 0;
1244 	etmq->sink_id = SINK_UNSET;
1245 
1246 	return 0;
1247 }
1248 
1249 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1250 					    struct cs_etm_queue *etmq,
1251 					    unsigned int queue_nr)
1252 {
1253 	int ret = 0;
1254 	unsigned int cs_queue_nr;
1255 	u8 trace_chan_id;
1256 	u64 cs_timestamp;
1257 
1258 	/*
1259 	 * We are under a CPU-wide trace scenario.  As such we need to know
1260 	 * when the code that generated the traces started to execute so that
1261 	 * it can be correlated with execution on other CPUs.  So we get a
1262 	 * handle on the beginning of traces and decode until we find a
1263 	 * timestamp.  The timestamp is then added to the auxtrace min heap
1264 	 * in order to know what nibble (of all the etmqs) to decode first.
1265 	 */
1266 	while (1) {
1267 		/*
1268 		 * Fetch an aux_buffer from this etmq.  Bail if no more
1269 		 * blocks or an error has been encountered.
1270 		 */
1271 		ret = cs_etm__get_data_block(etmq);
1272 		if (ret <= 0)
1273 			goto out;
1274 
1275 		/*
1276 		 * Run decoder on the trace block.  The decoder will stop when
1277 		 * encountering a CS timestamp, a full packet queue or the end of
1278 		 * trace for that block.
1279 		 */
1280 		ret = cs_etm__decode_data_block(etmq);
1281 		if (ret)
1282 			goto out;
1283 
1284 		/*
1285 		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1286 		 * the timestamp calculation for us.
1287 		 */
1288 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1289 
1290 		/* We found a timestamp, no need to continue. */
1291 		if (cs_timestamp)
1292 			break;
1293 
1294 		/*
1295 		 * We didn't find a timestamp so empty all the traceid packet
1296 		 * queues before looking for another timestamp packet, either
1297 		 * in the current data block or a new one.  Packets that were
1298 		 * just decoded are useless since no timestamp has been
1299 		 * associated with them.  As such simply discard them.
1300 		 */
1301 		cs_etm__clear_all_packet_queues(etmq);
1302 	}
1303 
1304 	/*
1305 	 * We have a timestamp.  Add it to the min heap to reflect when
1306 	 * instructions conveyed by the range packets of this traceID queue
1307 	 * started to execute.  Once the same has been done for all the traceID
1308 	 * queues of each etmq, redenring and decoding can start in
1309 	 * chronological order.
1310 	 *
1311 	 * Note that packets decoded above are still in the traceID's packet
1312 	 * queue and will be processed in cs_etm__process_timestamped_queues().
1313 	 */
1314 	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1315 	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1316 out:
1317 	return ret;
1318 }
1319 
1320 static inline
1321 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1322 				 struct cs_etm_traceid_queue *tidq)
1323 {
1324 	struct branch_stack *bs_src = tidq->last_branch_rb;
1325 	struct branch_stack *bs_dst = tidq->last_branch;
1326 	size_t nr = 0;
1327 
1328 	/*
1329 	 * Set the number of records before early exit: ->nr is used to
1330 	 * determine how many branches to copy from ->entries.
1331 	 */
1332 	bs_dst->nr = bs_src->nr;
1333 
1334 	/*
1335 	 * Early exit when there is nothing to copy.
1336 	 */
1337 	if (!bs_src->nr)
1338 		return;
1339 
1340 	/*
1341 	 * As bs_src->entries is a circular buffer, we need to copy from it in
1342 	 * two steps.  First, copy the branches from the most recently inserted
1343 	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1344 	 */
1345 	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1346 	memcpy(&bs_dst->entries[0],
1347 	       &bs_src->entries[tidq->last_branch_pos],
1348 	       sizeof(struct branch_entry) * nr);
1349 
1350 	/*
1351 	 * If we wrapped around at least once, the branches from the beginning
1352 	 * of the bs_src->entries buffer and until the ->last_branch_pos element
1353 	 * are older valid branches: copy them over.  The total number of
1354 	 * branches copied over will be equal to the number of branches asked by
1355 	 * the user in last_branch_sz.
1356 	 */
1357 	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1358 		memcpy(&bs_dst->entries[nr],
1359 		       &bs_src->entries[0],
1360 		       sizeof(struct branch_entry) * tidq->last_branch_pos);
1361 	}
1362 }
1363 
1364 static inline
1365 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1366 {
1367 	tidq->last_branch_pos = 0;
1368 	tidq->last_branch_rb->nr = 0;
1369 }
1370 
1371 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1372 					 struct cs_etm_traceid_queue *tidq,
1373 					 struct cs_etm_packet *packet, u64 addr)
1374 {
1375 	u8 instrBytes[2];
1376 
1377 	cs_etm__frontend_mem_access(etmq, tidq, packet, addr,
1378 				    ARRAY_SIZE(instrBytes), instrBytes);
1379 	/*
1380 	 * T32 instruction size is indicated by bits[15:11] of the first
1381 	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1382 	 * denote a 32-bit instruction.
1383 	 */
1384 	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1385 }
1386 
1387 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1388 {
1389 	/*
1390 	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1391 	 * appear in samples.
1392 	 */
1393 	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1394 	    packet->sample_type == CS_ETM_EXCEPTION)
1395 		return 0;
1396 
1397 	return packet->start_addr;
1398 }
1399 
1400 static inline
1401 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1402 {
1403 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1404 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1405 		return 0;
1406 
1407 	return packet->end_addr - packet->last_instr_size;
1408 }
1409 
1410 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1411 				     struct cs_etm_traceid_queue *tidq,
1412 				     struct cs_etm_packet *packet,
1413 				     u64 offset)
1414 {
1415 	if (packet->isa == CS_ETM_ISA_T32) {
1416 		u64 addr = packet->start_addr;
1417 
1418 		while (offset) {
1419 			addr += cs_etm__t32_instr_size(etmq, tidq, packet,
1420 						       addr);
1421 			offset--;
1422 		}
1423 		return addr;
1424 	}
1425 
1426 	/* Assume a 4 byte instruction size (A32/A64) */
1427 	return packet->start_addr + offset * 4;
1428 }
1429 
1430 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1431 					  struct cs_etm_traceid_queue *tidq)
1432 {
1433 	struct branch_stack *bs = tidq->last_branch_rb;
1434 	struct branch_entry *be;
1435 
1436 	/*
1437 	 * The branches are recorded in a circular buffer in reverse
1438 	 * chronological order: we start recording from the last element of the
1439 	 * buffer down.  After writing the first element of the stack, move the
1440 	 * insert position back to the end of the buffer.
1441 	 */
1442 	if (!tidq->last_branch_pos)
1443 		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1444 
1445 	tidq->last_branch_pos -= 1;
1446 
1447 	be       = &bs->entries[tidq->last_branch_pos];
1448 	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1449 	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1450 	/* No support for mispredict */
1451 	be->flags.mispred = 0;
1452 	be->flags.predicted = 1;
1453 
1454 	/*
1455 	 * Increment bs->nr until reaching the number of last branches asked by
1456 	 * the user on the command line.
1457 	 */
1458 	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1459 		bs->nr += 1;
1460 }
1461 
1462 static int cs_etm__inject_event(struct cs_etm_auxtrace *etm, union perf_event *event,
1463 			       struct perf_sample *sample, u64 type)
1464 {
1465 	struct evsel *evsel = sample->evsel;
1466 	u64 branch_sample_type = 0;
1467 	size_t sz;
1468 
1469 	if (!evsel && etm->session && etm->session->evlist)
1470 		evsel = evlist__id2evsel(etm->session->evlist, sample->id);
1471 
1472 	if (evsel)
1473 		branch_sample_type = evsel->core.attr.branch_sample_type;
1474 
1475 	sz = perf_event__sample_event_size(sample, type, /*read_format=*/0,
1476 					   branch_sample_type);
1477 	if (sz >= PERF_SAMPLE_MAX_SIZE) {
1478 		pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE);
1479 		return -EFAULT;
1480 	}
1481 	event->header.size = sz;
1482 
1483 	return perf_event__synthesize_sample(event, type, /*read_format=*/0,
1484 					     branch_sample_type, sample);
1485 }
1486 
1487 
1488 static int
1489 cs_etm__get_trace(struct cs_etm_queue *etmq)
1490 {
1491 	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1492 	struct auxtrace_buffer *old_buffer = aux_buffer;
1493 	struct auxtrace_queue *queue;
1494 
1495 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1496 
1497 	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1498 
1499 	/* If no more data, drop the previous auxtrace_buffer and return */
1500 	if (!aux_buffer) {
1501 		if (old_buffer)
1502 			auxtrace_buffer__drop_data(old_buffer);
1503 		etmq->buf_len = 0;
1504 		return 0;
1505 	}
1506 
1507 	etmq->buffer = aux_buffer;
1508 
1509 	/* If the aux_buffer doesn't have data associated, try to load it */
1510 	if (!aux_buffer->data) {
1511 		/* get the file desc associated with the perf data file */
1512 		int fd = perf_data__fd(etmq->etm->session->data);
1513 
1514 		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1515 		if (!aux_buffer->data)
1516 			return -ENOMEM;
1517 	}
1518 
1519 	/* If valid, drop the previous buffer */
1520 	if (old_buffer)
1521 		auxtrace_buffer__drop_data(old_buffer);
1522 
1523 	etmq->buf_used = 0;
1524 	etmq->buf_len = aux_buffer->size;
1525 	etmq->buf = aux_buffer->data;
1526 
1527 	return etmq->buf_len;
1528 }
1529 
1530 /*
1531  * Convert a raw thread number to a thread struct and assign it to **thread.
1532  */
1533 static int cs_etm__etmq_update_thread(struct cs_etm_queue *etmq,
1534 				      ocsd_ex_level el, pid_t tid,
1535 				      struct thread **thread)
1536 {
1537 	struct machine *machine = cs_etm__get_machine(etmq, el);
1538 
1539 	if (!machine || !*thread)
1540 		return -EINVAL;
1541 
1542 	if (tid != -1) {
1543 		thread__zput(*thread);
1544 		*thread = machine__find_thread(machine, -1, tid);
1545 	}
1546 
1547 	/* Couldn't find a known thread */
1548 	if (!*thread)
1549 		*thread = machine__idle_thread(machine);
1550 
1551 	return 0;
1552 }
1553 
1554 /*
1555  * Set the thread and EL of the decode context which is ahead in time of the
1556  * frontend context.
1557  */
1558 int cs_etm__etmq_update_decode_context(struct cs_etm_queue *etmq,
1559 				       u8 trace_chan_id,
1560 				       ocsd_ex_level el, pid_t tid)
1561 {
1562 	struct cs_etm_traceid_queue *tidq;
1563 	int ret;
1564 
1565 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1566 	if (!tidq)
1567 		return -EINVAL;
1568 
1569 	ret = cs_etm__etmq_update_thread(etmq, el, tid,
1570 					 &tidq->decode_thread);
1571 	if (ret)
1572 		return ret;
1573 
1574 	tidq->decode_el = el;
1575 	return 0;
1576 }
1577 
1578 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1579 {
1580 	return !!etmq->etm->timeless_decoding;
1581 }
1582 
1583 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1584 			      struct cs_etm_traceid_queue *tidq,
1585 			      struct cs_etm_packet *packet,
1586 			      struct perf_sample *sample)
1587 {
1588 	/*
1589 	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1590 	 * packet, so directly bail out with 'insn_len' = 0.
1591 	 */
1592 	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1593 		sample->insn_len = 0;
1594 		return;
1595 	}
1596 
1597 	/*
1598 	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1599 	 * cs_etm__t32_instr_size().
1600 	 */
1601 	if (packet->isa == CS_ETM_ISA_T32)
1602 		sample->insn_len = cs_etm__t32_instr_size(etmq, tidq, packet,
1603 							  sample->ip);
1604 	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
1605 	else
1606 		sample->insn_len = 4;
1607 
1608 	cs_etm__frontend_mem_access(etmq, tidq, packet, sample->ip,
1609 				    sample->insn_len, (void *)sample->insn);
1610 }
1611 
1612 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1613 {
1614 	struct cs_etm_auxtrace *etm = etmq->etm;
1615 
1616 	if (etm->has_virtual_ts)
1617 		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1618 	else
1619 		return cs_timestamp;
1620 }
1621 
1622 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1623 					       struct cs_etm_traceid_queue *tidq)
1624 {
1625 	struct cs_etm_auxtrace *etm = etmq->etm;
1626 	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1627 
1628 	if (!etm->timeless_decoding && etm->has_virtual_ts)
1629 		return packet_queue->cs_timestamp;
1630 	else
1631 		return etm->latest_kernel_timestamp;
1632 }
1633 
1634 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1635 					    struct cs_etm_traceid_queue *tidq,
1636 					    struct cs_etm_packet *packet,
1637 					    u64 addr, u64 period)
1638 {
1639 	int ret = 0;
1640 	struct cs_etm_auxtrace *etm = etmq->etm;
1641 	union perf_event *event = tidq->event_buf;
1642 	struct perf_sample sample;
1643 
1644 	perf_sample__init(&sample, /*all=*/true);
1645 	event->sample.header.type = PERF_RECORD_SAMPLE;
1646 	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, packet->el);
1647 	event->sample.header.size = sizeof(struct perf_event_header);
1648 
1649 	/* Set time field based on etm auxtrace config. */
1650 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1651 
1652 	sample.ip = addr;
1653 	sample.pid = thread__pid(tidq->frontend_thread);
1654 	sample.tid = thread__tid(tidq->frontend_thread);
1655 	sample.id = etmq->etm->instructions_id;
1656 	sample.stream_id = etmq->etm->instructions_id;
1657 	sample.period = period;
1658 	sample.cpu = packet->cpu;
1659 	sample.flags = tidq->prev_packet->flags;
1660 	sample.cpumode = event->sample.header.misc;
1661 
1662 	cs_etm__copy_insn(etmq, tidq, packet, &sample);
1663 
1664 	if (etm->synth_opts.last_branch)
1665 		sample.branch_stack = tidq->last_branch;
1666 
1667 	if (etm->synth_opts.inject) {
1668 		ret = cs_etm__inject_event(etm, event, &sample,
1669 					   etm->instructions_sample_type);
1670 		if (ret)
1671 			return ret;
1672 	}
1673 
1674 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1675 
1676 	if (ret)
1677 		pr_err(
1678 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1679 			ret);
1680 
1681 	perf_sample__exit(&sample);
1682 	return ret;
1683 }
1684 
1685 /*
1686  * The cs etm packet encodes an instruction range between a branch target
1687  * and the next taken branch. Generate sample accordingly.
1688  */
1689 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1690 				       struct cs_etm_traceid_queue *tidq)
1691 {
1692 	int ret = 0;
1693 	struct cs_etm_auxtrace *etm = etmq->etm;
1694 	struct perf_sample sample = {.ip = 0,};
1695 	union perf_event *event = tidq->event_buf;
1696 	struct dummy_branch_stack {
1697 		u64			nr;
1698 		u64			hw_idx;
1699 		struct branch_entry	entries;
1700 	} dummy_bs;
1701 	u64 ip;
1702 
1703 	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1704 
1705 	event->sample.header.type = PERF_RECORD_SAMPLE;
1706 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1707 						     tidq->prev_packet->el);
1708 	event->sample.header.size = sizeof(struct perf_event_header);
1709 
1710 	/* Set time field based on etm auxtrace config. */
1711 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1712 
1713 	sample.ip = ip;
1714 	sample.pid = thread__pid(tidq->frontend_thread);
1715 	sample.tid = thread__tid(tidq->frontend_thread);
1716 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1717 	sample.id = etmq->etm->branches_id;
1718 	sample.stream_id = etmq->etm->branches_id;
1719 	sample.period = 1;
1720 	sample.cpu = tidq->packet->cpu;
1721 	sample.flags = tidq->prev_packet->flags;
1722 	sample.cpumode = event->sample.header.misc;
1723 
1724 	cs_etm__copy_insn(etmq, tidq, tidq->prev_packet, &sample);
1725 
1726 	/*
1727 	 * perf report cannot handle events without a branch stack
1728 	 */
1729 	if (etm->synth_opts.last_branch) {
1730 		dummy_bs = (struct dummy_branch_stack){
1731 			.nr = 1,
1732 			.hw_idx = -1ULL,
1733 			.entries = {
1734 				.from = sample.ip,
1735 				.to = sample.addr,
1736 			},
1737 		};
1738 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1739 	}
1740 
1741 	if (etm->synth_opts.inject) {
1742 		ret = cs_etm__inject_event(etm, event, &sample,
1743 					   etm->branches_sample_type);
1744 		if (ret)
1745 			return ret;
1746 	}
1747 
1748 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1749 
1750 	if (ret)
1751 		pr_err(
1752 		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1753 		ret);
1754 
1755 	return ret;
1756 }
1757 
1758 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1759 				struct perf_session *session)
1760 {
1761 	struct evlist *evlist = session->evlist;
1762 	struct evsel *evsel;
1763 	struct perf_event_attr attr;
1764 	bool found = false;
1765 	u64 id;
1766 	int err;
1767 
1768 	evlist__for_each_entry(evlist, evsel) {
1769 		if (evsel->core.attr.type == etm->pmu_type) {
1770 			found = true;
1771 			break;
1772 		}
1773 	}
1774 
1775 	if (!found) {
1776 		pr_debug("No selected events with CoreSight Trace data\n");
1777 		return 0;
1778 	}
1779 
1780 	memset(&attr, 0, sizeof(struct perf_event_attr));
1781 	attr.size = sizeof(struct perf_event_attr);
1782 	attr.type = PERF_TYPE_HARDWARE;
1783 	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1784 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1785 			    PERF_SAMPLE_PERIOD;
1786 	if (etm->timeless_decoding)
1787 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1788 	else
1789 		attr.sample_type |= PERF_SAMPLE_TIME;
1790 
1791 	attr.exclude_user = evsel->core.attr.exclude_user;
1792 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1793 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1794 	attr.exclude_host = evsel->core.attr.exclude_host;
1795 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1796 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1797 	attr.read_format = evsel->core.attr.read_format;
1798 
1799 	/* create new id val to be a fixed offset from evsel id */
1800 	id = auxtrace_synth_id_range_start(evsel);
1801 
1802 	if (etm->synth_opts.branches) {
1803 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1804 		attr.sample_period = 1;
1805 		attr.sample_type |= PERF_SAMPLE_ADDR;
1806 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1807 		if (err)
1808 			return err;
1809 		etm->branches_sample_type = attr.sample_type;
1810 		etm->branches_id = id;
1811 		id += 1;
1812 		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1813 	}
1814 
1815 	if (etm->synth_opts.last_branch) {
1816 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1817 		/*
1818 		 * We don't use the hardware index, but the sample generation
1819 		 * code uses the new format branch_stack with this field,
1820 		 * so the event attributes must indicate that it's present.
1821 		 */
1822 		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1823 	}
1824 
1825 	if (etm->synth_opts.instructions) {
1826 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1827 		attr.sample_period = etm->synth_opts.period;
1828 		etm->instructions_sample_period = attr.sample_period;
1829 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1830 		if (err)
1831 			return err;
1832 		etm->instructions_sample_type = attr.sample_type;
1833 		etm->instructions_id = id;
1834 		id += 1;
1835 	}
1836 
1837 	return 0;
1838 }
1839 
1840 static int cs_etm__sample(struct cs_etm_queue *etmq,
1841 			  struct cs_etm_traceid_queue *tidq)
1842 {
1843 	struct cs_etm_auxtrace *etm = etmq->etm;
1844 	int ret;
1845 	u64 instrs_prev;
1846 
1847 	/* Get instructions remainder from previous packet */
1848 	instrs_prev = tidq->period_instructions;
1849 
1850 	tidq->period_instructions += tidq->packet->instr_count;
1851 
1852 	/*
1853 	 * Record a branch when the last instruction in
1854 	 * PREV_PACKET is a branch.
1855 	 */
1856 	if (etm->synth_opts.last_branch &&
1857 	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1858 	    tidq->prev_packet->last_instr_taken_branch)
1859 		cs_etm__update_last_branch_rb(etmq, tidq);
1860 
1861 	if (etm->synth_opts.instructions &&
1862 	    tidq->period_instructions >= etm->instructions_sample_period) {
1863 		/*
1864 		 * Emit instruction sample periodically
1865 		 * TODO: allow period to be defined in cycles and clock time
1866 		 */
1867 
1868 		/*
1869 		 * Below diagram demonstrates the instruction samples
1870 		 * generation flows:
1871 		 *
1872 		 *    Instrs     Instrs       Instrs       Instrs
1873 		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1874 		 *    |            |            |            |
1875 		 *    V            V            V            V
1876 		 *   --------------------------------------------------
1877 		 *            ^                                  ^
1878 		 *            |                                  |
1879 		 *         Period                             Period
1880 		 *    instructions(Pi)                   instructions(Pi')
1881 		 *
1882 		 *            |                                  |
1883 		 *            \---------------- -----------------/
1884 		 *                             V
1885 		 *                 tidq->packet->instr_count
1886 		 *
1887 		 * Instrs Sample(n...) are the synthesised samples occurring
1888 		 * every etm->instructions_sample_period instructions - as
1889 		 * defined on the perf command line.  Sample(n) is being the
1890 		 * last sample before the current etm packet, n+1 to n+3
1891 		 * samples are generated from the current etm packet.
1892 		 *
1893 		 * tidq->packet->instr_count represents the number of
1894 		 * instructions in the current etm packet.
1895 		 *
1896 		 * Period instructions (Pi) contains the number of
1897 		 * instructions executed after the sample point(n) from the
1898 		 * previous etm packet.  This will always be less than
1899 		 * etm->instructions_sample_period.
1900 		 *
1901 		 * When generate new samples, it combines with two parts
1902 		 * instructions, one is the tail of the old packet and another
1903 		 * is the head of the new coming packet, to generate
1904 		 * sample(n+1); sample(n+2) and sample(n+3) consume the
1905 		 * instructions with sample period.  After sample(n+3), the rest
1906 		 * instructions will be used by later packet and it is assigned
1907 		 * to tidq->period_instructions for next round calculation.
1908 		 */
1909 
1910 		/*
1911 		 * Get the initial offset into the current packet instructions;
1912 		 * entry conditions ensure that instrs_prev is less than
1913 		 * etm->instructions_sample_period.
1914 		 */
1915 		u64 offset = etm->instructions_sample_period - instrs_prev;
1916 		u64 addr;
1917 
1918 		/* Prepare last branches for instruction sample */
1919 		if (etm->synth_opts.last_branch)
1920 			cs_etm__copy_last_branch_rb(etmq, tidq);
1921 
1922 		while (tidq->period_instructions >=
1923 				etm->instructions_sample_period) {
1924 			/*
1925 			 * Calculate the address of the sampled instruction (-1
1926 			 * as sample is reported as though instruction has just
1927 			 * been executed, but PC has not advanced to next
1928 			 * instruction)
1929 			 */
1930 			addr = cs_etm__instr_addr(etmq, tidq, tidq->packet,
1931 						  offset - 1);
1932 			ret = cs_etm__synth_instruction_sample(
1933 				etmq, tidq, tidq->packet, addr,
1934 				etm->instructions_sample_period);
1935 			if (ret)
1936 				return ret;
1937 
1938 			offset += etm->instructions_sample_period;
1939 			tidq->period_instructions -=
1940 				etm->instructions_sample_period;
1941 		}
1942 	}
1943 
1944 	if (etm->synth_opts.branches) {
1945 		bool generate_sample = false;
1946 
1947 		/* Generate sample for tracing on packet */
1948 		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1949 			generate_sample = true;
1950 
1951 		/* Generate sample for branch taken packet */
1952 		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1953 		    tidq->prev_packet->last_instr_taken_branch)
1954 			generate_sample = true;
1955 
1956 		if (generate_sample) {
1957 			ret = cs_etm__synth_branch_sample(etmq, tidq);
1958 			if (ret)
1959 				return ret;
1960 		}
1961 	}
1962 
1963 	cs_etm__packet_swap(etm, tidq);
1964 
1965 	return 0;
1966 }
1967 
1968 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1969 {
1970 	/*
1971 	 * When the exception packet is inserted, whether the last instruction
1972 	 * in previous range packet is taken branch or not, we need to force
1973 	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
1974 	 * to generate branch sample for the instruction range before the
1975 	 * exception is trapped to kernel or before the exception returning.
1976 	 *
1977 	 * The exception packet includes the dummy address values, so don't
1978 	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
1979 	 * for generating instruction and branch samples.
1980 	 */
1981 	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1982 		tidq->prev_packet->last_instr_taken_branch = true;
1983 
1984 	return 0;
1985 }
1986 
1987 static int cs_etm__flush(struct cs_etm_queue *etmq,
1988 			 struct cs_etm_traceid_queue *tidq)
1989 {
1990 	int err = 0;
1991 	struct cs_etm_auxtrace *etm = etmq->etm;
1992 
1993 	/* Handle start tracing packet */
1994 	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1995 		goto swap_packet;
1996 
1997 	if (etmq->etm->synth_opts.last_branch &&
1998 	    etmq->etm->synth_opts.instructions &&
1999 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
2000 		u64 addr;
2001 
2002 		/* Prepare last branches for instruction sample */
2003 		cs_etm__copy_last_branch_rb(etmq, tidq);
2004 
2005 		/*
2006 		 * Generate a last branch event for the branches left in the
2007 		 * circular buffer at the end of the trace.
2008 		 *
2009 		 * Use the address of the end of the last reported execution
2010 		 * range
2011 		 */
2012 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
2013 
2014 		err = cs_etm__synth_instruction_sample(
2015 			etmq, tidq, tidq->prev_packet, addr,
2016 			tidq->period_instructions);
2017 		if (err)
2018 			return err;
2019 
2020 		tidq->period_instructions = 0;
2021 
2022 	}
2023 
2024 	if (etm->synth_opts.branches &&
2025 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
2026 		err = cs_etm__synth_branch_sample(etmq, tidq);
2027 		if (err)
2028 			return err;
2029 	}
2030 
2031 swap_packet:
2032 	cs_etm__packet_swap(etm, tidq);
2033 
2034 	/* Reset last branches after flush the trace */
2035 	if (etm->synth_opts.last_branch)
2036 		cs_etm__reset_last_branch_rb(tidq);
2037 
2038 	return err;
2039 }
2040 
2041 static int cs_etm__end_block(struct cs_etm_queue *etmq,
2042 			     struct cs_etm_traceid_queue *tidq)
2043 {
2044 	int err;
2045 
2046 	/*
2047 	 * It has no new packet coming and 'etmq->packet' contains the stale
2048 	 * packet which was set at the previous time with packets swapping;
2049 	 * so skip to generate branch sample to avoid stale packet.
2050 	 *
2051 	 * For this case only flush branch stack and generate a last branch
2052 	 * event for the branches left in the circular buffer at the end of
2053 	 * the trace.
2054 	 */
2055 	if (etmq->etm->synth_opts.last_branch &&
2056 	    etmq->etm->synth_opts.instructions &&
2057 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
2058 		u64 addr;
2059 
2060 		/* Prepare last branches for instruction sample */
2061 		cs_etm__copy_last_branch_rb(etmq, tidq);
2062 
2063 		/*
2064 		 * Use the address of the end of the last reported execution
2065 		 * range.
2066 		 */
2067 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
2068 
2069 		err = cs_etm__synth_instruction_sample(
2070 			etmq, tidq, tidq->prev_packet, addr,
2071 			tidq->period_instructions);
2072 		if (err)
2073 			return err;
2074 
2075 		tidq->period_instructions = 0;
2076 	}
2077 
2078 	return 0;
2079 }
2080 /*
2081  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2082  *			   if need be.
2083  * Returns:	< 0	if error
2084  *		= 0	if no more auxtrace_buffer to read
2085  *		> 0	if the current buffer isn't empty yet
2086  */
2087 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2088 {
2089 	int ret;
2090 
2091 	if (!etmq->buf_len) {
2092 		ret = cs_etm__get_trace(etmq);
2093 		if (ret <= 0)
2094 			return ret;
2095 		/*
2096 		 * We cannot assume consecutive blocks in the data file
2097 		 * are contiguous, reset the decoder to force re-sync.
2098 		 */
2099 		ret = cs_etm_decoder__reset(etmq->decoder);
2100 		if (ret)
2101 			return ret;
2102 	}
2103 
2104 	return etmq->buf_len;
2105 }
2106 
2107 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
2108 				 struct cs_etm_traceid_queue *tidq,
2109 				 struct cs_etm_packet *packet, u64 end_addr)
2110 {
2111 	/* Initialise to keep compiler happy */
2112 	u16 instr16 = 0;
2113 	u32 instr32 = 0;
2114 	u64 addr;
2115 
2116 	switch (packet->isa) {
2117 	case CS_ETM_ISA_T32:
2118 		/*
2119 		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2120 		 *
2121 		 *  b'15         b'8
2122 		 * +-----------------+--------+
2123 		 * | 1 1 0 1 1 1 1 1 |  imm8  |
2124 		 * +-----------------+--------+
2125 		 *
2126 		 * According to the specification, it only defines SVC for T32
2127 		 * with 16 bits instruction and has no definition for 32bits;
2128 		 * so below only read 2 bytes as instruction size for T32.
2129 		 */
2130 		addr = end_addr - 2;
2131 		cs_etm__frontend_mem_access(etmq, tidq, packet, addr,
2132 					    sizeof(instr16), (u8 *)&instr16);
2133 		if ((instr16 & 0xFF00) == 0xDF00)
2134 			return true;
2135 
2136 		break;
2137 	case CS_ETM_ISA_A32:
2138 		/*
2139 		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2140 		 *
2141 		 *  b'31 b'28 b'27 b'24
2142 		 * +---------+---------+-------------------------+
2143 		 * |  !1111  | 1 1 1 1 |        imm24            |
2144 		 * +---------+---------+-------------------------+
2145 		 */
2146 		addr = end_addr - 4;
2147 		cs_etm__frontend_mem_access(etmq, tidq, packet, addr,
2148 					    sizeof(instr32), (u8 *)&instr32);
2149 		if ((instr32 & 0x0F000000) == 0x0F000000 &&
2150 		    (instr32 & 0xF0000000) != 0xF0000000)
2151 			return true;
2152 
2153 		break;
2154 	case CS_ETM_ISA_A64:
2155 		/*
2156 		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2157 		 *
2158 		 *  b'31               b'21           b'4     b'0
2159 		 * +-----------------------+---------+-----------+
2160 		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2161 		 * +-----------------------+---------+-----------+
2162 		 */
2163 		addr = end_addr - 4;
2164 		cs_etm__frontend_mem_access(etmq, tidq, packet, addr,
2165 					    sizeof(instr32), (u8 *)&instr32);
2166 		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2167 			return true;
2168 
2169 		break;
2170 	case CS_ETM_ISA_UNKNOWN:
2171 	default:
2172 		break;
2173 	}
2174 
2175 	return false;
2176 }
2177 
2178 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2179 			       struct cs_etm_traceid_queue *tidq, u64 magic)
2180 {
2181 	struct cs_etm_packet *packet = tidq->packet;
2182 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2183 
2184 	if (magic == __perf_cs_etmv3_magic)
2185 		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2186 			return true;
2187 
2188 	/*
2189 	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2190 	 * HVC cases; need to check if it's SVC instruction based on
2191 	 * packet address.
2192 	 */
2193 	if (magic == __perf_cs_etmv4_magic) {
2194 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2195 		    cs_etm__is_svc_instr(etmq, tidq, prev_packet,
2196 					 prev_packet->end_addr))
2197 			return true;
2198 	}
2199 
2200 	return false;
2201 }
2202 
2203 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2204 				       u64 magic)
2205 {
2206 	struct cs_etm_packet *packet = tidq->packet;
2207 
2208 	if (magic == __perf_cs_etmv3_magic)
2209 		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2210 		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2211 		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2212 		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2213 		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2214 			return true;
2215 
2216 	if (magic == __perf_cs_etmv4_magic)
2217 		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2218 		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2219 		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2220 		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2221 		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2222 		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2223 		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2224 			return true;
2225 
2226 	return false;
2227 }
2228 
2229 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2230 				      struct cs_etm_traceid_queue *tidq,
2231 				      u64 magic)
2232 {
2233 	struct cs_etm_packet *packet = tidq->packet;
2234 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2235 
2236 	if (magic == __perf_cs_etmv3_magic)
2237 		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2238 		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2239 		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2240 		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2241 		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2242 		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2243 		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2244 			return true;
2245 
2246 	if (magic == __perf_cs_etmv4_magic) {
2247 		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2248 		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2249 		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2250 		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2251 			return true;
2252 
2253 		/*
2254 		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2255 		 * (SMC, HVC) are taken as sync exceptions.
2256 		 */
2257 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2258 		    !cs_etm__is_svc_instr(etmq, tidq, prev_packet,
2259 					  prev_packet->end_addr))
2260 			return true;
2261 
2262 		/*
2263 		 * ETMv4 has 5 bits for exception number; if the numbers
2264 		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2265 		 * they are implementation defined exceptions.
2266 		 *
2267 		 * For this case, simply take it as sync exception.
2268 		 */
2269 		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2270 		    packet->exception_number <= CS_ETMV4_EXC_END)
2271 			return true;
2272 	}
2273 
2274 	return false;
2275 }
2276 
2277 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2278 				    struct cs_etm_traceid_queue *tidq)
2279 {
2280 	struct cs_etm_packet *packet = tidq->packet;
2281 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2282 	u64 magic;
2283 	int ret;
2284 
2285 	switch (packet->sample_type) {
2286 	case CS_ETM_RANGE:
2287 		/*
2288 		 * Immediate branch instruction without neither link nor
2289 		 * return flag, it's normal branch instruction within
2290 		 * the function.
2291 		 */
2292 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2293 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2294 			packet->flags = PERF_IP_FLAG_BRANCH;
2295 
2296 			if (packet->last_instr_cond)
2297 				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2298 		}
2299 
2300 		/*
2301 		 * Immediate branch instruction with link (e.g. BL), this is
2302 		 * branch instruction for function call.
2303 		 */
2304 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2305 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2306 			packet->flags = PERF_IP_FLAG_BRANCH |
2307 					PERF_IP_FLAG_CALL;
2308 
2309 		/*
2310 		 * Indirect branch instruction with link (e.g. BLR), this is
2311 		 * branch instruction for function call.
2312 		 */
2313 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2314 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2315 			packet->flags = PERF_IP_FLAG_BRANCH |
2316 					PERF_IP_FLAG_CALL;
2317 
2318 		/*
2319 		 * Indirect branch instruction with subtype of
2320 		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
2321 		 * function return for A32/T32.
2322 		 */
2323 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2324 		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2325 			packet->flags = PERF_IP_FLAG_BRANCH |
2326 					PERF_IP_FLAG_RETURN;
2327 
2328 		/*
2329 		 * Indirect branch instruction without link (e.g. BR), usually
2330 		 * this is used for function return, especially for functions
2331 		 * within dynamic link lib.
2332 		 */
2333 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2334 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2335 			packet->flags = PERF_IP_FLAG_BRANCH |
2336 					PERF_IP_FLAG_RETURN;
2337 
2338 		/* Return instruction for function return. */
2339 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2340 		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2341 			packet->flags = PERF_IP_FLAG_BRANCH |
2342 					PERF_IP_FLAG_RETURN;
2343 
2344 		/*
2345 		 * Decoder might insert a discontinuity in the middle of
2346 		 * instruction packets, fixup prev_packet with flag
2347 		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2348 		 */
2349 		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2350 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2351 					      PERF_IP_FLAG_TRACE_BEGIN;
2352 
2353 		/*
2354 		 * If the previous packet is an exception return packet
2355 		 * and the return address just follows SVC instruction,
2356 		 * it needs to calibrate the previous packet sample flags
2357 		 * as PERF_IP_FLAG_SYSCALLRET.
2358 		 */
2359 		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2360 					   PERF_IP_FLAG_RETURN |
2361 					   PERF_IP_FLAG_INTERRUPT) &&
2362 		    cs_etm__is_svc_instr(etmq, tidq, packet, packet->start_addr)) {
2363 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2364 					     PERF_IP_FLAG_RETURN |
2365 					     PERF_IP_FLAG_SYSCALLRET;
2366 		}
2367 		break;
2368 	case CS_ETM_DISCONTINUITY:
2369 		/*
2370 		 * The trace is discontinuous, if the previous packet is
2371 		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
2372 		 * for previous packet.
2373 		 */
2374 		if (prev_packet->sample_type == CS_ETM_RANGE)
2375 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2376 					      PERF_IP_FLAG_TRACE_END;
2377 		break;
2378 	case CS_ETM_EXCEPTION:
2379 		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
2380 		if (ret)
2381 			return ret;
2382 
2383 		/* The exception is for system call. */
2384 		if (cs_etm__is_syscall(etmq, tidq, magic))
2385 			packet->flags = PERF_IP_FLAG_BRANCH |
2386 					PERF_IP_FLAG_CALL |
2387 					PERF_IP_FLAG_SYSCALLRET;
2388 		/*
2389 		 * The exceptions are triggered by external signals from bus,
2390 		 * interrupt controller, debug module, PE reset or halt.
2391 		 */
2392 		else if (cs_etm__is_async_exception(tidq, magic))
2393 			packet->flags = PERF_IP_FLAG_BRANCH |
2394 					PERF_IP_FLAG_CALL |
2395 					PERF_IP_FLAG_ASYNC |
2396 					PERF_IP_FLAG_INTERRUPT;
2397 		/*
2398 		 * Otherwise, exception is caused by trap, instruction &
2399 		 * data fault, or alignment errors.
2400 		 */
2401 		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2402 			packet->flags = PERF_IP_FLAG_BRANCH |
2403 					PERF_IP_FLAG_CALL |
2404 					PERF_IP_FLAG_INTERRUPT;
2405 
2406 		/*
2407 		 * When the exception packet is inserted, since exception
2408 		 * packet is not used standalone for generating samples
2409 		 * and it's affiliation to the previous instruction range
2410 		 * packet; so set previous range packet flags to tell perf
2411 		 * it is an exception taken branch.
2412 		 */
2413 		if (prev_packet->sample_type == CS_ETM_RANGE)
2414 			prev_packet->flags = packet->flags;
2415 		break;
2416 	case CS_ETM_EXCEPTION_RET:
2417 		/*
2418 		 * When the exception return packet is inserted, since
2419 		 * exception return packet is not used standalone for
2420 		 * generating samples and it's affiliation to the previous
2421 		 * instruction range packet; so set previous range packet
2422 		 * flags to tell perf it is an exception return branch.
2423 		 *
2424 		 * The exception return can be for either system call or
2425 		 * other exception types; unfortunately the packet doesn't
2426 		 * contain exception type related info so we cannot decide
2427 		 * the exception type purely based on exception return packet.
2428 		 * If we record the exception number from exception packet and
2429 		 * reuse it for exception return packet, this is not reliable
2430 		 * due the trace can be discontinuity or the interrupt can
2431 		 * be nested, thus the recorded exception number cannot be
2432 		 * used for exception return packet for these two cases.
2433 		 *
2434 		 * For exception return packet, we only need to distinguish the
2435 		 * packet is for system call or for other types.  Thus the
2436 		 * decision can be deferred when receive the next packet which
2437 		 * contains the return address, based on the return address we
2438 		 * can read out the previous instruction and check if it's a
2439 		 * system call instruction and then calibrate the sample flag
2440 		 * as needed.
2441 		 */
2442 		if (prev_packet->sample_type == CS_ETM_RANGE)
2443 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2444 					     PERF_IP_FLAG_RETURN |
2445 					     PERF_IP_FLAG_INTERRUPT;
2446 		break;
2447 	case CS_ETM_CONTEXT:
2448 	case CS_ETM_EMPTY:
2449 	default:
2450 		break;
2451 	}
2452 
2453 	return 0;
2454 }
2455 
2456 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2457 {
2458 	int ret = 0;
2459 	size_t processed = 0;
2460 
2461 	/*
2462 	 * Packets are decoded and added to the decoder's packet queue
2463 	 * until the decoder packet processing callback has requested that
2464 	 * processing stops or there is nothing left in the buffer.  Normal
2465 	 * operations that stop processing are a timestamp packet or a full
2466 	 * decoder buffer queue.
2467 	 */
2468 	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2469 						 etmq->offset,
2470 						 &etmq->buf[etmq->buf_used],
2471 						 etmq->buf_len,
2472 						 &processed);
2473 	if (ret)
2474 		goto out;
2475 
2476 	etmq->offset += processed;
2477 	etmq->buf_used += processed;
2478 	etmq->buf_len -= processed;
2479 
2480 out:
2481 	return ret;
2482 }
2483 
2484 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2485 					 struct cs_etm_traceid_queue *tidq)
2486 {
2487 	int ret;
2488 	struct cs_etm_packet_queue *packet_queue;
2489 
2490 	packet_queue = &tidq->packet_queue;
2491 
2492 	/* Process each packet in this chunk */
2493 	while (1) {
2494 		ret = cs_etm_decoder__get_packet(packet_queue,
2495 						 tidq->packet);
2496 		if (ret <= 0)
2497 			/*
2498 			 * Stop processing this chunk on
2499 			 * end of data or error
2500 			 */
2501 			break;
2502 
2503 		/*
2504 		 * Since packet addresses are swapped in packet
2505 		 * handling within below switch() statements,
2506 		 * thus setting sample flags must be called
2507 		 * prior to switch() statement to use address
2508 		 * information before packets swapping.
2509 		 */
2510 		ret = cs_etm__set_sample_flags(etmq, tidq);
2511 		if (ret < 0)
2512 			break;
2513 
2514 		switch (tidq->packet->sample_type) {
2515 		case CS_ETM_RANGE:
2516 			/*
2517 			 * If the packet contains an instruction
2518 			 * range, generate instruction sequence
2519 			 * events.
2520 			 */
2521 			cs_etm__sample(etmq, tidq);
2522 			break;
2523 		case CS_ETM_CONTEXT:
2524 			/*
2525 			 * Update context but don't swap packet. Keep the
2526 			 * previous one for branch source address info, if
2527 			 * tracing the kernel the context packet will be emitted
2528 			 * between two ranges.
2529 			 */
2530 			ret = cs_etm__etmq_update_thread(etmq, tidq->packet->el,
2531 							 tidq->packet->tid,
2532 							 &tidq->frontend_thread);
2533 			if (ret)
2534 				goto out;
2535 			break;
2536 		case CS_ETM_EXCEPTION:
2537 		case CS_ETM_EXCEPTION_RET:
2538 			/*
2539 			 * If the exception packet is coming,
2540 			 * make sure the previous instruction
2541 			 * range packet to be handled properly.
2542 			 */
2543 			cs_etm__exception(tidq);
2544 			break;
2545 		case CS_ETM_DISCONTINUITY:
2546 			/*
2547 			 * Discontinuity in trace, flush
2548 			 * previous branch stack
2549 			 */
2550 			cs_etm__flush(etmq, tidq);
2551 			break;
2552 		case CS_ETM_EMPTY:
2553 			/*
2554 			 * Should not receive empty packet,
2555 			 * report error.
2556 			 */
2557 			pr_err("CS ETM Trace: empty packet\n");
2558 			return -EINVAL;
2559 		default:
2560 			break;
2561 		}
2562 	}
2563 
2564 out:
2565 	return ret;
2566 }
2567 
2568 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2569 {
2570 	int idx;
2571 	struct int_node *inode;
2572 	struct cs_etm_traceid_queue *tidq;
2573 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2574 
2575 	intlist__for_each_entry(inode, traceid_queues_list) {
2576 		idx = (int)(intptr_t)inode->priv;
2577 		tidq = etmq->traceid_queues[idx];
2578 
2579 		/* Ignore return value */
2580 		cs_etm__process_traceid_queue(etmq, tidq);
2581 	}
2582 }
2583 
2584 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2585 {
2586 	int err = 0;
2587 	struct cs_etm_traceid_queue *tidq;
2588 
2589 	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2590 	if (!tidq)
2591 		return -EINVAL;
2592 
2593 	/* Go through each buffer in the queue and decode them one by one */
2594 	while (1) {
2595 		err = cs_etm__get_data_block(etmq);
2596 		if (err <= 0)
2597 			return err;
2598 
2599 		/* Run trace decoder until buffer consumed or end of trace */
2600 		do {
2601 			err = cs_etm__decode_data_block(etmq);
2602 			if (err)
2603 				return err;
2604 
2605 			/*
2606 			 * Process each packet in this chunk, nothing to do if
2607 			 * an error occurs other than hoping the next one will
2608 			 * be better.
2609 			 */
2610 			err = cs_etm__process_traceid_queue(etmq, tidq);
2611 
2612 		} while (etmq->buf_len);
2613 
2614 		if (err == 0)
2615 			/* Flush any remaining branch stack entries */
2616 			err = cs_etm__end_block(etmq, tidq);
2617 	}
2618 
2619 	return err;
2620 }
2621 
2622 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2623 {
2624 	int idx, err = 0;
2625 	struct cs_etm_traceid_queue *tidq;
2626 	struct int_node *inode;
2627 
2628 	/* Go through each buffer in the queue and decode them one by one */
2629 	while (1) {
2630 		err = cs_etm__get_data_block(etmq);
2631 		if (err <= 0)
2632 			return err;
2633 
2634 		/* Run trace decoder until buffer consumed or end of trace */
2635 		do {
2636 			err = cs_etm__decode_data_block(etmq);
2637 			if (err)
2638 				return err;
2639 
2640 			/*
2641 			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2642 			 * single traceID queue because each TID has a separate
2643 			 * buffer. But here in per-cpu mode we need to iterate
2644 			 * over each channel instead.
2645 			 */
2646 			intlist__for_each_entry(inode,
2647 						etmq->traceid_queues_list) {
2648 				idx = (int)(intptr_t)inode->priv;
2649 				tidq = etmq->traceid_queues[idx];
2650 				cs_etm__process_traceid_queue(etmq, tidq);
2651 			}
2652 		} while (etmq->buf_len);
2653 
2654 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2655 			idx = (int)(intptr_t)inode->priv;
2656 			tidq = etmq->traceid_queues[idx];
2657 			/* Flush any remaining branch stack entries */
2658 			err = cs_etm__end_block(etmq, tidq);
2659 			if (err)
2660 				return err;
2661 		}
2662 	}
2663 
2664 	return err;
2665 }
2666 
2667 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2668 					   pid_t tid)
2669 {
2670 	unsigned int i;
2671 	struct auxtrace_queues *queues = &etm->queues;
2672 
2673 	for (i = 0; i < queues->nr_queues; i++) {
2674 		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2675 		struct cs_etm_queue *etmq = queue->priv;
2676 		struct cs_etm_traceid_queue *tidq;
2677 
2678 		if (!etmq)
2679 			continue;
2680 
2681 		if (etm->per_thread_decoding) {
2682 			tidq = cs_etm__etmq_get_traceid_queue(
2683 				etmq, CS_ETM_PER_THREAD_TRACEID);
2684 
2685 			if (!tidq)
2686 				continue;
2687 
2688 			if (tid == -1 || thread__tid(tidq->frontend_thread) == tid)
2689 				cs_etm__run_per_thread_timeless_decoder(etmq);
2690 		} else
2691 			cs_etm__run_per_cpu_timeless_decoder(etmq);
2692 	}
2693 
2694 	return 0;
2695 }
2696 
2697 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2698 {
2699 	int ret = 0;
2700 	unsigned int cs_queue_nr, queue_nr, i;
2701 	u8 trace_chan_id;
2702 	u64 cs_timestamp;
2703 	struct auxtrace_queue *queue;
2704 	struct cs_etm_queue *etmq;
2705 	struct cs_etm_traceid_queue *tidq;
2706 
2707 	/*
2708 	 * Pre-populate the heap with one entry from each queue so that we can
2709 	 * start processing in time order across all queues.
2710 	 */
2711 	for (i = 0; i < etm->queues.nr_queues; i++) {
2712 		etmq = etm->queues.queue_array[i].priv;
2713 		if (!etmq)
2714 			continue;
2715 
2716 		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2717 		if (ret)
2718 			return ret;
2719 	}
2720 
2721 	while (1) {
2722 		if (!etm->heap.heap_cnt)
2723 			break;
2724 
2725 		/* Take the entry at the top of the min heap */
2726 		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2727 		queue_nr = TO_QUEUE_NR(cs_queue_nr);
2728 		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2729 		queue = &etm->queues.queue_array[queue_nr];
2730 		etmq = queue->priv;
2731 
2732 		/*
2733 		 * Remove the top entry from the heap since we are about
2734 		 * to process it.
2735 		 */
2736 		auxtrace_heap__pop(&etm->heap);
2737 
2738 		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2739 		if (!tidq) {
2740 			/*
2741 			 * No traceID queue has been allocated for this traceID,
2742 			 * which means something somewhere went very wrong.  No
2743 			 * other choice than simply exit.
2744 			 */
2745 			ret = -EINVAL;
2746 			goto out;
2747 		}
2748 
2749 		/*
2750 		 * Packets associated with this timestamp are already in
2751 		 * the etmq's traceID queue, so process them.
2752 		 */
2753 		ret = cs_etm__process_traceid_queue(etmq, tidq);
2754 		if (ret < 0)
2755 			goto out;
2756 
2757 		/*
2758 		 * Packets for this timestamp have been processed, time to
2759 		 * move on to the next timestamp, fetching a new auxtrace_buffer
2760 		 * if need be.
2761 		 */
2762 refetch:
2763 		ret = cs_etm__get_data_block(etmq);
2764 		if (ret < 0)
2765 			goto out;
2766 
2767 		/*
2768 		 * No more auxtrace_buffers to process in this etmq, simply
2769 		 * move on to another entry in the auxtrace_heap.
2770 		 */
2771 		if (!ret)
2772 			continue;
2773 
2774 		ret = cs_etm__decode_data_block(etmq);
2775 		if (ret)
2776 			goto out;
2777 
2778 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2779 
2780 		if (!cs_timestamp) {
2781 			/*
2782 			 * Function cs_etm__decode_data_block() returns when
2783 			 * there is no more traces to decode in the current
2784 			 * auxtrace_buffer OR when a timestamp has been
2785 			 * encountered on any of the traceID queues.  Since we
2786 			 * did not get a timestamp, there is no more traces to
2787 			 * process in this auxtrace_buffer.  As such empty and
2788 			 * flush all traceID queues.
2789 			 */
2790 			cs_etm__clear_all_traceid_queues(etmq);
2791 
2792 			/* Fetch another auxtrace_buffer for this etmq */
2793 			goto refetch;
2794 		}
2795 
2796 		/*
2797 		 * Add to the min heap the timestamp for packets that have
2798 		 * just been decoded.  They will be processed and synthesized
2799 		 * during the next call to cs_etm__process_traceid_queue() for
2800 		 * this queue/traceID.
2801 		 */
2802 		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2803 		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2804 	}
2805 
2806 	for (i = 0; i < etm->queues.nr_queues; i++) {
2807 		struct int_node *inode;
2808 
2809 		etmq = etm->queues.queue_array[i].priv;
2810 		if (!etmq)
2811 			continue;
2812 
2813 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2814 			int idx = (int)(intptr_t)inode->priv;
2815 
2816 			/* Flush any remaining branch stack entries */
2817 			tidq = etmq->traceid_queues[idx];
2818 			ret = cs_etm__end_block(etmq, tidq);
2819 			if (ret)
2820 				return ret;
2821 		}
2822 	}
2823 out:
2824 	return ret;
2825 }
2826 
2827 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2828 					union perf_event *event)
2829 {
2830 	struct thread *th;
2831 
2832 	if (etm->timeless_decoding)
2833 		return 0;
2834 
2835 	/*
2836 	 * Add the tid/pid to the log so that we can get a match when we get a
2837 	 * contextID from the decoder. Only track for the host: only kernel
2838 	 * trace is supported for guests which wouldn't need pids so this should
2839 	 * be fine.
2840 	 */
2841 	th = machine__findnew_thread(&etm->session->machines.host,
2842 				     event->itrace_start.pid,
2843 				     event->itrace_start.tid);
2844 	if (!th)
2845 		return -ENOMEM;
2846 
2847 	thread__put(th);
2848 
2849 	return 0;
2850 }
2851 
2852 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2853 					   union perf_event *event)
2854 {
2855 	struct thread *th;
2856 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2857 
2858 	/*
2859 	 * Context switch in per-thread mode are irrelevant since perf
2860 	 * will start/stop tracing as the process is scheduled.
2861 	 */
2862 	if (etm->timeless_decoding)
2863 		return 0;
2864 
2865 	/*
2866 	 * SWITCH_IN events carry the next process to be switched out while
2867 	 * SWITCH_OUT events carry the process to be switched in.  As such
2868 	 * we don't care about IN events.
2869 	 */
2870 	if (!out)
2871 		return 0;
2872 
2873 	/*
2874 	 * Add the tid/pid to the log so that we can get a match when we get a
2875 	 * contextID from the decoder. Only track for the host: only kernel
2876 	 * trace is supported for guests which wouldn't need pids so this should
2877 	 * be fine.
2878 	 */
2879 	th = machine__findnew_thread(&etm->session->machines.host,
2880 				     event->context_switch.next_prev_pid,
2881 				     event->context_switch.next_prev_tid);
2882 	if (!th)
2883 		return -ENOMEM;
2884 
2885 	thread__put(th);
2886 
2887 	return 0;
2888 }
2889 
2890 static int cs_etm__process_event(struct perf_session *session,
2891 				 union perf_event *event,
2892 				 struct perf_sample *sample,
2893 				 const struct perf_tool *tool)
2894 {
2895 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2896 						   struct cs_etm_auxtrace,
2897 						   auxtrace);
2898 
2899 	if (dump_trace)
2900 		return 0;
2901 
2902 	if (!tool->ordered_events) {
2903 		pr_err("CoreSight ETM Trace requires ordered events\n");
2904 		return -EINVAL;
2905 	}
2906 
2907 	switch (event->header.type) {
2908 	case PERF_RECORD_EXIT:
2909 		/*
2910 		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2911 		 * start the decode because we know there will be no more trace from
2912 		 * this thread. All this does is emit samples earlier than waiting for
2913 		 * the flush in other modes, but with timestamps it makes sense to wait
2914 		 * for flush so that events from different threads are interleaved
2915 		 * properly.
2916 		 */
2917 		if (etm->per_thread_decoding && etm->timeless_decoding)
2918 			return cs_etm__process_timeless_queues(etm,
2919 							       event->fork.tid);
2920 		break;
2921 
2922 	case PERF_RECORD_ITRACE_START:
2923 		return cs_etm__process_itrace_start(etm, event);
2924 
2925 	case PERF_RECORD_SWITCH_CPU_WIDE:
2926 		return cs_etm__process_switch_cpu_wide(etm, event);
2927 
2928 	case PERF_RECORD_AUX:
2929 		/*
2930 		 * Record the latest kernel timestamp available in the header
2931 		 * for samples so that synthesised samples occur from this point
2932 		 * onwards.
2933 		 */
2934 		if (sample->time && (sample->time != (u64)-1))
2935 			etm->latest_kernel_timestamp = sample->time;
2936 		break;
2937 
2938 	default:
2939 		break;
2940 	}
2941 
2942 	return 0;
2943 }
2944 
2945 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2946 			     struct perf_record_auxtrace *event)
2947 {
2948 	struct auxtrace_buffer *buf;
2949 	unsigned int i;
2950 	/*
2951 	 * Find all buffers with same reference in the queues and dump them.
2952 	 * This is because the queues can contain multiple entries of the same
2953 	 * buffer that were split on aux records.
2954 	 */
2955 	for (i = 0; i < etm->queues.nr_queues; ++i)
2956 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2957 			if (buf->reference == event->reference)
2958 				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2959 }
2960 
2961 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2962 					  union perf_event *event,
2963 					  const struct perf_tool *tool __maybe_unused)
2964 {
2965 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2966 						   struct cs_etm_auxtrace,
2967 						   auxtrace);
2968 	if (!etm->data_queued) {
2969 		struct auxtrace_buffer *buffer;
2970 		off_t  data_offset;
2971 		int fd = perf_data__fd(session->data);
2972 		bool is_pipe = perf_data__is_pipe(session->data);
2973 		int err;
2974 		int idx = event->auxtrace.idx;
2975 
2976 		if (is_pipe)
2977 			data_offset = 0;
2978 		else {
2979 			data_offset = lseek(fd, 0, SEEK_CUR);
2980 			if (data_offset == -1)
2981 				return -errno;
2982 		}
2983 
2984 		err = auxtrace_queues__add_event(&etm->queues, session,
2985 						 event, data_offset, &buffer);
2986 		if (err)
2987 			return err;
2988 
2989 		if (dump_trace)
2990 			if (auxtrace_buffer__get_data(buffer, fd)) {
2991 				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2992 				auxtrace_buffer__put_data(buffer);
2993 			}
2994 	} else if (dump_trace)
2995 		dump_queued_data(etm, &event->auxtrace);
2996 
2997 	return 0;
2998 }
2999 
3000 static void cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
3001 {
3002 	/* Take first ETM as all options will be the same for all ETMs */
3003 	u64 *metadata = etm->metadata[0];
3004 
3005 	/* Override timeless mode with user input from --itrace=Z */
3006 	if (etm->synth_opts.timeless_decoding) {
3007 		etm->timeless_decoding = true;
3008 		return;
3009 	}
3010 
3011 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic)
3012 		etm->timeless_decoding = !(metadata[CS_ETM_ETMCR] & ETMCR_TIMESTAMP_EN);
3013 	else
3014 		etm->timeless_decoding = !(metadata[CS_ETMV4_TRCCONFIGR] & TRCCONFIGR_TS);
3015 }
3016 
3017 /*
3018  * Read a single cpu parameter block from the auxtrace_info priv block.
3019  *
3020  * For version 1 there is a per cpu nr_params entry. If we are handling
3021  * version 1 file, then there may be less, the same, or more params
3022  * indicated by this value than the compile time number we understand.
3023  *
3024  * For a version 0 info block, there are a fixed number, and we need to
3025  * fill out the nr_param value in the metadata we create.
3026  */
3027 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
3028 				    int out_blk_size, int nr_params_v0)
3029 {
3030 	u64 *metadata = NULL;
3031 	int hdr_version;
3032 	int nr_in_params, nr_out_params, nr_cmn_params;
3033 	int i, k;
3034 
3035 	metadata = zalloc(sizeof(*metadata) * out_blk_size);
3036 	if (!metadata)
3037 		return NULL;
3038 
3039 	/* read block current index & version */
3040 	i = *buff_in_offset;
3041 	hdr_version = buff_in[CS_HEADER_VERSION];
3042 
3043 	if (!hdr_version) {
3044 	/* read version 0 info block into a version 1 metadata block  */
3045 		nr_in_params = nr_params_v0;
3046 		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
3047 		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
3048 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
3049 		/* remaining block params at offset +1 from source */
3050 		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
3051 			metadata[k + 1] = buff_in[i + k];
3052 		/* version 0 has 2 common params */
3053 		nr_cmn_params = 2;
3054 	} else {
3055 	/* read version 1 info block - input and output nr_params may differ */
3056 		/* version 1 has 3 common params */
3057 		nr_cmn_params = 3;
3058 		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
3059 
3060 		/* if input has more params than output - skip excess */
3061 		nr_out_params = nr_in_params + nr_cmn_params;
3062 		if (nr_out_params > out_blk_size)
3063 			nr_out_params = out_blk_size;
3064 
3065 		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
3066 			metadata[k] = buff_in[i + k];
3067 
3068 		/* record the actual nr params we copied */
3069 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
3070 	}
3071 
3072 	/* adjust in offset by number of in params used */
3073 	i += nr_in_params + nr_cmn_params;
3074 	*buff_in_offset = i;
3075 	return metadata;
3076 }
3077 
3078 /**
3079  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
3080  * on the bounds of aux_event, if it matches with the buffer that's at
3081  * file_offset.
3082  *
3083  * Normally, whole auxtrace buffers would be added to the queue. But we
3084  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3085  * is reset across each buffer, so splitting the buffers up in advance has
3086  * the same effect.
3087  */
3088 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
3089 				      struct perf_record_aux *aux_event, struct perf_sample *sample)
3090 {
3091 	int err;
3092 	char buf[PERF_SAMPLE_MAX_SIZE];
3093 	union perf_event *auxtrace_event_union;
3094 	struct perf_record_auxtrace *auxtrace_event;
3095 	union perf_event auxtrace_fragment;
3096 	__u64 aux_offset, aux_size;
3097 	enum cs_etm_format format;
3098 
3099 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
3100 						   struct cs_etm_auxtrace,
3101 						   auxtrace);
3102 
3103 	/*
3104 	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
3105 	 * from looping through the auxtrace index.
3106 	 */
3107 	err = perf_session__peek_event(session, file_offset, buf,
3108 				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
3109 	if (err)
3110 		return err;
3111 	auxtrace_event = &auxtrace_event_union->auxtrace;
3112 	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
3113 		return -EINVAL;
3114 
3115 	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
3116 		auxtrace_event->header.size != sz) {
3117 		return -EINVAL;
3118 	}
3119 
3120 	/*
3121 	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
3122 	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
3123 	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
3124 	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
3125 	 * Return 'not found' if mismatch.
3126 	 */
3127 	if (auxtrace_event->cpu == (__u32) -1) {
3128 		etm->per_thread_decoding = true;
3129 		if (auxtrace_event->tid != sample->tid)
3130 			return 1;
3131 	} else if (auxtrace_event->cpu != sample->cpu) {
3132 		if (etm->per_thread_decoding) {
3133 			/*
3134 			 * Found a per-cpu buffer after a per-thread one was
3135 			 * already found
3136 			 */
3137 			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3138 			return -EINVAL;
3139 		}
3140 		return 1;
3141 	}
3142 
3143 	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3144 		/*
3145 		 * Clamp size in snapshot mode. The buffer size is clamped in
3146 		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3147 		 * the buffer size.
3148 		 */
3149 		aux_size = min(aux_event->aux_size, auxtrace_event->size);
3150 
3151 		/*
3152 		 * In this mode, the head also points to the end of the buffer so aux_offset
3153 		 * needs to have the size subtracted so it points to the beginning as in normal mode
3154 		 */
3155 		aux_offset = aux_event->aux_offset - aux_size;
3156 	} else {
3157 		aux_size = aux_event->aux_size;
3158 		aux_offset = aux_event->aux_offset;
3159 	}
3160 
3161 	if (aux_offset >= auxtrace_event->offset &&
3162 	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3163 		struct cs_etm_queue *etmq = cs_etm__get_queue(etm, auxtrace_event->cpu);
3164 
3165 		if (!etmq)
3166 			return -EINVAL;
3167 
3168 		/*
3169 		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3170 		 * based on the sizes of the aux event, and queue that fragment.
3171 		 */
3172 		auxtrace_fragment.auxtrace = *auxtrace_event;
3173 		auxtrace_fragment.auxtrace.size = aux_size;
3174 		auxtrace_fragment.auxtrace.offset = aux_offset;
3175 		auxtrace_fragment.auxtrace.idx = etmq->queue_nr;
3176 		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3177 
3178 		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3179 			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3180 		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3181 						 file_offset, NULL);
3182 		if (err)
3183 			return err;
3184 
3185 		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
3186 				UNFORMATTED : FORMATTED;
3187 		if (etmq->format != UNSET && format != etmq->format) {
3188 			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
3189 			return -EINVAL;
3190 		}
3191 		etmq->format = format;
3192 		return 0;
3193 	}
3194 
3195 	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3196 	return 1;
3197 }
3198 
3199 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3200 					u64 offset __maybe_unused, void *data __maybe_unused)
3201 {
3202 	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3203 	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3204 		(*(int *)data)++; /* increment found count */
3205 		return cs_etm__process_aux_output_hw_id(session, event);
3206 	}
3207 	return 0;
3208 }
3209 
3210 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3211 					u64 offset __maybe_unused, void *data __maybe_unused)
3212 {
3213 	struct perf_sample sample;
3214 	int ret;
3215 	struct auxtrace_index_entry *ent;
3216 	struct auxtrace_index *auxtrace_index;
3217 	struct evsel *evsel;
3218 	size_t i;
3219 
3220 	/* Don't care about any other events, we're only queuing buffers for AUX events */
3221 	if (event->header.type != PERF_RECORD_AUX)
3222 		return 0;
3223 
3224 	if (event->header.size < sizeof(struct perf_record_aux))
3225 		return -EINVAL;
3226 
3227 	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3228 	if (!event->aux.aux_size)
3229 		return 0;
3230 
3231 	/*
3232 	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3233 	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3234 	 */
3235 	evsel = evlist__event2evsel(session->evlist, event);
3236 	if (!evsel)
3237 		return -EINVAL;
3238 	perf_sample__init(&sample, /*all=*/false);
3239 	ret = evsel__parse_sample(evsel, event, &sample);
3240 	if (ret)
3241 		goto out;
3242 
3243 	/*
3244 	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3245 	 */
3246 	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3247 		for (i = 0; i < auxtrace_index->nr; i++) {
3248 			ent = &auxtrace_index->entries[i];
3249 			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3250 							 ent->sz, &event->aux, &sample);
3251 			/*
3252 			 * Stop search on error or successful values. Continue search on
3253 			 * 1 ('not found')
3254 			 */
3255 			if (ret != 1)
3256 				goto out;
3257 		}
3258 	}
3259 
3260 	/*
3261 	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3262 	 * don't exit with an error because it will still be possible to decode other aux records.
3263 	 */
3264 	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3265 	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3266 	ret = 0;
3267 out:
3268 	perf_sample__exit(&sample);
3269 	return ret;
3270 }
3271 
3272 static int cs_etm__queue_aux_records(struct perf_session *session)
3273 {
3274 	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3275 								struct auxtrace_index, list);
3276 	if (index && index->nr > 0)
3277 		return perf_session__peek_events(session, session->header.data_offset,
3278 						 session->header.data_size,
3279 						 cs_etm__queue_aux_records_cb, NULL);
3280 
3281 	/*
3282 	 * We would get here if there are no entries in the index (either no auxtrace
3283 	 * buffers or no index at all). Fail silently as there is the possibility of
3284 	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3285 	 * false.
3286 	 *
3287 	 * In that scenario, buffers will not be split by AUX records.
3288 	 */
3289 	return 0;
3290 }
3291 
3292 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3293 				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3294 
3295 /*
3296  * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3297  * timestamps).
3298  */
3299 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3300 {
3301 	int j;
3302 
3303 	for (j = 0; j < num_cpu; j++) {
3304 		switch (metadata[j][CS_ETM_MAGIC]) {
3305 		case __perf_cs_etmv4_magic:
3306 			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3307 				return false;
3308 			break;
3309 		case __perf_cs_ete_magic:
3310 			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3311 				return false;
3312 			break;
3313 		default:
3314 			/* Unknown / unsupported magic number. */
3315 			return false;
3316 		}
3317 	}
3318 	return true;
3319 }
3320 
3321 /* map trace ids to correct metadata block, from information in metadata */
3322 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3323 					  u64 **metadata)
3324 {
3325 	u64 cs_etm_magic;
3326 	u8 trace_chan_id;
3327 	int i, err;
3328 
3329 	for (i = 0; i < num_cpu; i++) {
3330 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3331 		switch (cs_etm_magic) {
3332 		case __perf_cs_etmv3_magic:
3333 			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3334 			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3335 			break;
3336 		case __perf_cs_etmv4_magic:
3337 		case __perf_cs_ete_magic:
3338 			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3339 			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3340 			break;
3341 		default:
3342 			/* unknown magic number */
3343 			return -EINVAL;
3344 		}
3345 		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3346 		if (err)
3347 			return err;
3348 	}
3349 	return 0;
3350 }
3351 
3352 /*
3353  * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3354  * (formatted or not) packets to create the decoders.
3355  */
3356 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
3357 {
3358 	struct cs_etm_decoder_params d_params;
3359 	struct cs_etm_trace_params  *t_params;
3360 	int decoders = intlist__nr_entries(etmq->traceid_list);
3361 
3362 	if (decoders == 0)
3363 		return 0;
3364 
3365 	/*
3366 	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
3367 	 * needed.
3368 	 */
3369 	if (etmq->format == UNFORMATTED)
3370 		assert(decoders == 1);
3371 
3372 	/* Use metadata to fill in trace parameters for trace decoder */
3373 	t_params = zalloc(sizeof(*t_params) * decoders);
3374 
3375 	if (!t_params)
3376 		goto out_free;
3377 
3378 	if (cs_etm__init_trace_params(t_params, etmq))
3379 		goto out_free;
3380 
3381 	/* Set decoder parameters to decode trace packets */
3382 	if (cs_etm__init_decoder_params(&d_params, etmq,
3383 					dump_trace ? CS_ETM_OPERATION_PRINT :
3384 						     CS_ETM_OPERATION_DECODE))
3385 		goto out_free;
3386 
3387 	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
3388 					    t_params);
3389 
3390 	if (!etmq->decoder)
3391 		goto out_free;
3392 
3393 	/*
3394 	 * Register a function to handle all memory accesses required by
3395 	 * the trace decoder library.
3396 	 */
3397 	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
3398 					      0x0L, ((u64) -1L),
3399 					      cs_etm__decoder_mem_access))
3400 		goto out_free_decoder;
3401 
3402 	zfree(&t_params);
3403 	return 0;
3404 
3405 out_free_decoder:
3406 	cs_etm_decoder__free(etmq->decoder);
3407 out_free:
3408 	zfree(&t_params);
3409 	return -EINVAL;
3410 }
3411 
3412 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3413 {
3414 	struct auxtrace_queues *queues = &etm->queues;
3415 
3416 	for (unsigned int i = 0; i < queues->nr_queues; i++) {
3417 		bool empty = list_empty(&queues->queue_array[i].head);
3418 		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3419 		int ret;
3420 
3421 		/*
3422 		 * Don't create decoders for empty queues, mainly because
3423 		 * etmq->format is unknown for empty queues.
3424 		 */
3425 		assert(empty || etmq->format != UNSET);
3426 		if (empty)
3427 			continue;
3428 
3429 		ret = cs_etm__create_queue_decoders(etmq);
3430 		if (ret)
3431 			return ret;
3432 	}
3433 	return 0;
3434 }
3435 
3436 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3437 				       struct perf_session *session)
3438 {
3439 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3440 	struct cs_etm_auxtrace *etm = NULL;
3441 	struct perf_record_time_conv *tc = &session->time_conv;
3442 	int event_header_size = sizeof(struct perf_event_header);
3443 	int total_size = auxtrace_info->header.size;
3444 	int priv_size = 0;
3445 	int num_cpu, max_cpu = 0;
3446 	int err = 0;
3447 	int aux_hw_id_found;
3448 	int i;
3449 	u64 *ptr = NULL;
3450 	u64 **metadata = NULL;
3451 
3452 	/* First the global part */
3453 	ptr = (u64 *) auxtrace_info->priv;
3454 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3455 
3456 	/*
3457 	 * Bound num_cpu by the event size: the global header consumes
3458 	 * CS_ETM_HEADER_SIZE bytes, and each CPU needs at least one u64
3459 	 * metadata entry after that.
3460 	 */
3461 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE -
3462 		    CS_ETM_HEADER_SIZE;
3463 	if (num_cpu <= 0 || priv_size <= 0 ||
3464 	    num_cpu > priv_size / (int)sizeof(u64))
3465 		return -EINVAL;
3466 
3467 	metadata = zalloc(sizeof(*metadata) * num_cpu);
3468 	if (!metadata)
3469 		return -ENOMEM;
3470 
3471 	/* Start parsing after the common part of the header */
3472 	i = CS_HEADER_VERSION_MAX;
3473 
3474 	/*
3475 	 * The metadata is stored in the auxtrace_info section and encodes
3476 	 * the configuration of the ARM embedded trace macrocell which is
3477 	 * required by the trace decoder to properly decode the trace due
3478 	 * to its highly compressed nature.
3479 	 */
3480 	for (int j = 0; j < num_cpu; j++) {
3481 		if (ptr[i] == __perf_cs_etmv3_magic) {
3482 			metadata[j] =
3483 				cs_etm__create_meta_blk(ptr, &i,
3484 							CS_ETM_PRIV_MAX,
3485 							CS_ETM_NR_TRC_PARAMS_V0);
3486 		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3487 			metadata[j] =
3488 				cs_etm__create_meta_blk(ptr, &i,
3489 							CS_ETMV4_PRIV_MAX,
3490 							CS_ETMV4_NR_TRC_PARAMS_V0);
3491 		} else if (ptr[i] == __perf_cs_ete_magic) {
3492 			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3493 		} else {
3494 			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3495 				  ptr[i]);
3496 			err = -EINVAL;
3497 			goto err_free_metadata;
3498 		}
3499 
3500 		if (!metadata[j]) {
3501 			err = -ENOMEM;
3502 			goto err_free_metadata;
3503 		}
3504 
3505 		/* CPU id comes from perf.data and must fit max_cpu + 1 without overflow */
3506 		if (metadata[j][CS_ETM_CPU] >= INT_MAX) {
3507 			err = -EINVAL;
3508 			goto err_free_metadata;
3509 		}
3510 
3511 		if ((int)metadata[j][CS_ETM_CPU] > max_cpu)
3512 			max_cpu = metadata[j][CS_ETM_CPU];
3513 	}
3514 
3515 	/*
3516 	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3517 	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3518 	 * global metadata, and each cpu's metadata respectively.
3519 	 * The following tests if the correct number of double words was
3520 	 * present in the auxtrace info section.
3521 	 */
3522 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3523 	if (i * 8 != priv_size) {
3524 		err = -EINVAL;
3525 		goto err_free_metadata;
3526 	}
3527 
3528 	etm = zalloc(sizeof(*etm));
3529 
3530 	if (!etm) {
3531 		err = -ENOMEM;
3532 		goto err_free_metadata;
3533 	}
3534 
3535 	/*
3536 	 * As all the ETMs run at the same exception level, the system should
3537 	 * have the same PID format crossing CPUs.  So cache the PID format
3538 	 * and reuse it for sequential decoding.
3539 	 */
3540 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3541 
3542 	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
3543 	if (err)
3544 		goto err_free_etm;
3545 
3546 	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
3547 		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
3548 		if (err)
3549 			goto err_free_queues;
3550 	}
3551 
3552 	if (session->itrace_synth_opts->set) {
3553 		etm->synth_opts = *session->itrace_synth_opts;
3554 	} else {
3555 		itrace_synth_opts__set_default(&etm->synth_opts,
3556 				session->itrace_synth_opts->default_no_sample);
3557 		etm->synth_opts.callchain = false;
3558 	}
3559 
3560 	etm->session = session;
3561 
3562 	etm->num_cpu = num_cpu;
3563 	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3564 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3565 	etm->metadata = metadata;
3566 	etm->auxtrace_type = auxtrace_info->type;
3567 
3568 	if (etm->synth_opts.use_timestamp)
3569 		/*
3570 		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3571 		 * therefore the decoder cannot know if the timestamp trace is
3572 		 * same with the kernel time.
3573 		 *
3574 		 * If a user has knowledge for the working platform and can
3575 		 * specify itrace option 'T' to tell decoder to forcely use the
3576 		 * traced timestamp as the kernel time.
3577 		 */
3578 		etm->has_virtual_ts = true;
3579 	else
3580 		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3581 		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3582 
3583 	if (!etm->has_virtual_ts)
3584 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3585 			    "The time field of the samples will not be set accurately.\n"
3586 			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3587 			    "you can specify the itrace option 'T' for timestamp decoding\n"
3588 			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3589 
3590 	etm->auxtrace.process_event = cs_etm__process_event;
3591 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3592 	etm->auxtrace.flush_events = cs_etm__flush_events;
3593 	etm->auxtrace.free_events = cs_etm__free_events;
3594 	etm->auxtrace.free = cs_etm__free;
3595 	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3596 	session->auxtrace = &etm->auxtrace;
3597 
3598 	cs_etm__setup_timeless_decoding(etm);
3599 
3600 	etm->tc.time_shift = tc->time_shift;
3601 	etm->tc.time_mult = tc->time_mult;
3602 	etm->tc.time_zero = tc->time_zero;
3603 	if (event_contains(*tc, cap_user_time_short)) {
3604 		etm->tc.time_cycles = tc->time_cycles;
3605 		etm->tc.time_mask = tc->time_mask;
3606 		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3607 		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3608 	}
3609 	err = cs_etm__synth_events(etm, session);
3610 	if (err)
3611 		goto err_free_queues;
3612 
3613 	err = cs_etm__queue_aux_records(session);
3614 	if (err)
3615 		goto err_free_queues;
3616 
3617 	/*
3618 	 * Map Trace ID values to CPU metadata.
3619 	 *
3620 	 * Trace metadata will always contain Trace ID values from the legacy algorithm
3621 	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
3622 	 * or the kernel doesn't emit them.
3623 	 *
3624 	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3625 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3626 	 * in which case a different value will be used. This means an older perf may still
3627 	 * be able to record and read files generate on a newer system.
3628 	 *
3629 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3630 	 * those packets. If they are there then the values will be mapped and plugged into
3631 	 * the metadata and decoders are only created for each mapping received.
3632 	 *
3633 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3634 	 * then we map Trace ID values to CPU directly from the metadata and create decoders
3635 	 * for all mappings.
3636 	 */
3637 
3638 	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3639 	aux_hw_id_found = 0;
3640 	err = perf_session__peek_events(session, session->header.data_offset,
3641 					session->header.data_size,
3642 					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3643 	if (err)
3644 		goto err_free_queues;
3645 
3646 	/* if no HW ID found this is a file with metadata values only, map from metadata */
3647 	if (!aux_hw_id_found) {
3648 		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3649 		if (err)
3650 			goto err_free_queues;
3651 	}
3652 
3653 	err = cs_etm__create_decoders(etm);
3654 	if (err)
3655 		goto err_free_queues;
3656 
3657 	etm->data_queued = etm->queues.populated;
3658 	return 0;
3659 
3660 err_free_queues:
3661 	auxtrace_queues__free(&etm->queues);
3662 	session->auxtrace = NULL;
3663 err_free_etm:
3664 	zfree(&etm);
3665 err_free_metadata:
3666 	/* No need to check @metadata[j], free(NULL) is supported */
3667 	for (int j = 0; j < num_cpu; j++)
3668 		zfree(&metadata[j]);
3669 	zfree(&metadata);
3670 	return err;
3671 }
3672