xref: /linux/tools/perf/util/cs-etm.c (revision 802f0d58d52e8e34e08718479475ccdff0caffa0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17 
18 #include <stdlib.h>
19 
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42 
/*
 * Session-wide CoreSight ETM decode state. Reachable from
 * perf_session::auxtrace via the embedded 'auxtrace' member.
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless has no timestamps in the trace so overlapping mmap lookups
	 * are less accurate but produces smaller trace data. We use context IDs
	 * in the trace instead of matching timestamps with fork records so
	 * they're not really needed in the general case. Overlapping mmaps
	 * happen in cases like between a fork and an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	int num_cpu;
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata; /* one metadata block per CPU, num_cpu entries */
	unsigned int pmu_type;
	enum cs_etm_pid_fmt pid_fmt; /* cached result of cs_etm__init_pid_fmt() */
};
83 
/*
 * Decode state for a single trace stream (one trace channel ID) within a
 * cs_etm_queue. Allocated lazily by cs_etm__etmq_get_traceid_queue().
 */
struct cs_etm_traceid_queue {
	u8 trace_chan_id;
	u64 period_instructions;
	size_t last_branch_pos;
	union perf_event *event_buf; /* PERF_SAMPLE_MAX_SIZE scratch buffer */
	struct thread *thread;
	/* Thread at the time of prev_packet; see cs_etm__packet_swap() */
	struct thread *prev_packet_thread;
	ocsd_ex_level prev_packet_el;
	ocsd_ex_level el;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	struct cs_etm_packet *prev_packet; /* used as the 'from' of branch samples */
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};
99 
/*
 * Whether the AUX data is wrapped in CoreSight frame formatting.
 * Starts as UNSET until the format can be determined for the queue.
 */
enum cs_etm_format {
	UNSET,
	FORMATTED,
	UNFORMATTED
};
105 
/* Per auxtrace-queue decode state. */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	unsigned int queue_nr;
	/* Channel ID that hit a timestamp packet; 0 when none pending. */
	u8 pending_timestamp_chan_id;
	enum cs_etm_format format;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
	/* Conversion between traceID and metadata pointers */
	struct intlist *traceid_list;
	/*
	 * Same as traceid_list, but traceid_list may be a reference to another
	 * queue's which has a matching sink ID.
	 */
	struct intlist *own_traceid_list;
	/* Sink from HW_ID v0.1 packets; SINK_UNSET until one is seen. */
	u32 sink_id;
};
128 
129 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
130 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
131 					   pid_t tid);
132 static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
133 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
134 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
135 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
136 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
137 
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify to auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Arguments are fully parenthesized so callers may pass arbitrary
 * expressions without operator-precedence surprises (e.g. '|' binding
 * more loosely than '&' or '<<').
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
		      (((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
#define SINK_UNSET ((u32) -1)
152 
153 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
154 {
155 	etmidr &= ETMIDR_PTM_VERSION;
156 
157 	if (etmidr == ETMIDR_PTM_VERSION)
158 		return CS_ETM_PROTO_PTM;
159 
160 	return CS_ETM_PROTO_ETMV3;
161 }
162 
163 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
164 {
165 	struct int_node *inode;
166 	u64 *metadata;
167 
168 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
169 	if (!inode)
170 		return -EINVAL;
171 
172 	metadata = inode->priv;
173 	*magic = metadata[CS_ETM_MAGIC];
174 	return 0;
175 }
176 
177 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
178 {
179 	struct int_node *inode;
180 	u64 *metadata;
181 
182 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
183 	if (!inode)
184 		return -EINVAL;
185 
186 	metadata = inode->priv;
187 	*cpu = (int)metadata[CS_ETM_CPU];
188 	return 0;
189 }
190 
191 /*
192  * The returned PID format is presented as an enum:
193  *
194  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
195  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
196  *   CS_ETM_PIDFMT_NONE: No context IDs
197  *
198  * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
199  * are enabled at the same time when the session runs on an EL2 kernel.
200  * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
201  * recorded in the trace data, the tool will selectively use
202  * CONTEXTIDR_EL2 as PID.
203  *
204  * The result is cached in etm->pid_fmt so this function only needs to be called
205  * when processing the aux info.
206  */
207 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
208 {
209 	u64 val;
210 
211 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
212 		val = metadata[CS_ETM_ETMCR];
213 		/* CONTEXTIDR is traced */
214 		if (val & BIT(ETM_OPT_CTXTID))
215 			return CS_ETM_PIDFMT_CTXTID;
216 	} else {
217 		val = metadata[CS_ETMV4_TRCCONFIGR];
218 		/* CONTEXTIDR_EL2 is traced */
219 		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
220 			return CS_ETM_PIDFMT_CTXTID2;
221 		/* CONTEXTIDR_EL1 is traced */
222 		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
223 			return CS_ETM_PIDFMT_CTXTID;
224 	}
225 
226 	return CS_ETM_PIDFMT_NONE;
227 }
228 
229 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
230 {
231 	return etmq->etm->pid_fmt;
232 }
233 
/*
 * Record the traceID -> metadata association in the queue's traceid_list.
 *
 * Seeing the same (traceID, metadata) pair again is harmless and returns 0;
 * remapping an existing traceID to different metadata is rejected with
 * -EINVAL. Returns -ENOMEM if the list node cannot be allocated.
 */
static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
					u8 trace_chan_id, u64 *cpu_metadata)
{
	/* Get an RB node for this CPU */
	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);

	/* Something went wrong, no need to continue */
	if (!inode)
		return -ENOMEM;

	/* Disallow re-mapping a different traceID to metadata pair. */
	if (inode->priv) {
		u64 *curr_cpu_data = inode->priv;
		u8 curr_chan_id;
		int err;

		/* Existing mapping must refer to the same CPU ... */
		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
			/*
			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
			 * are expected (but not supported) in per-thread mode,
			 * rather than signifying an error.
			 */
			if (etmq->etm->per_thread_decoding)
				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
			else
				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");

			return -EINVAL;
		}

		/* check that the mapped ID matches */
		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
		if (err)
			return err;

		/* ... and the same trace ID stored in its metadata. */
		if (curr_chan_id != trace_chan_id) {
			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
			return -EINVAL;
		}

		/* Skip re-adding the same mappings if everything matched */
		return 0;
	}

	/* Not one we've seen before, associate the traceID with the metadata pointer */
	inode->priv = cpu_metadata;

	return 0;
}
283 
284 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
285 {
286 	if (etm->per_thread_decoding)
287 		return etm->queues.queue_array[0].priv;
288 	else
289 		return etm->queues.queue_array[cpu].priv;
290 }
291 
292 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
293 				   u64 *cpu_metadata)
294 {
295 	struct cs_etm_queue *etmq;
296 
297 	/*
298 	 * If the queue is unformatted then only save one mapping in the
299 	 * queue associated with that CPU so only one decoder is made.
300 	 */
301 	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
302 	if (etmq->format == UNFORMATTED)
303 		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
304 						    cpu_metadata);
305 
306 	/*
307 	 * Otherwise, version 0 trace IDs are global so save them into every
308 	 * queue.
309 	 */
310 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
311 		int ret;
312 
313 		etmq = etm->queues.queue_array[i].priv;
314 		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
315 						   cpu_metadata);
316 		if (ret)
317 			return ret;
318 	}
319 
320 	return 0;
321 }
322 
323 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
324 				       u64 hw_id)
325 {
326 	int err;
327 	u64 *cpu_data;
328 	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
329 
330 	cpu_data = get_cpu_data(etm, cpu);
331 	if (cpu_data == NULL)
332 		return -EINVAL;
333 
334 	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
335 	if (err)
336 		return err;
337 
338 	/*
339 	 * if we are picking up the association from the packet, need to plug
340 	 * the correct trace ID into the metadata for setting up decoders later.
341 	 */
342 	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
343 }
344 
345 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
346 					 u64 hw_id)
347 {
348 	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
349 	int ret;
350 	u64 *cpu_data;
351 	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
352 	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
353 
354 	/*
355 	 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
356 	 * let it pass for now until an actual overlapping trace ID is hit. In
357 	 * most cases IDs won't overlap even if the sink changes.
358 	 */
359 	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
360 	    etmq->sink_id != sink_id) {
361 		pr_err("CS_ETM: mismatch between sink IDs\n");
362 		return -EINVAL;
363 	}
364 
365 	etmq->sink_id = sink_id;
366 
367 	/* Find which other queues use this sink and link their ID maps */
368 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
369 		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
370 
371 		/* Different sinks, skip */
372 		if (other_etmq->sink_id != etmq->sink_id)
373 			continue;
374 
375 		/* Already linked, skip */
376 		if (other_etmq->traceid_list == etmq->traceid_list)
377 			continue;
378 
379 		/* At the point of first linking, this one should be empty */
380 		if (!intlist__empty(etmq->traceid_list)) {
381 			pr_err("CS_ETM: Can't link populated trace ID lists\n");
382 			return -EINVAL;
383 		}
384 
385 		etmq->own_traceid_list = NULL;
386 		intlist__delete(etmq->traceid_list);
387 		etmq->traceid_list = other_etmq->traceid_list;
388 		break;
389 	}
390 
391 	cpu_data = get_cpu_data(etm, cpu);
392 	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
393 	if (ret)
394 		return ret;
395 
396 	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
397 	if (ret)
398 		return ret;
399 
400 	return 0;
401 }
402 
403 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
404 {
405 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
406 
407 	switch (cs_etm_magic) {
408 	case __perf_cs_etmv3_magic:
409 		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
410 				      CORESIGHT_TRACE_ID_VAL_MASK);
411 		break;
412 	case __perf_cs_etmv4_magic:
413 	case __perf_cs_ete_magic:
414 		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
415 				      CORESIGHT_TRACE_ID_VAL_MASK);
416 		break;
417 	default:
418 		return -EINVAL;
419 	}
420 	return 0;
421 }
422 
423 /*
424  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
425  */
426 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
427 {
428 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
429 
430 	switch (cs_etm_magic) {
431 	case __perf_cs_etmv3_magic:
432 		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
433 		break;
434 	case __perf_cs_etmv4_magic:
435 	case __perf_cs_ete_magic:
436 		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
437 		break;
438 
439 	default:
440 		return -EINVAL;
441 	}
442 	return 0;
443 }
444 
445 /*
446  * Get a metadata index for a specific cpu from an array.
447  *
448  */
449 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
450 {
451 	int i;
452 
453 	for (i = 0; i < etm->num_cpu; i++) {
454 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
455 			return i;
456 		}
457 	}
458 
459 	return -1;
460 }
461 
462 /*
463  * Get a metadata for a specific cpu from an array.
464  *
465  */
466 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
467 {
468 	int idx = get_cpu_data_idx(etm, cpu);
469 
470 	return (idx != -1) ? etm->metadata[idx] : NULL;
471 }
472 
/*
 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
 *
 * The payload associates the Trace ID and the CPU.
 * The routine is tolerant of seeing multiple packets with the same association,
 * but a CPU / Trace ID association changing during a session is an error.
 *
 * Returns 0 on success, -EINVAL on an unsupported packet version, missing
 * session state or a sample without a CPU, or the error from sample parsing
 * or the version-specific handler.
 */
static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
					    union perf_event *event)
{
	struct cs_etm_auxtrace *etm;
	struct perf_sample sample;
	struct evsel *evsel;
	u64 hw_id;
	int cpu, version, err;

	/* extract and parse the HW ID */
	hw_id = event->aux_output_hw_id.hw_id;
	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);

	/* check that we can handle this version */
	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
		       version);
		return -EINVAL;
	}

	/* get access to the etm metadata */
	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
	if (!etm || !etm->metadata)
		return -EINVAL;

	/* parse the sample to get the CPU */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	/* sample must be released via perf_sample__exit() on every path below */
	perf_sample__init(&sample, /*all=*/false);
	err = evsel__parse_sample(evsel, event, &sample);
	if (err)
		goto out;
	cpu = sample.cpu;
	if (cpu == -1) {
		/* no CPU in the sample - possibly recorded with an old version of perf */
		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
		err = -EINVAL;
		goto out;
	}

	/* dispatch on the minor version: v0 and v0.1 payloads differ */
	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
		err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
		goto out;
	}

	err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
out:
	perf_sample__exit(&sample);
	return err;
}
531 
532 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
533 					      u8 trace_chan_id)
534 {
535 	/*
536 	 * When a timestamp packet is encountered the backend code
537 	 * is stopped so that the front end has time to process packets
538 	 * that were accumulated in the traceID queue.  Since there can
539 	 * be more than one channel per cs_etm_queue, we need to specify
540 	 * what traceID queue needs servicing.
541 	 */
542 	etmq->pending_timestamp_chan_id = trace_chan_id;
543 }
544 
545 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
546 				      u8 *trace_chan_id)
547 {
548 	struct cs_etm_packet_queue *packet_queue;
549 
550 	if (!etmq->pending_timestamp_chan_id)
551 		return 0;
552 
553 	if (trace_chan_id)
554 		*trace_chan_id = etmq->pending_timestamp_chan_id;
555 
556 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
557 						     etmq->pending_timestamp_chan_id);
558 	if (!packet_queue)
559 		return 0;
560 
561 	/* Acknowledge pending status */
562 	etmq->pending_timestamp_chan_id = 0;
563 
564 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
565 	return packet_queue->cs_timestamp;
566 }
567 
568 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
569 {
570 	int i;
571 
572 	queue->head = 0;
573 	queue->tail = 0;
574 	queue->packet_count = 0;
575 	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
576 		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
577 		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
578 		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
579 		queue->packet_buffer[i].instr_count = 0;
580 		queue->packet_buffer[i].last_instr_taken_branch = false;
581 		queue->packet_buffer[i].last_instr_size = 0;
582 		queue->packet_buffer[i].last_instr_type = 0;
583 		queue->packet_buffer[i].last_instr_subtype = 0;
584 		queue->packet_buffer[i].last_instr_cond = 0;
585 		queue->packet_buffer[i].flags = 0;
586 		queue->packet_buffer[i].exception_number = UINT32_MAX;
587 		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
588 		queue->packet_buffer[i].cpu = INT_MIN;
589 	}
590 }
591 
592 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
593 {
594 	int idx;
595 	struct int_node *inode;
596 	struct cs_etm_traceid_queue *tidq;
597 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
598 
599 	intlist__for_each_entry(inode, traceid_queues_list) {
600 		idx = (int)(intptr_t)inode->priv;
601 		tidq = etmq->traceid_queues[idx];
602 		cs_etm__clear_packet_queue(&tidq->packet_queue);
603 	}
604 }
605 
/*
 * Initialise a freshly allocated traceID queue: clear its packet queue,
 * attach thread state and allocate the packet/branch/event buffers.
 *
 * Returns 0 on success, -ENOMEM on any allocation failure; partial
 * allocations are released before returning.
 */
static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
{
	int rc = -ENOMEM;
	struct auxtrace_queue *queue;
	struct cs_etm_auxtrace *etm = etmq->etm;

	cs_etm__clear_packet_queue(&tidq->packet_queue);

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
	tidq->trace_chan_id = trace_chan_id;
	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
					       queue->tid);
	/* Seed with the idle thread until a real previous packet is seen. */
	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);

	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->packet)
		goto out;

	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->prev_packet)
		goto out_free;

	/* Branch stacks are only needed when synthesizing last-branch data. */
	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		tidq->last_branch = zalloc(sz);
		if (!tidq->last_branch)
			goto out_free;
		tidq->last_branch_rb = zalloc(sz);
		if (!tidq->last_branch_rb)
			goto out_free;
	}

	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!tidq->event_buf)
		goto out_free;

	return 0;

out_free:
	/* zfree() tolerates members that were never allocated. */
	zfree(&tidq->last_branch_rb);
	zfree(&tidq->last_branch);
	zfree(&tidq->prev_packet);
	zfree(&tidq->packet);
out:
	return rc;
}
658 
/*
 * Look up the traceID queue for @trace_chan_id, creating and registering
 * one on first use. In per-thread mode all traffic shares a single queue
 * keyed by CS_ETM_PER_THREAD_TRACEID. Returns NULL on allocation failure.
 */
static struct cs_etm_traceid_queue
*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
	int idx;
	struct int_node *inode;
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue *tidq, **traceid_queues;
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (etm->per_thread_decoding)
		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;

	traceid_queues_list = etmq->traceid_queues_list;

	/*
	 * Check if the traceid_queue exist for this traceID by looking
	 * in the queue list.
	 */
	inode = intlist__find(traceid_queues_list, trace_chan_id);
	if (inode) {
		idx = (int)(intptr_t)inode->priv;
		return etmq->traceid_queues[idx];
	}

	/* We couldn't find a traceid_queue for this traceID, allocate one */
	tidq = malloc(sizeof(*tidq));
	if (!tidq)
		return NULL;

	memset(tidq, 0, sizeof(*tidq));

	/* Get a valid index for the new traceid_queue */
	idx = intlist__nr_entries(traceid_queues_list);
	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
	if (!inode)
		goto out_free;

	/* Associate this traceID with this index */
	inode->priv = (void *)(intptr_t)idx;

	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
		goto out_free;

	/* Grow the traceid_queues array by one unit */
	traceid_queues = etmq->traceid_queues;
	traceid_queues = reallocarray(traceid_queues,
				      idx + 1,
				      sizeof(*traceid_queues));

	/*
	 * On failure reallocarray() returns NULL and the original block of
	 * memory is left untouched.
	 */
	if (!traceid_queues)
		goto out_free;

	traceid_queues[idx] = tidq;
	etmq->traceid_queues = traceid_queues;

	return etmq->traceid_queues[idx];

out_free:
	/*
	 * Function intlist__remove() removes the inode from the list
	 * and delete the memory associated to it.
	 */
	intlist__remove(traceid_queues_list, inode);
	free(tidq);

	return NULL;
}
731 
732 struct cs_etm_packet_queue
733 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
734 {
735 	struct cs_etm_traceid_queue *tidq;
736 
737 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
738 	if (tidq)
739 		return &tidq->packet_queue;
740 
741 	return NULL;
742 }
743 
744 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
745 				struct cs_etm_traceid_queue *tidq)
746 {
747 	struct cs_etm_packet *tmp;
748 
749 	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
750 	    etm->synth_opts.instructions) {
751 		/*
752 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
753 		 * the next incoming packet.
754 		 *
755 		 * Threads and exception levels are also tracked for both the
756 		 * previous and current packets. This is because the previous
757 		 * packet is used for the 'from' IP for branch samples, so the
758 		 * thread at that time must also be assigned to that sample.
759 		 * Across discontinuity packets the thread can change, so by
760 		 * tracking the thread for the previous packet the branch sample
761 		 * will have the correct info.
762 		 */
763 		tmp = tidq->packet;
764 		tidq->packet = tidq->prev_packet;
765 		tidq->prev_packet = tmp;
766 		tidq->prev_packet_el = tidq->el;
767 		thread__put(tidq->prev_packet_thread);
768 		tidq->prev_packet_thread = thread__get(tidq->thread);
769 	}
770 }
771 
772 static void cs_etm__packet_dump(const char *pkt_string, void *data)
773 {
774 	const char *color = PERF_COLOR_BLUE;
775 	int len = strlen(pkt_string);
776 	struct cs_etm_queue *etmq = data;
777 	char queue_nr[64];
778 
779 	if (verbose)
780 		snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr);
781 	else
782 		queue_nr[0] = '\0';
783 
784 	if (len && (pkt_string[len-1] == '\n'))
785 		color_fprintf(stdout, color, "	%s%s", queue_nr, pkt_string);
786 	else
787 		color_fprintf(stdout, color, "	%s%s\n", queue_nr, pkt_string);
788 
789 	fflush(stdout);
790 }
791 
792 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
793 					  u64 *metadata, u32 etmidr)
794 {
795 	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
796 	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
797 	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
798 }
799 
800 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
801 					  u64 *metadata)
802 {
803 	t_params->protocol = CS_ETM_PROTO_ETMV4i;
804 	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
805 	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
806 	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
807 	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
808 	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
809 	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
810 }
811 
812 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
813 					u64 *metadata)
814 {
815 	t_params->protocol = CS_ETM_PROTO_ETE;
816 	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
817 	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
818 	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
819 	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
820 	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
821 	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
822 	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
823 }
824 
825 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
826 				     struct cs_etm_queue *etmq)
827 {
828 	struct int_node *inode;
829 
830 	intlist__for_each_entry(inode, etmq->traceid_list) {
831 		u64 *metadata = inode->priv;
832 		u64 architecture = metadata[CS_ETM_MAGIC];
833 		u32 etmidr;
834 
835 		switch (architecture) {
836 		case __perf_cs_etmv3_magic:
837 			etmidr = metadata[CS_ETM_ETMIDR];
838 			cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
839 			break;
840 		case __perf_cs_etmv4_magic:
841 			cs_etm__set_trace_param_etmv4(t_params++, metadata);
842 			break;
843 		case __perf_cs_ete_magic:
844 			cs_etm__set_trace_param_ete(t_params++, metadata);
845 			break;
846 		default:
847 			return -EINVAL;
848 		}
849 	}
850 
851 	return 0;
852 }
853 
854 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
855 				       struct cs_etm_queue *etmq,
856 				       enum cs_etm_decoder_operation mode)
857 {
858 	int ret = -EINVAL;
859 
860 	if (!(mode < CS_ETM_OPERATION_MAX))
861 		goto out;
862 
863 	d_params->packet_printer = cs_etm__packet_dump;
864 	d_params->operation = mode;
865 	d_params->data = etmq;
866 	d_params->formatted = etmq->format == FORMATTED;
867 	d_params->fsyncs = false;
868 	d_params->hsyncs = false;
869 	d_params->frame_aligned = true;
870 
871 	ret = 0;
872 out:
873 	return ret;
874 }
875 
876 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
877 			       struct auxtrace_buffer *buffer)
878 {
879 	int ret;
880 	const char *color = PERF_COLOR_BLUE;
881 	size_t buffer_used = 0;
882 
883 	fprintf(stdout, "\n");
884 	color_fprintf(stdout, color,
885 		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
886 		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
887 
888 	do {
889 		size_t consumed;
890 
891 		ret = cs_etm_decoder__process_data_block(
892 				etmq->decoder, buffer->offset,
893 				&((u8 *)buffer->data)[buffer_used],
894 				buffer->size - buffer_used, &consumed);
895 		if (ret)
896 			break;
897 
898 		buffer_used += consumed;
899 	} while (buffer_used < buffer->size);
900 
901 	cs_etm_decoder__reset(etmq->decoder);
902 }
903 
904 static int cs_etm__flush_events(struct perf_session *session,
905 				const struct perf_tool *tool)
906 {
907 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
908 						   struct cs_etm_auxtrace,
909 						   auxtrace);
910 	if (dump_trace)
911 		return 0;
912 
913 	if (!tool->ordered_events)
914 		return -EINVAL;
915 
916 	if (etm->timeless_decoding) {
917 		/*
918 		 * Pass tid = -1 to process all queues. But likely they will have
919 		 * already been processed on PERF_RECORD_EXIT anyway.
920 		 */
921 		return cs_etm__process_timeless_queues(etm, -1);
922 	}
923 
924 	return cs_etm__process_timestamped_queues(etm);
925 }
926 
927 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
928 {
929 	int idx;
930 	uintptr_t priv;
931 	struct int_node *inode, *tmp;
932 	struct cs_etm_traceid_queue *tidq;
933 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
934 
935 	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
936 		priv = (uintptr_t)inode->priv;
937 		idx = priv;
938 
939 		/* Free this traceid_queue from the array */
940 		tidq = etmq->traceid_queues[idx];
941 		thread__zput(tidq->thread);
942 		thread__zput(tidq->prev_packet_thread);
943 		zfree(&tidq->event_buf);
944 		zfree(&tidq->last_branch);
945 		zfree(&tidq->last_branch_rb);
946 		zfree(&tidq->prev_packet);
947 		zfree(&tidq->packet);
948 		zfree(&tidq);
949 
950 		/*
951 		 * Function intlist__remove() removes the inode from the list
952 		 * and delete the memory associated to it.
953 		 */
954 		intlist__remove(traceid_queues_list, inode);
955 	}
956 
957 	/* Then the RB tree itself */
958 	intlist__delete(traceid_queues_list);
959 	etmq->traceid_queues_list = NULL;
960 
961 	/* finally free the traceid_queues array */
962 	zfree(&etmq->traceid_queues);
963 }
964 
965 static void cs_etm__free_queue(void *priv)
966 {
967 	struct int_node *inode, *tmp;
968 	struct cs_etm_queue *etmq = priv;
969 
970 	if (!etmq)
971 		return;
972 
973 	cs_etm_decoder__free(etmq->decoder);
974 	cs_etm__free_traceid_queues(etmq);
975 
976 	if (etmq->own_traceid_list) {
977 		/* First remove all traceID/metadata nodes for the RB tree */
978 		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
979 			intlist__remove(etmq->own_traceid_list, inode);
980 
981 		/* Then the RB tree itself */
982 		intlist__delete(etmq->own_traceid_list);
983 	}
984 
985 	free(etmq);
986 }
987 
988 static void cs_etm__free_events(struct perf_session *session)
989 {
990 	unsigned int i;
991 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
992 						   struct cs_etm_auxtrace,
993 						   auxtrace);
994 	struct auxtrace_queues *queues = &aux->queues;
995 
996 	for (i = 0; i < queues->nr_queues; i++) {
997 		cs_etm__free_queue(queues->queue_array[i].priv);
998 		queues->queue_array[i].priv = NULL;
999 	}
1000 
1001 	auxtrace_queues__free(queues);
1002 }
1003 
1004 static void cs_etm__free(struct perf_session *session)
1005 {
1006 	int i;
1007 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1008 						   struct cs_etm_auxtrace,
1009 						   auxtrace);
1010 	cs_etm__free_events(session);
1011 	session->auxtrace = NULL;
1012 
1013 	for (i = 0; i < aux->num_cpu; i++)
1014 		zfree(&aux->metadata[i]);
1015 
1016 	zfree(&aux->metadata);
1017 	zfree(&aux);
1018 }
1019 
1020 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
1021 				      struct evsel *evsel)
1022 {
1023 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1024 						   struct cs_etm_auxtrace,
1025 						   auxtrace);
1026 
1027 	return evsel->core.attr.type == aux->pmu_type;
1028 }
1029 
1030 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
1031 					   ocsd_ex_level el)
1032 {
1033 	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
1034 
1035 	/*
1036 	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
1037 	 * running at EL1 assume everything is the host.
1038 	 */
1039 	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
1040 		return &etmq->etm->session->machines.host;
1041 
1042 	/*
1043 	 * Not perfect, but otherwise assume anything in EL1 is the default
1044 	 * guest, and everything else is the host. Distinguishing between guest
1045 	 * and host userspaces isn't currently supported either. Neither is
1046 	 * multiple guest support. All this does is reduce the likeliness of
1047 	 * decode errors where we look into the host kernel maps when it should
1048 	 * have been the guest maps.
1049 	 */
1050 	switch (el) {
1051 	case ocsd_EL1:
1052 		return machines__find_guest(&etmq->etm->session->machines,
1053 					    DEFAULT_GUEST_KERNEL_ID);
1054 	case ocsd_EL3:
1055 	case ocsd_EL2:
1056 	case ocsd_EL0:
1057 	case ocsd_EL_unknown:
1058 	default:
1059 		return &etmq->etm->session->machines.host;
1060 	}
1061 }
1062 
1063 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
1064 			   ocsd_ex_level el)
1065 {
1066 	struct machine *machine = cs_etm__get_machine(etmq, el);
1067 
1068 	if (address >= machine__kernel_start(machine)) {
1069 		if (machine__is_host(machine))
1070 			return PERF_RECORD_MISC_KERNEL;
1071 		else
1072 			return PERF_RECORD_MISC_GUEST_KERNEL;
1073 	} else {
1074 		if (machine__is_host(machine))
1075 			return PERF_RECORD_MISC_USER;
1076 		else {
1077 			/*
1078 			 * Can't really happen at the moment because
1079 			 * cs_etm__get_machine() will always return
1080 			 * machines.host for any non EL1 trace.
1081 			 */
1082 			return PERF_RECORD_MISC_GUEST_USER;
1083 		}
1084 	}
1085 }
1086 
/*
 * Memory access callback used by the OpenCSD decoder: read @size bytes of
 * traced program memory at @address into @buffer, resolving the address via
 * the maps of the thread tracked for @trace_chan_id.
 *
 * Returns the number of bytes actually read, or 0 on any failure - a
 * zero-length result tells the decoder no memory is available there.
 */
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8  cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked EL along side the PID in cs_etm__set_thread()
	 * so double check that it matches what OpenCSD thinks as well. It
	 * doesn't distinguish between EL0 and EL1 for this mem access callback
	 * so we had to do the extra tracking. Skip validation if it's any of
	 * the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	/* Don't keep retrying a DSO whose data already failed to load. */
	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	/* Translate the virtual address into a file offset within the DSO. */
	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		/* Warn once globally, and once per DSO with the address. */
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		if (!dso__auxtrace_warned(dso)) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
				address,
				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
			dso__set_auxtrace_warned(dso);
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}
1161 
1162 static struct cs_etm_queue *cs_etm__alloc_queue(void)
1163 {
1164 	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
1165 	if (!etmq)
1166 		return NULL;
1167 
1168 	etmq->traceid_queues_list = intlist__new(NULL);
1169 	if (!etmq->traceid_queues_list)
1170 		goto out_free;
1171 
1172 	/*
1173 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
1174 	 * has to be made for each packet that gets decoded, optimizing access
1175 	 * in anything other than a sequential array is worth doing.
1176 	 */
1177 	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
1178 	if (!etmq->traceid_list)
1179 		goto out_free;
1180 
1181 	return etmq;
1182 
1183 out_free:
1184 	intlist__delete(etmq->traceid_queues_list);
1185 	free(etmq);
1186 
1187 	return NULL;
1188 }
1189 
1190 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1191 			       struct auxtrace_queue *queue,
1192 			       unsigned int queue_nr)
1193 {
1194 	struct cs_etm_queue *etmq = queue->priv;
1195 
1196 	if (etmq)
1197 		return 0;
1198 
1199 	etmq = cs_etm__alloc_queue();
1200 
1201 	if (!etmq)
1202 		return -ENOMEM;
1203 
1204 	queue->priv = etmq;
1205 	etmq->etm = etm;
1206 	etmq->queue_nr = queue_nr;
1207 	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
1208 	etmq->offset = 0;
1209 	etmq->sink_id = SINK_UNSET;
1210 
1211 	return 0;
1212 }
1213 
/*
 * Decode @etmq from the beginning of its trace until the first CS timestamp
 * is found, then seed the auxtrace min heap with it so that all the CPU-wide
 * queues can later be processed in chronological order.
 *
 * Returns 0 if the queue ran out of data before any timestamp was seen, a
 * negative value on error, otherwise the auxtrace_heap__add() result.
 */
static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario.  As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs.  So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp.  The timestamp is then added to the auxtrace min heap
	 * in order to know what nibble (of all the etmqs) to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq.  Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block.  The decoder will stop when
		 * encountering a CS timestamp, a full packet queue or the end of
		 * trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one.  Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them.  As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp.  Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute.  Once the same has been done for all the traceID
	 * queues of each etmq, redenring and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}
1284 
/*
 * Linearise the circular last-branch buffer (tidq->last_branch_rb) into
 * tidq->last_branch, most recent branch first, so it can be attached to a
 * sample as a PERF_SAMPLE_BRANCH_STACK payload.
 */
static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps.  First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over.  The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}
1328 
1329 static inline
1330 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1331 {
1332 	tidq->last_branch_pos = 0;
1333 	tidq->last_branch_rb->nr = 0;
1334 }
1335 
1336 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1337 					 u8 trace_chan_id, u64 addr)
1338 {
1339 	u8 instrBytes[2];
1340 
1341 	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1342 			   instrBytes, 0);
1343 	/*
1344 	 * T32 instruction size is indicated by bits[15:11] of the first
1345 	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1346 	 * denote a 32-bit instruction.
1347 	 */
1348 	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1349 }
1350 
1351 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1352 {
1353 	/*
1354 	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1355 	 * appear in samples.
1356 	 */
1357 	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1358 	    packet->sample_type == CS_ETM_EXCEPTION)
1359 		return 0;
1360 
1361 	return packet->start_addr;
1362 }
1363 
1364 static inline
1365 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1366 {
1367 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1368 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1369 		return 0;
1370 
1371 	return packet->end_addr - packet->last_instr_size;
1372 }
1373 
1374 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1375 				     u64 trace_chan_id,
1376 				     const struct cs_etm_packet *packet,
1377 				     u64 offset)
1378 {
1379 	if (packet->isa == CS_ETM_ISA_T32) {
1380 		u64 addr = packet->start_addr;
1381 
1382 		while (offset) {
1383 			addr += cs_etm__t32_instr_size(etmq,
1384 						       trace_chan_id, addr);
1385 			offset--;
1386 		}
1387 		return addr;
1388 	}
1389 
1390 	/* Assume a 4 byte instruction size (A32/A64) */
1391 	return packet->start_addr + offset * 4;
1392 }
1393 
1394 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1395 					  struct cs_etm_traceid_queue *tidq)
1396 {
1397 	struct branch_stack *bs = tidq->last_branch_rb;
1398 	struct branch_entry *be;
1399 
1400 	/*
1401 	 * The branches are recorded in a circular buffer in reverse
1402 	 * chronological order: we start recording from the last element of the
1403 	 * buffer down.  After writing the first element of the stack, move the
1404 	 * insert position back to the end of the buffer.
1405 	 */
1406 	if (!tidq->last_branch_pos)
1407 		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1408 
1409 	tidq->last_branch_pos -= 1;
1410 
1411 	be       = &bs->entries[tidq->last_branch_pos];
1412 	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1413 	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1414 	/* No support for mispredict */
1415 	be->flags.mispred = 0;
1416 	be->flags.predicted = 1;
1417 
1418 	/*
1419 	 * Increment bs->nr until reaching the number of last branches asked by
1420 	 * the user on the command line.
1421 	 */
1422 	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1423 		bs->nr += 1;
1424 }
1425 
1426 static int cs_etm__inject_event(union perf_event *event,
1427 			       struct perf_sample *sample, u64 type)
1428 {
1429 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1430 	return perf_event__synthesize_sample(event, type, 0, sample);
1431 }
1432 
1433 
/*
 * Advance @etmq to the next auxtrace buffer of its queue, dropping the data
 * of the previous buffer, and point etmq->buf at the new payload.
 *
 * Returns the new buffer length, 0 when there is no more data, or -ENOMEM
 * if the buffer's data could not be loaded from the perf data file.
 */
static int
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}
1475 
1476 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1477 			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1478 			       ocsd_ex_level el)
1479 {
1480 	struct machine *machine = cs_etm__get_machine(etmq, el);
1481 
1482 	if (tid != -1) {
1483 		thread__zput(tidq->thread);
1484 		tidq->thread = machine__find_thread(machine, -1, tid);
1485 	}
1486 
1487 	/* Couldn't find a known thread */
1488 	if (!tidq->thread)
1489 		tidq->thread = machine__idle_thread(machine);
1490 
1491 	tidq->el = el;
1492 }
1493 
1494 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1495 			    u8 trace_chan_id, ocsd_ex_level el)
1496 {
1497 	struct cs_etm_traceid_queue *tidq;
1498 
1499 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1500 	if (!tidq)
1501 		return -EINVAL;
1502 
1503 	cs_etm__set_thread(etmq, tidq, tid, el);
1504 	return 0;
1505 }
1506 
1507 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1508 {
1509 	return !!etmq->etm->timeless_decoding;
1510 }
1511 
1512 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1513 			      u64 trace_chan_id,
1514 			      const struct cs_etm_packet *packet,
1515 			      struct perf_sample *sample)
1516 {
1517 	/*
1518 	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1519 	 * packet, so directly bail out with 'insn_len' = 0.
1520 	 */
1521 	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1522 		sample->insn_len = 0;
1523 		return;
1524 	}
1525 
1526 	/*
1527 	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1528 	 * cs_etm__t32_instr_size().
1529 	 */
1530 	if (packet->isa == CS_ETM_ISA_T32)
1531 		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1532 							  sample->ip);
1533 	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
1534 	else
1535 		sample->insn_len = 4;
1536 
1537 	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1538 			   (void *)sample->insn, 0);
1539 }
1540 
1541 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1542 {
1543 	struct cs_etm_auxtrace *etm = etmq->etm;
1544 
1545 	if (etm->has_virtual_ts)
1546 		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1547 	else
1548 		return cs_timestamp;
1549 }
1550 
1551 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1552 					       struct cs_etm_traceid_queue *tidq)
1553 {
1554 	struct cs_etm_auxtrace *etm = etmq->etm;
1555 	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1556 
1557 	if (!etm->timeless_decoding && etm->has_virtual_ts)
1558 		return packet_queue->cs_timestamp;
1559 	else
1560 		return etm->latest_kernel_timestamp;
1561 }
1562 
1563 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1564 					    struct cs_etm_traceid_queue *tidq,
1565 					    u64 addr, u64 period)
1566 {
1567 	int ret = 0;
1568 	struct cs_etm_auxtrace *etm = etmq->etm;
1569 	union perf_event *event = tidq->event_buf;
1570 	struct perf_sample sample;
1571 
1572 	perf_sample__init(&sample, /*all=*/true);
1573 	event->sample.header.type = PERF_RECORD_SAMPLE;
1574 	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1575 	event->sample.header.size = sizeof(struct perf_event_header);
1576 
1577 	/* Set time field based on etm auxtrace config. */
1578 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1579 
1580 	sample.ip = addr;
1581 	sample.pid = thread__pid(tidq->thread);
1582 	sample.tid = thread__tid(tidq->thread);
1583 	sample.id = etmq->etm->instructions_id;
1584 	sample.stream_id = etmq->etm->instructions_id;
1585 	sample.period = period;
1586 	sample.cpu = tidq->packet->cpu;
1587 	sample.flags = tidq->prev_packet->flags;
1588 	sample.cpumode = event->sample.header.misc;
1589 
1590 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1591 
1592 	if (etm->synth_opts.last_branch)
1593 		sample.branch_stack = tidq->last_branch;
1594 
1595 	if (etm->synth_opts.inject) {
1596 		ret = cs_etm__inject_event(event, &sample,
1597 					   etm->instructions_sample_type);
1598 		if (ret)
1599 			return ret;
1600 	}
1601 
1602 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1603 
1604 	if (ret)
1605 		pr_err(
1606 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1607 			ret);
1608 
1609 	perf_sample__exit(&sample);
1610 	return ret;
1611 }
1612 
1613 /*
1614  * The cs etm packet encodes an instruction range between a branch target
1615  * and the next taken branch. Generate sample accordingly.
1616  */
1617 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1618 				       struct cs_etm_traceid_queue *tidq)
1619 {
1620 	int ret = 0;
1621 	struct cs_etm_auxtrace *etm = etmq->etm;
1622 	struct perf_sample sample = {.ip = 0,};
1623 	union perf_event *event = tidq->event_buf;
1624 	struct dummy_branch_stack {
1625 		u64			nr;
1626 		u64			hw_idx;
1627 		struct branch_entry	entries;
1628 	} dummy_bs;
1629 	u64 ip;
1630 
1631 	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1632 
1633 	event->sample.header.type = PERF_RECORD_SAMPLE;
1634 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1635 						     tidq->prev_packet_el);
1636 	event->sample.header.size = sizeof(struct perf_event_header);
1637 
1638 	/* Set time field based on etm auxtrace config. */
1639 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1640 
1641 	sample.ip = ip;
1642 	sample.pid = thread__pid(tidq->prev_packet_thread);
1643 	sample.tid = thread__tid(tidq->prev_packet_thread);
1644 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1645 	sample.id = etmq->etm->branches_id;
1646 	sample.stream_id = etmq->etm->branches_id;
1647 	sample.period = 1;
1648 	sample.cpu = tidq->packet->cpu;
1649 	sample.flags = tidq->prev_packet->flags;
1650 	sample.cpumode = event->sample.header.misc;
1651 
1652 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1653 			  &sample);
1654 
1655 	/*
1656 	 * perf report cannot handle events without a branch stack
1657 	 */
1658 	if (etm->synth_opts.last_branch) {
1659 		dummy_bs = (struct dummy_branch_stack){
1660 			.nr = 1,
1661 			.hw_idx = -1ULL,
1662 			.entries = {
1663 				.from = sample.ip,
1664 				.to = sample.addr,
1665 			},
1666 		};
1667 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1668 	}
1669 
1670 	if (etm->synth_opts.inject) {
1671 		ret = cs_etm__inject_event(event, &sample,
1672 					   etm->branches_sample_type);
1673 		if (ret)
1674 			return ret;
1675 	}
1676 
1677 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1678 
1679 	if (ret)
1680 		pr_err(
1681 		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1682 		ret);
1683 
1684 	return ret;
1685 }
1686 
/*
 * Build the perf_event_attr for the synthesized events (branch and/or
 * instruction samples) from the first CS ETM evsel found in the session's
 * evlist, deliver the attr events, and record the sample types and IDs for
 * later sample generation. Returns 0 on success or a negative error.
 */
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	/* Locate the evsel that carries CoreSight trace data. */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	/* Derive the synthesized attr from the traced evsel's attributes. */
	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	/* 0 is reserved; presumably guards against wrap-around. */
	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		/* ADDR only applies to branch samples; clear it again. */
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}
1771 
/*
 * Process the current range packet of @tidq: update the last-branch ring
 * buffer, synthesize periodic instruction samples and/or branch samples as
 * configured, then swap packet and prev_packet for the next iteration.
 * Returns 0 on success or a negative error from sample delivery.
 */
static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_prev;

	/* Get instructions remainder from previous packet */
	instrs_prev = tidq->period_instructions;

	tidq->period_instructions += tidq->packet->instr_count;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->synth_opts.instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/*
		 * Below diagram demonstrates the instruction samples
		 * generation flows:
		 *
		 *    Instrs     Instrs       Instrs       Instrs
		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
		 *    |            |            |            |
		 *    V            V            V            V
		 *   --------------------------------------------------
		 *            ^                                  ^
		 *            |                                  |
		 *         Period                             Period
		 *    instructions(Pi)                   instructions(Pi')
		 *
		 *            |                                  |
		 *            \---------------- -----------------/
		 *                             V
		 *                 tidq->packet->instr_count
		 *
		 * Instrs Sample(n...) are the synthesised samples occurring
		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line.  Sample(n) is being the
		 * last sample before the current etm packet, n+1 to n+3
		 * samples are generated from the current etm packet.
		 *
		 * tidq->packet->instr_count represents the number of
		 * instructions in the current etm packet.
		 *
		 * Period instructions (Pi) contains the number of
		 * instructions executed after the sample point(n) from the
		 * previous etm packet.  This will always be less than
		 * etm->instructions_sample_period.
		 *
		 * When generate new samples, it combines with two parts
		 * instructions, one is the tail of the old packet and another
		 * is the head of the new coming packet, to generate
		 * sample(n+1); sample(n+2) and sample(n+3) consume the
		 * instructions with sample period.  After sample(n+3), the rest
		 * instructions will be used by later packet and it is assigned
		 * to tidq->period_instructions for next round calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
				etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	/* Rotate packet/prev_packet so the next decode round reuses them. */
	cs_etm__packet_swap(etm, tidq);

	return 0;
}
1900 
1901 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1902 {
1903 	/*
1904 	 * When the exception packet is inserted, whether the last instruction
1905 	 * in previous range packet is taken branch or not, we need to force
1906 	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
1907 	 * to generate branch sample for the instruction range before the
1908 	 * exception is trapped to kernel or before the exception returning.
1909 	 *
1910 	 * The exception packet includes the dummy address values, so don't
1911 	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
1912 	 * for generating instruction and branch samples.
1913 	 */
1914 	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1915 		tidq->prev_packet->last_instr_taken_branch = true;
1916 
1917 	return 0;
1918 }
1919 
/*
 * Flush pending state of @tidq at a discontinuity or end of trace: emit a
 * final instruction sample covering the leftover period (when last-branch
 * and instruction synthesis are both enabled), a final branch sample, then
 * swap packets and reset the last-branch ring buffer.
 * Returns 0 on success or a negative error from sample delivery.
 */
static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		/* The leftover period has now been accounted for. */
		tidq->period_instructions = 0;

	}

	if (etm->synth_opts.branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	cs_etm__packet_swap(etm, tidq);

	/* Reset last branches after flush the trace */
	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return err;
}
1973 
/*
 * Handle the end of a trace data block for @tidq. Unlike cs_etm__flush(),
 * no branch sample is generated here because 'etmq->packet' is stale at
 * this point; only the last-branch ring buffer is drained into a final
 * instruction sample. Returns 0 on success or a negative error.
 */
static int cs_etm__end_block(struct cs_etm_queue *etmq,
			     struct cs_etm_traceid_queue *tidq)
{
	int err;

	/*
	 * It has no new packet coming and 'etmq->packet' contains the stale
	 * packet which was set at the previous time with packets swapping;
	 * so skip to generate branch sample to avoid stale packet.
	 *
	 * For this case only flush branch stack and generate a last branch
	 * event for the branches left in the circular buffer at the end of
	 * the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		/* The leftover period has now been accounted for. */
		tidq->period_instructions = 0;
	}

	return 0;
}
2013 /*
2014  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2015  *			   if need be.
2016  * Returns:	< 0	if error
2017  *		= 0	if no more auxtrace_buffer to read
2018  *		> 0	if the current buffer isn't empty yet
2019  */
2020 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2021 {
2022 	int ret;
2023 
2024 	if (!etmq->buf_len) {
2025 		ret = cs_etm__get_trace(etmq);
2026 		if (ret <= 0)
2027 			return ret;
2028 		/*
2029 		 * We cannot assume consecutive blocks in the data file
2030 		 * are contiguous, reset the decoder to force re-sync.
2031 		 */
2032 		ret = cs_etm_decoder__reset(etmq->decoder);
2033 		if (ret)
2034 			return ret;
2035 	}
2036 
2037 	return etmq->buf_len;
2038 }
2039 
/*
 * Check whether the instruction ending at @end_addr is an SVC (supervisor
 * call), by reading the instruction back from the traced program's memory
 * with cs_etm__mem_access() and matching it against the SVC encoding for
 * the packet's ISA.
 *
 * Note: the return value of cs_etm__mem_access() is deliberately ignored;
 * if the read fails the zero-initialised instruction word will not match
 * any SVC encoding and the function returns false.
 */
static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
				 struct cs_etm_packet *packet,
				 u64 end_addr)
{
	/* Initialise to keep compiler happy */
	u16 instr16 = 0;
	u32 instr32 = 0;
	u64 addr;

	switch (packet->isa) {
	case CS_ETM_ISA_T32:
		/*
		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'15         b'8
		 * +-----------------+--------+
		 * | 1 1 0 1 1 1 1 1 |  imm8  |
		 * +-----------------+--------+
		 *
		 * According to the specification, it only defines SVC for T32
		 * with 16 bits instruction and has no definition for 32bits;
		 * so below only read 2 bytes as instruction size for T32.
		 */
		addr = end_addr - 2;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
				   (u8 *)&instr16, 0);
		if ((instr16 & 0xFF00) == 0xDF00)
			return true;

		break;
	case CS_ETM_ISA_A32:
		/*
		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'31 b'28 b'27 b'24
		 * +---------+---------+-------------------------+
		 * |  !1111  | 1 1 1 1 |        imm24            |
		 * +---------+---------+-------------------------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		/* Condition field must not be b'1111 (unconditional space) */
		if ((instr32 & 0x0F000000) == 0x0F000000 &&
		    (instr32 & 0xF0000000) != 0xF0000000)
			return true;

		break;
	case CS_ETM_ISA_A64:
		/*
		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
		 *
		 *  b'31               b'21           b'4     b'0
		 * +-----------------------+---------+-----------+
		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
		 * +-----------------------+---------+-----------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0xFFE0001F) == 0xd4000001)
			return true;

		break;
	case CS_ETM_ISA_UNKNOWN:
	default:
		break;
	}

	return false;
}
2110 
2111 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2112 			       struct cs_etm_traceid_queue *tidq, u64 magic)
2113 {
2114 	u8 trace_chan_id = tidq->trace_chan_id;
2115 	struct cs_etm_packet *packet = tidq->packet;
2116 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2117 
2118 	if (magic == __perf_cs_etmv3_magic)
2119 		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2120 			return true;
2121 
2122 	/*
2123 	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2124 	 * HVC cases; need to check if it's SVC instruction based on
2125 	 * packet address.
2126 	 */
2127 	if (magic == __perf_cs_etmv4_magic) {
2128 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2129 		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2130 					 prev_packet->end_addr))
2131 			return true;
2132 	}
2133 
2134 	return false;
2135 }
2136 
2137 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2138 				       u64 magic)
2139 {
2140 	struct cs_etm_packet *packet = tidq->packet;
2141 
2142 	if (magic == __perf_cs_etmv3_magic)
2143 		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2144 		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2145 		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2146 		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2147 		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2148 			return true;
2149 
2150 	if (magic == __perf_cs_etmv4_magic)
2151 		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2152 		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2153 		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2154 		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2155 		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2156 		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2157 		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2158 			return true;
2159 
2160 	return false;
2161 }
2162 
/*
 * Return true if the exception packet at the head of @tidq describes a
 * synchronous exception (trap, fault, alignment error, non-SVC call, or
 * an implementation defined exception number) for the trace protocol
 * identified by @magic.
 */
static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u64 magic)
{
	u8 trace_chan_id = tidq->trace_chan_id;
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
		    packet->exception_number == CS_ETMV3_EXC_HYP ||
		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
			return true;

	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
			return true;

		/*
		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
		 * (SMC, HVC) are taken as sync exceptions.
		 */
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
					  prev_packet->end_addr))
			return true;

		/*
		 * ETMv4 has 5 bits for exception number; if the numbers
		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
		 * they are implementation defined exceptions.
		 *
		 * For this case, simply take it as sync exception.
		 */
		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
		    packet->exception_number <= CS_ETMV4_EXC_END)
			return true;
	}

	return false;
}
2211 
/*
 * Map the packet at the head of @tidq onto perf branch sample flags
 * (PERF_IP_FLAG_*).  Depending on the packet type this sets flags on the
 * current packet and/or retroactively fixes up prev_packet's flags once
 * the current packet provides the missing context (e.g. whether an
 * exception return came back from a system call).
 *
 * Must be called before the packets are swapped, since it relies on the
 * address information of both current and previous packets.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
				    struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * Immediate branch instruction with neither link nor
		 * return flag, it's a normal branch instruction within
		 * the function.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * Immediate branch instruction with link (e.g. BL), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with link (e.g. BLR), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with subtype of
		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
		 * function return for A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Indirect branch instruction without link (e.g. BR), usually
		 * this is used for function return, especially for functions
		 * within dynamic link lib.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Decoder might insert a discontinuity in the middle of
		 * instruction packets, fixup prev_packet with flag
		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;

		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows SVC instruction,
		 * it needs to calibrate the previous packet sample flags
		 * as PERF_IP_FLAG_SYSCALLRET.
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id,
					 packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous, if the previous packet is
		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
		 * for previous packet.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for system call. */
		if (cs_etm__is_syscall(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exceptions are triggered by external signals from bus,
		 * interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, exception is caused by trap, instruction &
		 * data fault, or alignment errors.
		 */
		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * When the exception packet is inserted, since exception
		 * packet is not used standalone for generating samples
		 * and it's affiliation to the previous instruction range
		 * packet; so set previous range packet flags to tell perf
		 * it is an exception taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * When the exception return packet is inserted, since
		 * exception return packet is not used standalone for
		 * generating samples and it's affiliation to the previous
		 * instruction range packet; so set previous range packet
		 * flags to tell perf it is an exception return branch.
		 *
		 * The exception return can be for either system call or
		 * other exception types; unfortunately the packet doesn't
		 * contain exception type related info so we cannot decide
		 * the exception type purely based on exception return packet.
		 * If we record the exception number from exception packet and
		 * reuse it for exception return packet, this is not reliable
		 * due the trace can be discontinuity or the interrupt can
		 * be nested, thus the recorded exception number cannot be
		 * used for exception return packet for these two cases.
		 *
		 * For exception return packet, we only need to distinguish the
		 * packet is for system call or for other types.  Thus the
		 * decision can be deferred when receive the next packet which
		 * contains the return address, based on the return address we
		 * can read out the previous instruction and check if it's a
		 * system call instruction and then calibrate the sample flag
		 * as needed.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}
2390 
2391 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2392 {
2393 	int ret = 0;
2394 	size_t processed = 0;
2395 
2396 	/*
2397 	 * Packets are decoded and added to the decoder's packet queue
2398 	 * until the decoder packet processing callback has requested that
2399 	 * processing stops or there is nothing left in the buffer.  Normal
2400 	 * operations that stop processing are a timestamp packet or a full
2401 	 * decoder buffer queue.
2402 	 */
2403 	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2404 						 etmq->offset,
2405 						 &etmq->buf[etmq->buf_used],
2406 						 etmq->buf_len,
2407 						 &processed);
2408 	if (ret)
2409 		goto out;
2410 
2411 	etmq->offset += processed;
2412 	etmq->buf_used += processed;
2413 	etmq->buf_len -= processed;
2414 
2415 out:
2416 	return ret;
2417 }
2418 
/*
 * Drain the packet queue attached to @tidq, synthesizing samples for each
 * packet in turn until the queue is empty or an error occurs.
 *
 * Returns the value from cs_etm_decoder__get_packet() (<= 0) when the
 * queue is drained, a negative error code otherwise.
 */
static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
					 struct cs_etm_traceid_queue *tidq)
{
	int ret;
	struct cs_etm_packet_queue *packet_queue;

	packet_queue = &tidq->packet_queue;

	/* Process each packet in this chunk */
	while (1) {
		ret = cs_etm_decoder__get_packet(packet_queue,
						 tidq->packet);
		if (ret <= 0)
			/*
			 * Stop processing this chunk on
			 * end of data or error
			 */
			break;

		/*
		 * Since packet addresses are swapped in packet
		 * handling within below switch() statements,
		 * thus setting sample flags must be called
		 * prior to switch() statement to use address
		 * information before packets swapping.
		 */
		ret = cs_etm__set_sample_flags(etmq, tidq);
		if (ret < 0)
			break;

		switch (tidq->packet->sample_type) {
		case CS_ETM_RANGE:
			/*
			 * If the packet contains an instruction
			 * range, generate instruction sequence
			 * events.
			 */
			cs_etm__sample(etmq, tidq);
			break;
		case CS_ETM_EXCEPTION:
		case CS_ETM_EXCEPTION_RET:
			/*
			 * If the exception packet is coming,
			 * make sure the previous instruction
			 * range packet to be handled properly.
			 */
			cs_etm__exception(tidq);
			break;
		case CS_ETM_DISCONTINUITY:
			/*
			 * Discontinuity in trace, flush
			 * previous branch stack
			 */
			cs_etm__flush(etmq, tidq);
			break;
		case CS_ETM_EMPTY:
			/*
			 * Should not receive empty packet,
			 * report error.
			 */
			pr_err("CS ETM Trace: empty packet\n");
			return -EINVAL;
		default:
			break;
		}
	}

	return ret;
}
2488 
2489 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2490 {
2491 	int idx;
2492 	struct int_node *inode;
2493 	struct cs_etm_traceid_queue *tidq;
2494 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2495 
2496 	intlist__for_each_entry(inode, traceid_queues_list) {
2497 		idx = (int)(intptr_t)inode->priv;
2498 		tidq = etmq->traceid_queues[idx];
2499 
2500 		/* Ignore return value */
2501 		cs_etm__process_traceid_queue(etmq, tidq);
2502 	}
2503 }
2504 
/*
 * Timeless decode loop for per-thread mode: every buffer in this queue
 * belongs to a single thread, tracked via the special
 * CS_ETM_PER_THREAD_TRACEID queue.  Buffers are fetched and decoded one
 * by one until the queue is exhausted.
 *
 * Returns 0 when there is no more data, a negative error code otherwise.
 */
static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
	if (!tidq)
		return -EINVAL;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * Process each packet in this chunk, nothing to do if
			 * an error occurs other than hoping the next one will
			 * be better.
			 */
			err = cs_etm__process_traceid_queue(etmq, tidq);

		} while (etmq->buf_len);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
	}

	/* Not reached: the loop above only exits via return */
	return err;
}
2542 
/*
 * Timeless decode loop for per-cpu mode: a single buffer may interleave
 * trace from several channels, so after each decoded block every traceID
 * queue on this etmq is processed and, at buffer boundaries, flushed.
 *
 * Returns 0 when there is no more data, a negative error code otherwise.
 */
static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
{
	int idx, err = 0;
	struct cs_etm_traceid_queue *tidq;
	struct int_node *inode;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * cs_etm__run_per_thread_timeless_decoder() runs on a
			 * single traceID queue because each TID has a separate
			 * buffer. But here in per-cpu mode we need to iterate
			 * over each channel instead.
			 */
			intlist__for_each_entry(inode,
						etmq->traceid_queues_list) {
				idx = (int)(intptr_t)inode->priv;
				tidq = etmq->traceid_queues[idx];
				cs_etm__process_traceid_queue(etmq, tidq);
			}
		} while (etmq->buf_len);

		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
			idx = (int)(intptr_t)inode->priv;
			tidq = etmq->traceid_queues[idx];
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
			if (err)
				return err;
		}
	}

	/* Not reached: the loop above only exits via return */
	return err;
}
2587 
2588 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2589 					   pid_t tid)
2590 {
2591 	unsigned int i;
2592 	struct auxtrace_queues *queues = &etm->queues;
2593 
2594 	for (i = 0; i < queues->nr_queues; i++) {
2595 		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2596 		struct cs_etm_queue *etmq = queue->priv;
2597 		struct cs_etm_traceid_queue *tidq;
2598 
2599 		if (!etmq)
2600 			continue;
2601 
2602 		if (etm->per_thread_decoding) {
2603 			tidq = cs_etm__etmq_get_traceid_queue(
2604 				etmq, CS_ETM_PER_THREAD_TRACEID);
2605 
2606 			if (!tidq)
2607 				continue;
2608 
2609 			if (tid == -1 || thread__tid(tidq->thread) == tid)
2610 				cs_etm__run_per_thread_timeless_decoder(etmq);
2611 		} else
2612 			cs_etm__run_per_cpu_timeless_decoder(etmq);
2613 	}
2614 
2615 	return 0;
2616 }
2617 
/*
 * Decode all queues in global timestamp order.  A min heap is primed with
 * the first timestamp from each queue; the queue/traceID pair holding the
 * earliest timestamp is processed, more trace is decoded for it and it is
 * re-inserted with its next timestamp.  This repeats until all trace is
 * consumed, after which every traceID queue is flushed.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr, i;
	u8 trace_chan_id;
	u64 cs_timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	/*
	 * Pre-populate the heap with one entry from each queue so that we can
	 * start processing in time order across all queues.
	 */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
		if (ret)
			return ret;
	}

	while (1) {
		if (!etm->heap.heap_cnt)
			break;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this traceID,
			 * which means something somewhere went very wrong.  No
			 * other choice than simply exit.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new auxtrace_buffer
		 * if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!cs_timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there is no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues.  Since we
			 * did not get a timestamp, there is no more traces to
			 * process in this auxtrace_buffer.  As such empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded.  They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
	}

	/* All trace consumed: flush leftover last-branch entries per queue */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		struct int_node *inode;

		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
			int idx = (int)(intptr_t)inode->priv;

			/* Flush any remaining branch stack entries */
			tidq = etmq->traceid_queues[idx];
			ret = cs_etm__end_block(etmq, tidq);
			if (ret)
				return ret;
		}
	}
out:
	return ret;
}
2747 
2748 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2749 					union perf_event *event)
2750 {
2751 	struct thread *th;
2752 
2753 	if (etm->timeless_decoding)
2754 		return 0;
2755 
2756 	/*
2757 	 * Add the tid/pid to the log so that we can get a match when we get a
2758 	 * contextID from the decoder. Only track for the host: only kernel
2759 	 * trace is supported for guests which wouldn't need pids so this should
2760 	 * be fine.
2761 	 */
2762 	th = machine__findnew_thread(&etm->session->machines.host,
2763 				     event->itrace_start.pid,
2764 				     event->itrace_start.tid);
2765 	if (!th)
2766 		return -ENOMEM;
2767 
2768 	thread__put(th);
2769 
2770 	return 0;
2771 }
2772 
2773 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2774 					   union perf_event *event)
2775 {
2776 	struct thread *th;
2777 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2778 
2779 	/*
2780 	 * Context switch in per-thread mode are irrelevant since perf
2781 	 * will start/stop tracing as the process is scheduled.
2782 	 */
2783 	if (etm->timeless_decoding)
2784 		return 0;
2785 
2786 	/*
2787 	 * SWITCH_IN events carry the next process to be switched out while
2788 	 * SWITCH_OUT events carry the process to be switched in.  As such
2789 	 * we don't care about IN events.
2790 	 */
2791 	if (!out)
2792 		return 0;
2793 
2794 	/*
2795 	 * Add the tid/pid to the log so that we can get a match when we get a
2796 	 * contextID from the decoder. Only track for the host: only kernel
2797 	 * trace is supported for guests which wouldn't need pids so this should
2798 	 * be fine.
2799 	 */
2800 	th = machine__findnew_thread(&etm->session->machines.host,
2801 				     event->context_switch.next_prev_pid,
2802 				     event->context_switch.next_prev_tid);
2803 	if (!th)
2804 		return -ENOMEM;
2805 
2806 	thread__put(th);
2807 
2808 	return 0;
2809 }
2810 
/*
 * perf tool callback for ordinary (non-auxtrace) events.  Dispatches the
 * handful of event types the ETM decoder cares about: process exit (to
 * kick off per-thread decode early), itrace start and cpu-wide context
 * switches (to track pid/tid pairs), and AUX records (to keep the latest
 * kernel timestamp for synthesized samples).
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	switch (event->header.type) {
	case PERF_RECORD_EXIT:
		/*
		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
		 * start the decode because we know there will be no more trace from
		 * this thread. All this does is emit samples earlier than waiting for
		 * the flush in other modes, but with timestamps it makes sense to wait
		 * for flush so that events from different threads are interleaved
		 * properly.
		 */
		if (etm->per_thread_decoding && etm->timeless_decoding)
			return cs_etm__process_timeless_queues(etm,
							       event->fork.tid);
		break;

	case PERF_RECORD_ITRACE_START:
		return cs_etm__process_itrace_start(etm, event);

	case PERF_RECORD_SWITCH_CPU_WIDE:
		return cs_etm__process_switch_cpu_wide(etm, event);

	case PERF_RECORD_AUX:
		/*
		 * Record the latest kernel timestamp available in the header
		 * for samples so that synthesised samples occur from this point
		 * onwards.
		 */
		if (sample->time && (sample->time != (u64)-1))
			etm->latest_kernel_timestamp = sample->time;
		break;

	default:
		break;
	}

	return 0;
}
2865 
2866 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2867 			     struct perf_record_auxtrace *event)
2868 {
2869 	struct auxtrace_buffer *buf;
2870 	unsigned int i;
2871 	/*
2872 	 * Find all buffers with same reference in the queues and dump them.
2873 	 * This is because the queues can contain multiple entries of the same
2874 	 * buffer that were split on aux records.
2875 	 */
2876 	for (i = 0; i < etm->queues.nr_queues; ++i)
2877 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2878 			if (buf->reference == event->reference)
2879 				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2880 }
2881 
/*
 * perf tool callback for PERF_RECORD_AUXTRACE events.  On first sight of
 * the data (!data_queued), records the buffer's file offset and adds it to
 * the auxtrace queues for later decoding; with dump_trace set it also dumps
 * the raw buffer.  When data has already been queued, dump requests are
 * served from the queues instead.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t  data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;
		int idx = event->auxtrace.idx;

		if (is_pipe)
			data_offset = 0;
		else {
			/* Trace data follows the event at the current offset */
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	} else if (dump_trace)
		dump_queued_data(etm, &event->auxtrace);

	return 0;
}
2920 
2921 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2922 {
2923 	struct evsel *evsel;
2924 	struct evlist *evlist = etm->session->evlist;
2925 
2926 	/* Override timeless mode with user input from --itrace=Z */
2927 	if (etm->synth_opts.timeless_decoding) {
2928 		etm->timeless_decoding = true;
2929 		return 0;
2930 	}
2931 
2932 	/*
2933 	 * Find the cs_etm evsel and look at what its timestamp setting was
2934 	 */
2935 	evlist__for_each_entry(evlist, evsel)
2936 		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2937 			etm->timeless_decoding =
2938 				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2939 			return 0;
2940 		}
2941 
2942 	pr_err("CS ETM: Couldn't find ETM evsel\n");
2943 	return -EINVAL;
2944 }
2945 
2946 /*
2947  * Read a single cpu parameter block from the auxtrace_info priv block.
2948  *
2949  * For version 1 there is a per cpu nr_params entry. If we are handling
2950  * version 1 file, then there may be less, the same, or more params
2951  * indicated by this value than the compile time number we understand.
2952  *
2953  * For a version 0 info block, there are a fixed number, and we need to
2954  * fill out the nr_param value in the metadata we create.
2955  */
static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
				    int out_blk_size, int nr_params_v0)
{
	u64 *metadata = NULL;
	int hdr_version;
	int nr_in_params, nr_out_params, nr_cmn_params;
	int i, k;

	/* Output block is always full v1 size; params not copied stay zero. */
	metadata = zalloc(sizeof(*metadata) * out_blk_size);
	if (!metadata)
		return NULL;

	/* read block current index & version */
	i = *buff_in_offset;
	hdr_version = buff_in[CS_HEADER_VERSION];

	if (!hdr_version) {
	/* read version 0 info block into a version 1 metadata block  */
		nr_in_params = nr_params_v0;
		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
		/* v0 blocks carry no NR_TRC_PARAMS entry - synthesize it */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
		/* remaining block params at offset +1 from source */
		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
			metadata[k + 1] = buff_in[i + k];
		/* version 0 has 2 common params */
		nr_cmn_params = 2;
	} else {
	/* read version 1 info block - input and output nr_params may differ */
		/* version 1 has 3 common params */
		nr_cmn_params = 3;
		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];

		/* if input has more params than output - skip excess */
		nr_out_params = nr_in_params + nr_cmn_params;
		if (nr_out_params > out_blk_size)
			nr_out_params = out_blk_size;

		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
			metadata[k] = buff_in[i + k];

		/* record the actual nr params we copied */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
	}

	/* adjust in offset by number of in params used */
	i += nr_in_params + nr_cmn_params;
	*buff_in_offset = i;
	return metadata;
}
3006 
3007 /**
3008  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
3009  * on the bounds of aux_event, if it matches with the buffer that's at
3010  * file_offset.
3011  *
3012  * Normally, whole auxtrace buffers would be added to the queue. But we
3013  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3014  * is reset across each buffer, so splitting the buffers up in advance has
3015  * the same effect.
3016  */
static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
				      struct perf_record_aux *aux_event, struct perf_sample *sample)
{
	int err;
	char buf[PERF_SAMPLE_MAX_SIZE];
	union perf_event *auxtrace_event_union;
	struct perf_record_auxtrace *auxtrace_event;
	union perf_event auxtrace_fragment;
	__u64 aux_offset, aux_size;
	enum cs_etm_format format;

	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	/*
	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
	 * from looping through the auxtrace index.
	 */
	err = perf_session__peek_event(session, file_offset, buf,
				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
	if (err)
		return err;
	auxtrace_event = &auxtrace_event_union->auxtrace;
	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
		return -EINVAL;

	/* Reject truncated or inconsistent records before trusting their fields. */
	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
		auxtrace_event->header.size != sz) {
		return -EINVAL;
	}

	/*
	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
	 * Return 'not found' if mismatch.
	 */
	if (auxtrace_event->cpu == (__u32) -1) {
		etm->per_thread_decoding = true;
		if (auxtrace_event->tid != sample->tid)
			return 1;
	} else if (auxtrace_event->cpu != sample->cpu) {
		if (etm->per_thread_decoding) {
			/*
			 * Found a per-cpu buffer after a per-thread one was
			 * already found
			 */
			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
			return -EINVAL;
		}
		return 1;
	}

	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
		/*
		 * Clamp size in snapshot mode. The buffer size is clamped in
		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
		 * the buffer size.
		 */
		aux_size = min(aux_event->aux_size, auxtrace_event->size);

		/*
		 * In this mode, the head also points to the end of the buffer so aux_offset
		 * needs to have the size subtracted so it points to the beginning as in normal mode
		 */
		aux_offset = aux_event->aux_offset - aux_size;
	} else {
		aux_size = aux_event->aux_size;
		aux_offset = aux_event->aux_offset;
	}

	/* Only queue the fragment if the AUX event lies within this buffer. */
	if (aux_offset >= auxtrace_event->offset &&
	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
		struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;

		/*
		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
		 * based on the sizes of the aux event, and queue that fragment.
		 */
		auxtrace_fragment.auxtrace = *auxtrace_event;
		auxtrace_fragment.auxtrace.size = aux_size;
		auxtrace_fragment.auxtrace.offset = aux_offset;
		/* Point file_offset at the fragment's data, past the record header. */
		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;

		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
						 file_offset, NULL);
		if (err)
			return err;

		/* Record the trace format; mixing formats in one queue is unsupported. */
		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
				UNFORMATTED : FORMATTED;
		if (etmq->format != UNSET && format != etmq->format) {
			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
			return -EINVAL;
		}
		etmq->format = format;
		return 0;
	}

	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
	return 1;
}
3123 
3124 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3125 					u64 offset __maybe_unused, void *data __maybe_unused)
3126 {
3127 	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3128 	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3129 		(*(int *)data)++; /* increment found count */
3130 		return cs_etm__process_aux_output_hw_id(session, event);
3131 	}
3132 	return 0;
3133 }
3134 
static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
					u64 offset __maybe_unused, void *data __maybe_unused)
{
	struct perf_sample sample;
	int ret;
	struct auxtrace_index_entry *ent;
	struct auxtrace_index *auxtrace_index;
	struct evsel *evsel;
	size_t i;

	/* Don't care about any other events, we're only queuing buffers for AUX events */
	if (event->header.type != PERF_RECORD_AUX)
		return 0;

	/* Malformed record: header claims less than the fixed AUX record size. */
	if (event->header.size < sizeof(struct perf_record_aux))
		return -EINVAL;

	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
	if (!event->aux.aux_size)
		return 0;

	/*
	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
	 */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	/* sample must be released via perf_sample__exit() on every path below */
	perf_sample__init(&sample, /*all=*/false);
	ret = evsel__parse_sample(evsel, event, &sample);
	if (ret)
		goto out;

	/*
	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
	 */
	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
		for (i = 0; i < auxtrace_index->nr; i++) {
			ent = &auxtrace_index->entries[i];
			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
							 ent->sz, &event->aux, &sample);
			/*
			 * Stop search on error or successful values. Continue search on
			 * 1 ('not found')
			 */
			if (ret != 1)
				goto out;
		}
	}

	/*
	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
	 * don't exit with an error because it will still be possible to decode other aux records.
	 */
	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
	ret = 0;
out:
	perf_sample__exit(&sample);
	return ret;
}
3196 
3197 static int cs_etm__queue_aux_records(struct perf_session *session)
3198 {
3199 	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3200 								struct auxtrace_index, list);
3201 	if (index && index->nr > 0)
3202 		return perf_session__peek_events(session, session->header.data_offset,
3203 						 session->header.data_size,
3204 						 cs_etm__queue_aux_records_cb, NULL);
3205 
3206 	/*
3207 	 * We would get here if there are no entries in the index (either no auxtrace
3208 	 * buffers or no index at all). Fail silently as there is the possibility of
3209 	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3210 	 * false.
3211 	 *
3212 	 * In that scenario, buffers will not be split by AUX records.
3213 	 */
3214 	return 0;
3215 }
3216 
/*
 * NOTE: despite the name, this evaluates to true when the metadata block for
 * cpu @j does NOT contain the given parameter, i.e. the recorded number of
 * trace params is too small to cover the CS_<type>_<param> index.
 */
#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3219 
3220 /*
3221  * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3222  * timestamps).
3223  */
3224 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3225 {
3226 	int j;
3227 
3228 	for (j = 0; j < num_cpu; j++) {
3229 		switch (metadata[j][CS_ETM_MAGIC]) {
3230 		case __perf_cs_etmv4_magic:
3231 			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3232 				return false;
3233 			break;
3234 		case __perf_cs_ete_magic:
3235 			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3236 				return false;
3237 			break;
3238 		default:
3239 			/* Unknown / unsupported magic number. */
3240 			return false;
3241 		}
3242 	}
3243 	return true;
3244 }
3245 
3246 /* map trace ids to correct metadata block, from information in metadata */
3247 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3248 					  u64 **metadata)
3249 {
3250 	u64 cs_etm_magic;
3251 	u8 trace_chan_id;
3252 	int i, err;
3253 
3254 	for (i = 0; i < num_cpu; i++) {
3255 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3256 		switch (cs_etm_magic) {
3257 		case __perf_cs_etmv3_magic:
3258 			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3259 			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3260 			break;
3261 		case __perf_cs_etmv4_magic:
3262 		case __perf_cs_ete_magic:
3263 			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3264 			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3265 			break;
3266 		default:
3267 			/* unknown magic number */
3268 			return -EINVAL;
3269 		}
3270 		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3271 		if (err)
3272 			return err;
3273 	}
3274 	return 0;
3275 }
3276 
3277 /*
3278  * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3279  * (formatted or not) packets to create the decoders.
3280  */
static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
{
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params  *t_params;
	/* One decoder per trace ID mapped to this queue */
	int decoders = intlist__nr_entries(etmq->traceid_list);

	/* No trace IDs mapped to this queue: nothing to decode. */
	if (decoders == 0)
		return 0;

	/*
	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
	 * needed.
	 */
	if (etmq->format == UNFORMATTED)
		assert(decoders == 1);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * decoders);

	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etmq))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					dump_trace ? CS_ETM_OPERATION_PRINT :
						     CS_ETM_OPERATION_DECODE))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
					    t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	/* t_params was only needed for decoder creation; release it. */
	zfree(&t_params);
	return 0;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	/* zfree(NULL-holding pointer) is safe, so early failures land here too */
	zfree(&t_params);
	return -EINVAL;
}
3336 
3337 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3338 {
3339 	struct auxtrace_queues *queues = &etm->queues;
3340 
3341 	for (unsigned int i = 0; i < queues->nr_queues; i++) {
3342 		bool empty = list_empty(&queues->queue_array[i].head);
3343 		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3344 		int ret;
3345 
3346 		/*
3347 		 * Don't create decoders for empty queues, mainly because
3348 		 * etmq->format is unknown for empty queues.
3349 		 */
3350 		assert(empty || etmq->format != UNSET);
3351 		if (empty)
3352 			continue;
3353 
3354 		ret = cs_etm__create_queue_decoders(etmq);
3355 		if (ret)
3356 			return ret;
3357 	}
3358 	return 0;
3359 }
3360 
3361 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3362 				       struct perf_session *session)
3363 {
3364 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3365 	struct cs_etm_auxtrace *etm = NULL;
3366 	struct perf_record_time_conv *tc = &session->time_conv;
3367 	int event_header_size = sizeof(struct perf_event_header);
3368 	int total_size = auxtrace_info->header.size;
3369 	int priv_size = 0;
3370 	int num_cpu, max_cpu = 0;
3371 	int err = 0;
3372 	int aux_hw_id_found;
3373 	int i;
3374 	u64 *ptr = NULL;
3375 	u64 **metadata = NULL;
3376 
3377 	/* First the global part */
3378 	ptr = (u64 *) auxtrace_info->priv;
3379 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3380 	metadata = zalloc(sizeof(*metadata) * num_cpu);
3381 	if (!metadata)
3382 		return -ENOMEM;
3383 
3384 	/* Start parsing after the common part of the header */
3385 	i = CS_HEADER_VERSION_MAX;
3386 
3387 	/*
3388 	 * The metadata is stored in the auxtrace_info section and encodes
3389 	 * the configuration of the ARM embedded trace macrocell which is
3390 	 * required by the trace decoder to properly decode the trace due
3391 	 * to its highly compressed nature.
3392 	 */
3393 	for (int j = 0; j < num_cpu; j++) {
3394 		if (ptr[i] == __perf_cs_etmv3_magic) {
3395 			metadata[j] =
3396 				cs_etm__create_meta_blk(ptr, &i,
3397 							CS_ETM_PRIV_MAX,
3398 							CS_ETM_NR_TRC_PARAMS_V0);
3399 		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3400 			metadata[j] =
3401 				cs_etm__create_meta_blk(ptr, &i,
3402 							CS_ETMV4_PRIV_MAX,
3403 							CS_ETMV4_NR_TRC_PARAMS_V0);
3404 		} else if (ptr[i] == __perf_cs_ete_magic) {
3405 			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3406 		} else {
3407 			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3408 				  ptr[i]);
3409 			err = -EINVAL;
3410 			goto err_free_metadata;
3411 		}
3412 
3413 		if (!metadata[j]) {
3414 			err = -ENOMEM;
3415 			goto err_free_metadata;
3416 		}
3417 
3418 		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3419 			max_cpu = metadata[j][CS_ETM_CPU];
3420 	}
3421 
3422 	/*
3423 	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3424 	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3425 	 * global metadata, and each cpu's metadata respectively.
3426 	 * The following tests if the correct number of double words was
3427 	 * present in the auxtrace info section.
3428 	 */
3429 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3430 	if (i * 8 != priv_size) {
3431 		err = -EINVAL;
3432 		goto err_free_metadata;
3433 	}
3434 
3435 	etm = zalloc(sizeof(*etm));
3436 
3437 	if (!etm) {
3438 		err = -ENOMEM;
3439 		goto err_free_metadata;
3440 	}
3441 
3442 	/*
3443 	 * As all the ETMs run at the same exception level, the system should
3444 	 * have the same PID format crossing CPUs.  So cache the PID format
3445 	 * and reuse it for sequential decoding.
3446 	 */
3447 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3448 
3449 	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
3450 	if (err)
3451 		goto err_free_etm;
3452 
3453 	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
3454 		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
3455 		if (err)
3456 			goto err_free_queues;
3457 	}
3458 
3459 	if (session->itrace_synth_opts->set) {
3460 		etm->synth_opts = *session->itrace_synth_opts;
3461 	} else {
3462 		itrace_synth_opts__set_default(&etm->synth_opts,
3463 				session->itrace_synth_opts->default_no_sample);
3464 		etm->synth_opts.callchain = false;
3465 	}
3466 
3467 	etm->session = session;
3468 
3469 	etm->num_cpu = num_cpu;
3470 	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3471 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3472 	etm->metadata = metadata;
3473 	etm->auxtrace_type = auxtrace_info->type;
3474 
3475 	if (etm->synth_opts.use_timestamp)
3476 		/*
3477 		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3478 		 * therefore the decoder cannot know if the timestamp trace is
3479 		 * same with the kernel time.
3480 		 *
3481 		 * If a user has knowledge for the working platform and can
3482 		 * specify itrace option 'T' to tell decoder to forcely use the
3483 		 * traced timestamp as the kernel time.
3484 		 */
3485 		etm->has_virtual_ts = true;
3486 	else
3487 		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3488 		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3489 
3490 	if (!etm->has_virtual_ts)
3491 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3492 			    "The time field of the samples will not be set accurately.\n"
3493 			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3494 			    "you can specify the itrace option 'T' for timestamp decoding\n"
3495 			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3496 
3497 	etm->auxtrace.process_event = cs_etm__process_event;
3498 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3499 	etm->auxtrace.flush_events = cs_etm__flush_events;
3500 	etm->auxtrace.free_events = cs_etm__free_events;
3501 	etm->auxtrace.free = cs_etm__free;
3502 	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3503 	session->auxtrace = &etm->auxtrace;
3504 
3505 	err = cs_etm__setup_timeless_decoding(etm);
3506 	if (err)
3507 		return err;
3508 
3509 	etm->tc.time_shift = tc->time_shift;
3510 	etm->tc.time_mult = tc->time_mult;
3511 	etm->tc.time_zero = tc->time_zero;
3512 	if (event_contains(*tc, time_cycles)) {
3513 		etm->tc.time_cycles = tc->time_cycles;
3514 		etm->tc.time_mask = tc->time_mask;
3515 		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3516 		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3517 	}
3518 	err = cs_etm__synth_events(etm, session);
3519 	if (err)
3520 		goto err_free_queues;
3521 
3522 	err = cs_etm__queue_aux_records(session);
3523 	if (err)
3524 		goto err_free_queues;
3525 
3526 	/*
3527 	 * Map Trace ID values to CPU metadata.
3528 	 *
3529 	 * Trace metadata will always contain Trace ID values from the legacy algorithm
3530 	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
3531 	 * or the kernel doesn't emit them.
3532 	 *
3533 	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3534 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3535 	 * in which case a different value will be used. This means an older perf may still
3536 	 * be able to record and read files generate on a newer system.
3537 	 *
3538 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3539 	 * those packets. If they are there then the values will be mapped and plugged into
3540 	 * the metadata and decoders are only created for each mapping received.
3541 	 *
3542 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3543 	 * then we map Trace ID values to CPU directly from the metadata and create decoders
3544 	 * for all mappings.
3545 	 */
3546 
3547 	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3548 	aux_hw_id_found = 0;
3549 	err = perf_session__peek_events(session, session->header.data_offset,
3550 					session->header.data_size,
3551 					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3552 	if (err)
3553 		goto err_free_queues;
3554 
3555 	/* if no HW ID found this is a file with metadata values only, map from metadata */
3556 	if (!aux_hw_id_found) {
3557 		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3558 		if (err)
3559 			goto err_free_queues;
3560 	}
3561 
3562 	err = cs_etm__create_decoders(etm);
3563 	if (err)
3564 		goto err_free_queues;
3565 
3566 	etm->data_queued = etm->queues.populated;
3567 	return 0;
3568 
3569 err_free_queues:
3570 	auxtrace_queues__free(&etm->queues);
3571 	session->auxtrace = NULL;
3572 err_free_etm:
3573 	zfree(&etm);
3574 err_free_metadata:
3575 	/* No need to check @metadata[j], free(NULL) is supported */
3576 	for (int j = 0; j < num_cpu; j++)
3577 		zfree(&metadata[j]);
3578 	zfree(&metadata);
3579 	return err;
3580 }
3581