xref: /linux/tools/perf/util/cs-etm.c (revision 802f0d58d52e8e34e08718479475ccdff0caffa0)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Copyright(C) 2015-2018 Linaro Limited.
4   *
5   * Author: Tor Jeremiassen <tor@ti.com>
6   * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7   */
8  
9  #include <linux/kernel.h>
10  #include <linux/bitfield.h>
11  #include <linux/bitops.h>
12  #include <linux/coresight-pmu.h>
13  #include <linux/err.h>
14  #include <linux/log2.h>
15  #include <linux/types.h>
16  #include <linux/zalloc.h>
17  
18  #include <stdlib.h>
19  
20  #include "auxtrace.h"
21  #include "color.h"
22  #include "cs-etm.h"
23  #include "cs-etm-decoder/cs-etm-decoder.h"
24  #include "debug.h"
25  #include "dso.h"
26  #include "evlist.h"
27  #include "intlist.h"
28  #include "machine.h"
29  #include "map.h"
30  #include "perf.h"
31  #include "session.h"
32  #include "map_symbol.h"
33  #include "branch.h"
34  #include "symbol.h"
35  #include "tool.h"
36  #include "thread.h"
37  #include "thread-stack.h"
38  #include "tsc.h"
39  #include <tools/libc_compat.h>
40  #include "util/synthetic-events.h"
41  #include "util/util.h"
42  
43  struct cs_etm_auxtrace {
44  	struct auxtrace auxtrace;
45  	struct auxtrace_queues queues;
46  	struct auxtrace_heap heap;
47  	struct itrace_synth_opts synth_opts;
48  	struct perf_session *session;
49  	struct perf_tsc_conversion tc;
50  
51  	/*
52  	 * Timeless decoding has no timestamps in the trace, so overlapping mmap
53  	 * lookups are less accurate, but it produces smaller trace data. We use
54  	 * context IDs in the trace instead of matching timestamps with fork
55  	 * records, so timestamps aren't really needed in the general case. Overlapping mmaps
56  	 * happen in cases like between a fork and an exec.
57  	 */
58  	bool timeless_decoding;
59  
60  	/*
61  	 * Per-thread ignores the trace channel ID and instead assumes that
62  	 * everything in a buffer comes from the same process regardless of
63  	 * which CPU it ran on. It also implies no context IDs so the TID is
64  	 * taken from the auxtrace buffer.
65  	 */
66  	bool per_thread_decoding;
67  	bool snapshot_mode;
68  	bool data_queued;
69  	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
70  
71  	int num_cpu;
72  	u64 latest_kernel_timestamp;
73  	u32 auxtrace_type;
74  	u64 branches_sample_type;
75  	u64 branches_id;
76  	u64 instructions_sample_type;
77  	u64 instructions_sample_period;
78  	u64 instructions_id;
79  	u64 **metadata;
80  	unsigned int pmu_type;
81  	enum cs_etm_pid_fmt pid_fmt;
82  };
83  
84  struct cs_etm_traceid_queue {
85  	u8 trace_chan_id;
86  	u64 period_instructions;
87  	size_t last_branch_pos;
88  	union perf_event *event_buf;
89  	struct thread *thread;
90  	struct thread *prev_packet_thread;
91  	ocsd_ex_level prev_packet_el;
92  	ocsd_ex_level el;
93  	struct branch_stack *last_branch;
94  	struct branch_stack *last_branch_rb;
95  	struct cs_etm_packet *prev_packet;
96  	struct cs_etm_packet *packet;
97  	struct cs_etm_packet_queue packet_queue;
98  };
99  
100  enum cs_etm_format {
101  	UNSET,
102  	FORMATTED,
103  	UNFORMATTED
104  };
105  
106  struct cs_etm_queue {
107  	struct cs_etm_auxtrace *etm;
108  	struct cs_etm_decoder *decoder;
109  	struct auxtrace_buffer *buffer;
110  	unsigned int queue_nr;
111  	u8 pending_timestamp_chan_id;
112  	enum cs_etm_format format;
113  	u64 offset;
114  	const unsigned char *buf;
115  	size_t buf_len, buf_used;
116  	/* Conversion between traceID and index in traceid_queues array */
117  	struct intlist *traceid_queues_list;
118  	struct cs_etm_traceid_queue **traceid_queues;
119  	/* Conversion between traceID and metadata pointers */
120  	struct intlist *traceid_list;
121  	/*
122  	 * Same as traceid_list, but traceid_list may be a reference to another
123  	 * queue's list which has a matching sink ID.
124  	 */
125  	struct intlist *own_traceid_list;
126  	u32 sink_id;
127  };
128  
129  static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
130  static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
131  					   pid_t tid);
132  static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
133  static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
134  static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
135  static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
136  static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
137  
138  /* A PTM's ETMIDR has bits [11:8] set to 0b0011 */
139  #define ETMIDR_PTM_VERSION 0x00000300
140  
141  /*
142   * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
143   * work with.  One option is to modify the auxtrace_heap_XYZ() API or simply
144   * encode the etm queue number as the upper 16 bits and the channel as
145   * the lower 16 bits.
146   */
147  #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
148  		      (queue_nr << 16 | trace_chan_id)
149  #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
150  #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
151  #define SINK_UNSET ((u32) -1)
152  
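/*
 * Distinguish the v7 protocol variant: a PTM has ETMIDR[11:8] set to 0b0011,
 * anything else is treated as ETMv3.
 */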
153  static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
154  {
155  	etmidr &= ETMIDR_PTM_VERSION;
156  
157  	if (etmidr == ETMIDR_PTM_VERSION)
158  		return CS_ETM_PROTO_PTM;
159  
160  	return CS_ETM_PROTO_ETMV3;
161  }
162  
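/*
 * Look up the metadata for a trace channel ID in this queue's traceID map
 * and return its magic value (ETMv3/ETMv4/ETE).
 */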
163  static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
164  {
165  	struct int_node *inode;
166  	u64 *metadata;
167  
168  	inode = intlist__find(etmq->traceid_list, trace_chan_id);
169  	if (!inode)
170  		return -EINVAL;
171  
172  	metadata = inode->priv;
173  	*magic = metadata[CS_ETM_MAGIC];
174  	return 0;
175  }
176  
177  int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
178  {
179  	struct int_node *inode;
180  	u64 *metadata;
181  
182  	inode = intlist__find(etmq->traceid_list, trace_chan_id);
183  	if (!inode)
184  		return -EINVAL;
185  
186  	metadata = inode->priv;
187  	*cpu = (int)metadata[CS_ETM_CPU];
188  	return 0;
189  }
190  
191  /*
192   * The returned PID format is presented as an enum:
193   *
194   *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
195   *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
196   *   CS_ETM_PIDFMT_NONE: No context IDs
197   *
198   * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
199   * are enabled at the same time when the session runs on an EL2 kernel.
200   * This means that both CONTEXTIDR_EL1 and CONTEXTIDR_EL2 will be
201   * recorded in the trace data; in that case the tool will selectively use
202   * CONTEXTIDR_EL2 as the PID.
203   *
204   * The result is cached in etm->pid_fmt so this function only needs to be called
205   * when processing the aux info.
206   */
207  static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
208  {
209  	u64 val;
210  
211  	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
212  		val = metadata[CS_ETM_ETMCR];
213  		/* CONTEXTIDR is traced */
214  		if (val & BIT(ETM_OPT_CTXTID))
215  			return CS_ETM_PIDFMT_CTXTID;
216  	} else {
217  		val = metadata[CS_ETMV4_TRCCONFIGR];
218  		/* CONTEXTIDR_EL2 is traced */
219  		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
220  			return CS_ETM_PIDFMT_CTXTID2;
221  		/* CONTEXTIDR_EL1 is traced */
222  		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
223  			return CS_ETM_PIDFMT_CTXTID;
224  	}
225  
226  	return CS_ETM_PIDFMT_NONE;
227  }
228  
229  enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
230  {
231  	return etmq->etm->pid_fmt;
232  }
233  
234  static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
235  					u8 trace_chan_id, u64 *cpu_metadata)
236  {
237  	/* Get an RB node for this CPU */
238  	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
239  
240  	/* Something went wrong, no need to continue */
241  	if (!inode)
242  		return -ENOMEM;
243  
244  	/* Disallow re-mapping a different traceID to metadata pair. */
245  	if (inode->priv) {
246  		u64 *curr_cpu_data = inode->priv;
247  		u8 curr_chan_id;
248  		int err;
249  
250  		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
251  			/*
252  			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
253  			 * are expected (but not supported) in per-thread mode,
254  			 * rather than signifying an error.
255  			 */
256  			if (etmq->etm->per_thread_decoding)
257  				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
258  			else
259  				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
260  
261  			return -EINVAL;
262  		}
263  
264  		/* check that the mapped ID matches */
265  		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
266  		if (err)
267  			return err;
268  
269  		if (curr_chan_id != trace_chan_id) {
270  			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
271  			return -EINVAL;
272  		}
273  
274  		/* Skip re-adding the same mappings if everything matched */
275  		return 0;
276  	}
277  
278  	/* Not one we've seen before, associate the traceID with the metadata pointer */
279  	inode->priv = cpu_metadata;
280  
281  	return 0;
282  }
283  
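/*
 * In per-thread mode a single auxtrace queue (index 0) is used; otherwise
 * queues are indexed by CPU number.
 */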
284  static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
285  {
286  	if (etm->per_thread_decoding)
287  		return etm->queues.queue_array[0].priv;
288  	else
289  		return etm->queues.queue_array[cpu].priv;
290  }
291  
292  static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
293  				   u64 *cpu_metadata)
294  {
295  	struct cs_etm_queue *etmq;
296  
297  	/*
298  	 * If the queue is unformatted then only save one mapping in the
299  	 * queue associated with that CPU so only one decoder is made.
300  	 */
301  	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
302  	if (etmq->format == UNFORMATTED)
303  		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
304  						    cpu_metadata);
305  
306  	/*
307  	 * Otherwise, version 0 trace IDs are global so save them into every
308  	 * queue.
309  	 */
310  	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
311  		int ret;
312  
313  		etmq = etm->queues.queue_array[i].priv;
314  		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
315  						   cpu_metadata);
316  		if (ret)
317  			return ret;
318  	}
319  
320  	return 0;
321  }
322  
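/*
 * Handle a version 0 HW_ID packet: trace IDs are global in this format, so
 * record the CPU to trace ID mapping (in every queue for formatted trace) and
 * write the trace ID back into that CPU's metadata.
 */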
323  static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
324  				       u64 hw_id)
325  {
326  	int err;
327  	u64 *cpu_data;
328  	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
329  
330  	cpu_data = get_cpu_data(etm, cpu);
331  	if (cpu_data == NULL)
332  		return -EINVAL;
333  
334  	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
335  	if (err)
336  		return err;
337  
338  	/*
339  	 * If we are picking up the association from the packet, we need to plug
340  	 * the correct trace ID into the metadata for setting up decoders later.
341  	 */
342  	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
343  }
344  
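/*
 * Handle a version 0.1 HW_ID packet, which also carries a sink ID. Trace IDs
 * are only unique within a sink, so queues that share the same sink also
 * share a single trace ID map.
 */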
345  static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
346  					 u64 hw_id)
347  {
348  	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
349  	int ret;
350  	u64 *cpu_data;
351  	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
352  	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
353  
354  	/*
355  	 * Check that the sink ID hasn't changed in per-CPU mode. In per-thread mode,
356  	 * let it pass for now until an actual overlapping trace ID is hit. In
357  	 * most cases IDs won't overlap even if the sink changes.
358  	 */
359  	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
360  	    etmq->sink_id != sink_id) {
361  		pr_err("CS_ETM: mismatch between sink IDs\n");
362  		return -EINVAL;
363  	}
364  
365  	etmq->sink_id = sink_id;
366  
367  	/* Find which other queues use this sink and link their ID maps */
368  	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
369  		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
370  
371  		/* Different sinks, skip */
372  		if (other_etmq->sink_id != etmq->sink_id)
373  			continue;
374  
375  		/* Already linked, skip */
376  		if (other_etmq->traceid_list == etmq->traceid_list)
377  			continue;
378  
379  		/* At the point of first linking, this one should be empty */
380  		if (!intlist__empty(etmq->traceid_list)) {
381  			pr_err("CS_ETM: Can't link populated trace ID lists\n");
382  			return -EINVAL;
383  		}
384  
385  		etmq->own_traceid_list = NULL;
386  		intlist__delete(etmq->traceid_list);
387  		etmq->traceid_list = other_etmq->traceid_list;
388  		break;
389  	}
390  
391  	cpu_data = get_cpu_data(etm, cpu);
392  	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
393  	if (ret)
394  		return ret;
395  
396  	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
397  	if (ret)
398  		return ret;
399  
400  	return 0;
401  }
402  
403  static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
404  {
405  	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
406  
407  	switch (cs_etm_magic) {
408  	case __perf_cs_etmv3_magic:
409  		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
410  				      CORESIGHT_TRACE_ID_VAL_MASK);
411  		break;
412  	case __perf_cs_etmv4_magic:
413  	case __perf_cs_ete_magic:
414  		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
415  				      CORESIGHT_TRACE_ID_VAL_MASK);
416  		break;
417  	default:
418  		return -EINVAL;
419  	}
420  	return 0;
421  }
422  
423  /*
424   * Update the metadata trace ID from the value found in the AUX_OUTPUT_HW_ID packet.
425   */
426  static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
427  {
428  	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
429  
430  	switch (cs_etm_magic) {
431  	case __perf_cs_etmv3_magic:
432  		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
433  		break;
434  	case __perf_cs_etmv4_magic:
435  	case __perf_cs_ete_magic:
436  		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
437  		break;
438  
439  	default:
440  		return -EINVAL;
441  	}
442  	return 0;
443  }
444  
445  /*
446   * Get the index into the metadata array for a specific CPU. Returns -1 if
447   * the CPU is not found.
448   */
449  static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
450  {
451  	int i;
452  
453  	for (i = 0; i < etm->num_cpu; i++) {
454  		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
455  			return i;
456  		}
457  	}
458  
459  	return -1;
460  }
461  
462  /*
463   * Get the metadata for a specific CPU. Returns NULL if the CPU is not
464   * present in the metadata array.
465   */
466  static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
467  {
468  	int idx = get_cpu_data_idx(etm, cpu);
469  
470  	return (idx != -1) ? etm->metadata[idx] : NULL;
471  }
472  
473  /*
474   * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
475   *
476   * The payload associates the Trace ID and the CPU.
477   * The routine is tolerant of seeing multiple packets with the same association,
478   * but a CPU / Trace ID association changing during a session is an error.
479   */
480  static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
481  					    union perf_event *event)
482  {
483  	struct cs_etm_auxtrace *etm;
484  	struct perf_sample sample;
485  	struct evsel *evsel;
486  	u64 hw_id;
487  	int cpu, version, err;
488  
489  	/* extract and parse the HW ID */
490  	hw_id = event->aux_output_hw_id.hw_id;
491  	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
492  
493  	/* check that we can handle this version */
494  	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
495  		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
496  		       version);
497  		return -EINVAL;
498  	}
499  
500  	/* get access to the etm metadata */
501  	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
502  	if (!etm || !etm->metadata)
503  		return -EINVAL;
504  
505  	/* parse the sample to get the CPU */
506  	evsel = evlist__event2evsel(session->evlist, event);
507  	if (!evsel)
508  		return -EINVAL;
509  	perf_sample__init(&sample, /*all=*/false);
510  	err = evsel__parse_sample(evsel, event, &sample);
511  	if (err)
512  		goto out;
513  	cpu = sample.cpu;
514  	if (cpu == -1) {
515  		/* no CPU in the sample - possibly recorded with an old version of perf */
516  		pr_err("CS_ETM: no CPU in AUX_OUTPUT_HW_ID sample. Use a compatible perf to record.\n");
517  		err = -EINVAL;
518  		goto out;
519  	}
520  
521  	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
522  		err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
523  		goto out;
524  	}
525  
526  	err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
527  out:
528  	perf_sample__exit(&sample);
529  	return err;
530  }
531  
532  void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
533  					      u8 trace_chan_id)
534  {
535  	/*
536  	 * When a timestamp packet is encountered the backend code
537  	 * is stopped so that the front end has time to process packets
538  	 * that were accumulated in the traceID queue.  Since there can
539  	 * be more than one channel per cs_etm_queue, we need to specify
540  	 * which traceID queue needs servicing.
541  	 */
542  	etmq->pending_timestamp_chan_id = trace_chan_id;
543  }
544  
545  static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
546  				      u8 *trace_chan_id)
547  {
548  	struct cs_etm_packet_queue *packet_queue;
549  
550  	if (!etmq->pending_timestamp_chan_id)
551  		return 0;
552  
553  	if (trace_chan_id)
554  		*trace_chan_id = etmq->pending_timestamp_chan_id;
555  
556  	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
557  						     etmq->pending_timestamp_chan_id);
558  	if (!packet_queue)
559  		return 0;
560  
561  	/* Acknowledge pending status */
562  	etmq->pending_timestamp_chan_id = 0;
563  
564  	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
565  	return packet_queue->cs_timestamp;
566  }
567  
568  static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
569  {
570  	int i;
571  
572  	queue->head = 0;
573  	queue->tail = 0;
574  	queue->packet_count = 0;
575  	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
576  		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
577  		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
578  		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
579  		queue->packet_buffer[i].instr_count = 0;
580  		queue->packet_buffer[i].last_instr_taken_branch = false;
581  		queue->packet_buffer[i].last_instr_size = 0;
582  		queue->packet_buffer[i].last_instr_type = 0;
583  		queue->packet_buffer[i].last_instr_subtype = 0;
584  		queue->packet_buffer[i].last_instr_cond = 0;
585  		queue->packet_buffer[i].flags = 0;
586  		queue->packet_buffer[i].exception_number = UINT32_MAX;
587  		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
588  		queue->packet_buffer[i].cpu = INT_MIN;
589  	}
590  }
591  
592  static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
593  {
594  	int idx;
595  	struct int_node *inode;
596  	struct cs_etm_traceid_queue *tidq;
597  	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
598  
599  	intlist__for_each_entry(inode, traceid_queues_list) {
600  		idx = (int)(intptr_t)inode->priv;
601  		tidq = etmq->traceid_queues[idx];
602  		cs_etm__clear_packet_queue(&tidq->packet_queue);
603  	}
604  }
605  
606  static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
607  				      struct cs_etm_traceid_queue *tidq,
608  				      u8 trace_chan_id)
609  {
610  	int rc = -ENOMEM;
611  	struct auxtrace_queue *queue;
612  	struct cs_etm_auxtrace *etm = etmq->etm;
613  
614  	cs_etm__clear_packet_queue(&tidq->packet_queue);
615  
616  	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
617  	tidq->trace_chan_id = trace_chan_id;
618  	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
619  	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
620  					       queue->tid);
621  	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
622  
623  	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
624  	if (!tidq->packet)
625  		goto out;
626  
627  	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
628  	if (!tidq->prev_packet)
629  		goto out_free;
630  
631  	if (etm->synth_opts.last_branch) {
632  		size_t sz = sizeof(struct branch_stack);
633  
634  		sz += etm->synth_opts.last_branch_sz *
635  		      sizeof(struct branch_entry);
636  		tidq->last_branch = zalloc(sz);
637  		if (!tidq->last_branch)
638  			goto out_free;
639  		tidq->last_branch_rb = zalloc(sz);
640  		if (!tidq->last_branch_rb)
641  			goto out_free;
642  	}
643  
644  	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
645  	if (!tidq->event_buf)
646  		goto out_free;
647  
648  	return 0;
649  
650  out_free:
651  	zfree(&tidq->last_branch_rb);
652  	zfree(&tidq->last_branch);
653  	zfree(&tidq->prev_packet);
654  	zfree(&tidq->packet);
655  out:
656  	return rc;
657  }
658  
659  static struct cs_etm_traceid_queue
660  *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
661  {
662  	int idx;
663  	struct int_node *inode;
664  	struct intlist *traceid_queues_list;
665  	struct cs_etm_traceid_queue *tidq, **traceid_queues;
666  	struct cs_etm_auxtrace *etm = etmq->etm;
667  
668  	if (etm->per_thread_decoding)
669  		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
670  
671  	traceid_queues_list = etmq->traceid_queues_list;
672  
673  	/*
674  	 * Check if the traceid_queue exists for this traceID by looking
675  	 * in the queue list.
676  	 */
677  	inode = intlist__find(traceid_queues_list, trace_chan_id);
678  	if (inode) {
679  		idx = (int)(intptr_t)inode->priv;
680  		return etmq->traceid_queues[idx];
681  	}
682  
683  	/* We couldn't find a traceid_queue for this traceID, allocate one */
684  	tidq = malloc(sizeof(*tidq));
685  	if (!tidq)
686  		return NULL;
687  
688  	memset(tidq, 0, sizeof(*tidq));
689  
690  	/* Get a valid index for the new traceid_queue */
691  	idx = intlist__nr_entries(traceid_queues_list);
692  	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
693  	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
694  	if (!inode)
695  		goto out_free;
696  
697  	/* Associate this traceID with this index */
698  	inode->priv = (void *)(intptr_t)idx;
699  
700  	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
701  		goto out_free;
702  
703  	/* Grow the traceid_queues array by one unit */
704  	traceid_queues = etmq->traceid_queues;
705  	traceid_queues = reallocarray(traceid_queues,
706  				      idx + 1,
707  				      sizeof(*traceid_queues));
708  
709  	/*
710  	 * On failure reallocarray() returns NULL and the original block of
711  	 * memory is left untouched.
712  	 */
713  	if (!traceid_queues)
714  		goto out_free;
715  
716  	traceid_queues[idx] = tidq;
717  	etmq->traceid_queues = traceid_queues;
718  
719  	return etmq->traceid_queues[idx];
720  
721  out_free:
722  	/*
723  	 * Function intlist__remove() removes the inode from the list
724  	 * and deletes the memory associated with it.
725  	 */
726  	intlist__remove(traceid_queues_list, inode);
727  	free(tidq);
728  
729  	return NULL;
730  }
731  
732  struct cs_etm_packet_queue
733  *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
734  {
735  	struct cs_etm_traceid_queue *tidq;
736  
737  	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
738  	if (tidq)
739  		return &tidq->packet_queue;
740  
741  	return NULL;
742  }
743  
744  static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
745  				struct cs_etm_traceid_queue *tidq)
746  {
747  	struct cs_etm_packet *tmp;
748  
749  	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
750  	    etm->synth_opts.instructions) {
751  		/*
752  		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
753  		 * the next incoming packet.
754  		 *
755  		 * Threads and exception levels are also tracked for both the
756  		 * previous and current packets. This is because the previous
757  		 * packet is used for the 'from' IP for branch samples, so the
758  		 * thread at that time must also be assigned to that sample.
759  		 * Across discontinuity packets the thread can change, so by
760  		 * tracking the thread for the previous packet the branch sample
761  		 * will have the correct info.
762  		 */
763  		tmp = tidq->packet;
764  		tidq->packet = tidq->prev_packet;
765  		tidq->prev_packet = tmp;
766  		tidq->prev_packet_el = tidq->el;
767  		thread__put(tidq->prev_packet_thread);
768  		tidq->prev_packet_thread = thread__get(tidq->thread);
769  	}
770  }
771  
772  static void cs_etm__packet_dump(const char *pkt_string, void *data)
773  {
774  	const char *color = PERF_COLOR_BLUE;
775  	int len = strlen(pkt_string);
776  	struct cs_etm_queue *etmq = data;
777  	char queue_nr[64];
778  
779  	if (verbose)
780  		snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr);
781  	else
782  		queue_nr[0] = '\0';
783  
784  	if (len && (pkt_string[len-1] == '\n'))
785  		color_fprintf(stdout, color, "	%s%s", queue_nr, pkt_string);
786  	else
787  		color_fprintf(stdout, color, "	%s%s\n", queue_nr, pkt_string);
788  
789  	fflush(stdout);
790  }
791  
792  static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
793  					  u64 *metadata, u32 etmidr)
794  {
795  	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
796  	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
797  	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
798  }
799  
800  static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
801  					  u64 *metadata)
802  {
803  	t_params->protocol = CS_ETM_PROTO_ETMV4i;
804  	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
805  	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
806  	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
807  	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
808  	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
809  	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
810  }
811  
812  static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
813  					u64 *metadata)
814  {
815  	t_params->protocol = CS_ETM_PROTO_ETE;
816  	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
817  	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
818  	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
819  	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
820  	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
821  	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
822  	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
823  }
824  
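/*
 * Populate one cs_etm_trace_params entry, in traceid_list order, for each
 * trace ID known to this queue so that a decoder can be created for each one.
 */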
825  static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
826  				     struct cs_etm_queue *etmq)
827  {
828  	struct int_node *inode;
829  
830  	intlist__for_each_entry(inode, etmq->traceid_list) {
831  		u64 *metadata = inode->priv;
832  		u64 architecture = metadata[CS_ETM_MAGIC];
833  		u32 etmidr;
834  
835  		switch (architecture) {
836  		case __perf_cs_etmv3_magic:
837  			etmidr = metadata[CS_ETM_ETMIDR];
838  			cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
839  			break;
840  		case __perf_cs_etmv4_magic:
841  			cs_etm__set_trace_param_etmv4(t_params++, metadata);
842  			break;
843  		case __perf_cs_ete_magic:
844  			cs_etm__set_trace_param_ete(t_params++, metadata);
845  			break;
846  		default:
847  			return -EINVAL;
848  		}
849  	}
850  
851  	return 0;
852  }
853  
854  static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
855  				       struct cs_etm_queue *etmq,
856  				       enum cs_etm_decoder_operation mode)
857  {
858  	int ret = -EINVAL;
859  
860  	if (!(mode < CS_ETM_OPERATION_MAX))
861  		goto out;
862  
863  	d_params->packet_printer = cs_etm__packet_dump;
864  	d_params->operation = mode;
865  	d_params->data = etmq;
866  	d_params->formatted = etmq->format == FORMATTED;
867  	d_params->fsyncs = false;
868  	d_params->hsyncs = false;
869  	d_params->frame_aligned = true;
870  
871  	ret = 0;
872  out:
873  	return ret;
874  }
875  
876  static void cs_etm__dump_event(struct cs_etm_queue *etmq,
877  			       struct auxtrace_buffer *buffer)
878  {
879  	int ret;
880  	const char *color = PERF_COLOR_BLUE;
881  	size_t buffer_used = 0;
882  
883  	fprintf(stdout, "\n");
884  	color_fprintf(stdout, color,
885  		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
886  		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
887  
888  	do {
889  		size_t consumed;
890  
891  		ret = cs_etm_decoder__process_data_block(
892  				etmq->decoder, buffer->offset,
893  				&((u8 *)buffer->data)[buffer_used],
894  				buffer->size - buffer_used, &consumed);
895  		if (ret)
896  			break;
897  
898  		buffer_used += consumed;
899  	} while (buffer_used < buffer->size);
900  
901  	cs_etm_decoder__reset(etmq->decoder);
902  }
903  
904  static int cs_etm__flush_events(struct perf_session *session,
905  				const struct perf_tool *tool)
906  {
907  	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
908  						   struct cs_etm_auxtrace,
909  						   auxtrace);
910  	if (dump_trace)
911  		return 0;
912  
913  	if (!tool->ordered_events)
914  		return -EINVAL;
915  
916  	if (etm->timeless_decoding) {
917  		/*
918  		 * Pass tid = -1 to process all queues, although they will likely
919  		 * have already been processed on PERF_RECORD_EXIT anyway.
920  		 */
921  		return cs_etm__process_timeless_queues(etm, -1);
922  	}
923  
924  	return cs_etm__process_timestamped_queues(etm);
925  }
926  
927  static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
928  {
929  	int idx;
930  	uintptr_t priv;
931  	struct int_node *inode, *tmp;
932  	struct cs_etm_traceid_queue *tidq;
933  	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
934  
935  	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
936  		priv = (uintptr_t)inode->priv;
937  		idx = priv;
938  
939  		/* Free this traceid_queue from the array */
940  		tidq = etmq->traceid_queues[idx];
941  		thread__zput(tidq->thread);
942  		thread__zput(tidq->prev_packet_thread);
943  		zfree(&tidq->event_buf);
944  		zfree(&tidq->last_branch);
945  		zfree(&tidq->last_branch_rb);
946  		zfree(&tidq->prev_packet);
947  		zfree(&tidq->packet);
948  		zfree(&tidq);
949  
950  		/*
951  		 * Function intlist__remove() removes the inode from the list
952  		 * and deletes the memory associated with it.
953  		 */
954  		intlist__remove(traceid_queues_list, inode);
955  	}
956  
957  	/* Then the RB tree itself */
958  	intlist__delete(traceid_queues_list);
959  	etmq->traceid_queues_list = NULL;
960  
961  	/* finally free the traceid_queues array */
962  	zfree(&etmq->traceid_queues);
963  }
964  
965  static void cs_etm__free_queue(void *priv)
966  {
967  	struct int_node *inode, *tmp;
968  	struct cs_etm_queue *etmq = priv;
969  
970  	if (!etmq)
971  		return;
972  
973  	cs_etm_decoder__free(etmq->decoder);
974  	cs_etm__free_traceid_queues(etmq);
975  
976  	if (etmq->own_traceid_list) {
977  		/* First remove all traceID/metadata nodes for the RB tree */
978  		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
979  			intlist__remove(etmq->own_traceid_list, inode);
980  
981  		/* Then the RB tree itself */
982  		intlist__delete(etmq->own_traceid_list);
983  	}
984  
985  	free(etmq);
986  }
987  
988  static void cs_etm__free_events(struct perf_session *session)
989  {
990  	unsigned int i;
991  	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
992  						   struct cs_etm_auxtrace,
993  						   auxtrace);
994  	struct auxtrace_queues *queues = &aux->queues;
995  
996  	for (i = 0; i < queues->nr_queues; i++) {
997  		cs_etm__free_queue(queues->queue_array[i].priv);
998  		queues->queue_array[i].priv = NULL;
999  	}
1000  
1001  	auxtrace_queues__free(queues);
1002  }
1003  
1004  static void cs_etm__free(struct perf_session *session)
1005  {
1006  	int i;
1007  	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1008  						   struct cs_etm_auxtrace,
1009  						   auxtrace);
1010  	cs_etm__free_events(session);
1011  	session->auxtrace = NULL;
1012  
1013  	for (i = 0; i < aux->num_cpu; i++)
1014  		zfree(&aux->metadata[i]);
1015  
1016  	zfree(&aux->metadata);
1017  	zfree(&aux);
1018  }
1019  
1020  static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
1021  				      struct evsel *evsel)
1022  {
1023  	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1024  						   struct cs_etm_auxtrace,
1025  						   auxtrace);
1026  
1027  	return evsel->core.attr.type == aux->pmu_type;
1028  }
1029  
1030  static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
1031  					   ocsd_ex_level el)
1032  {
1033  	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
1034  
1035  	/*
1036  	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
1037  	 * running at EL1, assume everything is the host.
1038  	 */
1039  	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
1040  		return &etmq->etm->session->machines.host;
1041  
1042  	/*
1043  	 * Not perfect, but otherwise assume anything in EL1 is the default
1044  	 * guest, and everything else is the host. Distinguishing between guest
1045  	 * and host userspaces isn't currently supported either. Neither is
1046  	 * multiple guest support. All this does is reduce the likelihood of
1047  	 * decode errors where we look into the host kernel maps when it should
1048  	 * have been the guest maps.
1049  	 */
1050  	switch (el) {
1051  	case ocsd_EL1:
1052  		return machines__find_guest(&etmq->etm->session->machines,
1053  					    DEFAULT_GUEST_KERNEL_ID);
1054  	case ocsd_EL3:
1055  	case ocsd_EL2:
1056  	case ocsd_EL0:
1057  	case ocsd_EL_unknown:
1058  	default:
1059  		return &etmq->etm->session->machines.host;
1060  	}
1061  }
1062  
1063  static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
1064  			   ocsd_ex_level el)
1065  {
1066  	struct machine *machine = cs_etm__get_machine(etmq, el);
1067  
1068  	if (address >= machine__kernel_start(machine)) {
1069  		if (machine__is_host(machine))
1070  			return PERF_RECORD_MISC_KERNEL;
1071  		else
1072  			return PERF_RECORD_MISC_GUEST_KERNEL;
1073  	} else {
1074  		if (machine__is_host(machine))
1075  			return PERF_RECORD_MISC_USER;
1076  		else {
1077  			/*
1078  			 * Can't really happen at the moment because
1079  			 * cs_etm__get_machine() will always return
1080  			 * machines.host for any non EL1 trace.
1081  			 */
1082  			return PERF_RECORD_MISC_GUEST_USER;
1083  		}
1084  	}
1085  }
1086  
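/*
 * Memory access callback used by the decoder to read instruction bytes from
 * the traced program's address space. Returns the number of bytes read, or 0
 * if the address couldn't be resolved or read.
 */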
1087  static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
1088  			      u64 address, size_t size, u8 *buffer,
1089  			      const ocsd_mem_space_acc_t mem_space)
1090  {
1091  	u8  cpumode;
1092  	u64 offset;
1093  	int len;
1094  	struct addr_location al;
1095  	struct dso *dso;
1096  	struct cs_etm_traceid_queue *tidq;
1097  	int ret = 0;
1098  
1099  	if (!etmq)
1100  		return 0;
1101  
1102  	addr_location__init(&al);
1103  	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1104  	if (!tidq)
1105  		goto out;
1106  
1107  	/*
1108  	 * We've already tracked the EL alongside the PID in cs_etm__set_thread(),
1109  	 * so double-check that it matches what OpenCSD thinks as well. It
1110  	 * doesn't distinguish between EL0 and EL1 for this mem access callback
1111  	 * so we had to do the extra tracking. Skip validation if it's any of
1112  	 * the 'any' values.
1113  	 */
1114  	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
1115  	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
1116  		if (mem_space & OCSD_MEM_SPACE_EL1N) {
1117  			/* Includes both non secure EL1 and EL0 */
1118  			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
1119  		} else if (mem_space & OCSD_MEM_SPACE_EL2)
1120  			assert(tidq->el == ocsd_EL2);
1121  		else if (mem_space & OCSD_MEM_SPACE_EL3)
1122  			assert(tidq->el == ocsd_EL3);
1123  	}
1124  
1125  	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1126  
1127  	if (!thread__find_map(tidq->thread, cpumode, address, &al))
1128  		goto out;
1129  
1130  	dso = map__dso(al.map);
1131  	if (!dso)
1132  		goto out;
1133  
1134  	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
1135  	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1136  		goto out;
1137  
1138  	offset = map__map_ip(al.map, address);
1139  
1140  	map__load(al.map);
1141  
1142  	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1143  				    offset, buffer, size);
1144  
1145  	if (len <= 0) {
1146  		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1147  				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1148  		if (!dso__auxtrace_warned(dso)) {
1149  			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1150  				address,
1151  				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
1152  			dso__set_auxtrace_warned(dso);
1153  		}
1154  		goto out;
1155  	}
1156  	ret = len;
1157  out:
1158  	addr_location__exit(&al);
1159  	return ret;
1160  }
1161  
1162  static struct cs_etm_queue *cs_etm__alloc_queue(void)
1163  {
1164  	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
1165  	if (!etmq)
1166  		return NULL;
1167  
1168  	etmq->traceid_queues_list = intlist__new(NULL);
1169  	if (!etmq->traceid_queues_list)
1170  		goto out_free;
1171  
1172  	/*
1173  	 * Create an RB tree for traceID-metadata tuples.  Since the conversion
1174  	 * has to be made for each packet that gets decoded, optimizing access
1175  	 * in anything other than a sequential array is worth doing.
1176  	 */
1177  	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
1178  	if (!etmq->traceid_list)
1179  		goto out_free;
1180  
1181  	return etmq;
1182  
1183  out_free:
1184  	intlist__delete(etmq->traceid_queues_list);
1185  	free(etmq);
1186  
1187  	return NULL;
1188  }
1189  
1190  static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1191  			       struct auxtrace_queue *queue,
1192  			       unsigned int queue_nr)
1193  {
1194  	struct cs_etm_queue *etmq = queue->priv;
1195  
1196  	if (etmq)
1197  		return 0;
1198  
1199  	etmq = cs_etm__alloc_queue();
1200  
1201  	if (!etmq)
1202  		return -ENOMEM;
1203  
1204  	queue->priv = etmq;
1205  	etmq->etm = etm;
1206  	etmq->queue_nr = queue_nr;
1207  	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
1208  	etmq->offset = 0;
1209  	etmq->sink_id = SINK_UNSET;
1210  
1211  	return 0;
1212  }
1213  
1214  static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1215  					    struct cs_etm_queue *etmq,
1216  					    unsigned int queue_nr)
1217  {
1218  	int ret = 0;
1219  	unsigned int cs_queue_nr;
1220  	u8 trace_chan_id;
1221  	u64 cs_timestamp;
1222  
1223  	/*
1224  	 * We are under a CPU-wide trace scenario.  As such we need to know
1225  	 * when the code that generated the traces started to execute so that
1226  	 * it can be correlated with execution on other CPUs.  So we get a
1227  	 * handle on the beginning of traces and decode until we find a
1228  	 * timestamp.  The timestamp is then added to the auxtrace min heap
1229  	 * in order to know which queue (of all the etmqs) to decode first.
1230  	 */
1231  	while (1) {
1232  		/*
1233  		 * Fetch an aux_buffer from this etmq.  Bail if no more
1234  		 * blocks or an error has been encountered.
1235  		 */
1236  		ret = cs_etm__get_data_block(etmq);
1237  		if (ret <= 0)
1238  			goto out;
1239  
1240  		/*
1241  		 * Run decoder on the trace block.  The decoder will stop when
1242  		 * encountering a CS timestamp, a full packet queue or the end of
1243  		 * trace for that block.
1244  		 */
1245  		ret = cs_etm__decode_data_block(etmq);
1246  		if (ret)
1247  			goto out;
1248  
1249  		/*
1250  		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1251  		 * the timestamp calculation for us.
1252  		 */
1253  		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1254  
1255  		/* We found a timestamp, no need to continue. */
1256  		if (cs_timestamp)
1257  			break;
1258  
1259  		/*
1260  		 * We didn't find a timestamp so empty all the traceid packet
1261  		 * queues before looking for another timestamp packet, either
1262  		 * in the current data block or a new one.  Packets that were
1263  		 * just decoded are useless since no timestamp has been
1264  		 * associated with them.  As such simply discard them.
1265  		 */
1266  		cs_etm__clear_all_packet_queues(etmq);
1267  	}
1268  
1269  	/*
1270  	 * We have a timestamp.  Add it to the min heap to reflect when
1271  	 * instructions conveyed by the range packets of this traceID queue
1272  	 * started to execute.  Once the same has been done for all the traceID
1273  	 * queues of each etmq, rendering and decoding can start in
1274  	 * chronological order.
1275  	 *
1276  	 * Note that packets decoded above are still in the traceID's packet
1277  	 * queue and will be processed in cs_etm__process_timestamped_queues().
1278  	 */
1279  	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1280  	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1281  out:
1282  	return ret;
1283  }
1284  
1285  static inline
1286  void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1287  				 struct cs_etm_traceid_queue *tidq)
1288  {
1289  	struct branch_stack *bs_src = tidq->last_branch_rb;
1290  	struct branch_stack *bs_dst = tidq->last_branch;
1291  	size_t nr = 0;
1292  
1293  	/*
1294  	 * Set the number of records before early exit: ->nr is used to
1295  	 * determine how many branches to copy from ->entries.
1296  	 */
1297  	bs_dst->nr = bs_src->nr;
1298  
1299  	/*
1300  	 * Early exit when there is nothing to copy.
1301  	 */
1302  	if (!bs_src->nr)
1303  		return;
1304  
1305  	/*
1306  	 * As bs_src->entries is a circular buffer, we need to copy from it in
1307  	 * two steps.  First, copy the branches from the most recently inserted
1308  	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1309  	 */
1310  	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1311  	memcpy(&bs_dst->entries[0],
1312  	       &bs_src->entries[tidq->last_branch_pos],
1313  	       sizeof(struct branch_entry) * nr);
1314  
1315  	/*
1316  	 * If we wrapped around at least once, the branches from the beginning
1317  	 * of the bs_src->entries buffer and until the ->last_branch_pos element
1318  	 * are older valid branches: copy them over.  The total number of
1319  	 * branches copied over will be equal to the number of branches asked by
1320  	 * the user in last_branch_sz.
1321  	 */
1322  	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1323  		memcpy(&bs_dst->entries[nr],
1324  		       &bs_src->entries[0],
1325  		       sizeof(struct branch_entry) * tidq->last_branch_pos);
1326  	}
1327  }
1328  
1329  static inline
1330  void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1331  {
1332  	tidq->last_branch_pos = 0;
1333  	tidq->last_branch_rb->nr = 0;
1334  }
1335  
1336  static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1337  					 u8 trace_chan_id, u64 addr)
1338  {
1339  	u8 instrBytes[2];
1340  
1341  	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1342  			   instrBytes, 0);
1343  	/*
1344  	 * T32 instruction size is indicated by bits[15:11] of the first
1345  	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1346  	 * denote a 32-bit instruction.
1347  	 */
1348  	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1349  }
1350  
1351  static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1352  {
1353  	/*
1354  	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1355  	 * appear in samples.
1356  	 */
1357  	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1358  	    packet->sample_type == CS_ETM_EXCEPTION)
1359  		return 0;
1360  
1361  	return packet->start_addr;
1362  }
1363  
1364  static inline
1365  u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1366  {
1367  	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1368  	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1369  		return 0;
1370  
1371  	return packet->end_addr - packet->last_instr_size;
1372  }
1373  
1374  static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1375  				     u64 trace_chan_id,
1376  				     const struct cs_etm_packet *packet,
1377  				     u64 offset)
1378  {
1379  	if (packet->isa == CS_ETM_ISA_T32) {
1380  		u64 addr = packet->start_addr;
1381  
1382  		while (offset) {
1383  			addr += cs_etm__t32_instr_size(etmq,
1384  						       trace_chan_id, addr);
1385  			offset--;
1386  		}
1387  		return addr;
1388  	}
1389  
1390  	/* Assume a 4 byte instruction size (A32/A64) */
1391  	return packet->start_addr + offset * 4;
1392  }
1393  
1394  static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1395  					  struct cs_etm_traceid_queue *tidq)
1396  {
1397  	struct branch_stack *bs = tidq->last_branch_rb;
1398  	struct branch_entry *be;
1399  
1400  	/*
1401  	 * The branches are recorded in a circular buffer in reverse
1402  	 * chronological order: we start recording from the last element of the
1403  	 * buffer down.  After writing the first element of the stack, move the
1404  	 * insert position back to the end of the buffer.
1405  	 */
1406  	if (!tidq->last_branch_pos)
1407  		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1408  
1409  	tidq->last_branch_pos -= 1;
1410  
1411  	be       = &bs->entries[tidq->last_branch_pos];
1412  	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1413  	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1414  	/* No support for mispredict */
1415  	be->flags.mispred = 0;
1416  	be->flags.predicted = 1;
1417  
1418  	/*
1419  	 * Increment bs->nr until reaching the number of last branches asked by
1420  	 * the user on the command line.
1421  	 */
1422  	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1423  		bs->nr += 1;
1424  }
1425  
1426  static int cs_etm__inject_event(union perf_event *event,
1427  			       struct perf_sample *sample, u64 type)
1428  {
1429  	event->header.size = perf_event__sample_event_size(sample, type, 0);
1430  	return perf_event__synthesize_sample(event, type, 0, sample);
1431  }
1432  
1433  
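/*
 * Fetch the next auxtrace buffer for this queue and drop the previous one.
 * Returns the new buffer length, 0 when no more data is available, or a
 * negative error code.
 */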
1434  static int
1435  cs_etm__get_trace(struct cs_etm_queue *etmq)
1436  {
1437  	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1438  	struct auxtrace_buffer *old_buffer = aux_buffer;
1439  	struct auxtrace_queue *queue;
1440  
1441  	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1442  
1443  	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1444  
1445  	/* If no more data, drop the previous auxtrace_buffer and return */
1446  	if (!aux_buffer) {
1447  		if (old_buffer)
1448  			auxtrace_buffer__drop_data(old_buffer);
1449  		etmq->buf_len = 0;
1450  		return 0;
1451  	}
1452  
1453  	etmq->buffer = aux_buffer;
1454  
1455  	/* If the aux_buffer doesn't have data associated, try to load it */
1456  	if (!aux_buffer->data) {
1457  		/* get the file desc associated with the perf data file */
1458  		int fd = perf_data__fd(etmq->etm->session->data);
1459  
1460  		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1461  		if (!aux_buffer->data)
1462  			return -ENOMEM;
1463  	}
1464  
1465  	/* If valid, drop the previous buffer */
1466  	if (old_buffer)
1467  		auxtrace_buffer__drop_data(old_buffer);
1468  
1469  	etmq->buf_used = 0;
1470  	etmq->buf_len = aux_buffer->size;
1471  	etmq->buf = aux_buffer->data;
1472  
1473  	return etmq->buf_len;
1474  }
1475  
1476  static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1477  			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1478  			       ocsd_ex_level el)
1479  {
1480  	struct machine *machine = cs_etm__get_machine(etmq, el);
1481  
1482  	if (tid != -1) {
1483  		thread__zput(tidq->thread);
1484  		tidq->thread = machine__find_thread(machine, -1, tid);
1485  	}
1486  
1487  	/* Couldn't find a known thread */
1488  	if (!tidq->thread)
1489  		tidq->thread = machine__idle_thread(machine);
1490  
1491  	tidq->el = el;
1492  }
1493  
1494  int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1495  			    u8 trace_chan_id, ocsd_ex_level el)
1496  {
1497  	struct cs_etm_traceid_queue *tidq;
1498  
1499  	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1500  	if (!tidq)
1501  		return -EINVAL;
1502  
1503  	cs_etm__set_thread(etmq, tidq, tid, el);
1504  	return 0;
1505  }
1506  
1507  bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1508  {
1509  	return !!etmq->etm->timeless_decoding;
1510  }
1511  
1512  static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1513  			      u64 trace_chan_id,
1514  			      const struct cs_etm_packet *packet,
1515  			      struct perf_sample *sample)
1516  {
1517  	/*
1518  	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1519  	 * packet, so directly bail out with 'insn_len' = 0.
1520  	 */
1521  	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1522  		sample->insn_len = 0;
1523  		return;
1524  	}
1525  
1526  	/*
1527  	 * T32 instruction size might be 32-bit or 16-bit; decide by calling
1528  	 * cs_etm__t32_instr_size().
1529  	 */
1530  	if (packet->isa == CS_ETM_ISA_T32)
1531  		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1532  							  sample->ip);
1533  	/* Otherwise, A64 and A32 instruction sizes are always 32-bit. */
1534  	else
1535  		sample->insn_len = 4;
1536  
1537  	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1538  			   (void *)sample->insn, 0);
1539  }
1540  
1541  u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1542  {
1543  	struct cs_etm_auxtrace *etm = etmq->etm;
1544  
1545  	if (etm->has_virtual_ts)
1546  		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1547  	else
1548  		return cs_timestamp;
1549  }
1550  
1551  static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1552  					       struct cs_etm_traceid_queue *tidq)
1553  {
1554  	struct cs_etm_auxtrace *etm = etmq->etm;
1555  	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1556  
1557  	if (!etm->timeless_decoding && etm->has_virtual_ts)
1558  		return packet_queue->cs_timestamp;
1559  	else
1560  		return etm->latest_kernel_timestamp;
1561  }
1562  
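/*
 * Synthesise a PERF_RECORD_SAMPLE for the instructions event at the given
 * address, accounting for 'period' instructions.
 */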
1563  static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1564  					    struct cs_etm_traceid_queue *tidq,
1565  					    u64 addr, u64 period)
1566  {
1567  	int ret = 0;
1568  	struct cs_etm_auxtrace *etm = etmq->etm;
1569  	union perf_event *event = tidq->event_buf;
1570  	struct perf_sample sample;
1571  
1572  	perf_sample__init(&sample, /*all=*/true);
1573  	event->sample.header.type = PERF_RECORD_SAMPLE;
1574  	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1575  	event->sample.header.size = sizeof(struct perf_event_header);
1576  
1577  	/* Set time field based on etm auxtrace config. */
1578  	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1579  
1580  	sample.ip = addr;
1581  	sample.pid = thread__pid(tidq->thread);
1582  	sample.tid = thread__tid(tidq->thread);
1583  	sample.id = etmq->etm->instructions_id;
1584  	sample.stream_id = etmq->etm->instructions_id;
1585  	sample.period = period;
1586  	sample.cpu = tidq->packet->cpu;
1587  	sample.flags = tidq->prev_packet->flags;
1588  	sample.cpumode = event->sample.header.misc;
1589  
1590  	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1591  
1592  	if (etm->synth_opts.last_branch)
1593  		sample.branch_stack = tidq->last_branch;
1594  
1595  	if (etm->synth_opts.inject) {
1596  		ret = cs_etm__inject_event(event, &sample,
1597  					   etm->instructions_sample_type);
1598  		if (ret)
1599  			return ret;
1600  	}
1601  
1602  	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1603  
1604  	if (ret)
1605  		pr_err(
1606  			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1607  			ret);
1608  
1609  	perf_sample__exit(&sample);
1610  	return ret;
1611  }
1612  
1613  /*
1614   * The cs etm packet encodes an instruction range between a branch target
1615   * and the next taken branch. Generate sample accordingly.
1616   */
1617  static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1618  				       struct cs_etm_traceid_queue *tidq)
1619  {
1620  	int ret = 0;
1621  	struct cs_etm_auxtrace *etm = etmq->etm;
1622  	struct perf_sample sample = {.ip = 0,};
1623  	union perf_event *event = tidq->event_buf;
1624  	struct dummy_branch_stack {
1625  		u64			nr;
1626  		u64			hw_idx;
1627  		struct branch_entry	entries;
1628  	} dummy_bs;
1629  	u64 ip;
1630  
1631  	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1632  
1633  	event->sample.header.type = PERF_RECORD_SAMPLE;
1634  	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1635  						     tidq->prev_packet_el);
1636  	event->sample.header.size = sizeof(struct perf_event_header);
1637  
1638  	/* Set time field based on etm auxtrace config. */
1639  	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1640  
1641  	sample.ip = ip;
1642  	sample.pid = thread__pid(tidq->prev_packet_thread);
1643  	sample.tid = thread__tid(tidq->prev_packet_thread);
1644  	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1645  	sample.id = etmq->etm->branches_id;
1646  	sample.stream_id = etmq->etm->branches_id;
1647  	sample.period = 1;
1648  	sample.cpu = tidq->packet->cpu;
1649  	sample.flags = tidq->prev_packet->flags;
1650  	sample.cpumode = event->sample.header.misc;
1651  
1652  	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1653  			  &sample);
1654  
1655  	/*
1656  	 * perf report cannot handle events without a branch stack
1657  	 */
1658  	if (etm->synth_opts.last_branch) {
1659  		dummy_bs = (struct dummy_branch_stack){
1660  			.nr = 1,
1661  			.hw_idx = -1ULL,
1662  			.entries = {
1663  				.from = sample.ip,
1664  				.to = sample.addr,
1665  			},
1666  		};
1667  		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1668  	}
1669  
1670  	if (etm->synth_opts.inject) {
1671  		ret = cs_etm__inject_event(event, &sample,
1672  					   etm->branches_sample_type);
1673  		if (ret)
1674  			return ret;
1675  	}
1676  
1677  	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1678  
1679  	if (ret)
1680  		pr_err(
1681  		"CS ETM Trace: failed to deliver branch event, error %d\n",
1682  		ret);
1683  
1684  	return ret;
1685  }
1686  
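/*
 * Set up the synthetic branch/instruction events requested via itrace
 * options, deriving their attributes from the CoreSight evsel found in the
 * session's evlist and recording the resulting sample IDs.
 */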
1687  static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1688  				struct perf_session *session)
1689  {
1690  	struct evlist *evlist = session->evlist;
1691  	struct evsel *evsel;
1692  	struct perf_event_attr attr;
1693  	bool found = false;
1694  	u64 id;
1695  	int err;
1696  
1697  	evlist__for_each_entry(evlist, evsel) {
1698  		if (evsel->core.attr.type == etm->pmu_type) {
1699  			found = true;
1700  			break;
1701  		}
1702  	}
1703  
1704  	if (!found) {
1705  		pr_debug("No selected events with CoreSight Trace data\n");
1706  		return 0;
1707  	}
1708  
1709  	memset(&attr, 0, sizeof(struct perf_event_attr));
1710  	attr.size = sizeof(struct perf_event_attr);
1711  	attr.type = PERF_TYPE_HARDWARE;
1712  	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1713  	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1714  			    PERF_SAMPLE_PERIOD;
1715  	if (etm->timeless_decoding)
1716  		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1717  	else
1718  		attr.sample_type |= PERF_SAMPLE_TIME;
1719  
1720  	attr.exclude_user = evsel->core.attr.exclude_user;
1721  	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1722  	attr.exclude_hv = evsel->core.attr.exclude_hv;
1723  	attr.exclude_host = evsel->core.attr.exclude_host;
1724  	attr.exclude_guest = evsel->core.attr.exclude_guest;
1725  	attr.sample_id_all = evsel->core.attr.sample_id_all;
1726  	attr.read_format = evsel->core.attr.read_format;
1727  
1728  	/* create new id val to be a fixed offset from evsel id */
1729  	id = evsel->core.id[0] + 1000000000;
1730  
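	/* In the unlikely case the computed id is 0, use 1 to keep it non-zero. */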
1731  	if (!id)
1732  		id = 1;
1733  
1734  	if (etm->synth_opts.branches) {
1735  		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1736  		attr.sample_period = 1;
1737  		attr.sample_type |= PERF_SAMPLE_ADDR;
1738  		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1739  		if (err)
1740  			return err;
1741  		etm->branches_sample_type = attr.sample_type;
1742  		etm->branches_id = id;
1743  		id += 1;
1744  		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1745  	}
1746  
1747  	if (etm->synth_opts.last_branch) {
1748  		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1749  		/*
1750  		 * We don't use the hardware index, but the sample generation
1751  		 * code uses the new format branch_stack with this field,
1752  		 * so the event attributes must indicate that it's present.
1753  		 */
1754  		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1755  	}
1756  
1757  	if (etm->synth_opts.instructions) {
1758  		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1759  		attr.sample_period = etm->synth_opts.period;
1760  		etm->instructions_sample_period = attr.sample_period;
1761  		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1762  		if (err)
1763  			return err;
1764  		etm->instructions_sample_type = attr.sample_type;
1765  		etm->instructions_id = id;
1766  		id += 1;
1767  	}
1768  
1769  	return 0;
1770  }
1771  
1772  static int cs_etm__sample(struct cs_etm_queue *etmq,
1773  			  struct cs_etm_traceid_queue *tidq)
1774  {
1775  	struct cs_etm_auxtrace *etm = etmq->etm;
1776  	int ret;
1777  	u8 trace_chan_id = tidq->trace_chan_id;
1778  	u64 instrs_prev;
1779  
1780  	/* Get instructions remainder from previous packet */
1781  	instrs_prev = tidq->period_instructions;
1782  
1783  	tidq->period_instructions += tidq->packet->instr_count;
1784  
1785  	/*
1786  	 * Record a branch when the last instruction in
1787  	 * PREV_PACKET is a branch.
1788  	 */
1789  	if (etm->synth_opts.last_branch &&
1790  	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1791  	    tidq->prev_packet->last_instr_taken_branch)
1792  		cs_etm__update_last_branch_rb(etmq, tidq);
1793  
1794  	if (etm->synth_opts.instructions &&
1795  	    tidq->period_instructions >= etm->instructions_sample_period) {
1796  		/*
1797  		 * Emit instruction sample periodically
1798  		 * TODO: allow period to be defined in cycles and clock time
1799  		 */
1800  
1801  		/*
1802  		 * The diagram below shows the instruction sample
1803  		 * generation flow:
1804  		 *
1805  		 *    Instrs     Instrs       Instrs       Instrs
1806  		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1807  		 *    |            |            |            |
1808  		 *    V            V            V            V
1809  		 *   --------------------------------------------------
1810  		 *            ^                                  ^
1811  		 *            |                                  |
1812  		 *         Period                             Period
1813  		 *    instructions(Pi)                   instructions(Pi')
1814  		 *
1815  		 *            |                                  |
1816  		 *            \---------------- -----------------/
1817  		 *                             V
1818  		 *                 tidq->packet->instr_count
1819  		 *
1820  		 * Instrs Sample(n...) are the synthesised samples occurring
1821  		 * every etm->instructions_sample_period instructions - as
1822  		 * defined on the perf command line.  Sample(n) is the last
1823  		 * sample before the current etm packet; samples n+1 to n+3
1824  		 * are generated from the current etm packet.
1825  		 *
1826  		 * tidq->packet->instr_count represents the number of
1827  		 * instructions in the current etm packet.
1828  		 *
1829  		 * Period instructions (Pi) contains the number of
1830  		 * instructions executed after the sample point(n) from the
1831  		 * previous etm packet.  This will always be less than
1832  		 * etm->instructions_sample_period.
1833  		 *
1834  		 * When new samples are generated, sample(n+1) combines two
1835  		 * parts: the tail of the previous packet and the head of the
1836  		 * current packet.  Samples (n+2) and (n+3) each consume a
1837  		 * full sample period of instructions from the current packet.
1838  		 * After sample(n+3), the remaining instructions carry over to
1839  		 * the next packet and are kept in tidq->period_instructions
1840  		 * for the next round of calculation.
1841  		 */
1842  
1843  		/*
1844  		 * Get the initial offset into the current packet instructions;
1845  		 * entry conditions ensure that instrs_prev is less than
1846  		 * etm->instructions_sample_period.
1847  		 */
1848  		u64 offset = etm->instructions_sample_period - instrs_prev;
1849  		u64 addr;
1850  
1851  		/* Prepare last branches for instruction sample */
1852  		if (etm->synth_opts.last_branch)
1853  			cs_etm__copy_last_branch_rb(etmq, tidq);
1854  
1855  		while (tidq->period_instructions >=
1856  				etm->instructions_sample_period) {
1857  			/*
1858  			 * Calculate the address of the sampled instruction (-1
1859  			 * because the sample is reported as though the
1860  			 * instruction has just been executed, but the PC has
1861  			 * not yet advanced to the next instruction)
1862  			 */
1863  			addr = cs_etm__instr_addr(etmq, trace_chan_id,
1864  						  tidq->packet, offset - 1);
1865  			ret = cs_etm__synth_instruction_sample(
1866  				etmq, tidq, addr,
1867  				etm->instructions_sample_period);
1868  			if (ret)
1869  				return ret;
1870  
1871  			offset += etm->instructions_sample_period;
1872  			tidq->period_instructions -=
1873  				etm->instructions_sample_period;
1874  		}
1875  	}
1876  
1877  	if (etm->synth_opts.branches) {
1878  		bool generate_sample = false;
1879  
1880  		/* Generate a sample for the first packet after a discontinuity (trace on) */
1881  		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1882  			generate_sample = true;
1883  
1884  		/* Generate sample for branch taken packet */
1885  		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1886  		    tidq->prev_packet->last_instr_taken_branch)
1887  			generate_sample = true;
1888  
1889  		if (generate_sample) {
1890  			ret = cs_etm__synth_branch_sample(etmq, tidq);
1891  			if (ret)
1892  				return ret;
1893  		}
1894  	}
1895  
1896  	cs_etm__packet_swap(etm, tidq);
1897  
1898  	return 0;
1899  }
1900  
1901  static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1902  {
1903  	/*
1904  	 * When an exception packet is inserted, force
1905  	 * 'prev_packet->last_instr_taken_branch' to true, whether or not the
1906  	 * last instruction in the previous range packet was a taken branch.
1907  	 * This ensures a branch sample is generated for the range executed
1908  	 * before the exception traps to the kernel or before it returns.
1909  	 *
1910  	 * The exception packet contains dummy address values, so don't swap
1911  	 * PACKET with PREV_PACKET.  This keeps PREV_PACKET usable for
1912  	 * generating instruction and branch samples.
1913  	 */
1914  	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1915  		tidq->prev_packet->last_instr_taken_branch = true;
1916  
1917  	return 0;
1918  }
1919  
1920  static int cs_etm__flush(struct cs_etm_queue *etmq,
1921  			 struct cs_etm_traceid_queue *tidq)
1922  {
1923  	int err = 0;
1924  	struct cs_etm_auxtrace *etm = etmq->etm;
1925  
1926  	/* Handle start tracing packet */
1927  	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1928  		goto swap_packet;
1929  
1930  	if (etmq->etm->synth_opts.last_branch &&
1931  	    etmq->etm->synth_opts.instructions &&
1932  	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1933  		u64 addr;
1934  
1935  		/* Prepare last branches for instruction sample */
1936  		cs_etm__copy_last_branch_rb(etmq, tidq);
1937  
1938  		/*
1939  		 * Generate a last branch event for the branches left in the
1940  		 * circular buffer at the end of the trace.
1941  		 *
1942  		 * Use the address of the end of the last reported execution
1943  		 * range
1944  		 */
1945  		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1946  
1947  		err = cs_etm__synth_instruction_sample(
1948  			etmq, tidq, addr,
1949  			tidq->period_instructions);
1950  		if (err)
1951  			return err;
1952  
1953  		tidq->period_instructions = 0;
1954  
1955  	}
1956  
1957  	if (etm->synth_opts.branches &&
1958  	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1959  		err = cs_etm__synth_branch_sample(etmq, tidq);
1960  		if (err)
1961  			return err;
1962  	}
1963  
1964  swap_packet:
1965  	cs_etm__packet_swap(etm, tidq);
1966  
1967  	/* Reset last branches after flushing the trace */
1968  	if (etm->synth_opts.last_branch)
1969  		cs_etm__reset_last_branch_rb(tidq);
1970  
1971  	return err;
1972  }
1973  
1974  static int cs_etm__end_block(struct cs_etm_queue *etmq,
1975  			     struct cs_etm_traceid_queue *tidq)
1976  {
1977  	int err;
1978  
1979  	/*
1980  	 * No new packet is coming and 'etmq->packet' still holds the stale
1981  	 * packet that was set during the previous packet swap; skip
1982  	 * generating a branch sample so the stale packet is not used.
1983  	 *
1984  	 * For this case only flush branch stack and generate a last branch
1985  	 * event for the branches left in the circular buffer at the end of
1986  	 * the trace.
1987  	 */
1988  	if (etmq->etm->synth_opts.last_branch &&
1989  	    etmq->etm->synth_opts.instructions &&
1990  	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1991  		u64 addr;
1992  
1993  		/* Prepare last branches for instruction sample */
1994  		cs_etm__copy_last_branch_rb(etmq, tidq);
1995  
1996  		/*
1997  		 * Use the address of the end of the last reported execution
1998  		 * range.
1999  		 */
2000  		addr = cs_etm__last_executed_instr(tidq->prev_packet);
2001  
2002  		err = cs_etm__synth_instruction_sample(
2003  			etmq, tidq, addr,
2004  			tidq->period_instructions);
2005  		if (err)
2006  			return err;
2007  
2008  		tidq->period_instructions = 0;
2009  	}
2010  
2011  	return 0;
2012  }
2013  /*
2014   * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2015   *			   if need be.
2016   * Returns:	< 0	if error
2017   *		= 0	if no more auxtrace_buffer to read
2018   *		> 0	if the current buffer isn't empty yet
2019   */
2020  static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2021  {
2022  	int ret;
2023  
2024  	if (!etmq->buf_len) {
2025  		ret = cs_etm__get_trace(etmq);
2026  		if (ret <= 0)
2027  			return ret;
2028  		/*
2029  		 * We cannot assume consecutive blocks in the data file
2030  		 * are contiguous, reset the decoder to force re-sync.
2031  		 */
2032  		ret = cs_etm_decoder__reset(etmq->decoder);
2033  		if (ret)
2034  			return ret;
2035  	}
2036  
2037  	return etmq->buf_len;
2038  }
2039  
2040  static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
2041  				 struct cs_etm_packet *packet,
2042  				 u64 end_addr)
2043  {
2044  	/* Initialise to keep compiler happy */
2045  	u16 instr16 = 0;
2046  	u32 instr32 = 0;
2047  	u64 addr;
2048  
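	/*
	 * Note: the return value of cs_etm__mem_access() below is ignored; if
	 * the read fails, instr16/instr32 stay zero and the opcode checks
	 * simply don't match, so we fall through and return false.
	 */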
2049  	switch (packet->isa) {
2050  	case CS_ETM_ISA_T32:
2051  		/*
2052  		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2053  		 *
2054  		 *  b'15         b'8
2055  		 * +-----------------+--------+
2056  		 * | 1 1 0 1 1 1 1 1 |  imm8  |
2057  		 * +-----------------+--------+
2058  		 *
2059  		 * The specification only defines SVC for T32 as a 16-bit
2060  		 * instruction, with no 32-bit encoding; so below only read
2061  		 * 2 bytes as the instruction size for T32.
2062  		 */
2063  		addr = end_addr - 2;
2064  		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2065  				   (u8 *)&instr16, 0);
2066  		if ((instr16 & 0xFF00) == 0xDF00)
2067  			return true;
2068  
2069  		break;
2070  	case CS_ETM_ISA_A32:
2071  		/*
2072  		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2073  		 *
2074  		 *  b'31 b'28 b'27 b'24
2075  		 * +---------+---------+-------------------------+
2076  		 * |  !1111  | 1 1 1 1 |        imm24            |
2077  		 * +---------+---------+-------------------------+
2078  		 */
2079  		addr = end_addr - 4;
2080  		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2081  				   (u8 *)&instr32, 0);
2082  		if ((instr32 & 0x0F000000) == 0x0F000000 &&
2083  		    (instr32 & 0xF0000000) != 0xF0000000)
2084  			return true;
2085  
2086  		break;
2087  	case CS_ETM_ISA_A64:
2088  		/*
2089  		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2090  		 *
2091  		 *  b'31               b'21           b'4     b'0
2092  		 * +-----------------------+---------+-----------+
2093  		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2094  		 * +-----------------------+---------+-----------+
2095  		 */
2096  		addr = end_addr - 4;
2097  		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2098  				   (u8 *)&instr32, 0);
2099  		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2100  			return true;
2101  
2102  		break;
2103  	case CS_ETM_ISA_UNKNOWN:
2104  	default:
2105  		break;
2106  	}
2107  
2108  	return false;
2109  }
2110  
2111  static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2112  			       struct cs_etm_traceid_queue *tidq, u64 magic)
2113  {
2114  	u8 trace_chan_id = tidq->trace_chan_id;
2115  	struct cs_etm_packet *packet = tidq->packet;
2116  	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2117  
2118  	if (magic == __perf_cs_etmv3_magic)
2119  		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2120  			return true;
2121  
2122  	/*
2123  	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2124  	 * HVC cases; need to check if it's SVC instruction based on
2125  	 * packet address.
2126  	 */
2127  	if (magic == __perf_cs_etmv4_magic) {
2128  		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2129  		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2130  					 prev_packet->end_addr))
2131  			return true;
2132  	}
2133  
2134  	return false;
2135  }
2136  
2137  static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2138  				       u64 magic)
2139  {
2140  	struct cs_etm_packet *packet = tidq->packet;
2141  
2142  	if (magic == __perf_cs_etmv3_magic)
2143  		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2144  		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2145  		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2146  		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2147  		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2148  			return true;
2149  
2150  	if (magic == __perf_cs_etmv4_magic)
2151  		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2152  		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2153  		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2154  		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2155  		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2156  		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2157  		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2158  			return true;
2159  
2160  	return false;
2161  }
2162  
2163  static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2164  				      struct cs_etm_traceid_queue *tidq,
2165  				      u64 magic)
2166  {
2167  	u8 trace_chan_id = tidq->trace_chan_id;
2168  	struct cs_etm_packet *packet = tidq->packet;
2169  	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2170  
2171  	if (magic == __perf_cs_etmv3_magic)
2172  		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2173  		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2174  		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2175  		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2176  		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2177  		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2178  		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2179  			return true;
2180  
2181  	if (magic == __perf_cs_etmv4_magic) {
2182  		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2183  		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2184  		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2185  		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2186  			return true;
2187  
2188  		/*
2189  		 * For CS_ETMV4_EXC_CALL, instructions other than SVC
2190  		 * (i.e. SMC and HVC) are treated as sync exceptions.
2191  		 */
2192  		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2193  		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2194  					  prev_packet->end_addr))
2195  			return true;
2196  
2197  		/*
2198  		 * ETMv4 has 5 bits for exception number; if the numbers
2199  		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2200  		 * they are implementation defined exceptions.
2201  		 *
2202  		 * In this case, simply treat them as sync exceptions.
2203  		 */
2204  		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2205  		    packet->exception_number <= CS_ETMV4_EXC_END)
2206  			return true;
2207  	}
2208  
2209  	return false;
2210  }
2211  
2212  static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2213  				    struct cs_etm_traceid_queue *tidq)
2214  {
2215  	struct cs_etm_packet *packet = tidq->packet;
2216  	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2217  	u8 trace_chan_id = tidq->trace_chan_id;
2218  	u64 magic;
2219  	int ret;
2220  
2221  	switch (packet->sample_type) {
2222  	case CS_ETM_RANGE:
2223  		/*
2224  		 * An immediate branch instruction with neither link nor
2225  		 * return flag is a normal branch instruction within
2226  		 * the function.
2227  		 */
2228  		if (packet->last_instr_type == OCSD_INSTR_BR &&
2229  		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2230  			packet->flags = PERF_IP_FLAG_BRANCH;
2231  
2232  			if (packet->last_instr_cond)
2233  				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2234  		}
2235  
2236  		/*
2237  		 * An immediate branch instruction with link (e.g. BL) is a
2238  		 * branch instruction used for a function call.
2239  		 */
2240  		if (packet->last_instr_type == OCSD_INSTR_BR &&
2241  		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2242  			packet->flags = PERF_IP_FLAG_BRANCH |
2243  					PERF_IP_FLAG_CALL;
2244  
2245  		/*
2246  		 * An indirect branch instruction with link (e.g. BLR) is a
2247  		 * branch instruction used for a function call.
2248  		 */
2249  		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2250  		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2251  			packet->flags = PERF_IP_FLAG_BRANCH |
2252  					PERF_IP_FLAG_CALL;
2253  
2254  		/*
2255  		 * An indirect branch instruction with subtype
2256  		 * OCSD_S_INSTR_V7_IMPLIED_RET is an explicit hint of a
2257  		 * function return on A32/T32.
2258  		 */
2259  		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2260  		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2261  			packet->flags = PERF_IP_FLAG_BRANCH |
2262  					PERF_IP_FLAG_RETURN;
2263  
2264  		/*
2265  		 * An indirect branch instruction without link (e.g. BR) is
2266  		 * usually used for a function return, especially for functions
2267  		 * in dynamically linked libraries.
2268  		 */
2269  		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2270  		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2271  			packet->flags = PERF_IP_FLAG_BRANCH |
2272  					PERF_IP_FLAG_RETURN;
2273  
2274  		/* Return instruction for function return. */
2275  		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2276  		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2277  			packet->flags = PERF_IP_FLAG_BRANCH |
2278  					PERF_IP_FLAG_RETURN;
2279  
2280  		/*
2281  		 * The decoder might insert a discontinuity in the middle of
2282  		 * instruction packets; fix up prev_packet with the
2283  		 * PERF_IP_FLAG_TRACE_BEGIN flag to indicate a restarted trace.
2284  		 */
2285  		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2286  			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2287  					      PERF_IP_FLAG_TRACE_BEGIN;
2288  
2289  		/*
2290  		 * If the previous packet is an exception return packet and
2291  		 * the return address directly follows an SVC instruction,
2292  		 * adjust the previous packet's sample flags to
2293  		 * PERF_IP_FLAG_SYSCALLRET.
2294  		 */
2295  		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2296  					   PERF_IP_FLAG_RETURN |
2297  					   PERF_IP_FLAG_INTERRUPT) &&
2298  		    cs_etm__is_svc_instr(etmq, trace_chan_id,
2299  					 packet, packet->start_addr))
2300  			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2301  					     PERF_IP_FLAG_RETURN |
2302  					     PERF_IP_FLAG_SYSCALLRET;
2303  		break;
2304  	case CS_ETM_DISCONTINUITY:
2305  		/*
2306  		 * The trace is discontinuous; if the previous packet is an
2307  		 * instruction range packet, set the PERF_IP_FLAG_TRACE_END
2308  		 * flag on it.
2309  		 */
2310  		if (prev_packet->sample_type == CS_ETM_RANGE)
2311  			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2312  					      PERF_IP_FLAG_TRACE_END;
2313  		break;
2314  	case CS_ETM_EXCEPTION:
2315  		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
2316  		if (ret)
2317  			return ret;
2318  
2319  		/* The exception is for system call. */
2320  		if (cs_etm__is_syscall(etmq, tidq, magic))
2321  			packet->flags = PERF_IP_FLAG_BRANCH |
2322  					PERF_IP_FLAG_CALL |
2323  					PERF_IP_FLAG_SYSCALLRET;
2324  		/*
2325  		 * The exception is triggered by external signals from the bus,
2326  		 * the interrupt controller, the debug module, PE reset or halt.
2327  		 */
2328  		else if (cs_etm__is_async_exception(tidq, magic))
2329  			packet->flags = PERF_IP_FLAG_BRANCH |
2330  					PERF_IP_FLAG_CALL |
2331  					PERF_IP_FLAG_ASYNC |
2332  					PERF_IP_FLAG_INTERRUPT;
2333  		/*
2334  		 * Otherwise, the exception is caused by a trap, an instruction
2335  		 * or data fault, or an alignment error.
2336  		 */
2337  		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2338  			packet->flags = PERF_IP_FLAG_BRANCH |
2339  					PERF_IP_FLAG_CALL |
2340  					PERF_IP_FLAG_INTERRUPT;
2341  
2342  		/*
2343  		 * An exception packet is not used standalone for generating
2344  		 * samples; it is affiliated with the previous instruction
2345  		 * range packet.  So when the exception packet is inserted,
2346  		 * set the previous range packet's flags to tell perf it is
2347  		 * an exception taken branch.
2348  		 */
2349  		if (prev_packet->sample_type == CS_ETM_RANGE)
2350  			prev_packet->flags = packet->flags;
2351  		break;
2352  	case CS_ETM_EXCEPTION_RET:
2353  		/*
2354  		 * An exception return packet is not used standalone for
2355  		 * generating samples; it is affiliated with the previous
2356  		 * instruction range packet.  So when the exception return
2357  		 * packet is inserted, set the previous range packet's flags
2358  		 * to tell perf it is an exception return branch.
2359  		 *
2360  		 * The exception return can be for either a system call or
2361  		 * another exception type; unfortunately the packet doesn't
2362  		 * carry the exception type, so we cannot decide the type
2363  		 * purely from the exception return packet.  Recording the
2364  		 * exception number from the exception packet and reusing it
2365  		 * for the exception return packet is not reliable either,
2366  		 * since the trace can be discontinuous or the interrupt can
2367  		 * be nested; in those two cases the recorded exception number
2368  		 * cannot be used for the exception return packet.
2369  		 *
2370  		 * For the exception return packet we only need to distinguish
2371  		 * whether it is for a system call or for another type.  The
2372  		 * decision can therefore be deferred until the next packet,
2373  		 * which contains the return address: from the return address
2374  		 * we can read back the previous instruction, check whether it
2375  		 * is a system call instruction, and calibrate the sample flag
2376  		 * as needed.
2377  		 */
2378  		if (prev_packet->sample_type == CS_ETM_RANGE)
2379  			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2380  					     PERF_IP_FLAG_RETURN |
2381  					     PERF_IP_FLAG_INTERRUPT;
2382  		break;
2383  	case CS_ETM_EMPTY:
2384  	default:
2385  		break;
2386  	}
2387  
2388  	return 0;
2389  }
2390  
2391  static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2392  {
2393  	int ret = 0;
2394  	size_t processed = 0;
2395  
2396  	/*
2397  	 * Packets are decoded and added to the decoder's packet queue
2398  	 * until the decoder packet processing callback has requested that
2399  	 * processing stops or there is nothing left in the buffer.  Normal
2400  	 * operations that stop processing are a timestamp packet or a full
2401  	 * decoder buffer queue.
2402  	 */
2403  	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2404  						 etmq->offset,
2405  						 &etmq->buf[etmq->buf_used],
2406  						 etmq->buf_len,
2407  						 &processed);
2408  	if (ret)
2409  		goto out;
2410  
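	/* Advance buffer accounting by the number of bytes the decoder consumed. */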
2411  	etmq->offset += processed;
2412  	etmq->buf_used += processed;
2413  	etmq->buf_len -= processed;
2414  
2415  out:
2416  	return ret;
2417  }
2418  
2419  static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2420  					 struct cs_etm_traceid_queue *tidq)
2421  {
2422  	int ret;
2423  	struct cs_etm_packet_queue *packet_queue;
2424  
2425  	packet_queue = &tidq->packet_queue;
2426  
2427  	/* Process each packet in this chunk */
2428  	while (1) {
2429  		ret = cs_etm_decoder__get_packet(packet_queue,
2430  						 tidq->packet);
2431  		if (ret <= 0)
2432  			/*
2433  			 * Stop processing this chunk on
2434  			 * end of data or error
2435  			 */
2436  			break;
2437  
2438  		/*
2439  		 * Packet addresses are swapped by the packet
2440  		 * handling in the switch() statement below, so
2441  		 * the sample flags must be set beforehand, while
2442  		 * the address information is still valid (i.e.
2443  		 * before the packets are swapped).
2444  		 */
2445  		ret = cs_etm__set_sample_flags(etmq, tidq);
2446  		if (ret < 0)
2447  			break;
2448  
2449  		switch (tidq->packet->sample_type) {
2450  		case CS_ETM_RANGE:
2451  			/*
2452  			 * If the packet contains an instruction
2453  			 * range, generate instruction sequence
2454  			 * events.
2455  			 */
2456  			cs_etm__sample(etmq, tidq);
2457  			break;
2458  		case CS_ETM_EXCEPTION:
2459  		case CS_ETM_EXCEPTION_RET:
2460  			/*
2461  			 * When an exception packet arrives,
2462  			 * make sure the previous instruction
2463  			 * range packet is handled properly.
2464  			 */
2465  			cs_etm__exception(tidq);
2466  			break;
2467  		case CS_ETM_DISCONTINUITY:
2468  			/*
2469  			 * Discontinuity in trace, flush
2470  			 * previous branch stack
2471  			 */
2472  			cs_etm__flush(etmq, tidq);
2473  			break;
2474  		case CS_ETM_EMPTY:
2475  			/*
2476  			 * An empty packet should never be
2477  			 * received; report an error.
2478  			 */
2479  			pr_err("CS ETM Trace: empty packet\n");
2480  			return -EINVAL;
2481  		default:
2482  			break;
2483  		}
2484  	}
2485  
2486  	return ret;
2487  }
2488  
2489  static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2490  {
2491  	int idx;
2492  	struct int_node *inode;
2493  	struct cs_etm_traceid_queue *tidq;
2494  	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2495  
2496  	intlist__for_each_entry(inode, traceid_queues_list) {
2497  		idx = (int)(intptr_t)inode->priv;
2498  		tidq = etmq->traceid_queues[idx];
2499  
2500  		/* Ignore return value */
2501  		cs_etm__process_traceid_queue(etmq, tidq);
2502  	}
2503  }
2504  
2505  static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2506  {
2507  	int err = 0;
2508  	struct cs_etm_traceid_queue *tidq;
2509  
2510  	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2511  	if (!tidq)
2512  		return -EINVAL;
2513  
2514  	/* Go through each buffer in the queue and decode them one by one */
2515  	while (1) {
2516  		err = cs_etm__get_data_block(etmq);
2517  		if (err <= 0)
2518  			return err;
2519  
2520  		/* Run trace decoder until buffer consumed or end of trace */
2521  		do {
2522  			err = cs_etm__decode_data_block(etmq);
2523  			if (err)
2524  				return err;
2525  
2526  			/*
2527  			 * Process each packet in this chunk, nothing to do if
2528  			 * an error occurs other than hoping the next one will
2529  			 * be better.
2530  			 */
2531  			err = cs_etm__process_traceid_queue(etmq, tidq);
2532  
2533  		} while (etmq->buf_len);
2534  
2535  		if (err == 0)
2536  			/* Flush any remaining branch stack entries */
2537  			err = cs_etm__end_block(etmq, tidq);
2538  	}
2539  
2540  	return err;
2541  }
2542  
2543  static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2544  {
2545  	int idx, err = 0;
2546  	struct cs_etm_traceid_queue *tidq;
2547  	struct int_node *inode;
2548  
2549  	/* Go through each buffer in the queue and decode them one by one */
2550  	while (1) {
2551  		err = cs_etm__get_data_block(etmq);
2552  		if (err <= 0)
2553  			return err;
2554  
2555  		/* Run trace decoder until buffer consumed or end of trace */
2556  		do {
2557  			err = cs_etm__decode_data_block(etmq);
2558  			if (err)
2559  				return err;
2560  
2561  			/*
2562  			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2563  			 * single traceID queue because each TID has a separate
2564  			 * buffer. But here in per-cpu mode we need to iterate
2565  			 * over each channel instead.
2566  			 */
2567  			intlist__for_each_entry(inode,
2568  						etmq->traceid_queues_list) {
2569  				idx = (int)(intptr_t)inode->priv;
2570  				tidq = etmq->traceid_queues[idx];
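				/* Ignore the return value, as in the per-thread decoder. */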
2571  				cs_etm__process_traceid_queue(etmq, tidq);
2572  			}
2573  		} while (etmq->buf_len);
2574  
2575  		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2576  			idx = (int)(intptr_t)inode->priv;
2577  			tidq = etmq->traceid_queues[idx];
2578  			/* Flush any remaining branch stack entries */
2579  			err = cs_etm__end_block(etmq, tidq);
2580  			if (err)
2581  				return err;
2582  		}
2583  	}
2584  
2585  	return err;
2586  }
2587  
2588  static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2589  					   pid_t tid)
2590  {
2591  	unsigned int i;
2592  	struct auxtrace_queues *queues = &etm->queues;
2593  
2594  	for (i = 0; i < queues->nr_queues; i++) {
2595  		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2596  		struct cs_etm_queue *etmq = queue->priv;
2597  		struct cs_etm_traceid_queue *tidq;
2598  
2599  		if (!etmq)
2600  			continue;
2601  
2602  		if (etm->per_thread_decoding) {
2603  			tidq = cs_etm__etmq_get_traceid_queue(
2604  				etmq, CS_ETM_PER_THREAD_TRACEID);
2605  
2606  			if (!tidq)
2607  				continue;
2608  
2609  			if (tid == -1 || thread__tid(tidq->thread) == tid)
2610  				cs_etm__run_per_thread_timeless_decoder(etmq);
2611  		} else
2612  			cs_etm__run_per_cpu_timeless_decoder(etmq);
2613  	}
2614  
2615  	return 0;
2616  }
2617  
2618  static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2619  {
2620  	int ret = 0;
2621  	unsigned int cs_queue_nr, queue_nr, i;
2622  	u8 trace_chan_id;
2623  	u64 cs_timestamp;
2624  	struct auxtrace_queue *queue;
2625  	struct cs_etm_queue *etmq;
2626  	struct cs_etm_traceid_queue *tidq;
2627  
2628  	/*
2629  	 * Pre-populate the heap with one entry from each queue so that we can
2630  	 * start processing in time order across all queues.
2631  	 */
2632  	for (i = 0; i < etm->queues.nr_queues; i++) {
2633  		etmq = etm->queues.queue_array[i].priv;
2634  		if (!etmq)
2635  			continue;
2636  
2637  		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2638  		if (ret)
2639  			return ret;
2640  	}
2641  
2642  	while (1) {
2643  		if (!etm->heap.heap_cnt)
2644  			break;
2645  
2646  		/* Take the entry at the top of the min heap */
2647  		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2648  		queue_nr = TO_QUEUE_NR(cs_queue_nr);
2649  		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2650  		queue = &etm->queues.queue_array[queue_nr];
2651  		etmq = queue->priv;
2652  
2653  		/*
2654  		 * Remove the top entry from the heap since we are about
2655  		 * to process it.
2656  		 */
2657  		auxtrace_heap__pop(&etm->heap);
2658  
2659  		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2660  		if (!tidq) {
2661  			/*
2662  			 * No traceID queue has been allocated for this traceID,
2663  			 * which means something somewhere went very wrong.  No
2664  			 * other choice than simply exit.
2665  			 */
2666  			ret = -EINVAL;
2667  			goto out;
2668  		}
2669  
2670  		/*
2671  		 * Packets associated with this timestamp are already in
2672  		 * the etmq's traceID queue, so process them.
2673  		 */
2674  		ret = cs_etm__process_traceid_queue(etmq, tidq);
2675  		if (ret < 0)
2676  			goto out;
2677  
2678  		/*
2679  		 * Packets for this timestamp have been processed, time to
2680  		 * move on to the next timestamp, fetching a new auxtrace_buffer
2681  		 * if need be.
2682  		 */
2683  refetch:
2684  		ret = cs_etm__get_data_block(etmq);
2685  		if (ret < 0)
2686  			goto out;
2687  
2688  		/*
2689  		 * No more auxtrace_buffers to process in this etmq, simply
2690  		 * move on to another entry in the auxtrace_heap.
2691  		 */
2692  		if (!ret)
2693  			continue;
2694  
2695  		ret = cs_etm__decode_data_block(etmq);
2696  		if (ret)
2697  			goto out;
2698  
2699  		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2700  
2701  		if (!cs_timestamp) {
2702  			/*
2703  			 * Function cs_etm__decode_data_block() returns when
2704  			 * there is no more trace to decode in the current
2705  			 * auxtrace_buffer OR when a timestamp has been
2706  			 * encountered on any of the traceID queues.  Since we
2707  			 * did not get a timestamp, there is no more trace to
2708  			 * process in this auxtrace_buffer.  As such, empty and
2709  			 * flush all traceID queues.
2710  			 */
2711  			cs_etm__clear_all_traceid_queues(etmq);
2712  
2713  			/* Fetch another auxtrace_buffer for this etmq */
2714  			goto refetch;
2715  		}
2716  
2717  		/*
2718  		 * Add to the min heap the timestamp for packets that have
2719  		 * just been decoded.  They will be processed and synthesized
2720  		 * during the next call to cs_etm__process_traceid_queue() for
2721  		 * this queue/traceID.
2722  		 */
2723  		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2724  		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2725  	}
2726  
2727  	for (i = 0; i < etm->queues.nr_queues; i++) {
2728  		struct int_node *inode;
2729  
2730  		etmq = etm->queues.queue_array[i].priv;
2731  		if (!etmq)
2732  			continue;
2733  
2734  		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2735  			int idx = (int)(intptr_t)inode->priv;
2736  
2737  			/* Flush any remaining branch stack entries */
2738  			tidq = etmq->traceid_queues[idx];
2739  			ret = cs_etm__end_block(etmq, tidq);
2740  			if (ret)
2741  				return ret;
2742  		}
2743  	}
2744  out:
2745  	return ret;
2746  }
2747  
2748  static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2749  					union perf_event *event)
2750  {
2751  	struct thread *th;
2752  
2753  	if (etm->timeless_decoding)
2754  		return 0;
2755  
2756  	/*
2757  	 * Add the tid/pid to the log so that we can get a match when we get a
2758  	 * contextID from the decoder. Only track for the host: only kernel
2759  	 * trace is supported for guests, which wouldn't need pids, so this
2760  	 * should be fine.
2761  	 */
2762  	th = machine__findnew_thread(&etm->session->machines.host,
2763  				     event->itrace_start.pid,
2764  				     event->itrace_start.tid);
2765  	if (!th)
2766  		return -ENOMEM;
2767  
2768  	thread__put(th);
2769  
2770  	return 0;
2771  }
2772  
2773  static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2774  					   union perf_event *event)
2775  {
2776  	struct thread *th;
2777  	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2778  
2779  	/*
2780  	 * Context switches in per-thread mode are irrelevant since perf
2781  	 * will start/stop tracing as the process is scheduled.
2782  	 */
2783  	if (etm->timeless_decoding)
2784  		return 0;
2785  
2786  	/*
2787  	 * SWITCH_IN events carry the previously running process (the one just
2788  	 * switched out), while SWITCH_OUT events carry the process about to be
2789  	 * switched in.  As such we don't care about IN events.
2790  	 */
2791  	if (!out)
2792  		return 0;
2793  
2794  	/*
2795  	 * Add the tid/pid to the log so that we can get a match when we get a
2796  	 * contextID from the decoder. Only track for the host: only kernel
2797  	 * trace is supported for guests, which wouldn't need pids, so this
2798  	 * should be fine.
2799  	 */
2800  	th = machine__findnew_thread(&etm->session->machines.host,
2801  				     event->context_switch.next_prev_pid,
2802  				     event->context_switch.next_prev_tid);
2803  	if (!th)
2804  		return -ENOMEM;
2805  
2806  	thread__put(th);
2807  
2808  	return 0;
2809  }
2810  
2811  static int cs_etm__process_event(struct perf_session *session,
2812  				 union perf_event *event,
2813  				 struct perf_sample *sample,
2814  				 const struct perf_tool *tool)
2815  {
2816  	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2817  						   struct cs_etm_auxtrace,
2818  						   auxtrace);
2819  
2820  	if (dump_trace)
2821  		return 0;
2822  
2823  	if (!tool->ordered_events) {
2824  		pr_err("CoreSight ETM Trace requires ordered events\n");
2825  		return -EINVAL;
2826  	}
2827  
2828  	switch (event->header.type) {
2829  	case PERF_RECORD_EXIT:
2830  		/*
2831  		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2832  		 * start the decode because we know there will be no more trace from
2833  		 * this thread. All this does is emit samples earlier than waiting for
2834  		 * the flush in other modes, but with timestamps it makes sense to wait
2835  		 * for flush so that events from different threads are interleaved
2836  		 * properly.
2837  		 */
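		/* PERF_RECORD_EXIT shares the PERF_RECORD_FORK layout, hence event->fork.tid. */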
2838  		if (etm->per_thread_decoding && etm->timeless_decoding)
2839  			return cs_etm__process_timeless_queues(etm,
2840  							       event->fork.tid);
2841  		break;
2842  
2843  	case PERF_RECORD_ITRACE_START:
2844  		return cs_etm__process_itrace_start(etm, event);
2845  
2846  	case PERF_RECORD_SWITCH_CPU_WIDE:
2847  		return cs_etm__process_switch_cpu_wide(etm, event);
2848  
2849  	case PERF_RECORD_AUX:
2850  		/*
2851  		 * Record the latest kernel timestamp available in the header
2852  		 * for samples so that synthesised samples occur from this point
2853  		 * onwards.
2854  		 */
2855  		if (sample->time && (sample->time != (u64)-1))
2856  			etm->latest_kernel_timestamp = sample->time;
2857  		break;
2858  
2859  	default:
2860  		break;
2861  	}
2862  
2863  	return 0;
2864  }
2865  
2866  static void dump_queued_data(struct cs_etm_auxtrace *etm,
2867  			     struct perf_record_auxtrace *event)
2868  {
2869  	struct auxtrace_buffer *buf;
2870  	unsigned int i;
2871  	/*
2872  	 * Find all buffers with same reference in the queues and dump them.
2873  	 * This is because the queues can contain multiple entries of the same
2874  	 * buffer that were split on aux records.
2875  	 */
2876  	for (i = 0; i < etm->queues.nr_queues; ++i)
2877  		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2878  			if (buf->reference == event->reference)
2879  				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2880  }
2881  
2882  static int cs_etm__process_auxtrace_event(struct perf_session *session,
2883  					  union perf_event *event,
2884  					  const struct perf_tool *tool __maybe_unused)
2885  {
2886  	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2887  						   struct cs_etm_auxtrace,
2888  						   auxtrace);
2889  	if (!etm->data_queued) {
2890  		struct auxtrace_buffer *buffer;
2891  		off_t  data_offset;
2892  		int fd = perf_data__fd(session->data);
2893  		bool is_pipe = perf_data__is_pipe(session->data);
2894  		int err;
2895  		int idx = event->auxtrace.idx;
2896  
2897  		if (is_pipe)
2898  			data_offset = 0;
2899  		else {
2900  			data_offset = lseek(fd, 0, SEEK_CUR);
2901  			if (data_offset == -1)
2902  				return -errno;
2903  		}
2904  
2905  		err = auxtrace_queues__add_event(&etm->queues, session,
2906  						 event, data_offset, &buffer);
2907  		if (err)
2908  			return err;
2909  
2910  		if (dump_trace)
2911  			if (auxtrace_buffer__get_data(buffer, fd)) {
2912  				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2913  				auxtrace_buffer__put_data(buffer);
2914  			}
2915  	} else if (dump_trace)
2916  		dump_queued_data(etm, &event->auxtrace);
2917  
2918  	return 0;
2919  }
2920  
2921  static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2922  {
2923  	struct evsel *evsel;
2924  	struct evlist *evlist = etm->session->evlist;
2925  
2926  	/* Override timeless mode with user input from --itrace=Z */
2927  	if (etm->synth_opts.timeless_decoding) {
2928  		etm->timeless_decoding = true;
2929  		return 0;
2930  	}
2931  
2932  	/*
2933  	 * Find the cs_etm evsel and look at what its timestamp setting was
2934  	 */
2935  	evlist__for_each_entry(evlist, evsel)
2936  		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2937  			etm->timeless_decoding =
2938  				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2939  			return 0;
2940  		}
2941  
2942  	pr_err("CS ETM: Couldn't find ETM evsel\n");
2943  	return -EINVAL;
2944  }
2945  
2946  /*
2947   * Read a single cpu parameter block from the auxtrace_info priv block.
2948   *
2949   * For version 1 there is a per cpu nr_params entry. If we are handling a
2950   * version 1 file, then there may be fewer, the same, or more params
2951   * indicated by this value than the compile time number we understand.
2952   *
2953   * For a version 0 info block, there are a fixed number, and we need to
2954   * fill out the nr_param value in the metadata we create.
2955   */
2956  static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2957  				    int out_blk_size, int nr_params_v0)
2958  {
2959  	u64 *metadata = NULL;
2960  	int hdr_version;
2961  	int nr_in_params, nr_out_params, nr_cmn_params;
2962  	int i, k;
2963  
2964  	metadata = zalloc(sizeof(*metadata) * out_blk_size);
2965  	if (!metadata)
2966  		return NULL;
2967  
2968  	/* read block current index & version */
2969  	i = *buff_in_offset;
2970  	hdr_version = buff_in[CS_HEADER_VERSION];
2971  
2972  	if (!hdr_version) {
2973  	/* read version 0 info block into a version 1 metadata block  */
2974  		nr_in_params = nr_params_v0;
2975  		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2976  		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2977  		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2978  		/* remaining block params at offset +1 from source */
2979  		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2980  			metadata[k + 1] = buff_in[i + k];
2981  		/* version 0 has 2 common params */
2982  		nr_cmn_params = 2;
2983  	} else {
2984  	/* read version 1 info block - input and output nr_params may differ */
2985  		/* version 1 has 3 common params */
2986  		nr_cmn_params = 3;
2987  		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2988  
2989  		/* if input has more params than output - skip excess */
2990  		nr_out_params = nr_in_params + nr_cmn_params;
2991  		if (nr_out_params > out_blk_size)
2992  			nr_out_params = out_blk_size;
2993  
2994  		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2995  			metadata[k] = buff_in[i + k];
2996  
2997  		/* record the actual nr params we copied */
2998  		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2999  	}
3000  
3001  	/* adjust in offset by number of in params used */
3002  	i += nr_in_params + nr_cmn_params;
3003  	*buff_in_offset = i;
3004  	return metadata;
3005  }
3006  
3007  /**
3008   * Puts a fragment of an auxtrace buffer into the auxtrace queues based
3009   * on the bounds of aux_event, if it matches the buffer that's at
3010   * file_offset.
3011   *
3012   * Normally, whole auxtrace buffers would be added to the queue. But we
3013   * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3014   * is reset across each buffer, so splitting the buffers up in advance has
3015   * the same effect.
3016   */
3017  static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
3018  				      struct perf_record_aux *aux_event, struct perf_sample *sample)
3019  {
3020  	int err;
3021  	char buf[PERF_SAMPLE_MAX_SIZE];
3022  	union perf_event *auxtrace_event_union;
3023  	struct perf_record_auxtrace *auxtrace_event;
3024  	union perf_event auxtrace_fragment;
3025  	__u64 aux_offset, aux_size;
3026  	enum cs_etm_format format;
3027  
3028  	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
3029  						   struct cs_etm_auxtrace,
3030  						   auxtrace);
3031  
3032  	/*
3033  	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
3034  	 * from looping through the auxtrace index.
3035  	 */
3036  	err = perf_session__peek_event(session, file_offset, buf,
3037  				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
3038  	if (err)
3039  		return err;
3040  	auxtrace_event = &auxtrace_event_union->auxtrace;
3041  	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
3042  		return -EINVAL;
3043  
3044  	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
3045  		auxtrace_event->header.size != sz) {
3046  		return -EINVAL;
3047  	}
3048  
3049  	/*
3050  	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
3051  	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
3052  	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
3053  	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
3054  	 * Return 'not found' if mismatch.
3055  	 */
3056  	if (auxtrace_event->cpu == (__u32) -1) {
3057  		etm->per_thread_decoding = true;
3058  		if (auxtrace_event->tid != sample->tid)
3059  			return 1;
3060  	} else if (auxtrace_event->cpu != sample->cpu) {
3061  		if (etm->per_thread_decoding) {
3062  			/*
3063  			 * Found a per-cpu buffer after a per-thread one was
3064  			 * already found
3065  			 */
3066  			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3067  			return -EINVAL;
3068  		}
3069  		return 1;
3070  	}
3071  
3072  	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3073  		/*
3074  		 * Clamp size in snapshot mode. The buffer size is clamped in
3075  		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3076  		 * the buffer size.
3077  		 */
3078  		aux_size = min(aux_event->aux_size, auxtrace_event->size);
3079  
3080  		/*
3081  		 * In this mode, the head also points to the end of the buffer, so the size must be
3082  		 * subtracted from aux_offset so that it points to the beginning, as in normal mode.
3083  		 */
3084  		aux_offset = aux_event->aux_offset - aux_size;
3085  	} else {
3086  		aux_size = aux_event->aux_size;
3087  		aux_offset = aux_event->aux_offset;
3088  	}
3089  
3090  	if (aux_offset >= auxtrace_event->offset &&
3091  	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3092  		struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
3093  
3094  		/*
3095  		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3096  		 * based on the sizes of the aux event, and queue that fragment.
3097  		 */
3098  		auxtrace_fragment.auxtrace = *auxtrace_event;
3099  		auxtrace_fragment.auxtrace.size = aux_size;
3100  		auxtrace_fragment.auxtrace.offset = aux_offset;
3101  		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3102  
3103  		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3104  			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3105  		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3106  						 file_offset, NULL);
3107  		if (err)
3108  			return err;
3109  
3110  		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
3111  				UNFORMATTED : FORMATTED;
3112  		if (etmq->format != UNSET && format != etmq->format) {
3113  			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
3114  			return -EINVAL;
3115  		}
3116  		etmq->format = format;
3117  		return 0;
3118  	}
3119  
3120  	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3121  	return 1;
3122  }
3123  
3124  static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3125  					u64 offset __maybe_unused, void *data __maybe_unused)
3126  {
3127  	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3128  	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3129  		(*(int *)data)++; /* increment found count */
3130  		return cs_etm__process_aux_output_hw_id(session, event);
3131  	}
3132  	return 0;
3133  }
3134  
3135  static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3136  					u64 offset __maybe_unused, void *data __maybe_unused)
3137  {
3138  	struct perf_sample sample;
3139  	int ret;
3140  	struct auxtrace_index_entry *ent;
3141  	struct auxtrace_index *auxtrace_index;
3142  	struct evsel *evsel;
3143  	size_t i;
3144  
3145  	/* Don't care about any other events, we're only queuing buffers for AUX events */
3146  	if (event->header.type != PERF_RECORD_AUX)
3147  		return 0;
3148  
3149  	if (event->header.size < sizeof(struct perf_record_aux))
3150  		return -EINVAL;
3151  
3152  	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3153  	if (!event->aux.aux_size)
3154  		return 0;
3155  
3156  	/*
3157  	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3158  	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3159  	 */
3160  	evsel = evlist__event2evsel(session->evlist, event);
3161  	if (!evsel)
3162  		return -EINVAL;
3163  	perf_sample__init(&sample, /*all=*/false);
3164  	ret = evsel__parse_sample(evsel, event, &sample);
3165  	if (ret)
3166  		goto out;
3167  
3168  	/*
3169  	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3170  	 */
3171  	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3172  		for (i = 0; i < auxtrace_index->nr; i++) {
3173  			ent = &auxtrace_index->entries[i];
3174  			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3175  							 ent->sz, &event->aux, &sample);
3176  			/*
3177  			 * Stop search on error or successful values. Continue search on
3178  			 * 1 ('not found')
3179  			 */
3180  			if (ret != 1)
3181  				goto out;
3182  		}
3183  	}
3184  
3185  	/*
3186  	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3187  	 * don't exit with an error because it will still be possible to decode other aux records.
3188  	 */
3189  	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3190  	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3191  	ret = 0;
3192  out:
3193  	perf_sample__exit(&sample);
3194  	return ret;
3195  }
3196  
3197  static int cs_etm__queue_aux_records(struct perf_session *session)
3198  {
3199  	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3200  								struct auxtrace_index, list);
3201  	if (index && index->nr > 0)
3202  		return perf_session__peek_events(session, session->header.data_offset,
3203  						 session->header.data_size,
3204  						 cs_etm__queue_aux_records_cb, NULL);
3205  
3206  	/*
3207  	 * We would get here if there are no entries in the index (either no auxtrace
3208  	 * buffers or no index at all). Fail silently as there is the possibility of
3209  	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3210  	 * false.
3211  	 *
3212  	 * In that scenario, buffers will not be split by AUX records.
3213  	 */
3214  	return 0;
3215  }
3216  
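
/*
 * Note: despite its name, HAS_PARAM() evaluates to true when the metadata
 * block is too short to contain the requested parameter, i.e. when the
 * parameter is *not* present.
 */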
3217  #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3218  				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3219  
3220  /*
3221   * Loop through the ETMs and return false if we find at least one where ts_source != 1
3222   * (i.e. not using virtual timestamps).
3223   */
3224  static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3225  {
3226  	int j;
3227  
3228  	for (j = 0; j < num_cpu; j++) {
3229  		switch (metadata[j][CS_ETM_MAGIC]) {
3230  		case __perf_cs_etmv4_magic:
3231  			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3232  				return false;
3233  			break;
3234  		case __perf_cs_ete_magic:
3235  			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3236  				return false;
3237  			break;
3238  		default:
3239  			/* Unknown / unsupported magic number. */
3240  			return false;
3241  		}
3242  	}
3243  	return true;
3244  }
3245  
3246  /* map trace ids to correct metadata block, from information in metadata */
3247  static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3248  					  u64 **metadata)
3249  {
3250  	u64 cs_etm_magic;
3251  	u8 trace_chan_id;
3252  	int i, err;
3253  
3254  	for (i = 0; i < num_cpu; i++) {
3255  		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3256  		switch (cs_etm_magic) {
3257  		case __perf_cs_etmv3_magic:
3258  			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3259  			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3260  			break;
3261  		case __perf_cs_etmv4_magic:
3262  		case __perf_cs_ete_magic:
3263  			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3264  			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3265  			break;
3266  		default:
3267  			/* unknown magic number */
3268  			return -EINVAL;
3269  		}
3270  		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3271  		if (err)
3272  			return err;
3273  	}
3274  	return 0;
3275  }
3276  
3277  /*
3278   * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3279   * (formatted or not) packets to create the decoders.
3280   */
3281  static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
3282  {
3283  	struct cs_etm_decoder_params d_params;
3284  	struct cs_etm_trace_params  *t_params;
3285  	int decoders = intlist__nr_entries(etmq->traceid_list);
3286  
3287  	if (decoders == 0)
3288  		return 0;
3289  
3290  	/*
3291  	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
3292  	 * needed.
3293  	 */
3294  	if (etmq->format == UNFORMATTED)
3295  		assert(decoders == 1);
3296  
3297  	/* Use metadata to fill in trace parameters for trace decoder */
3298  	t_params = zalloc(sizeof(*t_params) * decoders);
3299  
3300  	if (!t_params)
3301  		goto out_free;
3302  
3303  	if (cs_etm__init_trace_params(t_params, etmq))
3304  		goto out_free;
3305  
3306  	/* Set decoder parameters to decode trace packets */
3307  	if (cs_etm__init_decoder_params(&d_params, etmq,
3308  					dump_trace ? CS_ETM_OPERATION_PRINT :
3309  						     CS_ETM_OPERATION_DECODE))
3310  		goto out_free;
3311  
3312  	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
3313  					    t_params);
3314  
3315  	if (!etmq->decoder)
3316  		goto out_free;
3317  
3318  	/*
3319  	 * Register a function to handle all memory accesses required by
3320  	 * the trace decoder library.
3321  	 */
3322  	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
3323  					      0x0L, ((u64) -1L),
3324  					      cs_etm__mem_access))
3325  		goto out_free_decoder;
3326  
3327  	zfree(&t_params);
3328  	return 0;
3329  
3330  out_free_decoder:
3331  	cs_etm_decoder__free(etmq->decoder);
3332  out_free:
3333  	zfree(&t_params);
3334  	return -EINVAL;
3335  }
3336  
3337  static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3338  {
3339  	struct auxtrace_queues *queues = &etm->queues;
3340  
3341  	for (unsigned int i = 0; i < queues->nr_queues; i++) {
3342  		bool empty = list_empty(&queues->queue_array[i].head);
3343  		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3344  		int ret;
3345  
3346  		/*
3347  		 * Don't create decoders for empty queues, mainly because
3348  		 * etmq->format is unknown for empty queues.
3349  		 */
3350  		assert(empty || etmq->format != UNSET);
3351  		if (empty)
3352  			continue;
3353  
3354  		ret = cs_etm__create_queue_decoders(etmq);
3355  		if (ret)
3356  			return ret;
3357  	}
3358  	return 0;
3359  }
3360  
3361  int cs_etm__process_auxtrace_info_full(union perf_event *event,
3362  				       struct perf_session *session)
3363  {
3364  	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3365  	struct cs_etm_auxtrace *etm = NULL;
3366  	struct perf_record_time_conv *tc = &session->time_conv;
3367  	int event_header_size = sizeof(struct perf_event_header);
3368  	int total_size = auxtrace_info->header.size;
3369  	int priv_size = 0;
3370  	int num_cpu, max_cpu = 0;
3371  	int err = 0;
3372  	int aux_hw_id_found;
3373  	int i;
3374  	u64 *ptr = NULL;
3375  	u64 **metadata = NULL;
3376  
3377  	/* First the global part */
3378  	ptr = (u64 *) auxtrace_info->priv;
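      	/*
      	 * CS_PMU_TYPE_CPUS packs the CPU count in the lower 32 bits and the
      	 * PMU type in the upper 32 bits (read as etm->pmu_type below).
      	 */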
3379  	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3380  	metadata = zalloc(sizeof(*metadata) * num_cpu);
3381  	if (!metadata)
3382  		return -ENOMEM;
3383  
3384  	/* Start parsing after the common part of the header */
3385  	i = CS_HEADER_VERSION_MAX;
3386  
3387  	/*
3388  	 * The metadata is stored in the auxtrace_info section and encodes
3389  	 * the configuration of the Arm embedded trace macrocell, which the
3390  	 * trace decoder needs in order to decode the highly compressed
3391  	 * trace properly.
3392  	 */
3393  	for (int j = 0; j < num_cpu; j++) {
3394  		if (ptr[i] == __perf_cs_etmv3_magic) {
3395  			metadata[j] =
3396  				cs_etm__create_meta_blk(ptr, &i,
3397  							CS_ETM_PRIV_MAX,
3398  							CS_ETM_NR_TRC_PARAMS_V0);
3399  		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3400  			metadata[j] =
3401  				cs_etm__create_meta_blk(ptr, &i,
3402  							CS_ETMV4_PRIV_MAX,
3403  							CS_ETMV4_NR_TRC_PARAMS_V0);
3404  		} else if (ptr[i] == __perf_cs_ete_magic) {
3405  			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3406  		} else {
3407  			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3408  				  ptr[i]);
3409  			err = -EINVAL;
3410  			goto err_free_metadata;
3411  		}
3412  
3413  		if (!metadata[j]) {
3414  			err = -ENOMEM;
3415  			goto err_free_metadata;
3416  		}
3417  
3418  		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3419  			max_cpu = metadata[j][CS_ETM_CPU];
3420  	}
3421  
3422  	/*
3423  	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3424  	 * CS_ETMV4_PRIV_MAX marks how many double words are in the
3425  	 * global metadata and in each CPU's metadata, respectively.
3426  	 * The following tests whether the correct number of double words was
3427  	 * present in the auxtrace info section.
3428  	 */
3429  	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
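      	/* @i is the number of u64 entries parsed, so i * 8 is the size in bytes */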
3430  	if (i * 8 != priv_size) {
3431  		err = -EINVAL;
3432  		goto err_free_metadata;
3433  	}
3434  
3435  	etm = zalloc(sizeof(*etm));
3436  
3437  	if (!etm) {
3438  		err = -ENOMEM;
3439  		goto err_free_metadata;
3440  	}
3441  
3442  	/*
3443  	 * As all the ETMs run at the same exception level, the system should
3444  	 * have the same PID format across CPUs.  So cache the PID format
3445  	 * and reuse it for sequential decoding.
3446  	 */
3447  	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3448  
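      	/* Allocate enough queues to be indexed by CPU number, 0..max_cpu */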
3449  	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
3450  	if (err)
3451  		goto err_free_etm;
3452  
3453  	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
3454  		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
3455  		if (err)
3456  			goto err_free_queues;
3457  	}
3458  
3459  	if (session->itrace_synth_opts->set) {
3460  		etm->synth_opts = *session->itrace_synth_opts;
3461  	} else {
3462  		itrace_synth_opts__set_default(&etm->synth_opts,
3463  				session->itrace_synth_opts->default_no_sample);
3464  		etm->synth_opts.callchain = false;
3465  	}
3466  
3467  	etm->session = session;
3468  
3469  	etm->num_cpu = num_cpu;
3470  	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3471  	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3472  	etm->metadata = metadata;
3473  	etm->auxtrace_type = auxtrace_info->type;
3474  
3475  	if (etm->synth_opts.use_timestamp)
3476  		/*
3477  		 * Arm CPUs prior to Armv8.4 don't support the FEAT_TRF feature,
3478  		 * therefore the decoder cannot know whether the traced timestamp
3479  		 * matches the kernel time.
3480  		 *
3481  		 * A user who knows the platform's behaviour can specify the
3482  		 * itrace option 'T' to force the decoder to use the traced
3483  		 * timestamp as the kernel time.
3484  		 */
3485  		etm->has_virtual_ts = true;
3486  	else
3487  		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3488  		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3489  
3490  	if (!etm->has_virtual_ts)
3491  		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3492  			    "The time field of the samples will not be set accurately.\n"
3493  			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3494  			    "you can specify the itrace option 'T' for timestamp decoding\n"
3495  			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3496  
3497  	etm->auxtrace.process_event = cs_etm__process_event;
3498  	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3499  	etm->auxtrace.flush_events = cs_etm__flush_events;
3500  	etm->auxtrace.free_events = cs_etm__free_events;
3501  	etm->auxtrace.free = cs_etm__free;
3502  	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3503  	session->auxtrace = &etm->auxtrace;
3504  
3505  	err = cs_etm__setup_timeless_decoding(etm);
3506  	if (err)
3507  		return err;
3508  
3509  	etm->tc.time_shift = tc->time_shift;
3510  	etm->tc.time_mult = tc->time_mult;
3511  	etm->tc.time_zero = tc->time_zero;
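      	/* The extended fields are only present if the TIME_CONV record is large enough */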
3512  	if (event_contains(*tc, time_cycles)) {
3513  		etm->tc.time_cycles = tc->time_cycles;
3514  		etm->tc.time_mask = tc->time_mask;
3515  		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3516  		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3517  	}
3518  	err = cs_etm__synth_events(etm, session);
3519  	if (err)
3520  		goto err_free_queues;
3521  
3522  	err = cs_etm__queue_aux_records(session);
3523  	if (err)
3524  		goto err_free_queues;
3525  
3526  	/*
3527  	 * Map Trace ID values to CPU metadata.
3528  	 *
3529  	 * Trace metadata will always contain Trace ID values from the legacy algorithm
3530  	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
3531  	 * or the kernel doesn't emit them.
3532  	 *
3533  	 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3534  	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3535  	 * in which case a different value will be used. This means an older perf may still
3536  	 * be able to record and read files generated on a newer system.
3537  	 *
3538  	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3539  	 * those packets. If they are there then the values will be mapped and plugged into
3540  	 * the metadata and decoders are only created for each mapping received.
3541  	 *
3542  	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel -
3543  	 * then we map Trace ID values to CPU directly from the metadata and create decoders
3544  	 * for all mappings.
3545  	 */
3546  
3547  	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3548  	aux_hw_id_found = 0;
3549  	err = perf_session__peek_events(session, session->header.data_offset,
3550  					session->header.data_size,
3551  					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3552  	if (err)
3553  		goto err_free_queues;
3554  
3555  	/* If no HW ID was found, this is a file with metadata values only; map from the metadata */
3556  	if (!aux_hw_id_found) {
3557  		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3558  		if (err)
3559  			goto err_free_queues;
3560  	}
3561  
3562  	err = cs_etm__create_decoders(etm);
3563  	if (err)
3564  		goto err_free_queues;
3565  
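      	/*
      	 * If the trace data was queued here, cs_etm__process_auxtrace_event()
      	 * will not need to queue it again when the AUX events are processed.
      	 */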
3566  	etm->data_queued = etm->queues.populated;
3567  	return 0;
3568  
3569  err_free_queues:
3570  	auxtrace_queues__free(&etm->queues);
3571  	session->auxtrace = NULL;
3572  err_free_etm:
3573  	zfree(&etm);
3574  err_free_metadata:
3575  	/* No need to check @metadata[j], free(NULL) is supported */
3576  	for (int j = 0; j < num_cpu; j++)
3577  		zfree(&metadata[j]);
3578  	zfree(&metadata);
3579  	return err;
3580  }
3581