xref: /linux/tools/perf/util/cs-etm.c (revision 9e906a9dead17d81d6c2687f65e159231d0e3286)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(C) 2015-2018 Linaro Limited.
4  *
5  * Author: Tor Jeremiassen <tor@ti.com>
6  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17 
18 #include <stdlib.h>
19 
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42 
43 struct cs_etm_auxtrace {
44 	struct auxtrace auxtrace;
45 	struct auxtrace_queues queues;
46 	struct auxtrace_heap heap;
47 	struct itrace_synth_opts synth_opts;
48 	struct perf_session *session;
49 	struct perf_tsc_conversion tc;
50 
51 	/*
52 	 * Timeless decoding has no timestamps in the trace, so overlapping mmap
53 	 * lookups are less accurate, but it produces smaller trace data. We use
54 	 * context IDs in the trace instead of matching timestamps with fork records,
55 	 * so timestamps aren't really needed in the general case. Overlapping mmaps
56 	 * happen in cases like between a fork and an exec.
57 	 */
58 	bool timeless_decoding;
59 
60 	/*
61 	 * Per-thread ignores the trace channel ID and instead assumes that
62 	 * everything in a buffer comes from the same process regardless of
63 	 * which CPU it ran on. It also implies no context IDs so the TID is
64 	 * taken from the auxtrace buffer.
65 	 */
66 	bool per_thread_decoding;
67 	bool snapshot_mode;
68 	bool data_queued;
69 	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
70 
71 	int num_cpu;
72 	u64 latest_kernel_timestamp;
73 	u32 auxtrace_type;
74 	u64 branches_sample_type;
75 	u64 branches_id;
76 	u64 instructions_sample_type;
77 	u64 instructions_sample_period;
78 	u64 instructions_id;
79 	u64 **metadata;
80 	unsigned int pmu_type;
81 	enum cs_etm_pid_fmt pid_fmt;
82 };
83 
84 struct cs_etm_traceid_queue {
85 	u8 trace_chan_id;
86 	u64 period_instructions;
87 	size_t last_branch_pos;
88 	union perf_event *event_buf;
89 	struct thread *thread;
90 	struct thread *prev_packet_thread;
91 	ocsd_ex_level prev_packet_el;
92 	ocsd_ex_level el;
93 	struct branch_stack *last_branch;
94 	struct branch_stack *last_branch_rb;
95 	struct cs_etm_packet *prev_packet;
96 	struct cs_etm_packet *packet;
97 	struct cs_etm_packet_queue packet_queue;
98 };
99 
100 enum cs_etm_format {
101 	UNSET,
102 	FORMATTED,
103 	UNFORMATTED
104 };
105 
106 struct cs_etm_queue {
107 	struct cs_etm_auxtrace *etm;
108 	struct cs_etm_decoder *decoder;
109 	struct auxtrace_buffer *buffer;
110 	unsigned int queue_nr;
111 	u8 pending_timestamp_chan_id;
112 	enum cs_etm_format format;
113 	u64 offset;
114 	const unsigned char *buf;
115 	size_t buf_len, buf_used;
116 	/* Conversion between traceID and index in traceid_queues array */
117 	struct intlist *traceid_queues_list;
118 	struct cs_etm_traceid_queue **traceid_queues;
119 	/* Conversion between traceID and metadata pointers */
120 	struct intlist *traceid_list;
121 	/*
122 	 * Same as traceid_list, but traceid_list may be a reference to another
123 	 * queue's which has a matching sink ID.
124 	 */
125 	struct intlist *own_traceid_list;
126 	u32 sink_id;
127 };
128 
129 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
130 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
131 					   pid_t tid);
132 static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
133 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
134 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
135 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
136 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
137 
138 /* PTMs ETMIDR [11:8] set to b0011 */
139 #define ETMIDR_PTM_VERSION 0x00000300
140 
141 /*
142  * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
143  * work with.  One option is to modify the auxtrace_heap_XYZ() API or simply
144  * encode the etm queue number as the upper 16 bits and the channel as
145  * the lower 16 bits.
146  */
147 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
148 		      (queue_nr << 16 | trace_chan_id)
149 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
150 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
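/*
 * Worked example (illustrative values): with queue_nr = 2 and
 * trace_chan_id = 0x10, TO_CS_QUEUE_NR() packs them into 0x20010;
 * TO_QUEUE_NR() and TO_TRACE_CHAN_ID() then recover 2 and 0x10
 * respectively.
 */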
151 #define SINK_UNSET ((u32) -1)
152 
153 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
154 {
155 	etmidr &= ETMIDR_PTM_VERSION;
156 
157 	if (etmidr == ETMIDR_PTM_VERSION)
158 		return CS_ETM_PROTO_PTM;
159 
160 	return CS_ETM_PROTO_ETMV3;
161 }
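/*
 * Example (hypothetical ETMIDR values, for illustration only): 0x4114f310
 * masks to 0x300 and is reported as CS_ETM_PROTO_PTM, while 0x4114f250
 * masks to 0x200 and falls back to CS_ETM_PROTO_ETMV3.
 */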
162 
163 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
164 {
165 	struct int_node *inode;
166 	u64 *metadata;
167 
168 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
169 	if (!inode)
170 		return -EINVAL;
171 
172 	metadata = inode->priv;
173 	*magic = metadata[CS_ETM_MAGIC];
174 	return 0;
175 }
176 
177 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
178 {
179 	struct int_node *inode;
180 	u64 *metadata;
181 
182 	inode = intlist__find(etmq->traceid_list, trace_chan_id);
183 	if (!inode)
184 		return -EINVAL;
185 
186 	metadata = inode->priv;
187 	*cpu = (int)metadata[CS_ETM_CPU];
188 	return 0;
189 }
190 
191 /*
192  * The returned PID format is presented as an enum:
193  *
194  *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
195  *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
196  *   CS_ETM_PIDFMT_NONE: No context IDs
197  *
198  * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
199  * are enabled at the same time when the session runs on an EL2 kernel.
200  * This means both CONTEXTIDR_EL1 and CONTEXTIDR_EL2 will be
201  * recorded in the trace data; in that case the tool will selectively use
202  * CONTEXTIDR_EL2 as the PID.
203  *
204  * The result is cached in etm->pid_fmt so this function only needs to be called
205  * when processing the aux info.
206  */
207 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
208 {
209 	u64 val;
210 
211 	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
212 		val = metadata[CS_ETM_ETMCR];
213 		/* CONTEXTIDR is traced */
214 		if (val & BIT(ETM_OPT_CTXTID))
215 			return CS_ETM_PIDFMT_CTXTID;
216 	} else {
217 		val = metadata[CS_ETMV4_TRCCONFIGR];
218 		/* CONTEXTIDR_EL2 is traced */
219 		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
220 			return CS_ETM_PIDFMT_CTXTID2;
221 		/* CONTEXTIDR_EL1 is traced */
222 		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
223 			return CS_ETM_PIDFMT_CTXTID;
224 	}
225 
226 	return CS_ETM_PIDFMT_NONE;
227 }
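/*
 * Note on precedence (as implied by the checks above): if a TRCCONFIGR
 * value has both the context ID and VMID bits set, the VMID test runs
 * first, so CS_ETM_PIDFMT_CTXTID2 is returned. This matches the
 * EL2-kernel case described in the comment above the function.
 */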
228 
229 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
230 {
231 	return etmq->etm->pid_fmt;
232 }
233 
234 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
235 					u8 trace_chan_id, u64 *cpu_metadata)
236 {
237 	/* Get an RB node for this CPU */
238 	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
239 
240 	/* Something went wrong, no need to continue */
241 	if (!inode)
242 		return -ENOMEM;
243 
244 	/* Disallow re-mapping a different traceID to metadata pair. */
245 	if (inode->priv) {
246 		u64 *curr_cpu_data = inode->priv;
247 		u8 curr_chan_id;
248 		int err;
249 
250 		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
251 			/*
252 			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
253 			 * are expected (but not supported) in per-thread mode,
254 			 * rather than signifying an error.
255 			 */
256 			if (etmq->etm->per_thread_decoding)
257 				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
258 			else
259 				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
260 
261 			return -EINVAL;
262 		}
263 
264 		/* check that the mapped ID matches */
265 		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
266 		if (err)
267 			return err;
268 
269 		if (curr_chan_id != trace_chan_id) {
270 			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
271 			return -EINVAL;
272 		}
273 
274 		/* Skip re-adding the same mappings if everything matched */
275 		return 0;
276 	}
277 
278 	/* Not one we've seen before, associate the traceID with the metadata pointer */
279 	inode->priv = cpu_metadata;
280 
281 	return 0;
282 }
283 
284 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
285 {
286 	if (etm->per_thread_decoding)
287 		return etm->queues.queue_array[0].priv;
288 	else
289 		return etm->queues.queue_array[cpu].priv;
290 }
291 
292 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
293 				   u64 *cpu_metadata)
294 {
295 	struct cs_etm_queue *etmq;
296 
297 	/*
298 	 * If the queue is unformatted then only save one mapping in the
299 	 * queue associated with that CPU so only one decoder is made.
300 	 */
301 	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
302 	if (etmq->format == UNFORMATTED)
303 		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
304 						    cpu_metadata);
305 
306 	/*
307 	 * Otherwise, version 0 trace IDs are global so save them into every
308 	 * queue.
309 	 */
310 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
311 		int ret;
312 
313 		etmq = etm->queues.queue_array[i].priv;
314 		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
315 						   cpu_metadata);
316 		if (ret)
317 			return ret;
318 	}
319 
320 	return 0;
321 }
322 
323 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
324 				       u64 hw_id)
325 {
326 	int err;
327 	u64 *cpu_data;
328 	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
329 
330 	cpu_data = get_cpu_data(etm, cpu);
331 	if (cpu_data == NULL)
332 		return -EINVAL;
333 
334 	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
335 	if (err)
336 		return err;
337 
338 	/*
339 	 * If we are picking up the association from the packet, we need to plug
340 	 * the correct trace ID into the metadata for setting up decoders later.
341 	 */
342 	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
343 }
344 
345 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
346 					 u64 hw_id)
347 {
348 	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
349 	int ret;
350 	u64 *cpu_data;
351 	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
352 	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
353 
354 	/*
355 	 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
356 	 * let it pass for now until an actual overlapping trace ID is hit. In
357 	 * most cases IDs won't overlap even if the sink changes.
358 	 */
359 	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
360 	    etmq->sink_id != sink_id) {
361 		pr_err("CS_ETM: mismatch between sink IDs\n");
362 		return -EINVAL;
363 	}
364 
365 	etmq->sink_id = sink_id;
366 
367 	/* Find which other queues use this sink and link their ID maps */
368 	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
369 		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
370 
371 		/* Different sinks, skip */
372 		if (other_etmq->sink_id != etmq->sink_id)
373 			continue;
374 
375 		/* Already linked, skip */
376 		if (other_etmq->traceid_list == etmq->traceid_list)
377 			continue;
378 
379 		/* At the point of first linking, this one should be empty */
380 		if (!intlist__empty(etmq->traceid_list)) {
381 			pr_err("CS_ETM: Can't link populated trace ID lists\n");
382 			return -EINVAL;
383 		}
384 
385 		etmq->own_traceid_list = NULL;
386 		intlist__delete(etmq->traceid_list);
387 		etmq->traceid_list = other_etmq->traceid_list;
388 		break;
389 	}
390 
391 	cpu_data = get_cpu_data(etm, cpu);
392 	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
393 	if (ret)
394 		return ret;
395 
396 	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
397 	if (ret)
398 		return ret;
399 
400 	return 0;
401 }
402 
403 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
404 {
405 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
406 
407 	switch (cs_etm_magic) {
408 	case __perf_cs_etmv3_magic:
409 		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
410 				      CORESIGHT_TRACE_ID_VAL_MASK);
411 		break;
412 	case __perf_cs_etmv4_magic:
413 	case __perf_cs_ete_magic:
414 		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
415 				      CORESIGHT_TRACE_ID_VAL_MASK);
416 		break;
417 	default:
418 		return -EINVAL;
419 	}
420 	return 0;
421 }
422 
423 /*
424  * Update the metadata trace ID from the value found in the HW_ID packet.
425  */
426 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
427 {
428 	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
429 
430 	switch (cs_etm_magic) {
431 	case __perf_cs_etmv3_magic:
432 		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
433 		break;
434 	case __perf_cs_etmv4_magic:
435 	case __perf_cs_ete_magic:
436 		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
437 		break;
438 
439 	default:
440 		return -EINVAL;
441 	}
442 	return 0;
443 }
444 
445 /*
446  * Get the metadata array index for a specific CPU.
447  *
448  */
449 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
450 {
451 	int i;
452 
453 	for (i = 0; i < etm->num_cpu; i++) {
454 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
455 			return i;
456 		}
457 	}
458 
459 	return -1;
460 }
461 
462 /*
463  * Get the metadata for a specific CPU from the array.
464  *
465  */
466 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
467 {
468 	int idx = get_cpu_data_idx(etm, cpu);
469 
470 	return (idx != -1) ? etm->metadata[idx] : NULL;
471 }
472 
473 /*
474  * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
475  *
476  * The payload associates the Trace ID and the CPU.
477  * The routine is tolerant of seeing multiple packets with the same association,
478  * but a CPU / Trace ID association changing during a session is an error.
479  */
480 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
481 					    union perf_event *event)
482 {
483 	struct cs_etm_auxtrace *etm;
484 	struct perf_sample sample;
485 	struct evsel *evsel;
486 	u64 hw_id;
487 	int cpu, version, err;
488 
489 	/* extract and parse the HW ID */
490 	hw_id = event->aux_output_hw_id.hw_id;
491 	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
492 
493 	/* check that we can handle this version */
494 	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
495 		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
496 		       version);
497 		return -EINVAL;
498 	}
499 
500 	/* get access to the etm metadata */
501 	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
502 	if (!etm || !etm->metadata)
503 		return -EINVAL;
504 
505 	/* parse the sample to get the CPU */
506 	evsel = evlist__event2evsel(session->evlist, event);
507 	if (!evsel)
508 		return -EINVAL;
509 	perf_sample__init(&sample, /*all=*/false);
510 	err = evsel__parse_sample(evsel, event, &sample);
511 	if (err)
512 		goto out;
513 	cpu = sample.cpu;
514 	if (cpu == -1) {
515 		/* no CPU in the sample - possibly recorded with an old version of perf */
516 		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.\n");
517 		err = -EINVAL;
518 		goto out;
519 	}
520 
521 	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
522 		err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
523 		goto out;
524 	}
525 
526 	err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
527 out:
528 	perf_sample__exit(&sample);
529 	return err;
530 }
531 
532 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
533 					      u8 trace_chan_id)
534 {
535 	/*
536 	 * When a timestamp packet is encountered the backend code
537 	 * is stopped so that the front end has time to process packets
538 	 * that were accumulated in the traceID queue.  Since there can
539 	 * be more than one channel per cs_etm_queue, we need to specify
540 	 * which traceID queue needs servicing.
541 	 */
542 	etmq->pending_timestamp_chan_id = trace_chan_id;
543 }
544 
545 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
546 				      u8 *trace_chan_id)
547 {
548 	struct cs_etm_packet_queue *packet_queue;
549 
550 	if (!etmq->pending_timestamp_chan_id)
551 		return 0;
552 
553 	if (trace_chan_id)
554 		*trace_chan_id = etmq->pending_timestamp_chan_id;
555 
556 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
557 						     etmq->pending_timestamp_chan_id);
558 	if (!packet_queue)
559 		return 0;
560 
561 	/* Acknowledge pending status */
562 	etmq->pending_timestamp_chan_id = 0;
563 
564 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
565 	return packet_queue->cs_timestamp;
566 }
567 
568 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
569 {
570 	int i;
571 
572 	queue->head = 0;
573 	queue->tail = 0;
574 	queue->packet_count = 0;
575 	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
576 		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
577 		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
578 		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
579 		queue->packet_buffer[i].instr_count = 0;
580 		queue->packet_buffer[i].last_instr_taken_branch = false;
581 		queue->packet_buffer[i].last_instr_size = 0;
582 		queue->packet_buffer[i].last_instr_type = 0;
583 		queue->packet_buffer[i].last_instr_subtype = 0;
584 		queue->packet_buffer[i].last_instr_cond = 0;
585 		queue->packet_buffer[i].flags = 0;
586 		queue->packet_buffer[i].exception_number = UINT32_MAX;
587 		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
588 		queue->packet_buffer[i].cpu = INT_MIN;
589 	}
590 }
591 
592 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
593 {
594 	int idx;
595 	struct int_node *inode;
596 	struct cs_etm_traceid_queue *tidq;
597 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
598 
599 	intlist__for_each_entry(inode, traceid_queues_list) {
600 		idx = (int)(intptr_t)inode->priv;
601 		tidq = etmq->traceid_queues[idx];
602 		cs_etm__clear_packet_queue(&tidq->packet_queue);
603 	}
604 }
605 
606 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
607 				      struct cs_etm_traceid_queue *tidq,
608 				      u8 trace_chan_id)
609 {
610 	int rc = -ENOMEM;
611 	struct auxtrace_queue *queue;
612 	struct cs_etm_auxtrace *etm = etmq->etm;
613 
614 	cs_etm__clear_packet_queue(&tidq->packet_queue);
615 
616 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
617 	tidq->trace_chan_id = trace_chan_id;
618 	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
619 	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
620 					       queue->tid);
621 	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
622 
623 	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
624 	if (!tidq->packet)
625 		goto out;
626 
627 	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
628 	if (!tidq->prev_packet)
629 		goto out_free;
630 
631 	if (etm->synth_opts.last_branch) {
632 		size_t sz = sizeof(struct branch_stack);
633 
634 		sz += etm->synth_opts.last_branch_sz *
635 		      sizeof(struct branch_entry);
636 		tidq->last_branch = zalloc(sz);
637 		if (!tidq->last_branch)
638 			goto out_free;
639 		tidq->last_branch_rb = zalloc(sz);
640 		if (!tidq->last_branch_rb)
641 			goto out_free;
642 	}
643 
644 	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
645 	if (!tidq->event_buf)
646 		goto out_free;
647 
648 	return 0;
649 
650 out_free:
651 	zfree(&tidq->last_branch_rb);
652 	zfree(&tidq->last_branch);
653 	zfree(&tidq->prev_packet);
654 	zfree(&tidq->packet);
655 out:
656 	return rc;
657 }
658 
659 static struct cs_etm_traceid_queue
660 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
661 {
662 	int idx;
663 	struct int_node *inode;
664 	struct intlist *traceid_queues_list;
665 	struct cs_etm_traceid_queue *tidq, **traceid_queues;
666 	struct cs_etm_auxtrace *etm = etmq->etm;
667 
668 	if (etm->per_thread_decoding)
669 		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
670 
671 	traceid_queues_list = etmq->traceid_queues_list;
672 
673 	/*
674 	 * Check if the traceid_queue exists for this traceID by looking
675 	 * in the queue list.
676 	 */
677 	inode = intlist__find(traceid_queues_list, trace_chan_id);
678 	if (inode) {
679 		idx = (int)(intptr_t)inode->priv;
680 		return etmq->traceid_queues[idx];
681 	}
682 
683 	/* We couldn't find a traceid_queue for this traceID, allocate one */
684 	tidq = malloc(sizeof(*tidq));
685 	if (!tidq)
686 		return NULL;
687 
688 	memset(tidq, 0, sizeof(*tidq));
689 
690 	/* Get a valid index for the new traceid_queue */
691 	idx = intlist__nr_entries(traceid_queues_list);
692 	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
693 	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
694 	if (!inode)
695 		goto out_free;
696 
697 	/* Associate this traceID with this index */
698 	inode->priv = (void *)(intptr_t)idx;
699 
700 	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
701 		goto out_free;
702 
703 	/* Grow the traceid_queues array by one unit */
704 	traceid_queues = etmq->traceid_queues;
705 	traceid_queues = reallocarray(traceid_queues,
706 				      idx + 1,
707 				      sizeof(*traceid_queues));
708 
709 	/*
710 	 * On failure reallocarray() returns NULL and the original block of
711 	 * memory is left untouched.
712 	 */
713 	if (!traceid_queues)
714 		goto out_free;
715 
716 	traceid_queues[idx] = tidq;
717 	etmq->traceid_queues = traceid_queues;
718 
719 	return etmq->traceid_queues[idx];
720 
721 out_free:
722 	/*
723 	 * Function intlist__remove() removes the inode from the list
724 	 * and deletes the memory associated with it.
725 	 */
726 	intlist__remove(traceid_queues_list, inode);
727 	free(tidq);
728 
729 	return NULL;
730 }
731 
732 struct cs_etm_packet_queue
733 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
734 {
735 	struct cs_etm_traceid_queue *tidq;
736 
737 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
738 	if (tidq)
739 		return &tidq->packet_queue;
740 
741 	return NULL;
742 }
743 
744 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
745 				struct cs_etm_traceid_queue *tidq)
746 {
747 	struct cs_etm_packet *tmp;
748 
749 	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
750 	    etm->synth_opts.instructions) {
751 		/*
752 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
753 		 * the next incoming packet.
754 		 *
755 		 * Threads and exception levels are also tracked for both the
756 		 * previous and current packets. This is because the previous
757 		 * packet is used for the 'from' IP for branch samples, so the
758 		 * thread at that time must also be assigned to that sample.
759 		 * Across discontinuity packets the thread can change, so by
760 		 * tracking the thread for the previous packet the branch sample
761 		 * will have the correct info.
762 		 */
763 		tmp = tidq->packet;
764 		tidq->packet = tidq->prev_packet;
765 		tidq->prev_packet = tmp;
766 		tidq->prev_packet_el = tidq->el;
767 		thread__put(tidq->prev_packet_thread);
768 		tidq->prev_packet_thread = thread__get(tidq->thread);
769 	}
770 }
771 
772 static void cs_etm__packet_dump(const char *pkt_string, void *data)
773 {
774 	const char *color = PERF_COLOR_BLUE;
775 	int len = strlen(pkt_string);
776 	struct cs_etm_queue *etmq = data;
777 	char queue_nr[64];
778 
779 	if (verbose)
780 		snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr);
781 	else
782 		queue_nr[0] = '\0';
783 
784 	if (len && (pkt_string[len-1] == '\n'))
785 		color_fprintf(stdout, color, "	%s%s", queue_nr, pkt_string);
786 	else
787 		color_fprintf(stdout, color, "	%s%s\n", queue_nr, pkt_string);
788 
789 	fflush(stdout);
790 }
791 
792 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
793 					  u64 *metadata, u32 etmidr)
794 {
795 	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
796 	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
797 	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
798 }
799 
800 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
801 					  u64 *metadata)
802 {
803 	t_params->protocol = CS_ETM_PROTO_ETMV4i;
804 	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
805 	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
806 	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
807 	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
808 	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
809 	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
810 }
811 
812 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
813 					u64 *metadata)
814 {
815 	t_params->protocol = CS_ETM_PROTO_ETE;
816 	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
817 	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
818 	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
819 	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
820 	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
821 	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
822 	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
823 }
824 
825 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
826 				     struct cs_etm_queue *etmq)
827 {
828 	struct int_node *inode;
829 
830 	intlist__for_each_entry(inode, etmq->traceid_list) {
831 		u64 *metadata = inode->priv;
832 		u64 architecture = metadata[CS_ETM_MAGIC];
833 		u32 etmidr;
834 
835 		switch (architecture) {
836 		case __perf_cs_etmv3_magic:
837 			etmidr = metadata[CS_ETM_ETMIDR];
838 			cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
839 			break;
840 		case __perf_cs_etmv4_magic:
841 			cs_etm__set_trace_param_etmv4(t_params++, metadata);
842 			break;
843 		case __perf_cs_ete_magic:
844 			cs_etm__set_trace_param_ete(t_params++, metadata);
845 			break;
846 		default:
847 			return -EINVAL;
848 		}
849 	}
850 
851 	return 0;
852 }
853 
854 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
855 				       struct cs_etm_queue *etmq,
856 				       enum cs_etm_decoder_operation mode)
857 {
858 	int ret = -EINVAL;
859 
860 	if (!(mode < CS_ETM_OPERATION_MAX))
861 		goto out;
862 
863 	d_params->packet_printer = cs_etm__packet_dump;
864 	d_params->operation = mode;
865 	d_params->data = etmq;
866 	d_params->formatted = etmq->format == FORMATTED;
867 	d_params->fsyncs = false;
868 	d_params->hsyncs = false;
869 	d_params->frame_aligned = true;
870 
871 	ret = 0;
872 out:
873 	return ret;
874 }
875 
876 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
877 			       struct auxtrace_buffer *buffer)
878 {
879 	int ret;
880 	const char *color = PERF_COLOR_BLUE;
881 	size_t buffer_used = 0;
882 
883 	fprintf(stdout, "\n");
884 	color_fprintf(stdout, color,
885 		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
886 		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
887 
888 	do {
889 		size_t consumed;
890 
891 		ret = cs_etm_decoder__process_data_block(
892 				etmq->decoder, buffer->offset,
893 				&((u8 *)buffer->data)[buffer_used],
894 				buffer->size - buffer_used, &consumed);
895 		if (ret)
896 			break;
897 
898 		buffer_used += consumed;
899 	} while (buffer_used < buffer->size);
900 
901 	cs_etm_decoder__reset(etmq->decoder);
902 }
903 
904 static int cs_etm__flush_events(struct perf_session *session,
905 				const struct perf_tool *tool)
906 {
907 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
908 						   struct cs_etm_auxtrace,
909 						   auxtrace);
910 	if (dump_trace)
911 		return 0;
912 
913 	if (!tool->ordered_events)
914 		return -EINVAL;
915 
916 	if (etm->timeless_decoding) {
917 		/*
918 		 * Pass tid = -1 to process all queues. But likely they will have
919 		 * already been processed on PERF_RECORD_EXIT anyway.
920 		 */
921 		return cs_etm__process_timeless_queues(etm, -1);
922 	}
923 
924 	return cs_etm__process_timestamped_queues(etm);
925 }
926 
927 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
928 {
929 	int idx;
930 	uintptr_t priv;
931 	struct int_node *inode, *tmp;
932 	struct cs_etm_traceid_queue *tidq;
933 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
934 
935 	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
936 		priv = (uintptr_t)inode->priv;
937 		idx = priv;
938 
939 		/* Free this traceid_queue from the array */
940 		tidq = etmq->traceid_queues[idx];
941 		thread__zput(tidq->thread);
942 		thread__zput(tidq->prev_packet_thread);
943 		zfree(&tidq->event_buf);
944 		zfree(&tidq->last_branch);
945 		zfree(&tidq->last_branch_rb);
946 		zfree(&tidq->prev_packet);
947 		zfree(&tidq->packet);
948 		zfree(&tidq);
949 
950 		/*
951 		 * Function intlist__remove() removes the inode from the list
952 	 * and deletes the memory associated with it.
953 		 */
954 		intlist__remove(traceid_queues_list, inode);
955 	}
956 
957 	/* Then the RB tree itself */
958 	intlist__delete(traceid_queues_list);
959 	etmq->traceid_queues_list = NULL;
960 
961 	/* finally free the traceid_queues array */
962 	zfree(&etmq->traceid_queues);
963 }
964 
965 static void cs_etm__free_queue(void *priv)
966 {
967 	struct int_node *inode, *tmp;
968 	struct cs_etm_queue *etmq = priv;
969 
970 	if (!etmq)
971 		return;
972 
973 	cs_etm_decoder__free(etmq->decoder);
974 	cs_etm__free_traceid_queues(etmq);
975 
976 	if (etmq->own_traceid_list) {
977 		/* First remove all traceID/metadata nodes for the RB tree */
978 		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
979 			intlist__remove(etmq->own_traceid_list, inode);
980 
981 		/* Then the RB tree itself */
982 		intlist__delete(etmq->own_traceid_list);
983 	}
984 
985 	free(etmq);
986 }
987 
988 static void cs_etm__free_events(struct perf_session *session)
989 {
990 	unsigned int i;
991 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
992 						   struct cs_etm_auxtrace,
993 						   auxtrace);
994 	struct auxtrace_queues *queues = &aux->queues;
995 
996 	for (i = 0; i < queues->nr_queues; i++) {
997 		cs_etm__free_queue(queues->queue_array[i].priv);
998 		queues->queue_array[i].priv = NULL;
999 	}
1000 
1001 	auxtrace_queues__free(queues);
1002 }
1003 
1004 static void cs_etm__free(struct perf_session *session)
1005 {
1006 	int i;
1007 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1008 						   struct cs_etm_auxtrace,
1009 						   auxtrace);
1010 	cs_etm__free_events(session);
1011 	session->auxtrace = NULL;
1012 
1013 	for (i = 0; i < aux->num_cpu; i++)
1014 		zfree(&aux->metadata[i]);
1015 
1016 	zfree(&aux->metadata);
1017 	zfree(&aux);
1018 }
1019 
1020 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
1021 				      struct evsel *evsel)
1022 {
1023 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1024 						   struct cs_etm_auxtrace,
1025 						   auxtrace);
1026 
1027 	return evsel->core.attr.type == aux->pmu_type;
1028 }
1029 
1030 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
1031 					   ocsd_ex_level el)
1032 {
1033 	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
1034 
1035 	/*
1036 	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
1037 	 * running at EL1, assume everything is the host.
1038 	 */
1039 	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
1040 		return &etmq->etm->session->machines.host;
1041 
1042 	/*
1043 	 * Not perfect, but otherwise assume anything in EL1 is the default
1044 	 * guest, and everything else is the host. Distinguishing between guest
1045 	 * and host userspaces isn't currently supported either. Neither is
1046 	 * multiple guest support. All this does is reduce the likelihood of
1047 	 * decode errors where we look into the host kernel maps when it should
1048 	 * have been the guest maps.
1049 	 */
1050 	switch (el) {
1051 	case ocsd_EL1:
1052 		return machines__find_guest(&etmq->etm->session->machines,
1053 					    DEFAULT_GUEST_KERNEL_ID);
1054 	case ocsd_EL3:
1055 	case ocsd_EL2:
1056 	case ocsd_EL0:
1057 	case ocsd_EL_unknown:
1058 	default:
1059 		return &etmq->etm->session->machines.host;
1060 	}
1061 }
1062 
1063 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
1064 			   ocsd_ex_level el)
1065 {
1066 	struct machine *machine = cs_etm__get_machine(etmq, el);
1067 
1068 	if (address >= machine__kernel_start(machine)) {
1069 		if (machine__is_host(machine))
1070 			return PERF_RECORD_MISC_KERNEL;
1071 		else
1072 			return PERF_RECORD_MISC_GUEST_KERNEL;
1073 	} else {
1074 		if (machine__is_host(machine))
1075 			return PERF_RECORD_MISC_USER;
1076 		else {
1077 			/*
1078 			 * Can't really happen at the moment because
1079 			 * cs_etm__get_machine() will always return
1080 			 * machines.host for any non EL1 trace.
1081 			 */
1082 			return PERF_RECORD_MISC_GUEST_USER;
1083 		}
1084 	}
1085 }
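/*
 * Example (illustrative addresses, assuming a host machine): a typical
 * arm64 kernel address such as 0xffff800008000000 is at or above the value
 * from machine__kernel_start() and resolves to PERF_RECORD_MISC_KERNEL,
 * while a user-space address like 0x400000 resolves to
 * PERF_RECORD_MISC_USER.
 */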
1086 
1087 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
1088 			      u64 address, size_t size, u8 *buffer,
1089 			      const ocsd_mem_space_acc_t mem_space)
1090 {
1091 	u8  cpumode;
1092 	u64 offset;
1093 	int len;
1094 	struct addr_location al;
1095 	struct dso *dso;
1096 	struct cs_etm_traceid_queue *tidq;
1097 	int ret = 0;
1098 
1099 	if (!etmq)
1100 		return 0;
1101 
1102 	addr_location__init(&al);
1103 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1104 	if (!tidq)
1105 		goto out;
1106 
1107 	/*
1108 	 * We've already tracked the EL alongside the PID in cs_etm__set_thread(),
1109 	 * so double-check that it matches what OpenCSD thinks as well. It
1110 	 * doesn't distinguish between EL0 and EL1 for this mem access callback
1111 	 * so we had to do the extra tracking. Skip validation if it's any of
1112 	 * the 'any' values.
1113 	 */
1114 	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
1115 	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
1116 		if (mem_space & OCSD_MEM_SPACE_EL1N) {
1117 			/* Includes both non secure EL1 and EL0 */
1118 			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
1119 		} else if (mem_space & OCSD_MEM_SPACE_EL2)
1120 			assert(tidq->el == ocsd_EL2);
1121 		else if (mem_space & OCSD_MEM_SPACE_EL3)
1122 			assert(tidq->el == ocsd_EL3);
1123 	}
1124 
1125 	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1126 
1127 	if (!thread__find_map(tidq->thread, cpumode, address, &al))
1128 		goto out;
1129 
1130 	dso = map__dso(al.map);
1131 	if (!dso)
1132 		goto out;
1133 
1134 	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
1135 	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1136 		goto out;
1137 
1138 	offset = map__map_ip(al.map, address);
1139 
1140 	map__load(al.map);
1141 
1142 	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1143 				    offset, buffer, size);
1144 
1145 	if (len <= 0) {
1146 		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1147 				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1148 		if (!dso__auxtrace_warned(dso)) {
1149 			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1150 				address,
1151 				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
1152 			dso__set_auxtrace_warned(dso);
1153 		}
1154 		goto out;
1155 	}
1156 	ret = len;
1157 out:
1158 	addr_location__exit(&al);
1159 	return ret;
1160 }
1161 
1162 static struct cs_etm_queue *cs_etm__alloc_queue(void)
1163 {
1164 	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
1165 	if (!etmq)
1166 		return NULL;
1167 
1168 	etmq->traceid_queues_list = intlist__new(NULL);
1169 	if (!etmq->traceid_queues_list)
1170 		goto out_free;
1171 
1172 	/*
1173 	 * Create an RB tree for the traceID-metadata tuples.  Since the conversion
1174 	 * has to be made for each packet that gets decoded, optimizing the lookup
1175 	 * rather than scanning a sequential array is worth doing.
1176 	 */
1177 	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
1178 	if (!etmq->traceid_list)
1179 		goto out_free;
1180 
1181 	return etmq;
1182 
1183 out_free:
1184 	intlist__delete(etmq->traceid_queues_list);
1185 	free(etmq);
1186 
1187 	return NULL;
1188 }
1189 
1190 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1191 			       struct auxtrace_queue *queue,
1192 			       unsigned int queue_nr)
1193 {
1194 	struct cs_etm_queue *etmq = queue->priv;
1195 
1196 	if (etmq)
1197 		return 0;
1198 
1199 	etmq = cs_etm__alloc_queue();
1200 
1201 	if (!etmq)
1202 		return -ENOMEM;
1203 
1204 	queue->priv = etmq;
1205 	etmq->etm = etm;
1206 	etmq->queue_nr = queue_nr;
1207 	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
1208 	etmq->offset = 0;
1209 	etmq->sink_id = SINK_UNSET;
1210 
1211 	return 0;
1212 }
1213 
1214 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1215 					    struct cs_etm_queue *etmq,
1216 					    unsigned int queue_nr)
1217 {
1218 	int ret = 0;
1219 	unsigned int cs_queue_nr;
1220 	u8 trace_chan_id;
1221 	u64 cs_timestamp;
1222 
1223 	/*
1224 	 * We are under a CPU-wide trace scenario.  As such we need to know
1225 	 * when the code that generated the traces started to execute so that
1226 	 * it can be correlated with execution on other CPUs.  So we get a
1227 	 * handle on the beginning of traces and decode until we find a
1228 	 * timestamp.  The timestamp is then added to the auxtrace min heap
1229 	 * in order to know which queue (of all the etmqs) to decode first.
1230 	 */
1231 	while (1) {
1232 		/*
1233 		 * Fetch an aux_buffer from this etmq.  Bail if no more
1234 		 * blocks or an error has been encountered.
1235 		 */
1236 		ret = cs_etm__get_data_block(etmq);
1237 		if (ret <= 0)
1238 			goto out;
1239 
1240 		/*
1241 		 * Run decoder on the trace block.  The decoder will stop when
1242 		 * encountering a CS timestamp, a full packet queue or the end of
1243 		 * trace for that block.
1244 		 */
1245 		ret = cs_etm__decode_data_block(etmq);
1246 		if (ret)
1247 			goto out;
1248 
1249 		/*
1250 		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1251 		 * the timestamp calculation for us.
1252 		 */
1253 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1254 
1255 		/* We found a timestamp, no need to continue. */
1256 		if (cs_timestamp)
1257 			break;
1258 
1259 		/*
1260 		 * We didn't find a timestamp so empty all the traceid packet
1261 		 * queues before looking for another timestamp packet, either
1262 		 * in the current data block or a new one.  Packets that were
1263 		 * just decoded are useless since no timestamp has been
1264 		 * associated with them.  As such simply discard them.
1265 		 */
1266 		cs_etm__clear_all_packet_queues(etmq);
1267 	}
1268 
1269 	/*
1270 	 * We have a timestamp.  Add it to the min heap to reflect when
1271 	 * instructions conveyed by the range packets of this traceID queue
1272 	 * started to execute.  Once the same has been done for all the traceID
1273 	 * queues of each etmq, rendering and decoding can start in
1274 	 * chronological order.
1275 	 *
1276 	 * Note that packets decoded above are still in the traceID's packet
1277 	 * queue and will be processed in cs_etm__process_timestamped_queues().
1278 	 */
1279 	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1280 	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1281 out:
1282 	return ret;
1283 }
1284 
1285 static inline
1286 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1287 				 struct cs_etm_traceid_queue *tidq)
1288 {
1289 	struct branch_stack *bs_src = tidq->last_branch_rb;
1290 	struct branch_stack *bs_dst = tidq->last_branch;
1291 	size_t nr = 0;
1292 
1293 	/*
1294 	 * Set the number of records before early exit: ->nr is used to
1295 	 * determine how many branches to copy from ->entries.
1296 	 */
1297 	bs_dst->nr = bs_src->nr;
1298 
1299 	/*
1300 	 * Early exit when there is nothing to copy.
1301 	 */
1302 	if (!bs_src->nr)
1303 		return;
1304 
1305 	/*
1306 	 * As bs_src->entries is a circular buffer, we need to copy from it in
1307 	 * two steps.  First, copy the branches from the most recently inserted
1308 	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1309 	 */
1310 	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1311 	memcpy(&bs_dst->entries[0],
1312 	       &bs_src->entries[tidq->last_branch_pos],
1313 	       sizeof(struct branch_entry) * nr);
1314 
1315 	/*
1316 	 * If we wrapped around at least once, the branches from the beginning
1317 	 * of the bs_src->entries buffer and until the ->last_branch_pos element
1318 	 * are older valid branches: copy them over.  The total number of
1319 	 * branches copied over will be equal to the number of branches asked by
1320 	 * the user in last_branch_sz.
1321 	 */
1322 	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1323 		memcpy(&bs_dst->entries[nr],
1324 		       &bs_src->entries[0],
1325 		       sizeof(struct branch_entry) * tidq->last_branch_pos);
1326 	}
1327 }
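/*
 * Illustrative walk-through (assumed values): with last_branch_sz = 4,
 * last_branch_pos = 1 and bs_src->nr >= 4, the first memcpy copies
 * entries[1..3] (newest first) to the front of bs_dst and the second
 * copies entries[0] after them, giving four entries in reverse
 * chronological order.
 */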
1328 
1329 static inline
1330 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1331 {
1332 	tidq->last_branch_pos = 0;
1333 	tidq->last_branch_rb->nr = 0;
1334 }
1335 
1336 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1337 					 u8 trace_chan_id, u64 addr)
1338 {
1339 	u8 instrBytes[2];
1340 
1341 	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1342 			   instrBytes, 0);
1343 	/*
1344 	 * T32 instruction size is indicated by bits[15:11] of the first
1345 	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1346 	 * denote a 32-bit instruction.
1347 	 */
1348 	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1349 }
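/*
 * Example (illustrative opcodes): a BL encoding whose first halfword is
 * 0xF000 has bits[15:11] = 0b11110, so instrBytes[1] & 0xF8 = 0xF0 >= 0xE8
 * and 4 is returned; a 16-bit BX LR (0x4770) gives 0x47 & 0xF8 = 0x40,
 * so 2 is returned.
 */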
1350 
1351 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1352 {
1353 	/*
1354 	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1355 	 * appear in samples.
1356 	 */
1357 	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1358 	    packet->sample_type == CS_ETM_EXCEPTION)
1359 		return 0;
1360 
1361 	return packet->start_addr;
1362 }
1363 
1364 static inline
1365 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1366 {
1367 	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1368 	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1369 		return 0;
1370 
1371 	return packet->end_addr - packet->last_instr_size;
1372 }
1373 
1374 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1375 				     u64 trace_chan_id,
1376 				     const struct cs_etm_packet *packet,
1377 				     u64 offset)
1378 {
1379 	if (packet->isa == CS_ETM_ISA_T32) {
1380 		u64 addr = packet->start_addr;
1381 
1382 		while (offset) {
1383 			addr += cs_etm__t32_instr_size(etmq,
1384 						       trace_chan_id, addr);
1385 			offset--;
1386 		}
1387 		return addr;
1388 	}
1389 
1390 	/* Assume a 4 byte instruction size (A32/A64) */
1391 	return packet->start_addr + offset * 4;
1392 }
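/*
 * Example (assumed addresses): for an A64 range packet starting at
 * 0x400000, an offset of 3 instructions returns 0x40000c; for a T32
 * packet the same offset is walked one instruction at a time because
 * each one may be 2 or 4 bytes long.
 */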
1393 
1394 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1395 					  struct cs_etm_traceid_queue *tidq)
1396 {
1397 	struct branch_stack *bs = tidq->last_branch_rb;
1398 	struct branch_entry *be;
1399 
1400 	/*
1401 	 * The branches are recorded in a circular buffer in reverse
1402 	 * chronological order: we start recording from the last element of the
1403 	 * buffer down.  After writing the first element of the stack, move the
1404 	 * insert position back to the end of the buffer.
1405 	 */
1406 	if (!tidq->last_branch_pos)
1407 		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1408 
1409 	tidq->last_branch_pos -= 1;
1410 
1411 	be       = &bs->entries[tidq->last_branch_pos];
1412 	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1413 	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1414 	/* No support for mispredict */
1415 	be->flags.mispred = 0;
1416 	be->flags.predicted = 1;
1417 
1418 	/*
1419 	 * Increment bs->nr until reaching the number of last branches asked by
1420 	 * the user on the command line.
1421 	 */
1422 	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1423 		bs->nr += 1;
1424 }
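/*
 * Sketch of the insertion order (assuming last_branch_sz = 4): slots are
 * filled 3, 2, 1, 0, 3, 2, ... so tidq->last_branch_pos always points at
 * the most recently written entry, which is what
 * cs_etm__copy_last_branch_rb() relies on.
 */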
1425 
1426 static int cs_etm__inject_event(union perf_event *event,
1427 			       struct perf_sample *sample, u64 type)
1428 {
1429 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1430 	return perf_event__synthesize_sample(event, type, 0, sample);
1431 }
1432 
1433 
1434 static int
1435 cs_etm__get_trace(struct cs_etm_queue *etmq)
1436 {
1437 	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1438 	struct auxtrace_buffer *old_buffer = aux_buffer;
1439 	struct auxtrace_queue *queue;
1440 
1441 	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1442 
1443 	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1444 
1445 	/* If no more data, drop the previous auxtrace_buffer and return */
1446 	if (!aux_buffer) {
1447 		if (old_buffer)
1448 			auxtrace_buffer__drop_data(old_buffer);
1449 		etmq->buf_len = 0;
1450 		return 0;
1451 	}
1452 
1453 	etmq->buffer = aux_buffer;
1454 
1455 	/* If the aux_buffer doesn't have data associated, try to load it */
1456 	if (!aux_buffer->data) {
1457 		/* get the file desc associated with the perf data file */
1458 		int fd = perf_data__fd(etmq->etm->session->data);
1459 
1460 		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1461 		if (!aux_buffer->data)
1462 			return -ENOMEM;
1463 	}
1464 
1465 	/* If valid, drop the previous buffer */
1466 	if (old_buffer)
1467 		auxtrace_buffer__drop_data(old_buffer);
1468 
1469 	etmq->buf_used = 0;
1470 	etmq->buf_len = aux_buffer->size;
1471 	etmq->buf = aux_buffer->data;
1472 
1473 	return etmq->buf_len;
1474 }
1475 
1476 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1477 			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1478 			       ocsd_ex_level el)
1479 {
1480 	struct machine *machine = cs_etm__get_machine(etmq, el);
1481 
1482 	if (tid != -1) {
1483 		thread__zput(tidq->thread);
1484 		tidq->thread = machine__find_thread(machine, -1, tid);
1485 	}
1486 
1487 	/* Couldn't find a known thread */
1488 	if (!tidq->thread)
1489 		tidq->thread = machine__idle_thread(machine);
1490 
1491 	tidq->el = el;
1492 }
1493 
1494 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1495 			    u8 trace_chan_id, ocsd_ex_level el)
1496 {
1497 	struct cs_etm_traceid_queue *tidq;
1498 
1499 	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1500 	if (!tidq)
1501 		return -EINVAL;
1502 
1503 	cs_etm__set_thread(etmq, tidq, tid, el);
1504 	return 0;
1505 }
1506 
1507 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1508 {
1509 	return !!etmq->etm->timeless_decoding;
1510 }
1511 
1512 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1513 			      u64 trace_chan_id,
1514 			      const struct cs_etm_packet *packet,
1515 			      struct perf_sample *sample)
1516 {
1517 	/*
1518 	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1519 	 * packet, so directly bail out with 'insn_len' = 0.
1520 	 */
1521 	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1522 		sample->insn_len = 0;
1523 		return;
1524 	}
1525 
1526 	/*
1527 	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1528 	 * cs_etm__t32_instr_size().
1529 	 */
1530 	if (packet->isa == CS_ETM_ISA_T32)
1531 		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1532 							  sample->ip);
1533 	/* Otherwise, A64 and A32 instructions are always 32-bit. */
1534 	else
1535 		sample->insn_len = 4;
1536 
1537 	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1538 			   (void *)sample->insn, 0);
1539 }
1540 
1541 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1542 {
1543 	struct cs_etm_auxtrace *etm = etmq->etm;
1544 
1545 	if (etm->has_virtual_ts)
1546 		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1547 	else
1548 		return cs_timestamp;
1549 }
1550 
1551 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1552 					       struct cs_etm_traceid_queue *tidq)
1553 {
1554 	struct cs_etm_auxtrace *etm = etmq->etm;
1555 	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1556 
1557 	if (!etm->timeless_decoding && etm->has_virtual_ts)
1558 		return packet_queue->cs_timestamp;
1559 	else
1560 		return etm->latest_kernel_timestamp;
1561 }
1562 
1563 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1564 					    struct cs_etm_traceid_queue *tidq,
1565 					    u64 addr, u64 period)
1566 {
1567 	int ret = 0;
1568 	struct cs_etm_auxtrace *etm = etmq->etm;
1569 	union perf_event *event = tidq->event_buf;
1570 	struct perf_sample sample;
1571 
1572 	perf_sample__init(&sample, /*all=*/true);
1573 	event->sample.header.type = PERF_RECORD_SAMPLE;
1574 	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1575 	event->sample.header.size = sizeof(struct perf_event_header);
1576 
1577 	/* Set time field based on etm auxtrace config. */
1578 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1579 
1580 	sample.ip = addr;
1581 	sample.pid = thread__pid(tidq->thread);
1582 	sample.tid = thread__tid(tidq->thread);
1583 	sample.id = etmq->etm->instructions_id;
1584 	sample.stream_id = etmq->etm->instructions_id;
1585 	sample.period = period;
1586 	sample.cpu = tidq->packet->cpu;
1587 	sample.flags = tidq->prev_packet->flags;
1588 	sample.cpumode = event->sample.header.misc;
1589 
1590 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1591 
1592 	if (etm->synth_opts.last_branch)
1593 		sample.branch_stack = tidq->last_branch;
1594 
1595 	if (etm->synth_opts.inject) {
1596 		ret = cs_etm__inject_event(event, &sample,
1597 					   etm->instructions_sample_type);
1598 		if (ret)
1599 			return ret;
1600 	}
1601 
1602 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1603 
1604 	if (ret)
1605 		pr_err(
1606 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1607 			ret);
1608 
1609 	perf_sample__exit(&sample);
1610 	return ret;
1611 }
1612 
1613 /*
1614  * The cs etm packet encodes an instruction range between a branch target
1615  * and the next taken branch. Generate sample accordingly.
1616  */
1617 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1618 				       struct cs_etm_traceid_queue *tidq)
1619 {
1620 	int ret = 0;
1621 	struct cs_etm_auxtrace *etm = etmq->etm;
1622 	struct perf_sample sample = {.ip = 0,};
1623 	union perf_event *event = tidq->event_buf;
1624 	struct dummy_branch_stack {
1625 		u64			nr;
1626 		u64			hw_idx;
1627 		struct branch_entry	entries;
1628 	} dummy_bs;
1629 	u64 ip;
1630 
1631 	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1632 
1633 	event->sample.header.type = PERF_RECORD_SAMPLE;
1634 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1635 						     tidq->prev_packet_el);
1636 	event->sample.header.size = sizeof(struct perf_event_header);
1637 
1638 	/* Set time field based on etm auxtrace config. */
1639 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1640 
1641 	sample.ip = ip;
1642 	sample.pid = thread__pid(tidq->prev_packet_thread);
1643 	sample.tid = thread__tid(tidq->prev_packet_thread);
1644 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1645 	sample.id = etmq->etm->branches_id;
1646 	sample.stream_id = etmq->etm->branches_id;
1647 	sample.period = 1;
1648 	sample.cpu = tidq->packet->cpu;
1649 	sample.flags = tidq->prev_packet->flags;
1650 	sample.cpumode = event->sample.header.misc;
1651 
1652 	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1653 			  &sample);
1654 
1655 	/*
1656 	 * perf report cannot handle events without a branch stack
1657 	 */
1658 	if (etm->synth_opts.last_branch) {
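		/*
		 * Build a single-entry branch stack on the fly; an hw_idx of
		 * -1ULL indicates that no hardware-provided branch index is
		 * available for this synthesised sample.
		 */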
1659 		dummy_bs = (struct dummy_branch_stack){
1660 			.nr = 1,
1661 			.hw_idx = -1ULL,
1662 			.entries = {
1663 				.from = sample.ip,
1664 				.to = sample.addr,
1665 			},
1666 		};
1667 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1668 	}
1669 
1670 	if (etm->synth_opts.inject) {
1671 		ret = cs_etm__inject_event(event, &sample,
1672 					   etm->branches_sample_type);
1673 		if (ret)
1674 			return ret;
1675 	}
1676 
1677 	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1678 
1679 	if (ret)
1680 		pr_err(
1681 		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1682 		ret);
1683 
1684 	return ret;
1685 }
1686 
1687 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1688 				struct perf_session *session)
1689 {
1690 	struct evlist *evlist = session->evlist;
1691 	struct evsel *evsel;
1692 	struct perf_event_attr attr;
1693 	bool found = false;
1694 	u64 id;
1695 	int err;
1696 
1697 	evlist__for_each_entry(evlist, evsel) {
1698 		if (evsel->core.attr.type == etm->pmu_type) {
1699 			found = true;
1700 			break;
1701 		}
1702 	}
1703 
1704 	if (!found) {
1705 		pr_debug("No selected events with CoreSight Trace data\n");
1706 		return 0;
1707 	}
1708 
1709 	memset(&attr, 0, sizeof(struct perf_event_attr));
1710 	attr.size = sizeof(struct perf_event_attr);
1711 	attr.type = PERF_TYPE_HARDWARE;
1712 	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1713 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1714 			    PERF_SAMPLE_PERIOD;
1715 	if (etm->timeless_decoding)
1716 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1717 	else
1718 		attr.sample_type |= PERF_SAMPLE_TIME;
1719 
1720 	attr.exclude_user = evsel->core.attr.exclude_user;
1721 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1722 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1723 	attr.exclude_host = evsel->core.attr.exclude_host;
1724 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1725 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1726 	attr.read_format = evsel->core.attr.read_format;
1727 
1728 	/* create new id val to be a fixed offset from evsel id */
1729 	id = auxtrace_synth_id_range_start(evsel);
1730 
1731 	if (etm->synth_opts.branches) {
1732 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1733 		attr.sample_period = 1;
1734 		attr.sample_type |= PERF_SAMPLE_ADDR;
1735 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1736 		if (err)
1737 			return err;
1738 		etm->branches_sample_type = attr.sample_type;
1739 		etm->branches_id = id;
1740 		id += 1;
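		/*
		 * PERF_SAMPLE_ADDR was only needed to carry the branch target
		 * for branch samples; clear it again before configuring the
		 * instruction event below.
		 */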
1741 		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1742 	}
1743 
1744 	if (etm->synth_opts.last_branch) {
1745 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1746 		/*
1747 		 * We don't use the hardware index, but the sample generation
1748 		 * code uses the new format branch_stack with this field,
1749 		 * so the event attributes must indicate that it's present.
1750 		 */
1751 		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1752 	}
1753 
1754 	if (etm->synth_opts.instructions) {
1755 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1756 		attr.sample_period = etm->synth_opts.period;
1757 		etm->instructions_sample_period = attr.sample_period;
1758 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1759 		if (err)
1760 			return err;
1761 		etm->instructions_sample_type = attr.sample_type;
1762 		etm->instructions_id = id;
1763 		id += 1;
1764 	}
1765 
1766 	return 0;
1767 }
1768 
1769 static int cs_etm__sample(struct cs_etm_queue *etmq,
1770 			  struct cs_etm_traceid_queue *tidq)
1771 {
1772 	struct cs_etm_auxtrace *etm = etmq->etm;
1773 	int ret;
1774 	u8 trace_chan_id = tidq->trace_chan_id;
1775 	u64 instrs_prev;
1776 
1777 	/* Get instructions remainder from previous packet */
1778 	instrs_prev = tidq->period_instructions;
1779 
1780 	tidq->period_instructions += tidq->packet->instr_count;
1781 
1782 	/*
1783 	 * Record a branch when the last instruction in
1784 	 * PREV_PACKET is a branch.
1785 	 */
1786 	if (etm->synth_opts.last_branch &&
1787 	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1788 	    tidq->prev_packet->last_instr_taken_branch)
1789 		cs_etm__update_last_branch_rb(etmq, tidq);
1790 
1791 	if (etm->synth_opts.instructions &&
1792 	    tidq->period_instructions >= etm->instructions_sample_period) {
1793 		/*
1794 		 * Emit instruction sample periodically
1795 		 * TODO: allow period to be defined in cycles and clock time
1796 		 */
1797 
1798 		/*
1799 		 * Below diagram demonstrates the instruction samples
1800 		 * generation flows:
1801 		 *
1802 		 *    Instrs     Instrs       Instrs       Instrs
1803 		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1804 		 *    |            |            |            |
1805 		 *    V            V            V            V
1806 		 *   --------------------------------------------------
1807 		 *            ^                                  ^
1808 		 *            |                                  |
1809 		 *         Period                             Period
1810 		 *    instructions(Pi)                   instructions(Pi')
1811 		 *
1812 		 *            |                                  |
1813 		 *            \---------------- -----------------/
1814 		 *                             V
1815 		 *                 tidq->packet->instr_count
1816 		 *
1817 		 * Instrs Sample(n...) are the synthesised samples occurring
1818 		 * every etm->instructions_sample_period instructions - as
1819 		 * defined on the perf command line.  Sample(n) is the last
1820 		 * sample before the current etm packet; samples n+1 to n+3
1821 		 * are generated from the current etm packet.
1822 		 *
1823 		 * tidq->packet->instr_count represents the number of
1824 		 * instructions in the current etm packet.
1825 		 *
1826 		 * Period instructions (Pi) is the number of
1827 		 * instructions executed after the sample point(n) from the
1828 		 * previous etm packet.  This will always be less than
1829 		 * etm->instructions_sample_period.
1830 		 *
1831 		 * When generating new samples, sample(n+1) combines two parts
1832 		 * of instructions: the tail of the old packet and the head of
1833 		 * the newly arrived packet.  Samples (n+2) and (n+3) each
1834 		 * consume a full sample period of instructions.  After
1835 		 * sample(n+3), the remaining instructions are left for later
1836 		 * packets and are carried over in tidq->period_instructions
1837 		 * for the next round of calculation.
1838 		 */
1839 
1840 		/*
1841 		 * Get the initial offset into the current packet instructions;
1842 		 * entry conditions ensure that instrs_prev is less than
1843 		 * etm->instructions_sample_period.
1844 		 */
1845 		u64 offset = etm->instructions_sample_period - instrs_prev;
1846 		u64 addr;
1847 
1848 		/* Prepare last branches for instruction sample */
1849 		if (etm->synth_opts.last_branch)
1850 			cs_etm__copy_last_branch_rb(etmq, tidq);
1851 
1852 		while (tidq->period_instructions >=
1853 				etm->instructions_sample_period) {
1854 			/*
1855 			 * Calculate the address of the sampled instruction (-1
1856 			 * as sample is reported as though instruction has just
1857 			 * been executed, but PC has not advanced to next
1858 			 * instruction)
1859 			 */
1860 			addr = cs_etm__instr_addr(etmq, trace_chan_id,
1861 						  tidq->packet, offset - 1);
1862 			ret = cs_etm__synth_instruction_sample(
1863 				etmq, tidq, addr,
1864 				etm->instructions_sample_period);
1865 			if (ret)
1866 				return ret;
1867 
1868 			offset += etm->instructions_sample_period;
1869 			tidq->period_instructions -=
1870 				etm->instructions_sample_period;
1871 		}
1872 	}
1873 
1874 	if (etm->synth_opts.branches) {
1875 		bool generate_sample = false;
1876 
1877 		/* Generate sample for tracing on packet */
1878 		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1879 			generate_sample = true;
1880 
1881 		/* Generate sample for branch taken packet */
1882 		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1883 		    tidq->prev_packet->last_instr_taken_branch)
1884 			generate_sample = true;
1885 
1886 		if (generate_sample) {
1887 			ret = cs_etm__synth_branch_sample(etmq, tidq);
1888 			if (ret)
1889 				return ret;
1890 		}
1891 	}
1892 
1893 	cs_etm__packet_swap(etm, tidq);
1894 
1895 	return 0;
1896 }
1897 
1898 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1899 {
1900 	/*
1901 	 * When an exception packet is inserted, force
1902 	 * 'prev_packet->last_instr_taken_branch' to true regardless of whether
1903 	 * the last instruction in the previous range packet was a taken
1904 	 * branch.  This ensures a branch sample is generated for the
1905 	 * instruction range executed before the exception traps to the kernel
1906 	 * or before the exception returns.
1907 	 *
1908 	 * The exception packet contains dummy address values, so don't swap
1909 	 * PACKET with PREV_PACKET; keep it usable for instruction and branch samples.
1910 	 */
1911 	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1912 		tidq->prev_packet->last_instr_taken_branch = true;
1913 
1914 	return 0;
1915 }
1916 
1917 static int cs_etm__flush(struct cs_etm_queue *etmq,
1918 			 struct cs_etm_traceid_queue *tidq)
1919 {
1920 	int err = 0;
1921 	struct cs_etm_auxtrace *etm = etmq->etm;
1922 
1923 	/* Handle start tracing packet */
1924 	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1925 		goto swap_packet;
1926 
1927 	if (etmq->etm->synth_opts.last_branch &&
1928 	    etmq->etm->synth_opts.instructions &&
1929 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1930 		u64 addr;
1931 
1932 		/* Prepare last branches for instruction sample */
1933 		cs_etm__copy_last_branch_rb(etmq, tidq);
1934 
1935 		/*
1936 		 * Generate a last branch event for the branches left in the
1937 		 * circular buffer at the end of the trace.
1938 		 *
1939 		 * Use the address of the end of the last reported execution
1940 		 * range
1941 		 */
1942 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1943 
1944 		err = cs_etm__synth_instruction_sample(
1945 			etmq, tidq, addr,
1946 			tidq->period_instructions);
1947 		if (err)
1948 			return err;
1949 
1950 		tidq->period_instructions = 0;
1951 
1952 	}
1953 
1954 	if (etm->synth_opts.branches &&
1955 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1956 		err = cs_etm__synth_branch_sample(etmq, tidq);
1957 		if (err)
1958 			return err;
1959 	}
1960 
1961 swap_packet:
1962 	cs_etm__packet_swap(etm, tidq);
1963 
1964 	/* Reset last branches after flush the trace */
1965 	if (etm->synth_opts.last_branch)
1966 		cs_etm__reset_last_branch_rb(tidq);
1967 
1968 	return err;
1969 }
1970 
1971 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1972 			     struct cs_etm_traceid_queue *tidq)
1973 {
1974 	int err;
1975 
1976 	/*
1977 	 * No new packet is coming and 'etmq->packet' contains the stale
1978 	 * packet left over from the previous packet swap; skip branch sample
1979 	 * generation to avoid using the stale packet.
1980 	 *
1981 	 * For this case only flush branch stack and generate a last branch
1982 	 * event for the branches left in the circular buffer at the end of
1983 	 * the trace.
1984 	 */
1985 	if (etmq->etm->synth_opts.last_branch &&
1986 	    etmq->etm->synth_opts.instructions &&
1987 	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1988 		u64 addr;
1989 
1990 		/* Prepare last branches for instruction sample */
1991 		cs_etm__copy_last_branch_rb(etmq, tidq);
1992 
1993 		/*
1994 		 * Use the address of the end of the last reported execution
1995 		 * range.
1996 		 */
1997 		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1998 
1999 		err = cs_etm__synth_instruction_sample(
2000 			etmq, tidq, addr,
2001 			tidq->period_instructions);
2002 		if (err)
2003 			return err;
2004 
2005 		tidq->period_instructions = 0;
2006 	}
2007 
2008 	return 0;
2009 }
2010 /*
2011  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2012  *			   if need be.
2013  * Returns:	< 0	if error
2014  *		= 0	if no more auxtrace_buffer to read
2015  *		> 0	if the current buffer isn't empty yet
2016  */
2017 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2018 {
2019 	int ret;
2020 
2021 	if (!etmq->buf_len) {
2022 		ret = cs_etm__get_trace(etmq);
2023 		if (ret <= 0)
2024 			return ret;
2025 		/*
2026 		 * We cannot assume consecutive blocks in the data file
2027 		 * are contiguous, reset the decoder to force re-sync.
2028 		 */
2029 		ret = cs_etm_decoder__reset(etmq->decoder);
2030 		if (ret)
2031 			return ret;
2032 	}
2033 
2034 	return etmq->buf_len;
2035 }
2036 
2037 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
2038 				 struct cs_etm_packet *packet,
2039 				 u64 end_addr)
2040 {
2041 	/* Initialise to keep compiler happy */
2042 	u16 instr16 = 0;
2043 	u32 instr32 = 0;
2044 	u64 addr;
2045 
2046 	switch (packet->isa) {
2047 	case CS_ETM_ISA_T32:
2048 		/*
2049 		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2050 		 *
2051 		 *  b'15         b'8
2052 		 * +-----------------+--------+
2053 		 * | 1 1 0 1 1 1 1 1 |  imm8  |
2054 		 * +-----------------+--------+
2055 		 *
2056 		 * The specification only defines a 16-bit SVC encoding for
2057 		 * T32 and no 32-bit form, so read only 2 bytes as the
2058 		 * instruction size for T32.
2059 		 */
2060 		addr = end_addr - 2;
2061 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2062 				   (u8 *)&instr16, 0);
2063 		if ((instr16 & 0xFF00) == 0xDF00)
2064 			return true;
2065 
2066 		break;
2067 	case CS_ETM_ISA_A32:
2068 		/*
2069 		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2070 		 *
2071 		 *  b'31 b'28 b'27 b'24
2072 		 * +---------+---------+-------------------------+
2073 		 * |  !1111  | 1 1 1 1 |        imm24            |
2074 		 * +---------+---------+-------------------------+
2075 		 */
2076 		addr = end_addr - 4;
2077 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2078 				   (u8 *)&instr32, 0);
2079 		if ((instr32 & 0x0F000000) == 0x0F000000 &&
2080 		    (instr32 & 0xF0000000) != 0xF0000000)
2081 			return true;
2082 
2083 		break;
2084 	case CS_ETM_ISA_A64:
2085 		/*
2086 		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2087 		 *
2088 		 *  b'31               b'21           b'4     b'0
2089 		 * +-----------------------+---------+-----------+
2090 		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2091 		 * +-----------------------+---------+-----------+
2092 		 */
2093 		addr = end_addr - 4;
2094 		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2095 				   (u8 *)&instr32, 0);
2096 		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2097 			return true;
2098 
2099 		break;
2100 	case CS_ETM_ISA_UNKNOWN:
2101 	default:
2102 		break;
2103 	}
2104 
2105 	return false;
2106 }
2107 
2108 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2109 			       struct cs_etm_traceid_queue *tidq, u64 magic)
2110 {
2111 	u8 trace_chan_id = tidq->trace_chan_id;
2112 	struct cs_etm_packet *packet = tidq->packet;
2113 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2114 
2115 	if (magic == __perf_cs_etmv3_magic)
2116 		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2117 			return true;
2118 
2119 	/*
2120 	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2121 	 * HVC cases; need to check if it's SVC instruction based on
2122 	 * packet address.
2123 	 */
2124 	if (magic == __perf_cs_etmv4_magic) {
2125 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2126 		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2127 					 prev_packet->end_addr))
2128 			return true;
2129 	}
2130 
2131 	return false;
2132 }
2133 
2134 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2135 				       u64 magic)
2136 {
2137 	struct cs_etm_packet *packet = tidq->packet;
2138 
2139 	if (magic == __perf_cs_etmv3_magic)
2140 		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2141 		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2142 		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2143 		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2144 		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2145 			return true;
2146 
2147 	if (magic == __perf_cs_etmv4_magic)
2148 		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2149 		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2150 		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2151 		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2152 		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2153 		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2154 		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2155 			return true;
2156 
2157 	return false;
2158 }
2159 
2160 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2161 				      struct cs_etm_traceid_queue *tidq,
2162 				      u64 magic)
2163 {
2164 	u8 trace_chan_id = tidq->trace_chan_id;
2165 	struct cs_etm_packet *packet = tidq->packet;
2166 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2167 
2168 	if (magic == __perf_cs_etmv3_magic)
2169 		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2170 		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2171 		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2172 		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2173 		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2174 		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2175 		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2176 			return true;
2177 
2178 	if (magic == __perf_cs_etmv4_magic) {
2179 		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2180 		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2181 		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2182 		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2183 			return true;
2184 
2185 		/*
2186 		 * For CS_ETMV4_EXC_CALL, instructions other than SVC
2187 		 * (i.e. SMC and HVC) are treated as sync exceptions.
2188 		 */
2189 		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2190 		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2191 					  prev_packet->end_addr))
2192 			return true;
2193 
2194 		/*
2195 		 * ETMv4 has 5 bits for exception number; if the numbers
2196 		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2197 		 * they are implementation defined exceptions.
2198 		 *
2199 		 * In this case, simply treat them as sync exceptions.
2200 		 */
2201 		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2202 		    packet->exception_number <= CS_ETMV4_EXC_END)
2203 			return true;
2204 	}
2205 
2206 	return false;
2207 }
2208 
2209 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2210 				    struct cs_etm_traceid_queue *tidq)
2211 {
2212 	struct cs_etm_packet *packet = tidq->packet;
2213 	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2214 	u8 trace_chan_id = tidq->trace_chan_id;
2215 	u64 magic;
2216 	int ret;
2217 
2218 	switch (packet->sample_type) {
2219 	case CS_ETM_RANGE:
2220 		/*
2221 		 * An immediate branch instruction with neither link nor
2222 		 * return flag is a normal branch instruction within
2223 		 * the function.
2224 		 */
2225 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2226 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2227 			packet->flags = PERF_IP_FLAG_BRANCH;
2228 
2229 			if (packet->last_instr_cond)
2230 				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2231 		}
2232 
2233 		/*
2234 		 * Immediate branch instruction with link (e.g. BL); this is
2235 		 * a branch instruction for a function call.
2236 		 */
2237 		if (packet->last_instr_type == OCSD_INSTR_BR &&
2238 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2239 			packet->flags = PERF_IP_FLAG_BRANCH |
2240 					PERF_IP_FLAG_CALL;
2241 
2242 		/*
2243 		 * Indirect branch instruction with link (e.g. BLR); this is
2244 		 * a branch instruction for a function call.
2245 		 */
2246 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2247 		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2248 			packet->flags = PERF_IP_FLAG_BRANCH |
2249 					PERF_IP_FLAG_CALL;
2250 
2251 		/*
2252 		 * Indirect branch instruction with subtype of
2253 		 * OCSD_S_INSTR_V7_IMPLIED_RET; this is an explicit hint for
2254 		 * function return on A32/T32.
2255 		 */
2256 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2257 		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2258 			packet->flags = PERF_IP_FLAG_BRANCH |
2259 					PERF_IP_FLAG_RETURN;
2260 
2261 		/*
2262 		 * Indirect branch instruction without link (e.g. BR); this is
2263 		 * usually used for function return, especially for functions
2264 		 * in dynamically linked libraries.
2265 		 */
2266 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2267 		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2268 			packet->flags = PERF_IP_FLAG_BRANCH |
2269 					PERF_IP_FLAG_RETURN;
2270 
2271 		/* Return instruction for function return. */
2272 		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2273 		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2274 			packet->flags = PERF_IP_FLAG_BRANCH |
2275 					PERF_IP_FLAG_RETURN;
2276 
2277 		/*
2278 		 * Decoder might insert a discontinuity in the middle of
2279 		 * instruction packets, fixup prev_packet with flag
2280 		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2281 		 */
2282 		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2283 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2284 					      PERF_IP_FLAG_TRACE_BEGIN;
2285 
2286 		/*
2287 		 * If the previous packet is an exception return packet
2288 		 * and the return address immediately follows an SVC
2289 		 * instruction, adjust the previous packet's sample flags
2290 		 * to PERF_IP_FLAG_SYSCALLRET.
2291 		 */
2292 		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2293 					   PERF_IP_FLAG_RETURN |
2294 					   PERF_IP_FLAG_INTERRUPT) &&
2295 		    cs_etm__is_svc_instr(etmq, trace_chan_id,
2296 					 packet, packet->start_addr))
2297 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2298 					     PERF_IP_FLAG_RETURN |
2299 					     PERF_IP_FLAG_SYSCALLRET;
2300 		break;
2301 	case CS_ETM_DISCONTINUITY:
2302 		/*
2303 		 * The trace is discontinuous; if the previous packet is an
2304 		 * instruction range packet, set the PERF_IP_FLAG_TRACE_END
2305 		 * flag on it.
2306 		 */
2307 		if (prev_packet->sample_type == CS_ETM_RANGE)
2308 			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2309 					      PERF_IP_FLAG_TRACE_END;
2310 		break;
2311 	case CS_ETM_EXCEPTION:
2312 		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
2313 		if (ret)
2314 			return ret;
2315 
2316 		/* The exception is for system call. */
2317 		if (cs_etm__is_syscall(etmq, tidq, magic))
2318 			packet->flags = PERF_IP_FLAG_BRANCH |
2319 					PERF_IP_FLAG_CALL |
2320 					PERF_IP_FLAG_SYSCALLRET;
2321 		/*
2322 		 * The exceptions are triggered by external signals from bus,
2323 		 * interrupt controller, debug module, PE reset or halt.
2324 		 */
2325 		else if (cs_etm__is_async_exception(tidq, magic))
2326 			packet->flags = PERF_IP_FLAG_BRANCH |
2327 					PERF_IP_FLAG_CALL |
2328 					PERF_IP_FLAG_ASYNC |
2329 					PERF_IP_FLAG_INTERRUPT;
2330 		/*
2331 		 * Otherwise, the exception is caused by a trap, an
2332 		 * instruction or data fault, or an alignment error.
2333 		 */
2334 		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2335 			packet->flags = PERF_IP_FLAG_BRANCH |
2336 					PERF_IP_FLAG_CALL |
2337 					PERF_IP_FLAG_INTERRUPT;
2338 
2339 		/*
2340 		 * When an exception packet is inserted: the exception packet
2341 		 * is not used standalone for generating samples and is
2342 		 * affiliated with the previous instruction range packet, so
2343 		 * set the previous range packet's flags to tell perf it is an
2344 		 * exception taken branch.
2345 		 */
2346 		if (prev_packet->sample_type == CS_ETM_RANGE)
2347 			prev_packet->flags = packet->flags;
2348 		break;
2349 	case CS_ETM_EXCEPTION_RET:
2350 		/*
2351 		 * When the exception return packet is inserted: the exception
2352 		 * return packet is not used standalone for generating samples
2353 		 * and is affiliated with the previous instruction range packet,
2354 		 * so set the previous range packet's flags to tell perf it is
2355 		 * an exception return branch.
2356 		 *
2357 		 * The exception return can be for either a system call or
2358 		 * another exception type; unfortunately the packet doesn't
2359 		 * contain exception type info, so we cannot decide the
2360 		 * exception type purely from the exception return packet.
2361 		 * Recording the exception number from the exception packet and
2362 		 * reusing it for the exception return packet is not reliable
2363 		 * because the trace can be discontinuous or the interrupt can
2364 		 * be nested, so the recorded exception number cannot be used
2365 		 * for the exception return packet in those two cases.
2366 		 *
2367 		 * For an exception return packet we only need to distinguish
2368 		 * whether it is for a system call or for another type.  That
2369 		 * decision can be deferred until the next packet, which
2370 		 * contains the return address; based on the return address we
2371 		 * can read back the previous instruction, check whether it is
2372 		 * a system call instruction, and then calibrate the sample
2373 		 * flags as needed.
2374 		 */
2375 		if (prev_packet->sample_type == CS_ETM_RANGE)
2376 			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2377 					     PERF_IP_FLAG_RETURN |
2378 					     PERF_IP_FLAG_INTERRUPT;
2379 		break;
2380 	case CS_ETM_EMPTY:
2381 	default:
2382 		break;
2383 	}
2384 
2385 	return 0;
2386 }
2387 
2388 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2389 {
2390 	int ret = 0;
2391 	size_t processed = 0;
2392 
2393 	/*
2394 	 * Packets are decoded and added to the decoder's packet queue
2395 	 * until the decoder packet processing callback has requested that
2396 	 * processing stops or there is nothing left in the buffer.  Normal
2397 	 * operations that stop processing are a timestamp packet or a full
2398 	 * decoder buffer queue.
2399 	 */
2400 	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2401 						 etmq->offset,
2402 						 &etmq->buf[etmq->buf_used],
2403 						 etmq->buf_len,
2404 						 &processed);
2405 	if (ret)
2406 		goto out;
2407 
2408 	etmq->offset += processed;
2409 	etmq->buf_used += processed;
2410 	etmq->buf_len -= processed;
2411 
2412 out:
2413 	return ret;
2414 }
2415 
2416 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2417 					 struct cs_etm_traceid_queue *tidq)
2418 {
2419 	int ret;
2420 	struct cs_etm_packet_queue *packet_queue;
2421 
2422 	packet_queue = &tidq->packet_queue;
2423 
2424 	/* Process each packet in this chunk */
2425 	while (1) {
2426 		ret = cs_etm_decoder__get_packet(packet_queue,
2427 						 tidq->packet);
2428 		if (ret <= 0)
2429 			/*
2430 			 * Stop processing this chunk on
2431 			 * end of data or error
2432 			 */
2433 			break;
2434 
2435 		/*
2436 		 * Packet addresses are swapped during packet
2437 		 * handling in the switch() statement below,
2438 		 * so the sample flags must be set before the
2439 		 * switch() in order to use the address
2440 		 * information prior to the packet swap.
2441 		 */
2442 		ret = cs_etm__set_sample_flags(etmq, tidq);
2443 		if (ret < 0)
2444 			break;
2445 
2446 		switch (tidq->packet->sample_type) {
2447 		case CS_ETM_RANGE:
2448 			/*
2449 			 * If the packet contains an instruction
2450 			 * range, generate instruction sequence
2451 			 * events.
2452 			 */
2453 			cs_etm__sample(etmq, tidq);
2454 			break;
2455 		case CS_ETM_EXCEPTION:
2456 		case CS_ETM_EXCEPTION_RET:
2457 			/*
2458 			 * When an exception packet arrives,
2459 			 * make sure the previous instruction
2460 			 * range packet is handled properly.
2461 			 */
2462 			cs_etm__exception(tidq);
2463 			break;
2464 		case CS_ETM_DISCONTINUITY:
2465 			/*
2466 			 * Discontinuity in trace, flush
2467 			 * previous branch stack
2468 			 */
2469 			cs_etm__flush(etmq, tidq);
2470 			break;
2471 		case CS_ETM_EMPTY:
2472 			/*
2473 			 * We should not receive an empty
2474 			 * packet; report an error.
2475 			 */
2476 			pr_err("CS ETM Trace: empty packet\n");
2477 			return -EINVAL;
2478 		default:
2479 			break;
2480 		}
2481 	}
2482 
2483 	return ret;
2484 }
2485 
2486 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2487 {
2488 	int idx;
2489 	struct int_node *inode;
2490 	struct cs_etm_traceid_queue *tidq;
2491 	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2492 
2493 	intlist__for_each_entry(inode, traceid_queues_list) {
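		/* inode->priv holds the index into etmq->traceid_queues */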
2494 		idx = (int)(intptr_t)inode->priv;
2495 		tidq = etmq->traceid_queues[idx];
2496 
2497 		/* Ignore return value */
2498 		cs_etm__process_traceid_queue(etmq, tidq);
2499 	}
2500 }
2501 
2502 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2503 {
2504 	int err = 0;
2505 	struct cs_etm_traceid_queue *tidq;
2506 
2507 	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2508 	if (!tidq)
2509 		return -EINVAL;
2510 
2511 	/* Go through each buffer in the queue and decode them one by one */
2512 	while (1) {
2513 		err = cs_etm__get_data_block(etmq);
2514 		if (err <= 0)
2515 			return err;
2516 
2517 		/* Run trace decoder until buffer consumed or end of trace */
2518 		do {
2519 			err = cs_etm__decode_data_block(etmq);
2520 			if (err)
2521 				return err;
2522 
2523 			/*
2524 			 * Process each packet in this chunk, nothing to do if
2525 			 * an error occurs other than hoping the next one will
2526 			 * be better.
2527 			 */
2528 			err = cs_etm__process_traceid_queue(etmq, tidq);
2529 
2530 		} while (etmq->buf_len);
2531 
2532 		if (err == 0)
2533 			/* Flush any remaining branch stack entries */
2534 			err = cs_etm__end_block(etmq, tidq);
2535 	}
2536 
2537 	return err;
2538 }
2539 
2540 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2541 {
2542 	int idx, err = 0;
2543 	struct cs_etm_traceid_queue *tidq;
2544 	struct int_node *inode;
2545 
2546 	/* Go through each buffer in the queue and decode them one by one */
2547 	while (1) {
2548 		err = cs_etm__get_data_block(etmq);
2549 		if (err <= 0)
2550 			return err;
2551 
2552 		/* Run trace decoder until buffer consumed or end of trace */
2553 		do {
2554 			err = cs_etm__decode_data_block(etmq);
2555 			if (err)
2556 				return err;
2557 
2558 			/*
2559 			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2560 			 * single traceID queue because each TID has a separate
2561 			 * buffer. But here in per-cpu mode we need to iterate
2562 			 * over each channel instead.
2563 			 */
2564 			intlist__for_each_entry(inode,
2565 						etmq->traceid_queues_list) {
2566 				idx = (int)(intptr_t)inode->priv;
2567 				tidq = etmq->traceid_queues[idx];
2568 				cs_etm__process_traceid_queue(etmq, tidq);
2569 			}
2570 		} while (etmq->buf_len);
2571 
2572 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2573 			idx = (int)(intptr_t)inode->priv;
2574 			tidq = etmq->traceid_queues[idx];
2575 			/* Flush any remaining branch stack entries */
2576 			err = cs_etm__end_block(etmq, tidq);
2577 			if (err)
2578 				return err;
2579 		}
2580 	}
2581 
2582 	return err;
2583 }
2584 
2585 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2586 					   pid_t tid)
2587 {
2588 	unsigned int i;
2589 	struct auxtrace_queues *queues = &etm->queues;
2590 
2591 	for (i = 0; i < queues->nr_queues; i++) {
2592 		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2593 		struct cs_etm_queue *etmq = queue->priv;
2594 		struct cs_etm_traceid_queue *tidq;
2595 
2596 		if (!etmq)
2597 			continue;
2598 
2599 		if (etm->per_thread_decoding) {
2600 			tidq = cs_etm__etmq_get_traceid_queue(
2601 				etmq, CS_ETM_PER_THREAD_TRACEID);
2602 
2603 			if (!tidq)
2604 				continue;
2605 
2606 			if (tid == -1 || thread__tid(tidq->thread) == tid)
2607 				cs_etm__run_per_thread_timeless_decoder(etmq);
2608 		} else
2609 			cs_etm__run_per_cpu_timeless_decoder(etmq);
2610 	}
2611 
2612 	return 0;
2613 }
2614 
2615 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2616 {
2617 	int ret = 0;
2618 	unsigned int cs_queue_nr, queue_nr, i;
2619 	u8 trace_chan_id;
2620 	u64 cs_timestamp;
2621 	struct auxtrace_queue *queue;
2622 	struct cs_etm_queue *etmq;
2623 	struct cs_etm_traceid_queue *tidq;
2624 
2625 	/*
2626 	 * Pre-populate the heap with one entry from each queue so that we can
2627 	 * start processing in time order across all queues.
2628 	 */
2629 	for (i = 0; i < etm->queues.nr_queues; i++) {
2630 		etmq = etm->queues.queue_array[i].priv;
2631 		if (!etmq)
2632 			continue;
2633 
2634 		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2635 		if (ret)
2636 			return ret;
2637 	}
2638 
2639 	while (1) {
2640 		if (!etm->heap.heap_cnt)
2641 			break;
2642 
2643 		/* Take the entry at the top of the min heap */
2644 		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
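		/*
		 * cs_queue_nr packs both the auxtrace queue number and the
		 * trace channel ID; split them back out here.
		 */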
2645 		queue_nr = TO_QUEUE_NR(cs_queue_nr);
2646 		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2647 		queue = &etm->queues.queue_array[queue_nr];
2648 		etmq = queue->priv;
2649 
2650 		/*
2651 		 * Remove the top entry from the heap since we are about
2652 		 * to process it.
2653 		 */
2654 		auxtrace_heap__pop(&etm->heap);
2655 
2656 		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2657 		if (!tidq) {
2658 			/*
2659 			 * No traceID queue has been allocated for this traceID,
2660 			 * which means something somewhere went very wrong.  No
2661 			 * other choice than simply exit.
2662 			 */
2663 			ret = -EINVAL;
2664 			goto out;
2665 		}
2666 
2667 		/*
2668 		 * Packets associated with this timestamp are already in
2669 		 * the etmq's traceID queue, so process them.
2670 		 */
2671 		ret = cs_etm__process_traceid_queue(etmq, tidq);
2672 		if (ret < 0)
2673 			goto out;
2674 
2675 		/*
2676 		 * Packets for this timestamp have been processed, time to
2677 		 * move on to the next timestamp, fetching a new auxtrace_buffer
2678 		 * if need be.
2679 		 */
2680 refetch:
2681 		ret = cs_etm__get_data_block(etmq);
2682 		if (ret < 0)
2683 			goto out;
2684 
2685 		/*
2686 		 * No more auxtrace_buffers to process in this etmq, simply
2687 		 * move on to another entry in the auxtrace_heap.
2688 		 */
2689 		if (!ret)
2690 			continue;
2691 
2692 		ret = cs_etm__decode_data_block(etmq);
2693 		if (ret)
2694 			goto out;
2695 
2696 		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2697 
2698 		if (!cs_timestamp) {
2699 			/*
2700 			 * Function cs_etm__decode_data_block() returns when
2701 			 * there is no more trace to decode in the current
2702 			 * auxtrace_buffer OR when a timestamp has been
2703 			 * encountered on any of the traceID queues.  Since we
2704 			 * did not get a timestamp, there is no more trace to
2705 			 * process in this auxtrace_buffer.  As such, empty and
2706 			 * flush all traceID queues.
2707 			 */
2708 			cs_etm__clear_all_traceid_queues(etmq);
2709 
2710 			/* Fetch another auxtrace_buffer for this etmq */
2711 			goto refetch;
2712 		}
2713 
2714 		/*
2715 		 * Add to the min heap the timestamp for packets that have
2716 		 * just been decoded.  They will be processed and synthesized
2717 		 * during the next call to cs_etm__process_traceid_queue() for
2718 		 * this queue/traceID.
2719 		 */
2720 		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2721 		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2722 	}
2723 
2724 	for (i = 0; i < etm->queues.nr_queues; i++) {
2725 		struct int_node *inode;
2726 
2727 		etmq = etm->queues.queue_array[i].priv;
2728 		if (!etmq)
2729 			continue;
2730 
2731 		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2732 			int idx = (int)(intptr_t)inode->priv;
2733 
2734 			/* Flush any remaining branch stack entries */
2735 			tidq = etmq->traceid_queues[idx];
2736 			ret = cs_etm__end_block(etmq, tidq);
2737 			if (ret)
2738 				return ret;
2739 		}
2740 	}
2741 out:
2742 	return ret;
2743 }
2744 
2745 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2746 					union perf_event *event)
2747 {
2748 	struct thread *th;
2749 
2750 	if (etm->timeless_decoding)
2751 		return 0;
2752 
2753 	/*
2754 	 * Add the tid/pid to the log so that we can get a match when we get a
2755 	 * contextID from the decoder. Only track for the host: only kernel
2756 	 * trace is supported for guests which wouldn't need pids so this should
2757 	 * be fine.
2758 	 */
2759 	th = machine__findnew_thread(&etm->session->machines.host,
2760 				     event->itrace_start.pid,
2761 				     event->itrace_start.tid);
2762 	if (!th)
2763 		return -ENOMEM;
2764 
2765 	thread__put(th);
2766 
2767 	return 0;
2768 }
2769 
2770 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2771 					   union perf_event *event)
2772 {
2773 	struct thread *th;
2774 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2775 
2776 	/*
2777 	 * Context switches in per-thread mode are irrelevant since perf
2778 	 * will start/stop tracing as the process is scheduled.
2779 	 */
2780 	if (etm->timeless_decoding)
2781 		return 0;
2782 
2783 	/*
2784 	 * SWITCH_IN events carry the next process to be switched out while
2785 	 * SWITCH_OUT events carry the process to be switched in.  As such
2786 	 * we don't care about IN events.
2787 	 */
2788 	if (!out)
2789 		return 0;
2790 
2791 	/*
2792 	 * Add the tid/pid to the log so that we can get a match when we get a
2793 	 * contextID from the decoder. Only track for the host: only kernel
2794 	 * trace is supported for guests which wouldn't need pids so this should
2795 	 * be fine.
2796 	 */
2797 	th = machine__findnew_thread(&etm->session->machines.host,
2798 				     event->context_switch.next_prev_pid,
2799 				     event->context_switch.next_prev_tid);
2800 	if (!th)
2801 		return -ENOMEM;
2802 
2803 	thread__put(th);
2804 
2805 	return 0;
2806 }
2807 
2808 static int cs_etm__process_event(struct perf_session *session,
2809 				 union perf_event *event,
2810 				 struct perf_sample *sample,
2811 				 const struct perf_tool *tool)
2812 {
2813 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2814 						   struct cs_etm_auxtrace,
2815 						   auxtrace);
2816 
2817 	if (dump_trace)
2818 		return 0;
2819 
2820 	if (!tool->ordered_events) {
2821 		pr_err("CoreSight ETM Trace requires ordered events\n");
2822 		return -EINVAL;
2823 	}
2824 
2825 	switch (event->header.type) {
2826 	case PERF_RECORD_EXIT:
2827 		/*
2828 		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2829 		 * start the decode because we know there will be no more trace from
2830 		 * this thread. All this does is emit samples earlier than waiting for
2831 		 * the flush in other modes, but with timestamps it makes sense to wait
2832 		 * for flush so that events from different threads are interleaved
2833 		 * properly.
2834 		 */
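		/*
		 * PERF_RECORD_EXIT events reuse the perf_record_fork layout,
		 * hence the exiting thread's ID is read from event->fork.tid.
		 */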
2835 		if (etm->per_thread_decoding && etm->timeless_decoding)
2836 			return cs_etm__process_timeless_queues(etm,
2837 							       event->fork.tid);
2838 		break;
2839 
2840 	case PERF_RECORD_ITRACE_START:
2841 		return cs_etm__process_itrace_start(etm, event);
2842 
2843 	case PERF_RECORD_SWITCH_CPU_WIDE:
2844 		return cs_etm__process_switch_cpu_wide(etm, event);
2845 
2846 	case PERF_RECORD_AUX:
2847 		/*
2848 		 * Record the latest kernel timestamp available in the header
2849 		 * for samples so that synthesised samples occur from this point
2850 		 * onwards.
2851 		 */
2852 		if (sample->time && (sample->time != (u64)-1))
2853 			etm->latest_kernel_timestamp = sample->time;
2854 		break;
2855 
2856 	default:
2857 		break;
2858 	}
2859 
2860 	return 0;
2861 }
2862 
2863 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2864 			     struct perf_record_auxtrace *event)
2865 {
2866 	struct auxtrace_buffer *buf;
2867 	unsigned int i;
2868 	/*
2869 	 * Find all buffers with same reference in the queues and dump them.
2870 	 * This is because the queues can contain multiple entries of the same
2871 	 * buffer that were split on aux records.
2872 	 */
2873 	for (i = 0; i < etm->queues.nr_queues; ++i)
2874 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2875 			if (buf->reference == event->reference)
2876 				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2877 }
2878 
2879 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2880 					  union perf_event *event,
2881 					  const struct perf_tool *tool __maybe_unused)
2882 {
2883 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2884 						   struct cs_etm_auxtrace,
2885 						   auxtrace);
2886 	if (!etm->data_queued) {
2887 		struct auxtrace_buffer *buffer;
2888 		off_t  data_offset;
2889 		int fd = perf_data__fd(session->data);
2890 		bool is_pipe = perf_data__is_pipe(session->data);
2891 		int err;
2892 		int idx = event->auxtrace.idx;
2893 
2894 		if (is_pipe)
2895 			data_offset = 0;
2896 		else {
2897 			data_offset = lseek(fd, 0, SEEK_CUR);
2898 			if (data_offset == -1)
2899 				return -errno;
2900 		}
2901 
2902 		err = auxtrace_queues__add_event(&etm->queues, session,
2903 						 event, data_offset, &buffer);
2904 		if (err)
2905 			return err;
2906 
2907 		if (dump_trace)
2908 			if (auxtrace_buffer__get_data(buffer, fd)) {
2909 				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2910 				auxtrace_buffer__put_data(buffer);
2911 			}
2912 	} else if (dump_trace)
2913 		dump_queued_data(etm, &event->auxtrace);
2914 
2915 	return 0;
2916 }
2917 
2918 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2919 {
2920 	struct evsel *evsel;
2921 	struct evlist *evlist = etm->session->evlist;
2922 
2923 	/* Override timeless mode with user input from --itrace=Z */
2924 	if (etm->synth_opts.timeless_decoding) {
2925 		etm->timeless_decoding = true;
2926 		return 0;
2927 	}
2928 
2929 	/*
2930 	 * Find the cs_etm evsel and look at what its timestamp setting was
2931 	 */
2932 	evlist__for_each_entry(evlist, evsel)
2933 		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2934 			etm->timeless_decoding =
2935 				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2936 			return 0;
2937 		}
2938 
2939 	pr_err("CS ETM: Couldn't find ETM evsel\n");
2940 	return -EINVAL;
2941 }
2942 
2943 /*
2944  * Read a single cpu parameter block from the auxtrace_info priv block.
2945  *
2946  * For version 1 there is a per cpu nr_params entry. If we are handling
2947  * a version 1 file, then there may be fewer, the same, or more params
2948  * indicated by this value than the compile time number we understand.
2949  *
2950  * For a version 0 info block, there are a fixed number, and we need to
2951  * fill out the nr_param value in the metadata we create.
2952  */
2953 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2954 				    int out_blk_size, int nr_params_v0)
2955 {
2956 	u64 *metadata = NULL;
2957 	int hdr_version;
2958 	int nr_in_params, nr_out_params, nr_cmn_params;
2959 	int i, k;
2960 
2961 	metadata = zalloc(sizeof(*metadata) * out_blk_size);
2962 	if (!metadata)
2963 		return NULL;
2964 
2965 	/* read block current index & version */
2966 	i = *buff_in_offset;
2967 	hdr_version = buff_in[CS_HEADER_VERSION];
2968 
2969 	if (!hdr_version) {
2970 	/* read version 0 info block into a version 1 metadata block  */
2971 		nr_in_params = nr_params_v0;
2972 		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2973 		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2974 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2975 		/* remaining block params at offset +1 from source */
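		/*
		 * (v1 inserts NR_TRC_PARAMS after CPU, so each remaining v0
		 * param lands one slot later in the v1 layout.)
		 */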
2976 		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2977 			metadata[k + 1] = buff_in[i + k];
2978 		/* version 0 has 2 common params */
2979 		nr_cmn_params = 2;
2980 	} else {
2981 	/* read version 1 info block - input and output nr_params may differ */
2982 		/* version 1 has 3 common params */
2983 		nr_cmn_params = 3;
2984 		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2985 
2986 		/* if input has more params than output - skip excess */
2987 		nr_out_params = nr_in_params + nr_cmn_params;
2988 		if (nr_out_params > out_blk_size)
2989 			nr_out_params = out_blk_size;
2990 
2991 		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2992 			metadata[k] = buff_in[i + k];
2993 
2994 		/* record the actual nr params we copied */
2995 		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2996 	}
2997 
2998 	/* adjust in offset by number of in params used */
2999 	i += nr_in_params + nr_cmn_params;
3000 	*buff_in_offset = i;
3001 	return metadata;
3002 }
3003 
3004 /**
3005  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
3006  * on the bounds of aux_event, if it matches with the buffer that's at
3007  * file_offset.
3008  *
3009  * Normally, whole auxtrace buffers would be added to the queue. But we
3010  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3011  * is reset across each buffer, so splitting the buffers up in advance has
3012  * the same effect.
3013  */
3014 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
3015 				      struct perf_record_aux *aux_event, struct perf_sample *sample)
3016 {
3017 	int err;
3018 	char buf[PERF_SAMPLE_MAX_SIZE];
3019 	union perf_event *auxtrace_event_union;
3020 	struct perf_record_auxtrace *auxtrace_event;
3021 	union perf_event auxtrace_fragment;
3022 	__u64 aux_offset, aux_size;
3023 	enum cs_etm_format format;
3024 
3025 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
3026 						   struct cs_etm_auxtrace,
3027 						   auxtrace);
3028 
3029 	/*
3030 	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
3031 	 * from looping through the auxtrace index.
3032 	 */
3033 	err = perf_session__peek_event(session, file_offset, buf,
3034 				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
3035 	if (err)
3036 		return err;
3037 	auxtrace_event = &auxtrace_event_union->auxtrace;
3038 	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
3039 		return -EINVAL;
3040 
3041 	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
3042 		auxtrace_event->header.size != sz) {
3043 		return -EINVAL;
3044 	}
3045 
3046 	/*
3047 	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
3048 	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
3049 	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
3050 	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
3051 	 * Return 'not found' if mismatch.
3052 	 */
3053 	if (auxtrace_event->cpu == (__u32) -1) {
3054 		etm->per_thread_decoding = true;
3055 		if (auxtrace_event->tid != sample->tid)
3056 			return 1;
3057 	} else if (auxtrace_event->cpu != sample->cpu) {
3058 		if (etm->per_thread_decoding) {
3059 			/*
3060 			 * Found a per-cpu buffer after a per-thread one was
3061 			 * already found
3062 			 */
3063 			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3064 			return -EINVAL;
3065 		}
3066 		return 1;
3067 	}
3068 
3069 	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3070 		/*
3071 		 * Clamp size in snapshot mode. The buffer size is clamped in
3072 		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3073 		 * the buffer size.
3074 		 */
3075 		aux_size = min(aux_event->aux_size, auxtrace_event->size);
3076 
3077 		/*
3078 		 * In this mode, the head also points to the end of the buffer so aux_offset
3079 		 * needs to have the size subtracted so it points to the beginning as in normal mode
3080 		 */
3081 		aux_offset = aux_event->aux_offset - aux_size;
3082 	} else {
3083 		aux_size = aux_event->aux_size;
3084 		aux_offset = aux_event->aux_offset;
3085 	}
3086 
3087 	if (aux_offset >= auxtrace_event->offset &&
3088 	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3089 		struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
3090 
3091 		/*
3092 		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3093 		 * based on the sizes of the aux event, and queue that fragment.
3094 		 */
3095 		auxtrace_fragment.auxtrace = *auxtrace_event;
3096 		auxtrace_fragment.auxtrace.size = aux_size;
3097 		auxtrace_fragment.auxtrace.offset = aux_offset;
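		/*
		 * Advance file_offset to where the fragment's data starts:
		 * past the PERF_RECORD_AUXTRACE header and past any trace
		 * data in the buffer that precedes this AUX record.
		 */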
3098 		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3099 
3100 		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3101 			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3102 		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3103 						 file_offset, NULL);
3104 		if (err)
3105 			return err;
3106 
3107 		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
3108 				UNFORMATTED : FORMATTED;
3109 		if (etmq->format != UNSET && format != etmq->format) {
3110 			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
3111 			return -EINVAL;
3112 		}
3113 		etmq->format = format;
3114 		return 0;
3115 	}
3116 
3117 	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3118 	return 1;
3119 }
3120 
3121 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3122 					u64 offset __maybe_unused, void *data __maybe_unused)
3123 {
3124 	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3125 	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3126 		(*(int *)data)++; /* increment found count */
3127 		return cs_etm__process_aux_output_hw_id(session, event);
3128 	}
3129 	return 0;
3130 }
3131 
3132 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3133 					u64 offset __maybe_unused, void *data __maybe_unused)
3134 {
3135 	struct perf_sample sample;
3136 	int ret;
3137 	struct auxtrace_index_entry *ent;
3138 	struct auxtrace_index *auxtrace_index;
3139 	struct evsel *evsel;
3140 	size_t i;
3141 
3142 	/* Don't care about any other events, we're only queuing buffers for AUX events */
3143 	if (event->header.type != PERF_RECORD_AUX)
3144 		return 0;
3145 
3146 	if (event->header.size < sizeof(struct perf_record_aux))
3147 		return -EINVAL;
3148 
3149 	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3150 	if (!event->aux.aux_size)
3151 		return 0;
3152 
3153 	/*
3154 	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3155 	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3156 	 */
3157 	evsel = evlist__event2evsel(session->evlist, event);
3158 	if (!evsel)
3159 		return -EINVAL;
3160 	perf_sample__init(&sample, /*all=*/false);
3161 	ret = evsel__parse_sample(evsel, event, &sample);
3162 	if (ret)
3163 		goto out;
3164 
3165 	/*
3166 	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3167 	 */
3168 	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3169 		for (i = 0; i < auxtrace_index->nr; i++) {
3170 			ent = &auxtrace_index->entries[i];
3171 			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3172 							 ent->sz, &event->aux, &sample);
3173 			/*
3174 			 * Stop search on error or successful values. Continue search on
3175 			 * 1 ('not found')
3176 			 */
3177 			if (ret != 1)
3178 				goto out;
3179 		}
3180 	}
3181 
3182 	/*
3183 	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3184 	 * don't exit with an error because it will still be possible to decode other aux records.
3185 	 */
3186 	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3187 	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3188 	ret = 0;
3189 out:
3190 	perf_sample__exit(&sample);
3191 	return ret;
3192 }
3193 
3194 static int cs_etm__queue_aux_records(struct perf_session *session)
3195 {
3196 	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3197 								struct auxtrace_index, list);
3198 	if (index && index->nr > 0)
3199 		return perf_session__peek_events(session, session->header.data_offset,
3200 						 session->header.data_size,
3201 						 cs_etm__queue_aux_records_cb, NULL);
3202 
3203 	/*
3204 	 * We would get here if there are no entries in the index (either no auxtrace
3205 	 * buffers or no index at all). Fail silently as there is the possibility of
3206 	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3207 	 * false.
3208 	 *
3209 	 * In that scenario, buffers will not be split by AUX records.
3210 	 */
3211 	return 0;
3212 }
3213 
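/*
 * Note: despite the name, HAS_PARAM() evaluates to true when the metadata
 * block is too short to contain the given parameter, i.e. the parameter is
 * absent.
 */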
3214 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3215 				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3216 
3217 /*
3218  * Loop through the ETMs and return false if at least one has ts_source != 1, i.e. it is not
3219  * using virtual timestamps.
3220  */
3221 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3222 {
3223 	int j;
3224 
3225 	for (j = 0; j < num_cpu; j++) {
3226 		switch (metadata[j][CS_ETM_MAGIC]) {
3227 		case __perf_cs_etmv4_magic:
3228 			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3229 				return false;
3230 			break;
3231 		case __perf_cs_ete_magic:
3232 			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3233 				return false;
3234 			break;
3235 		default:
3236 			/* Unknown / unsupported magic number. */
3237 			return false;
3238 		}
3239 	}
3240 	return true;
3241 }
3242 
3243 /* Map trace IDs to the correct metadata block, using information in the metadata. */
3244 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3245 					  u64 **metadata)
3246 {
3247 	u64 cs_etm_magic;
3248 	u8 trace_chan_id;
3249 	int i, err;
3250 
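	/*
	 * For each CPU, reduce the trace ID stored in the metadata to its value
	 * bits and register the mapping from that trace ID to this CPU's
	 * metadata block.
	 */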
3251 	for (i = 0; i < num_cpu; i++) {
3252 		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3253 		switch (cs_etm_magic) {
3254 		case __perf_cs_etmv3_magic:
3255 			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3256 			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3257 			break;
3258 		case __perf_cs_etmv4_magic:
3259 		case __perf_cs_ete_magic:
3260 			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3261 			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3262 			break;
3263 		default:
3264 			/* unknown magic number */
3265 			return -EINVAL;
3266 		}
3267 		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3268 		if (err)
3269 			return err;
3270 	}
3271 	return 0;
3272 }
3273 
3274 /*
3275  * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3276  * (formatted or not) packets to create the decoders.
3277  */
3278 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
3279 {
3280 	struct cs_etm_decoder_params d_params;
3281 	struct cs_etm_trace_params  *t_params;
3282 	int decoders = intlist__nr_entries(etmq->traceid_list);
3283 
3284 	if (decoders == 0)
3285 		return 0;
3286 
3287 	/*
3288 	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
3289 	 * needed.
3290 	 */
3291 	if (etmq->format == UNFORMATTED)
3292 		assert(decoders == 1);
3293 
3294 	/* Use metadata to fill in trace parameters for trace decoder */
3295 	t_params = zalloc(sizeof(*t_params) * decoders);
3296 
3297 	if (!t_params)
3298 		goto out_free;
3299 
3300 	if (cs_etm__init_trace_params(t_params, etmq))
3301 		goto out_free;
3302 
3303 	/* Set decoder parameters to decode trace packets */
3304 	if (cs_etm__init_decoder_params(&d_params, etmq,
3305 					dump_trace ? CS_ETM_OPERATION_PRINT :
3306 						     CS_ETM_OPERATION_DECODE))
3307 		goto out_free;
3308 
3309 	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
3310 					    t_params);
3311 
3312 	if (!etmq->decoder)
3313 		goto out_free;
3314 
3315 	/*
3316 	 * Register a function to handle all memory accesses required by
3317 	 * the trace decoder library.
3318 	 */
3319 	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
3320 					      0x0L, ((u64) -1L),
3321 					      cs_etm__mem_access))
3322 		goto out_free_decoder;
3323 
3324 	zfree(&t_params);
3325 	return 0;
3326 
3327 out_free_decoder:
3328 	cs_etm_decoder__free(etmq->decoder);
3329 out_free:
3330 	zfree(&t_params);
3331 	return -EINVAL;
3332 }
3333 
3334 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3335 {
3336 	struct auxtrace_queues *queues = &etm->queues;
3337 
3338 	for (unsigned int i = 0; i < queues->nr_queues; i++) {
3339 		bool empty = list_empty(&queues->queue_array[i].head);
3340 		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3341 		int ret;
3342 
3343 		/*
3344 		 * Don't create decoders for empty queues, mainly because
3345 		 * etmq->format is unknown for empty queues.
3346 		 */
3347 		assert(empty || etmq->format != UNSET);
3348 		if (empty)
3349 			continue;
3350 
3351 		ret = cs_etm__create_queue_decoders(etmq);
3352 		if (ret)
3353 			return ret;
3354 	}
3355 	return 0;
3356 }
3357 
3358 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3359 				       struct perf_session *session)
3360 {
3361 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3362 	struct cs_etm_auxtrace *etm = NULL;
3363 	struct perf_record_time_conv *tc = &session->time_conv;
3364 	int event_header_size = sizeof(struct perf_event_header);
3365 	int total_size = auxtrace_info->header.size;
3366 	int priv_size = 0;
3367 	int num_cpu, max_cpu = 0;
3368 	int err = 0;
3369 	int aux_hw_id_found;
3370 	int i;
3371 	u64 *ptr = NULL;
3372 	u64 **metadata = NULL;
3373 
3374 	/* First the global part */
3375 	ptr = (u64 *) auxtrace_info->priv;
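	/*
	 * The lower 32 bits of CS_PMU_TYPE_CPUS hold the number of CPUs with
	 * metadata; the upper 32 bits hold the PMU type (read further below).
	 */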
3376 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3377 	metadata = zalloc(sizeof(*metadata) * num_cpu);
3378 	if (!metadata)
3379 		return -ENOMEM;
3380 
3381 	/* Start parsing after the common part of the header */
3382 	i = CS_HEADER_VERSION_MAX;
3383 
3384 	/*
3385 	 * The metadata is stored in the auxtrace_info section and encodes
3386 	 * the configuration of the Arm Embedded Trace Macrocell, which the
3387 	 * trace decoder requires to properly decode the highly compressed
3388 	 * trace.
3389 	 */
3390 	for (int j = 0; j < num_cpu; j++) {
3391 		if (ptr[i] == __perf_cs_etmv3_magic) {
3392 			metadata[j] =
3393 				cs_etm__create_meta_blk(ptr, &i,
3394 							CS_ETM_PRIV_MAX,
3395 							CS_ETM_NR_TRC_PARAMS_V0);
3396 		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3397 			metadata[j] =
3398 				cs_etm__create_meta_blk(ptr, &i,
3399 							CS_ETMV4_PRIV_MAX,
3400 							CS_ETMV4_NR_TRC_PARAMS_V0);
3401 		} else if (ptr[i] == __perf_cs_ete_magic) {
3402 			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3403 		} else {
3404 			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3405 				  ptr[i]);
3406 			err = -EINVAL;
3407 			goto err_free_metadata;
3408 		}
3409 
3410 		if (!metadata[j]) {
3411 			err = -ENOMEM;
3412 			goto err_free_metadata;
3413 		}
3414 
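		/* Track the highest CPU number in the metadata; it sizes the auxtrace queue array below. */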
3415 		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3416 			max_cpu = metadata[j][CS_ETM_CPU];
3417 	}
3418 
3419 	/*
3420 	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3421 	 * CS_ETMV4_PRIV_MAX marks how many double words are in the
3422 	 * global metadata and each CPU's metadata respectively.
3423 	 * The following tests if the correct number of double words was
3424 	 * present in the auxtrace info section.
3425 	 */
3426 	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3427 	if (i * 8 != priv_size) {
3428 		err = -EINVAL;
3429 		goto err_free_metadata;
3430 	}
3431 
3432 	etm = zalloc(sizeof(*etm));
3433 
3434 	if (!etm) {
3435 		err = -ENOMEM;
3436 		goto err_free_metadata;
3437 	}
3438 
3439 	/*
3440 	 * As all the ETMs run at the same exception level, the system should
3441 	 * have the same PID format crossing CPUs.  So cache the PID format
3442 	 * and reuse it for sequential decoding.
3443 	 */
3444 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3445 
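	/* Allocate max_cpu + 1 queues so there is one queue for every possible CPU number. */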
3446 	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
3447 	if (err)
3448 		goto err_free_etm;
3449 
3450 	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
3451 		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
3452 		if (err)
3453 			goto err_free_queues;
3454 	}
3455 
3456 	if (session->itrace_synth_opts->set) {
3457 		etm->synth_opts = *session->itrace_synth_opts;
3458 	} else {
3459 		itrace_synth_opts__set_default(&etm->synth_opts,
3460 				session->itrace_synth_opts->default_no_sample);
3461 		etm->synth_opts.callchain = false;
3462 	}
3463 
3464 	etm->session = session;
3465 
3466 	etm->num_cpu = num_cpu;
3467 	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3468 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3469 	etm->metadata = metadata;
3470 	etm->auxtrace_type = auxtrace_info->type;
3471 
3472 	if (etm->synth_opts.use_timestamp)
3473 		/*
3474 		 * Arm CPUs prior to Armv8.4 don't support the FEAT_TRF feature,
3475 		 * so the decoder cannot know whether the traced timestamps are
3476 		 * the same as kernel time.
3477 		 *
3478 		 * A user who knows the platform can specify the itrace option
3479 		 * 'T' to tell the decoder to forcibly use the traced timestamps
3480 		 * as kernel time.
3481 		 */
3482 		etm->has_virtual_ts = true;
3483 	else
3484 		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3485 		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3486 
3487 	if (!etm->has_virtual_ts)
3488 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3489 			    "The time field of the samples will not be set accurately.\n"
3490 			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3491 			    "you can specify the itrace option 'T' for timestamp decoding\n"
3492 			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3493 
3494 	etm->auxtrace.process_event = cs_etm__process_event;
3495 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3496 	etm->auxtrace.flush_events = cs_etm__flush_events;
3497 	etm->auxtrace.free_events = cs_etm__free_events;
3498 	etm->auxtrace.free = cs_etm__free;
3499 	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3500 	session->auxtrace = &etm->auxtrace;
3501 
3502 	err = cs_etm__setup_timeless_decoding(etm);
3503 	if (err)
3504 		return err;
3505 
3506 	etm->tc.time_shift = tc->time_shift;
3507 	etm->tc.time_mult = tc->time_mult;
3508 	etm->tc.time_zero = tc->time_zero;
3509 	if (event_contains(*tc, time_cycles)) {
3510 		etm->tc.time_cycles = tc->time_cycles;
3511 		etm->tc.time_mask = tc->time_mask;
3512 		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3513 		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3514 	}
3515 	err = cs_etm__synth_events(etm, session);
3516 	if (err)
3517 		goto err_free_queues;
3518 
3519 	err = cs_etm__queue_aux_records(session);
3520 	if (err)
3521 		goto err_free_queues;
3522 
3523 	/*
3524 	 * Map Trace ID values to CPU metadata.
3525 	 *
3526 	 * Trace metadata will always contain Trace ID values from the legacy algorithm
3527 	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
3528 	 * or the kernel doesn't emit them.
3529 	 *
3530 	 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3531 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3532 	 * in which case a different value will be used. This means an older perf may still
3533 	 * be able to record and read files generated on a newer system.
3534 	 *
3535 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3536 	 * those packets. If they are there then the values will be mapped and plugged into
3537 	 * the metadata and decoders are only created for each mapping received.
3538 	 *
3539 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel -
3540 	 * then we map Trace ID values to CPU directly from the metadata and create decoders
3541 	 * for all mappings.
3542 	 */
3543 
3544 	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3545 	aux_hw_id_found = 0;
3546 	err = perf_session__peek_events(session, session->header.data_offset,
3547 					session->header.data_size,
3548 					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3549 	if (err)
3550 		goto err_free_queues;
3551 
3552 	/* if no HW ID found this is a file with metadata values only, map from metadata */
3553 	if (!aux_hw_id_found) {
3554 		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3555 		if (err)
3556 			goto err_free_queues;
3557 	}
3558 
3559 	err = cs_etm__create_decoders(etm);
3560 	if (err)
3561 		goto err_free_queues;
3562 
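	/*
	 * Record whether the queues were populated here so that
	 * cs_etm__process_auxtrace_event() knows it doesn't need to queue the
	 * buffers again.
	 */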
3563 	etm->data_queued = etm->queues.populated;
3564 	return 0;
3565 
3566 err_free_queues:
3567 	auxtrace_queues__free(&etm->queues);
3568 	session->auxtrace = NULL;
3569 err_free_etm:
3570 	zfree(&etm);
3571 err_free_metadata:
3572 	/* No need to check @metadata[j]; free(NULL) is supported */
3573 	for (int j = 0; j < num_cpu; j++)
3574 		zfree(&metadata[j]);
3575 	zfree(&metadata);
3576 	return err;
3577 }
3578